In [1]:
import os
# Make the project checkout the working directory. Later cells use paths that
# are relative to it (for example the options YAML file).
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
os.chdir("/home/ec2-user/SageMaker/foodi-ml/")

In [2]:
import os
import numpy as np
import torch
from tqdm import tqdm
from addict import Dict

import params
from retrieval.train import train
from retrieval.utils import helper
from retrieval.model import loss
from retrieval.model.model import Retrieval
from retrieval.data.loaders import get_loaders
from retrieval.utils.logger import create_logger
from retrieval.utils.helper import load_model
from retrieval.utils.file_utils import load_yaml_opts, parse_loader_name

In [3]:
# Evaluation imports
import retrieval.train.evaluation as evaluation 
from retrieval.model.similarity.similarity import Normalization
from retrieval.model.similarity.measure import l2norm, cosine_sim

In [4]:
def get_tokenizers(train_loader):
    """Return the tokenizer(s) of ``train_loader.dataset`` as a list.

    Args:
        train_loader: Object exposing ``dataset.tokenizer``, which holds either
            a single tokenizer or a list of tokenizers.

    Returns:
        list: The list stored on the dataset (returned as-is), or a new
        one-element list wrapping a single tokenizer.
    """
    tokenizers = train_loader.dataset.tokenizer
    # isinstance (instead of an exact type comparison) also recognises list
    # subclasses, which would otherwise be wrapped in a second list.
    if not isinstance(tokenizers, list):
        tokenizers = [tokenizers]
    return tokenizers

In [5]:
def get_data_path(opt):
    """Resolve the dataset root directory.

    The ``DATA_PATH`` environment variable wins whenever it is defined;
    otherwise ``opt.dataset.data_path`` is used.

    Args:
        opt: Options object exposing ``dataset.data_path``.

    Returns:
        The value of ``DATA_PATH`` when the variable is set, else
        ``opt.dataset.data_path``.

    Raises:
        Exception: If ``DATA_PATH`` is unset and ``opt.dataset.data_path``
            is falsy.
    """
    if 'DATA_PATH' in os.environ:
        return os.environ['DATA_PATH']

    yaml_path = opt.dataset.data_path
    if yaml_path:
        return yaml_path

    raise Exception('''
                DATA_PATH not specified.
                Please, run "$ export DATA_PATH=/path/to/dataset"
                or add path to yaml file
            ''')

In [6]:
# get_data_path() prefers this environment variable over the YAML
# `dataset.data_path` entry.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
os.environ["DATA_PATH"] = "/home/ec2-user/SageMaker/data/"

# Relative to the current working directory.
options = "options/adapt/foodi-ml/i2t.yaml"

args = Dict({"options": options})
opt = load_yaml_opts(args.options)

# Both branches of this conditional used to be 'info', so the debug flag in the
# options had no effect on the log level.
logger = create_logger(level='debug' if opt.engine.debug else 'info')

# Get path of the data
data_path = get_data_path(opt)

# Get loaders
# NOTE(review): `local_rank` is never set on `args` (it only holds "options"),
# so whatever the Dict yields for a missing key is passed here — confirm that
# get_loaders handles it as intended.
train_loader, val_loaders, adapt_loaders = get_loaders(data_path, args.local_rank, opt)

2021-08-18 17:44:15,457 - [INFO    ] - Loaded vocab containing 2487 tokens
2021-08-18 17:44:15,457 - [INFO    ] - Loaded from .vocab_cache/foodiml_vocab.json.
2021-08-18 17:44:15,458 - [INFO    ] - Created tokenizer with init 2487 tokens.
2021-08-18 17:44:15,499 - [INFO    ] - [FoodiML] Loaded 5608 images and 5608 annotations.
2021-08-18 17:44:15,503 - [INFO    ] - Loaded vocab containing 2487 tokens
2021-08-18 17:44:15,503 - [INFO    ] - Loaded from .vocab_cache/foodiml_vocab.json.
2021-08-18 17:44:15,503 - [INFO    ] - Created tokenizer with init 2487 tokens.
2021-08-18 17:44:15,542 - [INFO    ] - [FoodiML] Loaded 2403 images and 2403 annotations.
2021-08-18 17:44:15,544 - [INFO    ] - Adapt loaders: 0


In [7]:
# Collect the training dataset's tokenizer(s) as a list for the model constructor.
tokenizers = get_tokenizers(train_loader)

In [8]:
# Build the retrieval model from the `model` section of the options, passing the
# tokenizer list alongside.
model = Retrieval(**opt.model, tokenizers=tokenizers)

2021-08-18 17:44:17,082 - [INFO    ] - Image encoder created: ('full_image',)
2021-08-18 17:44:17,373 - [INFO    ] - Text encoder created: gru_glove
2021-08-18 17:44:17,443 - [INFO    ] - Created similarity: AdaptiveEmbeddingI2T(
  (norm): Normalization(
    (norm): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  )
  (adapt_txt): ADAPT(
    (fc_gamma): Sequential(
      (0): Linear(in_features=2048, out_features=2048, bias=True)
    )
    (fc_beta): Sequential(
      (0): Linear(in_features=2048, out_features=2048, bias=True)
    )
  )
  (fovea): Fovea(smooth=10,train_smooth: False)
)
2021-08-18 17:44:20,406 - [INFO    ] - Setting devices: img: cuda,txt: cuda, loss: cuda
2021-08-18 17:44:20,407 - [INFO    ] - Using similarity: ('adapt_i2t',)


In [9]:
def set_criterion(opt, model):
    """Build the multimodal and multilanguage criteria and attach them to ``model``.

    When ``opt.criterion`` contains a ``'name'`` key, the options are logged and
    both criteria are created with ``loss.get_loss(**opt.criterion)``.
    Otherwise ``loss.ContrastiveLoss`` is instantiated from ``opt.criterion``
    (multimodal) and from ``opt.ml_criterion`` (multilanguage). The criteria
    are then handed to ``set_model_criterion``.

    Args:
        opt: Options object providing ``criterion`` (and ``ml_criterion`` when
            no ``'name'`` key is present).
        model: Model that receives the criteria.

    Returns:
        None.
    """
    if 'name' not in opt.criterion:
        mm_loss = loss.ContrastiveLoss(**opt.criterion)
        ml_loss = loss.ContrastiveLoss(**opt.ml_criterion)
    else:
        logger.info(opt.criterion)
        mm_loss = loss.get_loss(**opt.criterion)
        ml_loss = loss.get_loss(**opt.criterion)

    set_model_criterion(opt, model, ml_loss, mm_loss)


def set_model_criterion(opt, model, multilanguage_criterion, multimodal_criterion):
    """Attach the criteria to ``model``.

    ``model.mm_criterion`` always receives ``multimodal_criterion``.
    ``model.ml_criterion`` receives ``multilanguage_criterion`` only when
    ``opt.dataset.adapt.data`` is non-empty; otherwise it is ``None``.

    Args:
        opt: Options object exposing ``dataset.adapt.data`` (a sized collection).
        model: Object on which the two attributes are set.
        multilanguage_criterion: Criterion stored as ``ml_criterion`` when
            adapt data is configured.
        multimodal_criterion: Criterion stored as ``mm_criterion``.

    Returns:
        None.
    """
    model.mm_criterion = multimodal_criterion
    model.ml_criterion = None

    adapt_sources = opt.dataset.adapt.data
    if len(adapt_sources) == 0:
        return
    model.ml_criterion = multilanguage_criterion

In [10]:
# When `model.master` is truthy, messages go through tqdm.write; otherwise the
# print function simply hands its argument back without printing.
if model.master:
    print_fn = tqdm.write
else:
    print_fn = lambda x: x

# Attach the loss criteria to the model.
set_criterion(opt, model)

In [11]:
# Trainer
# Wrap the model in the project's Trainer. `sysoutlog` receives the print
# function selected earlier and `path` the experiment output path from the
# options.
trainer = train.Trainer(
    model=model,
    args=opt,
    sysoutlog=print_fn,
    path=opt.exp.outpath,
    world_size=1 # TODO
)

In [12]:
# Load best model trained so far
# NOTE(review): what `load()` restores, and from where, is defined by Trainer —
# presumably a checkpoint under `opt.exp.outpath`; confirm.
trainer.load()

In [13]:
# metrics, val_metrics = trainer.evaluate_loaders(val_loaders)

In [14]:
# Bookkeeping variables for the manual, step-by-step evaluation.
loader_metrics = {}
final_sum = 0.
nb_loaders = len(val_loaders)
# Only the first validation loader is used from here on.
loader = val_loaders[0]
loader_name = str(loader.dataset)

## Evaluate

In [15]:
# Create embeddings
# Run the trainer's model over the selected loader. The call returns the image
# embeddings, the text embeddings and `lens` (presumably the caption lengths —
# confirm in evaluation.predict_loader).
img_emb, txt_emb, lens = evaluation.predict_loader(
    trainer.model, 
    loader, 
    trainer.device
)

                                                       

In [16]:
# Short aliases and settings for the manual evaluation below.
# NOTE(review): `lengths`, `shared_size` and `return_sims` are not read by any
# later cell visible in this notebook.
model=trainer.model
lengths=lens
device=trainer.device
shared_size=128
return_sims=False
# Image feature dimension from the options; compared against the image
# embedding's axis 1 to decide whether that tensor must be permuted.
latent_size = opt['model']['img_enc']['params']['img_dim']

In [17]:
# Switch the model to evaluation mode.
model.eval()
# Names of the retrieval metrics.
# NOTE(review): not read by any later cell visible in this notebook.
_metrics_ = ('r1', 'r5', 'r10', 'medr', 'meanr')

In [18]:
# Convert the embeddings returned by predict_loader into CPU float32 tensors.
img_emb = torch.FloatTensor(img_emb)
txt_emb = torch.FloatTensor(txt_emb)

In [19]:
# AdaptiveEmbeddingI2T: forward
# Unpack the three dimensions of the image embeddings; only LT is used below.
BB, LT, KK = img_emb.shape
# Swap the last two axes of the text embeddings.
txt_emb = txt_emb.permute(0, 2, 1)
# If axis 1 of the image tensor does not equal the latent size, swap its last
# two axes as well (presumably to bring the latent dimension to axis 1 — confirm).
if LT != latent_size:
    print("Permutting image tensor")
    img_emb = img_emb.permute(0, 2, 1)
# NOTE: this cell rebinds txt_emb (and possibly img_emb); running it a second
# time permutes the text embeddings back to their previous layout.

In [20]:
# Create Normalization layer and set to device
# NOTE(review): this is a newly constructed layer, not the `norm` submodule
# that the model summary lists inside the similarity module. A new torch
# module starts in training mode, so a batch-norm layer built here presumably
# normalises with the statistics of this one batch rather than stored running
# statistics — confirm that this is the intended evaluation behaviour.
norm = Normalization(latent_size, norm_method='batchnorm')
norm = norm.to(device)
txt_emb = txt_emb.to(device)
txt_emb = norm(txt_emb)
# Keep only a CPU copy of the normalised text embeddings, then drop the
# device-side references and release cached GPU memory.
txt_emb_cpu = txt_emb.to('cpu')
del norm
del txt_emb
torch.cuda.empty_cache()

In [21]:
# Matrix to put results
# Rows are indexed by image, columns by text embedding.
sims = torch.zeros(img_emb.shape[0], txt_emb_cpu.shape[0])

# Global image representation
# Average over the last axis. This rebinds img_emb, so re-running the cell
# would reduce the tensor a second time.
img_emb = img_emb.mean(-1)

### Loop

In [22]:
# Reset the similarity matrix to zeros (same shape) before filling it.
sims = torch.zeros(sims.shape)

In [None]:
# Fill `sims` one image at a time: row i receives the similarity between
# image i and every text embedding.
#
# The whole computation runs under torch.no_grad(). This is evaluation only,
# and `adapt_txt` contains Linear layers whose parameters track gradients.
# Without no_grad, every `sims[i, :] = sim` write chains that iteration's
# autograd graph (and the GPU tensors it saves for backward) onto `sims`, so
# memory keeps growing across iterations despite the `del` statements.
similarity_module = trainer.model.similarity.similarity

with torch.no_grad():
    # The text embeddings are identical for every image, so upload them once
    # instead of once per iteration.
    txt_emb = txt_emb_cpu.to(device)

    for i, img_tensor in enumerate(tqdm(img_emb, desc="Computing similarities")):
        img_vector_cpu = img_tensor.unsqueeze(0)
        img_vector = img_vector_cpu.to(device)

        txt_output = similarity_module.adapt_txt(value=txt_emb, query=img_vector)
        txt_output_fovea = similarity_module.fovea(txt_output)

        # Take the max over the last axis on the device, so that only the
        # pooled result is copied to the host rather than the full tensor.
        txt_vector_cpu = txt_output_fovea.max(dim=-1)[0].to('cpu')

        # Drop the large device intermediates so the allocator can reuse their
        # memory in the next iteration.
        del txt_output
        del txt_output_fovea
        del img_vector

        txt_vector_cpu = l2norm(txt_vector_cpu, dim=-1)
        img_vector_cpu = l2norm(img_vector_cpu, dim=-1)

        # similarity
        sim = cosine_sim(img_vector_cpu, txt_vector_cpu)
        sim = sim.squeeze(-1)
        sims[i, :] = sim

    del txt_emb

# Return the cached device memory once everything is done.
torch.cuda.empty_cache()

### Loop unfolded

In [22]:
# Start 1,184 GB
# Step through a single loop iteration by hand, using the image at index 3.
i, img_tensor = (3, img_emb[3, :])
img_vector_cpu = img_tensor.unsqueeze(0)

# Copy the text embeddings and the selected image vector to the device.
txt_emb = txt_emb_cpu.to(device)
img_vector = img_vector_cpu.to(device)

In [23]:
# 2,639 GB
# Apply the model's `adapt_txt` module to the text embeddings, with the image
# vector as query.
# NOTE(review): there is no torch.no_grad() here, so the result presumably
# carries an autograd graph — confirm whether that inflates the memory figures
# noted in these cells.
txt_output = trainer.model.similarity.similarity.adapt_txt(value=txt_emb, query=img_vector)

In [24]:
# Apply the model's `fovea` module, then copy the result to the CPU.
txt_output_fovea = trainer.model.similarity.similarity.fovea(txt_output)
txt_output_fovea_cpu = txt_output_fovea.to('cpu')

In [25]:
# Drop the references to the device tensors and release cached GPU memory.
del txt_output
del txt_output_fovea
del txt_emb
del img_vector
torch.cuda.empty_cache()

In [26]:
# 8,414 GB
# Max over the last axis; `[0]` selects the values from the (values, indices) pair.
txt_vector_cpu = txt_output_fovea_cpu.max(dim=-1)[0]

In [27]:
# Apply the project's l2norm helper along the last axis of both vectors.
txt_vector_cpu = l2norm(txt_vector_cpu, dim=-1)
img_vector_cpu = l2norm(img_vector_cpu, dim=-1)

In [29]:
# Similarity between the image vector and the text vectors, stored as row `i`
# of the result matrix.
sim = cosine_sim(img_vector_cpu, txt_vector_cpu)
sim = sim.squeeze(-1)
sims[i,:] = sim

In [30]:
# Release GPU memory still held by PyTorch's caching allocator.
torch.cuda.empty_cache()

In [23]:
#sims = model.get_sim_matrix(
#        embed_a=img_emb,
#        embed_b=txt_emb,
#        lens=lengths
#)