# Deep Dive Evaluation during Training

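Execution: the shell commands below launch the training run with the same options file that this notebook loads.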

```{bash}
cd /home/ec2-user/SageMaker/foodi-ml
source activate python3
export DATA_PATH=/home/ec2-user/SageMaker/data/
python run.py options/adapt/foodi-ml/i2t.yaml

#nvidia-smi
```

In [1]:
import os
# Work from the repository root so that the relative paths used later in this
# notebook (e.g. "options/adapt/foodi-ml/i2t.yaml") resolve correctly.
os.chdir("/home/ec2-user/SageMaker/foodi-ml/")

In [2]:
import os
import numpy as np
import torch
from tqdm import tqdm
from addict import Dict

import params
from retrieval.train import train
from retrieval.utils import helper
from retrieval.model import loss
from retrieval.model.model import Retrieval
from retrieval.data.loaders import get_loaders
from retrieval.utils.logger import create_logger
from retrieval.utils.helper import load_model
from retrieval.utils.file_utils import load_yaml_opts, parse_loader_name

# Functions

In [3]:
def get_data_path(opt):
    """Return the dataset root directory.

    The ``DATA_PATH`` environment variable takes precedence. When it is not
    set, ``opt.dataset.data_path`` is returned if it is truthy.

    Raises:
        Exception: if neither the environment variable nor the option is set.
    """
    # Environment variable wins over the YAML option.
    if 'DATA_PATH' in os.environ:
        return os.environ['DATA_PATH']

    if opt.dataset.data_path:
        return opt.dataset.data_path

    raise Exception('''
                DATA_PATH not specified.
                Please, run "$ export DATA_PATH=/path/to/dataset"
                or add path to yaml file
            ''')

In [4]:
def get_tokenizers(train_loader):
    """Return the tokenizer(s) of the loader's dataset as a list.

    Args:
        train_loader: object exposing ``dataset.tokenizer``, which is either a
            single tokenizer or a list of tokenizers.

    Returns:
        The list stored in ``dataset.tokenizer`` (the same object, not a copy),
        or a new one-element list wrapping a single tokenizer.
    """
    tokenizers = train_loader.dataset.tokenizer
    # isinstance also accepts list subclasses, which an exact type comparison
    # would wrongly wrap in a second list.
    if not isinstance(tokenizers, list):
        tokenizers = [tokenizers]
    return tokenizers

In [5]:
def set_criterion(opt, model):
    """Build the loss criteria described by ``opt`` and attach them to ``model``.

    When ``opt.criterion`` contains a ``'name'`` key, the options are logged
    and both criteria are created with ``loss.get_loss(**opt.criterion)``.
    Otherwise both are ``loss.ContrastiveLoss`` instances, configured from
    ``opt.criterion`` (multimodal) and ``opt.ml_criterion`` (multilanguage).

    Nothing is returned; the criteria are handed to ``set_model_criterion``.
    """
    # NOTE(review): in the named branch the multilanguage criterion is also
    # configured from opt.criterion rather than opt.ml_criterion - confirm
    # that this is intended.
    if 'name' in opt.criterion:
        logger.info(opt.criterion)
        factory, ml_options = loss.get_loss, opt.criterion
    else:
        factory, ml_options = loss.ContrastiveLoss, opt.ml_criterion

    # Two separate instances are created even when the options are identical.
    mm_loss = factory(**opt.criterion)
    ml_loss = factory(**ml_options)
    set_model_criterion(opt, model, ml_loss, mm_loss)


def set_model_criterion(opt, model, multilanguage_criterion, multimodal_criterion):
    """Attach the criteria to ``model`` as ``mm_criterion`` / ``ml_criterion``.

    ``model.mm_criterion`` is always set. ``model.ml_criterion`` is set to
    ``multilanguage_criterion`` only when ``opt.dataset.adapt.data`` is
    non-empty; otherwise it is ``None``.
    """
    model.mm_criterion = multimodal_criterion
    model.ml_criterion = None
    # Without adaptation data there is no multilanguage criterion to attach.
    if len(opt.dataset.adapt.data) == 0:
        return
    model.ml_criterion = multilanguage_criterion

# Run

In [6]:
# Same value as the shell `export DATA_PATH=...`; get_data_path() checks this
# environment variable before falling back to the YAML option.
os.environ["DATA_PATH"] = "/home/ec2-user/SageMaker/data/"

# Options file, given relative to the current working directory.
options = "options/adapt/foodi-ml/i2t.yaml"

# addict.Dict returns an empty Dict for a missing attribute instead of raising,
# so without an explicit entry `args.local_rank` would silently hand an empty
# Dict to get_loaders(). Rank 0 is used for this single-process session.
args = Dict({"options": options, "local_rank": 0})
opt = load_yaml_opts(args.options)

logger = create_logger(level='debug' if opt.engine.debug else 'info')

# Get path of the data
data_path = get_data_path(opt)

# Get loaders
train_loader, val_loaders, adapt_loaders = get_loaders(data_path, args.local_rank, opt)

2021-08-18 10:24:50,236 - [INFO    ] - Loaded vocab containing 2487 tokens
2021-08-18 10:24:50,237 - [INFO    ] - Loaded from .vocab_cache/foodiml_vocab.json.
2021-08-18 10:24:50,237 - [INFO    ] - Created tokenizer with init 2487 tokens.
2021-08-18 10:24:50,279 - [INFO    ] - [FoodiML] Loaded 5608 images and 5608 annotations.
2021-08-18 10:24:50,283 - [INFO    ] - Loaded vocab containing 2487 tokens
2021-08-18 10:24:50,283 - [INFO    ] - Loaded from .vocab_cache/foodiml_vocab.json.
2021-08-18 10:24:50,284 - [INFO    ] - Created tokenizer with init 2487 tokens.
2021-08-18 10:24:50,323 - [INFO    ] - [FoodiML] Loaded 2403 images and 2403 annotations.
2021-08-18 10:24:50,326 - [INFO    ] - Adapt loaders: 0


In [7]:
# Tokenizer(s) of the training dataset, always as a list (see get_tokenizers).
tokenizers = get_tokenizers(train_loader)

In [8]:
# Build the retrieval model from the `model` section of the loaded options,
# passing in the tokenizers taken from the training loader.
model = Retrieval(**opt.model, tokenizers=tokenizers)

2021-08-18 10:24:52,108 - [INFO    ] - Image encoder created: ('full_image',)
2021-08-18 10:24:52,400 - [INFO    ] - Text encoder created: gru_glove
2021-08-18 10:24:52,470 - [INFO    ] - Created similarity: AdaptiveEmbeddingI2T(
  (norm): Normalization(
    (norm): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  )
  (adapt_txt): ADAPT(
    (fc_gamma): Sequential(
      (0): Linear(in_features=2048, out_features=2048, bias=True)
    )
    (fc_beta): Sequential(
      (0): Linear(in_features=2048, out_features=2048, bias=True)
    )
  )
  (fovea): Fovea(smooth=10,train_smooth: False)
)
2021-08-18 10:24:55,431 - [INFO    ] - Setting devices: img: cuda,txt: cuda, loss: cuda
2021-08-18 10:24:55,431 - [INFO    ] - Using similarity: ('adapt_i2t',)


In [9]:
# Use tqdm.write when model.master is truthy; otherwise fall back to a
# pass-through lambda that returns its argument without printing.
print_fn = tqdm.write if model.master else (lambda x: x)
print_fn

<bound method tqdm.write of <class 'tqdm._tqdm.tqdm'>>

In [10]:
# Attach the loss criteria to the model in place (set_criterion returns nothing).
set_criterion(opt, model)

In [11]:
# Trainer
trainer = train.Trainer(
    model=model,
    args=opt,
    sysoutlog=print_fn,
    path=opt.exp.outpath,
    world_size=1 # TODO
)

In [12]:
# Configure optimisation from the `optimizer` section of the options.
# Gradient-norm and histogram logging are switched off here; the modules listed
# in opt.model.freeze_modules are passed along as freeze_modules.
trainer.setup_optim(
        lr=opt.optimizer.lr,
        lr_scheduler=opt.optimizer.lr_scheduler,
        clip_grad=opt.optimizer.grad_clip,
        log_grad_norm=False,
        log_histograms=False,
        optimizer=opt.optimizer,
        freeze_modules=opt.model.freeze_modules
)

2021-08-18 10:24:55,463 - [INFO    ] - lr 0.001
2021-08-18 10:24:55,464 - [INFO    ] - [0.5, 2.0, 4000]
2021-08-18 10:24:55,464 - [INFO    ] - [10000, 20000, 3000]


Freezing model.txt_enc.embed.glove
lr: 0.001, #layers: 478, #params: 99,845,812
Total Params: 102,349,912, 


# Save

In [13]:
#trainer.save(
#    path = "runs",
#    is_best = True,
#    epoch = 0,
#)

# Load

In [14]:
#trainer.load()

# Trainer

In [15]:
# Local aliases for the values used by the walkthrough below.
# train_loader already has the name that is needed, so it is not re-assigned.
valid_loaders = val_loaders
lang_loaders = adapt_loaders
nb_epochs = opt.engine.nb_epochs
valid_interval = opt.engine.valid_interval
log_interval = opt.engine.print_freq

In [16]:
# Show each walkthrough input on its own line.
print(
    train_loader,
    valid_loaders,
    lang_loaders,
    nb_epochs,
    valid_interval,
    log_interval,
    sep="\n",
)

<torch.utils.data.dataloader.DataLoader object at 0x7f957f351208>
[<torch.utils.data.dataloader.DataLoader object at 0x7f957e9aa278>]
[]
1
500
10


# Train epoch(Deep dive) -------------------------------- START

In [17]:
# Inputs for stepping through one training epoch. train_loader and
# lang_loaders are reused exactly as defined in the earlier cells.
epoch = 0
valid_loaders = val_loaders
# Hard-coded for this walkthrough; these replace the values previously read
# from opt.engine (print_freq / valid_interval).
log_interval = 50
valid_interval = 500

In [18]:
# Pull a single batch from a fresh iterator over the training loader.
batch = next(iter(train_loader))

## Run Evaluation (Deep Dive) ----------------- START

### Evaluate Loaders (Deep Dive) ----------------- START

In [66]:
### Deep dive evaluate_loaders
# metrics, val_metric = self.evaluate_loaders(valid_loaders)

In [67]:
# Bind the validation loaders to the name used in the commented
# evaluate_loaders(valid_loaders) call.
loaders = valid_loaders

In [68]:
# Start values for the evaluation bookkeeping.
# NOTE(review): presumably filled per loader by the evaluation loop; they are
# not used again in the visible part of this notebook.
loader_metrics = {}
final_sum = 0.
nb_loaders = len(loaders)

In [69]:
# Look at the first validation loader only (the list printed earlier holds one).
loader = loaders[0]

In [70]:
# Use the string form of the loader's dataset as its name.
loader_name = str(loader.dataset)

#### Predict Loader (Deep Dive) ----------------- START

In [71]:
### Deep dive predict_loader
# img_emb, txt_emb, lens = evaluation.predict_loader(self.model, loader, self.device)

In [72]:
# arguments
model = trainer.model
data_loader = loader

In [73]:
# Output buffers start unset; they are allocated a few cells further down.
img_embs = cap_embs = cap_lens = None
# Fixed width used for the second axis of the caption buffer (cap_embs).
max_n_word = 77
# Put the model into evaluation mode before running the forward pass.
model.eval()
print("Done")

Done


In [74]:
# Take the first batch of the evaluation loader; this rebinds `batch`, which
# previously held a training batch.
genload = iter(data_loader)
batch = next(genload)

In [75]:
# Length of the first element of the caption entry (32 in the recorded output,
# the same as the number of indices in batch['index']).
len(batch['caption'][0])

32

In [76]:
# Dataset indices of the batch items and the caption data with its lengths.
ids = batch['index']
cap, lengths = batch['caption']
# Inference only: run the forward pass without autograd so no computation
# graph is built or kept alive for these embeddings.
with torch.no_grad():
    img_emb, cap_emb = model.forward_batch(batch)

In [77]:
# Indices of the batch items; they select the buffer rows written further down.
batch['index']

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31])

In [78]:
# Shape of the image embeddings returned for this batch.
img_emb.shape

torch.Size([32, 2048, 49])

In [79]:
# Shape of the caption embeddings returned for the same batch.
cap_emb.shape

torch.Size([32, 36, 2048])

In [80]:
# This walkthrough always takes the 3-D ("tensor") caching path below.
is_tensor = True
# One row per dataset item; the trailing axes are copied from the batch output.
# np.zeros defaults to float64, so these buffers are large.
img_embs = np.zeros((len(data_loader.dataset), *img_emb.shape[1:3]))
cap_embs = np.zeros((len(data_loader.dataset), max_n_word, cap_emb.shape[2]))
# One caption-length slot per dataset item, initialised to zero.
cap_lens = [0 for _ in range(len(data_loader.dataset))]

In [81]:
# Width used for the second axis of the caption buffer.
max_n_word

77

In [82]:
# One slot per dataset item.
len(cap_lens)

2403

In [83]:
# Shape of the newly allocated image buffer.
img_embs.shape

(2403, 2048, 49)

In [84]:
# Shape of the newly allocated caption buffer.
cap_embs.shape

(2403, 77, 2048)

In [85]:
# Copy this batch's embeddings into the dataset-sized buffers at rows `ids`.
# NOTE(review): cap_lens is allocated earlier but is not filled from `lengths`
# anywhere in this walkthrough - confirm whether that step is missing.
img_embs[ids] = img_emb.data.cpu().numpy()
if not is_tensor:
    cap_embs[ids,] = cap_emb.data.cpu().numpy()
else:
    # Only the first max(lengths) positions of the caption buffer are written.
    cap_embs[ids, :max(lengths), :] = cap_emb.data.cpu().numpy()

In [86]:
# Buffer shape after caching one batch (rows are written in place).
img_embs.shape

(2403, 2048, 49)

In [93]:
# Caption buffer shape after caching one batch.
cap_embs.shape

(2403, 77, 2048)

In [95]:
# Number of caption-length slots (one per dataset item).
len(cap_lens)

2403

#### Predict Loader (Deep Dive) ----------------- END

In [96]:
from retrieval.train.evaluation import predict_loader

In [97]:
# Run the library's predict_loader over the whole validation loader.
# Note: img_emb is rebound here, replacing the single-batch tensor from the
# walkthrough with the result for the full loader.
img_emb, txt_emb, lens = predict_loader(trainer.model, loader, trainer.device)

                                                       

In [99]:
# Shapes of both embedding arrays and the number of caption lengths, one per line.
print(img_emb.shape, txt_emb.shape, len(lens), sep="\n")

(2403, 2048, 49)
(2403, 77, 2048)
2403


In [105]:
from retrieval.train.evaluation import evaluate

In [None]:
# Evaluate retrieval on the embeddings produced by predict_loader.
# NOTE(review): this is slow (the recorded progress bar shows about 12 s per
# iteration); the meaning of shared_size=128 is not visible here - presumably
# a chunk size, confirm against retrieval.train.evaluation.
result = evaluate(
    model=trainer.model, 
    img_emb=img_emb,
    txt_emb=txt_emb, 
    lengths=lens,
    device=trainer.device, 
    shared_size=128
)

Test  :  74%|███████▎  | 14/19 [02:53<01:01, 12.38s/it]

#### Evaluate (Deep Dive) ----------------- START

In [None]:
# Local names matching the keyword arguments of the evaluate(...) call above.
# img_emb and txt_emb already have the right names, so they are not re-assigned.
model = trainer.model
lengths = lens
device = trainer.device
shared_size = 128
# NOTE(review): presumably mirrors an optional argument of evaluate - confirm.
return_sims = False

#### Evaluate (Deep Dive) ----------------- END

### Evaluate Loaders (Deep Dive) ----------------- END

## Run Evaluation (Deep Dive) ----------------- END

# Train epoch(Deep dive) -------------------------------- END