# Exploration of execution

Equivalent command-line execution of the training run:

```{bash}
cd /home/ec2-user/SageMaker/foodi-ml
source activate python3
export DATA_PATH=/home/ec2-user/SageMaker/data/
python run.py options/adapt/foodi-ml/i2t.yaml

#watch -n 1 "nvidia-smi"
```

In [1]:
import os
# Switch the kernel's working directory to the repository checkout.
# NOTE(review): the local imports (`params`, `retrieval.*`) and the relative
# options path used in later cells presumably depend on this -- confirm.
os.chdir("/home/ec2-user/SageMaker/foodi-ml/")

In [2]:
#!pip install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl

In [3]:
#!pip install -r requirements.txt

In [4]:
import os
import torch
from tqdm import tqdm
from addict import Dict

import params
from retrieval.train import train
from retrieval.utils import helper
from retrieval.model import loss
from retrieval.model.model import Retrieval
from retrieval.data.loaders import get_loaders
from retrieval.utils.logger import create_logger
from retrieval.utils.helper import load_model
from retrieval.utils.file_utils import load_yaml_opts, parse_loader_name

# Functions

In [5]:
def get_data_path(opt):
    """Resolve the dataset root directory.

    The ``DATA_PATH`` environment variable takes precedence whenever it is
    set; otherwise ``opt.dataset.data_path`` is used.

    Args:
        opt: Options object exposing ``opt.dataset.data_path``.

    Returns:
        The value of ``DATA_PATH`` when it is present in the environment,
        else ``opt.dataset.data_path``.

    Raises:
        Exception: If ``DATA_PATH`` is not set and ``opt.dataset.data_path``
            is falsy.
    """
    env_path = os.environ.get('DATA_PATH')
    if env_path is not None:
        return env_path

    yaml_path = opt.dataset.data_path
    if yaml_path:
        return yaml_path

    # The message (including its inner whitespace) is kept verbatim.
    raise Exception('''
                DATA_PATH not specified.
                Please, run "$ export DATA_PATH=/path/to/dataset"
                or add path to yaml file
            ''')

In [6]:
def get_tokenizers(train_loader):
    """Return the training dataset's tokenizer(s) as a list.

    Args:
        train_loader: Loader whose ``dataset.tokenizer`` attribute holds
            either a single tokenizer or a list of tokenizers.

    Returns:
        list: ``dataset.tokenizer`` itself when it is already a list
        (list subclasses included), otherwise a one-element list wrapping it.
    """
    tokenizers = train_loader.dataset.tokenizer
    # isinstance (instead of an exact type comparison) leaves list subclasses
    # untouched rather than nesting them inside a second list.
    if not isinstance(tokenizers, list):
        tokenizers = [tokenizers]
    return tokenizers

In [7]:
def set_criterion(opt, model):
    """Build the multimodal and multilanguage losses and attach them to ``model``.

    When ``opt.criterion`` contains a ``'name'`` key, the options are logged
    and both losses are created with ``loss.get_loss(**opt.criterion)``.
    Otherwise ``loss.ContrastiveLoss`` is instantiated with ``opt.criterion``
    for the multimodal loss and with ``opt.ml_criterion`` for the
    multilanguage loss. The pair is then handed to ``set_model_criterion``.

    NOTE(review): the named branch never reads ``opt.ml_criterion`` --
    confirm that reusing ``opt.criterion`` for both losses is intended.

    Args:
        opt: Options object providing ``criterion`` (and ``ml_criterion``
            for the unnamed case).
        model: Model that receives the criteria.

    Returns:
        None. The criteria are only stored on ``model``.
    """
    uses_named_loss = 'name' in opt.criterion
    if uses_named_loss:
        logger.info(opt.criterion)
    make_loss = loss.get_loss if uses_named_loss else loss.ContrastiveLoss

    mm_loss = make_loss(**opt.criterion)
    ml_options = opt.criterion if uses_named_loss else opt.ml_criterion
    ml_loss = make_loss(**ml_options)

    set_model_criterion(opt, model, ml_loss, mm_loss)


def set_model_criterion(opt, model, multilanguage_criterion, multimodal_criterion):
    """Store the loss objects on ``model``.

    ``model.mm_criterion`` always receives ``multimodal_criterion``.
    ``model.ml_criterion`` receives ``multilanguage_criterion`` only when
    ``opt.dataset.adapt.data`` is non-empty; otherwise it is left as ``None``.

    Args:
        opt: Options object exposing ``opt.dataset.adapt.data`` (a sized
            collection).
        model: Object on which the two attributes are set.
        multilanguage_criterion: Loss used for ``model.ml_criterion``.
        multimodal_criterion: Loss used for ``model.mm_criterion``.
    """
    model.mm_criterion = multimodal_criterion
    model.ml_criterion = None
    if len(opt.dataset.adapt.data) == 0:
        # Nothing to adapt on: keep the multilanguage criterion disabled.
        return
    model.ml_criterion = multilanguage_criterion

# Run

In [8]:
# Set DATA_PATH for this kernel only; get_data_path() reads this variable
# before falling back to the yaml `dataset.data_path` entry.
os.environ["DATA_PATH"] = "/home/ec2-user/SageMaker/data/"

In [9]:
# Experiment options file, given as a path relative to the working directory.
options = "options/adapt/foodi-ml/i2t.yaml"

In [10]:
# Minimal stand-in for the command-line arguments of the training script.
# `local_rank` is set explicitly: addict's Dict returns an empty Dict for a
# missing attribute, so `args.local_rank` (forwarded to get_loaders in a later
# cell) would otherwise silently be `{}` instead of an integer rank.
# NOTE(review): 0 is assumed to be the single-process rank -- confirm against
# the default used by run.py / params.
args = Dict({
    "options": options,
    "local_rank": 0,
})
# Load the experiment options from the yaml file referenced by `args.options`.
opt = load_yaml_opts(args.options)

In [11]:
# Create the logger; the level is 'debug' when opt.engine.debug is truthy,
# 'info' otherwise. set_criterion() uses this module-level `logger`.
logger = create_logger(level='debug' if opt.engine.debug else 'info')
#logger.info(f'Used args   : \n{args}')
#logger.info(f'Used options: \n{opt}')

In [12]:
# Resolve the dataset root: DATA_PATH from the environment if set,
# otherwise opt.dataset.data_path.
data_path = get_data_path(opt)

In [13]:
# Build the train, validation and adaptation data loaders.
# `args.local_rank` is forwarded as the second positional argument.
train_loader, val_loaders, adapt_loaders = get_loaders(data_path, args.local_rank, opt)

2021-09-15 11:15:35,822 - [INFO    ] - Loaded vocab containing 245967 tokens
2021-09-15 11:15:35,823 - [INFO    ] - Loaded from .vocab_cache/foodiml_vocab.json.
2021-09-15 11:15:35,823 - [INFO    ] - Created tokenizer with init 245967 tokens.
2021-09-15 11:15:37,044 - [INFO    ] - [FoodiML] Loaded 14052 images annotated 
2021-09-15 11:15:37,232 - [INFO    ] - Loaded vocab containing 245967 tokens
2021-09-15 11:15:37,233 - [INFO    ] - Loaded from .vocab_cache/foodiml_vocab.json.
2021-09-15 11:15:37,233 - [INFO    ] - Created tokenizer with init 245967 tokens.
2021-09-15 11:15:37,479 - [INFO    ] - [FoodiML] Loaded 2897 images annotated 
2021-09-15 11:15:37,482 - [INFO    ] - Adapt loaders: 0


In [14]:
# Tokenizer(s) of the training dataset, always as a list (see get_tokenizers).
tokenizers = get_tokenizers(train_loader)

In [15]:
%%time
# Instantiate the retrieval model from the `model` section of the options,
# passing the training tokenizers explicitly.
# (Keep `%%time` on the first line of the cell: cell magics must come first.)
model = Retrieval(**opt.model, tokenizers=tokenizers)

2021-09-15 11:15:39,056 - [INFO    ] - Image encoder created: ('full_image',)
2021-09-15 11:15:41,290 - [INFO    ] - Text encoder created: gru_glove
2021-09-15 11:15:41,429 - [INFO    ] - Created similarity: AdaptiveEmbeddingI2T(
  (norm): Normalization(
    (norm): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  )
  (adapt_txt): ADAPT(
    (fc_gamma): Sequential(
      (0): Linear(in_features=2048, out_features=2048, bias=True)
    )
    (fc_beta): Sequential(
      (0): Linear(in_features=2048, out_features=2048, bias=True)
    )
  )
  (fovea): Fovea(smooth=10,train_smooth: False)
)
2021-09-15 11:15:41,430 - [INFO    ] - Created similarity: AdaptiveEmbeddingI2T_eval(
  (norm): Normalization(
    (norm): BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  )
  (adapt_txt): ADAPT(
    (fc_gamma): Sequential(
      (0): Linear(in_features=2048, out_features=2048, bias=True)
    )
    (fc_beta): Sequential(
      (0): L

new similarity class initialised


2021-09-15 11:15:44,486 - [INFO    ] - Setting devices: img: cuda,txt: cuda, loss: cuda
2021-09-15 11:15:44,486 - [INFO    ] - Using similarity: ('adapt_i2t',)


CPU times: user 5.53 s, sys: 1.6 s, total: 7.13 s
Wall time: 7 s


In [16]:
# Message sink handed to the trainer: tqdm.write when model.master is truthy,
# otherwise a pass-through that returns its argument without printing.
print_fn = tqdm.write if model.master else (lambda x: x)

In [17]:
# Attach the loss criteria to the model (sets model.mm_criterion / model.ml_criterion).
set_criterion(opt, model)

In [18]:
# Create the trainer for `model`; `print_fn` is passed as its `sysoutlog`
# callable and `opt.exp.outpath` as its output path.
trainer = train.Trainer(
        model=model,
        args=opt,
        sysoutlog=print_fn,
        path=opt.exp.outpath,
        world_size=1 # TODO: hard-coded; presumably means a single process -- revisit for distributed runs
)

In [19]:
# Configure optimisation from the `optimizer` section of the options.
# Gradient-norm and histogram logging are switched off here, and the modules
# listed in opt.model.freeze_modules are passed as `freeze_modules`.
trainer.setup_optim(
        lr=opt.optimizer.lr,
        lr_scheduler=opt.optimizer.lr_scheduler,
        clip_grad=opt.optimizer.grad_clip,
        log_grad_norm=False,
        log_histograms=False,
        optimizer=opt.optimizer,
        freeze_modules=opt.model.freeze_modules
)

2021-09-15 11:15:44,535 - [INFO    ] - lr 0.001
2021-09-15 11:15:44,536 - [INFO    ] - [0.5, 2.0, 4000]
2021-09-15 11:15:44,536 - [INFO    ] - [10000, 20000, 3000]


Freezing model.txt_enc.embed.glove
lr: 0.001, #layers: 478, #params: 172,889,812
Total Params: 246,679,912, 


In [20]:
# Start training. Epoch count, validation interval and logging interval come
# from the `engine` section of the options.
# NOTE: in the recorded run this call prompted on stdin because the output
# directory already existed, and it aborted during epoch 0 (around step 104)
# with `RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED`.
trainer.fit(
        train_loader=train_loader,
        valid_loaders=val_loaders,
        lang_loaders=adapt_loaders,
        nb_epochs=opt.engine.nb_epochs,
        valid_interval=opt.engine.valid_interval,
        log_interval=opt.engine.print_freq
)

logs/foodi-ml/adapt_i2t/ already exists! Do you want to rewrite it? [y/n]  y


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]
Steps :   0%|          | 0/469 [00:00<?, ?it/s][A
Steps :   0%|          | 1/469 [00:06<50:22,  6.46s/it][A
Steps :   0%|          | 2/469 [00:07<28:59,  3.72s/it][A
Steps :   1%|          | 3/469 [00:08<21:49,  2.81s/it][A
Steps :   1%|          | 4/469 [00:09<18:15,  2.36s/it][A
Steps :   1%|          | 5/469 [00:10<16:09,  2.09s/it][A
Steps :   1%|▏         | 6/469 [00:11<14:43,  1.91s/it][A
Steps :   1%|▏         | 7/469 [00:12<13:43,  1.78s/it][A
Steps :   2%|▏         | 8/469 [00:13<12:55,  1.68s/it][A
                                              1.61s/it][A
Epochs:   0%|          | 0/10 [00:15<?, ?it/s]         
Steps :   2%|▏         | 9/469 [00:15<13:13,  1.72s/it][A
Steps :   2%|▏         | 10/469 [00:15<11:52,  1.55s/it][A

Batch_Time:   0.971925, Countdown:  50.000000, Epoch:   0.000000, Iteration:  10.000000, K:   0.086441, Loss: 319.261261, Lr_Base:   0.000504, Norm: 685.981467, Total_Loss: 319.261261



Steps :   2%|▏         | 11/469 [00:16<11:29,  1.50s/it][A
Steps :   3%|▎         | 12/469 [00:17<11:10,  1.47s/it][A
Steps :   3%|▎         | 13/469 [00:18<10:52,  1.43s/it][A
Steps :   3%|▎         | 14/469 [00:19<10:35,  1.40s/it][A
Steps :   3%|▎         | 15/469 [00:20<10:22,  1.37s/it][A
Steps :   3%|▎         | 16/469 [00:21<10:08,  1.34s/it][A
Steps :   4%|▎         | 17/469 [00:22<09:58,  1.32s/it][A
Steps :   4%|▍         | 18/469 [00:23<09:54,  1.32s/it][A
                                               1.30s/it][A
Epochs:   0%|          | 0/10 [00:25<?, ?it/s]          
Steps :   4%|▍         | 19/469 [00:25<10:07,  1.35s/it][A
Steps :   4%|▍         | 20/469 [00:25<09:35,  1.28s/it][A

Batch_Time:   0.884861, Countdown:  50.000000, Epoch:   0.000000, Iteration:  20.000000, K:   0.165410, Loss: 294.108215, Lr_Base:   0.000508, Norm: 724.500198, Total_Loss: 294.108215



Steps :   4%|▍         | 21/469 [00:26<09:28,  1.27s/it][A
Steps :   5%|▍         | 22/469 [00:27<09:21,  1.26s/it][A
Steps :   5%|▍         | 23/469 [00:28<09:15,  1.25s/it][A
Steps :   5%|▌         | 24/469 [00:29<09:08,  1.23s/it][A
Steps :   5%|▌         | 25/469 [00:30<09:04,  1.23s/it][A
Steps :   6%|▌         | 26/469 [00:31<08:59,  1.22s/it][A
Steps :   6%|▌         | 27/469 [00:32<08:57,  1.22s/it][A
Steps :   6%|▌         | 28/469 [00:33<08:54,  1.21s/it][A
                                               1.20s/it][A
Epochs:   0%|          | 0/10 [00:36<?, ?it/s]          
Steps :   6%|▌         | 29/469 [00:36<09:06,  1.24s/it][A
Steps :   6%|▋         | 30/469 [00:36<08:47,  1.20s/it][A

Batch_Time:   1.026769, Countdown:  50.000000, Epoch:   0.000000, Iteration:  30.000000, K:   0.237553, Loss: 268.784546, Lr_Base:   0.000511, Norm: 762.339800, Total_Loss: 268.784546



Steps :   7%|▋         | 31/469 [00:36<08:42,  1.19s/it][A
Steps :   7%|▋         | 32/469 [00:38<08:39,  1.19s/it][A
Steps :   7%|▋         | 33/469 [00:38<08:34,  1.18s/it][A
Steps :   7%|▋         | 34/469 [00:39<08:31,  1.18s/it][A
Steps :   7%|▋         | 35/469 [00:40<08:27,  1.17s/it][A
Steps :   8%|▊         | 36/469 [00:41<08:23,  1.16s/it][A
Steps :   8%|▊         | 37/469 [00:42<08:20,  1.16s/it][A
Steps :   8%|▊         | 38/469 [00:43<08:17,  1.15s/it][A
                                               1.15s/it][A
Epochs:   0%|          | 0/10 [00:45<?, ?it/s]          
Steps :   8%|▊         | 39/469 [00:45<08:24,  1.17s/it][A
Steps :   9%|▊         | 40/469 [00:45<08:10,  1.14s/it][A

Batch_Time:   0.891554, Countdown:  50.000000, Epoch:   0.000000, Iteration:  40.000000, K:   0.303460, Loss: 250.043564, Lr_Base:   0.000515, Norm: 655.773358, Total_Loss: 250.043564



Steps :   9%|▊         | 41/469 [00:46<08:07,  1.14s/it][A
Steps :   9%|▉         | 42/469 [00:47<08:05,  1.14s/it][A
Steps :   9%|▉         | 43/469 [00:48<08:02,  1.13s/it][A
Steps :   9%|▉         | 44/469 [00:49<08:00,  1.13s/it][A
Steps :  10%|▉         | 45/469 [00:50<07:57,  1.13s/it][A
Steps :  10%|▉         | 46/469 [00:51<07:54,  1.12s/it][A
Steps :  10%|█         | 47/469 [00:52<07:51,  1.12s/it][A
Steps :  10%|█         | 48/469 [00:53<07:49,  1.12s/it][A
                                               1.11s/it][A
Epochs:   0%|          | 0/10 [00:55<?, ?it/s]          
Steps :  10%|█         | 49/469 [00:55<07:55,  1.13s/it][A
Steps :  11%|█         | 50/469 [00:55<07:45,  1.11s/it][A

Batch_Time:   0.910376, Countdown:  50.000000, Epoch:   0.000000, Iteration:  50.000000, K:   0.363670, Loss: 230.083801, Lr_Base:   0.000519, Norm: 600.548945, Total_Loss: 230.083801



Steps :  11%|█         | 51/469 [00:56<07:43,  1.11s/it][A
Steps :  11%|█         | 52/469 [00:57<07:41,  1.11s/it][A
Steps :  11%|█▏        | 53/469 [00:58<07:38,  1.10s/it][A
Steps :  12%|█▏        | 54/469 [00:59<07:37,  1.10s/it][A
Steps :  12%|█▏        | 55/469 [01:00<07:35,  1.10s/it][A
Steps :  12%|█▏        | 56/469 [01:01<07:34,  1.10s/it][A
Steps :  12%|█▏        | 57/469 [01:02<07:33,  1.10s/it][A
Steps :  12%|█▏        | 58/469 [01:03<07:31,  1.10s/it][A
                                               1.10s/it][A
Epochs:   0%|          | 0/10 [01:05<?, ?it/s]          
Steps :  13%|█▎        | 59/469 [01:05<07:36,  1.11s/it][A
Steps :  13%|█▎        | 60/469 [01:05<07:27,  1.10s/it][A

Batch_Time:   0.947669, Countdown:  50.000000, Epoch:   0.000000, Iteration:  60.000000, K:   0.418675, Loss: 206.502167, Lr_Base:   0.000523, Norm: 478.541279, Total_Loss: 206.502167



Steps :  13%|█▎        | 61/469 [01:06<07:26,  1.09s/it][A
Steps :  13%|█▎        | 62/469 [01:07<07:24,  1.09s/it][A
Steps :  13%|█▎        | 63/469 [01:08<07:22,  1.09s/it][A
Steps :  14%|█▎        | 64/469 [01:09<07:21,  1.09s/it][A
Steps :  14%|█▍        | 65/469 [01:10<07:19,  1.09s/it][A
Steps :  14%|█▍        | 66/469 [01:11<07:17,  1.09s/it][A
Steps :  14%|█▍        | 67/469 [01:12<07:15,  1.08s/it][A
Steps :  14%|█▍        | 68/469 [01:13<07:13,  1.08s/it][A
                                               1.08s/it][A
Epochs:   0%|          | 0/10 [01:15<?, ?it/s]          
Steps :  15%|█▍        | 69/469 [01:15<07:18,  1.10s/it][A
Steps :  15%|█▍        | 70/469 [01:15<07:11,  1.08s/it][A

Batch_Time:   1.012359, Countdown:  50.000000, Epoch:   0.000000, Iteration:  70.000000, K:   0.468925, Loss: 190.604874, Lr_Base:   0.000526, Norm: 537.942991, Total_Loss: 190.604874



Steps :  15%|█▌        | 71/469 [01:16<07:09,  1.08s/it][A
Steps :  15%|█▌        | 72/469 [01:17<07:08,  1.08s/it][A
Steps :  16%|█▌        | 73/469 [01:18<07:06,  1.08s/it][A
Steps :  16%|█▌        | 74/469 [01:19<07:05,  1.08s/it][A
Steps :  16%|█▌        | 75/469 [01:20<07:03,  1.08s/it][A
Steps :  16%|█▌        | 76/469 [01:21<07:02,  1.07s/it][A
Steps :  16%|█▋        | 77/469 [01:22<07:01,  1.07s/it][A
Steps :  17%|█▋        | 78/469 [01:23<06:59,  1.07s/it][A
                                               1.07s/it][A
Epochs:   0%|          | 0/10 [01:25<?, ?it/s]          
Steps :  17%|█▋        | 79/469 [01:25<07:03,  1.09s/it][A
Steps :  17%|█▋        | 80/469 [01:25<06:56,  1.07s/it][A

Batch_Time:   0.930950, Countdown:  50.000000, Epoch:   0.000000, Iteration:  80.000000, K:   0.514832, Loss: 189.690567, Lr_Base:   0.000530, Norm: 374.068432, Total_Loss: 189.690567



Steps :  17%|█▋        | 81/469 [01:26<06:55,  1.07s/it][A
Steps :  17%|█▋        | 82/469 [01:27<06:53,  1.07s/it][A
Steps :  18%|█▊        | 83/469 [01:28<06:52,  1.07s/it][A
Steps :  18%|█▊        | 84/469 [01:29<06:51,  1.07s/it][A
Steps :  18%|█▊        | 85/469 [01:30<06:49,  1.07s/it][A
Steps :  18%|█▊        | 86/469 [01:31<06:48,  1.07s/it][A
Steps :  19%|█▊        | 87/469 [01:32<06:47,  1.07s/it][A
Steps :  19%|█▉        | 88/469 [01:33<06:46,  1.07s/it][A
                                               1.07s/it][A
Epochs:   0%|          | 0/10 [01:35<?, ?it/s]          
Steps :  19%|█▉        | 89/469 [01:35<06:49,  1.08s/it][A
Steps :  19%|█▉        | 90/469 [01:35<06:43,  1.07s/it][A

Batch_Time:   0.983562, Countdown:  50.000000, Epoch:   0.000000, Iteration:  90.000000, K:   0.556770, Loss: 164.150513, Lr_Base:   0.000534, Norm: 395.810625, Total_Loss: 164.150513



Steps :  19%|█▉        | 91/469 [01:36<06:42,  1.06s/it][A
Steps :  20%|█▉        | 92/469 [01:37<06:41,  1.06s/it][A
Steps :  20%|█▉        | 93/469 [01:38<06:39,  1.06s/it][A
Steps :  20%|██        | 94/469 [01:39<06:38,  1.06s/it][A
Steps :  20%|██        | 95/469 [01:40<06:37,  1.06s/it][A
Steps :  20%|██        | 96/469 [01:41<06:36,  1.06s/it][A
Steps :  21%|██        | 97/469 [01:42<06:34,  1.06s/it][A
Steps :  21%|██        | 98/469 [01:43<06:33,  1.06s/it][A
                                               1.06s/it][A
Epochs:   0%|          | 0/10 [01:46<?, ?it/s]          
Steps :  21%|██        | 99/469 [01:46<06:36,  1.07s/it][A
Steps :  21%|██▏       | 100/469 [01:46<06:31,  1.06s/it][A

Batch_Time:   0.994153, Countdown:  50.000000, Epoch:   0.000000, Iteration: 100.000000, K:   0.595084, Loss: 149.425079, Lr_Base:   0.000538, Norm: 383.481084, Total_Loss: 149.425079



Steps :  22%|██▏       | 101/469 [01:47<06:30,  1.06s/it][A
Steps :  22%|██▏       | 102/469 [01:48<06:29,  1.06s/it][A
Steps :  22%|██▏       | 103/469 [01:49<06:28,  1.06s/it][A
Steps :  22%|██▏       | 104/469 [01:50<06:27,  1.06s/it][A


RuntimeError: cuDNN error: CUDNN_STATUS_EXECUTION_FAILED

# EXPLORE

In [None]:
# Show the PyTorch version installed in the kernel.
torch.__version__

In [None]:
#!pip install https://download.pytorch.org/whl/cu100/torch-1.0.1-cp36-cp36m-win_amd64.whl

In [None]:
import torch
# NOTE(review): presumably a minimal GPU sanity check following the cuDNN
# failure in the training run above -- confirm.
a = torch.randn(3, 3, device="cuda:0")
a = torch.mm(a, a.t()) # a @ a.T is symmetric positive semi-definite (positive-definite only when `a` has full rank)
torch.cholesky(a)

## Data Loaders

In [None]:
# Inspect one training example, selected by position.
# NOTE(review): assumes image_ids[index] and captions[index] refer to the same
# sample -- verify against the dataset implementation.
index = 0
image_id = train_loader.dataset.data_wrapper.image_ids[index]
caption = train_loader.dataset.captions[index]
cap_tokens = train_loader.dataset.tokenizer(caption)
image = train_loader.dataset.load_img(image_id)

# permute(1, 2, 0) moves the first axis to the end before plotting
# (presumably channels-first -> channels-last for imshow; TODO confirm).
import matplotlib.pyplot as plt
plt.imshow(image.permute(1, 2, 0)  )

In [None]:
import numpy as np
# Largest value among the training image ids.
np.max(list(train_loader.dataset.data_wrapper.image_ids))

In [None]:
# Number of captions held by the training dataset.
len(train_loader.dataset.captions)

In [None]:
# Shape of the image loaded in the sample-inspection cell above.
image.shape

In [76]:
# Check valid loader
#image_id2 = val_loaders[0].dataset.data_wrapper.image_ids[index]
#image2 = val_loaders[0].dataset.load_img(image_id2)
#import matplotlib.pyplot as plt
#plt.imshow(  image2.permute(1, 2, 0)  )