# DEEP DIVE MODEL

In [1]:
import os
# Work from the project checkout so that relative paths used later
# (e.g. the options YAML) resolve against it.
# NOTE(review): absolute, machine-specific path — presumably this is also what
# makes the project-local `params` / `retrieval` imports resolvable; confirm.
os.chdir("/home/ec2-user/SageMaker/foodi-ml/")

In [2]:
# First run only: uncomment the next line to install the project dependencies.
#!pip install -r requirements.txt

In [3]:
import os
import torch
from tqdm import tqdm

import params
from retrieval.train import train
from retrieval.utils import helper
from retrieval.model import loss
from retrieval.model.model import Retrieval
from retrieval.data.loaders import get_loaders
from retrieval.utils.logger import create_logger
from retrieval.utils.helper import load_model
from retrieval.utils.file_utils import load_yaml_opts, parse_loader_name

In [4]:
from addict import Dict

# Functions

In [5]:
def get_data_path(opt):
    """Resolve the dataset root directory.

    The ``DATA_PATH`` environment variable takes precedence when it is set;
    otherwise ``opt.dataset.data_path`` is returned.

    Raises:
        Exception: if ``DATA_PATH`` is not set and ``opt.dataset.data_path``
            is empty/falsy.
    """
    # Environment variable wins over the YAML setting.
    if 'DATA_PATH' in os.environ:
        return os.environ['DATA_PATH']

    yaml_path = opt.dataset.data_path
    if not yaml_path:
        raise Exception('''
                DATA_PATH not specified.
                Please, run "$ export DATA_PATH=/path/to/dataset"
                or add path to yaml file
            ''')
    return yaml_path

In [6]:
def get_tokenizers(train_loader):
    """Return the training dataset's tokenizer(s) as a list.

    Reads ``train_loader.dataset.tokenizer``. A list (or list subclass) is
    returned unchanged; any other value is wrapped in a single-element list.
    """
    tokenizers = train_loader.dataset.tokenizer
    # isinstance() rather than an exact type comparison, so list subclasses
    # are recognised as lists and not wrapped a second time.
    if not isinstance(tokenizers, list):
        tokenizers = [tokenizers]
    return tokenizers

In [7]:
def set_criterion(opt, model):
    """Build the loss criteria from ``opt`` and attach them to ``model``.

    When ``opt.criterion`` contains a ``'name'`` key, the options are logged
    and both criteria are created with ``loss.get_loss(**opt.criterion)``.
    Otherwise ``loss.ContrastiveLoss`` is used, configured by ``opt.criterion``
    (multimodal) and ``opt.ml_criterion`` (multilanguage).

    Nothing is returned; the criteria are handed to ``set_model_criterion``.
    Relies on the notebook-level ``logger`` being defined before the call.

    NOTE(review): in the ``'name'`` branch the multilanguage criterion is also
    built from ``opt.criterion`` rather than ``opt.ml_criterion`` — confirm
    this is intended.
    """
    criterion_opts = opt.criterion
    if 'name' not in criterion_opts:
        mm_loss = loss.ContrastiveLoss(**criterion_opts)
        ml_loss = loss.ContrastiveLoss(**opt.ml_criterion)
    else:
        logger.info(criterion_opts)
        mm_loss = loss.get_loss(**criterion_opts)
        ml_loss = loss.get_loss(**criterion_opts)
    set_model_criterion(
        opt,
        model,
        multilanguage_criterion=ml_loss,
        multimodal_criterion=mm_loss,
    )


def set_model_criterion(opt, model, multilanguage_criterion, multimodal_criterion):
    """Attach the loss criteria to ``model``.

    ``model.mm_criterion`` always receives ``multimodal_criterion``.
    ``model.ml_criterion`` receives ``multilanguage_criterion`` only when
    ``opt.dataset.adapt.data`` is non-empty, and is ``None`` otherwise.
    """
    model.mm_criterion = multimodal_criterion
    model.ml_criterion = None
    if len(opt.dataset.adapt.data) == 0:
        # opt.dataset.adapt.data is empty: keep the multilanguage criterion unset.
        return
    model.ml_criterion = multilanguage_criterion

# Run

In [8]:
# get_data_path() checks the DATA_PATH environment variable before the YAML
# options, so this value is the dataset root used below.
# NOTE(review): absolute, machine-specific path.
os.environ["DATA_PATH"] = "/home/ec2-user/SageMaker/data/"

In [9]:
# Options YAML to load; the path is relative to the current working directory.
options = "options/adapt/foodi-ml/i2t.yaml"

In [10]:
# Notebook stand-in for the command-line arguments.
# `local_rank` is set explicitly because get_loaders() is later called with
# `args.local_rank`; an addict Dict returns an empty Dict for a missing key
# instead of raising, so omitting it would silently pass `{}` as the rank.
# NOTE(review): 0 is assumed to be the right value for a single-process run — confirm.
args = {
    "options": options,
    "local_rank": 0,
}
args = Dict(args)
# Load the experiment options from the YAML file named in args.options.
opt = load_yaml_opts(args.options)

In [11]:
# Module-level logger (set_criterion() reads this global); debug level when
# opt.engine.debug is truthy, info otherwise.
logger = create_logger(level='debug' if opt.engine.debug else 'info')
#logger.info(f'Used args   : \n{args}')
#logger.info(f'Used options: \n{opt}')

In [12]:
# Resolve the dataset root: DATA_PATH env var if set, else opt.dataset.data_path
data_path = get_data_path(opt)

In [13]:
# Get loaders: the training loader plus the validation and adaptation loaders
train_loader, val_loaders, adapt_loaders = get_loaders(data_path, args.local_rank, opt)

2021-08-16 17:27:00,708 - [INFO    ] - Loaded vocab containing 2487 tokens
2021-08-16 17:27:00,708 - [INFO    ] - Loaded from .vocab_cache/foodiml_vocab.json.
2021-08-16 17:27:00,709 - [INFO    ] - Created tokenizer with init 2487 tokens.
2021-08-16 17:27:00,752 - [INFO    ] - [FoodiML] Loaded 8011 images and 8011 annotations.
2021-08-16 17:27:00,757 - [INFO    ] - Loaded vocab containing 2487 tokens
2021-08-16 17:27:00,757 - [INFO    ] - Loaded from .vocab_cache/foodiml_vocab.json.
2021-08-16 17:27:00,757 - [INFO    ] - Created tokenizer with init 2487 tokens.
2021-08-16 17:27:00,794 - [INFO    ] - [FoodiML] Loaded 0 images and 0 annotations.
2021-08-16 17:27:00,794 - [INFO    ] - Adapt loaders: 0


In [14]:
# Tokenizer(s) of the training dataset, always as a list
tokenizers = get_tokenizers(train_loader)

In [15]:
# Build the retrieval model from the YAML model options and the tokenizers
model = Retrieval(**opt.model, tokenizers=tokenizers)

2021-08-16 17:27:02,334 - [INFO    ] - Image encoder created: ('simple',)
2021-08-16 17:27:02,442 - [INFO    ] - Text encoder created: gru_glove
2021-08-16 17:27:02,461 - [INFO    ] - Created similarity: AdaptiveEmbeddingI2T(
  (norm): Normalization(
    (norm): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  )
  (adapt_txt): ADAPT(
    (fc_gamma): Sequential(
      (0): Linear(in_features=1024, out_features=1024, bias=True)
    )
    (fc_beta): Sequential(
      (0): Linear(in_features=1024, out_features=1024, bias=True)
    )
  )
  (fovea): Fovea(smooth=10,train_smooth: False)
)
2021-08-16 17:27:05,322 - [INFO    ] - Setting devices: img: cuda,txt: cuda, loss: cuda
2021-08-16 17:27:05,322 - [INFO    ] - Using similarity: ('adapt_i2t',)


<font color='red'>**Deep dive on Retrieval --------------------- (1)**</font>

In [16]:
from retrieval.model.model import Retrieval

In [19]:
# Switch the image encoder named in the options to 'resnet50'.
# NOTE(review): this mutates `opt` in place, so any later cell that reads
# opt.model sees the changed encoder name.
opt.model['img_enc']['name'] = 'resnet50'

In [20]:
# Re-create the model with the updated options; this rebinds `model` and
# replaces the instance built earlier.
model = Retrieval(**opt.model, tokenizers=tokenizers)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /home/ec2-user/.cache/torch/checkpoints/resnet50-19c8e357.pth
100%|██████████| 102502400/102502400 [00:00<00:00, 132237875.34it/s]
2021-08-16 17:28:10,308 - [INFO    ] - Image encoder created: ('resnet50',)
2021-08-16 17:28:10,413 - [INFO    ] - Text encoder created: gru_glove
2021-08-16 17:28:10,430 - [INFO    ] - Created similarity: AdaptiveEmbeddingI2T(
  (norm): Normalization(
    (norm): BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=False, track_running_stats=True)
  )
  (adapt_txt): ADAPT(
    (fc_gamma): Sequential(
      (0): Linear(in_features=1024, out_features=1024, bias=True)
    )
    (fc_beta): Sequential(
      (0): Linear(in_features=1024, out_features=1024, bias=True)
    )
  )
  (fovea): Fovea(smooth=10,train_smooth: False)
)
2021-08-16 17:28:10,483 - [INFO    ] - Setting devices: img: cuda,txt: cuda, loss: cuda
2021-08-16 17:28:10,484 - [INFO    ] - Using similarity: ('adapt_i2t',)


<font color='red'>**Finish Deep dive on Retrieval --------------------- (1)**</font>