In [1]:
import os
import pickle
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch
import random

In [2]:
# Directory that holds every pre-computed embedding pickle used below.
DATA_DIR = '/common/home/gg676/Downloads'


def _load_pickle(file_name, encoding='ASCII'):
    """Unpickle ``file_name`` from ``DATA_DIR`` and return the stored object.

    Args:
        file_name: Name of the pickle file inside ``DATA_DIR``.
        encoding: Forwarded to ``pickle.load``; 'ASCII' is pickle's own default.

    Returns:
        Whatever object the pickle contains.

    NOTE: unpickling can execute arbitrary code, so only load files you trust.
    """
    with open(os.path.join(DATA_DIR, file_name), 'rb') as f:
        # Stream straight from the handle so the raw bytes are not kept alive
        # in a module-level variable for the rest of the session.
        return pickle.load(f, encoding=encoding)


# Training split: slot 0 keeps the image embeddings from the combined pickle,
# slot 1 is replaced by the first entry of the ingredients-only pickle.
data = _load_pickle('embeddings_train1.pkl', encoding='latin1')
only_txt = _load_pickle('ingredients_embeddings_train.pkl', encoding='latin1')
data[1] = only_txt[0]

# Validation split: image side from the combined pickle, text side from the
# ingredients pickle (first entry of each).
val_emb = _load_pickle('embeddings_val1.pkl')
val_img = val_emb[0]
val_data = _load_pickle('ingredients_embeddings_val.pkl')
val_txt = val_data[0]

# Test split, laid out the same way as the validation split.
test_emb = _load_pickle('embeddings_test1.pkl')
test_img = test_emb[0]
test_data = _load_pickle('ingredients_embeddings_test.pkl')
test_txt = test_data[0]

In [3]:
class FeatureDataset(torch.utils.data.Dataset):
    """Map-style dataset over paired, pre-computed embeddings.

    ``dataset`` is an indexable container whose slot 0 holds the image
    features and whose slot 1 holds the text features. The dataset length is
    the first dimension of the image features.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        image_features = self.dataset[0]
        return image_features.shape[0]

    def __getitem__(self, idx):
        """Return the ``(text, image)`` pair stored at position ``idx``."""
        text_features = self.dataset[1]
        image_features = self.dataset[0]
        return text_features[idx], image_features[idx]

# Wrap the training embeddings and serve them in fixed-size batches.
# NOTE(review): shuffle=False means every epoch sees the batches in stored
# order -- confirm this is intended for training.
dataset = FeatureDataset(data)
train_loader = DataLoader(
    dataset=dataset,
    batch_size=64,
    shuffle=False,
)

In [4]:
class TextEncoder(nn.Module):
    """Projects text embeddings into the shared space.

    Architecture: Linear(input_size -> 512) -> BatchNorm1d -> Dropout(0.3)
    -> LeakyReLU, followed by a final Linear(512 -> output_size).
    """

    def __init__(self, output_size, input_size=1024):
        super().__init__()
        hidden_width = 512
        # Modules are created in the same order as they are applied.
        hidden_stack = [
            nn.Linear(input_size, hidden_width),
            nn.BatchNorm1d(hidden_width),
            nn.Dropout(p=0.3),
            nn.LeakyReLU(),
        ]
        self.layers = nn.Sequential(*hidden_stack)
        self.output = nn.Linear(hidden_width, output_size)

    def forward(self, x):
        """Map a batch ``x`` of text embeddings to ``output_size`` features."""
        hidden = self.layers(x)
        projected = self.output(hidden)
        return projected

In [5]:
class ImgEncoder(nn.Module):
    """Projects image embeddings into the shared space.

    Architecture: Linear(input_size -> 512) -> BatchNorm1d -> Dropout (default
    probability) -> LeakyReLU, followed by a final Linear(512 -> output_size).
    """

    def __init__(self, output_size, input_size=1024):
        super().__init__()
        hidden_width = 512
        # Modules are created in the same order as they are applied.
        hidden_stack = [
            nn.Linear(input_size, hidden_width),
            nn.BatchNorm1d(hidden_width),
            nn.Dropout(),
            nn.LeakyReLU(),
        ]
        self.layers = nn.Sequential(*hidden_stack)
        self.output = nn.Linear(hidden_width, output_size)

    def forward(self, x):
        """Map a batch ``x`` of image embeddings to ``output_size`` features."""
        hidden = self.layers(x)
        projected = self.output(hidden)
        return projected

In [6]:
def _to_numpy(features):
    """Return ``features`` as a NumPy array (torch tensors are detached and moved to CPU)."""
    if isinstance(features, torch.Tensor):
        return features.detach().cpu().numpy()
    return np.asarray(features)


def rank(txt_data, img_data, sample_size=1000, n_runs=10):
    """Score text-to-image retrieval by median rank and recall@{1,5,10}.

    For each of ``n_runs`` repetitions, ``sample_size`` paired rows are drawn
    at random (without replacement). Every sampled text row is compared with
    every sampled image row by dot product, the image rows are sorted by
    descending similarity, and the 1-based position of the true pair (the
    image row with the same index) is recorded.

    Args:
        txt_data: 2-D torch tensor or array of text projections, one row per pair.
        img_data: 2-D torch tensor or array of image projections; row ``i``
            is the match of ``txt_data`` row ``i``.
        sample_size: Number of pairs drawn per repetition (default 1000).
        n_runs: Number of repetitions averaged over (default 10).

    Returns:
        dict with keys ``"mean_median"`` (mean of the per-run median ranks),
        ``"recall"`` (dict mapping 1/5/10 to the recall averaged over runs)
        and ``"median_all"`` (list of per-run median ranks). The same dict is
        also printed.

    Raises:
        ValueError: if the inputs have different row counts, if fewer than
            ``sample_size`` pairs are available, or if ``sample_size`` or
            ``n_runs`` is not positive.
    """
    if sample_size < 1 or n_runs < 1:
        raise ValueError("sample_size and n_runs must both be at least 1")

    # Convert once up front instead of moving tensors to the CPU on every run.
    txt_all = _to_numpy(txt_data)
    img_all = _to_numpy(img_data)

    n_pairs = txt_all.shape[0]
    if img_all.shape[0] != n_pairs:
        raise ValueError(
            "txt_data and img_data must have the same number of rows "
            "({} != {})".format(n_pairs, img_all.shape[0])
        )
    if n_pairs < sample_size:
        raise ValueError(
            "need at least {} pairs to sample from, got {}".format(sample_size, n_pairs)
        )

    recall_levels = (1, 5, 10)
    glob_rank = []
    glob_recall = {level: 0.0 for level in recall_levels}

    for _ in range(n_runs):
        # Sample from the full index range so the last pair can be drawn too.
        ids = random.sample(range(n_pairs), sample_size)

        txt_sample = txt_all[ids, :]
        img_sample = img_all[ids, :]

        # similarity[r, c] = <text r, image c>
        similarity = np.dot(txt_sample, img_sample.T)

        positions = []
        hits = {level: 0.0 for level in recall_levels}

        for row in range(sample_size):
            # Similarities of one text row against every sampled image.
            sim = similarity[row, :]
            # Image indices sorted by descending similarity.
            sorting = np.argsort(sim)[::-1].tolist()
            # 1-based position at which the true pair ended up.
            pos = sorting.index(row) + 1
            for level in recall_levels:
                if pos <= level:
                    hits[level] += 1
            positions.append(pos)

        for level in recall_levels:
            glob_recall[level] += hits[level] / sample_size
        glob_rank.append(np.median(positions))

    for level in recall_levels:
        glob_recall[level] = glob_recall[level] / n_runs

    med_dict = {}
    med_dict["mean_median"] = np.average(glob_rank)
    med_dict["recall"] = glob_recall
    med_dict["median_all"] = glob_rank
    print("Result:", med_dict)

    return med_dict

In [7]:
def save_model(model, file_name, directory='/common/home/gg676/535/saved_models/'):
    """Serialize ``model`` with ``torch.save`` to ``directory/file_name``.

    Args:
        model: Object to serialize (the whole object is pickled, not just a
            state dict).
        file_name: Name of the file to write inside ``directory``.
        directory: Target directory; created if it does not exist yet.
    """
    # torch.save does not create missing parent directories.
    os.makedirs(directory, exist_ok=True)
    torch.save(model, os.path.join(directory, file_name))

In [8]:
def load_model(file_name, directory='/common/home/gg676/535/saved_models/', map_location=None):
    """Load an object previously written with ``torch.save``.

    Args:
        file_name: Name of the file inside ``directory``.
        directory: Directory the file is read from.
        map_location: Forwarded to ``torch.load``; lets a checkpoint saved on
            one device be loaded onto another (e.g. ``'cpu'``).

    Returns:
        The deserialized object.

    NOTE: this performs a full unpickle, which can execute arbitrary code --
    only load files you trust.
    """
    import inspect

    path = os.path.join(directory, file_name)
    load_kwargs = {'map_location': map_location}
    # Whole pickled modules need a full unpickle. Where torch.load exposes
    # ``weights_only`` it is switched off explicitly so the call does not
    # depend on that release's default; older releases lack the argument.
    if 'weights_only' in inspect.signature(torch.load).parameters:
        load_kwargs['weights_only'] = False
    model = torch.load(path, **load_kwargs)
    return model

In [9]:
import time
from tqdm import tqdm
def train(train_loader, img_model, txt_model, criterion, optimizer_txt, optimizer_img, epoch):
    """Run one optimisation pass over ``train_loader`` and return the mean batch loss.

    Args:
        train_loader: Iterable yielding ``(text_batch, image_batch)`` tensor pairs.
        img_model: Module applied to the image batch.
        txt_model: Module applied to the text batch.
        criterion: Callable ``criterion(text_out, image_out)`` returning a scalar loss.
        optimizer_txt: Optimizer stepping ``txt_model``'s parameters.
        optimizer_img: Optimizer stepping ``img_model``'s parameters.
        epoch: Accepted for interface compatibility; not used in the computation.

    Returns:
        float: arithmetic mean of the per-batch loss values.

    Raises:
        ValueError: if ``train_loader`` yields no batches.
    """
    img_model.train()
    txt_model.train()

    # Send each batch to wherever its model lives instead of a fixed GPU id.
    txt_device = next(txt_model.parameters()).device
    img_device = next(img_model.parameters()).device

    running_loss = []

    # Iterating the loader directly lets tqdm pick up its length when available.
    for txt_emb, img_emb in tqdm(train_loader):
        txt_emb = txt_emb.to(txt_device)
        img_emb = img_emb.to(img_device)

        optimizer_txt.zero_grad()
        optimizer_img.zero_grad()

        out_txt_emb = txt_model(txt_emb)
        out_img_emb = img_model(img_emb)

        loss = criterion(out_txt_emb, out_img_emb)

        loss.backward()
        optimizer_txt.step()
        optimizer_img.step()

        running_loss.append(loss.item())

    if not running_loss:
        raise ValueError("train_loader yielded no batches")
    return sum(running_loss) / len(running_loss)

In [10]:
# Seed every RNG involved (weight init, dropout, the random sampling in rank)
# so a fresh run of this cell is repeatable.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

DEVICE = 'cuda:1'
no_epochs = 15
img_model = ImgEncoder(512).to(DEVICE)
txt_model = TextEncoder(512).to(DEVICE)
# Start at +inf so the first evaluated epoch always produces a checkpoint,
# however large its median rank is.
lowest_median_rank = float('inf')
optimizer_txt = torch.optim.Adam(txt_model.parameters(), lr=1e-6, weight_decay=1e-7)
optimizer_img = torch.optim.Adam(img_model.parameters(), lr=1e-6, weight_decay=1e-7)
criterion = nn.MSELoss()

# The validation inputs never change, so build and upload them once.
val_txt_tensor = torch.tensor(val_txt).to(DEVICE)
val_img_tensor = torch.tensor(val_img).to(DEVICE)

for epoch in range(no_epochs):
    # train() switches both models to training mode itself.
    train_loss = train(train_loader, img_model, txt_model, criterion, optimizer_txt, optimizer_img, epoch)
    print('  Epoch {} loss: {} {}'.format(epoch, train_loss, "\n"))

    txt_model.eval()
    img_model.eval()
    with torch.no_grad():
        out_text, out_img = txt_model(val_txt_tensor), img_model(val_img_tensor)
        med_rank = rank(out_text, out_img)

    # Keep only the checkpoint with the best (lowest) validation median rank.
    # In the logged run the training loss keeps falling while the validation
    # rank worsens after the first few epochs, so the last epoch is not the best.
    if med_rank['mean_median'] < lowest_median_rank:
        lowest_median_rank = med_rank['mean_median']
        save_model(txt_model, 'text_INGREDIENTS_model_mse')
        save_model(img_model, 'img_INGREDIENTS_model_mse')

4400it [00:25, 173.27it/s]


  Epoch 0 loss: 0.4053185410729863 

Result: {'mean_median': 61.8, 'recall': {1: 0.045099999999999994, 5: 0.1385, 10: 0.20940000000000003}, 'median_all': [61.5, 61.0, 61.0, 64.0, 62.0, 61.5, 62.0, 60.0, 60.0, 65.0]}


4400it [00:22, 192.04it/s]


  Epoch 1 loss: 0.3077897673845291 

Result: {'mean_median': 30.7, 'recall': {1: 0.07809999999999999, 5: 0.22230000000000003, 10: 0.31050000000000005}, 'median_all': [30.5, 30.0, 31.5, 28.0, 31.5, 33.0, 30.5, 30.0, 30.0, 32.0]}


4400it [00:21, 201.59it/s]


  Epoch 2 loss: 0.26038436016237193 

Result: {'mean_median': 30.35, 'recall': {1: 0.0754, 5: 0.2129, 10: 0.3088}, 'median_all': [30.5, 31.0, 28.5, 31.0, 28.0, 32.0, 29.5, 29.0, 29.0, 35.0]}


4400it [00:23, 188.33it/s]


  Epoch 3 loss: 0.22650299374352803 

Result: {'mean_median': 36.45, 'recall': {1: 0.0652, 5: 0.1959, 10: 0.2854}, 'median_all': [35.0, 34.0, 35.0, 37.5, 37.0, 43.0, 41.0, 34.0, 33.0, 35.0]}


4400it [00:21, 206.68it/s]


  Epoch 4 loss: 0.19878581437197598 

Result: {'mean_median': 51.05, 'recall': {1: 0.055499999999999994, 5: 0.1623, 10: 0.23750000000000004}, 'median_all': [50.0, 57.0, 50.5, 55.0, 49.0, 54.0, 47.0, 47.0, 54.0, 47.0]}


4400it [00:21, 202.38it/s]


  Epoch 5 loss: 0.17476449509235945 

Result: {'mean_median': 70.8, 'recall': {1: 0.041299999999999996, 5: 0.13069999999999998, 10: 0.19270000000000004}, 'median_all': [77.0, 75.0, 73.5, 62.0, 70.0, 72.0, 67.0, 74.0, 67.5, 70.0]}


4400it [00:19, 228.80it/s]


  Epoch 6 loss: 0.15335763737220656 

Result: {'mean_median': 97.0, 'recall': {1: 0.030000000000000006, 5: 0.09999999999999999, 10: 0.1518}, 'median_all': [103.5, 99.0, 96.0, 102.5, 106.5, 94.0, 94.0, 95.0, 90.0, 89.5]}


4400it [00:20, 218.99it/s]


  Epoch 7 loss: 0.13418167308819565 

Result: {'mean_median': 125.8, 'recall': {1: 0.0217, 5: 0.08119999999999998, 10: 0.1247}, 'median_all': [127.0, 116.5, 128.5, 129.5, 130.5, 123.5, 127.0, 139.0, 115.5, 121.0]}


4400it [00:19, 225.49it/s]


  Epoch 8 loss: 0.11688399741934104 

Result: {'mean_median': 150.4, 'recall': {1: 0.019999999999999997, 5: 0.0658, 10: 0.10659999999999999}, 'median_all': [154.5, 154.5, 159.5, 150.5, 158.0, 149.0, 152.0, 136.0, 154.5, 135.5]}


4400it [00:20, 216.01it/s]


  Epoch 9 loss: 0.10145226512612267 

Result: {'mean_median': 170.65, 'recall': {1: 0.0157, 5: 0.05600000000000001, 10: 0.08909999999999998}, 'median_all': [172.0, 171.0, 161.5, 173.5, 179.5, 156.5, 173.5, 159.0, 169.5, 190.5]}


4400it [00:22, 198.60it/s]


  Epoch 10 loss: 0.0877266572212631 

Result: {'mean_median': 190.55, 'recall': {1: 0.016400000000000005, 5: 0.05179999999999999, 10: 0.0842}, 'median_all': [177.5, 189.5, 206.5, 187.5, 205.5, 197.0, 195.0, 184.0, 184.0, 179.0]}


4400it [00:22, 197.42it/s]


  Epoch 11 loss: 0.07559911211952568 

Result: {'mean_median': 201.7, 'recall': {1: 0.014100000000000001, 5: 0.048299999999999996, 10: 0.0794}, 'median_all': [219.0, 205.0, 195.5, 200.5, 199.5, 189.0, 207.0, 194.0, 214.0, 193.5]}


4400it [00:23, 184.00it/s]


  Epoch 12 loss: 0.06490885891443626 

Result: {'mean_median': 202.85, 'recall': {1: 0.0131, 5: 0.0455, 10: 0.07669999999999999}, 'median_all': [194.5, 205.0, 197.5, 207.5, 203.0, 200.0, 198.5, 211.0, 202.0, 209.5]}


4400it [00:24, 179.26it/s]


  Epoch 13 loss: 0.05556344847025519 

Result: {'mean_median': 213.0, 'recall': {1: 0.013800000000000002, 5: 0.048799999999999996, 10: 0.07719999999999999}, 'median_all': [212.0, 224.0, 221.0, 216.5, 195.5, 214.5, 216.0, 205.0, 224.0, 201.5]}


4400it [00:22, 193.26it/s]


  Epoch 14 loss: 0.04739933844994415 

Result: {'mean_median': 217.15, 'recall': {1: 0.014900000000000002, 5: 0.04699999999999999, 10: 0.07209999999999998}, 'median_all': [218.0, 218.5, 195.5, 218.5, 210.5, 218.0, 236.5, 225.5, 211.0, 219.5]}


In [14]:
# Reload the checkpoints saved during training and score them on the test split.
txt_model = load_model('text_INGREDIENTS_model_mse')
img_model = load_model('img_INGREDIENTS_model_mse')
img_model.eval()
txt_model.eval()
with torch.no_grad():
    test_result = rank(
        txt_model(torch.tensor(test_txt).to('cuda:1')),
        img_model(torch.tensor(test_img).to('cuda:1')),
    )
test_result

Result: {'mean_median': 31.7, 'recall': {1: 0.075, 5: 0.21280000000000002, 10: 0.3101}, 'median_all': [32.0, 35.0, 33.0, 31.0, 34.0, 32.0, 28.0, 30.0, 31.0, 31.0]}


{'mean_median': 31.7,
 'recall': {1: 0.075, 5: 0.21280000000000002, 10: 0.3101},
 'median_all': [32.0, 35.0, 33.0, 31.0, 34.0, 32.0, 28.0, 30.0, 31.0, 31.0]}

In [15]:
# Project the test embeddings for later export. This is inference only, so
# put both models in eval mode explicitly (rather than relying on an earlier
# cell having done it) and skip building an autograd graph.
txt_model.eval()
img_model.eval()
with torch.no_grad():
    out_text_test = txt_model(torch.tensor(test_txt).to('cuda:1'))
    out_img_test = img_model(torch.tensor(test_img).to('cuda:1'))

In [16]:
# Export the projected test embeddings for the t-SNE step. The tensors are
# detached and moved to the CPU first so the pickles carry no autograd state
# and can be loaded on a machine without the GPU they were computed on.
with open('/common/home/gg676/535/task_2/tsne_data/ingredients_text.pkl', 'wb') as fp:
    pickle.dump(out_text_test.detach().cpu(), fp)
with open('/common/home/gg676/535/task_2/tsne_data/ingredients_img.pkl', 'wb') as fp:
    pickle.dump(out_img_test.detach().cpu(), fp)