In [1]:
import os
import pickle
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch
import random

In [2]:
with open('/common/home/gg676/Downloads/embeddings_train1.pkl', 'rb') as f:
    obj = f.read()
    data = pickle.loads(obj, encoding='latin1')    
with open('/common/home/gg676/Downloads/title_embeddings_train.pkl', 'rb') as f:
    obj = f.read()
    only_txt = pickle.loads(obj, encoding='latin1')
data[1] = only_txt[0]
        
with open('/common/home/gg676/Downloads/embeddings_val1.pkl', 'rb') as f:
    val_emb = pickle.load(f)
val_img = val_emb[0]
with open('/common/home/gg676/Downloads/title_embeddings_val.pkl', 'rb') as f:
    val_data = pickle.load(f)
val_txt = val_data[0]

with open('/common/home/gg676/Downloads/embeddings_test1.pkl', 'rb') as f:
    test_emb = pickle.load(f)
test_img = test_emb[0]
with open('/common/home/gg676/Downloads/title_embeddings_test.pkl', 'rb') as f:
    test_data = pickle.load(f)
test_txt = test_data[0]

In [3]:
class FeatureDataset(torch.utils.data.Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
    
    def __len__(self):
        return self.dataset[0].shape[0]
    
    def __getitem__(self, idx):
        text, img = self.dataset[1][idx], self.dataset[0][idx]
        return text, img

dataset = FeatureDataset(data)
train_loader = DataLoader(dataset, batch_size=64, shuffle=False)

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
class TextEncoder(nn.Module):
    def __init__(self,output_size,input_size=1024):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.BatchNorm1d(512),
            nn.Dropout(0.3),
            nn.LeakyReLU()
        )
        self.output = nn.Linear(512, output_size)
    
    def forward(self, x):
        x = self.layers(x)
        return self.output(x)

In [5]:
class ImgEncoder(nn.Module):
    def __init__(self,output_size,input_size=1024):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.BatchNorm1d(512),
            nn.Dropout(),
            nn.LeakyReLU()
        )
        self.output = nn.Linear(512, output_size)
    
    def forward(self, x):
        x = self.layers(x)
        return self.output(x)

In [6]:
def rank(txt_data, img_data):
    results_dict = {}
    projection_txt, projection_img = txt_data, img_data
    
    idxs = range(1000)
    
    glob_rank = []
    glob_recall = {1:0.0,5:0.0,10:0.0}
    
    for i in range(10):
        ids = random.sample(range(0,txt_data.shape[0]-1), 1000)
        
        txt_sample = projection_txt[ids,:]
        img_sample = projection_img[ids,:]
        
        similarity = np.dot(txt_sample.cpu().numpy(), img_sample.T.cpu().numpy())

        med_rank = []
        
        recall = {1:0.0,5:0.0,10:0.0}
        
        for ii in idxs:
            # get a column of similarities
            sim = similarity[ii,:]
            # sort indices in descending order
            sorting = np.argsort(sim)[::-1].tolist()
            # find where the index of the pair sample ended up in the sorting
            pos = sorting.index(ii)  
            if (pos+1) == 1:
                recall[1]+=1
            if (pos+1) <=5:
                recall[5]+=1
            if (pos+1)<=10:
                recall[10]+=1
            # store the position
            med_rank.append(pos+1)
        for i in recall.keys():
            recall[i]=recall[i]/1000
        med = np.median(med_rank)
        for i in recall.keys():
            glob_recall[i]+=recall[i]
        glob_rank.append(med)

    for i in glob_recall.keys():
        glob_recall[i] = glob_recall[i]/10
    med_dict = {}
    med_dict["mean_median"] = np.average(glob_rank)
    med_dict["recall"] = glob_recall
    med_dict["median_all"] = glob_rank
    print("Result:",med_dict)   
    return med_dict

In [7]:
import time
from tqdm import tqdm
def train(train_loader, img_model, txt_model, criterion, optimizer_txt, optimizer_img, epoch):
    img_model.train()
    txt_model.train()   
    running_loss = []
    optimizer_txt.zero_grad()
    optimizer_img.zero_grad()
    
    for i, (txt_emb,img_emb) in tqdm(enumerate(train_loader)):
        
        txt_emb = txt_emb.to('cuda:1')
        img_emb = img_emb.to('cuda:1')
        
        optimizer_txt.zero_grad()
        optimizer_img.zero_grad()
        
        out_txt_emb = txt_model(txt_emb)
        out_img_emb = img_model(img_emb) 
        
        loss = criterion(out_txt_emb, out_img_emb)
        
        loss.backward()
        optimizer_txt.step()
        optimizer_img.step()
        
        running_loss.append(loss.item())
    return sum(running_loss)/len(running_loss)

In [8]:
def load_model(file_name):
    model = torch.load('/common/home/gg676/535/saved_models/'+file_name)
    return model

In [9]:
def save_model(model, file_name):
    torch.save(model, '/common/home/gg676/535/saved_models/'+file_name)

In [10]:
no_epochs = 10
img_model = ImgEncoder(512).to('cuda:1')
txt_model = TextEncoder(512).to('cuda:1')
lowest_median_rank = 990.0
optimizer_txt = torch.optim.Adam(txt_model.parameters(), lr=1e-6, weight_decay=1e-7)
optimizer_img = torch.optim.Adam(img_model.parameters(), lr=1e-6, weight_decay=1e-7)
criterion = nn.MSELoss() 
for epoch in range(no_epochs):
    img_model.train()
    txt_model.train()
    train_loss = train(train_loader, img_model, txt_model, criterion, optimizer_txt, optimizer_img, epoch)
    print('Epoch {} loss: {} {}'.format(epoch, train_loss, "\n"))
    with torch.no_grad():
        txt_model.eval()
        img_model.eval()
        out_text, out_img = txt_model(torch.tensor(val_txt).to('cuda:1')), img_model(torch.tensor(val_img).to('cuda:1'))
        med_rank = rank(out_text,out_img)
        if med_rank['mean_median'] < lowest_median_rank:
            lowest_median_rank = med_rank['mean_median']
            save_model(txt_model, 'text_TITLE_model_mse')
            save_model(img_model, 'img_TITLE_model_mse')

4400it [00:22, 198.60it/s]


Epoch 0 loss: 0.4090594600479711 

Result: {'mean_median': 174.6, 'recall': {1: 0.0158, 5: 0.0529, 10: 0.08829999999999998}, 'median_all': [173.5, 181.0, 181.0, 176.5, 185.0, 170.0, 172.5, 165.0, 166.0, 175.5]}


4400it [00:15, 276.28it/s]


Epoch 1 loss: 0.3159580859135498 

Result: {'mean_median': 101.2, 'recall': {1: 0.0308, 5: 0.10400000000000001, 10: 0.15829999999999997}, 'median_all': [96.0, 108.0, 92.5, 94.5, 98.0, 94.0, 108.5, 103.5, 112.0, 105.0]}


4400it [00:15, 280.12it/s]


Epoch 2 loss: 0.26683003372428094 

Result: {'mean_median': 95.45, 'recall': {1: 0.034600000000000006, 5: 0.10969999999999999, 10: 0.1644}, 'median_all': [96.0, 93.5, 90.0, 98.0, 96.0, 96.0, 96.5, 95.5, 100.0, 93.0]}


4400it [00:16, 266.60it/s]


Epoch 3 loss: 0.23176537082953885 

Result: {'mean_median': 115.55, 'recall': {1: 0.029300000000000003, 5: 0.09679999999999998, 10: 0.1466}, 'median_all': [128.5, 115.5, 131.5, 124.5, 101.0, 113.0, 121.0, 117.0, 98.0, 105.5]}


4400it [00:15, 276.15it/s]


Epoch 4 loss: 0.20357283630493012 

Result: {'mean_median': 137.25, 'recall': {1: 0.0256, 5: 0.08209999999999998, 10: 0.1272}, 'median_all': [138.0, 130.5, 139.0, 141.5, 120.0, 138.0, 135.0, 153.0, 147.0, 130.5]}


4400it [00:15, 279.05it/s]


Epoch 5 loss: 0.1795284356515516 

Result: {'mean_median': 171.5, 'recall': {1: 0.020199999999999996, 5: 0.06720000000000001, 10: 0.1062}, 'median_all': [169.0, 161.5, 175.0, 183.5, 175.0, 170.5, 163.0, 181.0, 162.0, 174.5]}


4400it [00:15, 282.06it/s]


Epoch 6 loss: 0.15833782666108825 

Result: {'mean_median': 205.5, 'recall': {1: 0.015199999999999997, 5: 0.053000000000000005, 10: 0.08579999999999999}, 'median_all': [220.0, 183.0, 206.0, 214.0, 199.0, 209.0, 203.5, 204.0, 196.0, 220.5]}


4400it [00:15, 278.52it/s]


Epoch 7 loss: 0.13942369107834318 

Result: {'mean_median': 225.35, 'recall': {1: 0.013800000000000002, 5: 0.044599999999999994, 10: 0.07109999999999998}, 'median_all': [225.5, 231.0, 242.0, 234.0, 222.0, 225.0, 222.5, 224.5, 212.0, 215.0]}


4400it [00:15, 281.65it/s]


Epoch 8 loss: 0.12252715356309306 

Result: {'mean_median': 249.5, 'recall': {1: 0.0096, 5: 0.0377, 10: 0.06039999999999999}, 'median_all': [253.5, 260.0, 233.0, 227.0, 258.5, 257.0, 237.0, 254.5, 267.0, 247.5]}


4400it [00:15, 285.28it/s]


Epoch 9 loss: 0.10732662891799753 

Result: {'mean_median': 261.35, 'recall': {1: 0.007700000000000001, 5: 0.0341, 10: 0.057300000000000004}, 'median_all': [255.0, 266.0, 245.5, 251.5, 298.5, 264.0, 264.0, 262.5, 252.5, 254.0]}


In [14]:
txt_model = load_model('text_TITLE_model_mse')
img_model = load_model('img_TITLE_model_mse')
with torch.no_grad():
    img_model.eval()
    txt_model.eval()
    r_text = txt_model(torch.tensor(test_txt).to('cuda:1'))
    r_img = img_model(torch.tensor(test_img).to('cuda:1'))
    test_result = rank(r_text, r_img)
test_result

Result: {'mean_median': 102.75, 'recall': {1: 0.03270000000000001, 5: 0.11439999999999999, 10: 0.16860000000000003}, 'median_all': [110.0, 95.5, 107.0, 116.0, 98.0, 101.0, 109.0, 93.0, 95.0, 103.0]}


{'mean_median': 102.75,
 'recall': {1: 0.03270000000000001,
  5: 0.11439999999999999,
  10: 0.16860000000000003},
 'median_all': [110.0,
  95.5,
  107.0,
  116.0,
  98.0,
  101.0,
  109.0,
  93.0,
  95.0,
  103.0]}

In [15]:
out_text_test, out_img_test = txt_model(torch.tensor(test_txt).to('cuda:1')), img_model(torch.tensor(test_img).to('cuda:1'))

In [16]:
with open('/common/home/gg676/535/task_2/tsne_data/title_text.pkl', 'wb') as fp:
    pickle.dump(out_text_test, fp)
with open('/common/home/gg676/535/task_2/tsne_data/title_img.pkl', 'wb') as fp:
    pickle.dump(out_img_test, fp)