In [None]:
import os
import pickle
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import numpy as np

from torch.utils.tensorboard import SummaryWriter


In [None]:
# Display the kernel's current working directory. The relative paths used
# further down (model checkpoints, results JSON) are resolved against it.
os.getcwd()

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU
# so the notebook can still be executed (slowly) on a host without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
class EmbeddingDataset(torch.utils.data.Dataset):
    """Map-style dataset over index-aligned, pre-computed embedding arrays.

    ``dataset`` is an indexable container. Slot 0 is read as the image
    embeddings, slot 1 as the text embeddings and slot 3 as the labels;
    slot 2 is never accessed by this class.
    """

    def __init__(self, dataset):
        self.dataset = dataset

    def __len__(self):
        # The sample count is the first-axis length of slot 0.
        first_field = self.dataset[0]
        return first_field.shape[0]

    def __getitem__(self, idx):
        """Return the ``(image, text, label)`` triple stored at row ``idx``."""
        return tuple(self.dataset[slot][idx] for slot in (0, 1, 3))

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
class Encoder(nn.Module):
    """Two-layer MLP that projects an embedding to ``output_size`` dimensions.

    Pipeline: Linear(input_size -> 512) -> BatchNorm1d(512) -> Dropout(0.3)
    -> LeakyReLU -> Linear(512 -> output_size).

    Args:
        output_size: width of the produced projection.
        input_size: width of the incoming embedding (default 1024).
    """

    def __init__(self, output_size, input_size=1024):
        super().__init__()
        hidden_size = 512
        hidden_stack = [
            nn.Linear(input_size, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.Dropout(0.3),
            nn.LeakyReLU(),
        ]
        # ``layer1`` / ``layer2`` are the submodule names that end up in the
        # state_dict keys, so they must stay stable for saved checkpoints.
        self.layer1 = nn.Sequential(*hidden_stack)
        self.layer2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run the hidden block, then the activation-free output projection."""
        hidden = self.layer1(x)
        projected = self.layer2(hidden)
        return projected

In [None]:
import pickle
# --- Training data ----------------------------------------------------------
# The raw bytes are read first and then unpickled with encoding='latin1'
# (presumably because the file was written by Python 2 -- TODO confirm).
# NOTE: pickle can execute arbitrary code; only load files you trust.
with open('/common/home/apc120/Downloads/labelled/labelled_data.pkl', 'rb') as f:
    obj = f.read()
data = pickle.loads(obj, encoding='latin1')

# --- Validation embeddings: slot 0 is used as images, slot 1 as texts --------
with open('/common/home/apc120/Downloads/labelled/embeddings_val1.pkl', 'rb') as f:
    val_emb = pickle.load(f)
val_img, val_txt = val_emb[0], val_emb[1]

# --- Test embeddings: same slot layout as the validation file ---------------
with open('/common/home/apc120/Downloads/labelled/embeddings_test1.pkl', 'rb') as f:
    test_emb = pickle.load(f)
test_img, test_txt = test_emb[0], test_emb[1]

# Wrap the training data and serve it in shuffled mini-batches of 128.
dataset = EmbeddingDataset(data)
train_loader = DataLoader(dataset=dataset, shuffle=True, batch_size=128)

In [None]:
import random
import numpy as np
import torch
def calculate_rank_recall(txt_data, img_data, sample_size=1000, n_trials=10):
    """Estimate retrieval quality (median rank and recall@{1,5,10}).

    Row ``i`` of ``txt_data`` and row ``i`` of ``img_data`` are treated as a
    matching pair. For each trial a random subset of pairs is drawn, the
    dot-product similarity between every sampled ``txt_data`` row and every
    sampled ``img_data`` row is computed, and each ``txt_data`` row is scored
    by the 1-based rank of its own partner among the sampled ``img_data`` rows
    (highest similarity first).

    Args:
        txt_data: 2-D torch tensor of query embeddings, one row per pair.
        img_data: 2-D torch tensor of candidate embeddings, row-aligned with
            ``txt_data``.
        sample_size: number of pairs drawn per trial. Clamped to the number of
            available pairs so that small sets can be evaluated too.
        n_trials: number of independent random subsets to average over.

    Returns:
        dict with
          * ``"mean_median"`` - mean over trials of the per-trial median rank,
          * ``"recall"``      - ``{1: r1, 5: r5, 10: r10}`` averaged over trials,
          * ``"median_all"``  - list of the per-trial median ranks.
        The dict is also printed.

    Raises:
        ValueError: if there are no pairs, or ``sample_size`` / ``n_trials``
            is smaller than 1.
    """
    n_pairs = txt_data.shape[0]
    if n_pairs == 0:
        raise ValueError("calculate_rank_recall needs at least one embedding pair")
    if sample_size < 1 or n_trials < 1:
        raise ValueError("sample_size and n_trials must both be >= 1")

    # Never request more rows than exist (random.sample would raise).
    k = min(sample_size, n_pairs)
    recall_levels = (1, 5, 10)

    glob_rank = []
    glob_recall = {level: 0.0 for level in recall_levels}

    for _ in range(n_trials):
        # range(n_pairs) so that the last pair can be drawn as well.
        ids = random.sample(range(n_pairs), k)

        txt_sample = txt_data[ids, :].detach().cpu().numpy()
        img_sample = img_data[ids, :].detach().cpu().numpy()

        # similarity[a, b] = <query a, candidate b>
        similarity = np.dot(txt_sample, img_sample.T)

        # Candidate indices per row, best match first.
        order = np.argsort(similarity, axis=1)[:, ::-1]
        # 1-based position at which each row's own partner (index == row) lands.
        ranks = np.argmax(order == np.arange(k)[:, None], axis=1) + 1

        for level in recall_levels:
            glob_recall[level] += np.count_nonzero(ranks <= level) / k
        glob_rank.append(float(np.median(ranks)))

    for level in recall_levels:
        glob_recall[level] = glob_recall[level] / n_trials

    med_dict = {
        "mean_median": float(np.average(glob_rank)),
        "recall": glob_recall,
        "median_all": glob_rank,
    }
    print("Result:", med_dict)
    return med_dict

In [None]:
# Width of the shared latent space both modalities are projected into.
latent_dims = 256

# One projection head per modality (image head is created first), both on `device`.
img_model = Encoder(output_size=latent_dims).to(device)
txt_model = Encoder(output_size=latent_dims).to(device)

# A single Adam optimizer updates both heads; text parameters are listed first.
params = [*txt_model.parameters(), *img_model.parameters()]
optimizer = optim.Adam(params, lr=1e-6)

# Multiply the learning rate by 0.1 once the scheduler has been stepped 10 and
# 20 times; verbose=True makes the scheduler print its learning-rate updates.
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[10, 20],
    gamma=0.1,
    last_epoch=-1,
    verbose=True,
)


def _cosine_distance(x, y):
    """Cosine distance: 1 - cosine similarity."""
    return 1.0 - F.cosine_similarity(x, y)


# Triplet margin loss measured with cosine distance.
loss = nn.TripletMarginWithDistanceLoss(distance_function=_cosine_distance)

In [None]:
def get_hard_negative(anchor, positive, label):
    """Pick, for every anchor, the most similar candidate with a different label.

    Similarity is the dot product between each ``anchor`` row and each
    ``positive`` row. Candidates that share the anchor's label are excluded by
    setting their score to ``-inf`` before the arg-max. (Zeroing them instead
    would let a same-label candidate win whenever every real negative has a
    negative similarity.)

    Args:
        anchor: float tensor of shape (batch, dim).
        positive: float tensor of shape (batch, dim), row-aligned with ``anchor``.
        label: tensor of shape (batch,) holding one label per row.

    Returns:
        Tensor of shape (batch, dim): row ``i`` is the row of ``positive``
        chosen as the hard negative for ``anchor[i]``. If a row has no
        candidate with a different label, every score is ``-inf`` and the
        arg-max result (hence the returned row) is not meaningful.
    """
    # same_label[i, j] is True when rows i and j carry the same label.
    same_label = torch.eq(label.unsqueeze(0), label.unsqueeze(1))
    similarity = torch.matmul(anchor, positive.T)
    # Exclude same-label candidates regardless of the sign of the similarity.
    negative_scores = similarity.masked_fill(same_label, float("-inf"))
    hard_negative_idx = torch.argmax(negative_scores, dim=1)
    hard_negative = torch.index_select(positive, 0, hard_negative_idx)

    return hard_negative

In [None]:
from tqdm import tqdm
def train(train_loader, img_model, txt_model, loss, optimizer):
    """Run one training epoch and return the mean per-batch loss.

    For every batch the image embeddings act as anchors and the paired text
    embeddings as positives. Hard negatives are mined with
    ``get_hard_negative`` on the raw (un-projected) input embeddings, without
    tracking gradients. All three are then projected by the models and fed to
    ``loss``.

    Args:
        train_loader: iterable yielding ``(image_emb, text_emb, label)`` batches.
        img_model: module projecting image embeddings.
        txt_model: module projecting text embeddings.
        loss: callable ``loss(anchor, positive, negative)`` returning a scalar.
        optimizer: optimizer holding the parameters of both models.

    Returns:
        Sum of the batch losses divided by the number of batches.

    Raises:
        ValueError: if ``train_loader`` yields no batch.
    """
    # Ensure Dropout / BatchNorm are in training mode even if the caller left
    # the models in eval mode after a validation pass.
    img_model.train()
    txt_model.train()

    total_loss = 0.0
    num_batches = 0
    for image_emb, text_emb, label in tqdm(train_loader):
        image_emb = image_emb.to(device)
        text_emb = text_emb.to(device)
        label = label.to(device)

        # Negative mining is a selection step only; no gradients are needed.
        with torch.no_grad():
            hard_negative_emb = get_hard_negative(image_emb, text_emb, label)

        optimizer.zero_grad()
        anchor = img_model(image_emb)
        positive = txt_model(text_emb)
        negative = txt_model(hard_negative_emb)
        curr_loss = loss(anchor, positive, negative)
        curr_loss.backward()
        optimizer.step()

        total_loss += curr_loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("train_loader produced no batches")
    # Divide by the batch count (not by the last batch index).
    return total_loss / num_batches

In [None]:
EPOCHS = 10
# Lowest validation mean-median-rank seen so far (lower is better).
best_loss = 1e18
res_dict = {}

# Build the validation tensors once instead of on every epoch.
val_txt_tensor = torch.as_tensor(val_txt).to(device)
val_img_tensor = torch.as_tensor(val_img).to(device)

for epoch in range(1, EPOCHS + 1):
    # Switch back to training mode on every epoch: the eval() calls below
    # would otherwise persist and disable Dropout / freeze BatchNorm statistics
    # for all epochs after the first.
    img_model.train()
    txt_model.train()
    train_loss = train(train_loader, img_model, txt_model, loss, optimizer)

    # Validation pass: inference mode, no gradient tracking.
    img_model.eval()
    txt_model.eval()
    with torch.no_grad():
        out_text = txt_model(val_txt_tensor)
        out_img = img_model(val_img_tensor)
        med_dict = calculate_rank_recall(out_text, out_img)

    # Checkpoint both models whenever the validation mean median rank improves.
    if med_dict['mean_median'] < best_loss:
        print("BEST EPOCH:", epoch)
        torch.save(img_model, f'{latent_dims}_best_img_model_all.pth')
        torch.save(txt_model, f'{latent_dims}_best_txt_model_all.pth')
        best_loss = med_dict['mean_median']

    scheduler.step()
    med_dict["loss"] = train_loss
    res_dict["Epoch" + str(epoch)] = med_dict
    print('Epoch {} loss: {}'.format(epoch, train_loss))

In [None]:
results_dict = {}

img_model = img_model.eval()
txt_model = txt_model.eval()

# Project the test embeddings. The inputs are converted to tensors on the
# models' device, and no gradients are tracked since this is inference only.
with torch.no_grad():
    projection_img = img_model(torch.as_tensor(test_img).to(device))
    projection_txt = txt_model(torch.as_tensor(test_txt).to(device))
print("projections extracted")

# Image -> text retrieval: the image projections are passed as the query
# (first) argument so that the similarity matrix is img @ txt.T, i.e. every
# image is ranked against the sampled texts. calculate_rank_recall does the
# sampling and the median-rank / recall bookkeeping and prints "Result:".
med_dict = calculate_rank_recall(projection_img, projection_txt)
print("Mean median", med_dict["mean_median"])
print("Recall", med_dict["recall"])
res_dict["test_result"] = med_dict

In [None]:
# Latent sizes for which checkpoints named '<dim>_best_{img,txt}_model_all.pth'
# are expected in the working directory.
dim_arr = [4, 32, 64, 256]
# Test metrics per latent size, kept so they are not lost between iterations.
dim_results = {}

# Build the test tensors once; they are identical for every checkpoint.
test_txt_tensor = torch.as_tensor(test_txt).to(device)
test_img_tensor = torch.as_tensor(test_img).to(device)

for dim in dim_arr:
    # torch.load unpickles whole modules here: only load trusted files.
    # NOTE(review): recent PyTorch releases reportedly default to
    # weights_only=True, which rejects full-module pickles -- confirm against
    # the installed version.
    img_model = torch.load(f'{dim}_best_img_model_all.pth', map_location=device)
    txt_model = torch.load(f'{dim}_best_txt_model_all.pth', map_location=device)
    img_model.eval()
    txt_model.eval()
    with torch.no_grad():
        test_result = calculate_rank_recall(
            txt_model(test_txt_tensor),
            img_model(test_img_tensor),
        )
    dim_results[dim] = test_result
    

In [None]:
import json
# Persist the metrics collected in res_dict as JSON in the working directory.
with open("results_all_data.json", "w") as outfile:
    serialized = json.dumps(res_dict)
    outfile.write(serialized)

In [None]:
import torch
# Export the projected test embeddings of the "ingr" checkpoints.
with torch.no_grad():
    # torch.load unpickles whole modules here: only load trusted files.
    # map_location puts the models on the same device as the inputs below.
    img_model = torch.load('best_img_model_ingr.pth', map_location=device)
    txt_model = torch.load('best_txt_model_ingr.pth', map_location=device)
    img_model.eval()
    txt_model.eval()
    # The models need tensors on their own device, not the raw loaded arrays.
    txt_sample = txt_model(torch.as_tensor(test_txt).to(device))
    img_sample = img_model(torch.as_tensor(test_img).to(device))
    torch.save(txt_sample, '/common/home/apc120/Desktop/triplet_txt_ingr.pkl')
    torch.save(img_sample, '/common/home/apc120/Desktop/triplet_img_ingr.pkl')

In [None]:
import torch
# Export the projected test embeddings of the "all" checkpoints.
with torch.no_grad():
    # torch.load unpickles whole modules here: only load trusted files.
    # NOTE(review): the training cell saves '<latent_dims>_best_*_model_all.pth';
    # these un-prefixed names must already exist on disk -- confirm.
    img_model = torch.load('best_img_model_all.pth', map_location=device)
    txt_model = torch.load('best_txt_model_all.pth', map_location=device)
    # Make inference mode explicit so Dropout / BatchNorm cannot perturb the
    # exported embeddings, whatever mode the checkpoint was saved in.
    img_model.eval()
    txt_model.eval()
    txt_sample = txt_model(torch.as_tensor(test_txt).to(device))
    img_sample = img_model(torch.as_tensor(test_img).to(device))
    torch.save(txt_sample, '/common/home/apc120/Desktop/Triplet_txt_all.pkl')
    torch.save(img_sample, '/common/home/apc120/Desktop/Triplet_img_all.pkl')