In [1]:
import sys

In [2]:
# Display the interpreter version this notebook is running under
# (the bare expression is rendered as the cell output).
sys.version_info

sys.version_info(major=3, minor=6, micro=5, releaselevel='final', serial=0)

In [9]:
import argparse
import time
import torch
import torchvision.transforms as transforms
import numpy as np
import os

from misc.dataset import CocoCaptionsRV, Multi30k
from misc.evaluation import eval_recall, eval_recall5
from misc.model import joint_embedding
from misc.utils import collate_fn_padded
from torch.utils.data import DataLoader

# Set CUDA_VISIBLE_DEVICES to "3" for this process.
# NOTE(review): presumably pins the notebook to one GPU of a shared machine;
# the id is hardcoded, so adjust it for your hardware.
# NOTE(review): this is assigned after `import torch`; confirm nothing above
# initialises CUDA before this line, otherwise the setting may be ignored.
os.environ["CUDA_VISIBLE_DEVICES"]="3"

In [10]:
# Number of samples per batch; passed to the DataLoader used for evaluation.
batch_size = 128

## Test Evaluation

In [11]:
def cosine_sim(A, B):
    """
        Pairwise cosine similarity between the rows of A and the rows of B.

        Entry [i, j] of the returned matrix is the cosine similarity
        between A[i] and B[j]: one row per row of A (image) and one
        column per row of B (caption).
    """
    row_norms_a = np.linalg.norm(A, axis=1)
    row_norms_b = np.linalg.norm(B, axis=1)
    # Outer product of the two norm vectors, built by broadcasting:
    # denominators[i, j] = |A[i]| * |B[j]|
    denominators = row_norms_a[:, np.newaxis] * row_norms_b[np.newaxis, :]
    return np.dot(A, B.T) / denominators

In [12]:
def multilingual_recall(imgs, caps, indices, ks=[1,5,10]):
    """
        Compute multilingual recall@k in both retrieval directions.

        Args:
            imgs: sequence of image-embedding arrays (e.g. one per batch),
                stacked row-wise with np.vstack into (nb_imgs, dim).
            caps: sequence of caption-embedding arrays, stacked the same
                way into (nb_caps, dim).
            indices: indices[j] is the row, in the stacked images, of the
                image described by caption j. One valid image row is
                required per caption; extra trailing entries are ignored.
            ks: cut-offs at which recall is reported (never mutated).

        Returns:
            (image_recall, caption_recall), two lists aligned with ks:
            image_recall[n] is the percentage of images having at least
            one of their own captions among their ks[n] most similar
            captions; caption_recall[n] is the percentage of captions
            whose own image is among their ks[n] most similar images.
    """
    imgs = np.vstack(imgs)
    caps = np.vstack(caps)

    # Negated similarities: an ascending argsort then puts the most similar
    # item first. scores[i, j] relates image i to caption j.
    scores = -cosine_sim(imgs, caps)
    nb_imgs, nb_caps = scores.shape

    cap_ids = np.arange(nb_caps)
    img_of_cap = np.asarray(indices)[:nb_caps]

    # --- image -> caption retrieval ---
    # ranks[i, j] = position of caption j in image i's sorted caption list.
    ranks = np.argsort(np.argsort(scores))
    # Rank obtained by each caption in the list of the image it describes.
    own_cap_rank = ranks[img_of_cap, cap_ids]
    # An image is a hit at k if ANY of its captions is ranked below k; each
    # image counts at most once so the percentage cannot exceed 100.
    img_hits = [int(np.unique(img_of_cap[own_cap_rank < k]).size) for k in ks]

    # --- caption -> image retrieval ---
    # ranks_caps[j, i] = position of image i in caption j's sorted image list.
    ranks_caps = np.argsort(np.argsort(np.transpose(scores)))
    # Rank obtained by each caption's own image in that caption's list.
    own_img_rank = ranks_caps[cap_ids, img_of_cap]
    cap_hits = [int(np.count_nonzero(own_img_rank < k)) for k in ks]

    return [h / nb_imgs * 100 for h in img_hits], [h / nb_caps * 100 for h in cap_hits]

# Evaluation on English

In [13]:
# Per-channel mean / std used to normalise the input images.
# NOTE(review): these look like the standard ImageNet statistics; confirm
# they match what the image encoder was trained with.
normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Validation-time preprocessing: deterministic resize to 400x400, tensor
# conversion, then normalisation.
prepro_val = transforms.Compose([
        # `Resize` replaces the deprecated `Scale` alias (same behaviour).
        transforms.Resize((400, 400)),
        # The image is already 400x400 here, so the crop keeps it whole;
        # CenterCrop makes the evaluation pipeline explicitly deterministic.
        transforms.CenterCrop((400, 400)),
        transforms.ToTensor(),
        normalize,
    ])


In [14]:
# Each entry pairs a checkpoint file (loaded from weights/) with the word
# embedding file (looked up under data/) to evaluate it with.
models = []
models.append(["best_sota_coco.pth.tar", "wiki.en.bin"]) # state of the art model
#models.append(["best_w2vec.pth.tar", "w2vec_model_vec.en.vec"]) # word2vec model
#models.append(["best_bivec_coco.pth.tar", "bivec_model_vec.en-fr.en.vec"]) # bivec on coco only
#models.append(["best_correct_en.pth.tar", "wiki.multi.en.vec"]) # muse embeddings on english

class arguments:
    """
        Minimal argument holder handed to the dataset constructor.

        Exposes a single attribute, `dict`: the path of the embedding file,
        built by prefixing the given file name with 'data/'.
        NOTE(review): presumably mirrors the argparse namespace used by the
        training scripts; confirm against misc.dataset.
    """
    # The parameter name shadows the builtin `dict`; it is kept unchanged so
    # that existing keyword callers keep working.
    def __init__(self, dict):
        self.dict = 'data/'+dict


for model, dic in models:
    print("Testing model :", model)
    aa = arguments(dic)
    coco_dataset = CocoCaptionsRV(aa, sset="val", transform=prepro_val)

    coco_dataset_loader = DataLoader(coco_dataset, batch_size=batch_size, shuffle=False,
                                num_workers=6, collate_fn=collate_fn_padded, pin_memory=True)

    # Load the checkpoint onto the CPU first (map_location keeps storages
    # where they are deserialised), then build the model and move it to GPU.
    checkpoint = torch.load("weights/"+model, map_location=lambda storage, loc: storage)
    join_emb = joint_embedding(checkpoint['args_dict']).cuda()
    join_emb.load_state_dict(checkpoint["state_dict"])
    # The model is already on the GPU; only switch to eval mode and wrap it.
    join_emb = torch.nn.DataParallel(join_emb.eval())

    # Per-batch embeddings, collected as numpy arrays.
    imgs_enc = list()
    caps_enc = list()

    print("### Beginning of evaluation ###")
    # Inference only: disable autograd once for the whole pass.
    with torch.no_grad():
        for imgs, caps, lengths in coco_dataset_loader:
            input_imgs, input_caps = imgs.cuda(), caps.cuda()
            output_imgs, output_caps = join_emb(input_imgs, input_caps, lengths)

            # Tensors produced under no_grad carry no graph, so they can be
            # converted directly without going through the legacy `.data`.
            imgs_enc.append(output_imgs.cpu().numpy())
            caps_enc.append(output_caps.cpu().numpy())

    print(model, eval_recall5(imgs_enc, caps_enc))
    


Testing model : best_sota_coco.pth.tar
Using .bin file


FileNotFoundError: [Errno 2] No such file or directory: 'data/best_sota_coco.pth.tar'

# Multilingual evaluation

In [None]:
# (checkpoint file, word-embedding file) pairs to evaluate.
models = [
    ["best_sota_coco.pth.tar", "wiki.en.bin"],  # state of the art model
    ["best_w2vec.pth.tar", "w2vec_model_vec.en.vec"],  # word2vec model
    ["best_bivec_coco.pth.tar", "bivec_model_vec.en-fr.en.vec"],  # bivec on coco only
    ["best_correct_en.pth.tar", "wiki.multi.en.vec"],  # muse embeddings on english
]