In [None]:
# !conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 cudatoolkit=10.2 -c pytorch
# !pip install ftfy==5.8
# !conda install transformers
# !pip install git+https://github.com/openai/CLIP.git

# import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os, random
import torch

import warnings
warnings.filterwarnings("ignore")

import clip
# !pip install ipywidgets
# !git clone https://github.com/FreddeFrallan/Multilingual-CLIP
# !cd Multilingual-CLIP


# Validation pipeline 

In [None]:
import torch 

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

device

In [None]:
# from rich import print 

In [None]:
# Read the images from of the dataset 
import os 

img_folder = 'photos/'

if not os.path.exists(img_folder) or len(os.listdir(img_folder)) == 0:
    os.makedirs(img_folder, exist_ok=True)

In [None]:
import json 

# Load the annotation file: one JSON object per line (JSONL).
data = []

# The captions include Arabic text, so decode explicitly as UTF-8 instead of
# relying on the platform's default encoding (assumes the file is UTF-8 -- TODO confirm).
with open("en_ar_XTD10_edited_v2.jsonl", encoding="utf-8") as filino:

    for file_i in filino:

        # Tolerate blank lines (e.g. a trailing newline at the end of the file),
        # which json.loads would otherwise reject.
        if not file_i.strip():
            continue

        dic_obj = json.loads(file_i)
        data.append(dic_obj)

In [None]:
print("Dataset size is: ", len(data) )

In [None]:
print(data[:10])

In [None]:
Check_id_duplication = [] 

In [None]:
for idx, data_obj in enumerate(data):

    Check_id_duplication.append(data_obj["id"])

In [None]:
# The ids are unique exactly when de-duplicating them does not shrink the list.
# Comparing against the list's own length avoids hard-coding the dataset size.

len(set(Check_id_duplication)) == len(Check_id_duplication)

In [None]:
# data = [
#     {'image_id': 0, 'id': 391895, 'caption': 'رجل يرتدي خوذة حمراء على دراجة بخارية صغيرة على طريق ترابي'},
#     {'image_id': 1, 'id': 522418, 'caption': 'امرأة ترتدي شبكة على رأسها تقطع كعكة'},
#     {'image_id': 2, 'id': 184613, 'caption': 'طفل يحمل مظلة مزهرة ويأكل ثورًا'},
# ]

# Sort the list of dictionaries based on the 'id' key
sorted_data = sorted(data, key=lambda x: x['id'])

print(sorted_data[:20])
# # Print the sorted list
# for item in sorted_data:
#     print(item)

In [None]:
# get only 10 examples
# sorted_data

In [None]:
len(sorted_data)

In [None]:
print(sorted_data[:10])

In [None]:
image_name_list = []

for lin in sorted_data:
    # print(lin["image_name"])
    image_name_list.append(lin["image_name"])

In [None]:
print(image_name_list)

In [None]:
sorted_data[0]

In [None]:
# Create a mapping dictionary between the ids and paths

# Maps the numeric image id (parsed from the file name) to the image file name.
id2path = {}


for im_path, sort_sample in zip(image_name_list, sorted_data):

    # The id is the last "_"-separated token of the file name, without the extension.
    input_str = im_path.split("_")[-1].split(".")[0]

    # int() accepts leading zeros on its own. Stripping them first would turn an
    # all-zero token into "" and raise ValueError.
    result = int(input_str)

    # Sanity check: the id parsed from the file name must match the annotation id.
    if sort_sample['id'] != result:
        print("stop ........................................................")
    id2path[result] = im_path

    # print(result)

In [None]:
id2path

In [None]:
# Check if each image file exists in the folder

folder_path = "photos/XTD10_dataset"

missing_images = []

for image_path in image_name_list:
    full_image_path = os.path.join(folder_path, image_path)
    if not os.path.exists(full_image_path):
        missing_images.append(image_path)

if missing_images:
    print("The following images are missing:")
    for image_path in missing_images:
        print(image_path)
else:
    print("All images are present in the folder.")

In [None]:
# Delete the images that are not included on the testing dataset 

import os


# WARNING: this cell deletes files. The target folder is pinned here on purpose:
# the global `folder_path` is rebound to "data/weights/" further down the
# notebook, so re-running this cell after that point would otherwise delete
# the downloaded weights.
dataset_dir = "photos/XTD10_dataset"

# Set membership is O(1); the original list lookup was O(len(image_name_list)) per file.
wanted_names = set(image_name_list)

not_exist_paths = []
exist_paths = []

# Get a list of all entries in the folder
all_files = os.listdir(dataset_dir)

# Remove any files in the folder that are not in the list of image names
for file_name in all_files:
    if file_name in wanted_names:
        exist_paths.append(file_name)
        continue

    file_path = os.path.join(dataset_dir, file_name)
    # Only regular files are removed; sub-directories are left alone
    # (os.remove would raise on them).
    if os.path.isfile(file_path):
        os.remove(file_path)
        not_exist_paths.append(file_path)


# Every removed path, de-duplicated. (The original subtracted bare file names
# from full paths, which never removed anything from this set.)
destroy_images = set(not_exist_paths)


print("img_names", len(all_files))
print("destroy_images", len(destroy_images))
print("not_exist_paths", len(not_exist_paths))
print("remaining images", len(all_files)- len(destroy_images))

# print("Finished removing unwanted images.")

Define the text model

In [None]:
import pickle

import torch
import transformers

class MultilingualClipEdited(torch.nn.Module):
    """Text encoder: Hugging Face transformer + mean pooling + a linear projection head.

    The projection head's weight and bias are read from a pickle file located at
    ``weights_dir``/``head_name``.

    Args:
        model_name: identifier passed to ``transformers.AutoModel.from_pretrained``.
        tokenizer_name: identifier passed to ``transformers.AutoTokenizer.from_pretrained``.
        head_name: file name of the pickled projection head inside ``weights_dir``.
        weights_dir: directory that contains the head pickle.
        cache_dir: forwarded to both ``from_pretrained`` calls.
        in_features: input width of the projection head (required).
        out_features: output width of the projection head (required).

    Raises:
        TypeError: if ``in_features`` or ``out_features`` is not provided.
    """

    def __init__(self, model_name, tokenizer_name, head_name, weights_dir='data/weights/', cache_dir=None,in_features=None,out_features=None):
        super().__init__()

        # Fail fast, before any model download, instead of letting
        # torch.nn.Linear(None, None) fail later with an obscure message.
        if in_features is None or out_features is None:
            raise TypeError("in_features and out_features must both be provided")

        self.model_name = model_name
        self.tokenizer_name = tokenizer_name
        # os.path.join works whether or not weights_dir ends with a separator.
        self.head_path = os.path.join(weights_dir, head_name)

        self.tokenizer = transformers.AutoTokenizer.from_pretrained(tokenizer_name, cache_dir=cache_dir)
        self.transformer = transformers.AutoModel.from_pretrained(model_name, cache_dir=cache_dir)
        self.clip_head = torch.nn.Linear(in_features=in_features, out_features=out_features)
        self._load_head()

    def forward(self, txt):
        """Tokenize ``txt``, mean-pool the transformer output and project it.

        Args:
            txt: a string or list of strings accepted by the tokenizer.

        Returns:
            The output of ``clip_head`` applied to the pooled embeddings.
        """
        txt_tok = self.tokenizer(txt, padding=True, return_tensors='pt')
        # The tokenizer produces CPU tensors; move them to wherever the module
        # lives so the model also works after `.to('cuda')`.
        txt_tok = txt_tok.to(self.clip_head.weight.device)

        # First element of the transformer output
        # (presumably the per-token hidden states -- TODO confirm for this model).
        embs = self.transformer(**txt_tok)[0]

        att = txt_tok['attention_mask']

        # Attention-mask-weighted mean over dimension 1, so padded positions
        # do not contribute to the pooled vector.
        embs = (embs * att.unsqueeze(2)).sum(dim=1) / att.sum(dim=1)[:, None]

        return self.clip_head(embs)

    def _load_head(self):
        """Replace ``clip_head``'s weight and bias with the values stored at ``head_path``."""
        # NOTE(security): unpickling can execute arbitrary code. Only load head
        # files obtained from a trusted source.
        with open(self.head_path, 'rb') as f:
            lin_weights = pickle.load(f)
        # Element 0 is transposed before being installed as the Linear weight;
        # element 1 is used as the bias.
        self.clip_head.weight = torch.nn.Parameter(torch.tensor(lin_weights[0]).float().t())
        self.clip_head.bias = torch.nn.Parameter(torch.tensor(lin_weights[1]).float())

# Registry of text-model configurations, keyed by a short alias.
# load_model() unpacks the selected entry as keyword arguments into
# MultilingualClipEdited, so each entry must provide:
#   'model_name'     -> passed to transformers.AutoModel.from_pretrained
#   'tokenizer_name' -> passed to transformers.AutoTokenizer.from_pretrained
#   'head_name'      -> file name of the pickled projection head inside weights_dir
AVAILABLE_MODELS = {


    'arabert-large-vit-B-16-plus-mscoc-60': {
    'model_name': 'Arabic-Clip/arabertv2-Vit-B-16-plus-epoch-60-trained-mscoco-training',
    'tokenizer_name': 'Arabic-Clip/arabertv2-Vit-B-16-plus-epoch-60-trained-mscoco-training',
    'head_name': 'heads_of_the_model_bert-large-arabertv2-Vit-B-16-plus-240-60_.pickle'
    },
    # NOTE(review): the download cell in this notebook only fetches the head file of
    # the entry above; the "_32" head below must already exist in weights_dir -- confirm.
    'arabert-large-vit-B-16-plus-mscoc-60-32': {
    'model_name': 'Arabic-Clip/arabertv2-Vit-B-16-plus-epoch-60-trained-mscoco-training-fp32',
    'tokenizer_name': 'Arabic-Clip/arabertv2-Vit-B-16-plus-epoch-60-trained-mscoco-training-fp32',
    'head_name': 'heads_of_the_model_bert-large-arabertv2-Vit-B-16-plus-240-60_32.pickle'
    },


}


In [None]:
def load_model(name, cache_dir=None,in_features=None,out_features=None):
    """Build a MultilingualClipEdited from a named entry of AVAILABLE_MODELS.

    Args:
        name: key into AVAILABLE_MODELS.
        cache_dir: forwarded to MultilingualClipEdited.
        in_features: input width of the projection head, forwarded unchanged.
        out_features: output width of the projection head, forwarded unchanged.

    Returns:
        The constructed MultilingualClipEdited instance.

    Raises:
        KeyError: if ``name`` is not a registered model; the message lists the valid names.
    """
    try:
        config = AVAILABLE_MODELS[name]
    except KeyError:
        # Same exception type as a plain dict lookup, but with a message that
        # tells the caller which aliases are valid.
        raise KeyError(
            f"Unknown model {name!r}; available models: {sorted(AVAILABLE_MODELS)}"
        ) from None
    print(config)
    return MultilingualClipEdited(**config, cache_dir=cache_dir, in_features= in_features, out_features=out_features)

In [None]:
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# device

In [None]:
import os
import requests

def download_file(url, folder_path, filename=None):
    """Download ``url`` into ``folder_path`` unless the target file already exists.

    Args:
        url: address of the file to fetch.
        folder_path: destination directory; created if missing.
        filename: name of the saved file; defaults to the last path component of ``url``.

    Returns:
        None. A status line is printed in both the "downloaded" and "already exists" cases.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # If filename is not specified, use the last part of the URL as the filename
    if filename is None:
        filename = os.path.basename(url)

    # Full path where the file should be saved
    file_path = os.path.join(folder_path, filename)

    # Nothing to do when the file is already present
    if os.path.exists(file_path):
        print(f"File already exists at {file_path}")
        return

    # Make sure the folder exists
    os.makedirs(folder_path, exist_ok=True)

    # Write to a temporary name and rename only once the download is complete.
    # Otherwise an interrupted download leaves a truncated file behind that the
    # existence check above would accept on the next run.
    partial_path = file_path + ".part"
    try:
        # stream=True avoids holding the whole payload in memory; the timeout
        # keeps a stalled connection from hanging the notebook forever.
        with requests.get(url, stream=True, timeout=60) as response:
            response.raise_for_status()  # Check for HTTP request errors
            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    file.write(chunk)
        os.replace(partial_path, file_path)
    finally:
        # After a successful rename the partial file no longer exists; after a
        # failure this removes the incomplete download.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    print(f"File downloaded and saved to {file_path}")

# Example usage
url = "https://huggingface.co/Arabic-Clip-Archive/arabertv2-Vit-B-16-plus-epoch-60-trained-mscoco-training/resolve/main/heads_of_the_model_bert-large-arabertv2-Vit-B-16-plus-240-60_.pickle"
folder_path = "data/weights/"
download_file(url, folder_path)


In [None]:
# Open the pickle file in binary read mode

pickle_file_path = 'data/weights/heads_of_the_model_bert-large-arabertv2-Vit-B-16-plus-240-60_.pickle'  # Replace with the actual path to your pickle file
with open(pickle_file_path, 'rb') as file:
    loaded_content = pickle.load(file)
    print(len(loaded_content))
    print(loaded_content[0].shape)
    print(loaded_content[1].shape)

In [None]:

# Text model name 
# 

text_model = load_model('arabert-large-vit-B-16-plus-mscoc-60', in_features= 1024, out_features=640)


# Define the language model with lambda 

language_model = lambda queries: np.asarray(text_model(queries).detach().to('cpu')) 

In [None]:
text_model


### Define the image model 

In [None]:
# !pip install open_clip_torch

In [None]:
# clip_model, compose = clip.load('RN50x4')
# import torch
import open_clip
import torch
import torch.nn.functional as F
from urllib.request import urlopen
from PIL import Image
# from open_clip # import create_model_from_pretrained, get_tokenizer # works on open-clip-torch>=2.23.0, timm>=0.9.8


device = "cuda" if torch.cuda.is_available() else "cpu"

print("Device: ", device)

clip_model, _, compose = open_clip.create_model_and_transforms('ViT-B-16-plus-240', pretrained="laion400m_e32")
tokenizer = open_clip.get_tokenizer('ViT-B-16-plus-240')
clip_model.to(device)


# clip_model, compose = create_model_from_pretrained('hf-hub:timm/ViT-B-16-SigLIP-512')
# tokenizer = get_tokenizer('hf-hub:timm/ViT-B-16-SigLIP-512')

In [None]:
compose

In [None]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
device

In [None]:
clip_model.to(device)

### Define the image model

In [None]:
image_model = lambda images: np.asarray(clip_model.encode_image(images.to(device)).float().detach().to('cpu'))

# Utils

In [None]:
# Define the needed libraries in the code 

from tqdm.notebook import tqdm
import os 

from PIL import Image

### Define a dataset class for images

In [None]:

class CustomDataSet(torch.utils.data.Dataset):
    """Image dataset that loads files by name from one directory and applies a transform.

    Args:
        main_dir: directory that contains the image files.
        transform: callable applied to each opened PIL image; its result is the item.
        image_names: optional sequence of file names, in the order the dataset
            should serve them. When omitted, the notebook-level ``image_name_list``
            is used, which keeps existing two-argument calls working.
    """

    def __init__(self, main_dir, transform, image_names=None):
        self.main_dir = main_dir
        self.transform = transform
        self.total_imgs = image_name_list if image_names is None else list(image_names)
        # Print a one-line summary instead of dumping every file name.
        print(f"CustomDataSet: {len(self.total_imgs)} images from {self.main_dir}")

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.total_imgs)

    def get_image_name(self, idx):
        """Return the file name (not the full path) of the image at position ``idx``."""
        return self.total_imgs[idx]

    def __getitem__(self, idx):
        """Open the image at position ``idx`` and return ``transform(image)``."""
        img_loc = os.path.join(self.main_dir, self.total_imgs[idx])
        # Image.open is lazy and keeps the file handle open. Force the pixel data
        # to be read, then let the context manager release the handle so that
        # iterating the whole dataset does not accumulate open files.
        with Image.open(img_loc) as image:
            image.load()
            return self.transform(image)

### Define a dataset class for the text dataset

In [None]:
class SimpleTextDataset(torch.utils.data.Dataset):
    """Thin map-style dataset over an indexable collection of texts."""

    def __init__(self, texts):
        """Keep a reference to ``texts``; nothing is copied or transformed."""
        self.texts = texts

    def __len__(self):
        """Number of stored texts."""
        n_texts = len(self.texts)
        return n_texts

    def __getitem__(self, idx):
        """Return the text stored at position ``idx``."""
        item = self.texts[idx]
        return item

In [None]:
def text_encoder(text):
    """Embed ``text`` with ``language_model`` and L2-normalize each embedding.

    Args:
        text: input accepted by ``language_model`` (a string or a list of strings).

    Returns:
        Array of the same shape as the model output in which every row has unit
        L2 norm, so a dot product between rows is a cosine similarity.
    """
    embedding = language_model(text)
    # Normalize per row. np.linalg.norm without `axis` is the norm of the whole
    # batch, which is only correct when the batch holds a single embedding.
    row_norms = np.linalg.norm(embedding, axis=-1, keepdims=True)
    # Guard against division by zero for an all-zero embedding.
    embedding = embedding / np.maximum(row_norms, 1e-12)

    return embedding

def precompute_text_features(loader):
    """Embed every batch yielded by ``loader`` and return the stacked, row-normalized embeddings.

    Args:
        loader: iterable of text batches, each accepted by ``language_model``.

    Returns:
        np.ndarray with one unit-L2-norm row per input text, in loader order.
    """
    text_features = []

    # Inference only: skip building autograd graphs.
    with torch.no_grad():
        for texts in tqdm(loader):

            embedding = language_model(texts)
            # Normalize each embedding on its own. Dividing by the norm of the
            # whole batch leaves rows with different lengths and makes the scale
            # depend on the batch an item landed in, which distorts the
            # dot-product ranking used later.
            row_norms = np.linalg.norm(embedding, axis=-1, keepdims=True)
            embedding = embedding / np.maximum(row_norms, 1e-12)

            text_features.extend(embedding)

    return np.array(text_features)

In [None]:
def precompute_image_features(loader):
    """Embed every image batch yielded by ``loader`` and return the stacked, row-normalized embeddings.

    Args:
        loader: iterable of image batches, each accepted by ``image_model``.

    Returns:
        np.ndarray with one unit-L2-norm row per input image, in loader order.
    """
    image_features = []

    # Inference only: skip building autograd graphs.
    with torch.no_grad():
        for images in tqdm(loader):

            features = image_model(images)

            # Per-row L2 normalization. The norm of the whole batch would scale
            # images by a batch-dependent constant and leave individual rows
            # un-normalized, biasing the text-to-image ranking.
            row_norms = np.linalg.norm(features, axis=-1, keepdims=True)
            features = features / np.maximum(row_norms, 1e-12)
            image_features.extend(features)

    return np.array(image_features)

In [None]:
def show_images(image_list):
    """Print each path in ``image_list`` and render the image it points to."""
    for location in image_list:
        print(location)
        picture = Image.open(location)
        display(picture)

In [None]:
# text = 'بجعة تطفو أسفل النهر بالقارب'

# image_paths = find_image(text, dataset, image_features, n=3)
# show_images(image_paths)

Build the image dataset 

In [None]:
dataset = CustomDataSet("photos/XTD10_dataset", transform=compose)

In [None]:
# check if the image_paths sorted_data in the same order of the image dataset:


for i, item in enumerate(sorted_data):

    if item['image_name'] != dataset.get_image_name(i):
        print("stop")
        break


In [None]:
len(dataset)

### Define the image_loader

In [None]:
image_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=16,
    shuffle=False,
    num_workers=0,
    drop_last=False)

### Define the text_loader

In [None]:
text_dataset = SimpleTextDataset([elem["caption_ar"] for elem in sorted_data])

text_loader = torch.utils.data.DataLoader(
    text_dataset,
    batch_size=64,
    shuffle=False)

In [None]:
# Check this to utalize the GPU memory in the images 
# https://discuss.pytorch.org/t/not-using-multiprocessing-but-getting-cuda-error-re-forked-subprocess/54610/8

In [None]:
import numpy as np

In [None]:
# !pip install ipywidgets

In [None]:
image_features = precompute_image_features(image_loader)

In [None]:
image_emb_path = 'image_features.pickle'

In [None]:
text_emb_path = 'text_features.pickle'

In [None]:
import pickle


with open(image_emb_path, 'wb') as handle:
    pickle.dump(image_features, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
import pickle

with open(image_emb_path, 'rb') as handle:
    image_features_new = pickle.load(handle)

image_features_new

In [None]:
text_features = precompute_text_features(text_loader)

text_features

In [None]:
import pickle


with open(text_emb_path, 'wb') as handle:
    pickle.dump(text_features, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:

with open(text_emb_path, 'rb') as handle:
    text_features_new = pickle.load(handle)

text_features_new

In [None]:
image_features_new

In [None]:
text_features_new.shape

In [None]:
text_features_new[0][:]

In [None]:
image_features_new[0][:]

In [None]:
# # Take a look later over this

# logit_scale = clip_model.logit_scale.exp().float().detach().to('cpu')
# print(logit_scale)
# logit_scale * text_features_new

In [None]:
# logit_scale_val = logit_scale.item()

In [None]:
def get_path_coco(image_id):
    """Return the path under ``photos/XTD10_dataset/`` of the image registered for ``image_id``.

    Raises:
        KeyError: if ``image_id`` is not a key of ``id2path``.
    """
    file_name = id2path[image_id]
    return "photos/XTD10_dataset/{}".format(file_name)

In [None]:
import numpy as np

In [None]:
mat_indx_mrr = np.zeros((1000,1000),dtype=np.int64)

In [None]:
mat_indx_mrr.shape

In [None]:
mat_indx_mrr

In [None]:
collect_rr_testing = []

In [None]:
# Check which axis the for loop get back
# So, it loop over the raws

chck_found = np.random.randint(10, size=(2, 4))
for index, distances in enumerate(chck_found):
    print(index)
    print(distances)

In [None]:
# Check the scores  

text_features_new.shape

In [None]:
image_features_new.shape

In [None]:
def compare_embeddings(logit_scale, img_embs, txt_embs):
  """Return scaled cosine similarities between every text and every image.

  Both embedding sets are L2-normalized along their last dimension. The result
  has one row per text embedding and one column per image embedding.
  """
  def _unit_rows(embs):
    # Divide each row by its own L2 norm.
    return embs / embs.norm(dim=-1, keepdim=True)

  unit_images = _unit_rows(img_embs)
  scaled_text = logit_scale * _unit_rows(txt_embs)

  return scaled_text @ unit_images.t()

In [None]:
# # https://github.com/gpleiss/temperature_scaling
# # CLIP Temperature scaler
# logit_scale = clip_model.logit_scale.exp().float().to('cpu')

# print(logit_scale)

# language_logits = {}

 


# language_logits["Arabic"] = compare_embeddings(logit_scale, torch.from_numpy(image_features_new), torch.from_numpy(text_features_new))
# language_logits["Arabic"].shape

In [None]:
sorted_data[400+25]

In [None]:
sorted_data[400+86]

In [None]:
# trial_1 = []

In [None]:
def compute_mrr(data, dataset, n):
    """Compute the mean reciprocal rank (MRR@n) of text-to-image retrieval.

    Reads the precomputed ``text_features_new`` and ``image_features_new``
    arrays from the notebook namespace.

    Args:
        data: sequence of records; ``data[i]["id"]`` identifies the image that
            belongs to the i-th text embedding.
        dataset: image dataset, forwarded to ``new_rr``.
        n: number of top-ranked images considered per query.

    Returns:
        The average of the per-query reciprocal ranks.
    """
    # Row i holds the similarity of text i to every image.
    found = np.matmul(text_features_new, image_features_new.T)

    collect_rr = []
    for index, distances in enumerate(tqdm(found, total=len(data), position=0, leave=True)):
        image_path = get_path_coco(data[index]["id"])
        # new_rr as defined next to this function takes four arguments; passing
        # the row index as a fifth one raised a TypeError.
        collect_rr.append(new_rr(distances, image_path, dataset, n))

    print(100*"=")

    return np.average(collect_rr)


def new_rr(distances, target_image, dataset, n, index=None):
    """Reciprocal rank of ``target_image`` among the ``n`` best-scoring images.

    Args:
        distances: 1-D array of similarity scores, one per image (higher is better).
        target_image: expected image path, in the form 'photos/XTD10_dataset/<name>'.
        dataset: object whose ``get_image_name(idx)`` returns the file name of image ``idx``.
        n: number of top-ranked images to inspect.
        index: unused; accepted so that callers passing the query index as a
            fifth argument keep working.

    Returns:
        ``1 / rank`` (rank starting at 1) when the target is among the top ``n``, else 0.
    """
    # argsort is ascending: take the last n entries and reverse them so the best
    # match comes first.
    idxs = distances.argsort()[-n:][::-1]

    image_paths = ['photos/XTD10_dataset/' + dataset.get_image_name(idx) for idx in idxs]

    if target_image in image_paths:
        return 1/(image_paths.index(target_image) + 1)
    return 0


def internal_hits(distances, target_image, dataset, n):
    """Return 1 when ``target_image`` is among the ``n`` highest-scoring indices, else 0.

    ``target_image`` is compared against the indices produced by ``argsort``, so it
    is expected to be an image index rather than a path. ``dataset`` is not used.
    """
    # Indices of the n largest scores, best first.
    top_indices = distances.argsort()[-n:][::-1]
    return 1 if target_image in top_indices else 0

def compute_hits(data, dataset, n):
    """Compute Hits@n for text-to-image retrieval: the fraction of queries whose
    own image index is among the ``n`` best-scoring images.

    Reads the precomputed ``text_features_new`` and ``image_features_new``
    arrays from the notebook namespace. The i-th text is expected to match the
    i-th image.

    Args:
        data: sequence of records; only its length is used (progress-bar total).
        dataset: forwarded to ``internal_hits``.
        n: number of top-ranked images considered per query.

    Returns:
        The average of the per-query hit indicators (0 or 1).
    """
    # Row i holds the similarity of text i to every image.
    found = np.matmul(text_features_new, image_features_new.T)

    collect_rr = []
    # Every query is evaluated. The original loop contained a `break` that
    # stopped after the first row (it looks like a debugging leftover), so the
    # returned "average" was just the first query's hit.
    for index, distances in enumerate(tqdm(found, total=len(data), position=0, leave=True)):
        collect_rr.append(internal_hits(distances, index, dataset, n))

    return np.average(collect_rr)

In [None]:
# def compute_mrr(data, dataset, n):
#     """Compute the MRR for the data based on n"""
#     collect_rr = []

#     found = np.matmul(text_features, image_features.T)


#     for index, cos_vlaues in enumerate(found):

#         image_path = get_image_path(data[index]["id"])

#         result = 0

#         image_paths = []

#         idxs = cos_vlaues.argsort()[-n:][::-1] 
        
#         for idx in idxs:
#             image_paths.append(get_image_path(idx))

#         if target_image in image_paths:

#             result = 1/(image_paths.index(target_image) + 1)

#         collect_rr.append(result)

#     return np.average(collect_rr)


In [None]:
def compute_mrr(data, dataset, n):
    """Mean reciprocal rank (MRR@n) over all text queries.

    Uses the notebook-level ``text_features_new`` and ``image_features_new``
    arrays; ``data[i]["id"]`` identifies the image that belongs to query ``i``.
    """
    # One row of similarity scores per text query.
    similarity = np.matmul(text_features_new, image_features_new.T)

    reciprocal_ranks = [
        new_rr(scores, get_path_coco(data[row]["id"]), dataset, n, row)
        for row, scores in enumerate(similarity)
    ]

    return np.average(reciprocal_ranks)

def new_rr(distances, target_image, dataset, n,index):
    """Reciprocal rank of ``target_image`` within the ``n`` best-scoring images.

    Returns ``1 / rank`` (rank starting at 1) when the target path is among the
    top ``n`` candidates and 0 otherwise. ``index`` is accepted but not used.
    """
    # Ascending argsort: keep the last n entries, reversed so the best is first.
    ranked = distances.argsort()[-n:][::-1]

    candidates = ['photos/XTD10_dataset/' + dataset.get_image_name(img_idx) for img_idx in ranked]

    try:
        position = candidates.index(target_image)
    except ValueError:
        # Target is not among the top-n candidates.
        return 0
    return 1/(position + 1)


In [None]:
# # image_encoder - ResNet or Vision Transformer
# # text_encoder - CBOW or Text Transformer
# # I[n, h, w, c] - minibatch of aligned images
# # T[n, l] - minibatch of aligned texts
# # W_i[d_i, d_e] - learned proj of image to embed
# # W_t[d_t, d_e] - learned proj of text to embed
# # t - learned temperature parameter
# # extract feature representations of each modality
# I_f = image_encoder(I) #[n, d_i]
# T_f = text_encoder(T) #[n, d_t]
# # joint multimodal embedding [n, d_e]
# I_e = l2_normalize(np.dot(I_f, W_i), axis=1)
# T_e = l2_normalize(np.dot(T_f, W_t), axis=1)
# # scaled pairwise cosine similarities [n, n]
# logits = np.dot(I_e, T_e.T) * np.exp(t)
# # symmetric loss function
# labels = np.arange(n)
# loss_i = cross_entropy_loss(logits, labels, axis=0)
# loss_t = cross_entropy_loss(logits, labels, axis=1)
# loss = (loss_i + loss_t)/2


# Figure 3. Numpy-like pseudocode for the core of an implementa-
# tion of CLIP.

In [None]:
print('MRR@1:', compute_mrr(sorted_data, dataset, 1))

In [None]:
print('MRR@5:', compute_mrr(sorted_data, dataset, 5))

In [None]:
print('MRR@10:', compute_mrr(sorted_data, dataset,10))

In [None]:
# print(compute_hits(sorted_data, dataset, 1)* 100)

In [None]:
# print(compute_hits(sorted_data, dataset, 5)* 100)

In [None]:
# print(compute_hits(sorted_data, dataset, 10)* 100)

## Evaluation based on Recall metric

In [None]:
image_features_new.shape

In [None]:
text_features_new.shape

In [None]:
image_features_new_pt = torch.from_numpy(image_features_new)

text_features_new_pt = torch.from_numpy(text_features_new)

text_to_image_map = torch.LongTensor(list(range(text_features_new.shape[0])))
print(text_to_image_map.shape) # .type(torch.int64)

print(text_to_image_map.unsqueeze(1).shape)

In [None]:
torch.set_printoptions(precision=8)

In [None]:
# https://github.com/openai/CLIP/issues/115
import torch
from torchvision.datasets import CocoCaptions
import torch.utils.data as dutils
from typing import List
import clip

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')




def recall_at_k(k_vals, image_encodings,text_encodings,text_to_image_map):
    """Text-to-image Recall@K from precomputed embeddings.

    Args:
        k_vals: iterable of K values to evaluate.
        image_encodings: 2-D tensor with one row per image.
        text_encodings: 2-D tensor with one row per text.
        text_to_image_map: 1-D integer tensor; entry ``i`` is the index of the
            image that matches text ``i``.

    Returns:
        List of recall values (fractions in [0, 1]), one per entry of ``k_vals``.
    """
    print("Encoding all data...")

    num_text = text_encodings.shape[0]

    # text-to-image recall
    print("Text-to-image recall...")

    # dist_matrix[i] holds the scores of the i-th text against every image
    dist_matrix = text_encodings @ image_encodings.T

    # Image indices per text, best match first.
    inds = torch.argsort(dist_matrix, dim=1, descending=True)

    # Column vector of ground-truth indices, kept on the same device as `inds`.
    # This removes the dependency on a notebook-global `device` and the needless
    # copy of the full index matrix to the GPU.
    targets = text_to_image_map.to(inds.device).unsqueeze(1)

    text_to_image_recall = []

    for k in k_vals:
        # A text counts as correct when its target index appears anywhere in its
        # top-k row; broadcasting compares each row against its single target.
        correct = torch.eq(inds[:, :k], targets).any(dim=1)

        num_correct = correct.sum().item()

        text_to_image_recall.append(num_correct / num_text)

    print(text_to_image_recall)

    print("Done.")
    return text_to_image_recall

In [None]:
# Values of K at which text-to-image recall is reported.
k_vals = [1,5,10]
t2i= recall_at_k(k_vals=k_vals, image_encodings=image_features_new_pt,text_encodings=text_features_new_pt,text_to_image_map=text_to_image_map)

print("Text-to-image Recall@K")

print("Returned value: ", t2i)
for k, x in zip(k_vals, t2i):
    # recall_at_k returns fractions in [0, 1]; the previous `(x/100) * 100` was a
    # no-op, so the fraction was printed where a percentage was intended.
    print(f" R@{k}: {100*x:.2f}%")
    # print(f" R@{k}: {100*x:.2f}%")
