In [2]:
import sys

# Put the relative directory "../../.." at the front of the module search path
# (presumably so the project-local `data_retrieval` package imported in the
# next cell resolves -- confirm against the repository layout).
# The guard keeps re-running this cell from inserting the entry again.
path = "../../.."
if sys.path.count(path) == 0:
    sys.path.insert(0, path)

In [3]:
import os

import clip
import numpy as np
import torch
from PIL import Image
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

from data_retrieval import lipade_groundtruth
from data_retrieval.tools.data_loader import getDataLoader

# Use the first CUDA GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# The corpus name selects the sub-directory used under each results folder.
corpus = "lipade_groundtruth"
distancePath = f"../results/distance/{corpus}/"  # where distance matrices are saved
rawPath = f"../results/raw/{corpus}/"            # where raw representations are saved

In [4]:
# Show the model names the installed `clip` package reports; the encoding cell
# further down loops over exactly this list, in this order.
clip.available_models()

['RN50',
 'RN101',
 'RN50x4',
 'RN50x16',
 'RN50x64',
 'ViT-B/32',
 'ViT-B/16',
 'ViT-L/14',
 'ViT-L/14@336px']

In [5]:
# Fetch the corpus in "similar" mode. The first returned value is treated as a
# sequence of image paths and the third is kept as `y`; the middle value is
# discarded. (The exact meaning of `y` is not visible here -- check
# lipade_groundtruth.getDataset.)
x, _, y = lipade_groundtruth.getDataset(mode="similar")

# Replace every path by its opened PIL image, in place, so `x` keeps its
# container type and length.
for position, imagePath in enumerate(x):
    x[position] = Image.open(imagePath)

In [6]:
# For every CLIP model, encode each image of `x` and collect the embeddings.
# Result: one list per model (same order as clip.available_models()), each
# holding one CPU tensor per image.
representationsPerModel = []

for modelName in clip.available_models():
    # clip.load returns the network together with its preprocessing transform.
    model, preprocess = clip.load(modelName, device)

    # Inference only: gradient tracking is switched off while encoding.
    with torch.no_grad():
        representations = [
            # preprocess -> add a leading dimension of size 1 -> move to the
            # compute device -> encode -> bring the embedding back to the CPU.
            model.encode_image(preprocess(picture).unsqueeze(0).to(device)).cpu()
            for picture in tqdm(x, desc=modelName)
        ]

    representationsPerModel.append(representations)

RN50: 100%|██████████| 279/279 [00:06<00:00, 46.04it/s]
RN101: 100%|██████████| 279/279 [00:04<00:00, 63.00it/s]
RN50x4: 100%|██████████| 279/279 [00:04<00:00, 61.71it/s]
RN50x16: 100%|██████████| 279/279 [00:06<00:00, 42.02it/s]
RN50x64: 100%|██████████| 279/279 [00:13<00:00, 21.12it/s]
ViT-B/32: 100%|██████████| 279/279 [00:03<00:00, 77.78it/s]
ViT-B/16: 100%|██████████| 279/279 [00:03<00:00, 77.37it/s]
ViT-L/14: 100%|██████████| 279/279 [00:05<00:00, 51.26it/s]
ViT-L/14@336px: 100%|██████████| 279/279 [00:07<00:00, 35.62it/s]


In [7]:
# Turn each model's list of per-image embeddings into one 2-D NumPy array with
# one row per image (len(x) rows; the column count is inferred by reshape).
# The list is updated in place, one model at a time.
imageCount = len(x)
for modelIndex, perImageEmbeddings in enumerate(representationsPerModel):
    representationsPerModel[modelIndex] = np.array(perImageEmbeddings).reshape((imageCount, -1))

In [8]:
# Pairwise cosine similarity between all rows of each model's representation
# array (every image against every image), stacked into a single array with
# one similarity matrix per model.
sim = np.array([
    cosine_similarity(modelFeatures, modelFeatures)
    for modelFeatures in representationsPerModel
])

In [9]:
# Convert cosine similarity (range [-1, 1]) into a distance in [0, 1]:
# similarity 1 -> distance 0, similarity -1 -> distance 1.
distance = 1 - (sim + 1) / 2

# An item's distance to itself must be exactly zero, but floating-point
# rounding can leave tiny residues on the diagonal. Zero the diagonal directly.
# The previous `distance[i] -= np.diag(distance[i])` broadcast the diagonal
# vector across rows, i.e. it subtracted diag[c] from the whole column c, which
# also shifted every off-diagonal entry and made the matrix asymmetric.
for modelDistance in distance:
    # Iterating the stacked array yields views, so this edits `distance` in place.
    np.fill_diagonal(modelDistance, 0)

In [10]:
# File-name tags, one per CLIP model. The order mirrors the
# clip.available_models() output shown earlier in this notebook (RN50 first,
# ViT-L/14@336px last); the save cell pairs these tags with the per-model
# results by position.
names = [
    "resnet50",
    "resnet101",
    "resnet50x4",
    "resnet50x16",
    "resnet50x64",
    "vitb32",
    "vitb16",
    "vitl14",
    "vitl14_336p",
]

In [11]:
# Persist, for every model, its distance matrix and its raw representation
# array as "clip_transfer_<name>.npy" under distancePath / rawPath.

# `names` is matched to the results purely by position, so refuse to write
# anything if the counts disagree (e.g. the installed clip package lists a
# different set of models than `names` was written for).
if not (len(names) == len(distance) == len(representationsPerModel)):
    raise ValueError(
        "names, distance and representationsPerModel must have the same length, got "
        + str(len(names)) + ", " + str(len(distance)) + " and " + str(len(representationsPerModel))
    )

# np.save does not create missing directories; make sure both targets exist so
# a fresh checkout does not fail only after all models have been encoded.
os.makedirs(distancePath, exist_ok=True)
os.makedirs(rawPath, exist_ok=True)

for name, modelDistance, modelRepresentations in zip(names, distance, representationsPerModel):
    np.save(distancePath + "clip_transfer_" + name + ".npy", modelDistance)
    np.save(rawPath + "clip_transfer_" + name + ".npy", modelRepresentations)