In [23]:
import os
import random
import numpy as np
import pandas as pd

from PIL import Image
from IPython.display import display
from transformers import ViTImageProcessor, ViTForImageClassification, ViTModel

In [24]:
# Load the training table; later cells read its "image_url1", "image_url2"
# and "is_same" columns (see get_random_pair).
train = pd.read_csv("../data/train.csv")

In [60]:
# Image processor + bare ViTModel from the same checkpoint; test() compares
# the last_hidden_state this model produces for each image of a pair.
processor = ViTImageProcessor.from_pretrained('google/vit-large-patch32-224-in21k')
model = ViTModel.from_pretrained('google/vit-large-patch32-224-in21k')

# Image processor + ViTForImageClassification from a second checkpoint;
# test2() compares the logits this model produces for each image of a pair.
classification_processor = ViTImageProcessor.from_pretrained('google/vit-large-patch32-384')
classification_model = ViTForImageClassification.from_pretrained('google/vit-large-patch32-384')

Downloading (…)rocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/504 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.23G [00:00<?, ?B/s]

Downloading (…)rocessor_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/69.7k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.23G [00:00<?, ?B/s]

In [61]:
def get_random_pair(df: pd.DataFrame):
    """Pick one random row of ``df`` and open the two images it refers to.

    The row's ``image_url1`` / ``image_url2`` values are reduced to the text
    after their last ``/`` and looked up under ``../data/images``.

    Args:
        df: Frame with ``image_url1``, ``image_url2`` and ``is_same`` columns.

    Returns:
        ``(image1, image2, label)``: the two results of ``Image.open`` and the
        row's ``is_same`` value.

    Raises:
        IndexError: If ``df`` has no rows.
    """
    if len(df) == 0:
        raise IndexError("cannot pick a random pair from an empty DataFrame")

    # randrange excludes its upper bound. random.randint(0, len(df)) is
    # inclusive and could return len(df), one past the last valid position,
    # which made df.iloc fail intermittently.
    row_index = random.randrange(len(df))

    path1 = df.iloc[row_index, df.columns.get_loc("image_url1")]
    path2 = df.iloc[row_index, df.columns.get_loc("image_url2")]
    label = df.iloc[row_index, df.columns.get_loc("is_same")]

    # Only the final URL component is used as the local file name.
    path1 = os.path.join("..", "data", "images", path1.split("/")[-1])
    path2 = os.path.join("..", "data", "images", path2.split("/")[-1])

    image1 = Image.open(path1)
    image2 = Image.open(path2)

    return image1, image2, label

In [62]:
def test(df):
    """Compare one random image pair using ``model``'s last hidden state.

    A pair is drawn with ``get_random_pair``; each image is run through
    ``processor`` and ``model``, and the two ``last_hidden_state`` arrays are
    compared with three measures.

    Returns:
        ``(image1, image2, label, euclidean_similarity, cosine_similarity,
        dot_similarity)``. ``euclidean_similarity`` is the L2 norm of the
        difference of the two arrays, so smaller values mean closer outputs.
    """
    first_image, second_image, label = get_random_pair(df)

    # Pre-process both images first, then run both forward passes.
    encoded = [
        processor(images=picture, return_tensors="pt")
        for picture in (first_image, second_image)
    ]
    results = [model(**batch) for batch in encoded]
    emb_a, emb_b = [res.last_hidden_state.detach().numpy() for res in results]

    l2_of_difference = np.linalg.norm(emb_a - emb_b)

    inner = np.dot(emb_a.flatten(), emb_b.flatten())
    norm_product = np.linalg.norm(emb_a) * np.linalg.norm(emb_b)
    cosine = inner / norm_product

    elementwise_total = np.sum(emb_a * emb_b)

    return first_image, second_image, label, l2_of_difference, cosine, elementwise_total

In [63]:
def test2(df):
    """Compare one random image pair using ``classification_model``'s logits.

    A pair is drawn with ``get_random_pair``; each image is run through
    ``classification_processor`` and ``classification_model``, and the two
    ``logits`` arrays are compared with three measures.

    Returns:
        ``(image1, image2, label, euclidean_similarity, cosine_similarity,
        dot_similarity)``. ``euclidean_similarity`` is the L2 norm of the
        difference of the two arrays, so smaller values mean closer outputs.
    """
    first_image, second_image, label = get_random_pair(df)

    # Pre-process both images first, then run both forward passes.
    encoded = [
        classification_processor(images=picture, return_tensors="pt")
        for picture in (first_image, second_image)
    ]
    results = [classification_model(**batch) for batch in encoded]
    logits_a, logits_b = [res.logits.detach().numpy() for res in results]

    l2_of_difference = np.linalg.norm(logits_a - logits_b)

    inner = np.dot(logits_a.flatten(), logits_b.flatten())
    norm_product = np.linalg.norm(logits_a) * np.linalg.norm(logits_b)
    cosine = inner / norm_product

    elementwise_total = np.sum(logits_a * logits_b)

    return first_image, second_image, label, l2_of_difference, cosine, elementwise_total

In [64]:
def save_test_results(df, number):
    """Run ``test2`` on ``number`` random pairs and tabulate the metrics.

    Args:
        df: Frame passed through unchanged to ``test2`` on every call.
        number: How many times ``test2`` is called.

    Returns:
        A DataFrame with one row per call and the columns ``label``,
        ``euclidean_similarity``, ``cosine_similarity`` and ``dot_similarity``.
        The images returned by ``test2`` are discarded.
    """
    def one_row():
        # test2 returns (image1, image2, label, euclidean, cosine, dot);
        # only the label and the three metrics are kept.
        _, _, label, euclidean, cosine, dot = test2(df)
        return [label, euclidean, cosine, dot]

    rows = [one_row() for _ in range(number)]
    return pd.DataFrame(
        rows,
        columns=['label', 'euclidean_similarity', 'cosine_similarity', 'dot_similarity'],
    )

In [65]:
# Score 10 randomly drawn pairs from `train` (save_test_results calls test2,
# i.e. the logits-based comparison) and display the table.
result_dataframe = save_test_results(train, number=10)
result_dataframe

Unnamed: 0,label,euclidean_similarity,cosine_similarity,dot_similarity
0,0,34.877274,0.67125,1235.877441
1,0,33.597183,0.73297,1492.023071
2,1,0.0,1.0,2039.595337
3,1,0.477496,0.999938,1622.979492
4,0,33.665638,0.711089,1350.064941
5,0,27.940718,0.791423,1450.228027
6,0,23.031408,0.855501,1556.84082
7,1,7.106678,0.983108,1438.709229
8,0,31.874458,0.468731,446.969482
9,0,31.080542,0.696917,1106.03125


In [66]:
# Same call as the previous cell; pairs are drawn at random with no seed set
# in this notebook, so each run samples different rows.
result_dataframe = save_test_results(train, number=10)
result_dataframe

Unnamed: 0,label,euclidean_similarity,cosine_similarity,dot_similarity
0,1,0.0,1.0,2277.552002
1,1,0.0,1.0,893.115234
2,0,33.557167,0.664662,1115.984131
3,0,36.466961,0.677999,1385.994629
4,0,25.505917,0.757273,1007.293457
5,0,22.014118,0.84791,1230.35083
6,0,32.681847,0.659912,1005.107056
7,0,38.466099,0.715483,1314.044434
8,1,0.414023,0.999938,1369.157471
9,0,34.889606,0.668951,1022.654053
