In [1]:
import torch
import clip

from PIL import Image
import os

import torch
import torch.nn.functional as F

device = "cuda" if torch.cuda.is_available() else "cpu"


In [2]:
model, preprocess = clip.load("ViT-B/32", device=device)

In [3]:
def get_image_embedding(image):
    preprocessed_image = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        image_features = model.encode_image(preprocessed_image)
        return image_features
    
def get_text_embedding(text):
    with torch.no_grad():
        return model.encode_text(clip.tokenize([text]).to(device))


def get_images_from_folder(folder_path):
    images = []

    for filename in os.listdir(folder_path):
        # Check if the file is an image file
        if filename.endswith(".jpg") or filename.endswith(".png") or filename.endswith(".jpeg"):
            # Open the image using PIL
            image_path = os.path.join(folder_path, filename)
            image = Image.open(image_path)
            # Append the image to the list
            images.append(get_image_embedding(image))

    return images




In [38]:
crown_image_embeddings = get_images_from_folder("Dataset/GoldRatings-10a/1-crown")
royalty_image_embeddings = get_images_from_folder("Dataset/GoldRatings-10a/2-royalty")
fire_image_embeddings = get_images_from_folder("Dataset/GoldRatings-10a/4-fire")
smoke_image_embeddings = get_images_from_folder("Dataset/GoldRatings-10a/3-smoke")


crown_tensor = torch.squeeze(torch.stack(crown_image_embeddings, dim=0), dim=1)
royalty_tensor = torch.squeeze(torch.stack(royalty_image_embeddings, dim=0), dim=1)
fire_tensor = torch.squeeze(torch.stack(fire_image_embeddings, dim=0), dim=1)
smoke_tensor = torch.squeeze(torch.stack(smoke_image_embeddings, dim=0), dim=1)

print(crown_tensor.size())
print(royalty_tensor.size())
print(fire_tensor.size())
print(smoke_tensor.size())

# Average of embeddings
crown_average_embedding = torch.mean(crown_tensor, dim=0)
royalty_average_embedding = torch.mean(royalty_tensor, dim=0)
fire_average_embedding = torch.mean(fire_tensor, dim=0)
smoke_average_embedding = torch.mean(smoke_tensor, dim=0)



# print(crown_average_embedding.size())
# # print(crown_average_embedding)

# print(royalty_average_embedding.size())
# # print(royalty_average_embedding)

# print(fire_average_embedding.size())
# # print(fire_average_embedding)

# print(smoke_average_embedding.size())
# # print(smoke_average_embedding)

offset_vector_embedding = royalty_average_embedding - crown_average_embedding + smoke_average_embedding

cos_sim = F.cosine_similarity(offset_vector_embedding, fire_average_embedding, dim=0)

print(cos_sim)

angle_in_degrees = torch.rad2deg(torch.acos(cos_sim))

print("Angle for Image embedding before normalizing: Averaging ", angle_in_degrees)

rho, p_value = spearmanr(offset_vector_embedding.cpu().numpy(), fire_average_embedding.cpu().numpy())
print("Spearman's correlation coefficient for Image embedding before normalizing:", rho)


crown_average_embedding = F.normalize(crown_average_embedding , p=2,dim=0)
royalty_average_embedding = F.normalize(royalty_average_embedding , p=2,dim=0)
fire_average_embeddingmoke = F.normalize(fire_average_embedding, p=2,dim=0)
smoke_average_embedding = F.normalize(smoke_average_embedding , p=2,dim=0)

offset_vector_embedding = royalty_average_embedding - crown_average_embedding + smoke_average_embedding

cos_sim = F.cosine_similarity(offset_vector_embedding, fire_average_embedding, dim=0)

print(cos_sim)

angle_in_degrees = torch.rad2deg(torch.acos(cos_sim))

print("Angle for Image embedding after normalizing: Averaging ", angle_in_degrees)

rho, p_value = spearmanr(offset_vector_embedding.cpu().numpy(), fire_average_embedding.cpu().numpy())
print("Spearman's correlation coefficient for Image embedding after normalizing:", rho)




torch.Size([16, 512])
torch.Size([16, 512])
torch.Size([16, 512])
torch.Size([16, 512])
tensor(0.8016)
Angle for Image embedding before normalizing: Averaging  tensor(36.7156)
Spearman's correlation coefficient for Image embedding before normalizing: 0.5271508537325048
tensor(0.7941)
Angle for Image embedding after normalizing: Averaging  tensor(37.4341)
Spearman's correlation coefficient for Image embedding after normalizing: 0.5147009754694957


In [39]:
crown = get_text_embedding('crown').squeeze(0)
royalty = get_text_embedding('royalty').squeeze(0)
smoke = get_text_embedding('smoke').squeeze(0)
fire = get_text_embedding('fire').squeeze(0)

offset_vector_embedding = royalty - crown + smoke

cos_sim = F.cosine_similarity(offset_vector_embedding, fire, dim=0)

print(cos_sim)

angle_in_degrees = torch.rad2deg(torch.acos(cos_sim))

print("Angle for text embedding before normalizing: Averaging ", angle_in_degrees)
rho, p_value = spearmanr(offset_vector_embedding.cpu().numpy(), fire.cpu().numpy())
print("Spearman's correlation coefficient for text embedding before normalizing:", rho)


crown = F.normalize(crown , p=2,dim=0)
royalty = F.normalize(royalty , p=2,dim=0)
smoke = F.normalize(smoke , p=2,dim=0)
fire = F.normalize(fire, p=2,dim=0)

offset_vector_embedding = royalty - crown + smoke

cos_sim = F.cosine_similarity(offset_vector_embedding, fire, dim=0)

print(cos_sim)

angle_in_degrees = torch.rad2deg(torch.acos(cos_sim))

print("Angle for text embedding after normalizing: Averaging ", angle_in_degrees)
rho, p_value = spearmanr(offset_vector_embedding.cpu().numpy(), fire.cpu().numpy())
print("Spearman's correlation coefficient for text embedding after normalizing:", rho)


tensor(0.8574)
Angle for text embedding before normalizing: Averaging  tensor(30.9766)
Spearman's correlation coefficient for text embedding before normalizing: 0.3216292014282281
tensor(0.8340)
Angle for text embedding after normalizing: Averaging  tensor(33.4874)
Spearman's correlation coefficient for text embedding after normalizing: 0.30276694161202095


In [None]:
crown_image_embeddings = get_images_from_folder("Dataset/GoldRatings-10b/laugh_happiness__nod_agreement__/laugh")
royalty_image_embeddings = get_images_from_folder("Dataset/GoldRatings-10b/laugh_happiness__nod_agreement__/happiness")
fire_image_embeddings = get_images_from_folder("Dataset/GoldRatings-10b/laugh_happiness__nod_agreement__/agreement")
smoke_image_embeddings = get_images_from_folder("Dataset/GoldRatings-10b/laugh_happiness__nod_agreement__/nod")


crown_tensor = torch.squeeze(torch.stack(crown_image_embeddings, dim=0), dim=1)
royalty_tensor = torch.squeeze(torch.stack(royalty_image_embeddings, dim=0), dim=1)
fire_tensor = torch.squeeze(torch.stack(fire_image_embeddings, dim=0), dim=1)
smoke_tensor = torch.squeeze(torch.stack(smoke_image_embeddings, dim=0), dim=1)

print(crown_tensor.size())
print(royalty_tensor.size())
print(fire_tensor.size())
print(smoke_tensor.size())

# Average of embeddings
crown_average_embedding = torch.mean(crown_tensor, dim=0)
royalty_average_embedding = torch.mean(royalty_tensor, dim=0)
fire_average_embedding = torch.mean(fire_tensor, dim=0)
smoke_average_embedding = torch.mean(smoke_tensor, dim=0)

crown_average_embedding = F.normalize(crown_average_embedding , p=2,dim=0)
royalty_average_embedding = F.normalize(royalty_average_embedding , p=2,dim=0)
fire_average_embeddingmoke = F.normalize(fire_average_embedding, p=2,dim=0)
smoke_average_embedding = F.normalize(smoke_average_embedding , p=2,dim=0)

print(crown_average_embedding.size())
# print(crown_average_embedding)

print(royalty_average_embedding.size())
# print(royalty_average_embedding)

print(fire_average_embedding.size())
# print(fire_average_embedding)

print(smoke_average_embedding.size())
# print(smoke_average_embedding)

offset_vector_embedding = royalty_average_embedding - crown_average_embedding + smoke_average_embedding

cos_sim = F.cosine_similarity(offset_vector_embedding, fire_average_embedding, dim=0)

print(cos_sim)

angle_in_degrees = torch.rad2deg(torch.acos(cos_sim))
print("Angle : Averaging ", angle_in_degrees)

In [26]:
crown = get_text_embedding('laugh').squeeze(0)
royalty = get_text_embedding('happiness').squeeze(0)
smoke = get_text_embedding('nod').squeeze(0)
fire = get_text_embedding('agreement').squeeze(0)

# crown = F.normalize(get_text_embedding('laugh').squeeze(0) , p=2,dim=0)
# royalty = F.normalize(get_text_embedding('happiness').squeeze(0) , p=2,dim=0)
# smoke = F.normalize(get_text_embedding('nod').squeeze(0) , p=2,dim=0)
# fire = F.normalize(get_text_embedding('agreement').squeeze(0) , p=2,dim=0)

offset_vector_embedding = royalty - crown + smoke

cos_sim = F.cosine_similarity(offset_vector_embedding, fire, dim=0)

print(cos_sim)

angle_in_degrees = torch.rad2deg(torch.acos(cos_sim))

print("Angle for text embedding : Averaging ", angle_in_degrees)

tensor(0.8358)
Angle for text embedding : Averaging  tensor(33.3014)


In [27]:
crown = get_text_embedding('King').squeeze(0)
royalty = get_text_embedding('Woman').squeeze(0)
smoke = get_text_embedding('Man').squeeze(0)
fire = get_text_embedding('Woman').squeeze(0)

offset_vector_embedding = royalty - crown + smoke

cos_sim = F.cosine_similarity(offset_vector_embedding, fire, dim=0)

print(cos_sim)

angle_in_degrees = torch.rad2deg(torch.acos(cos_sim))

print("Angle for text embedding : Averaging ", angle_in_degrees)

tensor(0.8955)
Angle for text embedding : Averaging  tensor(26.4227)


In [22]:
crown = F.normalize(get_text_embedding('King').squeeze(0) , p=2,dim=0)
royalty = F.normalize(get_text_embedding('Woman').squeeze(0) , p=2,dim=0)
smoke = F.normalize(get_text_embedding('Man').squeeze(0) , p=2,dim=0)
fire = F.normalize(get_text_embedding('Woman').squeeze(0) , p=2,dim=0)

offset_vector_embedding = royalty - crown + smoke

cos_sim = F.cosine_similarity(offset_vector_embedding, fire, dim=0)

print(cos_sim)

angle_in_degrees = torch.rad2deg(torch.acos(cos_sim))

print("Angle for text embedding : Averaging ", angle_in_degrees)

tensor(0.9015)
Angle for text embedding : Averaging  tensor(25.6495)


In [13]:
from scipy.stats import spearmanr

def find_image_embedding_arithmetics(pair1,pair2):
    first_pair = pair1.split(':')
    second_pair = pair2.split(':')

    first_image_embeddings = get_images_from_folder(os.path.join("Dataset" ,first_pair[0]))
    second_image_embeddings = get_images_from_folder(os.path.join("Dataset" ,first_pair[1]))
    third_image_embeddings = get_images_from_folder(os.path.join("Dataset" ,second_pair[0]))
    fourth_image_embeddings = get_images_from_folder(os.path.join("Dataset" ,second_pair[1]))


    first_image_embeddings = torch.squeeze(torch.stack(first_image_embeddings, dim=0), dim=1)
    second_image_embeddings = torch.squeeze(torch.stack(second_image_embeddings, dim=0), dim=1)
    third_image_embeddings = torch.squeeze(torch.stack(third_image_embeddings, dim=0), dim=1)
    fourth_image_embeddings = torch.squeeze(torch.stack(fourth_image_embeddings, dim=0), dim=1)

    # Average of embeddings
    first_image_embeddings = torch.mean(first_image_embeddings, dim=0)
    second_image_embeddings = torch.mean(second_image_embeddings, dim=0)
    third_image_embeddings = torch.mean(third_image_embeddings, dim=0)
    fourth_image_embeddings = torch.mean(fourth_image_embeddings, dim=0)


    offset_vector_embedding = second_image_embeddings - first_image_embeddings + third_image_embeddings

    cos_sim = F.cosine_similarity(offset_vector_embedding, fourth_image_embeddings, dim=0)

    cos_image_before_normalization = torch.rad2deg(torch.acos(cos_sim))

    print("Angle for Image embedding before normalizing: Averaging ", cos_image_before_normalization)

    rho_image_before_normalization, p_value = spearmanr(offset_vector_embedding.cpu().numpy(), fourth_image_embeddings.cpu().numpy())
    print("Spearman's correlation coefficient for Image embedding before normalizing:", rho_image_before_normalization)


    first_image_embeddings = F.normalize(first_image_embeddings , p=2,dim=0)
    second_image_embeddings = F.normalize(second_image_embeddings , p=2,dim=0)
    third_image_embeddings = F.normalize(third_image_embeddings, p=2,dim=0)
    fourth_image_embeddings = F.normalize(fourth_image_embeddings , p=2,dim=0)

    offset_vector_embedding = second_image_embeddings - first_image_embeddings + third_image_embeddings

    cos_sim = F.cosine_similarity(offset_vector_embedding, fourth_image_embeddings, dim=0)

    cos_image_after_normalization = torch.rad2deg(torch.acos(cos_sim))

    print("Angle for Image embedding after normalizing: Averaging ", cos_image_after_normalization)

    rho_image_after_normalization, p_value = spearmanr(offset_vector_embedding.cpu().numpy(), fourth_image_embeddings.cpu().numpy())
    print("Spearman's correlation coefficient for Image embedding after normalizing:", rho_image_after_normalization)

    return cos_image_before_normalization, rho_image_before_normalization,cos_image_after_normalization, rho_image_after_normalization, offset_vector_embedding, fourth_image_embeddings


def find_text_embedding_arithmetics(pair1,pair2):
    first_pair = pair1.split(':')
    second_pair = pair2.split(':')

    first = get_text_embedding(first_pair[0]).squeeze(0)
    second = get_text_embedding(first_pair[1]).squeeze(0)
    third = get_text_embedding(second_pair[0]).squeeze(0)
    fourth = get_text_embedding(second_pair[1]).squeeze(0)

    offset_vector_embedding = second - first + third

    # King - Queen + Man = Woman

    cos_sim = F.cosine_similarity(offset_vector_embedding, fourth, dim=0)

    cos_text_before_normalization = torch.rad2deg(torch.acos(cos_sim))
    

    print("Angle for text embedding before normalizing: Averaging ", cos_text_before_normalization)
    rho_text_before_normalization , p_value = spearmanr(offset_vector_embedding.cpu().numpy(), fourth.cpu().numpy())
    print("Spearman's correlation coefficient for text embedding before normalizing:", rho_text_before_normalization)


    first = F.normalize(first , p=2,dim=0)
    second = F.normalize(second , p=2,dim=0)
    third = F.normalize(third , p=2,dim=0)
    fourth = F.normalize(fourth, p=2,dim=0)

    offset_vector_embedding = second - first + third

    cos_sim = F.cosine_similarity(offset_vector_embedding, fourth, dim=0)

    cos_text_after_normalization = torch.rad2deg(torch.acos(cos_sim))

    print("Angle for text embedding after normalizing: Averaging ", cos_text_after_normalization)
    rho_text_after_normalization, p_value = spearmanr(offset_vector_embedding.cpu().numpy(), fourth.cpu().numpy())
    print("Spearman's correlation coefficient for text embedding after normalizing:", rho_text_after_normalization)

    return cos_text_before_normalization, rho_text_before_normalization, cos_text_after_normalization, rho_text_after_normalization, offset_vector_embedding, fourth



In [8]:
import os
import glob
import pandas as pd


def process_file(file_path, folder_path):
    lines_read = 0

    pair1 = ''
    pair2 = ''
    with open(file_path, 'r') as file:
        for line in file:
            if not line.startswith('#'):
                parts = line.split()
                if len(parts) >= 2:
                    num, text = parts[0], ' '.join(parts[1:])
                    formatted_text = text.strip().replace('"', '')
                    if lines_read == 0:
                        pair1 = formatted_text
                    else: pair2 = formatted_text
                    lines_read += 1
                    if lines_read >= 2:
                        break

    cos_image_before_normalization, rho_image_before_normalization,cos_image_after_normalization, rho_image_after_normalization, offset_vector_embedding_image, fourth_image_embedding = find_image_embedding_arithmetics(pair1,pair2)
    cos_text_before_normalization, rho_text_before_normalization, cos_text_after_normalization, rho_text_after_normalization, offset_vector_embedding_text, fourth_text_embedding = find_text_embedding_arithmetics(pair1,pair2)

    new_data = {
        'pair1':pair1,
        'pair2':pair2,
        'cos_image_before_normalization':cos_image_before_normalization.item(),
        'rho_image_before_normalization':rho_image_before_normalization.item(),
        'cos_image_after_normalization':cos_image_after_normalization.item(),
        'rho_image_after_normalization':rho_image_after_normalization.item(),
        'cos_text_before_normalization':cos_text_before_normalization.item(),
        'rho_text_before_normalization':rho_text_before_normalization.item(),
        'cos_text_after_normalization':cos_text_after_normalization.item(),
        'rho_text_after_normalization':rho_text_after_normalization.item()
    }

    embedding_data = {
        'pair1':pair1,
        'pair2':pair2,
        'offset_vector_embedding_image' : offset_vector_embedding_image,
        'fourth_image_embedding' : fourth_image_embedding,
        'offset_vector_embedding_text' : offset_vector_embedding_text,
        'fourth_text_embedding':fourth_text_embedding
    }

    return new_data, embedding_data


def process_folder(folder_path,results,embedding_results):
    for file_path in glob.glob(os.path.join(folder_path, '*.txt')):
        similarity_data, embedding_data = process_file(file_path, folder_path)
        results.append(similarity_data)
        embedding_results.append(embedding_data)
        
        

    for sub_folder in os.listdir(folder_path):
        sub_folder_path = os.path.join(folder_path, sub_folder)
        if os.path.isdir(sub_folder_path):
            process_folder(sub_folder_path,results,embedding_results)
            

        


In [14]:
# Path to the main 'data' folder
data_folder = 'SemEval-2012-Gold-Ratings'

results = []
embedding_results = []

# Process Training and Testing folders
for sub_folder in ['Training', 'Testing']:
    sub_folder_path = os.path.join(data_folder, sub_folder)
    process_folder(sub_folder_path,results,embedding_results)

df = pd.DataFrame(results)

df.to_csv('data.csv', index=False)

df_embedding = pd.DataFrame(embedding_results)

df_embedding.to_csv('embeddings.csv', index=False)

Angle for Image embedding before normalizing: Averaging  tensor(35.1905)
Spearman's correlation coefficient for Image embedding before normalizing: 0.5661750874045846
Angle for Image embedding after normalizing: Averaging  tensor(35.8650)
Spearman's correlation coefficient for Image embedding after normalizing: 0.5539337069843558
Angle for text embedding before normalizing: Averaging  tensor(30.9766)
Spearman's correlation coefficient for text embedding before normalizing: 0.3216292014282281
Angle for text embedding after normalizing: Averaging  tensor(33.4874)
Spearman's correlation coefficient for text embedding after normalizing: 0.30276694161202095
Angle for Image embedding before normalizing: Averaging  tensor(32.8775)
Spearman's correlation coefficient for Image embedding before normalizing: 0.6287231885364096
Angle for Image embedding after normalizing: Averaging  tensor(33.7027)
Spearman's correlation coefficient for Image embedding after normalizing: 0.6204848415273343
Angle f