In [None]:
!pip install -r requirements.txt

In [None]:
from transformers import AutoTokenizer, AutoModel, BertTokenizer, BertModel
import torch
import requests
import json
import matplotlib.pyplot as plt
from PIL import Image 
import pandas as pd
import seaborn as sns
import math
import os
from dotenv import load_dotenv

In [None]:
# Load variables from a local .env file (if any) into the process environment,
# then read the service credentials from it.
load_dotenv()
api_key = os.getenv('api_key')
endpoint = os.getenv('endpoint')

# Fail fast with a clear message: os.getenv returns None for unset variables,
# which would otherwise only surface later as an opaque error when the
# endpoint is concatenated into a URL or the request is rejected.
missing_settings = [
    name for name, value in (('api_key', api_key), ('endpoint', endpoint)) if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
        + ". Define them in the environment or in a .env file."
    )

In [None]:
# Disabled alternative text-embedding model (BAAI/bge-base-en-v1.5), kept for reference.
# embedding_model_1 = 'BAAI/bge-base-en-v1.5'
# tokenizer_baai = AutoTokenizer.from_pretrained(embedding_model_1)
# model_baai = AutoModel.from_pretrained(embedding_model_1)

In [None]:
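# Disabled: mean-pooled text embedding helper for the BAAI model. Re-enabling it
# also requires the commented-out tokenizer_baai / model_baai definitions.
# def compute_embedding_baai(text):
#     inputs = tokenizer_baai(text, return_tensors="pt", padding=True, truncation=True) 
    
#     # Generate the embeddings 
#     with torch.no_grad():    
#         embeddings = model_baai(**inputs).last_hidden_state.mean(dim=1).squeeze()

#     return embeddings.tolist()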

In [None]:
# Tokenizer and encoder are loaded from the same pretrained checkpoint name.
bert_checkpoint = 'bert-large-cased'
tokenizer_bert = BertTokenizer.from_pretrained(bert_checkpoint)
model_bert = BertModel.from_pretrained(bert_checkpoint)

In [None]:
def bert_text_embedding(text):
    """Embed `text` with the module-level BERT tokenizer and model.

    The text is tokenized (with padding and truncation enabled) into PyTorch
    tensors, run through ``model_bert`` without gradient tracking, and the
    model's ``last_hidden_state`` is averaged over ``dim=1``. Size-1
    dimensions are then squeezed away and the result is converted to a
    plain (nested) Python list.

    Parameters
    ----------
    text
        Input forwarded unchanged to ``tokenizer_bert``.

    Returns
    -------
    list
        The pooled embedding as returned by ``Tensor.tolist()``.
    """
    encoded = tokenizer_bert(text, return_tensors="pt", padding=True, truncation=True)

    # Inference only, so autograd bookkeeping is switched off.
    with torch.no_grad():
        hidden_states = model_bert(**encoded).last_hidden_state
        pooled = hidden_states.mean(dim=1)
        embeddings = pooled.squeeze()

    return embeddings.tolist()

In [None]:
# Query strings selecting the API version and the embedding model version.
version = "?api-version=2024-02-01&model-version=2023-04-15"
version_old = "?api-version=2024-02-01&model-version=2022-04-11"

# Drop any trailing slash from the configured endpoint so the joined URLs
# never contain "//computervision".
base_url = endpoint.rstrip("/")

vec_img_url = base_url + "/computervision/retrieval:vectorizeImage" + version  # For doing the image vectorization
vec_txt_url = base_url + "/computervision/retrieval:vectorizeText" + version  # For the prompt vectorization
vec_txt_url_old = base_url + "/computervision/retrieval:vectorizeText" + version_old  # Prompt vectorization, older model version

# Headers shared by every request: JSON body plus the subscription key.
headers = {
    'Content-type': 'application/json',
    'Ocp-Apim-Subscription-Key': api_key
}

In [None]:
def gpt_image_embedding(imageurl):
    """Request an embedding vector for the image located at `imageurl`.

    Posts ``{'url': imageurl}`` as a JSON body to ``vec_img_url`` with the
    shared ``headers`` and returns the ``'vector'`` field of the JSON reply.

    Parameters
    ----------
    imageurl : str
        URL of the image; passed through to the service unchanged.

    Returns
    -------
    The value stored under ``'vector'`` in the response
    (presumably a list of floats -- confirm against the service docs).

    Raises
    ------
    requests.HTTPError
        If the service replies with a non-success status code. The message
        carries the status code and the response body.
    """
    payload = {'url': imageurl}
    response = requests.post(vec_img_url, data=json.dumps(payload), headers=headers)

    # Surface service errors explicitly instead of failing on the missing
    # 'vector' key further down.
    if not response.ok:
        raise requests.HTTPError(
            f"vectorizeImage request failed with HTTP {response.status_code}: {response.text}",
            response=response,
        )

    return response.json()['vector']

In [None]:
def gpt_text_embedding(promptxt):
    """Request an embedding vector for the text `promptxt`.

    Posts ``{'text': promptxt}`` as a JSON body to ``vec_txt_url`` with the
    shared ``headers`` and returns the ``'vector'`` field of the JSON reply.

    Parameters
    ----------
    promptxt : str
        Text to embed; passed through to the service unchanged.

    Returns
    -------
    The value stored under ``'vector'`` in the response
    (presumably a list of floats -- confirm against the service docs).

    Raises
    ------
    requests.HTTPError
        If the service replies with a non-success status code. The message
        carries the status code and the response body.
    """
    payload = {'text': promptxt}
    response = requests.post(vec_txt_url, data=json.dumps(payload), headers=headers)

    # Surface service errors explicitly instead of failing on the missing
    # 'vector' key further down.
    if not response.ok:
        raise requests.HTTPError(
            f"vectorizeText request failed with HTTP {response.status_code}: {response.text}",
            response=response,
        )

    return response.json()['vector']

In [None]:
def gpt_text_embedding_old(promptxt):
    """Request an embedding vector for `promptxt` from the older-version URL.

    Same request as the current-version text embedding, but sent to
    ``vec_txt_url_old``. Posts ``{'text': promptxt}`` as a JSON body with the
    shared ``headers`` and returns the ``'vector'`` field of the JSON reply.

    Parameters
    ----------
    promptxt : str
        Text to embed; passed through to the service unchanged.

    Returns
    -------
    The value stored under ``'vector'`` in the response
    (presumably a list of floats -- confirm against the service docs).

    Raises
    ------
    requests.HTTPError
        If the service replies with a non-success status code. The message
        carries the status code and the response body.
    """
    payload = {'text': promptxt}
    response = requests.post(vec_txt_url_old, data=json.dumps(payload), headers=headers)

    # Surface service errors explicitly instead of failing on the missing
    # 'vector' key further down.
    if not response.ok:
        raise requests.HTTPError(
            f"vectorizeText (old model version) request failed with HTTP "
            f"{response.status_code}: {response.text}",
            response=response,
        )

    return response.json()['vector']

In [None]:
def get_cosine_similarity(vector1, vector2):
    """Return the cosine similarity of two numeric vectors.

    The dot product is taken over the leading components the two vectors
    have in common, while each magnitude is computed over the full vector.
    For vectors of different lengths this is equivalent to zero-padding the
    shorter one.

    Parameters
    ----------
    vector1, vector2 : sequence of numbers
        The vectors to compare. Each is iterated more than once, so pass
        lists/tuples rather than one-shot iterators.

    Returns
    -------
    float
        ``dot(v1, v2) / (|v1| * |v2|)``, or ``0.0`` when either vector has
        zero magnitude (including empty vectors), where the ratio is
        undefined.
    """
    # zip() stops at the shorter vector, so surplus trailing components of
    # the longer one contribute nothing to the dot product.
    dot_product = sum(a * b for a, b in zip(vector1, vector2))

    magnitude1 = math.sqrt(sum(x * x for x in vector1))
    magnitude2 = math.sqrt(sum(x * x for x in vector2))

    denominator = magnitude1 * magnitude2
    if denominator == 0:
        # Zero or empty vector: report "no similarity" instead of raising
        # ZeroDivisionError.
        return 0.0

    return dot_product / denominator

In [None]:
def similarity_results(image_emb, prompts, model):
    """Rank text prompts by cosine similarity to an image embedding.

    Each prompt is embedded with the text embedder selected by `model`
    (one embedder call per prompt), compared against `image_emb` with
    ``get_cosine_similarity``, and the pairs are sorted from most to least
    similar.

    Parameters
    ----------
    image_emb : sequence of numbers
        Embedding of the image to compare against.
    prompts : iterable of str
        Candidate texts.
    model : str
        ``"gpt"`` (``gpt_text_embedding``), ``"gpt_old"``
        (``gpt_text_embedding_old``) or ``"bert"`` (``bert_text_embedding``).

    Returns
    -------
    pandas.DataFrame or None
        Frame with columns ``'prompt'`` and ``'similarity'`` (float), sorted
        by descending similarity. For an unrecognised `model` the function
        prints ``"model name invalid"`` and returns ``None``.
    """
    # Pick the embedder with if/elif so only the selected function has to be
    # defined in the kernel when this is called.
    if model == "gpt":
        embed_text = gpt_text_embedding
    elif model == "gpt_old":
        embed_text = gpt_text_embedding_old
    elif model == "bert":
        embed_text = bert_text_embedding
    else:
        print("model name invalid")
        return None

    scored = [
        (prompt, get_cosine_similarity(image_emb, embed_text(prompt)))
        for prompt in prompts
    ]
    # sorted() is stable, so prompts with equal scores keep their input order.
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    # Build the frame in one go rather than growing it row by row.
    df = pd.DataFrame(ranked, columns=['prompt', 'similarity'])
    # Keeps the column float even when there are no rows.
    df["similarity"] = df["similarity"].astype(float)

    return df

In [None]:
# Embed the sample image via the service, then download the same URL to preview it.
imageurl1 = "https://github.com/retkowsky/images/blob/master/i4.jpg?raw=true"
image_emb1 = gpt_image_embedding(imageurl1)

image_response = requests.get(imageurl1, stream=True)
preview_image = Image.open(image_response.raw)  # decode directly from the streamed body
plt.imshow(preview_image)
plt.axis('off')
plt.show()

In [None]:
# Candidate captions that are scored against the image embedding.
prompts = [
    'bird',
    'a truck',
    'a car',
    'a blue car',
    'a white car',
    'a BMW white car',
    'a tesla car',
    'a mercedes car',
    'a man',
    'a ford car',
    'traffic',
    'advertisement',
    'human',
]

In [None]:
# Image and prompt embeddings both come from the same model version ("gpt").
cm = sns.light_palette("green", as_cmap=True)
gpt_df = similarity_results(image_emb1, prompts, "gpt")
# Last expression of the cell: renders the table shaded by value.
gpt_df.style.background_gradient(cmap=cm)

In [None]:
# Same service for image and prompts, but the prompts use the older model version ("gpt_old").
cm = sns.light_palette("green", as_cmap=True)
gpt_df_old = similarity_results(image_emb1, prompts, "gpt_old")
# Last expression of the cell: renders the table shaded by value.
gpt_df_old.style.background_gradient(cmap=cm)

In [None]:
# Image embedding from the service, prompt embeddings from BERT ("bert").
cm = sns.light_palette("green", as_cmap=True)
bert_df = similarity_results(image_emb1, prompts, "bert")
# Last expression of the cell: renders the table shaded by value.
bert_df.style.background_gradient(cmap=cm)