env: findingzeke

In [None]:
import os
import glob
import requests
from dotenv import load_dotenv
import numpy as np

import matplotlib.pyplot as plt  
from matplotlib.image import  imread

In [None]:
# Load the variables defined in the .env file into the process environment so
# the os.getenv() lookups for the Azure endpoint and key can find them.
load_dotenv()

In [None]:
# Settings for the Azure AI Services computer-vision vectorizeImage endpoint.
# Put your actual endpoint and key in the .env file as
# AZURE_AI_SERVICES_ENDPOINT and AZURE_AI_SERVICES_KEY.
# NOTE(review): this is a preview API version -- confirm the service still accepts it.
api_version = "2023-02-01-preview"
azure_endpoint = os.getenv('AZURE_AI_SERVICES_ENDPOINT')
api_key = os.getenv('AZURE_AI_SERVICES_KEY')

# os.getenv() returns None for unset variables. Report that here instead of
# silently building a URL that starts with the text "None".
missing_settings = [
    name
    for name, value in (
        ('AZURE_AI_SERVICES_ENDPOINT', azure_endpoint),
        ('AZURE_AI_SERVICES_KEY', api_key),
    )
    if not value
]
if missing_settings:
    print(f"Warning: {', '.join(missing_settings)} not set; check the .env file. Requests to the API will fail.")

# Drop any trailing slash from the endpoint so the joined URL never contains
# '//computervision'.
api_url = f"{(azure_endpoint or '').rstrip('/')}/computervision/retrieval:vectorizeImage?overload=stream&api-version={api_version}"

print(api_url)

In [None]:
# Image locations, given relative to the notebook's working directory.
# The collection folder holds the candidate images; the test image is the one
# every candidate is compared against.
image_collection_path = "../images/collection"
test_image_path = "../images/test/zeke.png"

In [None]:
def getImageEmbedding(image_path, api_url, api_key, timeout=30):
    """Request the embedding vector for one image from the vectorization API.

    The image file is opened in binary mode and streamed as the body of a POST
    request to ``api_url``.

    Args:
        image_path: Path of the image file to upload.
        api_url: URL the request is posted to.
        api_key: Key sent in the ``Ocp-Apim-Subscription-Key`` header.
        timeout: Seconds ``requests`` waits on the service before raising.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The value stored under ``"vector"`` in the JSON response when the
        service answers with HTTP 200. For any other status the status code
        and response text are printed and ``None`` is returned.

    Raises:
        Exceptions from ``open`` and ``requests`` (including timeouts) are not
        caught here and propagate to the caller.
    """
    headers = {
        'Content-Type': 'application/octet-stream',  # body is the raw image bytes
        'Ocp-Apim-Subscription-Key': api_key,
    }

    # Keep the file open only for the upload; the response is fully received
    # by the time post() returns, so it can be inspected after the file closes.
    with open(image_path, 'rb') as image_file:
        response = requests.post(api_url, headers=headers, data=image_file, timeout=timeout)

    if response.status_code != 200:
        print(f"Error: {response.status_code} - {response.text}")
        return None

    return response.json()["vector"]

In [None]:
def getFilesInDirectory(directory_path, pattern="*.png"):
    """List the files directly inside a directory that match a glob pattern.

    Args:
        directory_path: Directory to search (not searched recursively).
        pattern: Glob pattern applied to the file names; defaults to PNG files.

    Returns:
        A sorted list of matching paths, each prefixed with ``directory_path``.
        The list is empty when nothing matches. Sorting matters because
        ``glob.glob`` itself makes no promise about ordering, which would make
        repeated notebook runs list files differently.
    """
    return sorted(glob.glob(os.path.join(directory_path, pattern)))

In [None]:
def getCosineSimilarity(vector_a, vector_b):
    """Return the cosine similarity of two equal-length numeric vectors.

    The result is the dot product divided by the product of the two Euclidean
    norms.

    Args:
        vector_a: First vector (any sequence ``numpy`` can take a dot product of).
        vector_b: Second vector, same length as ``vector_a``.

    Returns:
        The similarity as a numpy float. When either vector has zero magnitude
        the similarity is mathematically undefined; 0.0 is returned instead of
        dividing by zero, which would yield ``nan`` and a RuntimeWarning.
    """
    dot_product = np.dot(vector_a, vector_b)

    # Product of the two magnitudes is the denominator of the cosine formula.
    norm_product = np.linalg.norm(vector_a) * np.linalg.norm(vector_b)
    if norm_product == 0:
        return np.float64(0.0)

    return dot_product / norm_product

In [None]:
# Gather the paths of the collection images and report how many were found.
file_collection = getFilesInDirectory(directory_path=image_collection_path)
print(f"{len(file_collection)} files found.")

In [None]:
# Maps each collection image path to its cosine similarity with the test image.
cosine_similarity_dictionary = {}

embeddings_test_image = getImageEmbedding(image_path=test_image_path,api_url=api_url, api_key=api_key)
# getImageEmbedding returns None when the API call fails; stop with a clear
# message instead of an opaque TypeError from len(None).
if embeddings_test_image is None:
    raise RuntimeError(f"Could not get an embedding for the test image: {test_image_path}")
print(f"Embedding size: {len(embeddings_test_image)}")

for collection_image_path in file_collection:
    collection_embedding = getImageEmbedding(image_path=collection_image_path,api_url=api_url, api_key=api_key)
    # One failed request should not abort the comparison for every other image.
    if collection_embedding is None:
        print(f"Skipping {collection_image_path}: no embedding returned.")
        continue
    cosine_similarity_dictionary[collection_image_path] = getCosineSimilarity(embeddings_test_image, collection_embedding)

print("Embeddings generated and similarity calculated.")
print(type(cosine_similarity_dictionary))

In [None]:
# Rank the collection: copy the (path, score) pairs into a list and order it
# in place from the highest to the lowest similarity score.
sorted_data = list(cosine_similarity_dictionary.items())
sorted_data.sort(key=lambda pair: pair[1], reverse=True)
print(type(sorted_data))

# Print every ranked entry together with its zero-based position.
for index in range(len(sorted_data)):
    path, score = sorted_data[index]
    print(f"{index}, {path}: {score}")


In [None]:
# Show the test image first so the matches below can be compared against it.
print("**Showing Zeke**")

plt.figure(figsize=(2,2))
plt.imshow(imread(test_image_path))
plt.title(test_image_path)
plt.show()

# How many images to display from each end of the ranking.
top_N = 3

print(f"Showing top {top_N} best matches")

# sorted_data is ordered from highest to lowest score, so its head holds the
# best matches. Each image gets its own small figure titled with path and score.
for path, score in sorted_data[:top_N]:

    plt.figure(figsize=(2,2))
    plt.imshow(imread(path))
    plt.title(f"{path}, {round(score,2)}")
    plt.show()

In [None]:
print(f"Showing least {top_N} matches")

# sorted_data is ordered from highest to lowest score, so its tail holds the
# weakest matches. The start index is computed explicitly because slicing with
# -top_N would select the whole list when top_N is 0 (-0 == 0).
least_start = max(len(sorted_data) - top_N, 0)

for path, score in sorted_data[least_start:]:

    plt.figure(figsize=(2,2))
    plt.imshow(imread(path))
    plt.title(f"{path}, {round(score,2)}")
    plt.show()