# Embeddings Vectoriales Multimodales

Un embedding vectorial también puede representar datos no textuales, como imágenes.

Configuración de la API de Azure Computer Vision y funciones auxiliares para obtener embeddings de imágenes.

In [None]:
import os

import requests
from PIL import Image
import dotenv
import matplotlib.pyplot as plt

# Called before the environment reads below, presumably so that values defined
# in a local .env file are visible through os.environ (confirm against python-dotenv).
dotenv.load_dotenv()

# API Key authentication
# Both settings are mandatory: os.environ[...] raises KeyError when one is missing.
AZURE_AIVISION_API_KEY = os.environ["AZURE_AIVISION_API_KEY"]
# Base URL of the service; the helper functions append the endpoint path to it.
AZURE_COMPUTER_VISION_URL = os.environ["AZURE_COMPUTER_VISION_URL"]

def get_model_params():
    """Build the query-string parameters passed along with each vectorize request.

    Returns:
        A newly created dict holding the "api-version" and "model-version" values.
    """
    params = {}
    params["api-version"] = "2024-02-01"
    params["model-version"] = "2023-04-15"
    return params

def get_auth_headers():
    """Build the request headers that carry the subscription key.

    Returns:
        A newly created dict with the single "Ocp-Apim-Subscription-Key" entry,
        so callers can add further headers without affecting other calls.
    """
    auth_headers = {}
    auth_headers["Ocp-Apim-Subscription-Key"] = AZURE_AIVISION_API_KEY
    return auth_headers

def get_image_embedding(image_file, timeout=60):
    """Return the embedding vector of one image via the vectorizeImage endpoint.

    Args:
        image_file: Path of the image to embed. The file is opened in binary
            mode and uploaded as the raw request body.
        timeout: Seconds to wait for the service before requests gives up.

    Returns:
        The value stored under the "vector" key of the JSON response.

    Raises:
        requests.HTTPError: If the service answers with a status other than 200.
            The message carries the file name, the status code and the raw
            response body.
        OSError: If the image file cannot be opened.
    """
    url = f"{AZURE_COMPUTER_VISION_URL}/computervision/retrieval:vectorizeImage"
    headers = get_auth_headers()
    headers["Content-Type"] = "application/octet-stream"
    # Hand the open file object to requests so the bytes are sent as the body as-is
    with open(image_file, "rb") as image_data:
        response = requests.post(
            url,
            headers=headers,
            params=get_model_params(),
            data=image_data,
            timeout=timeout,
        )

    if response.status_code != 200:
        # Fail here with the status and body. An error response is not expected
        # to contain a "vector" key, so continuing would only raise an
        # uninformative KeyError. response.text is used because the body of an
        # error response may not be valid JSON.
        raise requests.HTTPError(
            f"{image_file}: HTTP {response.status_code}: {response.text}",
            response=response,
        )
    return response.json()["vector"]




Procesamiento de todas las imágenes `.jpg` del directorio "./data": se genera el embedding de cada una y se guardan todos en un archivo JSON.

In [None]:
import json

# Map each image file name to its embedding vector
vectors = {}
for image_file in os.listdir("./data"):
    # Skip everything that is not a .jpg file (extension compared case-insensitively)
    if not image_file.lower().endswith(".jpg"):
        continue
    image_path = f"./data/{image_file}"
    try:
        image_embedding = get_image_embedding(image_path)
        vectors[image_file] = image_embedding
        print(f"Procesado: {image_file}")
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining files
        print(f"Error con {image_file}: {e}")

# Persist whatever was embedded successfully as a single JSON document
with open("./data/images_ai-vision.json", "w") as f:
    f.write(json.dumps(vectors))
    

Carga y visualización de una imagen de ejemplo ("perro-1.jpg") y verificación de su vector generado.

In [None]:
# Open the example image; as the cell's last expression it is rendered inline
Image.open("./data/perro-1.jpg")

In [None]:
# Display the embedding stored for the example image
# (raises KeyError if that file was not embedded by the loop above)
vectors["perro-1.jpg"]

In [None]:
# Number of components in the example image's embedding
len(vectors["perro-1.jpg"])

# Análisis de Vectores Multimodales

Carga de los vectores de imágenes previamente generados desde el archivo JSON.

In [None]:
# Read back the file-name -> embedding mapping saved earlier
with open('./data/images_ai-vision.json') as f:
    image_vectors = json.loads(f.read())


## Más similar a una imagen objetivo

Definición de funciones para calcular la similitud del coseno y encontrar las imágenes más similares a un vector dado.

In [None]:
import pandas as pd

def cosine_similarity(v1, v2):
    """Calculate the cosine similarity between two vectors.

    Args:
        v1: Sequence of numbers.
        v2: Sequence of numbers with the same length as ``v1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms. Returns 0.0 when either vector has zero magnitude
        (including two empty vectors), where the ratio is undefined.

    Raises:
        ValueError: If the vectors have different lengths.
    """
    # zip() would silently drop the surplus components of the longer vector and
    # produce a meaningless score, so reject mismatched lengths explicitly.
    if len(v1) != len(v2):
        raise ValueError(
            f"Vectors must have the same length, got {len(v1)} and {len(v2)}"
        )
    dot_product = sum(a * b for a, b in zip(v1, v2))
    magnitude = (sum(a**2 for a in v1) * sum(b**2 for b in v2)) ** 0.5
    if magnitude == 0:
        # Avoid ZeroDivisionError for all-zero (or empty) vectors
        return 0.0
    return dot_product / magnitude

def most_similar(target_vector: list[float], vectors: dict[str, list[float]]) -> pd.DataFrame:
    """Rank every stored vector by its cosine similarity to a target vector.

    Args:
        target_vector: The embedding to compare against.
        vectors: Mapping from a key (the notebook uses image file names) to
            that item's embedding.

    Returns:
        A DataFrame with the columns 'vector key' and 'similarity', one row per
        entry of ``vectors``, ordered from most to least similar.
    """
    similarities = {key: cosine_similarity(target_vector, vector) for key, vector in vectors.items()}
    # Named ranked_keys rather than most_similar so the local does not shadow this function
    ranked_keys = sorted(similarities, key=similarities.get, reverse=True)
    return pd.DataFrame(
        [(key, similarities[key]) for key in ranked_keys],
        columns=['vector key', 'similarity'],
    )


Visualización de la imagen objetivo y búsqueda de las 3 imágenes más similares en el conjunto de datos (la primera es la propia imagen objetivo, por lo que después solo se muestran las otras dos).

In [None]:
# Choose the query image and draw it inline
target_image = "perro-1.jpg"
target_pixels = plt.imread(f"./data/{target_image}")
plt.imshow(target_pixels)


In [None]:
# Rank all images against the target and keep the first three rows
ranked_df = most_similar(image_vectors[target_image], image_vectors)
most_similar_df = ranked_df.iloc[0:3]
most_similar_df


In [None]:
# Draw the remaining matches; row 0 is skipped because it is expected to be
# the target image itself, which is part of the same collection
neighbour_names = most_similar_df['vector key'][1:]
for image_name in neighbour_names:
    pixels = plt.imread(f'./data/{image_name}')
    plt.imshow(pixels)
    plt.axis('off')
    plt.show()


## Búsqueda con texto

Función para convertir texto en un vector embedding utilizando la API de Azure.

In [None]:
def get_text_embedding(text, timeout=60):
    """Return the embedding vector of a text via the vectorizeText endpoint.

    Args:
        text: The text to embed. It is sent as the "text" field of a JSON body.
        timeout: Seconds to wait for the service before requests gives up.

    Returns:
        The value stored under the "vector" key of the JSON response.

    Raises:
        requests.HTTPError: If the service answers with a status other than 200.
            The message carries the status code and the raw response body.
    """
    url = f"{AZURE_COMPUTER_VISION_URL}/computervision/retrieval:vectorizeText"
    headers = get_auth_headers()
    headers["Content-Type"] = "application/json"
    response = requests.post(
        url,
        headers=headers,
        params=get_model_params(),
        json={"text": text},
        timeout=timeout,
    )
    if response.status_code != 200:
        # Report the service's own error. An error response is not expected to
        # contain a "vector" key, so the lookup below would only raise an
        # uninformative KeyError.
        raise requests.HTTPError(
            f"vectorizeText: HTTP {response.status_code}: {response.text}",
            response=response,
        )
    return response.json()["vector"]


Búsqueda de imágenes similares utilizando un término de texto (p. ej., "Ocean") y visualización de los tres mejores resultados.

In [None]:
# Embed the query text, then rank every image against it
embedding = get_text_embedding("Ocean") # Examples: "Bebe" "Sabana" "Ocean"
most_similar_df = most_similar(embedding, image_vectors)

# Draw the three best-scoring images
top_names = most_similar_df['vector key'][0:3]
for image_name in top_names:
    pixels = plt.imread(f'./data/{image_name}')
    plt.imshow(pixels)
    plt.axis('off')
    plt.show()
