In [None]:
# Force-reinstall a pinned NumPy release before the other packages are installed.
# NOTE(review): presumably 1.24.4 is pinned so that the pickled UMAP reducer
# loaded further down unpickles cleanly — confirm the reason for the pin.
# NOTE(review): "--force" relies on pip accepting it as an abbreviation of
# "--force-reinstall" — confirm, or spell the flag out in full.
!pip install numpy==1.24.4 --force

In [None]:
# Used only to wipe the installer output once the installs below finish.
from IPython.display import clear_output

# Third-party packages imported by this notebook: annoy (AnnoyIndex),
# cohere (API client) and umap-learn (imported as `umap`).
# NOTE(review): none of these installs is version-pinned, so a fresh run may
# pull different releases than the ones the notebook was written against.
!pip install annoy
!pip install cohere
!pip install umap-learn

# Clears this cell's output, which also hides any pip warnings or errors.
clear_output()

In [None]:
import cohere
import numpy as np
import pandas as pd
import requests
import umap

from annoy import AnnoyIndex
from google.colab import userdata
from pickle import loads

def get_url_content(url, timeout=30):
    """Download ``url`` with an HTTP GET and return the raw response body.

    Args:
        url: Address of the file to fetch.
        timeout: Seconds handed to ``requests.get`` as its timeout, so that a
            stalled server cannot hang the notebook indefinitely.

    Returns:
        The response body as ``bytes`` when the status code is 200; otherwise
        ``None``, after printing an error message and the response object.

    Raises:
        requests.exceptions.RequestException: propagated unchanged when the
            request itself fails (connection error, timeout, ...).
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # The message has to be the positional argument and an f-string:
        # passing it as ``file=`` made print() try to write to a str, which
        # raised AttributeError instead of reporting the failed download.
        print(f"ERROR: no existe el fichero {url}")
        print(response)
        return None
    return response.content

def get_nearest_verses_by_vector(dataframe, search_index, v, debug=False, n_neighbors=5):
    """Return the verses closest to vector ``v`` according to ``search_index``.

    Args:
        dataframe: Frame with "text" and "label" columns. The ids returned by
            the index are used as positional row numbers (``iloc``).
        search_index: Object exposing ``get_nns_by_vector(vector, n)`` that
            returns a list of item ids (an ``AnnoyIndex`` in this notebook).
        v: Query vector handed to the index as-is.
        debug: When true, print the query vector and every match found.
        n_neighbors: How many neighbours to request from the index. Defaults
            to 5, the value that used to be hard-coded.

    Returns:
        A list of ``(text, label)`` tuples, in the order the index returned
        the ids.
    """
    # Only the ids are needed, so distances are not requested from the index.
    neighbor_ids = search_index.get_nns_by_vector(v, n_neighbors)
    # One positional lookup serves both columns.
    matches = dataframe.iloc[neighbor_ids]
    nearest = list(zip(matches["text"].tolist(), matches["label"].tolist()))
    if debug:
        print(f"Verso: {v}")
        for pair in nearest:
            print("    ", pair)
    return nearest

In [None]:
# Load the original verses together with their positions in 3-D space.
df = pd.read_csv("https://both.rocks/maquinito-15/upniverso_data.csv")


# Load the fitted reducer that maps the 768-dimensional embeddings down to 3.
umap_content = get_url_content('https://both.rocks/maquinito-15/upniverso_umap.pkl')
if umap_content is None:
    # get_url_content signals a failed download with None; stop here with a
    # clear message instead of letting loads() fail with an opaque TypeError.
    raise RuntimeError("No se pudo descargar upniverso_umap.pkl")
# SECURITY: unpickling executes arbitrary code embedded in the payload; this
# is only acceptable while the remote host and the file are trusted.
reducer = loads(umap_content)


# Load the prebuilt search index so that neighbour lookups are fast.
index_content = get_url_content('https://both.rocks/maquinito-15/upniverso_index.ann')
if index_content is None:
    raise RuntimeError("No se pudo descargar upniverso_index.ann")
# AnnoyIndex.load() takes a file name, so the download is written to disk first.
with open('upniverso_index.ann', 'wb') as f:
    f.write(index_content)

# The index holds 3-dimensional vectors compared with Euclidean distance.
search_index = AnnoyIndex(3, "euclidean")
search_index.load('upniverso_index.ann')


# Initialise the Cohere client with the API key stored in the Colab secrets.
co = cohere.Client(userdata.get('COHERE_API_KEY'))

In [None]:
# Verse to look up; edit this string to try a different query.
verso = "La lluvia ya no moja nuestro amor"

# Ask Cohere for the embedding of this single verse.
response = co.embed(
    model="embed-multilingual-v2.0",
    texts=[verso],
)
embeddings_768 = response.embeddings[0]

# Project the embedding with the loaded reducer; transform() takes a batch,
# so the vector is wrapped in a list and the single result row is taken back.
embeddings_3 = reducer.transform([embeddings_768])[0]

# Look up the closest stored verses and print them as they are found.
nearest_verses = get_nearest_verses_by_vector(
    df,
    search_index,
    embeddings_3,
    debug=True,
)