In [37]:
import os
import numpy as np
from decouple import config, AutoConfig
# Rebind `config` (shadowing the default object imported from decouple) to an
# AutoConfig that searches the project directory for its settings file.
# The directory can be overridden with the CHATBOT_DJANGO_DIR environment
# variable so the notebook is not tied to a single machine; the original
# location is kept as the default, so existing setups behave as before.
config = AutoConfig(
    search_path=os.environ.get("CHATBOT_DJANGO_DIR", "/home/harry/chatbotDjango")
)

In [38]:
# Read the Mistral API key through the `config` object instead of hard-coding
# the secret in the notebook.
MISTRAL_API_KEY = config("MISTRAL_API_KEY")

In [39]:
from mistralai import Mistral

# Name of the embedding model; used as the default `model` argument of
# get_embedding.
model = "mistral-embed"

# Shared Mistral client, authenticated with MISTRAL_API_KEY.
client = Mistral(api_key=MISTRAL_API_KEY)

In [40]:
def get_embedding(text, model=model):
    """Embed one or more inputs with the Mistral embeddings endpoint.

    Args:
        text: A list of inputs, or a single input. Anything that is not a
            ``list`` is wrapped into a one-element list before the request.
        model: Name of the embedding model passed to the API. Defaults to
            the module-level ``model`` value at definition time.

    Returns:
        A NumPy array built from the ``embedding`` of each entry in the
        response data, in the order the API returned them (one row per
        entry, assuming all embeddings have the same length).
    """
    inputs = text if isinstance(text, list) else [text]
    response = client.embeddings.create(model=model, inputs=inputs)

    vectors = []
    for entry in response.data:
        vectors.append(entry.embedding)
    return np.array(vectors)

In [41]:
def cosine_similarity(emb1, emb2):
    """Return the cosine similarity between two embedding vectors.

    Args:
        emb1: First vector (1-D array-like of numbers).
        emb2: Second vector, same length as ``emb1``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms. If either vector has zero norm the similarity is
        undefined; ``0.0`` is returned in that case instead of ``nan``.
    """
    denominator = np.linalg.norm(emb1) * np.linalg.norm(emb2)
    # Guard the zero-vector case: dividing by 0 would yield nan plus a
    # RuntimeWarning, and nan values break sorting of the results.
    if denominator == 0:
        return 0.0
    return np.dot(emb1, emb2) / denominator

In [42]:
# Toy numeric example: a small list of values and a numeric query.
data = [1, 2, 3, 4, 5, 6, 7]
# Note: `query` is rebound to a text string in a later cell.
query = 5.3

In [52]:
# Small corpus to rank against the query: three near-paraphrases about
# "Harry" plus one unrelated sentence. Must stay a `list`, because
# get_embedding wraps any non-list value into a single-element list.
docs = [
    "Harry Was Here before you",
    "You Were Here before Harry",
    "Harry Was not Here",
    "The new Assassin's Creed game would be release in two weeks",
]

In [53]:
# Embed every document with a single get_embedding call; the result holds one
# embedding per entry in `docs`.
embeddings = get_embedding(docs)

In [54]:
# Sanity check: expect one row per document (4 documents here).
embeddings.shape

(4, 1024)

In [55]:
# Text query to rank the documents against (replaces the earlier numeric
# placeholder value of `query`).
query = "Harry Was Here and You Were not"

In [56]:
# Embed the query. get_embedding returns one row per input, so the query
# vector is the first (and only) row.
query_embedding = get_embedding([query])
query_vector = query_embedding[0]

# Score each document embedding against the query vector.
similarities = [
    cosine_similarity(query_vector, doc_embedding)
    for doc_embedding in embeddings
]

# Pair each score with its document and rank from most to least similar.
results = sorted(zip(similarities, docs), reverse=True)

for similarity, doc in results:
    print(f"Similarity: {similarity:.4f} | {doc}")

Similarity: 0.9126 | Harry Was not Here
Similarity: 0.9010 | Harry Was Here before you
Similarity: 0.8604 | You Were Here before Harry
Similarity: 0.5051 | The new Assassin's Creed game would be release in two weeks
