In [14]:
import setup
import os
import numpy as np
from decouple import config, AutoConfig
# Directory handed to python-decouple as the starting point of its settings
# lookup. It can be overridden with the CHATBOT_PROJECT_ROOT environment
# variable so the notebook is not tied to a single machine; the default keeps
# the original location.
PROJECT_ROOT = os.environ.get("CHATBOT_PROJECT_ROOT", "/home/harry/chatbotDjango")

# NOTE: this rebinds the `config` name imported from decouple to an AutoConfig
# instance that searches from PROJECT_ROOT.
config = AutoConfig(search_path=PROJECT_ROOT)

# Project-local helper; presumably configures Django so that the models can be
# imported further down -- confirm against setup.py.
setup.init_django()

In [15]:
# Look the Mistral API key up through the `config` object created in the first
# cell, so the secret itself never appears in the notebook.
MISTRAL_API_KEY = config("MISTRAL_API_KEY")

In [16]:
from mistralai import Mistral

# Name of the embedding model; also serves as the default `model` argument of
# get_embedding.
model = "mistral-embed"

# Shared Mistral client, authenticated with the key loaded in the previous cell.
client = Mistral(api_key=MISTRAL_API_KEY)

In [23]:
def get_embedding(texts, model=model):
    """Embed one text or a batch of texts through the Mistral embeddings API.

    Args:
        texts: Either a single string or an iterable of strings (list, tuple,
            generator, ...).
        model: Name of the embedding model sent to the API. Defaults to the
            notebook-level ``model``.

    Returns:
        numpy.ndarray: for a single string, the embedding of that string
        (``embeddings[0]``); for an iterable, an array with one row per entry
        of the API response.
    """
    # A lone string is the only "single text" case; every other iterable is
    # treated as a batch (the previous list-only check crashed on tuples).
    single_text = isinstance(texts, str)
    batch = [texts] if single_text else list(texts)

    # Turn newlines into spaces rather than removing them, otherwise the last
    # word of one line is glued to the first word of the next one.
    cleaned_texts = [t.replace("\n", " ").strip() for t in batch]

    response = client.embeddings.create(model=model, inputs=cleaned_texts)
    embeddings = np.array([entry.embedding for entry in response.data])
    return embeddings[0] if single_text else embeddings

In [24]:
from data.models import BlogPost
# When True, the data-creation cell below deletes every BlogPost flagged
# can_delete=True and inserts the sample documents again.
RECREATE_DATA = True

In [25]:
# Sample post bodies used as BlogPost.content below: three sentences about
# Harry being (or not being) "here" and one unrelated sentence about a game.
docs = [
    "Harry Was Here before you", 
    "You Were Here before Harry",
    "Harry Was not Here",
    "The new Assassin's Creed game would be release in two weeks",
]

In [26]:
# One unsaved BlogPost per sample document, titled "Blog Post 1",
# "Blog Post 2", ... in the order the documents are listed.
new_data = [
    BlogPost(title=f"Blog Post {position}", content=body, can_delete=True)
    for position, body in enumerate(docs, start=1)
]

if RECREATE_DATA:
    # Remove the rows flagged as deletable, then insert the fresh batch with a
    # single bulk_create call.
    BlogPost.objects.filter(can_delete=True).delete()
    BlogPost.objects.bulk_create(new_data)

In [27]:
# Query the deletable posts again and display how many there are (the bare
# last expression is what the cell shows as its output).
qs = BlogPost.objects.filter(can_delete=True)
qs.count()

4

In [29]:
import time

for post in qs:
    # Posts that already carry an embedding are left untouched.
    if post.embedding is not None:
        continue

    # Embed the text the model exposes for this purpose and persist it.
    post.embedding = get_embedding(post.get_embedding_text_raw())
    post.save()

    # Pause after each API call -- presumably to stay under the provider's
    # rate limit; confirm the required delay.
    time.sleep(2)

In [32]:
# Free-text search query, embedded with the same function as the posts so the
# query vector and the stored vectors come from the same model.
query = "I bought a new game called Assassin's Creed"
query_embedding = get_embedding(query)

In [33]:
from pgvector.django import CosineDistance
from django.db.models import F

# Cosine distance between each stored embedding and the query vector.
distance_to_query = CosineDistance('embedding', query_embedding)

# Annotate every post with its distance and the derived similarity
# (1 - distance), closest posts first.
ranked_posts = BlogPost.objects.annotate(
    distance=distance_to_query,
    similarity=1 - F("distance"),
).order_by("distance")

# Keep only the two best matches.
qs = ranked_posts[:2]

for post in qs:
    # Similarity is printed as a percentage.
    print(f"{post.title} {post.distance} {post.similarity * 100}")

Blog Post 4 0.22786113643550343 77.21388635644966
Blog Post 3 0.44179786741732885 55.82021325826712
