In [9]:
import os
from decouple import config, AutoConfig
from langchain_community.chat_models import ChatOpenAI
from langchain.schema import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Directory that python-decouple searches for the settings file holding the
# API credentials. The original machine-specific path stays as the default,
# but it can be overridden with the CHATBOT_CONFIG_DIR environment variable so
# the notebook also runs on other machines.
# NOTE: this rebinds the name `config` imported from decouple above; from here
# on `config(...)` reads through this AutoConfig instance.
config = AutoConfig(
    search_path=os.environ.get("CHATBOT_CONFIG_DIR", "/home/harry/Chatbot")
)

In [10]:
from mistralai import Mistral

In [11]:
# Read the Mistral key and the two Upstash Vector settings through `config`,
# in the same order as the names on the left-hand side.
MISTRAL_API_KEY, UPSTASH_VECTOR_REST_URL, UPSTASH_VECTOR_REST_TOKEN = map(
    config,
    ("MISTRAL_API_KEY", "UPSTASH_VECTOR_REST_URL", "UPSTASH_VECTOR_REST_TOKEN"),
)

In [12]:
# Single Mistral client for the notebook, authenticated with the configured key.
client = Mistral(api_key=MISTRAL_API_KEY)

# Name of the embedding model; used as the default `model` of get_embedding.
model = "mistral-embed"

In [13]:
def get_embedding(text, model=model):
    """Return the embedding of ``text`` produced by the Mistral client.

    Args:
        text: String to embed. Newline characters are replaced with single
            spaces before the request is made.
        model: Embedding model name passed to the API. The default is the
            value the module-level ``model`` had when this function was defined.

    Returns:
        The ``embedding`` attribute of the first item in the response's
        ``data`` list (the text is sent as a one-element batch).
    """
    single_line = " ".join(text.split("\n"))
    response = client.embeddings.create(model=model, inputs=[single_line])
    first_item = response.data[0]
    return first_item.embedding

In [14]:
# Toy corpus: three short sentences. Their list positions (0, 1, 2) are reused
# further down as the ids under which the embeddings are stored.
documents = [
    "The cat jumped over the dog",
    "The cow jumped over the moon",
    "The turkey ran in circles",
]

In [15]:
# One embedding per document, in document order (one get_embedding call each).
embeddings = list(map(get_embedding, documents))

In [16]:
# Map each embedding to its position in `embeddings` (0, 1, 2, ...).
dataset = dict(enumerate(embeddings))

In [17]:
from upstash_vector import Index, Vector

# Client for the Upstash Vector index, built from the REST URL and token that
# were read from config.
index = Index(token=UPSTASH_VECTOR_REST_TOKEN, url=UPSTASH_VECTOR_REST_URL)

In [18]:
# Wrap every (id, embedding) pair from `dataset` in a Vector, printing each id
# as it is processed.
vectors = []
for key, value in dataset.items():
    print(key)
    vectors.append(Vector(id=key, vector=value))

0
1
2


In [19]:
# Display the Vector objects built above. NOTE(review): this renders every
# float of every embedding, so the output is very long; consider showing only
# a summary (e.g. the ids or the count) before sharing the notebook.
vectors

[Vector(id=0, vector=[0.021087646484375, -0.004070281982421875, 0.058624267578125, 0.0195465087890625, 0.033599853515625, -0.0079345703125, 0.05157470703125, -0.0252227783203125, -0.0090484619140625, -0.028472900390625, -0.034881591796875, 0.0333251953125, -0.0516357421875, 0.01184844970703125, -0.0285797119140625, 0.020263671875, -0.00946807861328125, -0.01092529296875, 0.05975341796875, -0.01153564453125, -0.00417327880859375, -0.0121002197265625, -0.0281982421875, 0.004718780517578125, -0.01464080810546875, 0.018341064453125, 0.0115509033203125, -0.032958984375, -0.035186767578125, 0.0009150505065917969, -0.01280975341796875, -0.057403564453125, -0.00926971435546875, 0.01160430908203125, 0.0131988525390625, -0.046630859375, -0.01369476318359375, -0.039031982421875, 0.03240966796875, 0.016510009765625, -0.03814697265625, -0.0264892578125, -0.01369476318359375, 0.00022423267364501953, 0.0120697021484375, 0.013671875, -0.0016298294067382812, -0.045684814453125, 0.02850341796875, -0.018

In [20]:
# Send all prepared vectors to the index; the call's return value is the cell output.
index.upsert(vectors=vectors)

'Success'

In [21]:
# Embed one additional sentence and keep it in `dataset` under id 3.
extra_sentence = "Harry was here before you even exist"
dataset[3] = get_embedding(extra_sentence)

In [22]:
# Store the additional embedding in the index under id 3.
extra_vector = Vector(id=3, vector=dataset[3])
index.upsert(vectors=[extra_vector])

'Success'

In [23]:
# Query text; it is the same sentence that was stored under id 3 above.
query_str = "Harry was here before you even exist"
# Embed the query with the same function used for the stored texts.
query_embedding = get_embedding(text=query_str)

In [24]:
# Fetch the top three matches for the query embedding, asking the index to
# return stored vectors and metadata along with each match.
results = index.query(
    vector=query_embedding, top_k=3, include_vectors=True, include_metadata=True
)

# Print each match's id next to its score multiplied by 100.
for result in results:
    print(result.id, result.score * 100)

3 100.0
0 81.40495
1 81.345093
