In [49]:
# from decouple import config
# import helpers
from dotenv import load_dotenv

from sentence_transformers import SentenceTransformer


load_dotenv()

True

In [39]:
import os

GEMINI_API_KEY = os.environ.get("GEMINI_KEY_API")

UPSTASH_VECTOR_REST_URL = os.environ.get("UPSTASH_VECTOR_REST_URL")
                                         
UPSTASH_VECTOR_REST_TOKEN = os.environ.get("UPSTASH_VECTOR_REST_TOKEN")



In [40]:
import google.generativeai as genai

genai.configure(api_key = GEMINI_API_KEY)



In [41]:
def get_embedding(text, model = "models/text-embedding-004"):
    result = genai.embed_content(
        model=model,
        content=text,
        task_type="retrieval_document",
        title="Embedding of single string")
    return result.get('embedding')
# 'all-mpnet-base-v2'
# "all-MiniLM-L6-v2"

# def get_embedding(text, model = SentenceTransformer("all-mpnet-base-v2")):
#     result = model.encode(text)
#     return result


In [42]:
documents = [
    "The cat jumped over the dog",
    "The red cat jumped over the dog",
    "The turkey ran in circles",
]

In [43]:
embeddings = [get_embedding(x) for x in documents]

In [44]:
dataset = {}
for i, embedding in enumerate(embeddings):
    dataset[i] = embedding

In [45]:
from upstash_vector import Index

from upstash_vector import Vector

index = Index(url=UPSTASH_VECTOR_REST_URL, token=UPSTASH_VECTOR_REST_TOKEN)


In [46]:
vectors = []
for key, value in dataset.items():
    my_id = key
    embedding = value
    vectors.append(Vector(id = my_id, vector = embedding))

In [47]:
vectors
    

[Vector(id=0, vector=[-0.042506296, 0.056425516, -0.027823804, -0.0027511285, 0.020853056, 0.04588147, 0.03822576, 0.04662816, 0.016475918, 0.029581804, 0.0007471781, 0.046243828, 0.056942623, 0.045737293, 0.030321145, -0.05630371, 0.034174286, 0.029754195, -0.057086296, 0.03971113, 0.024884485, -0.004218633, 0.041659795, -0.021013167, -0.02456974, -0.007012416, 0.023041751, -0.015221896, -0.026244044, -0.058766704, 0.072859235, 0.07379805, 0.0270901, -0.022657154, 0.06282408, 0.036756404, -0.0028998163, 0.06261325, 0.029468736, 0.0035312593, -0.0680046, -0.025684688, -0.023025684, 0.049055215, 0.013001667, -0.062557206, -0.019169262, 0.035958186, -0.029930832, 0.018073365, 0.053794894, 0.02550974, -0.013162042, 0.0098282015, -0.04086811, -0.015825864, -0.0071733044, -0.0076980824, 0.00029273142, -0.03827002, 0.009877706, -0.03872153, -0.0073028654, 0.010214837, -0.040635243, -0.03434032, -0.04390391, 0.01791637, -0.0851918, 0.05944589, -0.033264134, 0.04951623, -0.045049444, 0.0291117

In [48]:
index.upsert(
    vectors = vectors
)

'Success'

In [32]:
query_str = "Obi cleaned the house and did his homework",
query_embedding = get_embedding(query_str)
query_embedding[0]

[-0.03876068,
 0.05516515,
 -0.05788112,
 0.041592054,
 0.06121118,
 0.042075675,
 -0.0007621665,
 0.077921905,
 -0.0147123085,
 -0.004326515,
 -0.00067744835,
 0.02535968,
 0.064007886,
 0.021384174,
 0.0025901424,
 -0.087588735,
 0.048245106,
 0.014003629,
 -0.084476344,
 0.030901749,
 -0.0080297105,
 0.024833864,
 0.008099976,
 -0.034740083,
 -0.009603178,
 -0.015277683,
 0.028875772,
 -0.006932748,
 -0.07526888,
 -0.051409323,
 0.030645045,
 0.094075926,
 0.011813955,
 -0.054961815,
 0.05516533,
 0.05960014,
 -0.07232187,
 0.021771353,
 0.014382617,
 -0.034224026,
 -0.07977847,
 -0.042925734,
 -0.010235988,
 0.03591162,
 -0.047099117,
 -0.033358242,
 -0.020804098,
 0.05099944,
 0.019499395,
 0.027154889,
 0.03785817,
 0.02187185,
 -0.024701724,
 0.013871481,
 -0.026956398,
 -0.055256926,
 0.008514624,
 -0.012182543,
 0.013559688,
 -0.021164255,
 -0.013274675,
 -0.03717176,
 -0.02474062,
 -0.053979665,
 -0.041572265,
 -0.017627656,
 -0.010770196,
 0.013301982,
 -0.075738855,
 0.0049

In [33]:
results = index.query(
  vector=query_embedding[0],
  top_k=3,
  include_vectors=True,
  include_metadata=True
)

for result in results:
    print(result.id, result.score)

0 0.87254477
1 0.87084484
2 0.8624703


In [34]:
dataset[2] = get_embedding("Obi cleaned the house and did his homework")

In [35]:
dataset[2]

[-0.03876068,
 0.05516515,
 -0.05788112,
 0.041592054,
 0.06121118,
 0.042075675,
 -0.0007621665,
 0.077921905,
 -0.0147123085,
 -0.004326515,
 -0.00067744835,
 0.02535968,
 0.064007886,
 0.021384174,
 0.0025901424,
 -0.087588735,
 0.048245106,
 0.014003629,
 -0.084476344,
 0.030901749,
 -0.0080297105,
 0.024833864,
 0.008099976,
 -0.034740083,
 -0.009603178,
 -0.015277683,
 0.028875772,
 -0.006932748,
 -0.07526888,
 -0.051409323,
 0.030645045,
 0.094075926,
 0.011813955,
 -0.054961815,
 0.05516533,
 0.05960014,
 -0.07232187,
 0.021771353,
 0.014382617,
 -0.034224026,
 -0.07977847,
 -0.042925734,
 -0.010235988,
 0.03591162,
 -0.047099117,
 -0.033358242,
 -0.020804098,
 0.05099944,
 0.019499395,
 0.027154889,
 0.03785817,
 0.02187185,
 -0.024701724,
 0.013871481,
 -0.026956398,
 -0.055256926,
 0.008514624,
 -0.012182543,
 0.013559688,
 -0.021164255,
 -0.013274675,
 -0.03717176,
 -0.02474062,
 -0.053979665,
 -0.041572265,
 -0.017627656,
 -0.010770196,
 0.013301982,
 -0.075738855,
 0.0049

In [36]:
index.upsert(vectors = [Vector(id=2, vector = dataset[2])])

'Success'

In [37]:
results = index.query(
  vector=query_embedding[0],
  top_k=3,
  include_vectors=True,
  include_metadata=True
)

for result in results:
    print(result.id, result.score * 100)

2 100.0
0 87.254477
1 87.08448399999999
