# Demonstration
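Basics of how vector embeddings work: a query and several passages are encoded as vectors, and each passage gets a similarity score against the query. Notice how the scores compare between passages that are related to the query and passages that are not.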

In [4]:
from sentence_transformers import SentenceTransformer

# One embedding model encodes both the passages and the query, so the
# resulting vectors can be compared with each other.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Candidate passages that will be scored against the query below.
test_passages = [
    "A long time ago in a galaxy far far away",
    "There was a space ship floating with anakin on it",
    "A farmer milks his cows",
    "they actually were on his land",
    "When a phone falls down into a drain, something interesting happens",
]

# A single query, kept in a list so it is encoded as a batch of one.
test_query = [
    "Star wars begins on episode 4 with a jedi who must travel to a different planet"
]

query_embeddings = model.encode(test_query)
passage_embeddings = model.encode(test_passages)

# (Cosine) similarity of the query against every passage, scaled by 100
# so that the values can be printed in percent style.
scores = model.similarity(query_embeddings, passage_embeddings) * 100

# Show the query first, then one "<score>% - <passage>" line per passage.
print(test_query[0])
query_scores = scores[0].tolist()  # row 0 belongs to the only query
for idx, passage in enumerate(test_passages):
    print(f"{round(query_scores[idx], 2)}% - {passage}")

Star wars begins on episode 4 with a jedi who must travel to a different planet
22.31% - A long time ago in a galaxy far far away
35.85% - There was a space ship floating with anakin on it
9.22% - A farmer milks his cows
5.74% - they actually were on his land
-0.87% - When a phone falls down into a drain, something interesting happens


# Extracting vector embeddings from the content

In [2]:
import dotenv
# Imported here as well so this cell runs on a fresh kernel without relying on
# another cell having been executed first.
from sentence_transformers import SentenceTransformer
from qdrant import MyQdrant
from db_funcs import get_all_content, get_db_con

# Load variables from a .env file into the process environment.
# NOTE(review): presumably the project helpers below read their settings from
# there - confirm against qdrant.py / db_funcs.py.
dotenv.load_dotenv()

model = SentenceTransformer("all-MiniLM-L6-v2")

# Ask the model for its embedding width directly instead of encoding a
# throwaway string and measuring the resulting vector.
size = model.get_sentence_embedding_dimension()

# Project wrapper bound to the "graphRAG" collection with vectors of `size`.
qdrant_client = MyQdrant("graphRAG", size)

# Fetch the ids and the content to embed from the database.
# NOTE(review): `conn` and its cursor are never closed in this cell; close them
# once no other cell needs the connection.
conn = get_db_con()
content_ids, content = get_all_content(conn.cursor())

print("Creating embeddings from content")
content_embeddings = model.encode(content)

print("adding embeddings to db")
# Embeddings and ids are passed together.
# NOTE(review): assumes both are in the same order and of equal length -
# verify against get_all_content.
qdrant_client.add_points(content_embeddings, content_ids)
print("embedding db is ready")

Skipping creating collection; 'graphRAG' already exists.
Creating embeddings from content
adding embeddings to db
embedding db is ready
