---
title: "RAG"
format: ipynb
---

In [None]:
import pyarrow.parquet as pq

# Read the query-result parquet file into an Arrow table.
table2 = pq.read_table('data/query_result.parquet')

# Stringify every entry of the 'claim' column into a plain Python list.
claims = list(map(str, table2['claim']))

In [None]:
from sentence_transformers import SentenceTransformer

# Identifier of the sentence-embedding model handed to SentenceTransformer.
MODEL_NAME = 'sentence-transformers/all-MiniLM-L12-v2'
model = SentenceTransformer(MODEL_NAME)

In [None]:
question = "What can you tell me about polar bears?"

# Encode the question and (up to) the first 20 claims in a single batch;
# the question is placed first, so it becomes row 0 of the result.
texts_to_embed = [question, *claims[:20]]
embeddings = model.encode(texts_to_embed)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity, manhattan_distances, euclidean_distances

# Pairwise cosine similarity between every pair of rows in `embeddings`.
# NOTE: a notebook cell renders only its final expression, so this matrix and
# the single-pair value below are computed but not displayed.
cosine_similarity(embeddings)

# Cosine similarity between row 0 and row 1 only.
# scikit-learn's pairwise functions require 2-D inputs; indexing with
# `embeddings[0]` yields a 1-D vector and raises ValueError, so slice instead
# to keep each operand as a single-row 2-D array.
cosine_similarity(embeddings[0:1], embeddings[1:2])

# Map Euclidean distance into a similarity-like score: distance 0 -> 1.0,
# larger distances -> values approaching 0.
1 / (1 + euclidean_distances(embeddings))

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, Batch
# Vector size configured for the collection; it must equal the length of the
# vectors produced by `model.encode` below.
DIMENSIONS = 384

# In-memory Qdrant instance: nothing is persisted, so the collection is
# rebuilt from scratch each time this cell runs.
client = QdrantClient(location=":memory:")
client.create_collection(
    collection_name="claims",
    vectors_config=VectorParams(size=DIMENSIONS, distance=Distance.DOT),
)

# Embed every claim.
# NOTE(review): this rebinds `embeddings`, which an earlier cell used for the
# question + 20-claim sample; re-running that similarity cell after this one
# will operate on the full claim set instead.
embeddings = model.encode(claims)

# Insert all claims in one batch. Point ids are the positional indices, and
# each payload carries the original claim text under the 'text' key.
client.upsert(
    collection_name="claims",
    points=Batch(
        ids=list(range(len(embeddings))),
        # Batch expects plain Python lists of floats, not a NumPy array.
        vectors=embeddings.tolist(),
        payloads=[{'text': claim} for claim in claims],
    ),
)


In [None]:
# Embed the free-text query with the same model used for the stored claims.
query_vector = model.encode("what is happening to the polar bears?")

# Ask the "claims" collection for the 3 best matches, including payloads.
response = client.query_points(
    collection_name="claims",
    query=query_vector,
    with_payload=True,
    limit=3,
)
search_result = response.points

print(search_result)