In [None]:
from qdrant_client import QdrantClient, models
import json 


  from .autonotebook import tqdm as notebook_tqdm


In [12]:
qd_client = QdrantClient("http://localhost:6333")

In [3]:
from fastembed import TextEmbedding

In [None]:
tables=[]
texts= []

In [None]:
with open('data/summarized_tables.json', 'r' , encoding="utf-8") as f1:
    tables = json.load(f1)

with open('data/summarized_texts.json', 'r' , encoding="utf-8") as f2:
    texts = json.load(f2)


In [14]:
# configurations

model_handle = "jinaai/jina-embeddings-v2-small-en"
EMBEDDING_DIMENSIONALITY = 512

In [27]:
collection_name = "stardew-vector-search"

qd_client.create_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(
        size=EMBEDDING_DIMENSIONALITY,
        distance=models.Distance.COSINE
    )
)

True

In [28]:
table_points = []

for i , table in enumerate(tables):

    text_to_embedd = f"Page title: {table['page_title']}. Section title: {table['section_title']}. Table summary: {table['summary']}"
    
    point = models.PointStruct(
        vector = models.Document(text=text_to_embedd, model=model_handle), 
        id=i,
        payload=table
    )
    table_points.append(point)

In [57]:
text_points = []

for i , text in enumerate(texts):
    text_to_embedd = f"Page title: {text['page_title']}. Section title: {text['section_title']}. text: {text['text']}"
    text["type"] ="text"
    point = models.PointStruct(
        vector = models.Document(text=text_to_embedd, model=model_handle), 
        id=i,
        payload=text
    )
    text_points.append(point)

    

In [62]:
def batch_upsert(qd_client, collection_name, points, batch_size=500):
    total = len(points)
    for i in range(0, total, batch_size):
        batch = points[i : i + batch_size]
        qd_client.upsert(collection_name=collection_name, points=batch)
        print(f"✅ Upserted {min(i+batch_size, total)}/{total}")

In [63]:
batch_upsert(qd_client=qd_client,collection_name=collection_name,points=text_points,batch_size=500)

✅ Upserted 500/19039
✅ Upserted 1000/19039
✅ Upserted 1500/19039
✅ Upserted 2000/19039
✅ Upserted 2500/19039
✅ Upserted 3000/19039
✅ Upserted 3500/19039
✅ Upserted 4000/19039
✅ Upserted 4500/19039
✅ Upserted 5000/19039
✅ Upserted 5500/19039
✅ Upserted 6000/19039
✅ Upserted 6500/19039
✅ Upserted 7000/19039
✅ Upserted 7500/19039
✅ Upserted 8000/19039
✅ Upserted 8500/19039
✅ Upserted 9000/19039
✅ Upserted 9500/19039
✅ Upserted 10000/19039
✅ Upserted 10500/19039
✅ Upserted 11000/19039
✅ Upserted 11500/19039
✅ Upserted 12000/19039
✅ Upserted 12500/19039
✅ Upserted 13000/19039
✅ Upserted 13500/19039
✅ Upserted 14000/19039
✅ Upserted 14500/19039
✅ Upserted 15000/19039
✅ Upserted 15500/19039
✅ Upserted 16000/19039
✅ Upserted 16500/19039
✅ Upserted 17000/19039
✅ Upserted 17500/19039
✅ Upserted 18000/19039
✅ Upserted 18500/19039
✅ Upserted 19000/19039
✅ Upserted 19039/19039


In [29]:
for i in range(12):
    print("epoch ", i+1)
    qd_client.upsert(
        collection_name=collection_name,
        points=table_points[i*1000:(i+1)*1000]
    )



epoch  1
epoch  2
epoch  3
epoch  4
epoch  5
epoch  6
epoch  7
epoch  8
epoch  9
epoch  10
epoch  11
epoch  12


In [31]:
qd_client.upsert(
    collection_name=collection_name,
    points=table_points[12000:12010]
)

UpdateResult(operation_id=12, status=<UpdateStatus.COMPLETED: 'completed'>)

In [64]:
def vector_search(query, limit=5):
    results = qd_client.query_points(
        collection_name=collection_name,
        query=models.Document( #embed the query text locally
            text=query,
            model=model_handle 
        ),
        limit=limit,
        with_payload=True #to get metadata in the results
    )

    return results


In [65]:
question = "what crops to plant in fall?"

query_points = vector_search(query=question)

results = []
for qPoint in query_points.points:
    results.append(qPoint.payload)

results

[{'page_title': 'Fall_Crops',
  'section_title': 'Contents',
  'text': 'Crops are plants that are grown from seeds to be harvested for the purpose of profit, food, or gifting. Generally, each crop is seasonal. It can be planted only during its designated season, and when seasons change (after the 28th day), the crop will wither and die. Multi-season crops (i.e., Ancient Fruit, Coffee Bean, Corn, Sunflower, and Wheat), however, will continue to grow during all specified seasons.',
  'type': 'text'},
 {'page_title': 'Spring_Crops',
  'section_title': 'Contents',
  'text': 'Crops are plants that are grown from seeds to be harvested for the purpose of profit, food, or gifting. Generally, each crop is seasonal. It can be planted only during its designated season, and when seasons change (after the 28th day), the crop will wither and die. Multi-season crops (i.e., Ancient Fruit, Coffee Bean, Corn, Sunflower, and Wheat), however, will continue to grow during all specified seasons.',
  'type':