In [1]:
import sys
import os

# Add the parent of the current working directory to sys.path so the `src` package can be imported
sys.path.append(os.path.abspath(".."))

from src.rag.embeddings import EmbeddingModel
from src.memory.vectordb import VectorDB

print("Initializing RAG Pipeline...")

# --- Step 1: construct the pipeline components ------------------------------
# NOTE: per the original author's note, the first run may pause here while the
# embedding model (~80MB) is downloaded.
embedder = EmbeddingModel()
vectordb = VectorDB()

# --- Step 2: sample farm records to index -----------------------------------
# Each record is a dict with an "id" and a free-text "text" field; the whole
# dict is stored alongside its embedding.
farm_records = [
    {"id": record_id, "text": record_text}
    for record_id, record_text in (
        ("F001", "Farm A: Wheat crop affected by rust in field 3. Treated with fungicide X."),
        ("F002", "Farm B: Healthy corn crop. Expected yield 200 bushels/acre."),
        ("F003", "Farm A: Tomato blight detected. Humidity was high last week."),
        ("F004", "Farm C: Potato harvest successful. No signs of rot."),
    )
]

print("Ingesting data...")
for record in farm_records:
    # embed_text is documented by the original author as returning a batch of
    # vectors, so the single input's vector is taken from position 0.
    embedding_batch = embedder.embed_text(record['text'])
    vector = embedding_batch[0]
    vectordb.add(vector, record)
    print(f"Added record: {record['id']}")

# --- Step 3: embed a question and retrieve the closest records --------------
query_text = "What disease affected the tomato crop?"
print(f"\nQuerying: '{query_text}'")

# Same batch-of-one convention as during ingestion.
query_vector = embedder.embed_text(query_text)[0]
results = vectordb.search(query_vector, top_k=2)

print("\nResults:")
for res in results:
    # Each hit exposes a "score" and the stored record under "metadata".
    print(f"Score: {res['score']:.4f} | Text: {res['metadata']['text']}")

  from .autonotebook import tqdm as notebook_tqdm


Initializing RAG Pipeline...
Loading embedding model: sentence-transformers/all-MiniLM-L6-v2 on cpu...
Ingesting data...
Added record: F001
Added record: F002
Added record: F003
Added record: F004

Querying: 'What disease affected the tomato crop?'

Results:
Score: 0.6043 | Text: Farm A: Tomato blight detected. Humidity was high last week.
Score: 0.5233 | Text: Farm A: Wheat crop affected by rust in field 3. Treated with fungicide X.
