In [1]:
# Optional one-time environment setup; uncomment the lines below to run them.
# %pip (rather than !pip) installs into the environment of the running kernel.
# %pip install --upgrade pip setuptools wheel --quiet
# %pip uninstall -y numpy faiss faiss-cpu faiss-gpu
# %pip install --upgrade numpy
# %pip install faiss-cpu --quiet
# %pip install sentence-transformers --quiet

In [2]:
from pathlib import Path
import sys
import json
# The project root is taken to be the parent of the current working directory
# (i.e. the notebook is expected to be launched from a direct child of the root).
project_root = Path.cwd().parent

# Make <project_root>/src importable. The membership check keeps the cell
# idempotent: re-running it must not pile duplicate entries onto sys.path.
src_path = str(project_root / "src")
if src_path not in sys.path:
    sys.path.append(src_path)

In [3]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from simple_rag import create_embeddings, create_faiss_index, retrieve_chunks, save_artifacts, load_artifacts

In [5]:
# Data locations, both rooted under <project_root>/data.
DATA_DIR_RAW = project_root.joinpath("data", "raw")
DATA_DIR_PROCESSED = project_root.joinpath("data", "processed")

In [7]:
# Load the chunk file into an {id: text} mapping; each non-blank line of the
# JSONL file is one JSON object carrying an "id" and a "text" field.
chunks = {}
with open(DATA_DIR_RAW / "a_study_in_scarlet_chunks.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        # Skip blank / whitespace-only lines: json.loads would raise on them,
        # and they carry no record.
        if not line.strip():
            continue
        record = json.loads(line)
        chunks[record["id"]] = record["text"]

In [12]:
# Embed the text of every chunk; the helper returns the embeddings together
# with the model it used, which is reused later for querying.
embeddings, model = create_embeddings(
    list(chunks.values()),
    model_name="all-MiniLM-L6-v2",
)

# Build the FAISS index from those embeddings.
index = create_faiss_index(embeddings)

Batches: 100%|██████████| 3/3 [00:03<00:00,  1.30s/it]


In [13]:
# Hand the embeddings, index and chunk mapping to save_artifacts, targeting the
# processed-data directory (prefix presumably namespaces the output files — confirm in simple_rag).
save_artifacts(
    embeddings,
    index,
    chunks,
    path=DATA_DIR_PROCESSED,
    prefix="simple_rag_",
)

In [15]:
# Retrieve the two best-matching chunks for a sample question.
question = "How did Watson meet Holmes?"
relevant_chunks = retrieve_chunks(
    question,
    index,
    list(chunks.values()),
    model,
    top_k=2,
)

# Print each retrieved chunk, numbered from 1.
for rank, text in enumerate(relevant_chunks, start=1):
    print(f"Chunk {rank}: {text}")

Chunk 1: Holmes’ smallest actions were all directed towards some definite and practical end. “What do you think of it, sir? ” they both asked. “It would be robbing you of the credit of the case if I was to presume to help you,” remarked my friend. “You are doing so well now that it would be a pity for anyone to interfere. ” There was a world of sarcasm in his voice as he spoke. “If you will let me know how your investigations go,” he continued, “I shall be happy to give you any help I can. In the meantime I should like to speak to the constable who found the body. Can you give me his name and address? ” Lestrade glanced at his note-book. “John Rance,” he said. “He is off duty now. You will find him at 46, Audley Court, Kennington Park Gate. ” Holmes took a note of the address. “Come along, Doctor,” he said; “we shall go and look him up. I’ll tell you one thing which may help you in the case,” he continued, turning to the two detectives. “There has been murder done, and the murderer was