# ---- Evaluation of RAG pipelines using RAGAS ----

1. Importing all environment variables

In [None]:
from dotenv import load_dotenv
load_dotenv()

True

2. Import all neccessary libraries

In [None]:
from langchain_community.document_loaders import DirectoryLoader, PyPDFium2Loader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
import time
from pathlib import Path

  from .autonotebook import tqdm as notebook_tqdm


##### Step 1a: Load documents

In [None]:
ROOT_DIR = Path().resolve().parent
PDF_DIR = ROOT_DIR / "data" / "pdfs"

loader = DirectoryLoader(
    path=PDF_DIR,
    glob="*.pdf",   
    loader_cls=PyPDFium2Loader,
    loader_kwargs={
        "mode": "page"
    },
    show_progress=True
)

docs = loader.load()

print(f"Number of documents loaded: {len(docs)}")


  0%|          | 0/3 [00:00<?, ?it/s]

100%|██████████| 3/3 [00:02<00:00,  1.04it/s]

Number of documents loaded: 224





In [None]:
# save the loaded documents to a file for inspection
with open(ROOT_DIR / "loaded_docs" / "loaded_docs.txt", "w", encoding="utf-8") as f:
    for doc in docs:
        f.write(doc.page_content)
        f.write("\n" + "="*80 + "\n")

##### Step 1b : Text Splitting (Chunking)

In [None]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents([doc.page_content for doc in docs])

In [None]:
len(chunks)

549

In [None]:
chunks[5].page_content

'Amazon Alexa features ..................................35\nAmazon Alexa features (overview) ...................35\nLinking with your Amazon Echo Show ..............36\nUsing Amazon Alexa to control devices ...........37\nGoogle Home features ...................................38\nGoogle Home features (overview) ....................38\nLinking with your Google Home ........................38\nUsing Google Home to control devices ............39\nSystem settings and maintenance ................40\nCountry/region settings .....................................40\nEvent logs .........................................................40\nDeregistering devices .......................................41\nSupport information .......................................43\nAccessing the User’s Guide ..............................43\nAccessing customer support .............................43\nViewing the Camera Installation / Removal Guide 43\nViewing system information ..............................43'

##### Step 1c & 1d: Indexing (Embedding generation and Storing in vector store)

In [None]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

BATCH_SIZE = 20 
SLEEP_TIME = 20           # seconds between batches

vector_store = None

for i in range(0, len(chunks), BATCH_SIZE):
    batch = chunks[i:i + BATCH_SIZE]

    if vector_store is None:
        vector_store = FAISS.from_documents(batch, embeddings)
    else:
        vector_store.add_documents(batch)

    print(f"Embedded {i + len(batch)} / {len(chunks)}")
    time.sleep(SLEEP_TIME)


Embedded 20 / 549


In [None]:
vector_store.index_to_docstore_id

{0: '1c7aac2a-f6a4-4e16-b70f-7717b58ebc2b',
 1: 'a23334c2-7c89-464f-92b1-a1ef55ec2a3e',
 2: '937bc2c4-8349-4544-9685-9d55b258c83d',
 3: '29c417ee-a7e1-401d-9f8f-1162e3cd40a9',
 4: '0c59d5e1-a652-4fbb-b9c9-2fff343d79a5',
 5: '4630d69a-ded2-43c9-8b5e-cf406c9e7f2c',
 6: 'e2a65ce8-6387-4537-bf55-4d70ff1177e7',
 7: '3d16e0e3-98b3-4899-925d-fb15dde86398',
 8: 'adc515da-5dd4-4759-9ba5-c6c1437bfa3e',
 9: '4b605ce2-cc6f-4ac2-b7ab-b55cc2c7677c',
 10: 'cfc1b3c5-5c9a-4d9f-9fb3-aaf7f7d6f86d',
 11: 'e6ffcc95-7e6a-4fc4-89fd-947cd98b64af',
 12: '473b2350-ffdf-4541-8dac-54cec54442c4',
 13: '024c65b8-5d8d-403a-a538-1f1d8a056388',
 14: '77872d33-0002-41fc-8cad-8a66406eaa94',
 15: '8dc9a8bd-6d51-4139-8dd7-0db5766bdc87',
 16: '92bcf6b0-3519-4684-a5eb-6370307e2f9c',
 17: '88a5215c-0deb-4852-b6a3-85c3ed50228b',
 18: 'a2c8e1c7-abeb-467e-943d-d38addab4ff5',
 19: 'b7304806-6770-409d-b245-d9f75db8069f',
 20: '5b34c3cb-9a5f-4855-bf07-e48dcd490539',
 21: '9c4dbe1f-89f8-47bf-8d4e-058d346ebaf8',
 22: '09da5f1c-1ee6-

In [None]:
vector_store.get_by_ids(['9f89558e-0c4d-4d0e-8cb6-815d078b5e66'])

[]

##### Step 2: Retrieval

In [None]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [None]:
retriever

VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000024C3BCE25D0>, search_kwargs={'k': 4})

In [None]:
retriever.invoke("Tell me about the advantages of home hawk")

[Document(id='c89acb50-0728-4759-869d-544ece7a29de', metadata={}, page_content='What is the HomeHawk?\nOverview\nThe Panasonic Home Monitoring Camera is a wireless network of devices that help you monitor your\nhome. Its easy-to-use app allows you to stay in charge even when away from home.\n6\nGetting started with the HomeHawk!'),
 Document(id='a315669c-5a28-4ca3-8639-fe35e0230248', metadata={}, page_content='I want to know when a room gets too hot or cold\nYou can configure cameras to notify your mobile device when the temperature of a room goes above or\nbelow the specified temperatures.\n17\nGetting started with the HomeHawk!'),
 Document(id='a23334c2-7c89-464f-92b1-a1ef55ec2a3e', metadata={}, page_content='Getting started with the HomeHawk!\nWhat is the HomeHawk? ..................................6\nOverview .............................................................6\nWhat kinds of devices are available? .............7\nPanasonic Home Monitoring Camera device\nlineup ..........

##### 3. Augmentation

In [None]:
model = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash-lite"
)

In [None]:
prompt = PromptTemplate(
    template="""
    You are an helpful assistant.
    Answer ONLY from the provided context.
    If the context is not sufficient, say "I don't know".
    Context: {context}
    Question: {question}
    Answer:""",
    input_variables=["context", "question"]
)