In [1]:
from datasets import Dataset 
import os
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall
)


from IngestionAgent import IngestionAgent
from RetrivalAgent import RetrievalAgent
from LLMResponseAgent import LLMResponseAgent
import unicodedata
import os
from dotenv import load_dotenv

load_dotenv()

from ragas.llms import LangchainLLMWrapper
from langchain_google_genai import ChatGoogleGenerativeAI


Docs on RAGS using Langchain : 

https://blog.langchain.com/evaluating-rag-pipelines-with-ragas-langsmith/



In [2]:

test_file_path = "./Test/test.pdf"


ingestion_agent = IngestionAgent()
document_chunks = ingestion_agent.run(test_file_path)

retrieval_agent = RetrievalAgent()
retrieval_agent.add_documents(document_chunks) 
retriever = retrieval_agent.get_retriever(search_kwargs={"k": 3})


llm_agent = LLMResponseAgent(retriever=retriever)



[IngestionAgent] Starting ingestion for: ./Test/test.pdf
[Document Loader] Loading 'test.pdf' with LangChain...
[IngestionAgent] Successfully created 140 chunks from ./Test/test.pdf
[RetrievalAgent] Initializing...
[RetrievalAgent] Embeddings initialized successfully.
Creating new FAISS store.
[RetrievalAgent] Initialization complete.
[RetrievalAgent] Adding 140 chunks to FAISS.
[RetrievalAgent] Vector store updated.


In [3]:
data_samples = {
    'question': [
        'What is Cloud Computing?', 
        'List Cloud Deployment Models?'
    ],
    
    'ground_truth': [
        'Cloud computing is the delivery of computing services – like servers, storage, databases, networking, software, analytics, and intelligence  over the internet', 
        'List Cloud Deployment Models?'
    ]
}
data_samples["answer"] = []
data_samples["contexts"] = []


In [4]:
for question in data_samples['question']:
    response = llm_agent.run(question)
    result = unicodedata.normalize('NFKC', response['result'])
    data_samples["answer"].append(result)
    data_samples["contexts"].append([source.page_content for source in response["source_documents"]])

data_samples

[LLMResponseAgent] Answering question: 'What is Cloud Computing?'
[LLMResponseAgent] Answering question: 'List Cloud Deployment Models?'


{'question': ['What is Cloud Computing?', 'List Cloud Deployment Models?'],
 'ground_truth': ['Cloud computing is the delivery of computing services – like servers, storage, databases, networking, software, analytics, and intelligence  over the internet',
  'List Cloud Deployment Models?'],
 'answer': ['Based on the National Institute of Standards and Technology (NIST), cloud computing is defined as “a model for enabling ubiquitous, convenient, on-demand network access to a shared pool of configurable computing resources (e.g., networks, servers, storage, applications, and services) that can be rapidly provisioned and released with minimal management effort or service provider interaction”.',
  'The cloud deployment models are:\n*   Private cloud\n*   Hybrid cloud\n*   Community cloud\n*   Public cloud'],
 'contexts': [['automated and more efﬁcient; compared to manual and physical\nprovision, it may reach a near real-time provision.\nData centers that are used to host the three service

In [5]:
gemini_llm = llm_agent.llm # or your specific Gemini model

# Wrap Gemini LLM for Ragas
ragas_llm = LangchainLLMWrapper(gemini_llm)

# Set the LLM for each metric
faithfulness.llm = ragas_llm
answer_relevancy.llm = ragas_llm
context_recall.llm = ragas_llm

metrics = [faithfulness, answer_relevancy, context_recall]

In [6]:
dataset = Dataset.from_dict(data_samples)

score = evaluate(dataset, metrics=metrics, llm=ragas_llm,embeddings=retrieval_agent.embeddings)
df = score.to_pandas()
df.to_csv('score.csv', index=False)

Evaluating:   0%|          | 0/6 [00:00<?, ?it/s]

In [7]:
df.head()

Unnamed: 0,user_input,retrieved_contexts,response,reference,faithfulness,answer_relevancy,context_recall
0,What is Cloud Computing?,[automated and more efﬁcient; compared to manu...,Based on the National Institute of Standards a...,Cloud computing is the delivery of computing s...,1.0,0.847335,1.0
1,List Cloud Deployment Models?,[deploy their Internet applications and suppor...,The cloud deployment models are:\n* Private ...,List Cloud Deployment Models?,0.75,0.947626,0.0
