## RAG Evaluation

Evaluating the RAG agent using synthetic test data generated with Ragas and scored through a LangSmith dataset.

In [1]:
# --- Environment bootstrap ---------------------------------------------------
import sys

from dotenv import load_dotenv

# Extend the import path with the parent directory before importing `utils`
# (presumably the project root -- confirm against the repo layout).
sys.path.append('..')

from utils.config import ENV_FILE_PATH

# Load environment variables from the project's env file before any client is built.
load_dotenv(ENV_FILE_PATH)

# --- Third-party components --------------------------------------------------
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.llms import LangchainLLMWrapper

# LangChain models wrapped in the Ragas adapter classes.
embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())
llm = LangchainLLMWrapper(ChatOpenAI(model="gpt-4.1-nano"))

# Load every PDF directly under ./data/ using the PyMuPDF-based loader.
path = './data/'
loader = DirectoryLoader(path, glob="*.pdf", loader_cls=PyMuPDFLoader)
docs = loader.load()

In [2]:
from src.graph import build_agent
from uuid import uuid4

query = "Hi my name is Jerry"
config = {'configurable': {'thread_id': str(uuid4())}}

rag_agent = build_agent()
response = await rag_agent.ainvoke(
    {
        'messages': query
    },
    config=config
)

response['messages'][-1].content

'Hello Jerry! How can I assist you today?'

In [3]:
from ragas.testset.graph import KnowledgeGraph, Node, NodeType

# Start from an empty graph and add one DOCUMENT node per loaded document,
# carrying over the document's text and its metadata as node properties.
kg = KnowledgeGraph()
kg.nodes.extend(
    Node(
        type=NodeType.DOCUMENT,
        properties={
            'page_content': loaded_doc.page_content,
            'document_metadata': loaded_doc.metadata,
        },
    )
    for loaded_doc in docs
)

kg

KnowledgeGraph(nodes: 5, relationships: 0)

In [4]:
from ragas.testset.transforms import default_transforms, apply_transforms

# Build the default Ragas transform pipeline for these documents and apply it
# to the knowledge graph (the graph object is passed in and displayed afterwards).
#
# The result is bound to `transforms`, not `default_transforms`: reusing the
# imported factory's name would replace the function with its own return value,
# leaving `default_transforms` uncallable for any code that runs after this cell.
transforms = default_transforms(documents=docs, llm=llm, embedding_model=embeddings)
apply_transforms(kg, transforms)
kg

Applying SummaryExtractor:   0%|          | 0/5 [00:00<?, ?it/s]

Applying CustomNodeFilter:   0%|          | 0/5 [00:00<?, ?it/s]

Applying [EmbeddingExtractor, ThemesExtractor, NERExtractor]:   0%|          | 0/15 [00:00<?, ?it/s]

Applying [CosineSimilarityBuilder, OverlapScoreBuilder]:   0%|          | 0/2 [00:00<?, ?it/s]

KnowledgeGraph(nodes: 5, relationships: 15)

In [5]:
# Persist the knowledge graph to a JSON file, then read that same file back
# into a new KnowledgeGraph object and display it.
kg_json_path = 'apple_intelligence_bloomberg_report.json'

kg.save(kg_json_path)
bloomberg_report = KnowledgeGraph.load(kg_json_path)

bloomberg_report

KnowledgeGraph(nodes: 5, relationships: 15)

In [6]:
from ragas.testset import TestsetGenerator

# Test-set generator backed by the reloaded knowledge graph and the same
# wrapped LLM / embedding model used for the transforms.
generator = TestsetGenerator(
    knowledge_graph=bloomberg_report,
    llm=llm,
    embedding_model=embeddings,
)

In [7]:
from ragas.testset.synthesizers import (
    MultiHopAbstractQuerySynthesizer,
    MultiHopSpecificQuerySynthesizer,
    SingleHopSpecificQuerySynthesizer,
)

# (synthesizer, weight) pairs: half single-hop questions, the remainder split
# evenly between the two multi-hop synthesizers. Every synthesizer shares `llm`.
query_distribution = [
    (synthesizer_cls(llm=llm), weight)
    for synthesizer_cls, weight in (
        (SingleHopSpecificQuerySynthesizer, 0.5),
        (MultiHopAbstractQuerySynthesizer, 0.25),
        (MultiHopSpecificQuerySynthesizer, 0.25),
    )
]

In [8]:
# Request a synthetic test set of 10 samples drawn according to `query_distribution`.
testset = generator.generate(query_distribution=query_distribution, testset_size=10)

Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Scenarios:   0%|          | 0/3 [00:00<?, ?it/s]

Generating Samples:   0%|          | 0/11 [00:00<?, ?it/s]

In [9]:
# Convert the generated test set to a pandas DataFrame; later cells read its
# `user_input`, `reference` and `reference_contexts` columns.
testset_df = testset.to_pandas()

testset_df

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,"Whaat is the date of March 14, 2025 in the con...",[Apple's AI Stumble:\nIntelligence Delays and\...,"The date March 14, 2025, is mentioned as the p...",single_hop_specifc_query_synthesizer
1,Who is Tim Cook in the context of Apple's AI d...,[Memory constraints: Insufficient RAM on older...,Tim Cook is mentioned in the context of Apple'...,single_hop_specifc_query_synthesizer
2,"So, like, how does AI stuff, you know, impact ...",[Integration challenges: Difficulty incorporat...,Integration challenges include difficulty inco...,single_hop_specifc_query_synthesizer
3,How is Apple's enterprise AI deployment compar...,[Enterprise AI Adoption\nApple lags significan...,Apple lags significantly in enterprise AI depl...,single_hop_specifc_query_synthesizer
4,How Apple AI Strategy plans to improve Enterpr...,[Services revenue: App Store and Apple Service...,Services revenue from the App Store and Apple ...,single_hop_specifc_query_synthesizer
5,How do memory constraints on older devices imp...,[<1-hop>\n\nServices revenue: App Store and Ap...,Memory constraints on older devices limit the ...,multi_hop_abstract_query_synthesizer
6,How do the technical constraints of on-device ...,[<1-hop>\n\nIntegration challenges: Difficulty...,The technical constraints of on-device AI proc...,multi_hop_abstract_query_synthesizer
7,"So, with all the integration challenges and te...",[<1-hop>\n\nIntegration challenges: Difficulty...,Apple faces significant integration challenges...,multi_hop_abstract_query_synthesizer
8,How do the integration challenges and talent r...,[<1-hop>\n\nIntegration challenges: Difficulty...,Apple faces integration challenges such as inc...,multi_hop_specific_query_synthesizer
9,How does Google's involvement in AI acquisitio...,[<1-hop>\n\nApple's AI Stumble:\nIntelligence ...,The context indicates that Apple has faced sig...,multi_hop_specific_query_synthesizer


### LangSmith Dataset

In [10]:
from langsmith import Client

client = Client()

dataset_name = 'Apple Intelligence Bloomberg Report'

# Get-or-create: calling create_dataset with a name that already exists fails,
# which breaks every re-run of this notebook after the first one. Reuse the
# existing dataset when there is one.
# NOTE: any cell that appends examples will add to the existing dataset on re-run.
if client.has_dataset(dataset_name=dataset_name):
    langsmith_dataset = client.read_dataset(dataset_name=dataset_name)
else:
    langsmith_dataset = client.create_dataset(
        dataset_name=dataset_name,
        description="Bloomberg Report on the state of Apple's Intelligence"
    )

In [11]:
# Upload the synthetic samples as LangSmith examples:
#   inputs   -> the question, under the 'messages' key
#   outputs  -> the reference answer, under 'answer'
#   metadata -> the reference contexts the question was generated from
#
# Only populate an empty dataset. Re-running this cell against a dataset that
# already holds examples would append a second copy of every row and skew the
# evaluation. To refresh the dataset with a newly generated test set, delete it
# in LangSmith first.
first_existing_example = next(
    iter(client.list_examples(dataset_id=langsmith_dataset.id, limit=1)),
    None,
)

if first_existing_example is None:
    for data_row in testset_df.itertuples():
        client.create_example(
            inputs={
                'messages': data_row.user_input
            },
            outputs={
                'answer': data_row.reference
            },
            metadata={
                'context': data_row.reference_contexts
            },
            dataset_id=langsmith_dataset.id
        )

In [12]:
from langsmith.evaluation import LangChainStringEvaluator, aevaluate

# Chat model handed to the evaluator as its LLM.
eval_llm = ChatOpenAI(model='gpt-4.1')

# LangChain "qa" string evaluator, configured to use `eval_llm`.
qa_evaluator = LangChainStringEvaluator("qa", config=dict(llm=eval_llm))

In [13]:
async def eval_wrapper(inputs: dict):
    """
    Run the RAG agent on one dataset example and return its final answer.

    Args:
        inputs: Example inputs from the dataset; must contain a "messages"
            key holding the question to ask.

    Returns:
        dict: ``{"output": <content of the last message in the agent response>}``.
    """
    from uuid import uuid4

    # Give every example its own thread id, matching how the agent is invoked
    # elsewhere in this notebook. If the agent keeps per-thread state (not
    # visible from here -- confirm in src.graph), this keeps examples isolated
    # from one another; otherwise the extra config key should be harmless.
    run_config = {'configurable': {'thread_id': str(uuid4())}}

    response = await rag_agent.ainvoke(
        {"messages": inputs["messages"]},
        config=run_config,
    )

    return {'output': response['messages'][-1].content}

In [14]:
eval_result = await aevaluate(
    eval_wrapper,
    data=dataset_name,
    evaluators=[
        qa_evaluator,
    ],
    metadata={"revision_id": "default_chain_init"},
)

View the evaluation results for experiment: 'stupendous-child-61' at:
https://smith.langchain.com/o/82f2f79c-f7d4-4fda-a89e-d181cfe5bf92/datasets/ecc8d5cf-2e7e-425b-a483-4a625658e7ce/compare?selectedSessions=e7ece4be-86c1-45c1-b235-3b1fe8758cd1




0it [00:00, ?it/s]

  vectorstore = Qdrant(
[32m2025-08-24 19:18:51.673[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
IN

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:19:07.220[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:19:22.544[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:19:36.943[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:19:50.692[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:20:04.986[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:20:18.375[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:20:34.147[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:20:49.674[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

GENERATE...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:21:01.521[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
[32m2025-08-24 19:21:15.989[0m | [1mINFO    [0m | [36msrc.knowledge_graph[0m:[36msearch_knowledge_graph[0m:[36m67[0m - [1mSearching knowledge graph...[0m
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://ec63adcb-fe29-4b49-9d9c-311e1e7c903b.us-east4-0.gcp.cloud.qdrant.io:6333/collections/self_corrective_agentic_rag/points/search "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: P

ALL DOCUMENTS ARE NOT RELEVANT TO THE QUESTION, TRANSFORM QUERY...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
