# TruLens Evaluation for RAG Systems

This notebook demonstrates how to evaluate a Retrieval-Augmented Generation (RAG)
pipeline using the **TruLens RAG Triad**:

1. Context Relevance
2. Groundedness
3. Answer Relevance

We will:
- Simulate a RAG pipeline
- Generate good and bad answers
- Evaluate them using LLM-as-a-Judge prompts
- Understand failure modes clearly


In [None]:
# Environment setup helpers. Every command below is commented out, so running
# this cell does nothing; uncomment a line to execute it.
#!pip install trulens-eval
# !pip show faiss-cpu
#!pip install openai

In [None]:
# Commented-out install of the LangChain, FAISS and OpenAI packages; uncomment to run it.
#!pip install langchain langchain-openai faiss-cpu openai

In [None]:
!pip show langchain
#!pip install -U langchain
#!pip install langchain-text-splitters
#!pip install -U langchain langchain-core langchain-community langchain-openai langchain-text-splitters

In [None]:
import os
from getpass import getpass

# Never store the API key in the notebook itself.
# Reuse a key that is already exported in the environment; only when none is
# set, ask for it interactively (getpass does not echo the input).
# Assigning '' unconditionally would overwrite a valid key with an empty one.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass('OpenAI API key: ')

from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document


In [None]:
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

In [None]:
# 1️⃣ Documents
# A two-sentence in-memory corpus keeps retrieval results easy to inspect.
docs = [
    Document(page_content="LangGraph is used to build stateful multi-agent systems."),
    Document(page_content="LangChain provides composable building blocks for LLM apps.")
]

# 2️⃣ Chunking
# Both sample documents are shorter than chunk_size; the splitter is kept so
# the pipeline has the same stages as one built on longer documents.
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
splits = splitter.split_documents(docs)

# 3️⃣ Vector Store
# Embeds the chunks with OpenAI embeddings (requires OPENAI_API_KEY) and
# indexes them in an in-memory FAISS store.
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(splits, embeddings)
retriever = vectorstore.as_retriever()

# 4️⃣ LLM
# temperature=0 is used so repeated runs are as repeatable as the API allows.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# 5️⃣ Prompt
prompt = ChatPromptTemplate.from_template("""
Answer the question using only the provided context.

Context:
{context}

Question:
{question}
""")


def format_docs(retrieved_docs):
    """Join the text of the retrieved documents into a single context string.

    Args:
        retrieved_docs: Iterable of objects exposing a ``page_content`` string
            (the Documents returned by the retriever).

    Returns:
        The page contents separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# 6️⃣ LCEL RAG Chain
# The retriever output is piped through format_docs so the prompt receives
# plain passage text. Passing the retriever alone would insert the string form
# of the Document list (wrapper and metadata included) into {context}.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
)

# 7️⃣ Invoke
# Last expression of the cell, so the model's reply message is displayed.
rag_chain.invoke("What is LangGraph used for?")


In [6]:
from trulens_eval import Tru
from trulens_eval.feedback import Feedback, OpenAI as TruOpenAI

# Smoke test: instantiate the TruLens entry point and the OpenAI-backed
# feedback provider. `tru_openai` supplies the feedback methods used later.
tru = Tru()
tru_openai = TruOpenAI()

# Reaching this line means the imports and both constructors succeeded.
print("✅ TruLens (trulens_eval) is working")


✅ TruLens (trulens_eval) is working


In [19]:
from trulens.core.session import TruSession

# The session object is what the leaderboard is read from further down.
session = TruSession()
# TruOpenAI and Feedback come from the trulens_eval import cell above.
tru_openai = TruOpenAI()


# RAG Triad: one feedback function per check named in the notebook intro.
# NOTE(review): Context Relevance and Groundedness are declared without
# explicit selectors, so it is not visible here which record fields (question,
# retrieved context, answer) they are scored on. Confirm they are bound to the
# retrieved context before trusting the scores.
feedbacks = [
    Feedback(tru_openai.context_relevance, name="Context Relevance"),
    Feedback(tru_openai.groundedness_measure_with_cot_reasons, name="Groundedness"),
    # Third leg of the triad; it was missing from this list although the
    # leaderboard reports it. Scores the app's answer against the question.
    Feedback(tru_openai.relevance, name="Answer Relevance").on_input_output(),
]


In [24]:
# Display the session leaderboard: one row per app/version with the feedback
# scores, latency and total cost (see the output columns below).
# NOTE(review): the cell that wraps rag_chain in a recorder and logs runs is
# not visible in this notebook; on a fresh kernel this table may be empty.
session.get_leaderboard()

Unnamed: 0_level_0,Unnamed: 1_level_0,Answer Relevance,Context Relevance,Groundedness,latency,total_cost
app_name,app_version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RAG,base,1.0,0.5,0.583333,11.265052,0.000939
