# Lesson 2: RAG Triad of metrics

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import utils

import os
import openai
openai.api_key = utils.get_openai_api_key()

In [3]:
from trulens_eval import Tru

tru = Tru()
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [4]:
from llama_index import SimpleDirectoryReader

documents = SimpleDirectoryReader(
    input_files=["./bioengineering-2247773.pdf"]
).load_data()

In [5]:
# Merge all of this content (= multiple document chunks) 
# into a single large document rather than having 
# one document per page which is the default set-up


from llama_index import Document

document = Document(text="\n\n".\
                    join([doc.text for doc in documents]))

In [6]:
# Set up Sentence Index leveraging llama-index utilities

from utils import build_sentence_window_index

from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index"
)

In [7]:
from utils import get_sentence_window_query_engine

sentence_window_engine = \
get_sentence_window_query_engine(sentence_index)

In [8]:
output = sentence_window_engine.query(
    "What are performance metrics applied in this paper?")
output.response

'The performance metrics applied in this paper include skill progression, complementing career goals, starting an AI job search, using informational interviews to find the right job, and finding the right AI job for the individual.'

## Feedback functions

In [9]:
import nest_asyncio

nest_asyncio.apply()

In [10]:
from trulens_eval import OpenAI as fOpenAI

provider = fOpenAI()

### 1. Answer Relevance

In [11]:
from trulens_eval import Feedback

f_qa_relevance = Feedback(
    provider.relevance_with_cot_reasons,
    name="Answer Relevance"
).on_input_output()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .


### 2. Context Relevance

In [12]:
from trulens_eval import TruLlama

context_selection = TruLlama.select_source_nodes().node.text

In [13]:
import numpy as np

f_qs_relevance = (
    Feedback(provider.qs_relevance,
             name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)

✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .


In [14]:
import numpy as np

f_qs_relevance = (
    Feedback(provider.qs_relevance_with_cot_reasons,
             name="Context Relevance")
    .on_input()
    .on(context_selection)
    .aggregate(np.mean)
)

✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .


### 3. Groundedness

In [15]:
from trulens_eval.feedback import Groundedness

grounded = Groundedness(groundedness_provider=provider)

In [16]:
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons,
             name="Groundedness"
            )
    .on(context_selection)
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


## Evaluation of the RAG application

In [17]:
from trulens_eval import TruLlama
from trulens_eval import FeedbackMode

tru_recorder = TruLlama(
    sentence_window_engine,
    app_id="App_1",
    feedbacks=[
        f_qa_relevance,
        f_qs_relevance,
        f_groundedness
    ]
)

In [18]:
eval_questions = []
with open('eval_questions.txt', 'r') as file:
    for line in file:
        # Remove newline character and convert to integer
        item = line.strip()
        eval_questions.append(item)

In [19]:
eval_questions

['What is the name of article?',
 'How the writer used the AI method?',
 'What is the contribution of this paper?',
 'Who are the authors?',
 'How many networks have been applied in this paper?',
 'What deep learning methods has been applied?',
 'What is the result of the paper?']

In [20]:
eval_questions.append("What are the contributions of this paper?")

In [21]:
eval_questions

['What is the name of article?',
 'How the writer used the AI method?',
 'What is the contribution of this paper?',
 'Who are the authors?',
 'How many networks have been applied in this paper?',
 'What deep learning methods has been applied?',
 'What is the result of the paper?',
 'What are the contributions of this paper?']

In [22]:
for question in eval_questions:
    with tru_recorder as recording:
        sentence_window_engine.query(question)

In [23]:
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Groundedness,Answer Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,App_1,"{""app_id"": ""App_1"", ""tags"": ""-"", ""metadata"": {...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_278bf8bc6cd493a334910bf3f7bd129d,"""What is the name of article?""","""Using Informational Interviews to Find the Ri...",-,"{""record_id"": ""record_hash_278bf8bc6cd493a3349...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-05-16T03:10:39.000634"", ""...",2025-05-16T03:10:40.258454,1.0,1.0,[{'args': {'prompt': 'What is the name of arti...,[{'args': {'source': 'No one is an expert at e...,1,412,0.000623
1,App_1,"{""app_id"": ""App_1"", ""tags"": ""-"", ""metadata"": {...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_ab1f28f7055d269bde7bf93110ca011a,"""How the writer used the AI method?""","""The writer used the AI method by identifying ...",-,"{""record_id"": ""record_hash_ab1f28f7055d269bde7...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-05-16T03:10:40.381736"", ""...",2025-05-16T03:10:42.282064,1.0,,[{'args': {'prompt': 'How the writer used the ...,,1,431,0.000677
2,App_1,"{""app_id"": ""App_1"", ""tags"": ""-"", ""metadata"": {...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_410555a1f93a52376d8437b64e3960cc,"""What is the contribution of this paper?""","""The contribution of this paper lies in provid...",-,"{""record_id"": ""record_hash_410555a1f93a52376d8...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-05-16T03:10:42.393181"", ""...",2025-05-16T03:10:44.287177,1.0,,[{'args': {'prompt': 'What is the contribution...,,1,450,0.000723
3,App_1,"{""app_id"": ""App_1"", ""tags"": ""-"", ""metadata"": {...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_c8abaccdc3ac06c7cc29ad1dc3030669,"""Who are the authors?""","""The authors are not explicitly mentioned in t...",-,"{""record_id"": ""record_hash_c8abaccdc3ac06c7cc2...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-05-16T03:10:44.400690"", ""...",2025-05-16T03:10:45.491387,,,,,1,355,0.000539
4,App_1,"{""app_id"": ""App_1"", ""tags"": ""-"", ""metadata"": {...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_2103cfae97ad58d877be75d158716718,"""How many networks have been applied in this p...","""In this paper, neural networks have been appl...",-,"{""record_id"": ""record_hash_2103cfae97ad58d877b...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2025-05-16T03:10:45.605907"", ""...",2025-05-16T03:10:46.846284,,,,,1,542,0.000819


In [24]:
import pandas as pd

pd.set_option("display.max_colwidth", None)
records[["input", "output"] + feedback]

Unnamed: 0,input,output,Groundedness,Answer Relevance
0,"""What is the name of article?""","""Using Informational Interviews to Find the Right Job""",1.0,1.0
1,"""How the writer used the AI method?""","""The writer used the AI method by identifying a business problem through conversations with domain experts, then brainstorming AI solutions to address the issues raised. This approach involved seeking input from experts on what aspects could be improved and why they were not functioning optimally, followed by generating AI-based solutions to tackle the identified problems.""",,1.0
2,"""What is the contribution of this paper?""","""The contribution of this paper lies in providing guidance on assessing the feasibility and value of potential solutions in AI projects. It emphasizes the importance of conducting quick proof of concept implementations to gain missing perspectives, determining technical feasibility by reviewing existing work or competitor approaches, and assessing value by consulting domain experts. Additionally, the paper highlights the significance of budgeting for necessary project resources such as data, personnel, time, and integrations, ensuring that all essential elements are considered for successful project completion.""",,1.0
3,"""Who are the authors?""","""The authors are not explicitly mentioned in the provided context information.""",,
4,"""How many networks have been applied in this paper?""","""In this paper, neural networks have been applied.""",,
5,"""What deep learning methods has been applied?""","""Understanding the math behind gradient descent, momentum, and the Adam optimization algorithm has been applied in deep learning methods.""",,
6,"""What is the result of the paper?""","""The result of the paper is a structured guide that outlines the process of learning foundational technical skills, working on projects, and continuously learning in the field of AI. It emphasizes the importance of gaining foundational technical skills, working on projects to apply those skills, and continuing to learn and grow in the field.""",,
7,"""What are the contributions of this paper?""","""The contributions of this paper include emphasizing the importance of personal projects and class projects in gaining technical growth, resources, and project opportunities. It also highlights the value of creating value through quick proof of concept implementations to assess feasibility and consulting with domain experts to determine the worth of potential solutions. Additionally, the paper stresses the significance of budgeting for necessary resources such as data, personnel, time, and integrations to ensure project success.""",,


In [25]:
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Groundedness,Answer Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
App_1,1.0,1.0,1.0,0.000702


In [26]:
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at https://s172-29-98-243p38560.lab-aws-production.deeplearning.ai/ .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>