# 📓 _TruLens-Eval_ Internals Overview

## Setup

In [1]:
# pip uninstall -y trulens_eval
# pip install git+https://github.com/truera/trulens@piotrm/azure_bugfixes#subdirectory=trulens_eval

%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# Locate a checkout of the repository by looking for a directory that contains
# "trulens_eval/trulens_eval", starting at the working directory and moving up.
# The working directory and each ancestor are checked exactly once, so the
# search stops at the filesystem root instead of looping forever there (the
# root is its own parent) when the notebook is run outside the repository.
cwd = Path.cwd()
repo_root = next(
    (
        candidate
        for candidate in (cwd, *cwd.parents)
        if (candidate / "trulens_eval" / "trulens_eval").exists()
    ),
    None,
)

if repo_root is not None:
    base = repo_root / "trulens_eval"
    print(base)

    # If running from github repo, can use this:
    sys.path.append(str(base))
else:
    # No checkout found: leave sys.path untouched so the imports below resolve
    # against whatever trulens_eval is installed in the environment.
    base = cwd
    print(
        "trulens_eval source tree not found above {}; "
        "using the installed package.".format(cwd)
    )

from trulens_eval.keys import check_keys

# Check that both API keys are available before anything else runs. In the
# recorded output each key is reported as set from the environment.
check_keys(
    "OPENAI_API_KEY",
    "HUGGINGFACE_API_KEY"
)

from trulens_eval import Tru
# Create the Tru object; the recorded output shows the database url it was
# initialized with (sqlite:///default.sqlite in that run).
tru = Tru()
# NOTE(review): presumably discards previously logged data so the notebook
# starts from an empty database -- confirm before running against a database
# whose contents matter.
tru.reset_database()
# Disabled: presumably starts the dashboard against the local checkout found
# above (`base`) -- confirm the meaning of `_dev` before enabling.
# tru.run_dashboard(_dev=base, force=True)

/Volumes/dev_new/trulens/trulens_eval
✅ Key OPENAI_API_KEY set from environment (same value found in .env file at /Volumes/dev_new/.env).
✅ Key HUGGINGFACE_API_KEY set from environment (same value found in .env file at /Volumes/dev_new/.env).
🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of Tru` to prevent this.


In [2]:
from examples.expositional.end2end_apps.custom_app.custom_app import CustomApp
from examples.expositional.end2end_apps.custom_app.custom_tool import CustomStackTool

from trulens_eval.tru_custom_app import TruCustomApp

In [3]:
# Build the example app, then wrap it in a TruCustomApp. The wrapper is the
# object a later cell refers to as a recorder (`with tru_custom_app as recorder`).
custom_app = CustomApp()
tru_custom_app = TruCustomApp(custom_app)

In [10]:
# The recording context manager below is commented out, so the query is sent
# directly to the app object rather than inside a `with` block.
# NOTE(review): presumably this call is what populates
# `CustomStackTool.last_stack`, which the next cell reads -- confirm.
#with tru_custom_app as recorder:
custom_app.respond_to_query("hello")

'The answer to hello is probably Generating from "RELEVANT CHUNK: HELLO,<frame at 0x2caa26340, file \'/Volumes/dev_new/trulens/trulens_eval/examples/expositional/end2end_apps/custom_app/custom_tool.py\', line 79, code invoke>\\n<frame at 0x2c7f42790, file \'/Volumes/dev_new/trulens/trulens_eval/trulens_eval/instruments.py\', line 456, code tru_wrapper>\\n<frame at 0x2c280c040, file \'/Volumes/dev_new/trulens/trulens_eval/examples/expositional/end2end_apps/custom_app/custom_app.py\', line 81, code process_chunk_by_random_tool>\\n<frame at 0x2c7f42ea0, file \'/Volumes/dev_new/trulens/trulens_eval/trulens_eval/instruments.py\', line 456, code tru_wrapper>\\n<frame at 0x2caa3a7a0, file \'/Volumes/dev_new/trulens/trulens_eval/trulens_eval/utils/python.py\', line 475, code _future_target_wrapper>\\n<frame at 0x2caa3a5c0, file \'/Volumes/dev_new/trulens/trulens_eval/.conda/py-opt-3.10/lib/python3.10/concurrent/futures/thread.py\', line 58, code run>\\n<frame at 0x2caa40be0, file \'/Volumes/de

In [11]:
import pandas as pd
from ipydatagrid import DataGrid
from pprint import pformat

def crop_end(s, n=32):
    """Return the text of `s`, shortened to its last `n` characters.

    Args:
        s: Any object; it is converted with `str()` before cropping.
        n: Maximum number of trailing characters to keep. When it is zero or
            negative, no trailing characters are kept.

    Returns:
        The full text when it is at most `n` characters long, otherwise
        "..." followed by the final `n` characters of the text.
    """
    text = str(s)
    if len(text) <= n:
        return text
    # Slice from an explicit start index: `text[-n:]` would return the whole
    # string for n == 0, because -0 == 0.
    return "..." + text[len(text) - n:]

# One row per captured stack frame: the frame's source file name, the names of
# its local variables, and the name of its code object, each cropped for
# display.
# Every row is materialised as a list immediately. Storing the lazy `map`
# object instead would let the cell that builds the DataFrame consume it once,
# so re-running that cell on its own would only find exhausted iterators.
rows = []
for f in CustomStackTool.last_stack:
    rows.append([
        crop_end(field)
        for field in (
            f.f_code.co_filename,
            list(f.f_locals.keys()),
            f.f_code.co_name,
        )
    ])

In [12]:
# Tabulate the per-frame rows built in the previous cell. Note that the first
# column is labelled 'f_code' but holds each frame's file name
# (f.f_code.co_filename), and 'f_locals' holds only the local variable names.
df = pd.DataFrame(rows, columns=['f_code', 'f_locals', 'f_code.co_name'])

In [13]:
DataGrid(df)

DataGrid(auto_fit_params={'area': 'all', 'padding': 30, 'numCols': None}, corner_renderer=None, default_render…

## Initialize Feedback Function(s)

In [None]:
from trulens_eval.feedback.provider import OpenAI
from trulens_eval import Feedback
import numpy as np

# Initialize provider class
provider = OpenAI()

# Select the context to be used in feedback. The location of the context is app specific.
# NOTE(review): `rag_chain` is not defined in any cell visible in this
# notebook; unless it is created elsewhere, this line raises NameError on a
# fresh kernel.
from trulens_eval.app import App
context = App.select_context(rag_chain)

from trulens_eval.feedback import Groundedness
# NOTE(review): a second OpenAI() provider is constructed here instead of
# reusing `provider` from above -- confirm whether that is intentional.
grounded = Groundedness(groundedness_provider=OpenAI())
# Define a groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance)
    .on_input_output()
)
# Question/statement relevance between question and each context chunk.
# The per-chunk results are combined with np.mean.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

## Instrument chain for logging with TruLens

In [None]:
# Wrap the chain in a recorder configured with the three feedback functions
# defined in the previous section. Later cells look records up by this app_id.
# NOTE(review): neither `TruChain` nor `rag_chain` is imported or defined in
# the cells visible in this notebook -- confirm where they come from.
tru_recorder = TruChain(rag_chain,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness])

In [None]:
response, tru_record = tru_recorder.with_record(rag_chain.invoke, "What is Task Decomposition?")

In [None]:
json_like = tru_record.layout_calls_as_app()

In [None]:
json_like

In [None]:
from ipytree import Tree, Node

def display_call_stack(data):
    """Render a record, given as nested key/value data, as an ipytree ``Tree``.

    Each summary field of the record becomes one top-level node. They are
    followed by a 'Calls' node holding one 'Call' child per entry of
    ``data['calls']``. Every entry of a call's ``'stack'`` is shown as a
    'Step' node labelled with its ``'path'``; a step that has an
    ``'expanded'`` key also gets an 'Expanded' child listing those nested
    steps by their ``'path'``.

    Args:
        data: Object indexable by the keys read here: 'record_id', 'app_id',
            'cost', 'perf', 'ts', 'tags', 'main_input', 'main_output',
            'main_error' and 'calls'.

    Returns:
        The populated ``Tree``.
    """
    tree = Tree()

    # (label, key) pairs for the summary nodes, in display order.
    summary_fields = (
        ('Record ID', 'record_id'),
        ('App ID', 'app_id'),
        ('Cost', 'cost'),
        ('Performance', 'perf'),
        ('Timestamp', 'ts'),
        ('Tags', 'tags'),
        ('Main Input', 'main_input'),
        ('Main Output', 'main_output'),
        ('Main Error', 'main_error'),
    )
    for label, key in summary_fields:
        tree.add_node(Node('{}: {}'.format(label, data[key])))

    calls_root = Node('Calls')
    tree.add_node(calls_root)

    for call in data['calls']:
        call_branch = Node('Call')
        calls_root.add_node(call_branch)

        for frame in call['stack']:
            frame_node = Node('Step: {}'.format(frame['path']))
            call_branch.add_node(frame_node)

            # Only steps that carry nested steps get an 'Expanded' branch.
            if 'expanded' not in frame:
                continue

            expanded_branch = Node('Expanded')
            frame_node.add_node(expanded_branch)
            for nested in frame['expanded']:
                nested_node = Node('Step: {}'.format(nested['path']))
                expanded_branch.add_node(nested_node)

    return tree

# Usage
tree = display_call_stack(json_like)
tree

In [None]:
tree

In [None]:
# Invoke the chain inside the recorder's context. The `recording` object bound
# here is what the next section reads the resulting record from.
with tru_recorder as recording:
    llm_response = rag_chain.invoke("What is Task Decomposition?")

# Show the chain's response for this query.
display(llm_response)

## Retrieve records and feedback

In [None]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get() # use .get if only one record
# recs = recording.records # use .records if multiple

display(rec)

In [None]:
# The results of the feedback functions can be retrieved from
# `Record.feedback_results` or using the `wait_for_feedback_results` method.
# If retrieved directly, the results are `Future` instances (see
# `concurrent.futures`). You can use `as_completed` to wait until they have
# finished evaluating, or use the utility method:

for feedback, feedback_result in rec.wait_for_feedback_results().items():
    print(feedback.name, feedback_result.result)

# See more about wait_for_feedback_results:
# help(rec.wait_for_feedback_results)

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

In [None]:
tru.get_leaderboard(app_ids=["Chain1_ChatApplication"])

## Explore in a Dashboard

In [None]:
tru.run_dashboard() # open a local streamlit app to explore

# tru.stop_dashboard() # stop if needed

Alternatively, you can run `trulens-eval` from a command line in the same folder to start the dashboard.

Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.