# 在 LlamaIndex 示例基础上使用 LiteLLM

参考： https://github.com/truera/trulens/blob/main/trulens_eval/examples/quickstart/llama_index_quickstart.ipynb

因为直接使用会报错，改为使用 LiteLLM 替代之前的 LLM

In [1]:
%%time

import os

base_url = "http://ape:3000/v1"
api_key = "sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750"

os.environ["OPENAI_API_KEY"] = api_key
os.environ["OPENAI_API_BASE"] = base_url

CPU times: user 9 µs, sys: 1 µs, total: 10 µs
Wall time: 11.7 µs


In [2]:
from trulens_eval import Tru

tru = Tru()
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [6]:
%%time

from llama_index.core import Settings
from llama_index.core import SimpleDirectoryReader
from llama_index.core import VectorStoreIndex
# from llama_index.llms.openai import OpenAI
from llama_index.llms.openai_like import OpenAILike

Settings.chunk_size = 128
Settings.chunk_overlap = 16
Settings.llm = OpenAILike(
    model="qwen2", 
    api_base=base_url, 
    api_key=api_key,
    is_chat_model=True,
    temperature=0.1,
    request_timeout=60.0
)

documents = SimpleDirectoryReader("data2").load_data()
index = VectorStoreIndex.from_documents(documents)

query_engine = index.as_query_engine(similarity_top_k=3)

CPU times: user 2.05 s, sys: 11.2 ms, total: 2.06 s
Wall time: 11 s


In [7]:
%%time

response = query_engine.query("What did the author do growing up?")
print(response)

Growing up, the author focused on writing short stories and programming. They didn't engage in traditional essay writing but instead wrote stories that were characterized by strong emotions among characters with little plot development. The author also mentions experiencing moments of intellectual engagement, such as solving a problem involving continuations while watching children play at the coast in 2015.
CPU times: user 78 ms, sys: 130 µs, total: 78.1 ms
Wall time: 3.71 s


In [8]:
%%time

import numpy as np

from trulens_eval import Feedback
# from trulens_eval.feedback.provider import OpenAI
from trulens_eval import LiteLLM

# Initialize provider class
provider = LiteLLM(
    model_engine="gpt-3.5-turbo",
)

# select context to be used in feedback. the location of context is app specific.
from trulens_eval.app import App

context = App.select_context(query_engine)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(context.collect())  # collect context chunks into a list
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = Feedback(
    provider.relevance_with_cot_reasons, name="Answer Relevance"
).on_input_output()
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(
        provider.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .
CPU times: user 74.7 ms, sys: 185 µs, total: 74.9 ms
Wall time: 74.3 ms


In [9]:
%%time

from trulens_eval import TruLlama

tru_query_engine_recorder = TruLlama(
    query_engine,
    app_id="LlamaIndex_App1",
    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance],
)

CPU times: user 281 ms, sys: 74 µs, total: 281 ms
Wall time: 292 ms


In [11]:
import nltk
# [nltk_data] Error loading punkt: <urlopen error [Errno 111] Connection
nltk.set_proxy('http://myproxy:7890')

In [12]:
%%time

# or as context manager
with tru_query_engine_recorder as recording:
    query_engine.query("What did the author do growing up?")

CPU times: user 1.97 s, sys: 13.4 ms, total: 1.98 s
Wall time: 4.3 s


In [13]:
%%time

last_record = recording.records[-1]

from trulens_eval.utils.display import get_feedback_result

get_feedback_result(last_record, "Context Relevance")

CPU times: user 5.39 ms, sys: 106 µs, total: 5.5 ms
Wall time: 5.13 ms


Unnamed: 0,question,context,ret
0,What did the author do growing up?,What I Worked On\n\nFebruary 2021\n\nBefore co...,0.8
1,What did the author do growing up?,I remember that I answered the essay question ...,0.3
2,What did the author do growing up?,I remember taking the boys to the coast on a s...,0.2


In [14]:
%%time

from trulens_eval.guardrails.llama import WithFeedbackFilterNodes

# note: feedback function used for guardrail must only return a score, not also reasons
f_context_relevance_score = Feedback(provider.context_relevance)

filtered_query_engine = WithFeedbackFilterNodes(
    query_engine, feedback=f_context_relevance_score, threshold=0.5
)

CPU times: user 9.47 ms, sys: 118 µs, total: 9.59 ms
Wall time: 9.37 ms


In [15]:
%%time

tru_recorder = TruLlama(
    filtered_query_engine,
    app_id="LlamaIndex_App1_Filtered",
    feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness],
)

with tru_recorder as recording:
    llm_response = filtered_query_engine.query(
        "What did the author do growing up?"
    )

display(llm_response)

Response(response='Growing up, the author focused on writing short stories and programming outside of school.', source_nodes=[NodeWithScore(node=TextNode(id_='84343470-d803-4c3b-96e6-d2f116021386', embedding=None, metadata={'file_path': '/root/notebook/my-jupyter-notebook/llm/trulens/data2/paul.txt', 'file_name': 'paul.txt', 'file_type': 'text/plain', 'file_size': 75042, 'creation_date': '2024-08-06', 'last_modified_date': '2024-08-06'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='f4f8e222-b752-4f63-b272-1372556d1f09', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/root/notebook/my-jupyter-notebook/llm/trulens/data2/paul.txt', 'file_name': 'paul.txt', 'file_type': 'text/plain', 'file_siz

CPU times: user 2.91 s, sys: 39 ms, total: 2.95 s
Wall time: 4.78 s


In [16]:
%%time

last_record = recording.records[-1]

from trulens_eval.utils.display import get_feedback_result

get_feedback_result(last_record, "Context Relevance")

CPU times: user 553 µs, sys: 38 µs, total: 591 µs
Wall time: 595 µs


Unnamed: 0,question,context,ret
0,What did the author do growing up?,What I Worked On\n\nFebruary 2021\n\nBefore co...,0.8


In [17]:
tru.get_leaderboard()

Unnamed: 0_level_0,Answer Relevance,Groundedness,Context Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LlamaIndex_App1_Filtered,0.9,1.0,0.8,6.0,0.004869
LlamaIndex_App1,0.9,1.0,0.4,4.5,0.0


In [18]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get()  # use .get if only one record
# recs = recording.records # use .records if multiple

display(rec)

Record(record_id='record_hash_1ef9735cf2ffb6066a30bf0abd8c8db5', app_id='LlamaIndex_App1_Filtered', cost=Cost(n_requests=5, n_successful_requests=15, n_classes=0, n_tokens=3526, n_stream_chunks=0, n_prompt_tokens=3500, n_completion_tokens=26, cost=0.004869), perf=Perf(start_time=datetime.datetime(2024, 8, 7, 15, 2, 50, 56257), end_time=datetime.datetime(2024, 8, 7, 15, 2, 54, 224286)), ts=datetime.datetime(2024, 8, 7, 15, 2, 54, 224937), tags='-', meta=None, main_input='What did the author do growing up?', main_output='Growing up, the author focused on writing short stories and programming outside of school.', main_error=None, calls=[RecordAppCall(call_id='e09967c9-8ae4-456c-88bf-ac27d018737e', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=trulens_eval.guardrails.llama.WithFeedbackFilterNodes, id=140368941480640, init_bindings=None), name='query')), RecordAppCallMethod(path=Lens().app.query_engine, method=Method(obj=Obj(cls=llama_index.core.query_engine.retrieve

In [19]:
for feedback, feedback_result in rec.wait_for_feedback_results().items():
    print(feedback.name, feedback_result.result)

Answer Relevance 0.9
Context Relevance 0.8
Groundedness 1.0


In [20]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Context Relevance,Answer Relevance,Groundedness,Context Relevance_calls,Answer Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,LlamaIndex_App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_9988bb572119a5a3d59c306457772887,"""What did the author do growing up?""","""Growing up, the author focused on writing sho...",-,"{""record_id"": ""record_hash_9988bb572119a5a3d59...","{""n_requests"": 2, ""n_successful_requests"": 3, ...","{""start_time"": ""2024-08-07T15:00:37.025989"", ""...",2024-08-07T15:00:43.413037,0.366667,0.9,1.0,[{'args': {'question': 'What did the author do...,[{'args': {'prompt': 'What did the author do g...,"[{'args': {'source': [""What I Worked On\n\nFeb...",6,546,0.0
1,LlamaIndex_App1,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_b0ad28bf3b3327bd6cb17dd7b5260906,"""What did the author do growing up?""","""Growing up, the author focused on writing sho...",-,"{""record_id"": ""record_hash_b0ad28bf3b3327bd6cb...","{""n_requests"": 2, ""n_successful_requests"": 3, ...","{""start_time"": ""2024-08-07T15:01:01.665013"", ""...",2024-08-07T15:01:05.625065,0.433333,0.9,1.0,[{'args': {'question': 'What did the author do...,[{'args': {'prompt': 'What did the author do g...,"[{'args': {'source': [""What I Worked On\n\nFeb...",3,568,0.0
2,LlamaIndex_App1_Filtered,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",WithFeedbackFilterNodes(trulens_eval.guardrail...,record_hash_1ef9735cf2ffb6066a30bf0abd8c8db5,"""What did the author do growing up?""","""Growing up, the author focused on writing sho...",-,"{""record_id"": ""record_hash_1ef9735cf2ffb6066a3...","{""n_requests"": 5, ""n_successful_requests"": 15,...","{""start_time"": ""2024-08-07T15:02:50.056257"", ""...",2024-08-07T15:02:54.224937,0.8,0.9,1.0,[{'args': {'question': 'What did the author do...,[{'args': {'prompt': 'What did the author do g...,"[{'args': {'source': [""What I Worked On\n\nFeb...",6,3526,0.004869


In [21]:
tru.get_leaderboard(app_ids=[])

Unnamed: 0_level_0,Answer Relevance,Groundedness,Context Relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LlamaIndex_App1_Filtered,0.9,1.0,0.8,6.0,0.004869
LlamaIndex_App1,0.9,1.0,0.4,4.5,0.0
