# RAG

This notebook applies RAG to the Medicare Handbook demo.

* See https://github.com/nicknochnack/Llama2RAG/blob/main/app.py
* See https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/response_modes.html

In [1]:
# Import helper functions
import sys
sys.path.insert(1, '/mnt/efs/data/AIEresearch/')
import aie_helper_functions as aie_helper

In [2]:
# Imports
import os
import configparser
import torch
from llama_index import (SimpleDirectoryReader, 
                         Document, 
                         ServiceContext, 
                         VectorStoreIndex)
from llama_index.llms import HuggingFaceLLM 
from llama_index.prompts import PromptTemplate
#https://docs.llamaindex.ai/en/stable/api_reference/llms/huggingface.html
#https://docs.llamaindex.ai/en/stable/examples/customization/llms/SimpleIndexDemo-Huggingface_stablelm.html
from llama_index.embeddings import LangchainEmbedding
from llama_index.response.notebook_utils import display_response
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [None]:
# # Initialize config parser
# config = configparser.ConfigParser()
# config.read("/mnt/efs/data/AIEresearch/config.ini")
# # Set the OpenAI authorization token 
# openai_key = config['openai']['api_key']
# os.environ['OPENAI_API_KEY'] = openai_key

# Read in Data

In [3]:
# Set paths
path_handbook_2023 = '/mnt/efs/data/AIEresearch/demo_medicare_handbook/data/Medicare-and-You.2023 National Version.pdf'
path_handbook_2024 = '/mnt/efs/data/AIEresearch/demo_medicare_handbook/data/10050-Medicare-and-You.pdf'

In [4]:
# Read in PDF(s) into a llamaindex document object
# documents = SimpleDirectoryReader(input_files=[path_handbook_2024]).load_data()
documents = SimpleDirectoryReader(input_files=[path_handbook_2023, 
                                               path_handbook_2024]).load_data()

In [5]:
# Print stats on document
print(f"Documents type: {type(documents)}")
print(f"Number of pages: {len(documents)}")
print(f"Sub-document type: {type(documents[0])}")
print(f"{documents[0]}")

Documents type: <class 'list'>
Number of pages: 256
Sub-document type: <class 'llama_index.schema.Document'>
Doc ID: 60c290f1-642f-4134-a81e-d90e889bbc5c
Text: 2023Medicare & YouThe official U.S. government  Medicare
handbook


Merge documents into one to help with overall text accuracy in more advanced retrieval methods, such as advanced window retrieval as well as auto merging retrieval.

> CAN THIS IMPACT GETTING SOURCE DATA?

In [9]:
# # Merge documents into one
# document = Document(text="\n\n".join([doc.text for doc in documents]))

# Llama2

Set service context. Embedding models [Hugging Face Leaderboard](https://huggingface.co/spaces/mteb/leaderboard "Hugging Face").

https://huggingface.co/WhereIsAI/UAE-Large-V1?library=true

In [5]:
# Load llama2 model and tokenizer
model, tokenizer = aie_helper.load_llama2_model(7)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [9]:
# Set the system prompt
system_prompt = '''
<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as 
helpfully as possible, while being safe. Your answers should not include
any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain 
why instead of answering something not correct. If you don't know the answer 
to a question, please don't share false information.
[/INST]
'''
# <</SYS>>
# """

In [8]:
# Create a HF LLM using the llama index wrapper 
llm = HuggingFaceLLM(context_window=4096,
                     max_new_tokens=2048, #256,
                     system_prompt=system_prompt,
                    #  query_wrapper_prompt=query_wrapper_prompt,
                     query_wrapper_prompt=PromptTemplate("<s> [INST] {query_str} [/INST] "),
                     device_map="auto",
                     model_kwargs={"quantization_config": aie_helper.NF4_CONFIG},
                     model=model,
                     tokenizer=tokenizer)

In [12]:
# # Set the embedding model 
# embed_model = AutoModel.from_pretrained('/mnt/efs/data/saved_models/UAE-Large-V1/model')

In [None]:
# # Create and dl embeddings instance  
# embeddings = LangchainEmbedding(HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"))

In [10]:
# Create new service context instance
# service_context = ServiceContext.from_defaults(chunk_size=1024,
#                                                llm=llm,
#                                                embed_model=embeddings)
service_context = ServiceContext.from_defaults(llm=llm, 
                                               embed_model="local:/mnt/efs/data/saved_models/BAAI/bge-small-en-v1.5/model/")

In [11]:
# Create the index
index = VectorStoreIndex.from_documents(documents,
                                        service_context=service_context)

In [12]:
# Create the query engine
# query_engine = index.as_query_engine()
query_engine = index.as_query_engine(streaming=True)

In [13]:
eval_questions = ['How many parts does medicare have?',
                  'How old do you have to be to get medicare coverage?',
                  'Does medicare cover all costs or should I expect to pay out-of-pocket?']

In [None]:
# Ask a test question
response = query_engine.query(eval_questions[0])
# display_response(response)
# response.get_response()

In [6]:
qe = aie_helper.load_llama2_rag(model, tokenizer, [path_handbook_2023, path_handbook_2024])


Documents type: <class 'list'>
Number of pages: 256
Sub-document type: <class 'llama_index.schema.Document'>
Doc ID: 698d3528-4246-44c3-b348-2b84625ea4f5
Text: 2023Medicare & YouThe official U.S. government  Medicare
handbook


service_context created.

index created.

query_engine loaded.



In [20]:
# Ask a test question
response = qe.query(eval_questions[0])
# display_response(response)
# response.get_response()

In [21]:
print(response)

Medicare has 3 main parts: Part A (Hospital Insurance), Part B (Medical Insurance), and Part D (Drug coverage).</s>


In [15]:
display_response(response.print_response_stream())

Based on the provided context information, there are three parts of Medicare:

1. Part A (Hospital Insurance): Helps cover inpatient care in hospitals, skilled nursing facility care, hospice care, and home health care.
2. Part B (Medical Insurance): Helps cover services from doctors and other health care providers, outpatient care, home health care, durable medical equipment, and many preventive services.
3. Part D (Drug coverage): Helps cover the cost of prescription drugs, including many recommended shots or vaccines.

Therefore, the answer to the query is three.</s>

AttributeError: 'NoneType' object has no attribute 'response'

In [20]:
# Get doc ID
doc_id = response.get_formatted_sources()
print(doc_id)

> Source (Doc id: 7ffbe867-4d70-4245-a62e-47530f77a2b4): If your birthday is on the first of the month, your 7-month period starts 4 months before the mon...

> Source (Doc id: fb992459-549c-4dbf-a40a-2576c012b247): Preventive service  
Hepatitis C screenings 
Medicare covers one Hepatitis C screening test if yo...


### ReAct Agent Mode

In [16]:
service_context = ServiceContext.from_defaults(llm=llm,
                                               embed_model='local')

In [None]:
chat_engine = index.as_chat_engine(service_context=service_context, 
                                   chat_mode="react", 
                                   verbose=True)

In [None]:
# Ask a test question
response = chat_engine.chat(eval_questions[0])
print(str(response))

TemplateError: Conversation roles must alternate user/assistant/user/assistant/...

In [None]:
custom_prompt = PromptTemplate("""\
Given a conversation (between Human and Assistant) and a follow up message from Human, \
rewrite the message to be a standalone question that captures all relevant context \
from the conversation.

<Chat History>
{chat_history}

<Follow Up Message>
{question}

<Standalone question>
""")

In [None]:
chat_engine = index.as_chat_engine(service_context=service_context, 
                                   chat_mode="react", 
                                   condense_question_prompt=custom_prompt,
                                   verbose=True)

# Mistral

* See https://colab.research.google.com/drive/1ZAdrabTJmZ_etDp10rjij_zME2Q3umAQ?usp=sharing#scrollTo=lMNaHDzPM68f
* See https://github.com/mickymultani/RAG-Mistral7b/blob/main/RAG_testing_mistral7b.ipynb

In [6]:
# Load mistral model and tokenizer
model, tokenizer = aie_helper.load_mistral7b_model()

Loading checkpoint shards:   0%|          | 0/6 [00:00<?, ?it/s]

In [None]:
# Set the system prompt
system_prompt = '''
You are a helpful, respectful and honest assistant. Always answer as 
helpfully as possible, while being safe. Your answers should not include
any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain 
why instead of answering something not correct. If you don't know the answer 
to a question, please don't share false information.
'''

In [None]:
query_wrapper_prompt = "<|USER|>{query_str}<|ASSISTANT|>"

In [None]:
# Create a HF LLM using the index wrapper 
llm = HuggingFaceLLM(context_window=4096,
                     max_new_tokens=2048, #256,
                     system_prompt=system_prompt,
                    #  query_wrapper_prompt=query_wrapper_prompt,
                     query_wrapper_prompt=PromptTemplate("<s>[INST] {query_str} [/INST] </s>\n"),
                    #  generate_kwargs={"temperature": 0.3, "do_sample": True},
                     generate_kwargs={"temperature": 0.2, "top_k": 5, "top_p": 0.95},
                     tokenizer_kwargs={"max_length": 4096},
                    #  model_kwargs={"torch_dtype": torch.float16}, #usually commented away
                     model_kwargs={"quantization_config": aie_helper.NF4_CONFIG},
                     device_map="auto",
                     model=model,
                     tokenizer=tokenizer)

In [10]:
# Create and dl embeddings instance  
embeddings = LangchainEmbedding(HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"))

In [21]:
# Create new service context instance
# service_context = ServiceContext.from_defaults(chunk_size=1024,
#                                                llm=llm,
#                                                embed_model=embeddings)

service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:/mnt/efs/data/saved_models/BAAI/bge-small-en-v1.5/model/")

In [22]:
# Create the index
index = VectorStoreIndex.from_documents(documents,
                                        service_context=service_context)

In [13]:
eval_questions = ['How many parts does medicare have?',
                  'How old do you have to be to get medicare coverage?',
                  'Does medicare cover all costs or should I expect to pay out-of-pocket?']

## Query Engine

In [14]:
# Create the query engine
# query_engine = index.as_query_engine()
query_engine = index.as_query_engine(streaming=True)

In [15]:
# Ask a test question
response = query_engine.query(eval_questions[0])
print(str(response))

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Medicare is made up of four parts: Part A (Hospital Insurance), Part B (Medical Insurance), Part D (Drug coverage), and various supplemental insurance plans. Each part covers different aspects of healthcare costs. For more detailed information, you can refer to pages 9 to 75 in the provided document.</s>


In [None]:
response.response

In [16]:
response.get_response()

Response(response='Medicare is made up of four parts: Part A (Hospital Insurance), Part B (Medical Insurance), Part D (Drug coverage), and various supplemental insurance plans. Each part covers different aspects of healthcare costs. For more detailed information, you can refer to pages 9 to 75 in the provided document.</s>', source_nodes=[NodeWithScore(node=TextNode(id_='17e5f1b0-c3d7-42b8-a6f7-1ccb92270146', embedding=None, metadata={'page_label': '9', 'file_name': '10050-Medicare-and-You.pdf', 'file_path': '/mnt/efs/data/AIEresearch/demo_medicare_handbook/data/10050-Medicare-and-You.pdf', 'file_type': 'application/pdf', 'file_size': 3323573, 'creation_date': '2023-12-12', 'last_modified_date': '2023-12-12', 'last_accessed_date': '2023-12-12'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_a

In [21]:
response.print_response_stream()

Medicare is divided into four parts: Part A (Hospital Insurance), Part B (Medical Insurance), Part D (Drug coverage), and various supplementary insurance plans. Each part covers different aspects of healthcare expenses. For more details, you can refer to pages 25-29, 29-55, and 79-90 in the Medicare handbook.</s>


In [18]:
response.source_nodes

[NodeWithScore(node=TextNode(id_='17e5f1b0-c3d7-42b8-a6f7-1ccb92270146', embedding=None, metadata={'page_label': '9', 'file_name': '10050-Medicare-and-You.pdf', 'file_path': '/mnt/efs/data/AIEresearch/demo_medicare_handbook/data/10050-Medicare-and-You.pdf', 'file_type': 'application/pdf', 'file_size': 3323573, 'creation_date': '2023-12-12', 'last_modified_date': '2023-12-12', 'last_accessed_date': '2023-12-12'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='ea14dd48-caf1-40d7-8565-4696c959e2f6', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'page_label': '9', 'file_name': '10050-Medicare-and-You.pdf', 'file_path': '/mnt/efs/data/AIEresearch/demo_medicare_handbook/data/10050-Medicare-and-You.pdf', 'file_

## Get source data

In [25]:
node_w_score1 = response.source_nodes[0]

In [30]:
node_w_score1.metadata

{'page_label': '9',
 'file_name': '10050-Medicare-and-You.pdf',
 'file_path': '/mnt/efs/data/AIEresearch/demo_medicare_handbook/data/10050-Medicare-and-You.pdf',
 'file_type': 'application/pdf',
 'file_size': 3323573,
 'creation_date': '2023-12-12',
 'last_modified_date': '2023-12-12',
 'last_accessed_date': '2023-12-12'}

In [27]:
print(node_w_score1.get_content())

9
What are the parts of 
Medicare?
Part A (Hospital Insurance) 
Helps cover:
‚Ä¢ Inpatient care in hospitals
‚Ä¢ Skilled nursing facility care
‚Ä¢ Hospice care
‚Ä¢ Home health care
Go to pages 25‚Äì29. 
Part B (Medical Insurance)
Helps cover:
‚Ä¢ Services from doctors and other health care providers
‚Ä¢ Outpatient care
‚Ä¢ Home health care
‚Ä¢ Durable medical equipment (like wheelchairs, walkers, 
hospital beds, and other equipment)
‚Ä¢ Many preventive services  (like screenings, shots or vaccines, 
and yearly ‚Äú Wellness‚Äù visits) 
Go to pages 29‚Äì55 . 
Part D (Drug coverage) 
Helps cover the cost of prescription drugs (including many recommended shots or vaccines).
Plans that offer Medicare drug coverage (Part D) are run by 
private insurance companies that follow rules set by Medicare.
Go to pages 79‚Äì90.


In [29]:
print(node_w_score1.get_text())

9
What are the parts of 
Medicare?
Part A (Hospital Insurance) 
Helps cover:
‚Ä¢ Inpatient care in hospitals
‚Ä¢ Skilled nursing facility care
‚Ä¢ Hospice care
‚Ä¢ Home health care
Go to pages 25‚Äì29. 
Part B (Medical Insurance)
Helps cover:
‚Ä¢ Services from doctors and other health care providers
‚Ä¢ Outpatient care
‚Ä¢ Home health care
‚Ä¢ Durable medical equipment (like wheelchairs, walkers, 
hospital beds, and other equipment)
‚Ä¢ Many preventive services  (like screenings, shots or vaccines, 
and yearly ‚Äú Wellness‚Äù visits) 
Go to pages 29‚Äì55 . 
Part D (Drug coverage) 
Helps cover the cost of prescription drugs (including many recommended shots or vaccines).
Plans that offer Medicare drug coverage (Part D) are run by 
private insurance companies that follow rules set by Medicare.
Go to pages 79‚Äì90.


In [22]:
response = query_engine.query("How many parts does Medicare have? Please cite sources along with your answer.")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


In [24]:
print(str(response))

Medicare is a health insurance program for people aged 65 and above, as well as certain younger people with disabilities. It is divided into four parts, each covering different aspects of healthcare:

1. Part A (Hospital Insurance): Covers inpatient care in hospitals, skilled nursing facility care, hospice care, and home health care.
2. Part B (Medical Insurance): Covers services from doctors and other healthcare providers, outpatient care, home health care, durable medical equipment, and many preventive services.
3. Part D (Drug Coverage): Helps cover the cost of prescription drugs, including recommended shots or vaccines. Plans that offer Medicare drug coverage are run by private insurance companies that follow rules set by Medicare.
4. Part C (Medicare Advantage Plans): Offers an alternative to Original Medicare for health and drug coverage. These plans include Part A, Part B, and usually Part D, and may offer additional benefits.

Sources: Medicare.gov (2022). "Medicare Parts A, B,

## Chat Engine

### [ChatEngines](https://docs.llamaindex.ai/en/stable/api_reference/query/chat_engines.html)
* https://docs.llamaindex.ai/en/latest/module_guides/deploying/chat_engines/root.html#
* https://docs.llamaindex.ai/en/latest/module_guides/deploying/chat_engines/usage_pattern.html#configuring-a-chat-engine

Note: you can access different chat engines by specifying the chat_mode as a kwarg. condense_question corresponds to CondenseQuestionChatEngine, react corresponds to ReActChatEngine, context corresponds to a ContextChatEngine.

<br>

[SimpleChatEngine](https://docs.llamaindex.ai/en/stable/api_reference/query/chat_engines/simple_chat_engine.html). Have a conversation with the LLM. This does __not__ make use of a knowledge base.

<br>

[CondenseQuestionChatEngine](https://docs.llamaindex.ai/en/stable/api_reference/query/chat_engines/condense_question_chat_engine.html)
First generate a standalone question from conversation context and last message, then query the query engine for a response.

<br>

[ReActChatEngine](https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_react.html)
ReAct is an agent based chat mode built on top of a query engine over your data.

For each chat interaction, the agent enter a ReAct loop:

* first decide whether to use the query engine tool and come up with appropriate input

* (optional) use the query engine tool and observe its output

* decide whether to repeat or give final response

This approach is flexible, since it can flexibility choose between querying the knowledge base or not. However, the performance is also more dependent on the quality of the LLM. You might need to do more coercing to make sure it chooses to query the knowledge base at right times, instead of hallucinating an answer.

<br>

[ContextChatEngine](https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_context.html)
ContextChatEngine is a simple chat mode built on top of a retriever over your data.

For each chat interaction:

* first retrieve text from the index using the user message

* set the retrieved text as context in the system prompt

* return an answer to the user message

This approach is simple, and works for questions directly related to the knowledge base and general interactions.

<br>

[Condense Plus Context Chat Engine](https://docs.llamaindex.ai/en/stable/api_reference/query/chat_engines/condense_plus_context_chat_engine.html)
First condense a conversation and latest user message to a standalone question Then build a context for the standalone question from a retriever, Then pass the context along with prompt and user message to LLM to generate a response.

### ReAct Agent Mode

In [24]:
service_context = ServiceContext.from_defaults(llm=llm,
                                               embed_model='local')

In [25]:
chat_engine = index.as_chat_engine(service_context=service_context, 
                                   chat_mode="react", 
                                   verbose=True)

In [33]:
# Ask a test question
response = chat_engine.chat(eval_questions[0])
print(str(response))

TemplateError: Conversation roles must alternate user/assistant/user/assistant/...

In [22]:
custom_prompt = PromptTemplate("""\
Given a conversation (between Human and Assistant) and a follow up message from Human, \
rewrite the message to be a standalone question that captures all relevant context \
from the conversation.

<Chat History>
{chat_history}

<Follow Up Message>
{question}

<Standalone question>
""")

In [32]:
chat_engine = index.as_chat_engine(service_context=service_context, 
                                   chat_mode="react", 
                                   condense_question_prompt=custom_prompt,
                                   verbose=True)

### CondenseQuestionChatEngine
https://docs.llamaindex.ai/en/latest/module_guides/deploying/chat_engines/usage_pattern.html#

In [27]:
from llama_index.prompts import PromptTemplate
from llama_index.llms import ChatMessage, MessageRole
from llama_index.chat_engine.condense_question import CondenseQuestionChatEngine

In [28]:
custom_prompt = PromptTemplate("""\
Given a conversation (between Human and Assistant) and a follow up message from Human, \
rewrite the message to be a standalone question that captures all relevant context \
from the conversation.

<Chat History>
{chat_history}

<Follow Up Message>
{question}

<Standalone question>
""")

In [29]:
query_engine = index.as_query_engine()

In [30]:
chat_engine = CondenseQuestionChatEngine.from_defaults(
    llm=llm,
    query_engine=query_engine,
    condense_question_prompt=custom_prompt,
    # chat_history=custom_chat_history,
    verbose=False,
)

ValueError: 
******
Could not load OpenAI model. If you intended to use OpenAI, please check your OPENAI_API_KEY.
Original error:
No API key found for OpenAI.
Please set either the OPENAI_API_KEY environment variable or openai.api_key prior to initialization.
API keys can be found or created at https://platform.openai.com/account/api-keys

To disable the LLM entirely, set llm=None.
******

In [None]:
# Ask a test question
response = chat_engine.chat(eval_questions[0])
print(str(response))

In [None]:
# Test stream > TEST TOOK OVER TEN MINS!!!
streaming_response = chat_engine.stream_chat(eval_questions[0])
for token in streaming_response.response_gen:
    print(token, end="")

In [None]:
response = query_engine.query("How many parts does Medicare have? Please cite sources along with your answer.")

In [None]:
print(str(response))

### [ContextChatEngine](https://docs.llamaindex.ai/en/stable/examples/chat_engine/chat_engine_context.html)

In [34]:
chat_engine = index.as_chat_engine(
    chat_mode="context",
    # memory=memory,
    system_prompt=custom_prompt
)

In [35]:
# Ask a test question
response = chat_engine.chat(eval_questions[0])
print(str(response))

TemplateError: Conversation roles must alternate user/assistant/user/assistant/...

# Trulens
[Llama-Index Quickstart](https://www.trulens.org/trulens_eval/llama_index_quickstart)

In [1]:
from trulens_eval import Feedback, Tru, TruLlama
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI
import numpy as np

In [19]:
tru = Tru()
tru.reset_database()

ü¶ë Tru initialized with db url sqlite:///default.sqlite .
üõë Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [20]:
# Initialize provider class
openai = OpenAI()

In [21]:
grounded = Groundedness(groundedness_provider=openai)

In [22]:
# Define a groundedness feedback function
f_groundedness = Feedback(grounded.groundedness_measure_with_cot_reasons) \
    .on(TruLlama.select_source_nodes().node.text.collect()) \
    .on_output() \
    .aggregate(grounded.grounded_statements_aggregator)

‚úÖ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
‚úÖ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [23]:
# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

‚úÖ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
‚úÖ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .


In [24]:
# Question/statement relevance between question and each context chunk.
f_qs_relevance = Feedback(openai.qs_relevance).on_input().on(
                     TruLlama.select_source_nodes().node.text).aggregate(np.mean)

‚úÖ In qs_relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
‚úÖ In qs_relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .


In [25]:
tru_query_engine_recorder = TruLlama(query_engine,
                                     app_id='LlamaIndex_App1',
                                     feedbacks=[f_groundedness, f_qa_relevance, f_qs_relevance])     

In [26]:
# or as context manager
with tru_query_engine_recorder as recording:
    query_engine.query(eval_questions[0])

In [27]:
# os.environ ['STREAMLIT_SERVER_PORT'] = '8080'

Forward and use this port for the app.
http://localhost:8501

In [28]:
tru.run_dashboard() # open a local streamlit app to explore

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu‚Ä¶

In [None]:
# tru.stop_dashboard() # stop if needed

In [None]:
# tru.get_records_and_feedback(app_ids=[])[0] # pass an empty list of app_ids to get all