In [1]:
import os
import openai
from datasets import load_dataset

In [2]:
openai.api_key = os.environ["OPENAI_API_KEY"]

In [7]:
from llama_index import SimpleDirectoryReader

documents = SimpleDirectoryReader(input_files=["./eBook-How-to-Build-a-Career-in-AI.pdf"]).load_data()

In [8]:
from llama_index import Document

document = Document(text="\n\n".join([doc.text for doc in documents]))

In [9]:
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.retrievers import VectorIndexRetriever
from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-4-1106-preview", temperature=0.1)
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_documents([document], service_context=service_context)
# retriever = VectorIndexRetriever(index=index, similarity_top_k=5)

In [10]:
query_engine = index.as_query_engine()

In [11]:
from trulens_eval import Tru
tru = Tru()

tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [12]:
from utils import get_prebuilt_trulens_recorder

tru_recorder = get_prebuilt_trulens_recorder(query_engine, app_id="Direct Query Engine")

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [13]:
eval_questions = []
with open('eval_questions.txt', 'r') as file:
    for line in file:
        # Remove newline character and convert to integer
        item = line.strip()
        print(item)
        eval_questions.append(item)

What are the keys to building a career in AI?
How can teamwork contribute to success in AI?
What is the importance of networking in AI?
What are some good habits to develop for a successful career?
How can altruism be beneficial in building a career?
What is imposter syndrome and how does it relate to AI?
Who are some accomplished individuals who have experienced imposter syndrome?
What is the first step to becoming good at AI?
What are some common challenges in AI?
Is it normal to find parts of AI challenging?


In [14]:
with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [15]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [16]:
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Context Relevance,Groundedness,Answer Relevance_calls,Context Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_e3c654faf5bddc2f3ca1f17397119345,"""What are the keys to building a career in AI?""","""The keys to building a career in AI include l...",-,"{""record_id"": ""record_hash_e3c654faf5bddc2f3ca...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-06T19:32:17.442264"", ""...",2023-12-06T19:32:25.364025,1.0,0.95,1.0,[{'args': {'prompt': 'What are the keys to bui...,[{'args': {'prompt': 'What are the keys to bui...,"[{'args': {'source': 'PAGE 1Founder, DeepLearn...",7,2133,0.02365
1,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_87cfd2d3487d2bd303875811ccebdd33,"""How can teamwork contribute to success in AI?""","""Teamwork can contribute to success in AI by a...",-,"{""record_id"": ""record_hash_87cfd2d3487d2bd3038...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-06T19:32:25.484969"", ""...",2023-12-06T19:32:34.462566,1.0,0.0,0.8,[{'args': {'prompt': 'How can teamwork contrib...,[{'args': {'prompt': 'How can teamwork contrib...,[{'args': {'source': 'Hopefully the previous c...,8,1759,0.02027
2,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_f6a9f1e0e8158a741bf128bbec67e2d2,"""What is the importance of networking in AI?""","""Networking in the field of AI is important be...",-,"{""record_id"": ""record_hash_f6a9f1e0e8158a741bf...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-06T19:32:34.558983"", ""...",2023-12-06T19:32:42.766191,1.0,0.0,0.25,[{'args': {'prompt': 'What is the importance o...,[{'args': {'prompt': 'What is the importance o...,[{'args': {'source': 'Hopefully the previous c...,8,1729,0.01937
3,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_2e7fa9c3bb4c010a590dd621ec62e8ef,"""What are some good habits to develop for a su...","""Developing good habits is essential for a suc...",-,"{""record_id"": ""record_hash_2e7fa9c3bb4c010a590...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-06T19:32:42.858909"", ""...",2023-12-06T19:32:56.999943,1.0,0.5,0.828571,[{'args': {'prompt': 'What are some good habit...,[{'args': {'prompt': 'What are some good habit...,[{'args': {'source': 'Hopefully the previous c...,14,1772,0.02126
4,Direct Query Engine,"{""app_id"": ""Direct Query Engine"", ""tags"": ""-"",...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_074bf8cbf5d347cd30e353d9baa7fd98,"""How can altruism be beneficial in building a ...","""Altruism can be beneficial in building a care...",-,"{""record_id"": ""record_hash_074bf8cbf5d347cd30e...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-06T19:32:57.093920"", ""...",2023-12-06T19:33:04.885196,1.0,0.0,0.775,[{'args': {'prompt': 'How can altruism be bene...,[{'args': {'prompt': 'How can altruism be bene...,[{'args': {'source': 'Hopefully the previous c...,7,1694,0.01894


In [18]:
# Calculate and print the average for each specified column
average_answer_relevance = records['Answer Relevance'].mean()
average_context_relevance = records['Context Relevance'].mean()
average_groundedness = records['Groundedness'].mean()

# Print the averages
print("Average Answer Relevance:", average_answer_relevance)
print("Average Context Relevance:", average_context_relevance)
print("Average Groundedness:", average_groundedness)

Average Answer Relevance: 1.0
Average Context Relevance: 0.23333333333333334
Average Groundedness: 0.8316964285714286


## FIQA Dataset

In [3]:
corpus_dataset = load_dataset("explodinggradients/fiqa", "corpus")
corpus_dataset = corpus_dataset['corpus'].to_pandas()
query_dataset = load_dataset("explodinggradients/fiqa", "main")
query_dataset = query_dataset['train'].to_pandas()

Found cached dataset fiqa (/Users/deathscope/.cache/huggingface/datasets/explodinggradients___fiqa/corpus/1.0.0/3dc7b639f5b4b16509a3299a2ceb78bf5fe98ee6b5fee25e7d5e4d290c88efb8)


  0%|          | 0/1 [00:00<?, ?it/s]

Found cached dataset fiqa (/Users/deathscope/.cache/huggingface/datasets/explodinggradients___fiqa/main/1.0.0/3dc7b639f5b4b16509a3299a2ceb78bf5fe98ee6b5fee25e7d5e4d290c88efb8)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
questions_file = "fiqa_eval_questions.txt"

In [5]:
# Read questions from the file
with open(questions_file, 'r') as file:
    questions = file.read().splitlines()

# Filter query_dataset to include rows with questions from the file
new_query_dataset = query_dataset[query_dataset['question'].isin(questions)]

# Extract all ground truths from the new query dataset
ground_truths = new_query_dataset['ground_truths'].explode().dropna().unique()

# Filter corpus_dataset to include rows mentioned in ground_truths
new_corpus_dataset = corpus_dataset[corpus_dataset['doc'].isin(ground_truths)].reset_index(drop=True)

In [29]:
from llama_index import Document

document_texts_original = "\n\n".join(new_corpus_dataset['doc'].tolist())

In [None]:
document = Document(text=document_texts_original)

In [30]:
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.retrievers import VectorIndexRetriever
from llama_index.llms import OpenAI

llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_documents([document], service_context=service_context)

In [31]:
query_engine = index.as_query_engine()

In [32]:
from trulens_eval import Tru
tru = Tru()

tru.reset_database()

In [33]:
from utils import get_prebuilt_trulens_recorder

tru_recorder = get_prebuilt_trulens_recorder(query_engine, app_id="FIQA Unsanitized")

In [35]:
with tru_recorder as recording:
    for question in questions:
        response = query_engine.query(question)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [36]:
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [37]:
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Context Relevance,Groundedness,Answer Relevance_calls,Context Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,FIQA Unsanitized,"{""app_id"": ""FIQA Unsanitized"", ""tags"": ""-"", ""m...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_77268e145237b1bc21f005ebc5585179,"""What is considered a business expense on a bu...","""Business expenses on a business trip that may...",-,"{""record_id"": ""record_hash_77268e145237b1bc21f...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:12:16.619030"", ""...",2023-12-07T10:12:28.284322,1.0,0.1,1.0,[{'args': {'prompt': 'What is considered a bus...,[{'args': {'prompt': 'What is considered a bus...,[{'args': {'source': 'The IRS Guidance pertain...,11,2141,0.02421
1,FIQA Unsanitized,"{""app_id"": ""FIQA Unsanitized"", ""tags"": ""-"", ""m...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_4258ab426f731b4c4bf12f03a6e948f7,"""What are 'business fundamentals'?""","""Business fundamentals refer to the basic prin...",-,"{""record_id"": ""record_hash_4258ab426f731b4c4bf...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:12:28.380672"", ""...",2023-12-07T10:12:37.184241,1.0,0.0,0.0,[{'args': {'prompt': 'What are 'business funda...,[{'args': {'prompt': 'What are 'business funda...,[{'args': {'source': 'Scanned or electronic co...,8,2115,0.02319
2,FIQA Unsanitized,"{""app_id"": ""FIQA Unsanitized"", ""tags"": ""-"", ""m...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_b5f524c99c4a7f1f767581f382d65c18,"""Can I pay off my credit card balance to free ...","""Yes, you can pay off your credit card balance...",-,"{""record_id"": ""record_hash_b5f524c99c4a7f1f767...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:12:37.282140"", ""...",2023-12-07T10:12:42.302777,1.0,0.0,1.0,[{'args': {'prompt': 'Can I pay off my credit ...,[{'args': {'prompt': 'Can I pay off my credit ...,[{'args': {'source': 'That said if your goal i...,5,2167,0.02279
3,FIQA Unsanitized,"{""app_id"": ""FIQA Unsanitized"", ""tags"": ""-"", ""m...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_3791ae2bdef9e476bc8dfb87cba9b666,"""Why is \u201ccheque cashing\u201d a legitimat...","""Cheque cashing is a legitimate business becau...",-,"{""record_id"": ""record_hash_3791ae2bdef9e476bc8...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:12:42.402589"", ""...",2023-12-07T10:13:00.225660,1.0,0.5,1.0,[{'args': {'prompt': 'Why is “cheque cashing” ...,[{'args': {'prompt': 'Why is “cheque cashing” ...,[{'args': {'source': 'So if you don't have the...,17,2322,0.02732
4,FIQA Unsanitized,"{""app_id"": ""FIQA Unsanitized"", ""tags"": ""-"", ""m...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_7365761b961808a0b4d8344990fae2fa,"""Is business the only way to become a milliona...","""No, starting a business is not the only way t...",-,"{""record_id"": ""record_hash_7365761b961808a0b4d...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:13:00.323995"", ""...",2023-12-07T10:13:04.728121,1.0,0.25,1.0,[{'args': {'prompt': 'Is business the only way...,[{'args': {'prompt': 'Is business the only way...,"[{'args': {'source': 'I know, again, that peop...",4,2153,0.02233


In [38]:
# Calculate and print the average for each specified column
average_answer_relevance = records['Answer Relevance'].mean()
average_context_relevance = records['Context Relevance'].mean()
average_groundedness = records['Groundedness'].mean()

# Print the averages
print("Average Answer Relevance:", average_answer_relevance)
print("Average Context Relevance:", average_context_relevance)
print("Average Groundedness:", average_groundedness)

Average Answer Relevance: 0.9888888888888889
Average Context Relevance: 0.15555555555555556
Average Groundedness: 0.8163265306122449


## Presidio

In [7]:
from mechanisms.detectors.presidio_detector import PresidioDetector
from mechanisms.santext import SanText
from collections import Counter
from spacy.lang.en import English

In [51]:
detector = PresidioDetector()

In [53]:
vocab = Counter()
tokenizer = English()
for text in new_corpus_dataset['doc']:
    tokenized_text = [token.text for token in tokenizer(text)]
    vocab.update(tokenized_text)

In [54]:
sensitive_words = detector.detect(vocab)

In [55]:
sensitive_words

['2010',
 'Sunday',
 'Biff',
 'U.S.D.',
 'http://financial-dictionary.thefreedictionary.com/Business+Fundamentals',
 'today',
 'P463',
 'payee',
 '2012',
 'Uncle',
 'http://www.myfico.com/crediteducation/whatsinyourscore.aspx',
 'US',
 '8%+',
 'hours',
 'Dilip',
 'Bush',
 'Kennedy',
 'quarterly',
 'yeild',
 'annual',
 'Spoiles',
 'http://rsafilms.com',
 'overnight']

## Constants

In [9]:
WORD_EMBEDDING = "glove"
WORD_EMBEDDING_PATH = "glove.840B.300d.txt"
TOP_K = 20
P = 0.3
BERT_MODEL = "bert-base-uncased"

## Preprocessing

In [6]:
from llama_index import Document
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.retrievers import VectorIndexRetriever
from llama_index.llms import OpenAI
from trulens_eval import Tru
from utils import get_prebuilt_trulens_recorder

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [8]:
rag_dataset = new_corpus_dataset.copy()
rag_dataset.rename(columns={'doc': 'sentence'}, inplace=True)

## SanText

In [73]:
from mechanisms.detectors.santext_detector import SanTextDetector

In [75]:
santext_detector = SanTextDetector(0.9)
santext = SanText(WORD_EMBEDDING, WORD_EMBEDDING_PATH, 0.2, P, santext_detector)

In [87]:
sanitized_rag_dataset = santext.sanitize(rag_dataset)

2196017it [00:52, 41447.90it/s]                                                 


In [88]:
document_texts = "\n\n".join(sanitized_rag_dataset['sanitized sentence'].tolist())
document = Document(text=document_texts)
llm = OpenAI(model="gpt-4-1106-preview", temperature=0.1)
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_documents([document], service_context=service_context)
query_engine = index.as_query_engine()

tru = Tru()
tru.reset_database()
tru_recorder = get_prebuilt_trulens_recorder(query_engine, app_id="FIQA SanText")

INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:Deleted 44 rows.
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added app FIQA SanText
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added feedback definition feedback_definition_hash_85fde0195d1d39777b0f95d1996190a6
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added feedback definition feedback_definition_hash_076412253996f477717992c912e9a80f
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens

In [89]:
with tru_recorder as recording:
    for question in questions:
        response = query_engine.query(question)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added record record_hash_facff1ef22f4fc778cc7e3f5cec8b5b0
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ feedback result Answer Relevance DONE feedback_result_hash_162b6eb5fe4f6b0ad8eaae37bd39b5ff
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Con

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added record record_hash_4b6b920f06275182b0e75f189bceaca6
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ feedback result Answer Relevance DONE feedback_result_hash_3a64dcabbcef101eae99ca0ded9ac8bb
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Con

In [90]:
records, feedback = tru.get_records_and_feedback(app_ids=["FIQA SanText"])

INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.


In [91]:
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Context Relevance,Groundedness,Answer Relevance_calls,Context Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_facff1ef22f4fc778cc7e3f5cec8b5b0,"""What is considered a business expense on a bu...","""Business expenses on a business trip can incl...",-,"{""record_id"": ""record_hash_facff1ef22f4fc778cc...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:54:11.074122"", ""...",2023-12-07T10:54:20.569886,1.0,0.0,0.0,[{'args': {'prompt': 'What is considered a bus...,[{'args': {'prompt': 'What is considered a bus...,[{'args': {'source': 'until withdrawn link Who...,9,2163,0.02395
1,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_a068f7c5681c85c0b2cf080643f0d9d9,"""What are 'business fundamentals'?""","""Business fundamentals refer to the basic prin...",-,"{""record_id"": ""record_hash_a068f7c5681c85c0b2c...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:54:20.778033"", ""...",2023-12-07T10:54:27.265837,1.0,0.0,0.0,[{'args': {'prompt': 'What are 'business funda...,[{'args': {'prompt': 'What are 'business funda...,[{'args': {'source': 'I ATM security amounts t...,6,2117,0.02283
2,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_1d3bb763ca1fca5509d149613dcbf4cb,"""Can I pay off my credit card balance to free ...","""Yes, you can pay off your credit card balance...",-,"{""record_id"": ""record_hash_1d3bb763ca1fca5509d...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:54:27.387292"", ""...",2023-12-07T10:54:29.643077,1.0,0.0,0.7,[{'args': {'prompt': 'Can I pay off my credit ...,[{'args': {'prompt': 'Can I pay off my credit ...,[{'args': {'source': 'huge banking accept free...,2,1594,0.01626
3,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_0d25de5c1678f8b2cea674a081acfd8a,"""Why is \u201ccheque cashing\u201d a legitimat...","""Cheque cashing is a legitimate business becau...",-,"{""record_id"": ""record_hash_0d25de5c1678f8b2cea...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:54:29.751021"", ""...",2023-12-07T10:54:39.462106,1.0,0.0,0.5,[{'args': {'prompt': 'Why is “cheque cashing” ...,[{'args': {'prompt': 'Why is “cheque cashing” ...,[{'args': {'source': 'anyway everyone than sub...,9,2147,0.02387
4,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_6e13826f2801ec14db8382c2343e6487,"""Is business the only way to become a milliona...","""No, business is not the only way to become a ...",-,"{""record_id"": ""record_hash_6e13826f2801ec14db8...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T10:54:39.569432"", ""...",2023-12-07T10:54:45.501665,1.0,0.0,0.0,[{'args': {'prompt': 'Is business the only way...,[{'args': {'prompt': 'Is business the only way...,[{'args': {'source': 'school The word that suc...,5,1614,0.01724


In [92]:
# Calculate and print the average for each specified column
average_answer_relevance = records['Answer Relevance'].mean()
average_context_relevance = records['Context Relevance'].mean()
average_groundedness = records['Groundedness'].mean()

# Print the averages
print("Average Answer Relevance:", average_answer_relevance)
print("Average Context Relevance:", average_context_relevance)
print("Average Groundedness:", average_groundedness)

Average Answer Relevance: 0.9888888888888889
Average Context Relevance: 0.0
Average Groundedness: 0.18035714285714285


In [93]:
# Loop through each item in the Series and print it
for item in records['Context Relevance_calls']:
    print(item)
    print("\n")  # Adding a new line for better readability

[{'args': {'prompt': 'What is considered a business expense on a business trip?', 'response': 'until withdrawn link Who using the probably spot suppose In young the day I opportunity say trying your business trying may be AGI . anything nor it sign on the considered and what it starts you giant staying call . produce pass lodging designer away from requires on business may number treated \'re included page regard the become of average their calculating , the once typical of pass and her and pays home and requirements expenses . contingent joint are investors future our editing from score poorly if their wage affect print wrong to be away investing clients include hobby than an become Little last gives work and they need to precisely or wisely to others the before investing Use their helping . The diem 2010 describe help and Or expenses substantially be easy U.S.D. library the security may use a 50 send excellent those gave 50 rush carefully odd . It of city wire associated , he whoever

### Epsilon 

## CusText

In [53]:
from mechanisms.detectors.custext_detector import CusTextDetector
from mechanisms.custext import CusText

custext_detector = CusTextDetector()
custext = CusText(WORD_EMBEDDING, WORD_EMBEDDING_PATH, 0.2, TOP_K, custext_detector)

In [54]:
sanitized_rag_dataset = custext.sanitize(rag_dataset.head(1))

INFO:root: Found word mappings in ./word_mappings/probability_mappings.txt, ./word_mappings/similar_word_mappings.txt. Making use of them.
INFO:root: Santizing dataset using CusText


In [55]:
sanitized_rag_dataset['sentence'][0]

'The IRS Guidance pertaining to the subject.  In general the best I can say is your business expense may be deductible.  But it depends on the circumstances and what it is you want to deduct. Travel Taxpayers who travel away from home on business may deduct related   expenses, including the cost of reaching their destination, the cost   of lodging and meals and other ordinary and necessary expenses.   Taxpayers are considered “traveling away from home” if their duties   require them to be away from home substantially longer than an   ordinary day’s work and they need to sleep or rest to meet the demands   of their work. The actual cost of meals and incidental expenses may be   deducted or the taxpayer may use a standard meal allowance and reduced   record keeping requirements. Regardless of the method used, meal   deductions are generally limited to 50 percent as stated earlier.    Only actual costs for lodging may be claimed as an expense and   receipts must be kept for documentation.

In [56]:
sanitized_rag_dataset['sanitized sentence'][0]

'that exempt byCategoryThemeAnnive relate put whole concerning the however the great wondering get say that my business profit or sort deductibles Actually with hence BMXZnot place whether that how up first put let take subtract Tourism Voters who Trips into off house TreatmentsHighlightersLips industry i.e. reimburse detailed Income other both expensive into reaches need visitor when savings when accommodations the meals both other run-of-the-mill well necessary deductions Consumers seem one getaway the into away and any duty need their bring kind off as go essentially no more sort hence past work while well need get insomnia may which could need whole concerns whole yourself much which same paid same delicious BMXZnot insofar profits as with subtract be as Paying either even is i.e. delicious expenses others pay put compensate ensure Ironically place both employing either eating cash not those limited go billion as explained past only hence reduced there resorts or something latter p

In [33]:
document_texts = "\n\n".join(sanitized_rag_dataset['sanitized sentence'].tolist())
document = Document(text=document_texts)
llm = OpenAI(model="gpt-4-1106-preview", temperature=0.1)
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local:BAAI/bge-small-en-v1.5")
index = VectorStoreIndex.from_documents([document], service_context=service_context)
query_engine = index.as_query_engine()

tru = Tru()
tru.reset_database()
tru_recorder = get_prebuilt_trulens_recorder(query_engine, app_id="FIQA CusText")

INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:Deleted 4 rows.
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added app FIQA CusText
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added feedback definition feedback_definition_hash_07b7a14ab33149016794026efe130a2b
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added feedback definition feedback_definition_hash_a38e6c5a0584630352c2a4baa82dfcc0
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_

In [35]:
sanitized_rag_dataset

Unnamed: 0,sentence,sanitized sentence,sensitive words,missing words
0,The IRS Guidance pertaining to the subject. I...,into filing Tweets@WorldTimeEngine regard woul...,[],[]
1,The card you have is one where you had to depo...,place checks way can it another that able appa...,[],[]
2,"""In my experience (in the US), the main draw o...",place own experienced the both DescriptionMfgr...,[],[]
3,"""How does this get any business? You'd be surp...",That think this way not clients Have up wonder...,[],[]
4,See what the contract says about transfers or ...,http://www.npr.org/.Missing than first lease s...,[],[]
5,"""There are three (or four) ways that a company...",receivedKUDOS These different even sixteen way...,[],[]
6,"""This answer is based on my understanding of t...",ShoppingCart.current.id answers up developed y...,[],[]
7,I'd approach the lender that you're getting th...,thought relates both loan when go getting into...,[],[]
8,Is it possible to pay off my balance more than...,At something necessary can money on yourself b...,[],[]
9,"Generally, unless you're doing a wire transfer...",PATCH/resize/273x203 unless give needed is wir...,[],[]


In [34]:
document_texts

"into filing Tweets@WorldTimeEngine regard would place matter In reason as way n't will you as sure opportunity profits and/or same premiums Even way affect MascaraFoundations entire matter BMXZnot out kind something give need can reimburse Travelling 1Bialosky wants Travel out BMXZnot up the profitable rather deduct website expenses included when purchasing both halfway your choice which purchase the rental which delicious well that everyday out compensate cash Tax should such traveling out the rest otherwise my assignments utilize they will a coming leaving well essentially not less up hence months there even not need will rest even one get we both concern the our doing when any purchasing when eaten but unintentional profits even is reimbursed or place Taxes instead able first basic lunch pay both savings BMXZnot regardless criteria Whatever the that using used supper revenue can ones limited let 200,000 that asks year yetResolved exactly purchase know!OverviewReviews accommodations

In [30]:
document_texts_original

'The IRS Guidance pertaining to the subject.  In general the best I can say is your business expense may be deductible.  But it depends on the circumstances and what it is you want to deduct. Travel Taxpayers who travel away from home on business may deduct related   expenses, including the cost of reaching their destination, the cost   of lodging and meals and other ordinary and necessary expenses.   Taxpayers are considered “traveling away from home” if their duties   require them to be away from home substantially longer than an   ordinary day’s work and they need to sleep or rest to meet the demands   of their work. The actual cost of meals and incidental expenses may be   deducted or the taxpayer may use a standard meal allowance and reduced   record keeping requirements. Regardless of the method used, meal   deductions are generally limited to 50 percent as stated earlier.    Only actual costs for lodging may be claimed as an expense and   receipts must be kept for documentation.

In [14]:
with tru_recorder as recording:
    for question in questions:
        response = query_engine.query(question)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added record record_hash_cfa54c437746f2754baf4ff5164bc753
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 400 Bad Request"
ERROR:trulens_eval.feedback.provider.endpoint.base:openai request failed <class 'openai.BadRequestError'>=Error code: 400 - {'error': {'message': 'Detected an error in the prompt. Please try again with a different prompt.', 'type': 'invalid_request_error', 'param': 'prompt', 'code': None}}. Retries remaining=3.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 400 Bad Request"
ERROR:trulens_eval.feedback.provider.endpoint.base:openai request failed <class 'openai.BadRequestError'>=Error code: 400 - {'error': {'message': 'Detected an error in the prompt. Please try again with a different prompt.', 'type': 'invalid_request_error', 'param': 'prompt', 'code': None}}. Retries remaining=3.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 400 Bad Request"
ERROR:trulens_eval.feedback.provider.endpoint.base:openai request failed <

INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:🛑 feedback result Context Relevance FAILED feedback_result_hash_160aaddc716ab94a2bee204b205ec992
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 400 Bad Request"
ERROR:trulens_eval.feedback.provider.endpoint.base:openai request failed <class 'openai.BadRequestError'>=Error code: 400 - {'error': {'message': 'Detected an error in the prompt. Please try again with a different prompt.', 'type': 'invalid_request_error', 'param': 'prompt', 'code': None}}. Retries remaining=0.
  File "/Users/deathscope/anaconda3/envs/research/lib/python3.11/site-packages/trulens_eval/feedback/feedback.py", line 481, in run
    result_and_meta, part_cost = Endpoint.track_all_costs_tally(
                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/deathscope/anaconda3/envs/research/lib/python3.11/

INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ added record record_hash_ba98427c442e16ce9ee7e7b067f6e284
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.
INFO:trulens_eval.database.sqlalchemy_db:✅ feedback result Answer Relevance DONE feedback_result_hash_83305ae1d22b5109a9b4df8cc01eda41
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Wil

In [18]:
records, feedback = tru.get_records_and_feedback(app_ids=["FIQA CusText"])

INFO:alembic.runtime.migration:Context impl SQLiteImpl.
INFO:alembic.runtime.migration:Will assume non-transactional DDL.


In [19]:
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Answer Relevance,Answer Relevance_calls,Context Relevance_calls,Groundedness_calls,Context Relevance,Groundedness,latency,total_tokens,total_cost
0,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_cfa54c437746f2754baf4ff5164bc753,"""What is considered a business expense on a bu...","""Business expenses on a business trip can incl...",-,"{""record_id"": ""record_hash_cfa54c437746f2754ba...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T11:45:38.836051"", ""...",2023-12-07T11:45:51.689423,1.0,[{'args': {'prompt': 'What is considered a bus...,[],[],,,12,1987,0.02211
1,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_8da7fe2af2ab4a6758e1be97f437a8fb,"""What are 'business fundamentals'?""","""Business fundamentals refer to the core attri...",-,"{""record_id"": ""record_hash_8da7fe2af2ab4a6758e...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T11:45:51.827435"", ""...",2023-12-07T11:46:00.993277,1.0,[{'args': {'prompt': 'What are 'business funda...,[{'args': {'prompt': 'What are 'business funda...,[{'args': {'source': 'Strategy which ensures T...,0.0,0.466667,9,1660,0.01842
2,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_e576e33eea6e975b50acbffaa1f855f9,"""Can I pay off my credit card balance to free ...","""Yes, you can pay off your credit card balance...",-,"{""record_id"": ""record_hash_e576e33eea6e975b50a...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T11:46:01.101377"", ""...",2023-12-07T11:46:03.140814,1.0,[{'args': {'prompt': 'Can I pay off my credit ...,[{'args': {'prompt': 'Can I pay off my credit ...,[{'args': {'source': 'wanted well talk go that...,0.0,0.0,2,1695,0.01727
3,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_7decb122e59a5395f222e387f7d1e205,"""Why is \u201ccheque cashing\u201d a legitimat...","""Cheque cashing is a legitimate business becau...",-,"{""record_id"": ""record_hash_7decb122e59a5395f22...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T11:46:03.250122"", ""...",2023-12-07T11:46:12.463696,1.0,[{'args': {'prompt': 'Why is “cheque cashing” ...,[{'args': {'prompt': 'Why is “cheque cashing” ...,[{'args': {'source': 'example afternoon BMXZno...,0.0,0.0,9,1624,0.018
4,FIQA SanText,"{""app_id"": ""FIQA SanText"", ""tags"": ""-"", ""metad...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_b022d3fdab9cfb7939ba7b3b2dd32e25,"""Is business the only way to become a milliona...","""No, business is not the only way to become a ...",-,"{""record_id"": ""record_hash_b022d3fdab9cfb7939b...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2023-12-07T11:46:12.567436"", ""...",2023-12-07T11:46:16.360343,1.0,[{'args': {'prompt': 'Is business the only way...,[{'args': {'prompt': 'Is business the only way...,[{'args': {'source': 'example afternoon BMXZno...,0.0,0.0,3,1159,0.01259


In [20]:
# Calculate and print the average for each specified column
average_answer_relevance = records['Answer Relevance'].mean()
average_context_relevance = records['Context Relevance'].mean()
average_groundedness = records['Groundedness'].mean()

# Print the averages
print("Average Answer Relevance:", average_answer_relevance)
print("Average Context Relevance:", average_context_relevance)
print("Average Groundedness:", average_groundedness)

Average Answer Relevance: 1.0
Average Context Relevance: 0.0
Average Groundedness: 0.058333333333333334


# TAB Dataset