In [2]:
from llama_index.readers.web import FireCrawlWebReader
from dotenv import load_dotenv
import os, pymongo, pprint
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext
from llama_index.core.settings import Settings
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.postprocessor.cohere_rerank import CohereRerank

In [3]:
# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# Service credentials; each is None when the variable is not defined.
FIRECRAWL_API = os.environ.get('FIRECRAWL_API')
ATLAS_CONNECTION_STRING = os.environ.get('ATLAS_URI')
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

# The key is stored under OPENAI_KEY and copied to OPENAI_API_KEY.
# os.environ only accepts str values, so assigning a missing (None) key would
# fail with an opaque TypeError; check first and name the missing variable.
_openai_key = os.environ.get("OPENAI_KEY")
if _openai_key:
    os.environ["OPENAI_API_KEY"] = _openai_key
elif not os.environ.get("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OpenAI API key not found: set OPENAI_KEY (or OPENAI_API_KEY) "
        "in the environment or in the .env file."
    )

In [None]:
def filename_fn(filename):
    """Return the metadata dict for one file: its name under the "file_name" key."""
    return {"file_name": filename}


# The reader sets the metadata of each document according to filename_fn.
directory_reader = SimpleDirectoryReader(
    input_dir="data/smart_solutions",
    file_metadata=filename_fn,
)
documents = directory_reader.load_data()

## Create Vector Embeddings

In [None]:
"""
base = top_k 5
v2 = 512 chunk
v3 = 100 chunk *
v4 = 100 chunk, 50 overlap **
v5 = 100 chunk, 100 overlap, top_k=5 [higher results, lower groundedness, 0.39, 0.64, 0.8]
v6 = v5 + top_k 20
v7 = v5 + top_k 10
v7 = v5 + refine
v8 = 100 chunk, 100 overlap, cohere rerank, top_n=3, top_k = 20
v9 = top_n=3, cohere, top_k = 20
v10 = top_n=3, cohere, top_k = 5
"""

In [77]:

# Settings.llm = OpenAI(model="gpt-4-turbo")

# --- Embedding / chunking configuration (global llama_index Settings) ---
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
Settings.chunk_size = 100
Settings.chunk_overlap = 100

# --- Retrieval knobs ---
# top_k: passed as similarity_top_k to the retriever / query engine.
# top_n: passed as top_n to the Cohere reranker.
top_k, top_n = 5, 3

# --- Storage and experiment identifiers ---
db_name = "smart_solutions_db"
collection_name = "policies[100_chunk, 100 overlap]"
app_name = "SmartSolutions_App_v10"  # used as the TruLens app_id

In [None]:
# Open a client against the Atlas cluster named by ATLAS_CONNECTION_STRING.
mongodb_client = pymongo.MongoClient(ATLAS_CONNECTION_STRING)

# Vector store bound to the configured database / collection and the
# "vector_index" search index.
vector_store_options = {
    "db_name": db_name,
    "collection_name": collection_name,
    "index_name": "vector_index",
}
atlas_vector_search = MongoDBAtlasVectorSearch(mongodb_client, **vector_store_options)

# Storage context that routes index writes to the Atlas vector store.
vector_store_context = StorageContext.from_defaults(
    vector_store=atlas_vector_search,
)

In [None]:
# Build the index from the loaded documents, using the Atlas-backed storage
# context; show_progress=True displays progress while it runs.
vector_store_index = VectorStoreIndex.from_documents(
    documents,
    storage_context=vector_store_context,
    show_progress=True,
)

In [None]:
# Retriever over the freshly built index, returning the top_k most similar nodes.
vector_store_retriever = VectorIndexRetriever(
    index=vector_store_index,
    similarity_top_k=top_k,
)

# Query engine that answers questions using that retriever.
query_engine = RetrieverQueryEngine(retriever=vector_store_retriever)

## Load Embeddings

In [None]:
# # Settings.llm = OpenAI(model="gpt-4-turbo")

# # Connect to your Atlas cluster
# mongodb_client = pymongo.MongoClient(ATLAS_CONNECTION_STRING)

# # Instantiate the vector store
# atlas_vector_search = MongoDBAtlasVectorSearch(
#     mongodb_client,
#     db_name = db_name,
#     collection_name = collection_name,
#     index_name = "vector_index"
# )

# # Create VectorStoreIndex from the vector store
# vector_store_index = VectorStoreIndex.from_vector_store(atlas_vector_search)

# # Instantiate Atlas Vector Search as a retriever
# vector_store_retriever = VectorIndexRetriever(index=vector_store_index, similarity_top_k=top_k)
# # Pass the retriever into the query engine
# query_engine = RetrieverQueryEngine(retriever=vector_store_retriever)

In [78]:
# Settings.llm = OpenAI(model="gpt-4-turbo")

# Client for the Atlas cluster.
mongodb_client = pymongo.MongoClient(ATLAS_CONNECTION_STRING)

# Vector store over the existing collection and its "vector_index" search index.
atlas_vector_search = MongoDBAtlasVectorSearch(
    mongodb_client,
    db_name=db_name,
    collection_name=collection_name,
    index_name="vector_index",
)

# Wrap the existing vector store in an index (no documents are passed here).
vector_store_index = VectorStoreIndex.from_vector_store(atlas_vector_search)

# Reranker applied as a node postprocessor; keeps top_n nodes.
cohere_rerank = CohereRerank(api_key=COHERE_API_KEY, top_n=top_n)
postprocessors = [cohere_rerank]

# Query engine with the default retriever (top_k candidates) plus the reranker.
query_engine = vector_store_index.as_query_engine(
    similarity_top_k=top_k,
    node_postprocessors=postprocessors,
)

In [79]:
# from IPython.display import Markdown, display
# # define prompt viewing function
# def display_prompt_dict(prompts_dict):
#     for k, p in prompts_dict.items():
#         text_md = f"**Prompt Key**: {k}<br>" f"**Text:** <br>"
#         display(Markdown(text_md))
#         print(p.get_template())
#         display(Markdown("<br><br>"))

# prompts_dict = query_engine.get_prompts()
# display_prompt_dict(prompts_dict)

In [80]:
# from llama_index.core import PromptTemplate

# # Define your custom prompt
# custom_prompt = (
#     "Context information is below.\n"
#     "---------------------\n"
#     "{context_str}\n"
#     "---------------------\n"
#     "Given the context information and not prior knowledge, "
#     "answer the query.\n"
#     "Query: {query_str}\n"
#     "As an expert in customer service relations, it's important to recognize and address the tone of the received query. If the customer sounds angry or dissatisfied, start by acknowledging their feelings and apologize for any inconvenience they may have experienced. Then, proceed to curate the response in a way that soothes and resolves their concerns."
# )

# # Create a PromptTemplate from your custom prompt
# custom_prompt_template = PromptTemplate(custom_prompt)

# # Update the query engine with the new prompt
# query_engine.update_prompts({"text_qa_template": custom_prompt_template})

In [81]:
# query_engine.query("I hate the service! How do i get my money back?")

In [82]:
# TruLens workspace handle; later cells use it to read back records and the leaderboard.
from trulens_eval import Tru
tru = Tru()

In [83]:
import numpy as np

# Imports for the feedback definitions. The TruLens provider is imported under
# an alias: the notebook's import cell already binds `OpenAI` to
# llama_index.llms.openai.OpenAI, and importing the provider under the same
# name would silently rebind it for every cell (re-)run afterwards.
from trulens_eval import Feedback
from trulens_eval.app import App
from trulens_eval.feedback import Groundedness
from trulens_eval.feedback.provider.openai import OpenAI as TruLensOpenAI

# Single provider instance shared by all feedback functions below.
openai = TruLensOpenAI()

# Select the context to be used in feedback; its location is app specific.
context = App.select_context(query_engine)

# Groundedness: is the answer supported by the collected context chunks?
grounded = Groundedness(groundedness_provider=openai)
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect())  # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between the overall question and the answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()

# Question/statement relevance between the question and each context chunk,
# averaged over the chunks.
f_qs_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.query.rets.source_nodes[:].node.text.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In qs_relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In qs_relevance, input context will be set to __record__.app.query.rets.source_nodes[:].node.text .


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\TurnerZ\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [84]:
from trulens_eval import TruLlama

# Feedback functions evaluated for every recorded query.
feedback_functions = [f_groundedness, f_qa_relevance, f_qs_relevance]

# Recorder wrapping the query engine; records are stored under app_name.
tru_query_engine_recorder = TruLlama(
    query_engine,
    app_id=app_name,
    feedbacks=feedback_functions,
)

In [85]:
# Evaluation set: 20 customer-style questions, each sent to the query engine
# while the TruLens recorder is active.
queries = [
    "I'm interested in learning more about the services offered by SmartSolutions Virtual Assistant Services. Could you provide me with more information?",
    "I'm excited to explore the possibility of working with SmartSolutions Virtual Assistant Services! Can you please send me the pricing details and package options for social media management services?",
    "I need to modify my current service plan ASAP. Your lack of flexibility is causing major inconvenience. Can you please guide me through the process?",
    "I need assistance with setting up email marketing campaigns for my business. Can you help me get started?",
    "I'm glad to hear about your virtual assistant services! Are virtual assistants available to assist with data entry tasks? I have a project that requires immediate attention.",
    "I'm looking for a customized service package to meet my specific business needs. Can I request a custom quote?",
    "How can I trust you with my sensitive data? Your lack of transparency about security measures is concerning. How do you ensure the security of client data and information?",
    "I'm experiencing issues with my website and need assistance with maintenance tasks. Can you help troubleshoot?",
    "I appreciate your prompt assistance! What is the typical turnaround time for completing research tasks? I have an urgent project that needs to be completed.",
    "I'm excited to enhance my online presence! Can you recommend the best social media platforms for a small business in the fashion industry?",
    "What qualifications and experience do your virtual assistants have? I want to ensure I'm working with skilled professionals.",
    "I'm looking forward to streamlining my marketing efforts! I need help integrating my email marketing platform with my CRM system. Can you assist with this?",
    "I'm eager to get started with your services! What is the process for onboarding new clients and getting started?",
    "I need assistance with analyzing social media analytics and performance metrics. Can you provide support?",
    "Your lack of 24/7 support is unacceptable! Is customer support available during weekends or holidays? I may need assistance outside of regular business hours.",
    "I'm considering optimizing my service plan! What is the process for upgrading/downgrading my current service plan?",
    "I'm unsure which service package is best for my business. Can you provide guidance on selecting the right one?",
    "I'm eager to see examples of your work! Do you have any examples of previous work or case studies in the healthcare industry?",
    "I want to ensure a smooth payment process! What payment methods do you accept, and what are your billing procedures?",
    "I'm interested in exploring more options! Do you offer any additional services or add-ons beyond the standard packages?"
]

# Run every evaluation query inside the recorder context.
# The responses themselves are not kept here.
with tru_query_engine_recorder as recording:
    for query_text in queries:
        query_engine.query(query_text)

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/6 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/5 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/4 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/4 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/8 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/2 [00:00<?, ?it/s]

In [86]:
# Fetch the records and feedback for this app only, then preview the first rows.
records, feedback = tru.get_records_and_feedback(
    app_ids=[app_name],
)
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,relevance,qs_relevance,groundedness_measure_with_cot_reasons,relevance_calls,qs_relevance_calls,groundedness_measure_with_cot_reasons_calls,latency,total_tokens,total_cost
0,SmartSolutions_App_v10,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_73af7d7d289a221e6a4323576ae8c58f,"""I'm interested in learning more about the ser...","""SmartSolutions Virtual Assistant Services off...",-,"{""record_id"": ""record_hash_73af7d7d289a221e6a4...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-01T18:16:53.689723"", ""...",2024-05-01T18:17:01.242861,0.8,0.8,1.0,[{'args': {'prompt': 'I'm interested in learni...,[{'args': {'question': 'I'm interested in lear...,[{'args': {'source': ['SmartSolutions Virtual ...,7,495,0.00573
1,SmartSolutions_App_v10,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_a10eb655b97e4917a503a60e88bb97d1,"""I'm excited to explore the possibility of wor...","""SmartSolutions Virtual Assistant Services off...",-,"{""record_id"": ""record_hash_a10eb655b97e4917a50...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-01T18:17:01.637511"", ""...",2024-05-01T18:17:14.354816,0.8,0.633333,0.366667,[{'args': {'prompt': 'I'm excited to explore t...,[{'args': {'question': 'I'm excited to explore...,[{'args': {'source': ['SmartSolutions Virtual ...,12,548,0.00822
2,SmartSolutions_App_v10,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_c1f8481e23ccbd7fe3322c04b59a1e2b,"""I need to modify my current service plan ASAP...","""To modify your current service plan, you will...",-,"{""record_id"": ""record_hash_c1f8481e23ccbd7fe33...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-01T18:17:14.743313"", ""...",2024-05-01T18:17:23.178083,0.8,0.6,1.0,[{'args': {'prompt': 'I need to modify my curr...,[{'args': {'question': 'I need to modify my cu...,[{'args': {'source': ['Any modifications or am...,8,519,0.00609
3,SmartSolutions_App_v10,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_c5653fb638cf9319f33824cab0125884,"""I need assistance with setting up email marke...","""To get started with setting up email marketin...",-,"{""record_id"": ""record_hash_c5653fb638cf9319f33...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-01T18:17:23.554893"", ""...",2024-05-01T18:17:31.736026,0.8,0.8,0.5,[{'args': {'prompt': 'I need assistance with s...,[{'args': {'question': 'I need assistance with...,[{'args': {'source': ['Email Marketing: \n• M...,8,561,0.00663
4,SmartSolutions_App_v10,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.core.query_en...,record_hash_06af476289ef52220c2bbee245c535a4,"""I'm glad to hear about your virtual assistant...","""Yes, our team of experienced virtual assistan...",-,"{""record_id"": ""record_hash_06af476289ef52220c2...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-05-01T18:17:32.122682"", ""...",2024-05-01T18:17:39.020051,0.8,0.8,1.0,[{'args': {'prompt': 'I'm glad to hear about y...,[{'args': {'question': 'I'm glad to hear about...,[{'args': {'source': ['Virtual Assistant Servi...,6,532,0.00581


In [87]:
# Leaderboard restricted to this app_id (displayed as the cell output).
tru.get_leaderboard(app_ids=[app_name])

Unnamed: 0_level_0,groundedness_measure_with_cot_reasons,relevance,qs_relevance,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
SmartSolutions_App_v10,0.52037,0.789474,0.645614,9.65,0.006893


In [88]:
# tru.run_dashboard()