In [109]:
# Imports main tools:
from trulens_eval import TruChain, Tru
tru = Tru()
tru.reset_database()

# Imports from LangChain to build app
import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.schema import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [110]:
import os
import boto3
from io import BytesIO
import PyPDF2
from pathlib import Path
from dotenv import load_dotenv

# Remove the PINECONE_API_KEY environment variable if it exists to ensure a clean state.
if 'PINECONE_API_KEY' in os.environ:
    del os.environ['PINECONE_API_KEY']

# Load environment variables from a .env file.
load_dotenv()

# Import necessary modules and classes for the chatbot functionality.
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.agents import AgentExecutor
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores.faiss import FAISS
from langchain_community.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.tools.retriever import create_retriever_tool
from langchain.memory import ConversationBufferMemory
from langchain.agents import create_tool_calling_agent
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_community.chat_message_histories.upstash_redis import UpstashRedisChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables import RunnablePassthrough
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.tools import tool
from langchain_community.tools.yahoo_finance_news import YahooFinanceNewsTool

import chainlit as cl

# Setup for Langsmith tracing.
from langsmith import Client
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_TRACING_V2"] = "agent-chain"
client = Client()

# Retrieve Upstash Redis configuration from environment variables.
UPSTASH_REDIS_REST_URL = os.getenv("UPSTASH_REDIS_REST_URL")  # URL for the Upstash Redis REST API.
UPSTASH_REDIS_REST_TOKEN = os.getenv("UPSTASH_REDIS_REST_TOKEN")  # Authentication token for the Upstash Redis REST API.

# Create a chat message history instance using Upstash Redis.
session_id = "chat-1"  # Session ID for the chat session.
history = UpstashRedisChatMessageHistory(
    url=UPSTASH_REDIS_REST_URL,
    token=UPSTASH_REDIS_REST_TOKEN,
    session_id=session_id,
    ttl=0
)

model_name = "text-embedding-3-small"  # Model name for the OpenAI embeddings.
embeddings = OpenAIEmbeddings(model=model_name)  # Embedding model for vector representation.

# Initialize a Pinecone-based vector store retriever.
index_name = os.getenv('PINECONE_INDEX_NAME')  # Name of the Pinecone index.
pinecone_api_key = os.getenv("PINECONE_API_KEY")  # API key for Pinecone.
pc = Pinecone(api_key=pinecone_api_key)  # Initialize Pinecone with the API key.
index = pc.Index(index_name)  # Access or create the specified index in Pinecone.
vectorstore = PineconeVectorStore(index_name=index_name, embedding=embeddings)  # Pinecone vector store for storing and retrieving vectors.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})  # Retriever tool configured to return top 3 results.

# Initialize the chatbot with specified retrievers and session ID.
model = "gpt-3.5-turbo-0125"  # Model identifier for the OpenAI chat model.
llm = ChatOpenAI(model=model)  # Initialize the chat model.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a financial advisor called Investibot."),
    ("human", "{input}"),
])

rag_chain = (
    {
        "context": retriever,
        "input": RunnablePassthrough(),
     }
    | prompt
    | llm
    | StrOutputParser()
)

# print(rag_chain)

# Process a chat input and return the agent's response.
# input = "What was American Airlines' revenue for 2019?"  # Example user input.
# response = rag_chain.invoke(input)
# print(response)

In [111]:
input = "What was American Airlines' revenue for 2019?"  # Example user input.
response = retriever.invoke(input)
for doc in response:
    print(doc)

page_content='during 2020 compared to 2019  Customer revenues increased in 2020 for contracted airline services charter flights \naircraft leasing and aviation fuel sales compared to the previous year periods  Beginning in late February 2020 our \nrevenues were disrupted due to the COVID19 pandemic  The DoD and other customers began canceling scheduled \npassenger flights as a result of the pandemic  The decline in revenues from these cancellations was offset by an \nincrease in flying for our customers package delivery networks and charter flight operations during 2020  Revenues \nfor 2018 were 8923 million and included only a few weeks of revenue for OAI which was acquired on November \n9 2018\nThe consolidated net earnings from continuing operations were 251 million for 2020 compared to 600 \nmillion for 2019 and 679 million for 2018  The pretax earnings from continuing operations were 414 million' metadata={'company': 'NASDAQ_ATSG', 'index': 193.0, 'name': 'NASDAQ_ATSG_2020.txt', '

# Lesson 2: RAG Triad of metrics

In [112]:
# ANOTHER ALTERNATIVE

from trulens_eval.feedback.provider import OpenAI
from trulens_eval import Feedback
import numpy as np

# Initialize provider class
provider = OpenAI()

# select context to be used in feedback. the location of context is app specific.
from trulens_eval.app import App
context = App.select_context(retriever)

# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons)
    .on_input()
    .on(context)
    .aggregate(np.mean)
)

# Define a groundedness feedback function
f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
)

# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance)
    .on_input_output()
)

✅ In context_relevance_with_cot_reasons, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In context_relevance_with_cot_reasons, input context will be set to __record__.app.invoke.rets .
✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.invoke.rets.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .


In [113]:
from trulens_eval import TruChain

tru_recorder = TruChain(rag_chain,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness])

In [116]:
test_prompts = [
    "What was American Airlines' revenue for 2019?",
    # "What was Nvidia's revenue for 2019?",
    # "What was Apple's revenue for 2019?",
    # "What was Microsoft's revenue for 2019?",
    # "What was Applied Optoelectronics's revenue for 2019?",
    # "What was Arch Capital Group's revenue for 2019?",
    # "What was Abiomed's revenue for 2019?",
    # "What was ACI Worldwide's revenue for 2019?",
    # "What was Acadia Healthcare Company's revenue for 2019?",
    # "What was ACNB Corporation's revenue for 2019?",
]

with tru_recorder as recording:
    for prompt in test_prompts:
        llm_response = rag_chain.invoke(prompt)
        display(llm_response)

"American Airlines' revenue for 2019 was approximately $45.8 billion."

In [117]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get() # use .get if only one record
# recs = recording.records # use .records if multiple

display(rec)

[Record(record_id='record_hash_dce8b75862c3ff0c933f1872004fbcbc', app_id='Chain1_ChatApplication', cost=Cost(n_requests=2, n_successful_requests=2, n_classes=0, n_tokens=59, n_stream_chunks=0, n_prompt_tokens=43, n_completion_tokens=16, cost=3.9999999999999996e-05), perf=Perf(start_time=datetime.datetime(2024, 6, 11, 21, 51, 29, 37672), end_time=datetime.datetime(2024, 6, 11, 21, 51, 32, 915368)), ts=datetime.datetime(2024, 6, 11, 21, 51, 32, 915368), tags='-', meta=None, main_input="What was American Airlines' revenue for 2019?", main_output="American Airlines' revenue for 2019 was approximately $45.8 billion.", main_error=None, calls=[RecordAppCall(call_id='dc3201e3-b5e2-4e5b-8538-bf6391f8243c', stack=[RecordAppCallMethod(path=Lens().app, method=Method(obj=Obj(cls=langchain_core.runnables.base.RunnableSequence, id=1756216539984, init_bindings=None), name='invoke')), RecordAppCallMethod(path=Lens().app.first, method=Method(obj=Obj(cls=langchain_core.runnables.base.RunnableParallel, id

In [119]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,latency,total_tokens,total_cost
0,Chain1_ChatApplication,"{""tru_class_info"": {""name"": ""TruChain"", ""modul...",RunnableSequence(langchain_core.runnables.base),record_hash_dce8b75862c3ff0c933f1872004fbcbc,"""What was American Airlines' revenue for 2019?""","""American Airlines' revenue for 2019 was appro...",-,"{""record_id"": ""record_hash_dce8b75862c3ff0c933...","{""n_requests"": 2, ""n_successful_requests"": 2, ...","{""start_time"": ""2024-06-11T21:51:29.037672"", ""...",2024-06-11T21:51:32.915368,3,59,4e-05


In [120]:
tru.get_leaderboard(app_ids=["Chain1_ChatApplication"])

Unnamed: 0_level_0,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1
Chain1_ChatApplication,3.0,4e-05


In [121]:
tru.run_dashboard() # open a local streamlit app to explore

# tru.stop_dashboard() # stop if needed

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.0.95:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>