This is a simple LLM / RAG chat model with chat history saved

In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

# Set environment
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [2]:
# Set up model and embedding
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
model = ChatOpenAI(model="gpt-3.5-turbo")
embedding = OpenAIEmbeddings()

In [3]:
# Set up basic chroma vectorstore
from langchain_community.vectorstores import Chroma
import document_handler

# https://python.langchain.com/docs/integrations/vectorstores/chroma

chroma_collection_name = "LangChainCollection"

import chromadb
new_client = chromadb.EphemeralClient()

vectorstore_initialize = Chroma.from_documents(
    document_handler.processed_texts,
    embedding=embedding,
    collection_name=chroma_collection_name,
    client=new_client,
)

vectorstore = Chroma(
    client=new_client,
    collection_name=chroma_collection_name,
    embedding_function=embedding,
)
retriever = vectorstore.as_retriever()

In [4]:
docs = vectorstore_initialize.similarity_search("What is Chocolate?")
print(docs)
docs = vectorstore.similarity_search("What is Chocolate?")
print(docs)

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


[Document(page_content='Chocolate is a sweet, usually brown, food product made from cocoa beans, which are the seeds of the cacao tree. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass. This cocoa mass is then further processed to extract cocoa solids and cocoa butter.', metadata={'source': 'test_data\\Chocolate.txt'}), Document(page_content='Chocolates are often used in confectionery and desserts, and they can be found in a wide range of products, including bars, truffles, candies, and baked goods. Chocolate is enjoyed worldwide and is often associated with indulgence and celebration. Additionally, cocoa has been linked to various potential health benefits, such as antioxidants and mood enhancement, when consumed in moderation.', metadata={'source': 'test_data\\Chocolate.txt'}), Document(page_content='Chocolate comes in various forms, such as dark chocolate, milk chocolate, and white 

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


[Document(page_content='Chocolate is a sweet, usually brown, food product made from cocoa beans, which are the seeds of the cacao tree. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass. This cocoa mass is then further processed to extract cocoa solids and cocoa butter.', metadata={'source': 'test_data\\Chocolate.txt'}), Document(page_content='Chocolates are often used in confectionery and desserts, and they can be found in a wide range of products, including bars, truffles, candies, and baked goods. Chocolate is enjoyed worldwide and is often associated with indulgence and celebration. Additionally, cocoa has been linked to various potential health benefits, such as antioxidants and mood enhancement, when consumed in moderation.', metadata={'source': 'test_data\\Chocolate.txt'}), Document(page_content='Chocolate comes in various forms, such as dark chocolate, milk chocolate, and white 

In [5]:
from langchain.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder
)
from langchain_core.output_parsers import StrOutputParser
from langchain.agents import tool
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

In [6]:
from operator import itemgetter

from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    return_messages=True, output_key="output", input_key="question"
)

In [7]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Chat History: {chat_history}
"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [8]:
# Prompt

system_message_template = (
    "You are a helpful assistant who helps answer questions. Answer only the facts based on the context. "
    "Your goal is to provide accurate and relevant answers based on the facts in the provided context. "
    "Make sure to reference the above source documents appropriately and avoid making assumptions or adding personal opinions. "
    "Emphasize the use of facts from the provided source documents. "
    "Instruct the model to use source name for each fact used in the response. "
    "Avoid generating speculative or generalized information. "
    "Use square brackets to reference the source, e.g. [info1.txt]. "
    "Do not combine sources, list each source separately, e.g. [info1.txt][info2.pdf].\n"
    "Here is how you should answer every question:\n"
        "-Look for relevant information in the above source documents to answer the question.\n"
        "-If the source document does not include the exact answer, please respond with relevant information from the data in the response along with citation. You must include a citation to each document referenced.\n"
        "-If you cannot find answer in below sources, respond with I am not sure. Do not provide personal opinions or assumptions and do not include citations.\n"
        "-If you use any information in the context, include the index(starts at 1) of the statement as citation in your answer\n"
    "At the end of your response:\n" 
    "1. Add key words from the paragraphs. \n"
    "2. Suggest a further question that can be answered by the paragraphs provided. \n"
    "3. Create a source list of source name, author name, and a link for each document you cited.\n"
    "{context}"

)

MEMORY_KEY = "chat_history"

final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message_template),
        # MessagesPlaceholder(variable_name=MEMORY_KEY),
        ("human", "{question}"),
        # MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

from langchain.prompts.prompt import PromptTemplate

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)


In [10]:
# Set llm chain
from langchain_core.messages import get_buffer_string

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | model
    | StrOutputParser()
}

chain = (
    loaded_memory
    | standalone_question
    | {"context": itemgetter("standalone_question") | retriever | format_docs, "question": lambda x: x["standalone_question"]}
    | final_prompt
    | model
    | StrOutputParser()
)

In [11]:
import pandas as pd

df = pd.read_csv('./test_data/Questions.csv', delimiter=',')
tuples = [tuple(x) for x in df.values]
dicts = df.to_dict('records')

print(dicts)

questions = list(map(lambda x : x['Question'], dicts))
print(questions)

[{'Question': 'What is Chocolate?', 'Follow up': 'No'}, {'Question': 'Is that a word?', 'Follow up': 'Yes'}, {'Question': 'Write the following words and then continue: g\\thumb|350', 'Follow up': 'No'}, {'Question': 'What is iPhone?', 'Follow up': 'No'}, {'Question': 'What is Task Decomposition?', 'Follow up': ' No'}]
['What is Chocolate?', 'Is that a word?', 'Write the following words and then continue: g\\thumb|350', 'What is iPhone?', 'What is Task Decomposition?']


In [12]:
# for dict in dicts:
#         question = dict['Question']
#         follow_up = dict['Follow up']
#         if follow_up == "No":
#                 memory.clear()
#         print(memory.load_memory_variables({}))
#         llm_response = chain.invoke({"question": question})
#         print(llm_response)
#         memory.save_context({"question":question}, {"output":llm_response})

# memory.clear()

In [13]:
from trulens_eval import TruChain, Feedback, Tru
from trulens_eval.schema import FeedbackResult
tru = Tru()
tru.reset_database()

🦑 Tru initialized with db url sqlite:///default.sqlite .
🛑 Secret keys may be written to the database. See the `database_redact_keys` option of `Tru` to prevent this.


In [14]:
from trulens_eval.feedback.provider import OpenAI
import numpy as np

# Initialize provider class
openai = OpenAI(model_engine="gpt-3.5-turbo")

# select context to be used in feedback. the location of context is app specific.
from trulens_eval.app import App
context = App.select_context(chain)

from trulens_eval.feedback import Groundedness
grounded = Groundedness(groundedness_provider=OpenAI(model_engine="gpt-3.5-turbo"))
# Define a groundedness feedback function
f_groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons)
    .on(context.collect()) # collect context chunks into a list
    .on_output()
    .aggregate(grounded.grounded_statements_aggregator)
)

# Question/answer relevance between overall question and answer.
f_qa_relevance = Feedback(openai.relevance).on_input_output()
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(context)
    .aggregate(np.mean)
    )

✅ In groundedness_measure_with_cot_reasons, input source will be set to __record__.app.middle[1].steps.context.middle[0].get_relevant_documents.rets.collect() .
✅ In groundedness_measure_with_cot_reasons, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In qs_relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In qs_relevance, input statement will be set to __record__.app.middle[1].steps.context.middle[0].get_relevant_documents.rets .


In [15]:
tru_recorder = TruChain(chain,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])

In [16]:
# This is simple one call
with tru_recorder as recording:
    llm_response = chain.invoke({"question":"What is Task Decomposition?"})

display(llm_response)

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Task Decomposition?'}].


'I am not sure.'

In [17]:
tru_recorder_stress_one_question = TruChain(chain,
    app_id='Chain1_One_Q_Multiple',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])

# This is simple multiple call on same query
with tru_recorder_stress_one_question as recording:
    for i in range(10):
        llm_response = chain.invoke({"question": "What is Multiple Question?"})
        display(llm_response)

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'The provided source documents do not contain any information about the term "Multiple Question." I am not sure what you are referring to.'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I\'m sorry, but I couldn\'t find any information about the definition of "Multiple Question" in the provided paragraphs.'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I am not sure about the term "Multiple Question" as it is not mentioned in the provided sources.'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I am not sure.'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I am not sure.'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I am not sure.'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I am not sure what you mean by "Multiple Question." Could you please provide more information or clarify your question?'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I am not sure.'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I am not sure.'

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Multiple Question?'}].


'I am sorry, but I could not find any information about the definition of "Multiple Question" in the provided source documents.'

In [18]:
tru_recorder_more_questions = TruChain(chain,
    app_id='Chain1_More_Qs',
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness])

# This is simple multiple call on same query
with tru_recorder_more_questions as recording:
    for dict in dicts:
        question = dict['Question']
        follow_up = dict['Follow up']
        if follow_up == "No":
                memory.clear()
        print(memory.load_memory_variables({}))
        llm_response = chain.invoke({"question": question})
        print(llm_response)
        memory.save_context({"question":question}, {"output":llm_response})

memory.clear()

{'history': []}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Chocolate?'}].


Chocolate is a sweet, usually brown, food product made from cocoa beans, which are the seeds of the cacao tree. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass. This cocoa mass is then further processed to extract cocoa solids and cocoa butter. [1.txt]
{'history': [HumanMessage(content='What is Chocolate?'), AIMessage(content='Chocolate is a sweet, usually brown, food product made from cocoa beans, which are the seeds of the cacao tree. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass. This cocoa mass is then further processed to extract cocoa solids and cocoa butter. [1.txt]')]}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'Is that a word?'}].


Yes, "Chocolate" is a word. [info1.txt]
{'history': []}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'Write the following words and then continue: g\\thumb|350'}].


Sure! Here are the words you requested: chocolate, cocoa beans, cacao tree, cocoa mass, cocoa solids, cocoa butter, dark chocolate, milk chocolate, white chocolate, confectionery, desserts, bars, truffles, candies, baked goods, indulgence, celebration, antioxidants, mood enhancement.

Chocolate is a sweet, usually brown, food product made from cocoa beans, which are the seeds of the cacao tree [info1.txt]. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass [info1.txt]. This cocoa mass is then further processed to extract cocoa solids and cocoa butter [info1.txt].

Chocolate comes in various forms, such as dark chocolate, milk chocolate, and white chocolate [info1.txt]. Dark chocolate contains a higher percentage of cocoa solids and less sugar, giving it a more intense and bitter flavor [info1.txt]. Milk chocolate includes milk solids in addition to cocoa, creating a sweeter and creamier 

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is iPhone?'}].


I am not sure.
{'history': [HumanMessage(content='What is iPhone?'), AIMessage(content='I am not sure.')]}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Unsure what the main input string is for the call to invoke with args [{'question': 'What is Task Decomposition?'}].


I am not sure.


In [19]:
# The record of the app invocation can be retrieved from the `recording`:

rec = recording.get() # use .get if only one record
# recs = recording.records # use .records if multiple

display(rec)

RuntimeError: Recording context recorded more than 1 record. You can get them with ctx.records, ctx[i], or `for r in ctx: ...`.

In [None]:
# The results of the feedback functions can be rertireved from the record. These
# are `Future` instances (see `concurrent.futures`). You can use `as_completed`
# to wait until they have finished evaluating.

from concurrent.futures import as_completed

for feedback_future in  as_completed(rec.feedback_results):
    feedback, feedback_result = feedback_future.result()
    
    feedback: Feedback
    feedbac_result: FeedbackResult

    display(feedback.name, feedback_result.result)

In [None]:
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])

records.head()

In [None]:
tru.get_leaderboard(app_ids=["Chain1_ChatApplication"])

In [24]:
tru.run_dashboard() # open a local streamlit app to explore

# tru.stop_dashboard() # stop if needed