This is a simple LLM / RAG chat model with chat history saved

In [1]:
from dotenv import load_dotenv
import os

load_dotenv()

# Set environment
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [2]:
# Set up model and embedding
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
model = ChatOpenAI(model="gpt-3.5-turbo")
embedding = OpenAIEmbeddings()

In [3]:
# Set up basic chroma vectorstore
from langchain_community.vectorstores import Chroma
import document_handler

# https://python.langchain.com/docs/integrations/vectorstores/chroma

chroma_collection_name = "LangChainCollection"

import chromadb
new_client = chromadb.EphemeralClient()

vectorstore_initialize = Chroma.from_documents(
    document_handler.processed_texts,
    embedding=embedding,
    collection_name=chroma_collection_name,
    client=new_client,
)

vectorstore = Chroma(
    client=new_client,
    collection_name=chroma_collection_name,
    embedding_function=embedding,
)
retriever = vectorstore.as_retriever()

In [4]:
docs = vectorstore_initialize.similarity_search("What is Chocolate?")
print(docs)
docs = vectorstore.similarity_search("What is Chocolate?")
print(docs)

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


[Document(page_content='Chocolate is a sweet, usually brown, food product made from cocoa beans, which are the seeds of the cacao tree. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass. This cocoa mass is then further processed to extract cocoa solids and cocoa butter.', metadata={'source': 'test_data\\Chocolate.txt'}), Document(page_content='Chocolates are often used in confectionery and desserts, and they can be found in a wide range of products, including bars, truffles, candies, and baked goods. Chocolate is enjoyed worldwide and is often associated with indulgence and celebration. Additionally, cocoa has been linked to various potential health benefits, such as antioxidants and mood enhancement, when consumed in moderation.', metadata={'source': 'test_data\\Chocolate.txt'}), Document(page_content='Chocolate comes in various forms, such as dark chocolate, milk chocolate, and white 

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


[Document(page_content='Chocolate is a sweet, usually brown, food product made from cocoa beans, which are the seeds of the cacao tree. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass. This cocoa mass is then further processed to extract cocoa solids and cocoa butter.', metadata={'source': 'test_data\\Chocolate.txt'}), Document(page_content='Chocolates are often used in confectionery and desserts, and they can be found in a wide range of products, including bars, truffles, candies, and baked goods. Chocolate is enjoyed worldwide and is often associated with indulgence and celebration. Additionally, cocoa has been linked to various potential health benefits, such as antioxidants and mood enhancement, when consumed in moderation.', metadata={'source': 'test_data\\Chocolate.txt'}), Document(page_content='Chocolate comes in various forms, such as dark chocolate, milk chocolate, and white 

In [5]:
from langchain.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder
)
from langchain_core.output_parsers import StrOutputParser
from langchain.agents import tool
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

In [6]:
from operator import itemgetter

from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    return_messages=True, output_key="output", input_key="question"
)

In [7]:
template = """Answer the question based only on the following context:
{context}

Question: {question}
Chat History: {chat_history}
"""
prompt = ChatPromptTemplate.from_template(template)

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [8]:
# Prompt

system_message_template = (
    "You are a helpful assistant who helps answer questions. Answer only the facts based on the context. "
    "Your goal is to provide accurate and relevant answers based on the facts in the provided context. "
    "Make sure to reference the above source documents appropriately and avoid making assumptions or adding personal opinions. "
    "Emphasize the use of facts from the provided source documents. "
    "Instruct the model to use source name for each fact used in the response. "
    "Avoid generating speculative or generalized information. "
    "Use square brackets to reference the source, e.g. [info1.txt]. "
    "Do not combine sources, list each source separately, e.g. [info1.txt][info2.pdf].\n"
    "Here is how you should answer every question:\n"
        "-Look for relevant information in the above source documents to answer the question.\n"
        "-If the source document does not include the exact answer, please respond with relevant information from the data in the response along with citation. You must include a citation to each document referenced.\n"
        "-If you cannot find answer in below sources, respond with I am not sure. Do not provide personal opinions or assumptions and do not include citations.\n"
        "-If you use any information in the context, include the index(starts at 1) of the statement as citation in your answer\n"
    "At the end of your response:\n" 
    "1. Add key words from the paragraphs. \n"
    "2. Suggest a further question that can be answered by the paragraphs provided. \n"
    "3. Create a source list of source name, author name, and a link for each document you cited.\n"
    "{context}"

)

MEMORY_KEY = "chat_history"

final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_message_template),
        # MessagesPlaceholder(variable_name=MEMORY_KEY),
        ("human", "{question}"),
        # MessagesPlaceholder(variable_name="agent_scratchpad"),
    ]
)

from langchain.prompts.prompt import PromptTemplate

_template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)


In [10]:
# Set llm chain
from langchain_core.messages import get_buffer_string

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)

standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | model
    | StrOutputParser()
}

chain = (
    loaded_memory
    | standalone_question
    | {"context": itemgetter("standalone_question") | retriever | format_docs, "question": lambda x: x["standalone_question"]}
    | final_prompt
    | model
    | StrOutputParser()
)

In [11]:
import pandas as pd

df = pd.read_csv('./test_data/Questions.csv', delimiter=',')
tuples = [tuple(x) for x in df.values]
dicts = df.to_dict('records')

print(dicts)

questions = list(map(lambda x : x['Question'], dicts))
print(questions)

[{'Question': 'What is Chocolate?', 'Follow up': 'No'}, {'Question': 'Is that a word?', 'Follow up': 'Yes'}, {'Question': 'Write the following words and then continue: g\\thumb|350', 'Follow up': 'No'}, {'Question': 'What is iPhone?', 'Follow up': 'No'}, {'Question': 'What is Task Decomposition?', 'Follow up': ' No'}]
['What is Chocolate?', 'Is that a word?', 'Write the following words and then continue: g\\thumb|350', 'What is iPhone?', 'What is Task Decomposition?']


In [12]:
for dict in dicts:
        question = dict['Question']
        follow_up = dict['Follow up']
        answer = dict['Answer']
        if follow_up == "No":
                memory.clear()
        print(memory.load_memory_variables({}))
        llm_response = chain.invoke({"question": question})
        print(llm_response)
        memory.save_context({"question":question}, {"output":llm_response})

memory.clear()

{'history': []}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


Chocolate is a sweet food product that is usually brown in color and is made from cocoa beans, which are the seeds of the cacao tree. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass. This cocoa mass is further processed to extract cocoa solids and cocoa butter [info1.txt].
{'history': [HumanMessage(content='What is Chocolate?'), AIMessage(content='Chocolate is a sweet food product that is usually brown in color and is made from cocoa beans, which are the seeds of the cacao tree. The process of making chocolate involves harvesting and fermenting cacao beans, drying them, and then roasting and grinding them to produce cocoa mass. This cocoa mass is further processed to extract cocoa solids and cocoa butter [info1.txt].')]}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


Yes, "chocolate" is a word. [source1.txt]
{'history': []}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


I'm sorry, but I cannot view or process images. Could you please provide the text description or context of the image so that I can assist you further?
{'history': []}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


The provided source documents do not contain information about the meaning of "iPhone." Therefore, I am unable to provide an answer based on the given sources.
{'history': [HumanMessage(content='What is iPhone?'), AIMessage(content='The provided source documents do not contain information about the meaning of "iPhone." Therefore, I am unable to provide an answer based on the given sources.')]}


Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


The provided source documents do not contain information about the meaning of "Task Decomposition." Therefore, I am unable to provide an answer with a specific citation.


LangSmith Evaluation

In [13]:
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
# os.environ["LANGCHAIN_ENDPOINT"] = 'https://api.smith.langchain.com'

import uuid

from langsmith import Client

uid = uuid.uuid4()
client = Client()

How to run and evaluator
https://docs.smith.langchain.com/tracing/use-cases/track-sentiment

In [None]:
# from langsmith.evaluation import EvaluationResult, RunEvaluator
# from langsmith.schemas import Example, Run

# from langchain.chains import LLMChain
# from langchain_openai import ChatOpenAI


# class SentimentEvaluator(RunEvaluator):
#     def __init__(self):
#         prompt = """Is the predominant sentiment in the following statement positive, negative, or neutral?
# ---------
# Statement: {input}
# ---------
# Respond in one word: positive, negative, or neutral.
# Sentiment:"""

#         llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)
#         self.chain = LLMChain.from_string(llm=llm, template=prompt)

#     def evaluate_run(self, run: Run, example: Example) -> EvaluationResult:
#         input_str = str(list(run.inputs.values())[0])
#         prediction = self.chain.run(input_str)
#         # Strip the prompt
#         prediction = prediction.strip()
#         score = {"positive": 1, "negative": -1, "neutral": 0}.get(prediction)
#         return EvaluationResult(
#             key="sentiment",
#             value=prediction,
#             score=score,
#         )

# evaluator = SentimentEvaluator()
# for run in client.list_runs(
#     project_name="my-project",
#     execution_order=1, # Do not return child / nested runs
# ):
#         client.evaluate_run(run, evaluator)


In [None]:
# for question in questions:
#     result = chain.invoke(question)
#     # print(result)

#     question=result.get("input", None)
#     actual_output=result.get("output", None)
#     retrieval_context = [result.get("context", None)]
#     input = final_prompt.format(question=result["input"], context=result["context"])

#     print(input)
#     print(actual_output)
#     print(retrieval_context)

#     test_case = LLMTestCase(
#         input=final_prompt.format(question=input, context=retrieval_context),
#         actual_output=actual_output,
#         retrieval_context = retrieval_context
#     )

#     # print(assert_test(test_case, metrics))
#     test_cases.append(test_case)

# print(evaluate(test_cases, metrics))

In [None]:
print([e for e in questions])

In [14]:
dataset_name = f"Simple llm langsmith evaluation dataset - {uid}"

dataset = client.create_dataset(
    dataset_name=dataset_name,
    description="An example agent evals dataset",
)

client.create_examples(
    inputs=[{"question": e} for e in questions],
    # outputs=[e["outputs"] for e in examples], # Outputs are optional, but recommended.
    dataset_id=dataset.id,
)

In [18]:
from langchain.smith import RunEvalConfig
from langchain.evaluation import load_evaluator, EvaluatorType

# eval_config = RunEvalConfig(
#     # We will use the chain-of-thought Q&A correctness evaluator
#     evaluators=["cot_qa"],
# )

evaluation_config = RunEvalConfig(
    evaluators=[
        # You can define an arbitrary criterion as a key: value pair in the criteria dict
        # RunEvalConfig.Criteria({"creativity": "Is this submission creative, imaginative, or novel?"}),
        # We provide some simple default criteria like "conciseness" you can use as well
        RunEvalConfig.Criteria("conciseness"),
        RunEvalConfig.Criteria("relevance"),
        RunEvalConfig.Criteria("harmfulness"),
        RunEvalConfig.Criteria("coherence"),
        RunEvalConfig.Criteria("maliciousness"),
        RunEvalConfig.Criteria("helpfulness"),
        RunEvalConfig.Criteria("controversiality"),
        RunEvalConfig.Criteria("misogyny"),
        RunEvalConfig.Criteria("criminality"),
        RunEvalConfig.Criteria("insensitivity"),
        RunEvalConfig.Criteria("depth"),
        RunEvalConfig.Criteria("creativity"),
        RunEvalConfig.Criteria("detail"),
    ],
    input_key="question"
)

def construct_chain():
    # Add a step to convert the data from the dataset to a form the chain can consume
    x = itemgetter("question")
    print(x)
    return ({
        "question": lambda x: ({"question": x["question"]}),
    } | chain)


results = client.run_on_dataset(
    dataset_name=dataset_name,
    llm_or_chain_factory=chain,
    evaluation=evaluation_config,
    project_name="simple_llm_testing-3",
    verbose = True,
)

#KEEP THIS URL https://smith.langchain.com/public/cfa11397-dafa-4936-9548-a984b33f1658/d
# https://smith.langchain.com/public/17ab53ee-6971-41dd-b8ed-a9b41390aed0/d

View the evaluation results for project 'simple_llm_testing-3' at:
https://smith.langchain.com/o/99caf4db-f19e-57e9-bc93-12af0e3dd026/datasets/290c08a2-d150-4a7b-a243-60db3c6f07b5/compare?selectedSessions=99ac6cef-e7f7-4538-9ed1-c115d966c565

View all tests for Dataset Basic llm langsmith evaluation dataset - 82127b35-adf9-4d30-8893-c91b6579c87d at:
https://smith.langchain.com/o/99caf4db-f19e-57e9-bc93-12af0e3dd026/datasets/290c08a2-d150-4a7b-a243-60db3c6f07b5


ValueError: Must specify reference_key in smith_eval.RunEvalConfig to use evaluator of type qa with dataset with multiple output keys: None.