# Chapter 4. Memory: Enabling Your Chatbot to Learn from Interactions

## A simple version of this memory system using LangChain

In [30]:
# Import modules
from operator import itemgetter

from langchain.memory import ChatMessageHistory
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage, trim_messages, filter_messages
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.chat import MessagesPlaceholder
from langchain_core.runnables import chain
from langchain_core.runnables import RunnableLambda
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [2]:
# Build a prompt with a fixed system instruction followed by a slot for the conversation so far
prompt_temp = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer all questions to the best of your ability."),
        ("placeholder", "{messages}"),
    ]
)

# Instantiate the chat model with its default settings
llm = ChatOpenAI()

# Pipe the prompt into the model to form a minimal chain.
# NOTE: this assignment rebinds the name `chain`, which the imports cell bound to the
# `chain` decorator from langchain_core.runnables.
chain = prompt_temp | llm

# Invoke the chain with the earlier turns passed in by hand.
# Supplying the previous conversation is what lets the model answer the follow-up question in a context-aware manner.
chain.invoke(
    {
        "messages": [
            ("human", "Wha is your name?"),
            ("ai", "My name is John."),
            ("human", "Sorry. What is your name again?"),
        ]
    }
)

AIMessage(content='No problem! My name is John. How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 54, 'total_tokens': 69, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-06006072-8841-4bdf-8360-8f93653e3ff8-0', usage_metadata={'input_tokens': 54, 'output_tokens': 15, 'total_tokens': 69, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

While this may work for demo purposes, it won’t scale in a production environment because the list of conversation messages can grow significantly. Fortunately, LangChain provides a core utility class called *InMemoryChatMessageHistory*, which makes it easier to implement this memory system.

In [3]:
# Start with an empty chat history that keeps its messages in memory
chat_history = InMemoryChatMessageHistory()

# Record one exchange: the user's question, then the model's reply
chat_history.add_message(HumanMessage(content="What is your name?"))
chat_history.add_message(AIMessage(content="My name is John."))

# Display the stored messages
chat_history.messages

[HumanMessage(content='What is your name?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='My name is John.', additional_kwargs={}, response_metadata={})]

In [4]:
# Append the follow-up question to the stored history, then send the whole conversation to the model.
# The model is called once, after the new question has been added, so no request is spent on a
# reply that is never used. The question is stored under `follow_up_question` rather than `input`
# so that the built-in input() function is not shadowed.
follow_up_question = "Sorry, what is your name again?"
chat_history.add_user_message(follow_up_question)
response = chain.invoke({
    "messages": chat_history.messages,
})

# Display the model's answer
response

AIMessage(content='My name is John.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 53, 'total_tokens': 58, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-f5f05bc3-f720-463c-b289-950e1a984496-0', usage_metadata={'input_tokens': 53, 'output_tokens': 5, 'total_tokens': 58, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In the previous example, we integrated the chat messages into the chain explicitly, but this requires tedious manual management of each new message. In a production setting, we need a way to persist the chat history and to automate inserting and updating it.

To solve this problem, we can utilize LangChain’s RunnableWithMessageHistory class to automatically insert and update chat messages.

In [5]:
# First, rebuild the prompt template with two slots: "history", which will later receive
# all prior chat messages, and "input", which receives the latest user message
prompt_temp = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer all questions to the best of your ability."),
        ("placeholder", "{history}"),
        ("human", "{input}"),
    ]
)

# Rebuild the chain so that it uses the new prompt
chain = prompt_temp | llm

In [6]:
# Next, wrap the chain in RunnableWithMessageHistory so that the latest user input and the
# chat history are incorporated for us.
chat_history_for_chain = InMemoryChatMessageHistory()
chain_with_message_history = RunnableWithMessageHistory(
    runnable=chain,
    # Factory that returns the history for a session. session_id identifies the conversation
    # thread, which allows several conversations to share one chain; this demo ignores it
    # and returns the same history object for every session.
    get_session_history=lambda session_id: chat_history_for_chain,
    # Key of the input dict that holds the new message to track and store in the history
    # (matches the "input" variable in the prompt).
    input_messages_key="input",
    # Key under which earlier messages are injected into the prompt
    # (matches the "history" placeholder in the prompt).
    history_messages_key="history",
)

Let’s look at an example where we return a chat history corresponding to each session.

In [7]:
# Recreate the prompt, model, and chain used in the previous cells
prompt_temp = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer all questions to the best of your ability."),
        ("placeholder", "{history}"),
        ("human", "{input}"),
    ]
)
llm = ChatOpenAI()
chain = prompt_temp | llm

# One chat history per session, keyed by session_id.
# The annotation uses the built-in generic syntax dict[...] (Python 3.9+) and says that
# `histories` maps session_id strings to chat history objects.
# New sessions are added as: histories[session_id] = InMemoryChatMessageHistory()
histories: dict[str, InMemoryChatMessageHistory] = {}

# Look up the chat history for a session, creating an empty one the first time a session is seen.
# The parameter is annotated as a string and defaults to the empty string.
def get_session_history(session_id: str = ''):
    """Return the chat history stored under ``session_id`` in ``histories``.

    If no history exists for ``session_id`` yet, an empty InMemoryChatMessageHistory
    is created, stored in ``histories``, and returned.
    """
    try:
        return histories[session_id]
    except KeyError:
        # First message of this session: register a fresh history for it
        histories[session_id] = InMemoryChatMessageHistory()
        return histories[session_id]

# Wrap the chain so that each session reads from and writes to its own history
with_message_history = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

In [8]:
# Try it out: send a first message, passing the input together with the session id ("123")
with_message_history.invoke(
    {"input": "hi im bob!"},
    config={"configurable": {"session_id": "123"}},
)

AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 31, 'total_tokens': 41, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5e4e40e8-bc1a-45df-bb9b-314db6711696-0', usage_metadata={'input_tokens': 31, 'output_tokens': 10, 'total_tokens': 41, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [9]:
# Continue the conversation in the same session ("123"), so the earlier message is in its history
with_message_history.invoke(
    {"input": "whats my name?"},
    config={"configurable": {"session_id": "123"}},
)

AIMessage(content='Your name is Bob.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 5, 'prompt_tokens': 54, 'total_tokens': 59, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-bcf33259-6689-4d5f-9cbf-5c83e9db6895-0', usage_metadata={'input_tokens': 54, 'output_tokens': 5, 'total_tokens': 59, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [10]:
# Ask the same question under a new session_id ("456"): this session has its own,
# empty history, so the model does not remember the name
with_message_history.invoke(
    {"input": "whats my name?"},
    config={"configurable": {"session_id": "456"}},
)

AIMessage(content="I'm sorry, but I don't have access to your personal information. However, if there's anything specific you'd like help with, feel free to ask!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 32, 'total_tokens': 65, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-e37b132b-139a-47d9-8bcc-cda5e837be1a-0', usage_metadata={'input_tokens': 32, 'output_tokens': 33, 'total_tokens': 65, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [11]:
# Display the dictionary that maps each session_id to its chat history
histories

{'123': InMemoryChatMessageHistory(messages=[HumanMessage(content='hi im bob!', additional_kwargs={}, response_metadata={}), AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 31, 'total_tokens': 41, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-5e4e40e8-bc1a-45df-bb9b-314db6711696-0', usage_metadata={'input_tokens': 31, 'output_tokens': 10, 'total_tokens': 41, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}), HumanMessage(content='whats my name?', additional_kwargs={}, response_metadata={}), AIMessage(content='Your name is Bob.', addi

# How to Modify Chat History
In many cases, the chat history messages aren’t in the best state or format to generate an accurate response from the model. To overcome this problem, we can modify the chat history in a variety of ways.

## Trimming messages
LLMs have limited context windows, so the final prompt sent to the model can’t exceed the model’s input token limit. In addition, excessive prompt information can distract the model and lead to hallucination.

An effective solution to this problem is to limit the number of messages retrieved from the chat history and appended to the prompt. In practice, we only need to load and store the most recent n chat history messages. Let’s use an example chat history with some preloaded messages.

In [12]:
# Configure a reusable trimmer with LangChain's trim_messages helper.
# No message list is passed here; the messages are supplied later via trimmer.invoke(...).
trimmer = trim_messages(
    # Keep the last (most recent) messages
    strategy="last",
    # Token limit for the trimmed list
    max_tokens=65,
    # Model used to count tokens
    token_counter=ChatOpenAI(model="gpt-4o"),
    # Include the system message
    include_system=True,
    # start_on="human" ensures that we never remove a HumanMessage (the question) while
    # keeping the corresponding AIMessage (the model's response to it).
    start_on="human",
    # Do not allow partial messages
    allow_partial=False,
)

In [13]:
# Create a long list of messages: one system message followed by alternating human/AI turns
messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks"),
    AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"),
    AIMessage(content="yes!"),
]

# Trim the list with the trimmer configured above and display what is kept
trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='no problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes!', additional_kwargs={}, response_metadata={})]

Now, let’s incorporate the trimmer into a chain and RunnableWithMessageHistory. To use it in the chain, we need to ensure that the trimmer runs before the messages are passed to our prompt.

In [14]:
# Prompt with a system instruction and a slot for the (trimmed) conversation
prompt_temp = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant. Answer all questions to the best of your ability."),
        ("placeholder", "{messages}"),
    ]
)

# Chat model with default settings
llm = ChatOpenAI()

# Pass the incoming message list through the trimmer first, and make the result
# available to the prompt template under the "messages" key
chain = {"messages": trimmer} | prompt_temp | llm

# Track the conversation in a single in-memory history; the factory takes no
# arguments and always returns that same history object
history = InMemoryChatMessageHistory()
with_message_history = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=lambda: history,
)

In [15]:
# Hold a short conversation through the history-aware chain.
# The earlier questions are sent one after another; their replies are not displayed.
earlier_questions = [
    "Today is a good day to learn about LangChain. Do you agree?",
    "Why is sky blue?",
    "What is the capital of France?",
    "Tell me a joke.",
]
for question in earlier_questions:
    with_message_history.invoke([HumanMessage(content=question)])

# The final question refers back to an earlier turn; its reply is the cell's output
with_message_history.invoke(
    [HumanMessage(content="What joke did you tell me? Could you repeat it?")]
)

AIMessage(content="Of course! Here's the joke again:\n\nWhy did the scarecrow win an award?\nBecause he was outstanding in his field!", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 78, 'total_tokens': 104, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-38153048-f1ab-41a9-8a4c-76faa27a02b4-0', usage_metadata={'input_tokens': 78, 'output_tokens': 26, 'total_tokens': 104, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [16]:
# Display the chat history object that the wrapped chain has been reading from and writing to
history

InMemoryChatMessageHistory(messages=[HumanMessage(content='Today is a good day to learn about LangChain. Do you agree?', additional_kwargs={}, response_metadata={}), AIMessage(content="Absolutely! Learning about LangChain can be a great way to expand your knowledge and stay informed about new technologies and developments in the field. Let me know if you have any specific questions about LangChain, and I'll do my best to provide you with the information you need.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 55, 'prompt_tokens': 42, 'total_tokens': 97, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-dc0b5e2c-4982-41e4-86e2-a5fabeb79f01-0', usage_metadata={'input_tokens

## Summary memory
Aside from trimming messages, we can utilize the LLM to generate a summary of the conversation and then incorporate this summary into the prompt sent to the model.

In [17]:
# Store a short demo conversation in an InMemoryChatMessageHistory, the same in-memory
# chat history class used in the rest of this notebook.
# Note: ChatMessageHistory (imported from langchain.memory) is a separate in-memory history
# class, not a base class of InMemoryChatMessageHistory; the langchain_core class is used
# here so that the notebook relies on a single history implementation.
demo_ephemeral_chat_history = InMemoryChatMessageHistory()
demo_ephemeral_chat_history.add_user_message("Hey there! I'm Nemo.")
demo_ephemeral_chat_history.add_ai_message("Hello!")
demo_ephemeral_chat_history.add_user_message("How are you today?")
demo_ephemeral_chat_history.add_ai_message("Fine thanks!")

# Display the stored messages
demo_ephemeral_chat_history.messages

[HumanMessage(content="Hey there! I'm Nemo.", additional_kwargs={}, response_metadata={}),
 AIMessage(content='Hello!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='How are you today?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Fine thanks!', additional_kwargs={}, response_metadata={})]

In [18]:
# Prompt: system instruction, then the stored chat history, then the new user message
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability. The provided chat history includes facts about the user you are speaking with.",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("user", "{input}"),
    ]
)

# Chain that feeds the prompt to the model
chain = prompt | llm

# Wrap the chain with message history; the factory ignores session_id and always
# returns the demo history
chain_with_message_history = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=lambda session_id: demo_ephemeral_chat_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [20]:
# Next, a function that distills the previous interactions into a summary.
# It is placed at the front of the chain so that it runs before the model is asked the new question.
def summarize_messages(chain_input):
    """Replace the stored chat history with a single model-written summary.

    ``chain_input`` is accepted because the function is used as a chain step,
    but it is not read.

    Returns False when the history is empty (nothing is changed), and True once
    the history has been replaced by the summary message.
    """
    history_so_far = demo_ephemeral_chat_history.messages
    if not history_so_far:
        return False

    summary_request = (
        "user",
        "Distill the above chat messages into a single summary message. Include as many specific details as you can.",
    )
    summarization_prompt = ChatPromptTemplate.from_messages(
        [MessagesPlaceholder(variable_name="chat_history"), summary_request]
    )
    summary_message = (summarization_prompt | llm).invoke({"chat_history": history_so_far})

    # Swap the full transcript for its summary
    demo_ephemeral_chat_history.clear()
    demo_ephemeral_chat_history.add_message(summary_message)
    return True

# Finally, put the summarization step in front of the chain with message history.
# RunnablePassthrough.assign runs summarize_messages and stores its return value under
# "messages_summarized"; the step is used for its side effect on the stored history.
chain_with_summarization = (
    RunnablePassthrough.assign(messages_summarized=summarize_messages)
    | chain_with_message_history
)

In [21]:
# Now, let’s invoke the chain and see if it remembers the chat history.
# The session_id value is not used: the history factory of chain_with_message_history
# ignores it and always returns demo_ephemeral_chat_history.
chain_with_summarization.invoke(
    {"input": "What did I say my name was?"},
    {"configurable": {"session_id": "unused"}},
)

AIMessage(content='You introduced yourself as Nemo.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 7, 'prompt_tokens': 88, 'total_tokens': 95, 'completion_tokens_details': {'audio_tokens': 0, 'reasoning_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-3bd66d22-f57b-4fb5-a2a8-9232c19e2cc1-0', usage_metadata={'input_tokens': 88, 'output_tokens': 7, 'total_tokens': 95, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

## Filtering messages
As the list of chat history messages grows, a wider variety of types, sub-chains, and models may be utilized. LangChain provides a filter_messages helper that makes it easier to filter the chat history messages by type, id, or name.

In [22]:
# Sample messages, each with an id and (except the system message) a name to filter on
messages = [
    SystemMessage("you are a good assistant", id="1"),
    HumanMessage("example input", id="2", name="example_user"),
    AIMessage("example output", id="3", name="example_assistant"),
    HumanMessage("real input", id="4", name="bob"),
    AIMessage("real output", id="5", name="alice"),
]
# Filter by type: keep only the human messages
filter_messages(messages, include_types="human")

[HumanMessage(content='example input', additional_kwargs={}, response_metadata={}, name='example_user', id='2'),
 HumanMessage(content='real input', additional_kwargs={}, response_metadata={}, name='bob', id='4')]

In [23]:
# Filter by name: drop the messages whose name is "example_user" or "example_assistant"
filter_messages(messages, exclude_names=["example_user", "example_assistant"])


[SystemMessage(content='you are a good assistant', additional_kwargs={}, response_metadata={}, id='1'),
 HumanMessage(content='real input', additional_kwargs={}, response_metadata={}, name='bob', id='4'),
 AIMessage(content='real output', additional_kwargs={}, response_metadata={}, name='alice', id='5')]

In [24]:
# Combine criteria: keep human and AI messages, but drop the message with id "3"
filter_messages(messages, include_types=[HumanMessage, AIMessage], exclude_ids=["3"])

[HumanMessage(content='example input', additional_kwargs={}, response_metadata={}, name='example_user', id='2'),
 HumanMessage(content='real input', additional_kwargs={}, response_metadata={}, name='bob', id='4'),
 AIMessage(content='real output', additional_kwargs={}, response_metadata={}, name='alice', id='5')]

In [25]:
# The filter_messages helper can be used imperatively (as above, with the messages passed in)
# or declaratively (as below, without messages), making it easy to compose with other
# components in a chain.
# NOTE: the chain built here is not invoked in this notebook; it also rebinds the name `chain`.
filter_ = filter_messages(exclude_names=["example_user", "example_assistant"])
chain = filter_ | llm

## Chat history with retrieval

In [26]:
# Load the source document from a text file
loader = TextLoader("TeachingwithGenerativeAI.txt")
doc = loader.load()

# Split the document into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
chunks = text_splitter.split_documents(doc)

# Embed the chunks and index them in a FAISS vector store
embed_model = OpenAIEmbeddings()
vector_db = FAISS.from_documents(documents=chunks, embedding=embed_model)

# Expose the vector store as a retriever
retriever = vector_db.as_retriever()

Next, let’s define a sub-chain that takes the historical chat messages and the latest user question, and reformulates the question if it refers to anything in the chat history. We’ll then use this sub-chain inside the final RAG chain, which will, in order:

1. Rephrase the user’s question given the conversation history (if there is history)

2. Pass the rephrased question to the retriever (see above) to get the most relevant documents

3. Pass the original question, chat history and documents to the final prompt to generate an answer.

In [31]:
# Helper used as the last step of a chain to turn a message object into its content
def get_msg_content(msg):
    """Return the ``content`` attribute of ``msg``."""
    content = msg.content
    return content

# System instruction for the rewriting step: turn the latest user question into a
# standalone question that can be understood without the chat history
contextualize_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

# Prompt for the rewriting step: system instruction, chat history, then the latest question
contextualize_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_system_prompt),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ]
)

# Chain for the rewriting step: prompt -> chat model -> content of the model's message
contextualize_chain = contextualize_prompt | ChatOpenAI() | get_msg_content

In [32]:
# System instruction for the answering step; the retrieved context is substituted for {context}
qa_system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise."
    "\n\n"
    "{context}"
)

# Prompt for the answering step: system instruction, chat history, then the question
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", qa_system_prompt),
    ("placeholder", "{chat_history}"),
    ("human", "{input}"),
])

# Chain for the answering step: prompt -> chat model -> content of the model's message
qa_chain = qa_prompt | ChatOpenAI() | get_msg_content

In [33]:
# Define the overall chain that uses both the retrieved documents and the chat history to answer the question.
# RunnableLambda is applied directly instead of the `chain` decorator because, by this point,
# earlier cells have rebound the name `chain` to chain objects (e.g. `chain = filter_ | llm`),
# so `@chain` would fail when the notebook is run top to bottom on a fresh kernel.
@RunnableLambda
def history_aware_qa(input):
    """Answer the latest question using retrieved documents and the chat history.

    `input` is a dict with the latest user question under "input" and, when there
    is prior conversation, the earlier messages under "chat_history".

    Returns the result of qa_chain for the original input plus the retrieved context.
    """
    # Rephrase the question into a standalone one only when there is chat history;
    # otherwise use the question as given
    if input.get('chat_history'):
        question = contextualize_chain.invoke(input)
    else:
        question = input['input']

    # Get context from the retriever, searching with the (possibly rephrased) question
    context = retriever.invoke(question)

    # Get the final answer from the original input together with the retrieved context
    return qa_chain.invoke({
        **input,
        "context": context
    })

In [34]:
# Next, add stateful management of the chat history, so that the final prompt sent to the
# model includes both the chat history and the retrieved context.
chat_history_for_chain = InMemoryChatMessageHistory()
qa_with_history = RunnableWithMessageHistory(
    runnable=history_aware_qa,
    # The session id is ignored: every session shares the one history created above
    get_session_history=lambda _: chat_history_for_chain,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [35]:
# Finally, let's invoke the chain with a first question; the history is still empty at this point
qa_with_history.invoke(
    {"input": "Should faculty declare their AI policy in their classes?"},
    config={"configurable": {"session_id": "123"}},
)

'Yes, faculty should declare their AI policy in their classes to help students understand what is and is not allowed regarding AI use. It is recommended that instructors explain their AI policy in the syllabus and discuss the reasons for adopting it in class. Providing clear guidelines about Dos and Don’ts, such as acknowledging AI use and specifying its limitations, can help prevent misuse or overuse of AI tools.'

In [36]:
# Now ask a follow-up question that refers back to the previous question
qa_with_history.invoke(
    {"input": "What question did I just ask?"},
    config={"configurable": {"session_id": "123"}},
)

'You asked whether faculty should declare their AI policy in their classes.'