In [1]:
from langchain_ollama import ChatOllama

# Ollama model tag shared by every chain in this notebook.
model_name = "llama3.2"

# The chat model is created once and reused, both for answering and for summarising.
llm = ChatOllama(
    model=model_name,
    temperature=0.0,
)

ConversationBufferMemory with RunnableWithMessageHistory

In [3]:
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder

system_message = "You are a helpful assistant called AlphaBot."

# Prompt layout, in order: the fixed system instruction, whatever is supplied
# under the "history" key, and finally the new "{user_input}" turn.
prompt_template = ChatPromptTemplate.from_messages(
    [
        SystemMessagePromptTemplate.from_template(system_message),
        MessagesPlaceholder(variable_name="history"),
        HumanMessagePromptTemplate.from_template("{user_input}"),
    ]
)

In [4]:
# Compose the chain: the rendered prompt is piped straight into the chat model.
pipeline = prompt_template | llm

In [5]:
from langchain_core.chat_history import InMemoryChatMessageHistory

# Histories keyed by id; the dict lives only in this kernel's memory.
chat_map = {}


def get_chat_history(user_id: str) -> InMemoryChatMessageHistory:
    """Return the history stored for ``user_id``, creating an empty one on first use.

    Args:
        user_id: Key identifying the conversation in ``chat_map``.

    Returns:
        The ``InMemoryChatMessageHistory`` registered under ``user_id``.
    """
    history = chat_map.get(user_id)
    if history is None:
        history = InMemoryChatMessageHistory()
        chat_map[user_id] = history
    return history

In [8]:
from langchain_core.runnables.history import RunnableWithMessageHistory

# Wrap the chain with history handling: "user_input" is the key holding the new
# message and "history" is the prompt slot that receives the stored messages.
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    input_messages_key="user_input",
    history_messages_key="history",
)

In [9]:
# The session id is passed under "configurable", the documented location for
# the values handed to the history factory.
pipeline_with_history.invoke(
    {"user_input": "Hello, my name is Kateryna!"},
    config={"configurable": {"session_id": "user_1"}},
)

AIMessage(content="Hello Kateryna! It's lovely to meet you. I'm AlphaBot, your friendly AI assistant. How can I help you today? Do you have any questions or topics you'd like to discuss? I'm all ears (or rather, all text)!", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T15:21:37.756666Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3710974875, 'load_duration': 2610546292, 'prompt_eval_count': 43, 'prompt_eval_duration': 246010208, 'eval_count': 54, 'eval_duration': 556659374, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--5e8861c1-3b6c-4cc6-94cf-81d04d40ea0f-0', usage_metadata={'input_tokens': 43, 'output_tokens': 54, 'total_tokens': 97})

In [10]:
# Same session id as the previous call, so the earlier turn is part of the history.
pipeline_with_history.invoke(
    {"user_input": "Can you remind me what my name is?"},
    config={"configurable": {"session_id": "user_1"}},
)

AIMessage(content="Your name is Kateryna! Don't worry if you forgot - I've got your back. What else can I help you with today?", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T15:23:01.911295Z', 'done': True, 'done_reason': 'stop', 'total_duration': 664331500, 'load_duration': 99163583, 'prompt_eval_count': 115, 'prompt_eval_duration': 110378375, 'eval_count': 30, 'eval_duration': 309687583, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--b0945706-2646-4eb5-9aee-0eea87866c27-0', usage_metadata={'input_tokens': 115, 'output_tokens': 30, 'total_tokens': 145})

ConversationBufferWindowMemory with RunnableWithMessageHistory

In [29]:
from pydantic import BaseModel, Field
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import BaseMessage

class BufferWindowMessageHistory(BaseChatMessageHistory, BaseModel):
    """Chat message history that keeps only the newest ``window_size`` messages.

    Each call to ``add_messages`` appends the new messages and then trims the
    buffer from the front, so the oldest messages are discarded once the window
    is full. A ``window_size`` of zero or less retains no messages.
    """

    # Field default mirrors the ``__init__`` default so both paths agree.
    window_size: int = Field(default=5, description="The maximum number of messages to store in the buffer.")
    messages: list[BaseMessage] = Field(default_factory=list, description="The buffer to store messages.")

    def __init__(self, window_size: int = 5) -> None:
        """Create an empty history.

        Args:
            window_size: Maximum number of messages kept after each update.
        """
        super().__init__(window_size=window_size)
        print(f"Initialized BufferWindowMessageHistory with window size: {window_size}" )

    def add_messages(self, messages: list[BaseMessage]) -> None:
        """Append ``messages`` and trim the buffer to the newest ``window_size`` entries.

        Args:
            messages: Messages to append, oldest first.
        """
        combined = [*self.messages, *messages]
        # ``combined[-0:]`` is the whole list, so a plain negative slice would make
        # a zero-sized window keep everything; handle non-positive sizes explicitly.
        self.messages = combined[-self.window_size:] if self.window_size > 0 else []

    def clear(self) -> None:
        """Remove every stored message."""
        self.messages = []

In [32]:
# Fresh registry for this section; replaces the one from the previous section.
chat_map = {}


def get_chat_history(session_id: str, k: int = 5) -> BufferWindowMessageHistory:
    """Return the windowed history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``chat_map``.
        k: Window size given to a newly created history. It is not applied to a
            history that already exists for ``session_id``.

    Returns:
        The ``BufferWindowMessageHistory`` registered under ``session_id``.
    """
    history = chat_map.get(session_id)
    if history is None:
        history = BufferWindowMessageHistory(window_size=k)
        chat_map[session_id] = history
    return history

In [33]:
from langchain_core.runnables import ConfigurableFieldSpec

# Declare the two values the history factory receives: the session id and the
# window size "k". Each row is (id, annotation, name, description, default).
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    input_messages_key="user_input",
    history_messages_key="history",
    history_factory_config=[
        ConfigurableFieldSpec(
            id=field_id,
            annotation=field_type,
            name=label,
            description=text,
            default=fallback,
        )
        for field_id, field_type, label, text, fallback in (
            ("session_id", str, "Session ID", "The session ID to use for the chat history.", "id_default"),
            ("k", int, "Window Size", "The number of messages to retain in the chat history.", 4),
        )
    ],
)

In [34]:
# First call for session "id_k4": the history is created with a window of 4.
pipeline_with_history.invoke(
    {"user_input": "Hi, my name is Kate"},
    config={"configurable": {"session_id": "id_k4", "k": 4}},
)

Initialized BufferWindowMessageHistory with window size: 4


AIMessage(content="Hello Kate! It's nice to meet you. I'm AlphaBot, your friendly AI assistant. How can I help you today? Do you have any questions or topics you'd like to discuss? I'm all ears (or rather, all text).", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T16:48:53.612654Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3640917208, 'load_duration': 2605008875, 'prompt_eval_count': 40, 'prompt_eval_duration': 228314375, 'eval_count': 52, 'eval_duration': 538555499, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--b248616d-d7d3-4bc9-89ff-5ff05a852559-0', usage_metadata={'input_tokens': 40, 'output_tokens': 52, 'total_tokens': 92})

In [39]:
chat_map["id_k4"].clear()  # clear the history

# manually insert history as (user text, AI text) pairs, in conversation order
scripted_turns = [
    ("Hi, my name is Josh", "I'm an AI model called Zeta."),
    ("I'm researching the different types of conversational memory.", "That's interesting, what are some examples?"),
    ("I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", "That's interesting, what's the difference?"),
    ("Buffer memory just stores the entire conversation, right?", "That makes sense, what about ConversationBufferWindowMemory?"),
    ("Buffer window memory stores the last k messages, dropping the rest.", "Very cool!"),
]
for user_text, ai_text in scripted_turns:
    chat_map["id_k4"].add_user_message(user_text)
    chat_map["id_k4"].add_ai_message(ai_text)

chat_map["id_k4"].messages

[HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Buffer window memory stores the last k messages, dropping the rest.', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Very cool!', additional_kwargs={}, response_metadata={})]

In [None]:
# With k=4 the message that introduced the name has already been dropped from
# the window, so the LLM is expected to have forgotten it.
pipeline_with_history.invoke(
    {"user_input": "What is my name again?"},
    config={"configurable": {"session_id": "id_k4", "k": 4}},
)

AIMessage(content='You didn\'t tell me your name. You introduced yourself as "you" when you started our conversation. I\'m AlphaBot, your helpful assistant.', additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T16:52:12.090188Z', 'done': True, 'done_reason': 'stop', 'total_duration': 677279084, 'load_duration': 100080875, 'prompt_eval_count': 97, 'prompt_eval_duration': 93829709, 'eval_count': 31, 'eval_duration': 318803539, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--bbb51aed-eb4f-42f7-a3be-d9e32496af64-0', usage_metadata={'input_tokens': 97, 'output_tokens': 31, 'total_tokens': 128})

In [None]:
# A larger window (k=14) should let the LLM remember the name; start a new
# session so that a history with the bigger window is created.
pipeline_with_history.invoke(
    {"user_input": "Hi, my name is Kate"},
    config={"configurable": {"session_id": "id_k14", "k": 14}},
)

Initialized BufferWindowMessageHistory with window size: 14


AIMessage(content="Hello Kate! It's nice to meet you. I'm AlphaBot, your friendly AI assistant. How can I help you today? Do you have any questions or topics you'd like to discuss? I'm all ears (or rather, all text).", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T16:54:43.155109Z', 'done': True, 'done_reason': 'stop', 'total_duration': 988566292, 'load_duration': 98924750, 'prompt_eval_count': 40, 'prompt_eval_duration': 95991125, 'eval_count': 52, 'eval_duration': 536806164, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--5553ad97-e8e6-4664-b336-dd7941ca2201-0', usage_metadata={'input_tokens': 40, 'output_tokens': 52, 'total_tokens': 92})

In [44]:
# Append scripted (user text, AI text) pairs after the real first exchange.
scripted_turns = [
    ("I'm researching the different types of conversational memory.", "That's interesting, what are some examples?"),
    ("I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", "That's interesting, what's the difference?"),
    ("Buffer memory just stores the entire conversation, right?", "That makes sense, what about ConversationBufferWindowMemory?"),
    ("Buffer window memory stores the last k messages, dropping the rest.", "Very cool!"),
]
for user_text, ai_text in scripted_turns:
    chat_map["id_k14"].add_user_message(user_text)
    chat_map["id_k14"].add_ai_message(ai_text)

chat_map["id_k14"].messages

[HumanMessage(content='Hi, my name is Kate', additional_kwargs={}, response_metadata={}),
 AIMessage(content="Hello Kate! It's nice to meet you. I'm AlphaBot, your friendly AI assistant. How can I help you today? Do you have any questions or topics you'd like to discuss? I'm all ears (or rather, all text).", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T16:54:43.155109Z', 'done': True, 'done_reason': 'stop', 'total_duration': 988566292, 'load_duration': 98924750, 'prompt_eval_count': 40, 'prompt_eval_duration': 95991125, 'eval_count': 52, 'eval_duration': 536806164, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--5553ad97-e8e6-4664-b336-dd7941ca2201-0', usage_metadata={'input_tokens': 40, 'output_tokens': 52, 'total_tokens': 92}),
 HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}),
 AIMessage(content="That's interesting, what are

In [45]:
# The introduction is still inside the 14-message window for this session.
pipeline_with_history.invoke(
    {"user_input": "What is my name again?"},
    config={"configurable": {"session_id": "id_k14", "k": 14}},
)

AIMessage(content='Your name is Kate.', additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T16:55:30.581605Z', 'done': True, 'done_reason': 'stop', 'total_duration': 402577000, 'load_duration': 100957666, 'prompt_eval_count': 227, 'prompt_eval_duration': 201076250, 'eval_count': 6, 'eval_duration': 57043875, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--3435e1fc-93f3-4260-88e4-498b3df7807b-0', usage_metadata={'input_tokens': 227, 'output_tokens': 6, 'total_tokens': 233})

ConversationSummaryMemory

In [78]:
from langchain_core.messages import SystemMessage

class ConversationSummaryMessageHistory(BaseChatMessageHistory, BaseModel):
    """Chat message history that keeps an LLM-written summary instead of raw messages.

    After each non-empty update, ``messages`` holds a single ``SystemMessage``
    whose content is the current summary.
    """

    messages: list[BaseMessage] = Field(default_factory=list, description="The buffer to store messages.")
    # Chat model used to write the summary.
    llm: ChatOllama
    # Text of the most recent summary; empty until the first update.
    summary: str = ""

    def __init__(self, llm: ChatOllama) -> None:
        """Create an empty history.

        Args:
            llm: Chat model invoked to produce each new summary.
        """
        super().__init__(llm=llm)

    def add_messages(self, messages: list[BaseMessage]) -> None:
        """Fold ``messages`` into the running summary.

        The existing summary and the new messages are sent to ``llm``; its reply
        becomes the new summary and replaces the contents of ``messages``.

        Args:
            messages: New messages to summarise, oldest first. An empty list is
                a no-op, so no LLM call is spent re-summarising unchanged content.
        """
        if not messages:
            return

        # One "type: content" line per new message.
        new_messages_text = "\n".join(f"{msg.type}: {msg.content}" for msg in messages)

        summary_prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(
                "Given the existing conversation summary and the new messages, "
                "generate a new summary of the conversation. Ensuring to maintain "
                "as much relevant information as possible BUT keep the summary "
                "concise and no more than a short paragraph in length."
            ),
            HumanMessagePromptTemplate.from_template(
                "Existing conversation summary:\n{existing_summary}\n\n"
                "New messages:\n{messages}"
            )
        ])
        # Format the prompt and ask the model for the updated summary.
        new_summary = self.llm.invoke(
            summary_prompt.format_messages(
                existing_summary=self.summary,
                messages=new_messages_text
            )
        ).content

        self.summary = new_summary
        # Only the summary is kept; the raw messages are not stored.
        self.messages = [SystemMessage(content=self.summary)]

    def clear(self) -> None:
        """Clear chat history."""
        self.messages = []
        self.summary = ""

In [79]:
# Fresh registry for this section; replaces the one from the previous section.
chat_map = {}


def get_chat_history(session_id: str, llm: ChatOllama) -> ConversationSummaryMessageHistory:
    """Return the summary history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``chat_map``.
        llm: Chat model given to a newly created history. It is not applied to a
            history that already exists for ``session_id``.

    Returns:
        The ``ConversationSummaryMessageHistory`` registered under ``session_id``.
    """
    history = chat_map.get(session_id)
    if history is None:
        history = ConversationSummaryMessageHistory(llm=llm)
        chat_map[session_id] = history
    return history

In [80]:
# Declare the two values the history factory receives: the session id and the
# summarising LLM. Each row is (id, annotation, name, description, default).
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    input_messages_key="user_input",
    history_messages_key="history",
    history_factory_config=[
        ConfigurableFieldSpec(
            id=field_id,
            annotation=field_type,
            name=label,
            description=text,
            default=fallback,
        )
        for field_id, field_type, label, text, fallback in (
            ("session_id", str, "Session ID", "The session ID to use for the chat history.", "id_default"),
            ("llm", ChatOllama, "LLM", "The LLM to use for summarization.", llm),
        )
    ],
)

In [81]:
# History-factory values are passed under "configurable", the documented
# location for them.
pipeline_with_history.invoke(
    {"user_input": "Hi, my name is Kate"},
    config={"configurable": {"session_id": "id_123", "llm": llm}},
)

AIMessage(content="Hello Kate! It's nice to meet you. I'm AlphaBot, your friendly AI assistant. How can I help you today? Do you have any questions or topics you'd like to discuss? I'm all ears (or rather, all text).", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-08T15:21:52.327127Z', 'done': True, 'done_reason': 'stop', 'total_duration': 1000595625, 'load_duration': 98964666, 'prompt_eval_count': 40, 'prompt_eval_duration': 98963375, 'eval_count': 52, 'eval_duration': 534632088, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--6768b20c-7cb6-4e98-b3c7-5297fdda4407-0', usage_metadata={'input_tokens': 40, 'output_tokens': 52, 'total_tokens': 92})

In [82]:
# Inspect what is stored for session "id_123" after the first exchange.
chat_map["id_123"].messages

[SystemMessage(content="Here is a new summary of the conversation:\n\nKate has initiated a conversation with AlphaBot, her friendly AI assistant. Kate introduced herself and expressed interest in discussing something with AlphaBot, but hasn't specified a particular topic or question yet.", additional_kwargs={}, response_metadata={})]

In [83]:
# History-factory values are passed under "configurable", the documented
# location for them.
pipeline_with_history.invoke(
    {"user_input": "I'm researching the different types of conversational memory."},
    config={"configurable": {"session_id": "id_123", "llm": llm}},
)

# Show the stored history after this exchange.
chat_map["id_123"].messages

[SystemMessage(content='Here is a new summary of the conversation:\n\nKate initiated a conversation with AlphaBot, expressing interest in discussing conversational memory. The human responded by asking about different types of conversational memory, and AlphaBot explained that there are two primary types: short-term and long-term conversational memory. Additionally, researchers have identified subtypes such as working memory, episodic memory, and semantic memory, which often overlap or interact with each other. Kate is now interested in exploring a specific aspect of conversational memory further.', additional_kwargs={}, response_metadata={})]

In [84]:
# Send several turns in a row to the same session; history-factory values are
# passed under "configurable", the documented location for them.
for msg in [
    "I have been looking at ConversationBufferMemory and ConversationBufferWindowMemory.",
    "Buffer memory just stores the entire conversation",
    "Buffer window memory stores the last k messages, dropping the rest."
]:
    pipeline_with_history.invoke(
        {"user_input": msg},
        config={"configurable": {"session_id": "id_123", "llm": llm}},
    )

In [85]:
# Inspect what is stored for session "id_123" after the additional turns.
chat_map["id_123"].messages

[SystemMessage(content='Here is a new summary of the conversation:\n\nKate discussed conversational memory with AlphaBot, exploring concepts such as ConversationBufferMemory and ConversationBufferWindowMemory. The human clarified that Buffer Window Memory stores only the last k messages, dropping older ones. This approach has benefits like reduced storage requirements, improved performance, and simplified management, but also limitations such as lost context and inconsistent user experience. Kate expressed interest in learning more about implementing Buffer Window Memory in chatbots and human-computer interaction systems.', additional_kwargs={}, response_metadata={})]

In [86]:
# The model only sees the stored summary here, so recalling the name depends on
# the summary having kept it.
pipeline_with_history.invoke(
    {"user_input": "What is my name again?"},
    config={"configurable": {"session_id": "id_123", "llm": llm}},
)

AIMessage(content="I don't have any information about your name from our previous conversation. This is the start of our conversation, and I'm happy to chat with you! What would you like to talk about?", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-08T15:26:37.713465Z', 'done': True, 'done_reason': 'stop', 'total_duration': 868941250, 'load_duration': 99781375, 'prompt_eval_count': 138, 'prompt_eval_duration': 152945667, 'eval_count': 40, 'eval_duration': 419427083, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--07e7cfd4-f5d2-4f76-90ce-19eaee663c2b-0', usage_metadata={'input_tokens': 138, 'output_tokens': 40, 'total_tokens': 178})

ConversationSummaryBufferMemory

In [89]:
class ConversationSummaryBufferMessageHistory(BaseChatMessageHistory, BaseModel):
    """Chat history that keeps the newest ``window_size`` messages plus a summary of older ones.

    When an update pushes the buffer past ``window_size``, the overflowing
    (oldest) messages are folded into an LLM-written summary, which is stored as
    a ``SystemMessage`` at index 0 of ``messages``.
    """

    # Optional summary SystemMessage first, then the most recent raw messages.
    messages: list[BaseMessage] = Field(default_factory=list)
    # Chat model used to write the summary; ``__init__`` always supplies it.
    llm: ChatOllama = Field(default_factory=ChatOllama)
    # Number of raw (non-summary) messages to keep; ``__init__`` always supplies it.
    window_size: int = Field(default_factory=int)

    def __init__(self, window_size: int, llm: ChatOllama) -> None:
        """Create an empty history.

        Args:
            window_size: Number of most recent messages kept verbatim.
            llm: Chat model invoked to summarise the messages that are dropped.
        """
        super().__init__(window_size=window_size, llm=llm)

    def add_messages(self, messages: list[BaseMessage]) -> None:
        """Add messages to the history, removing any messages beyond
        the last ``window_size`` messages and summarizing the messages
        that are dropped.

        Args:
            messages: New messages to append, oldest first.
        """
        # Work on a copy so ``self.messages`` is replaced in one step at the end.
        buffer = list(self.messages)
        summary_message = None
        # A leading SystemMessage is treated as the summary from an earlier call.
        if buffer and isinstance(buffer[0], SystemMessage):
            print(">> Found existing summary message.")
            summary_message = buffer.pop(0)
        buffer.extend(messages)

        # Count of oldest messages that no longer fit. Computing an explicit split
        # index avoids the ``[-0:]`` pitfall (which would keep everything) and
        # treats a negative window as zero.
        overflow = len(buffer) - max(self.window_size, 0)
        if overflow <= 0:
            print(">> No old messages to update summary with")
            # Nothing is dropped, so any existing summary stays at the front
            # instead of being discarded.
            kept_summary = [summary_message] if summary_message is not None else []
            self.messages = kept_summary + buffer
            return

        print(
            f">> Found {len(buffer)} messages, dropping "
            f"oldest {overflow} messages.")
        # Only the messages that fall out of the window are summarised...
        old_messages = buffer[:overflow]
        # ...and the rest are kept verbatim.
        recent_messages = buffer[overflow:]

        # Pass plain text to the prompt rather than Python object reprs.
        existing_summary = summary_message.content if summary_message is not None else ""
        old_messages_text = "\n".join(f"{msg.type}: {msg.content}" for msg in old_messages)

        summary_prompt = ChatPromptTemplate.from_messages([
            SystemMessagePromptTemplate.from_template(
                "Given the existing conversation summary and the new messages, "
                "generate a new summary of the conversation. Ensuring to maintain "
                "as much relevant information as possible BUT keep the summary "
                "concise and no more than a short paragraph in length."
            ),
            HumanMessagePromptTemplate.from_template(
                "Existing conversation summary:\n{existing_summary}\n\n"
                "New messages:\n{old_messages}"
            )
        ])
        # Format the prompt and ask the model for the updated summary.
        new_summary = self.llm.invoke(
            summary_prompt.format_messages(
                existing_summary=existing_summary,
                old_messages=old_messages_text
            )
        )

        print(f">> New summary: {new_summary.content}")
        # Prepend the new summary to the retained messages.
        self.messages = [SystemMessage(content=new_summary.content)] + recent_messages

    def clear(self) -> None:
        """Clear the history."""
        self.messages = []

In [96]:
# Fresh registry for this section; replaces the one from the previous section.
chat_map = {}


def get_chat_history(session_id: str, window_size: int, llm: ChatOllama) -> ConversationSummaryBufferMessageHistory:
    """Return the summary-buffer history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``chat_map``.
        window_size: Window size given to a newly created history.
        llm: Chat model given to a newly created history.

    Neither ``window_size`` nor ``llm`` is applied to a history that already
    exists for ``session_id``.

    Returns:
        The ``ConversationSummaryBufferMessageHistory`` registered under ``session_id``.
    """
    history = chat_map.get(session_id)
    if history is None:
        history = ConversationSummaryBufferMessageHistory(window_size=window_size, llm=llm)
        chat_map[session_id] = history
    return history

In [97]:
# Declare the three values the history factory receives: session id, window
# size and summarising LLM. Each row is (id, annotation, name, description, default).
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    input_messages_key="user_input",
    history_messages_key="history",
    history_factory_config=[
        ConfigurableFieldSpec(
            id=field_id,
            annotation=field_type,
            name=label,
            description=text,
            default=fallback,
        )
        for field_id, field_type, label, text, fallback in (
            ("session_id", str, "Session ID", "The session ID to use for the chat history.", "id_default"),
            ("window_size", int, "Window Size", "The number of messages to retain in the chat history.", 4),
            ("llm", ChatOllama, "LLM", "The LLM to use for summarization.", llm),
        )
    ],
)

In [98]:
# History-factory values are passed under "configurable", the documented
# location for them.
pipeline_with_history.invoke(
    {"user_input": "Hi, my name is Kate"},
    config={"configurable": {"session_id": "id_123", "window_size": 4, "llm": llm}},
)
# Show the stored history after the first exchange.
chat_map["id_123"].messages

>> No old messages to update summary with


[HumanMessage(content='Hi, my name is Kate', additional_kwargs={}, response_metadata={}),
 AIMessage(content="Hello Kate! It's nice to meet you. I'm AlphaBot, your friendly AI assistant. How can I help you today? Do you have any questions or topics you'd like to discuss? I'm all ears (or rather, all text).", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-08T16:23:00.163282Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2903003792, 'load_duration': 1843678917, 'prompt_eval_count': 40, 'prompt_eval_duration': 260694542, 'eval_count': 52, 'eval_duration': 537588580, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--b9550d46-629f-419c-8cee-36db1df3c7cb-0', usage_metadata={'input_tokens': 40, 'output_tokens': 52, 'total_tokens': 92})]

In [99]:
# Send several turns in a row so the buffer overflows and summarisation kicks in;
# history-factory values are passed under "configurable", the documented
# location for them.
for i, msg in enumerate([
    "I'm researching the different types of conversational memory.",
    "I have been looking at ConversationBufferMemory and ConversationBufferWindowMemory.",
    "Buffer memory just stores the entire conversation",
    "Buffer window memory stores the last k messages, dropping the rest."
]):
    print(f"---\nMessage {i+1}\n---\n")
    pipeline_with_history.invoke(
        {"user_input": msg},
        config={"configurable": {"session_id": "id_123", "llm": llm, "window_size": 4}},
    )

---
Message 1
---

>> No old messages to update summary with
---
Message 2
---

>> Found 6 messages, dropping latest 2 messages.
>> New summary: Here is a concise summary of the conversation:

Kate initiated a conversation with AlphaBot, her AI assistant. She expressed interest in researching conversational memory and received an introduction to various types of conversational memory, including short-term, working, long-term, and contextual memory. The conversation also touched on cognitive load-based memory types, such as low-cognitive-load and high-cognitive-load memory. AlphaBot offered to help Kate explore her interests further, but the conversation was stopped due to a timeout.
---
Message 3
---

>> Found existing summary message.
>> Found 6 messages, dropping latest 2 messages.
>> New summary: Here is a concise summary of the conversation:

Kate initiated a conversation with AlphaBot about conversational memory. AlphaBot introduced various types of conversational memory, includin