In [1]:
from langchain_ollama import ChatOllama

# Tag of the Ollama model that every cell below talks to.
model_name = "llama3.2"

# temperature=0.0 -- presumably chosen so replies are as repeatable as possible
# between runs of the notebook.
llm = ChatOllama(
    model=model_name,
    temperature=0.0,
)

ConversationBufferMemory with RunnableWithMessageHistory

In [3]:
from langchain_core.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder

system_message = "You are a helpful assistant called AlphaBot."

# Build the three parts of the chat prompt separately so each has a name:
#   1. the fixed system instruction,
#   2. a slot filled with prior turns under the "history" variable,
#   3. the current user turn, taken from the "user_input" variable.
system_prompt = SystemMessagePromptTemplate.from_template(system_message)
history_slot = MessagesPlaceholder(variable_name="history")
human_prompt = HumanMessagePromptTemplate.from_template("{user_input}")

prompt_template = ChatPromptTemplate.from_messages(
    [system_prompt, history_slot, human_prompt]
)

In [4]:
# Chain the prompt into the model; `.pipe(llm)` builds the same sequence as `prompt_template | llm`.
pipeline = prompt_template.pipe(llm)

In [5]:
from langchain_core.chat_history import InMemoryChatMessageHistory

# Registry of per-user histories, keyed by the id passed to get_chat_history.
chat_map = {}

def get_chat_history(user_id: str) -> InMemoryChatMessageHistory:
    """Return the chat history stored for ``user_id``, creating it on first use.

    Args:
        user_id: Key under which the history is kept in ``chat_map``.

    Returns:
        The ``InMemoryChatMessageHistory`` registered for ``user_id``. The same
        object is returned on every later call with the same id.
    """
    if user_id in chat_map:
        return chat_map[user_id]

    history = InMemoryChatMessageHistory()
    chat_map[user_id] = history
    return history

In [8]:
from langchain_core.runnables.history import RunnableWithMessageHistory

# Wrap the prompt|llm pipeline so every call reads from and appends to the
# history returned by get_chat_history.
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    # Key of the input dict that carries the new user message.
    input_messages_key="user_input",
    # Prompt variable (the MessagesPlaceholder) that receives earlier turns.
    history_messages_key="history",
)

In [9]:
# The session key goes under "configurable": that is where
# RunnableWithMessageHistory looks up `session_id`. Passing it as a top-level
# config key only works on langchain-core versions that relocate unknown keys.
pipeline_with_history.invoke(
    {"user_input": "Hello, my name is Kateryna!"},
    config={"configurable": {"session_id": "user_1"}},
)

AIMessage(content="Hello Kateryna! It's lovely to meet you. I'm AlphaBot, your friendly AI assistant. How can I help you today? Do you have any questions or topics you'd like to discuss? I'm all ears (or rather, all text)!", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T15:21:37.756666Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3710974875, 'load_duration': 2610546292, 'prompt_eval_count': 43, 'prompt_eval_duration': 246010208, 'eval_count': 54, 'eval_duration': 556659374, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--5e8861c1-3b6c-4cc6-94cf-81d04d40ea0f-0', usage_metadata={'input_tokens': 43, 'output_tokens': 54, 'total_tokens': 97})

In [10]:
# Same session as the previous call, so the stored history should let the model
# recall the name. The session key is passed under "configurable", which is
# where RunnableWithMessageHistory looks up `session_id`.
pipeline_with_history.invoke(
    {"user_input": "Can you remind me what my name is?"},
    config={"configurable": {"session_id": "user_1"}},
)

AIMessage(content="Your name is Kateryna! Don't worry if you forgot - I've got your back. What else can I help you with today?", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T15:23:01.911295Z', 'done': True, 'done_reason': 'stop', 'total_duration': 664331500, 'load_duration': 99163583, 'prompt_eval_count': 115, 'prompt_eval_duration': 110378375, 'eval_count': 30, 'eval_duration': 309687583, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--b0945706-2646-4eb5-9aee-0eea87866c27-0', usage_metadata={'input_tokens': 115, 'output_tokens': 30, 'total_tokens': 145})

ConversationBufferWindowMemory with RunnableWithMessageHistory

In [29]:
from pydantic import BaseModel, Field
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import BaseMessage

class BufferWindowMessageHistory(BaseChatMessageHistory, BaseModel):
    """Chat message history that keeps only the most recent ``window_size`` messages.

    Each call to ``add_messages`` appends the new messages and then discards
    the oldest ones so that at most ``window_size`` remain.
    """

    # The default matches the constructor default below. ``ge=0`` rejects
    # negative sizes, which would otherwise slice from the wrong end.
    window_size: int = Field(default=5, ge=0, description="The maximum number of messages to store in the buffer.")
    messages: list[BaseMessage] = Field(default_factory=list, description="The buffer to store messages.")

    def __init__(self, window_size: int = 5) -> None:
        """Create an empty history.

        Args:
            window_size: Maximum number of messages to retain. Must be >= 0;
                a value of 0 retains nothing.
        """
        super().__init__(window_size=window_size)
        print(f"Initialized BufferWindowMessageHistory with window size: {window_size}")

    def add_messages(self, messages: list[BaseMessage]) -> None:
        """Append ``messages`` to the buffer, then trim it to the window size.

        Args:
            messages: Messages to append, oldest first.
        """
        combined = [*self.messages, *messages]
        # `combined[-0:]` is the whole list, so a zero-sized window needs its
        # own branch to actually keep nothing.
        if self.window_size > 0:
            self.messages = combined[-self.window_size:]
        else:
            self.messages = []

    def clear(self) -> None:
        """Remove every stored message."""
        self.messages = []

In [32]:
# Start from an empty registry; entries are BufferWindowMessageHistory objects
# keyed by session id.
chat_map = {}

def get_chat_history(session_id: str, k: int = 5) -> BufferWindowMessageHistory:
    """Return the windowed history for ``session_id``, creating it on first use.

    Args:
        session_id: Key under which the history is kept in ``chat_map``.
        k: Window size given to a newly created history. It is only used when
            the session does not exist yet; an existing history is returned
            as-is, whatever ``k`` is passed.

    Returns:
        The ``BufferWindowMessageHistory`` registered for ``session_id``.
    """
    history = chat_map.get(session_id)
    if history is None:
        history = BufferWindowMessageHistory(window_size=k)
        chat_map[session_id] = history
    return history

In [33]:
from langchain_core.runnables import ConfigurableFieldSpec

# Config field that selects which stored history a call uses.
session_id_spec = ConfigurableFieldSpec(
    id="session_id",
    annotation=str,
    name="Session ID",
    description="The session ID to use for the chat history.",
    default="id_default",
)

# Config field forwarded to get_chat_history as `k`.
# NOTE(review): the default here is 4 while get_chat_history defaults k to 5 -- confirm which is intended.
window_size_spec = ConfigurableFieldSpec(
    id="k",
    annotation=int,
    name="Window Size",
    description="The number of messages to retain in the chat history.",
    default=4,
)

# Same prompt|llm pipeline as before, now backed by the windowed history factory.
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    get_session_history=get_chat_history,
    input_messages_key="user_input",
    history_messages_key="history",
    history_factory_config=[session_id_spec, window_size_spec],
)

In [34]:
# Session "id_k4" with a window of 4 messages.
k4_intro_config = {"configurable": {"session_id": "id_k4", "k": 4}}

pipeline_with_history.invoke(
    {"user_input": "Hi, my name is Kate"},
    config=k4_intro_config,
)

Initialized BufferWindowMessageHistory with window size: 4


AIMessage(content="Hello Kate! It's nice to meet you. I'm AlphaBot, your friendly AI assistant. How can I help you today? Do you have any questions or topics you'd like to discuss? I'm all ears (or rather, all text).", additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T16:48:53.612654Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3640917208, 'load_duration': 2605008875, 'prompt_eval_count': 40, 'prompt_eval_duration': 228314375, 'eval_count': 52, 'eval_duration': 538555499, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--b248616d-d7d3-4bc9-89ff-5ff05a852559-0', usage_metadata={'input_tokens': 40, 'output_tokens': 52, 'total_tokens': 92})

In [39]:
k4_history = chat_map["id_k4"]
k4_history.clear()  # start from an empty buffer

# Scripted conversation as (user message, AI reply) pairs, inserted in order.
scripted_turns = [
    ("Hi, my name is Josh", "I'm an AI model called Zeta."),
    ("I'm researching the different types of conversational memory.", "That's interesting, what are some examples?"),
    ("I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", "That's interesting, what's the difference?"),
    ("Buffer memory just stores the entire conversation, right?", "That makes sense, what about ConversationBufferWindowMemory?"),
    ("Buffer window memory stores the last k messages, dropping the rest.", "Very cool!"),
]

for user_text, ai_text in scripted_turns:
    k4_history.add_user_message(user_text)
    k4_history.add_ai_message(ai_text)

# Show what survived the window.
k4_history.messages

[HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Buffer window memory stores the last k messages, dropping the rest.', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Very cool!', additional_kwargs={}, response_metadata={})]

In [None]:
# With k=4 the buffer holds only the four most recent messages, so the turn that
# introduced the name has been dropped and the LLM is expected not to recall it.
k4_recall_config = {"configurable": {"session_id": "id_k4", "k": 4}}

pipeline_with_history.invoke(
    {"user_input": "What is my name again?"},
    config=k4_recall_config,
)

AIMessage(content='You didn\'t tell me your name. You introduced yourself as "you" when you started our conversation. I\'m AlphaBot, your helpful assistant.', additional_kwargs={}, response_metadata={'model': 'llama3.2', 'created_at': '2025-12-04T16:52:12.090188Z', 'done': True, 'done_reason': 'stop', 'total_duration': 677279084, 'load_duration': 100080875, 'prompt_eval_count': 97, 'prompt_eval_duration': 93829709, 'eval_count': 31, 'eval_duration': 318803539, 'logprobs': None, 'model_name': 'llama3.2', 'model_provider': 'ollama'}, id='lc_run--bbb51aed-eb4f-42f7-a3be-d9e32496af64-0', usage_metadata={'input_tokens': 97, 'output_tokens': 31, 'total_tokens': 128})