In [1]:
try:
    import os
    from typing import Dict
    from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
    from langchain_core.runnables import RunnableWithMessageHistory
    from langchain_core.chat_history import BaseChatMessageHistory
    from langchain_core.messages import HumanMessage, AIMessage, get_buffer_string
    from langchain_groq import ChatGroq
except:
    !pip install langchain langchain-groq
    import os
    from typing import Dict
    from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
    from langchain_core.runnables import RunnableWithMessageHistory
    from langchain_core.chat_history import BaseChatMessageHistory
    from langchain_core.messages import HumanMessage, AIMessage, get_buffer_string
    from langchain_groq import ChatGroq

### LLMs

In [3]:
# Model used by the summarization chain; temperature is pinned to 0.
summary_llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

# Model used for the user-facing chat turns; same model, temperature 0.3.
chat_llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0.3)

### Prompts

In [5]:
# Prompt for the conversational chain. It is assembled from three parts:
#   1. a system instruction telling the model to treat the 'Conversation summary'
#      found in the history as the authoritative source of user facts,
#   2. the stored message history (injected under the "history" variable),
#   3. the current user input (the "question" variable).
chat_prompt = ChatPromptTemplate.from_messages([
    ("system", """You are a precise assistant. 
    IMPORTANT: You will be provided with a 'Conversation summary' in the chat history. 
    This summary contains the DEFINITIVE facts about the user (Name, Preferences, etc.).
    Prioritize the information in the summary over previous individual messages.
    If the summary says the user's name is x, then their name is x, even if the recent messages are brief.
    In this case, rely on the summary information and avoid referring the user to the summary or recent conversations."""),
    # Slot where the chat history messages are spliced in.
    MessagesPlaceholder(variable_name="history"),
    # The new user turn.
    ("human", "{question}")
])

# Prompt for the summarization chain. The system message sets the rules for the
# memory log (never drop persistent facts, "Entity: Detail" list format, merge
# with any previous summary); the human message carries the flattened history
# text through the "history_text" variable.
summary_prompt = ChatPromptTemplate.from_messages([
    ("system", """Analyze the following conversation history and extract critical user information to create a technical memory log.
    RULE 1: Persistent data like Name, Profession, and Preferences must NEVER be deleted or marked as 'unknown'.
    RULE 2: Keep the information in a list format (Entity: Detail).
    RULE 3: If a previous summary exists, merge old information with new data; do not lose previous context."""),
    ("human", "Current Memory and New Messages:\n\n{history_text}\n\nPlease generate an updated and complete summary.")
])

# Pipe the summary prompt into the temperature-0 model; invoked with
# {"history_text": ...} and read via the `.content` of the result.
summary_chain = summary_prompt | summary_llm

### Evaluative chat history

In [None]:
class EvaluativeSummarizingHistory(BaseChatMessageHistory):
    """In-memory chat history that compacts itself into a running summary.

    Every appended message is stored in ``self.messages``. Once the number of
    non-summary messages exceeds ``max_messages``, the whole buffer (including
    any earlier summary) is sent to ``summary_chain`` and replaced by a single
    summary ``AIMessage`` followed by the most recent ``keep_last`` messages.

    Args:
        max_messages: Number of non-summary messages tolerated before a
            summarization pass is triggered.
        keep_last: Number of most recent messages kept verbatim after a
            summarization pass; ``0`` keeps only the summary. Should be smaller
            than ``max_messages``, otherwise every subsequent message triggers
            another summarization call.

    Raises:
        ValueError: If ``keep_last`` is negative.
    """

    # Prefix that starts every summary message; also used to recognise them.
    SUMMARY_PREFIX = "IMPORTANT CONTEXT - Conversation summary:"

    def __init__(self, max_messages: int = 5, keep_last: int = 2):
        if keep_last < 0:
            raise ValueError("keep_last must be >= 0")
        self.messages = []
        self.max_messages = max_messages
        self.keep_last = keep_last

    @classmethod
    def _is_summary(cls, message) -> bool:
        """Return True if ``message`` is a summary produced by this class."""
        # Match on the full prefix at the start of the text so that an ordinary
        # model reply that merely mentions "Conversation summary:" is not
        # mistaken for a memory entry. Non-string content is never a summary.
        content = getattr(message, "content", None)
        return (
            isinstance(message, AIMessage)
            and isinstance(content, str)
            and content.startswith(cls.SUMMARY_PREFIX)
        )

    def add_message(self, message):
        """Append ``message`` and summarize when the buffer grows too large."""
        self.messages.append(message)
        # Summary messages do not count towards the limit.
        interaction_count = sum(
            1 for stored in self.messages if not self._is_summary(stored)
        )

        if interaction_count > self.max_messages:
            self._summarize()

    def _summarize(self):
        """Replace the buffer with a fresh summary plus the most recent messages."""
        print("\n" + "=" * 60)
        print("--- [CRITICAL MEMORY UPDATE TRIGGERED] ---")

        # The previous summary (if any) is part of the buffer, so it is fed
        # back to the summarizer together with the new messages.
        history_text = get_buffer_string(self.messages)
        summary_output = summary_chain.invoke({"history_text": history_text})
        new_summary = summary_output.content.strip()

        print(f"UPDATED MEMORY LOG:\n{new_summary}")

        # `messages[-0:]` is the whole list, so keep_last == 0 needs an explicit
        # empty tail. Also drop any old summary caught in the tail: the new
        # summary supersedes it and keeping both would duplicate memory.
        recent = self.messages[-self.keep_last:] if self.keep_last > 0 else []
        recent = [kept for kept in recent if not self._is_summary(kept)]

        self.messages = [
            AIMessage(content=f"{self.SUMMARY_PREFIX} {new_summary}")
        ] + recent

        print("=" * 60 + "\n")

    def clear(self):
        """Drop all stored messages, including any summary."""
        self.messages = []

### Setup

In [None]:
# Session id -> history object; lives only in this kernel's memory.
store: Dict[str, BaseChatMessageHistory] = {}

def get_chat_history(session_id: str):
    """Return the history for ``session_id``, creating it on first access.

    New sessions get an ``EvaluativeSummarizingHistory`` configured with
    ``max_messages=4`` and ``keep_last=2``.
    """
    history = store.get(session_id)
    if history is None:
        history = EvaluativeSummarizingHistory(max_messages=4, keep_last=2)
        store[session_id] = history
    return history

# Wrap the chat chain with per-session history. "question" is the input key
# holding the user message and "history" matches the MessagesPlaceholder
# variable in chat_prompt; histories are resolved through get_chat_history.
full_chain = RunnableWithMessageHistory(
    chat_prompt | chat_llm,
    get_chat_history,
    input_messages_key="question",
    history_messages_key="history"
)

# All demo turns share one session id, so they share one history object.
config = {"configurable": {"session_id": "github_demo_session"}}

# Scripted conversation: facts are stated across several turns, and the final
# question asks the model to recall them.
test_questions = [
    "My name is Serhan, I am a Python developer.",
    "I am currently working on memory management in LangChain using Llama and Gemma.",
    "Which language am I an expert in?",
    "My favorite food is Iskender, please don't forget that.",
    "I have a bachelor's degree in mathematics and am pursuing a master's degree in computer science.",
    "What do you know about me so far? What is my name, what do I do, and what do I like?"
]

print("=== STARTING MEMORY QUALITY EVALUATION ===\n")

# Send each question through the history-aware chain and print the reply text.
for q in test_questions:
    print(f"User: {q}")
    res = full_chain.invoke({"question": q}, config=config)
    print(f"AI: {res.content}\n")

=== STARTING MEMORY QUALITY EVALUATION ===

User: My name is Serhan, I am a Python developer.
AI: Hello Serhan, it's nice to meet you. As a Python developer, you must be familiar with a wide range of libraries and frameworks. What specific areas of Python development are you interested in or currently working on?

User: I am currently working on memory management in LangChain using Llama and Gemma.
AI: That sounds like a fascinating project, Serhan. LangChain is an exciting framework for building AI applications, and Llama and Gemma are powerful tools for large language models. Memory management can be a challenging aspect of working with these models, given their complexity and computational requirements.

What specific challenges are you facing with memory management in your project, and how are you approaching optimization and efficiency? Are you exploring techniques like caching, pruning, or distributed computing to mitigate memory constraints?

User: Which language am I an expert 