In [None]:
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from dotenv import load_dotenv

load_dotenv()

### Identify whether a question is relevant enough to be stored in long-term memory

In [2]:
class GradeQuestion(BaseModel):
    """Yes/No verdict on whether the input contains personal information or preferences of the user."""

    # The docstring and this description are part of the schema handed to the
    # model by with_structured_output, so they must agree with the system
    # prompt (personal info / preferences), not talk about "topics".
    score: str = Field(
        description=(
            "Does the input contain personal information or preferences? "
            "Respond with 'Yes' or 'No'."
        )
    )

In [10]:
# System prompt for the Yes/No classifier: it flags inputs that contain
# personal information about the user or stated preferences.
system = """
You are a classifier that examines the given question or statement for any personal information or preferences.
Your job is to determine whether the input contains:
1. Personal information about the user (e.g., name, occupation, location, contact details, or other identifiable information).
2. Preferences, habits, or any explicitly mentioned likes/dislikes.

If you find any such information, respond with 'Yes'. If the input does not contain any personal information or preferences, respond with 'No'.
Your response must be ONLY 'Yes' or 'No'.
"""

# The text to classify is supplied at invoke time through the {question} variable.
grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Here is the input: {question}"),
    ]
)

# Bind the GradeQuestion schema to the model's output.
llm = ChatOpenAI(model="gpt-4o-mini")
structured_llm = llm.with_structured_output(GradeQuestion)
# Full chain: prompt -> structured model. Invoke with {"question": ...}.
grader_llm = grade_prompt | structured_llm


In [None]:
# Input that only asks about a third party and states nothing about the user.
grader_llm.invoke({"question": "Where is Thomas Müller from?"})

In [None]:
# Same question, followed by a first-person preference ("I love playing football myself").
grader_llm.invoke({"question": "Where is Thomas Müller from? I love playing football myself"})

### Extract and summarise the user's personal information

In [13]:
# Sample input: a third-party question followed by a first-person preference.
message = "Where is Thomas Müller from? I love playing football myself."

# Extraction instructions: only facts about the user are summarised,
# third-party details are ignored.
system = """
You are an extractor focusing on personal information **about the user**.
Ignore references to third parties unless they somehow reveal personal details **about the user**.

Personal information (for the user) includes:
1. The user’s own name or identifying details.
2. The user’s locations (e.g., "I live in Seattle").
3. The user’s hobbies, preferences, or habits ("I love playing soccer every day").
4. Any unique personal details shared by the user.

If the user mentions others’ personal info (like celebrity data), do not include it.

### Few-Shot Examples:

User: "I am John, I live in Berlin."
- Extract only user info: "User name: John, location: Berlin."

User: "Do you know where Beyoncé is from? I love dancing to her songs every night!"
- Do NOT extract Beyoncé’s info. Instead, focus on the user’s statement:
  "User enjoys dancing to Beyoncé's songs every night."

User: "Hello, how are you?"
- No personal info to extract. Output: "No personal info found."

Now read the message carefully and respond with the user’s personal info (if any). Otherwise, respond "No personal info found."
"""

# Keep {message} as a real template variable and bind its value with
# .partial() instead of interpolating it with an f-string. With the f-string,
# the user text became part of the template itself, so any "{" or "}" in it
# was parsed as a template variable. Binding via .partial() means the chain
# still needs no inputs, so summarizer.invoke({}) keeps working.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Extract and summarize personal information: {message}"),
    ]
).partial(message=message)

llm = ChatOpenAI(model="gpt-4o-mini")
summarizer = prompt | llm

In [None]:
# The chain ends in the chat model, so the result is a message object.
# Print only its text rather than the full message repr.
result = summarizer.invoke({})
print(f"Extracted Personal Information: {result.content}")

In [15]:
from typing import Annotated, Literal, TypedDict, Sequence
from pydantic import BaseModel, Field
from dotenv import load_dotenv

load_dotenv()

from langgraph.graph import StateGraph, END
from langgraph.graph.message import add_messages
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langgraph.store.memory import InMemoryStore
import uuid
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langgraph.checkpoint.memory import MemorySaver

# In-memory store that the node functions below read from and write to.
store = InMemoryStore()
# Hard-coded user id; used as the second element of the ("memories", USER_ID) namespace.
USER_ID = "user-123"

class AgentState(TypedDict):
    """State dictionary passed between the graph's node functions."""
    # Conversation messages; add_messages is attached as the annotation for this field.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Verdict written by personal_info_classifier; the router compares it case-insensitively with "yes".
    personal_info_detected: str
    # Summary text written by personal_info_extractor.
    personal_info_extracted: str
    # NOTE: despite the name, "Yes" means the info is NEW (it gets stored); "No" means it is already stored.
    is_duplicate: str
    # Stored memories joined with newlines by retrieve_memories.
    collected_memories: str

class GradeQuestion(BaseModel):
    """Yes/No verdict on whether a user message contains personal information."""

    # The description is part of the structured-output schema, so it tells the
    # model which values are expected in this field.
    score: str = Field(
        description="Does the message contain personal info? Respond with 'Yes' or 'No'."
    )

def personal_info_classifier(state: AgentState) -> AgentState:
    """
    Decide whether the most recent message carries personal info.

    The content of the last message is sent to a few-shot Yes/No classifier;
    the stripped verdict is written to state["personal_info_detected"] and the
    same state object is returned.
    """
    latest_text = state["messages"][-1].content

    # Few-shot instructions that spell out what counts as "personal info".
    few_shot_instructions = """You are a classifier that determines if a message contains personal info.
Personal info may include:
- Names (e.g., "John Smith"),
- Locations (e.g., "Berlin", "123 Main St"),
- Preferences / hobbies ("I love playing soccer", "I prefer short responses"),
- Occupation details, phone number, or any unique ID.

Examples:
User: "My name is Thomas, I live in Vancouver."
Classifier: "Yes"

User: "I love pizza with extra cheese."
Classifier: "Yes"  (because it expresses a personal preference)

User: "Hello, how are you?"
Classifier: "No"  (no personal info)

User: "This is great weather."
Classifier: "No"  (no personal info)

Now analyze the new user message. Respond ONLY 'Yes' or 'No'."""

    classifier_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", few_shot_instructions),
            ("human", "{message}"),
        ]
    )
    grader = ChatOpenAI(model="gpt-4o-mini").with_structured_output(GradeQuestion)

    verdict = (classifier_prompt | grader).invoke({"message": latest_text})
    state["personal_info_detected"] = verdict.score.strip()
    return state

def personal_info_router(state: AgentState) -> Literal["extract_personal_info", "retrieve_memories"]:
    """
    Route on the classifier verdict.

    A verdict of 'Yes' (any casing) selects "extract_personal_info"; every
    other value selects "retrieve_memories".
    """
    verdict = state["personal_info_detected"].lower()
    return "extract_personal_info" if verdict == "yes" else "retrieve_memories"

def personal_info_extractor(state: AgentState) -> AgentState:
    """
    Summarise the personal info contained in the latest message.

    The content of the last message is passed to a few-shot extraction prompt;
    the stripped reply text is written to state["personal_info_extracted"] and
    the same state object is returned.
    """
    latest_text = state["messages"][-1].content

    # Few-shot instructions showing the expected one-line summary format.
    few_shot_instructions = """You are an extractor focusing on personal info.

Examples:
User: "I am John and I live in Seattle."
You output: "User name: John, Location: Seattle."

User: "Hey, I'm Lucy. I love playing guitar!"
You output: "User name: Lucy, Hobby: playing guitar."

User: "Just a random statement about the weather."
You output: "No personal info found."

Now read the input and extract personal info as a single-sentence summary:
- If there's name, location, preferences, or any unique details, mention them briefly.
- If no personal info is found, write: 'No personal info found.'"""

    summary_chain = ChatPromptTemplate.from_messages(
        [
            ("system", few_shot_instructions),
            ("human", "Input: {message}"),
        ]
    ) | ChatOpenAI(model="gpt-4o-mini")

    reply = summary_chain.invoke({"message": latest_text})
    state["personal_info_extracted"] = reply.content.strip()
    return state

class InfoNoveltyGrade(BaseModel):
    """Yes/No verdict on whether newly extracted personal info adds anything to the stored memory."""

    # Polarity matters: downstream routing stores the info when this is 'Yes',
    # so 'Yes' must mean "new", not "duplicate".
    score: str = Field(
        description="'Yes' if the new info is not already stored (it is new), otherwise 'No'."
    )

def personal_info_duplicate_classifier(state: AgentState) -> AgentState:
    """
    Ask the model whether the freshly extracted info adds anything to what is stored.

    The stripped verdict is written to state["is_duplicate"]. Mind the
    polarity: 'Yes' means the info is new, 'No' means it is already stored.
    """
    candidate = state.get("personal_info_extracted", "")

    # NOTE(review): search is called without an explicit limit — confirm the
    # store's default page size covers every stored memory.
    stored_items = store.search(("memories", USER_ID))
    known_facts = [item.value["data"] for item in stored_items]
    if known_facts:
        known_block = "\n".join(known_facts)
    else:
        known_block = "No stored info so far."

    # The comparison text is rendered here and then passed to the prompt as a
    # single variable.
    comparison_template = """New info:\n{new_info}\n
Existing memory:\n{old_info}\n
Answer ONLY 'Yes' if the new info is unique. Otherwise 'No'."""
    human_msg = comparison_template.format(new_info=candidate, old_info=known_block)

    system_msg = """You are a classifier that checks if the new personal info is already stored.
If the new info adds anything new, respond 'Yes'. Otherwise 'No'."""

    novelty_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_msg),
            ("human", "{human_msg}"),
        ]
    )
    novelty_grader = ChatOpenAI(model="gpt-4o-mini").with_structured_output(InfoNoveltyGrade)
    verdict = (novelty_prompt | novelty_grader).invoke({"human_msg": human_msg})

    state["is_duplicate"] = verdict.score.strip()
    return state

def personal_info_deduper_router(state: AgentState) -> Literal["personal_info_storer", "retrieve_memories"]:
    """
    Route on the novelty verdict.

    A verdict of 'Yes' (any casing) selects "personal_info_storer"; every
    other value skips storing and selects "retrieve_memories".
    """
    verdict = state["is_duplicate"].lower()
    return "personal_info_storer" if verdict == "yes" else "retrieve_memories"

def personal_info_storer(state: AgentState) -> AgentState:
    """
    Persist the extracted personal info in the store.

    Nothing is written when the extraction is missing or blank, or when it is
    the extractor's "No personal info found." sentinel. Storing that sentinel
    would later be presented to the model as if it were a fact about the user.

    Returns the state object unchanged.
    """
    extracted = (state.get("personal_info_extracted") or "").strip()
    # Compare without surrounding quotes or trailing punctuation so variants
    # such as 'No personal info found' are recognised as well.
    is_sentinel = extracted.strip(" \t\n\"'.").lower() == "no personal info found"
    if extracted and not is_sentinel:
        namespace = ("memories", USER_ID)
        # A random UUID key means every stored memory gets its own entry.
        store.put(namespace, str(uuid.uuid4()), {"data": extracted})
    return state

def retrieve_memories(state: AgentState) -> AgentState:
    """
    Collect every stored memory for USER_ID into state["collected_memories"].

    The store is read page by page, because a single search call only returns
    a limited number of items. The memories are joined with newlines; the
    result is an empty string when nothing is stored.

    Returns the same state object.
    """
    namespace = ("memories", USER_ID)
    page_size = 100
    memory_strs = []
    offset = 0
    while True:
        page = store.search(namespace, limit=page_size, offset=offset)
        memory_strs.extend(doc.value["data"] for doc in page)
        # A short (or empty) page means the namespace has been exhausted.
        if len(page) < page_size:
            break
        offset += len(page)
    state["collected_memories"] = "\n".join(memory_strs)
    return state

def log_personal_memory(state: AgentState) -> AgentState:
    """
    Print the collected memories to stdout, one numbered line per memory.

    Prints a placeholder line when state["collected_memories"] is empty.
    Returns the state object untouched.
    """
    print("----- Logging Personal Memory -----")
    memories = state["collected_memories"]
    if not memories:
        print("[Memory] No personal info stored yet.")
        return state
    for idx, entry in enumerate(memories.split("\n"), start=1):
        print(f"[Memory {idx}] {entry}")
    return state

def call_model(state: AgentState) -> AgentState:
    """
    Produce the assistant reply for the conversation.

    A system message containing state["collected_memories"] is placed in front
    of the conversation messages, the chat model is invoked on that list, and
    the reply is appended to state["messages"]. Returns the same state object.
    """
    memories = state.get("collected_memories", "")
    preamble = SystemMessage(
        content=f"You are a helpful assistant. The user has shared these personal details:\n{memories}"
    )
    conversation = [preamble, *state["messages"]]

    reply = ChatOpenAI(model="gpt-4o-mini").invoke(conversation)

    state["messages"] = state["messages"] + [reply]
    return state

# Build the graph over AgentState and register one node per function above.
workflow = StateGraph(AgentState)
workflow.add_node("personal_info_classifier", personal_info_classifier)
workflow.add_node("personal_info_extractor", personal_info_extractor)
workflow.add_node("personal_info_duplicate_classifier", personal_info_duplicate_classifier)
workflow.add_node("personal_info_storer", personal_info_storer)
workflow.add_node("retrieve_memories", retrieve_memories)
workflow.add_node("log_personal_memory", log_personal_memory)
workflow.add_node("call_model", call_model)

# After classification: the router's return value is mapped to a node name.
# Note that the key "extract_personal_info" maps to the node "personal_info_extractor".
workflow.add_conditional_edges(
    "personal_info_classifier",
    personal_info_router,
    {
        "extract_personal_info": "personal_info_extractor",
        "retrieve_memories": "retrieve_memories",
    },
)

# Extracted info always goes through the novelty check.
workflow.add_edge("personal_info_extractor", "personal_info_duplicate_classifier")

# After the novelty check: either store the info or skip straight to retrieval.
workflow.add_conditional_edges(
    "personal_info_duplicate_classifier",
    personal_info_deduper_router,
    {
        "personal_info_storer": "personal_info_storer",
        "retrieve_memories": "retrieve_memories",
    },
)

# Common tail of every path: retrieve -> log -> final model call -> END.
workflow.add_edge("personal_info_storer", "retrieve_memories")
workflow.add_edge("retrieve_memories", "log_personal_memory")
workflow.add_edge("log_personal_memory", "call_model")
workflow.add_edge("call_model", END)

workflow.set_entry_point("personal_info_classifier")

# Compile with a MemorySaver checkpointer and the module-level store.
checkpointer = MemorySaver()
graph = workflow.compile(checkpointer=checkpointer, store=store)


In [None]:
from IPython.display import Image, display
from langchain_core.runnables.graph import MermaidDrawMethod

# Render the compiled graph as a Mermaid PNG and show it inline.
# NOTE(review): MermaidDrawMethod.API presumably renders via a remote service — confirm network access is available.
display(
    Image(
        graph.get_graph().draw_mermaid_png(
            draw_method=MermaidDrawMethod.API,
        )
    )
)

In [None]:
# First turn on thread 99: the message states a name and a hobby.
input_data_1 = {"messages": [HumanMessage(content="Hi, I'm Thomas Müller and I love playing football.")]}
graph.invoke(input=input_data_1, config={"configurable": {"thread_id": 99}})


In [None]:
# Second turn on the same thread (99): a message without any personal statement.
input_data_2 = {"messages": [HumanMessage(content="Great!")]}
graph.invoke(input=input_data_2, config={"configurable": {"thread_id": 99}})

In [None]:
# Different thread (2): this thread_id differs from the one used above, while
# the module-level store is the same object for every invocation.
input_data_3 = {"messages": [HumanMessage(content="What do you know about me?")]}
graph.invoke(input=input_data_3, config={"configurable": {"thread_id": 2}})