# Build a Chatbot

Adapted from the LangChain "Build a Chatbot" tutorial (https://python.langchain.com/docs/tutorials/chatbot/), using a local MLX chat model.

## Setup

In [2]:
# %pip install -q langchain-core "langgraph>0.2.27" langchain-community mlx-lm

In [3]:
import getpass
import os

# Turn on LangSmith tracing for every run in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Secrets must never be stored in the notebook: reuse the key if the
# environment already provides it, otherwise prompt for it without echoing.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")

In [4]:
from langchain_community.chat_models.mlx import ChatMLX
from langchain_community.llms.mlx_pipeline import MLXPipeline

# Build the MLX text-generation pipeline for the 4-bit Qwen2.5 32B instruct model
llm = MLXPipeline.from_model_id(
    model_id="mlx-community/Qwen2.5-32B-Instruct-4bit",
    pipeline_kwargs=dict(max_tokens=4000, temp=0.1),
)

# Wrap the pipeline in the chat-model interface used by the rest of the notebook
model = ChatMLX(llm=llm)

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

In [5]:
from langchain_core.messages import HumanMessage

# Call the model directly with a single human turn introducing the user
model.invoke([HumanMessage("Hi! I'm Bob")])



AIMessage(content="Hello Bob! It's nice to meet you. How can I assist you today?", additional_kwargs={}, response_metadata={}, id='run-6be2e35f-6c7d-4bc4-b836-881904993858-0')

In [6]:
# A separate call carries no history, so the model cannot know the name
model.invoke([HumanMessage("What's my name?")])



AIMessage(content="I don't have access to personal information about individuals unless you've shared it with me in our conversation. As a language model, I don't store personal data or have a way of identifying users. Could you tell me your name if you're comfortable sharing it?", additional_kwargs={}, response_metadata={}, id='run-69890e7d-e978-4129-aa9f-c304bddf4769-0')

## Message persistence

In [7]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Start a graph whose state schema is the built-in message-list state
workflow = StateGraph(MessagesState)


def call_model(state: MessagesState):
    """Send the messages held in the graph state to the chat model.

    Returns a state update whose "messages" entry is the model's reply.
    """
    reply = model.invoke(state["messages"])
    return {"messages": reply}


# Register the single node, then connect the graph entry point to it
workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Compile with a checkpointer so state is saved between invocations
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [20]:
# # Async function for node:
# async def call_model(state: MessagesState):
#     response = await model.ainvoke(state["messages"])
#     return {"messages": response}


# # Define graph as before:
# workflow = StateGraph(state_schema=MessagesState)
# workflow.add_edge(START, "model")
# workflow.add_node("model", call_model)
# app = workflow.compile(checkpointer=MemorySaver())

# # Async invocation (needs `config` and `input_messages`, which are defined in the cells below):
# output = await app.ainvoke({"messages": input_messages}, config)
# output["messages"][-1].pretty_print()

In [8]:
# The thread id selects which saved conversation an invocation belongs to
config = dict(configurable=dict(thread_id="abc123"))

In [9]:
# First turn on the current thread: the user introduces themselves
query = "Hi! I'm Bob."
input_messages = [HumanMessage(content=query)]

output = app.invoke({"messages": input_messages}, config=config)

# The returned state holds every message so far; show only the newest one
output["messages"][-1].pretty_print()


Hello Bob! It's nice to meet you. How can I assist you today?


In [13]:
# Inspect the full message list held in the returned state
output["messages"]

[HumanMessage(content="Hi! I'm Bob.", additional_kwargs={}, response_metadata={}, id='4b9d86ac-7117-4e22-b9b2-96bf7dc679f1'),
 AIMessage(content="Hello Bob! It's nice to meet you. How can I assist you today?", additional_kwargs={}, response_metadata={}, id='run-1218c983-fff2-4a05-bc9a-42632d1d292a-0')]

In [12]:
# Pretty-print every message in the returned state, in stored order
for msg in output["messages"]:
    msg.pretty_print()


Hi! I'm Bob.

Hello Bob! It's nice to meet you. How can I assist you today?


In [14]:
# Follow-up on the same thread: the reply depends on the earlier turn
query = "What's my name?"
input_messages = [HumanMessage(content=query)]

output = app.invoke({"messages": input_messages}, config=config)
output["messages"][-1].pretty_print()


Your name is Bob.


In [15]:
# Show the whole conversation recorded for the current thread
for msg in output["messages"]:
    msg.pretty_print()


Hi! I'm Bob.

Hello Bob! It's nice to meet you. How can I assist you today?

What's my name?

Your name is Bob.


In [16]:
# Use a different thread id; `query` is still the question from the earlier cell
config = dict(configurable=dict(thread_id="abc234"))
input_messages = [HumanMessage(content=query)]

output = app.invoke({"messages": input_messages}, config=config)
output["messages"][-1].pretty_print()


I don't have access to personal information about individuals unless you've shared it with me in our conversation. As a language model, I don't store personal data or have a way of identifying users. Could you tell me your name if you're comfortable sharing it?


In [17]:
# List what the newly used thread contains after this single exchange
for msg in output["messages"]:
    msg.pretty_print()


What's my name?

I don't have access to personal information about individuals unless you've shared it with me in our conversation. As a language model, I don't store personal data or have a way of identifying users. Could you tell me your name if you're comfortable sharing it?


In [18]:
# Go back to the first thread id and ask the same `query` again
config = dict(configurable=dict(thread_id="abc123"))
input_messages = [HumanMessage(content=query)]

output = app.invoke({"messages": input_messages}, config=config)
output["messages"][-1].pretty_print()


Your name is Bob.


In [19]:
# Print the first thread's history, including the turn just added
for msg in output["messages"]:
    msg.pretty_print()


Hi! I'm Bob.

Hello Bob! It's nice to meet you. How can I assist you today?

What's my name?

Your name is Bob.

What's my name?

Your name is Bob.


## Prompt templates

In [21]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Prompt = fixed system instruction followed by the conversation messages
pirate_instruction = (
    "system",
    "You talk like a pirate. Answer all questions to the best of your ability.",
)
history_slot = MessagesPlaceholder(variable_name="messages")

prompt_template = ChatPromptTemplate.from_messages([pirate_instruction, history_slot])

In [22]:
# Rebuild the graph so the node formats messages through `prompt_template`
workflow = StateGraph(MessagesState)


def call_model(state: MessagesState):
    """Render the prompt template from the state and query the chat model.

    Returns a state update whose "messages" entry is the model's reply.
    """
    rendered_prompt = prompt_template.invoke(state)
    return {"messages": model.invoke(rendered_prompt)}


workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Fresh checkpointer for the rebuilt application
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [23]:
# Open a new thread against the prompt-templated application
config = dict(configurable=dict(thread_id="abc345"))
query = "Hi! I'm Jim."
input_messages = [HumanMessage(content=query)]

output = app.invoke({"messages": input_messages}, config=config)
output["messages"][-1].pretty_print()


Arrr, matey! Welcome aboard, Jim. What be ye lookin' for, on this fine day?


In [24]:
# Print the messages stored in the state returned by the last invocation
for msg in output["messages"]:
    msg.pretty_print()


Hi! I'm Jim.

Arrr, matey! Welcome aboard, Jim. What be ye lookin' for, on this fine day?


In [25]:
# Same thread as the previous turn: ask the model to recall the name
query = "What is my name?"
input_messages = [HumanMessage(content=query)]

output = app.invoke({"messages": input_messages}, config=config)
output["messages"][-1].pretty_print()


Ye just told me, me hearty! Ye be callin' yerself Jim.


In [37]:
# The system instruction takes a `{language}` variable; a fixed user request
# for a Chinese translation is appended after the conversation messages.
system_instruction = (
    "system",
    "You are a helpful assistant. Answer all questions to the best of your ability in {language}.",
)
translation_request = ("user", "Also please translate it into Chinese.")

prompt_template = ChatPromptTemplate.from_messages(
    [
        system_instruction,
        MessagesPlaceholder(variable_name="messages"),
        translation_request,
    ]
)

In [38]:
from typing import Sequence

from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict


class State(TypedDict):
    """Graph state: the conversation messages plus the language for the prompt."""
    # Conversation messages; `add_messages` is attached as this key's reducer.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Value substituted for the `{language}` variable of the prompt template.
    language: str


# Rebuild the graph on the custom `State` schema (messages + language)
workflow = StateGraph(State)


def call_model(state: State):
    """Render the prompt template from the full state and query the chat model.

    Returns a state update whose "messages" entry is a one-element list
    holding the model's reply.
    """
    rendered_prompt = prompt_template.invoke(state)
    reply = model.invoke(rendered_prompt)
    return {"messages": [reply]}


workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Fresh checkpointer for the rebuilt application
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [39]:
# New thread; the first turn supplies both state keys
config = dict(configurable=dict(thread_id="abc456"))
query = "Hi! I'm Bob."
language = "Spanish"
input_messages = [HumanMessage(content=query)]

output = app.invoke({"messages": input_messages, "language": language}, config=config)
output["messages"][-1].pretty_print()


¡Hola! Soy Bob.
你好！我是鲍勃。


In [40]:
# Second turn on the same thread; "language" is deliberately not passed again
query = "What is my name?"
input_messages = [HumanMessage(content=query)]

output = app.invoke({"messages": input_messages}, config=config)
output["messages"][-1].pretty_print()


Tu nombre es Bob.

你的名字是鲍勃。


In [41]:
# Print the complete transcript held in the returned state
for msg in output["messages"]:
    msg.pretty_print()


Hi! I'm Bob.

¡Hola! Soy Bob.
你好！我是鲍勃。

What is my name?

Tu nombre es Bob.

你的名字是鲍勃。


## Managing Conversation History

In [53]:
from langchain_core.messages import SystemMessage, trim_messages, AIMessage

# Message trimmer configured with a 54-token budget; the chat model itself
# is handed in as the token counter.
trimmer = trim_messages(
    strategy="last",
    max_tokens=54,
    token_counter=model,
    start_on="human",
    include_system=True,
    allow_partial=False,
)

# Sample transcript: one system message followed by alternating human/AI turns
messages = [
    SystemMessage(content="you're a good assistant"),
    *(
        speaker(content=text)
        for speaker, text in [
            (HumanMessage, "hi! I'm bob"),
            (AIMessage, "hi!"),
            (HumanMessage, "I like vanilla ice cream"),
            (AIMessage, "nice"),
            (HumanMessage, "whats 2 + 2"),
            (AIMessage, "4"),
            (HumanMessage, "thanks"),
            (AIMessage, "no problem!"),
            (HumanMessage, "having fun?"),
            (AIMessage, "yes!"),
        ]
    ),
]

# Run the trimmer over the sample transcript and display what it keeps
trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content="hi! I'm bob", additional_kwargs={}, response_metadata={}),
 AIMessage(content='hi!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}),
 AIMessage(content='nice', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='no problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes!', additional_kwargs={}, response_metadata={})]

In [54]:
# Rebuild the graph so the node trims the messages before prompting the model
workflow = StateGraph(State)


def call_model(state: State):
    """Trim the stored messages, render the prompt, and query the chat model.

    Returns a state update whose "messages" entry is a one-element list
    holding the model's reply.
    """
    recent_messages = trimmer.invoke(state["messages"])
    prompt_inputs = {"messages": recent_messages, "language": state["language"]}
    reply = model.invoke(prompt_template.invoke(prompt_inputs))
    return {"messages": [reply]}


workflow.add_node("model", call_model)
workflow.add_edge(START, "model")

# Fresh checkpointer for the rebuilt application
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [55]:
# New thread seeded with the sample transcript plus a question about the name
config = dict(configurable=dict(thread_id="abc567"))
query = "What is my name?"
language = "English"
input_messages = [*messages, HumanMessage(content=query)]

output = app.invoke({"messages": input_messages, "language": language}, config=config)
output["messages"][-1].pretty_print()


I don't have access to your personal information, so I don't know your name. If you tell me your name, I'd be happy to translate it into Chinese for you!


In [56]:
# Another new thread, same sample transcript, asking about the math question
config = dict(configurable=dict(thread_id="abc678"))
query = "What math problem did I ask?"
language = "English"
input_messages = [*messages, HumanMessage(content=query)]

output = app.invoke({"messages": input_messages, "language": language}, config=config)
output["messages"][-1].pretty_print()


You asked what 2 + 2 is. In Chinese, this math problem is "2 加 2 等于多少?" (2 jiā 2 děngyú duōshǎo?).


In [60]:
# New thread; stream the reply instead of waiting for the full response
config = dict(configurable=dict(thread_id="abc789"))
query = "Hi I'm Todd, please tell me a joke."
language = "English"
input_messages = [HumanMessage(content=query)]

reply_stream = app.stream(
    {"messages": input_messages, "language": language},
    config=config,
    stream_mode="messages",
)
for chunk, metadata in reply_stream:
    # Echo model responses only; anything else in the stream is skipped
    if not isinstance(chunk, AIMessage):
        continue
    print(chunk.content, end="")

Sure, Todd! Here's a joke for you:

Why don't scientists trust atoms?

Because they make up everything.

Now, let's translate it into Chinese:

为什么科学家不相信原子？

因为原子构成一切，它们无所不“说”。 

Note: The translation tries to maintain the pun in the original joke. "Make up" in English has a double meaning of "constitute" and "fabricate," which doesn't directly translate to Chinese, so the translation includes a playful note to convey the joke's spirit.<|im_end|>