In [11]:
import getpass  # Secure way to prompt the user for sensitive input (e.g. API keys)
import os       # Allows interaction with the operating system, including setting environment variables

# Enables LangChain's advanced tracing and debugging features (v2) via LangSmith
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Prompts the user to enter their LangChain (LangSmith) API key securely (input is hidden)
# This key is required to enable tracing in LangSmith for monitoring and debugging your LangChain runs
os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("Enter your LangChain API Key: ")

# ✅ Set your OpenAI API Key
os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API Key: ")

# | output: false
# | echo: false
# (These two lines are Jupyter cell metadata used to suppress output/echo in some notebook renderers like Quarto)

from langchain_openai import ChatOpenAI
# Imports the ChatOpenAI class from LangChain's OpenAI integration package.
# This allows me to easily call OpenAI's chat models (e.g., GPT-4o, GPT-3.5) using a consistent interface.

model = ChatOpenAI(model="gpt-4o-mini")
# Initializes a chat model object using OpenAI's "gpt-4o-mini" (a lighter, faster GPT-4 model).
# I can now use `model.invoke("your message")` to send messages and receive responses.

Enter your LangChain API Key:  ········
Enter your OpenAI API Key:  ········


In [None]:
from langchain_core.messages import HumanMessage
# Imports the HumanMessage class from LangChain.
# This class is used to define messages coming from the user (you) in a structured way.

model.invoke([HumanMessage(content="Hi! I'm Dan")])
# Sends a list containing one HumanMessage to the model using `.invoke()`.
# This simulates a user saying "Hi! I'm Dan" to the chatbot.
# The model will process this input and return an AI-generated response.

In [13]:
# NOTE: LLMs like GPT have no built-in memory between calls.
# This is a key concept in LangChain:
# If you want memory, you have to implement it yourself — either by:
# 1. Passing the full message history manually every time (like we're doing here), or
# 2. Using LangChain tools like `RunnableWithMessageHistory` or `LangGraph` to handle memory automatically.

model.invoke([HumanMessage(content="What's my name?")])

NameError: name 'HumanMessage' is not defined

In [17]:
from langchain_core.messages import HumanMessage, AIMessage
# Import both HumanMessage and AIMessage.
# This allows you to simulate a full back-and-forth conversation between a user and the AI.

# We're manually passing the full conversation history to simulate memory.
# This is necessary because the model does not remember past interactions unless we give it the full context.
# In LangChain, memory can be managed manually like this or automatically using `RunnableWithMessageHistory` or `LangGraph`.

model.invoke(
    [
        HumanMessage(content="Hi! I'm Dan"),
        AIMessage(content="Hello Dan! How can I assist you today?"),
        HumanMessage(content="What's my name?"),
    ]
)

AIMessage(content='Your name is Dan. How can I help you further?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 13, 'prompt_tokens': 33, 'total_tokens': 46, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_80cf447eee', 'id': 'chatcmpl-BN0O2f9uqDvqODeaFxq0M7qJgJCqW', 'finish_reason': 'stop', 'logprobs': None}, id='run-652e2858-3758-45dd-a5d1-b6b27977deea-0', usage_metadata={'input_tokens': 33, 'output_tokens': 13, 'total_tokens': 46, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

In [19]:
from langgraph.checkpoint.memory import MemorySaver
# Imports an in-memory checkpointer. This stores message history so it persists between interactions.
# It's perfect for testing or quick prototyping. For production, you could use SQLite, Postgres, etc.

from langgraph.graph import START, MessagesState, StateGraph
# Imports core building blocks of a LangGraph:
# - `START` is the entry point of the workflow.
# - `MessagesState` defines the structure of our state (chat messages).
# - `StateGraph` is the object used to define the flow of the chatbot.

# Step 1: Create a new LangGraph with the message state schema
workflow = StateGraph(state_schema=MessagesState)

# Step 2: Define the function that calls the model
# This function takes the current conversation state (all messages),
# sends them to the model, and returns a new message as a response.
def call_model(state: MessagesState):
    response = model.invoke(state["messages"])  # Send conversation history to the LLM
    return {"messages": response}               # Return the updated message list to the state

# Step 3: Define the nodes and edges in the graph
# Add a node named "model" that runs our `call_model` function
workflow.add_node("model", call_model)

# Connect the START of the conversation to the "model" node
workflow.add_edge(START, "model")

# Step 4: Add memory to the graph
memory = MemorySaver()  # This stores the full chat history in memory between turns

# Step 5: Compile the graph into a runnable app, passing in the memory
app = workflow.compile(checkpointer=memory)

In [23]:
# Create a config dictionary to store metadata for the session
# The 'thread_id' tells LangGraph which memory stream to use
# This enables multi-turn chat history to be stored and retrieved
config = {
    "configurable": {
        "thread_id": "abc123"  # You can use any unique string here; this will create a unique memory slot or chat bank (it's kind of like a folder - to store stuff)
    }
}

In [25]:
query = "Hi! I'm Dan."
# This is your user input — what you're saying to the chatbot

input_messages = [HumanMessage(query)]
# Wrap the query in a HumanMessage object and put it in a list.
# LangGraph expects messages in list format for multi-turn history.

output = app.invoke({"messages": input_messages}, config)
# This sends the message to the LangGraph app.
# - `{"messages": ...}` is your input state.
# - `config` includes your thread_id ("abc123"), so LangGraph knows where to store this memory.

output["messages"][-1].pretty_print()
# Print the last message in the list — this will be the AI's response.
# The full message history is stored in `output["messages"]`.
# `pretty_print()` gives you a nicely formatted version.


Hi Dan! How can I assist you today?


In [27]:
query = "What's my name?"
# This is your follow-up question — you're asking the AI to remember what you said earlier.

input_messages = [HumanMessage(query)]
# Wrap the follow-up question as a HumanMessage (LangChain expects structured messages in a list).

output = app.invoke({"messages": input_messages}, config)
# This calls the LangGraph app with:
# - The new user message
# - The SAME `config` (thread_id = "abc123"), so it can retrieve the existing memory from the earlier interaction

output["messages"][-1].pretty_print()
# Print the AI’s most recent response (the last message in the list).
# If memory is working, it should respond with "Your name is Dan."


Your name is Dan!


In [33]:
config = {"configurable": {"thread_id": "abc234"}}
# This is a NEW thread ID, different from "abc123"
# This effectively resets the memory — like opening a blank chat window for a new user or session

input_messages = [HumanMessage(query)]
# Wrap the query (e.g., "What's my name?") in a HumanMessage and put it in a list

output = app.invoke({"messages": input_messages}, config)
# Invoke the LangGraph app using:
# - the new user message
# - the NEW thread_id in the config — LangGraph will treat this as a brand-new chat with no memory

output["messages"][-1].pretty_print()
# Print the AI's most recent reply — you should get a generic response like:
# "I'm sorry, I don't know your name." because there's no context in this new thread


I still don't know your name since you haven't mentioned it yet. If you'd like to share it, feel free!


In [35]:
config = {"configurable": {"thread_id": "abc123"}}
# This is the original thread ID you used earlier — the one where you said "Hi! I'm Dan."
# By using this again, you're "reloading" that conversation and all the memory tied to it.

input_messages = [HumanMessage(query)]
# Wrap your latest input (e.g., "What's my name?") into a HumanMessage object

output = app.invoke({"messages": input_messages}, config)
# Invoke the chatbot using:
# - your new input
# - the original memory context (`thread_id = abc123`)

output["messages"][-1].pretty_print()
# Print the model's response.
# Because it's back on the original memory thread, the model should recall your name is Dan.


Your name is Dan.


In [37]:
config = {"configurable": {"thread_id": "abc123"}}
# Reconnects to the original chat thread — memory of "I'm Dan" is still stored here.

input_messages = [HumanMessage(query)]
# Wraps the user’s current input into a structured format for LangChain.

output = app.invoke({"messages": input_messages}, config)
# Sends the message into LangGraph with memory tracking.
# Because this thread has past messages, LangGraph retrieves and includes them automatically.

output["messages"][-1].pretty_print()
# Displays the model’s latest reply.
# You should see a memory-aware answer, like "Your name is Dan."


Your name is Dan.


In [39]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
# Imports:
# - ChatPromptTemplate: used to structure and format prompts in a modular way.
# - MessagesPlaceholder: a placeholder that gets replaced with the full conversation history at runtime.

prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are James Bond. Respond with wit, charm, and confidence. Keep your answers cool, concise, and in-character.",
        ),
        # This is a system message — it sets the tone and behavior of the AI.
        # In this case, the AI should roleplay and respond like James Bond.

        MessagesPlaceholder(variable_name="messages"),
        # This placeholder will automatically pull in the chat history
        # (everything the human and AI have said so far) from the `state["messages"]` when invoked.
    ]
)

In [41]:
workflow = StateGraph(state_schema=MessagesState)
# Re-initialize the LangGraph workflow.
# We’re still using MessagesState to represent the conversation history (as before).

def call_model(state: MessagesState):
    # This function runs every time a new message comes in.

    # highlight-start
    prompt = prompt_template.invoke(state)
    # This uses the custom ChatPromptTemplate (James Bond version).
    # It injects both the system message and the current messages into a structured prompt.

    response = model.invoke(prompt)
    # Now the model sees the full prompt — including personality + history — and generates a response.
    # highlight-end

    return {"messages": response}
    # Return the new state with the model’s response added.

In [43]:
config = {"configurable": {"thread_id": "abc345"}}
# New memory thread — this is a clean slate conversation.
# The chatbot will NOT remember anything from previous threads ("abc123", etc.).

query = "Hi! I'm Dan."
# The user introduces themselves — this is the first message in this conversation.

input_messages = [HumanMessage(query)]
# Wrap the query in a HumanMessage so LangChain understands it as a user's message.

output = app.invoke({"messages": input_messages}, config)
# Call the LangGraph app:
# - It pulls in your James Bond system prompt from the ChatPromptTemplate
# - Injects this first user message into that structured prompt
# - Runs it through the model
# - Saves it to memory under "abc345"

output["messages"][-1].pretty_print()
# Print the latest AI response — this should sound like it came straight out of a Bond movie.


Hi Dan! How can I assist you today?


In [45]:
query = "What is my name?"
# You're now testing whether the chatbot remembers what you said earlier in this specific thread.

input_messages = [HumanMessage(query)]
# Wrap the follow-up question in a HumanMessage.

output = app.invoke({"messages": input_messages}, config)
# This will:
# - Retrieve the full conversation history stored under the current thread_id
# - Inject that into the James Bond prompt template
# - Send it to the model and get a contextual response

output["messages"][-1].pretty_print()
# Prints the model’s response.
# If memory is working, it should say “Your name is Jim.” (or Dan, if that's what you said earlier)
# And it should *still* sound like James Bond.


Your name is Dan. How can I help you, Dan?


In [62]:
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability in {language}.",
        ),
        # `{language}` is a placeholder that gets filled in when you invoke the prompt

        MessagesPlaceholder(variable_name="messages"),
        # This still inserts the chat history into the prompt as before
    ]
)

In [94]:
from typing import Sequence  # Used to define list-like types (e.g., a sequence of messages)
from langchain_core.messages import BaseMessage  # Base class for all message types (e.g., HumanMessage, AIMessage)
from langgraph.graph.message import add_messages  # Tells LangGraph to track the `messages` field for memory
from typing_extensions import Annotated, TypedDict  # Used to define a typed state schema with metadata

#Define the state that your LangGraph app will use and persist
class State(TypedDict):
    messages: Annotated[Sequence[BaseMessage], add_messages]
    #A list of all messages exchanged — LangGraph uses this to maintain memory

    language: str
    #Custom input for the prompt template — lets the user define the language dynamically

#Create the LangGraph workflow using the custom state
workflow = StateGraph(state_schema=State)

#Define what the model does each time it's invoked in the graph
def call_model(state: State):
    #Create a full prompt by injecting the state (messages + language) into the template
    prompt = prompt_template.invoke(state)

    #DEBUG: Print each item in the generated prompt for inspection
    print("\n----- Prompt Preview -----")
    for msg in prompt:
        print(msg)  # 👀 This avoids calling .type or .content in case msg is a tuple
    print("--------------------------\n")

    #Send the prompt to the model and get a response
    response = model.invoke(prompt)

    #Return the response wrapped in a list so it can be added to memory
    return {"messages": [response]}

#Set up the graph structure: START → model
workflow.add_edge(START, "model")  # Defines the order of operations in the graph
workflow.add_node("model", call_model)  # Adds the actual logic for what happens at "model" step

# Set up in-memory persistence to keep track of conversation history per thread_id
memory = MemorySaver()

#Compile the full app with the current workflow and memory setup
app = workflow.compile(checkpointer=memory)

In [96]:
config = {"configurable": {"thread_id": "abc456"}}
# This thread ID ensures that all messages and memory stay scoped to this conversation session.

query = "Hi! I'm Dan."
language = "Spanish"
# You're now specifying that responses should be in Spanish — this gets injected into the prompt via {language}
input_messages = [HumanMessage(query)]
# Wrap the user's message in a HumanMessage — required by LangChain

output = app.invoke(
    {"messages": input_messages, "language": language},  #Your full custom state
    config,  #Includes thread_id for memory persistence
)
#This calls your LangGraph app with:
# - Full message history (starts with "Hi! I'm Bob.")
# - The language instruction ("Spanish")
# - The memory thread ID ("abc456")

output["messages"][-1].pretty_print()
#Prints the model's latest response — should be in Spanish, and should remember that your name is Dan


----- Prompt Preview -----
('messages', [SystemMessage(content='You are a helpful assistant. Answer all questions to the best of your ability in Spanish.', additional_kwargs={}, response_metadata={}), HumanMessage(content="Hi! I'm Dan.", additional_kwargs={}, response_metadata={}, id='47341a4b-968c-44b1-b69d-8d6fbcc8626e')])
--------------------------


¡Hola, Dan! ¿Cómo puedo ayudarte hoy?


In [98]:
query = "What is my name?"
#You’re asking the bot to recall your name — based on prior memory

input_messages = [HumanMessage(query)]
# Wrap your follow-up input in a HumanMessage

output = app.invoke(
    {"messages": input_messages},  #Notice: no "language" key this time!
    config,  #Same thread ID, so memory and config are reused
)

output["messages"][-1].pretty_print()
# Prints the AI's reply — it should remember your name (e.g., Dan)
# It should still respond in Spanish unless changed in a new input


----- Prompt Preview -----
('messages', [SystemMessage(content='You are a helpful assistant. Answer all questions to the best of your ability in Spanish.', additional_kwargs={}, response_metadata={}), HumanMessage(content="Hi! I'm Dan.", additional_kwargs={}, response_metadata={}, id='47341a4b-968c-44b1-b69d-8d6fbcc8626e'), AIMessage(content='¡Hola, Dan! ¿Cómo puedo ayudarte hoy?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 34, 'total_tokens': 46, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_80cf447eee', 'id': 'chatcmpl-BN17mK1MKyZ0UPxI5chQxDxow9IVm', 'finish_reason': 'stop', 'logprobs': None}, id='run-c546ffb8-4f79-4a24-ae67-9091bb3e7af4-0', usage_metadata={'input_tokens': 34, 'output_tokens

In [125]:
from langchain_core.messages import SystemMessage, trim_messages
#SystemMessage defines a system-level instruction (like setting personality or language)
#trim_messages is a helper to manage and truncate long message histories

#Create a trimmer instance with specific rules
trimmer = trim_messages(
    max_tokens=150,          #Only keep messages that fit within 65 tokens total
    strategy="last",        #Keep the most recent messages ("last N" style)
    token_counter=model,    #Use the model itself to estimate how many tokens each message uses
    include_system=True,    #Always include the system message, if present
    allow_partial=False,    #Don’t include partial (cut-off) messages
    start_on="human",       #Start trimming from the most recent human message backward
)
#Simulated message history
messages = [
    SystemMessage(content="you're a good assistant"),  # System instruction
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks"),
    AIMessage(content="no problem!"),
    HumanMessage(content="having fun?"),
    AIMessage(content="yes!"),
]
#Apply the trimming logic to this full list of messages
trimmer.invoke(messages)

[SystemMessage(content="you're a good assistant", additional_kwargs={}, response_metadata={}),
 HumanMessage(content="hi! I'm bob", additional_kwargs={}, response_metadata={}),
 AIMessage(content='hi!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}),
 AIMessage(content='nice', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='no problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes!', additional_kwargs={}, response_metadata={})]

In [127]:
workflow = StateGraph(state_schema=State)
#Re-initialize the LangGraph workflow using your custom state (with messages + language)

def call_model(state: State):
    #Trim the message history before sending it to the model
    trimmed_messages = trimmer.invoke(state["messages"])
    #This prevents the message list from growing too large for the model's context window

    #Use the trimmed messages + current language setting to generate the prompt
    prompt = prompt_template.invoke(
        {
            "messages": trimmed_messages,      #Safe, shortened chat history
            "language": state["language"],     #Keep the preferred response language
        }
    )

    #Send the prompt to the model and get a response
    response = model.invoke(prompt)

    #Return the new AI message so it can be appended to memory
    return {"messages": [response]}

#Re-define the workflow structure
workflow.add_edge(START, "model")           # Define flow: start → model node
workflow.add_node("model", call_model)      # Attach the logic defined above

#Set up in-memory conversation tracking
memory = MemorySaver()

#Compile the app with the updated call_model logic and memory system
app = workflow.compile(checkpointer=memory)

In [129]:
config = {"configurable": {"thread_id": "abc567"}}
#Start a brand new memory thread — no prior messages saved under this ID

query = "What is my name?"
language = "English"
#Set the assistant's language to English (won't default to Spanish anymore)

#Use the full list of sample messages from before (from trimmer test)
#Add a new message at the end where you ask "What is my name?"
input_messages = messages + [HumanMessage(query)]

#Send trimmed chat history + language instruction into the chatbot
output = app.invoke(
    {"messages": input_messages, "language": language},  # Full input state
    config,                                              # Scoped to a new thread
)
output["messages"][-1].pretty_print()
#Show the assistant's response — should say it doesn't know your name


Your name is Bob.


In [131]:
#Set up a new memory thread — this keeps it separate from your earlier examples
config = {"configurable": {"thread_id": "abc678"}}

#User wants to know what math problem they asked earlier in the conversation
query = "What math problem did I ask?"

#Specify that the assistant should reply in English
language = "English"

#Combine the pre-defined list of messages (from earlier test) with the new query
#The message history includes: 
# - "what's 2 + 2" → "4"
# - Followed by: "thanks" → "no problem!"
# - Then: "having fun?" → "yes!"
# - Finally: "What math problem did I ask?"
input_messages = messages + [HumanMessage(query)]

#Invoke the LangGraph app with your conversation and language instruction
output = app.invoke(
    {"messages": input_messages, "language": language},  # Pass updated state (trimmed before prompt)
    config,  # Thread ID to track memory and maintain consistency
)

#Print the AI's latest response — should reference the math question you asked
output["messages"][-1].pretty_print()


You asked, "What's 2 + 2?"


In [135]:
#Start a new thread for streaming — completely separate from earlier sessions
config = {"configurable": {"thread_id": "abc789"}}

#Initial user message
query = "Hi I'm Dan, please tell me a joke."
language = "English"

#Wrap the message in a HumanMessage object
input_messages = [HumanMessage(query)]

#Use .stream() to start the LangGraph app, but stream tokens as they’re generated
#Instead of waiting for the full model response, we get it **token by token**
for chunk, metadata in app.stream(
    {"messages": input_messages, "language": language},  #Full input state
    config,                                               #Threaded memory
    stream_mode="messages",                               #Stream raw message tokens
):
    #Filter: Only show chunks that are actual model messages (ignore intermediate states)
    if isinstance(chunk, AIMessage):
        #Print each token chunk as it arrives, followed by a "|"
        print(chunk.content, end="|")

|Hi| Dan|!| Here|’s| a| joke| for| you|:

|Why| don't| scientists| trust| atoms|?

|Because| they| make| up| everything|!||