In [1]:
import getpass, os

# Prompt for the key only when the environment does not already hold a
# non-empty value; the key is read via getpass rather than hardcoded here.
if os.environ.get("GOOGLE_API_KEY", "") == "":
    os.environ["GOOGLE_API_KEY"] = getpass.getpass(
        "Please enter your Google API key: "
    )

In [2]:
from langchain.chat_models import init_chat_model

# Build the chat model once; the remaining cells all call this `llm` handle.
llm = init_chat_model(
    "gemini-2.0-flash-lite",
    model_provider="google_genai",
)

In [3]:
from langchain_core.messages import HumanMessage

In [5]:
# Stand-alone call: the model receives exactly one human message.
llm.invoke(
    [HumanMessage(content="Hi, I am bob")]
)

AIMessage(content="Hi Bob, it's nice to meet you! How can I help you today?", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash-lite', 'safety_ratings': []}, id='run--73428790-7726-404f-bfb8-c4f2bb87f690-0', usage_metadata={'input_tokens': 5, 'output_tokens': 19, 'total_tokens': 24, 'input_token_details': {'cache_read': 0}})

In [6]:
# Second independent call: no earlier messages are passed along, and the
# output below shows the model cannot tell the name.
llm.invoke(
    [HumanMessage(content="Hi, what is my name?")]
)

AIMessage(content='I am a large language model, I do not have access to your personal information, including your name. Therefore, I cannot tell you your name.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash-lite', 'safety_ratings': []}, id='run--0e5a7a13-9dae-470a-9e96-7a4192dcd433-0', usage_metadata={'input_tokens': 7, 'output_tokens': 31, 'total_tokens': 38, 'input_token_details': {'cache_read': 0}})

In [7]:
from langchain_core.messages import AIMessage

# Replay the earlier exchange by hand so the model sees the whole
# conversation before the follow-up question.
llm.invoke([
    HumanMessage(content="Hi, I am bob"),
    AIMessage(content="Hi Bob, it's nice to meet you! How can I help you today?"),
    HumanMessage(content="Can you tell me my name?"),
])

AIMessage(content='Your name is Bob! You just told me. 😊', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash-lite', 'safety_ratings': []}, id='run--2bb321e6-eda5-415d-bb83-c619ece2df5b-0', usage_metadata={'input_tokens': 30, 'output_tokens': 12, 'total_tokens': 42, 'input_token_details': {'cache_read': 0}})

Use LangGraph to persist message history

In [4]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Define a new graph whose state schema is MessagesState.
workflow = StateGraph(state_schema=MessagesState)


def call_model(state: MessagesState):
    """Run the chat model on the messages held in the graph state.

    Args:
        state: Graph state; ``state["messages"]`` is passed to the model as-is.

    Returns:
        A state update whose ``"messages"`` entry is a one-element list
        holding the model's reply.
    """
    response = llm.invoke(state["messages"])
    # Wrap the reply in a list so this node returns the same shape as the
    # State-based call_model definitions further down in this notebook.
    return {"messages": [response]}


# Add the single "llm" node and make it the graph's entry point.
workflow.add_edge(START, "llm")
workflow.add_node("llm", call_model)

# Add memory: compile the graph with an in-memory checkpointer.
memory = MemorySaver()

app = workflow.compile(checkpointer=memory)

In [6]:
# Run configuration passed to app.invoke; carries the conversation thread id.
config = dict(configurable=dict(thread_id="abc123"))

In [7]:
# First turn on this thread: introduce a name.
query = "Hi, I am Jayden"

input_messages = [HumanMessage(content=query)]
output = app.invoke(
    dict(messages=input_messages),
    config,
)
# Only the newest message (the model's reply) is printed.
output["messages"][-1].pretty_print()


Hi Jayden, it's nice to meet you! How can I help you today?


In [8]:
# Follow-up with the same `config`: only the new question is sent, and the
# output below shows the name from the previous turn is still known.
query = "What is my name?"

input_messages = [HumanMessage(content=query)]
output = app.invoke(
    dict(messages=input_messages),
    config,
)
output["messages"][-1].pretty_print()


Your name is Jayden! You told me that earlier. 😊


Prompt templates

In [8]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Chat prompt: a fixed system instruction followed by the conversation,
# which is injected through the "messages" placeholder.
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "You talk like a pirate. Answer all questions to the best of your ability."),
    MessagesPlaceholder(variable_name="messages"),
])

In [6]:
# Rebuild the graph so the model node goes through prompt_template.
workflow = StateGraph(state_schema=MessagesState)


def call_model(state: MessagesState):
    """Render the prompt template from the graph state and query the model.

    Args:
        state: Graph state; it is handed to ``prompt_template.invoke``
            unchanged.

    Returns:
        A state update whose ``"messages"`` entry is a one-element list
        holding the model's reply.
    """
    prompt = prompt_template.invoke(state)
    response = llm.invoke(prompt)
    # Wrap the reply in a list so this node returns the same shape as the
    # State-based call_model definitions further down in this notebook.
    return {"messages": [response]}


# Add the single "llm" node and make it the graph's entry point.
workflow.add_edge(START, "llm")
workflow.add_node("llm", call_model)

# New in-memory checkpointer for the recompiled app.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)


In [9]:
# The app used here was compiled with a newly created MemorySaver.
config = dict(configurable=dict(thread_id="abc123"))
query = "Hi, I am Jayden"

input_messages = [HumanMessage(content=query)]
output = app.invoke(
    dict(messages=input_messages),
    config,
)
output["messages"][-1].pretty_print()


Ahoy there, Jayden! Shiver me timbers, it be a pleasure to meet ye! What be yer heart's desire, matey? What treasures do ye seek in these here waters?


More complicated prompt

In [10]:
# Same two-part prompt as before, but the system instruction now has a
# {language} variable that must be supplied when the template is invoked.
prompt_template = ChatPromptTemplate.from_messages([
    ("system", "Answer all questions to the best of your ability in {language}."),
    MessagesPlaceholder(variable_name="messages"),
])

In [11]:
from typing import Sequence
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict

class State(TypedDict):
    """Graph state carrying the conversation plus the reply language."""

    # Conversation messages, annotated with the add_messages reducer.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Fills the {language} variable of prompt_template.
    language: str


workflow = StateGraph(state_schema=State)


def call_model(state: State):
    """Render the prompt from the whole state and return the model's reply.

    Args:
        state: Graph state; it is handed to ``prompt_template.invoke``
            unchanged.

    Returns:
        A state update whose ``"messages"`` entry is a one-element list
        holding the model's reply.
    """
    rendered = prompt_template.invoke(state)
    return {"messages": [llm.invoke(rendered)]}


# Add the single "llm" node and make it the graph's entry point.
workflow.add_edge(START, "llm")
workflow.add_node("llm", call_model)

# New in-memory checkpointer for the recompiled app.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [12]:
config = dict(configurable=dict(thread_id="abc123"))
query = "Hi, I am Jayden"
language = "Chinese"

input_messages = [HumanMessage(content=query)]
# The input now carries both State keys: the new messages and the language.
output = app.invoke(
    dict(messages=input_messages, language=language),
    config,
)
output["messages"][-1].pretty_print()


你好，Jayden！很高兴认识你。有什么我可以帮你的吗？


Conversation history management

In [20]:
from langchain_core.messages import SystemMessage, trim_messages, AIMessage

# Trimmer configured for a 30-token budget with the "last" strategy; `llm` is
# passed as the token counter. In the sample output below, the system message
# survives and the kept history begins with a human message.
trimmer = trim_messages(
    max_tokens=30,
    strategy="last",
    token_counter=llm,
    include_system=True,
    allow_partial=False,
    start_on="human",
)

# Hand-written sample conversation used to exercise the trimmer.
# NOTE(review): "Your are" in the system message looks like a typo for
# "You are"; it is left unchanged because it is a runtime prompt string.
messages = [
    SystemMessage(content="Your are a good chatbot"),
    # --- exchange 1
    HumanMessage(content="Hi, I am Jayden"),
    AIMessage(content="Hi Jayden, it's nice to meet you! How can I help you today?"),
    # --- exchange 2
    HumanMessage(content="I like to play video games"),
    AIMessage(content="I like to play video games too! What about you?"),
    # --- exchange 3
    HumanMessage(content="What is 2 + 2"),
    AIMessage(content="4"),
    # --- exchange 4
    HumanMessage(content="thanks"),
    AIMessage(content="you're welcome"),
    # --- exchange 5
    HumanMessage(content="Having fun?"),
    AIMessage(content="yes"),
]

# Display what remains of the conversation after trimming.
trimmer.invoke(messages)

[SystemMessage(content='Your are a good chatbot', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content="you're welcome", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='yes', additional_kwargs={}, response_metadata={})]

In [21]:
workflow = StateGraph(state_schema=State)


def call_model(state: State):
    """Trim the stored history, render the prompt, and query the model.

    Args:
        state: Graph state providing ``"messages"`` and ``"language"``.

    Returns:
        A state update whose ``"messages"`` entry is a one-element list
        holding the model's reply.
    """
    # Only the trimmed messages are rendered into the prompt.
    recent_history = trimmer.invoke(state["messages"])
    prompt_inputs = {"messages": recent_history, "language": state["language"]}
    rendered = prompt_template.invoke(prompt_inputs)
    return {"messages": [llm.invoke(rendered)]}


# Add the single "llm" node and make it the graph's entry point.
workflow.add_edge(START, "llm")
workflow.add_node("llm", call_model)

# New in-memory checkpointer for the recompiled app.
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)


In [23]:
config = dict(configurable=dict(thread_id="wabc123"))
query = "Hi, I am Jayden"
language = "Chinese"

# Send the sample conversation followed by the new question, so the node's
# trimmer has a long history to cut down.
input_messages = messages + [HumanMessage(content=query)]
output = app.invoke(
    dict(messages=input_messages, language=language),
    config,
)
output["messages"][-1].pretty_print()


你好，Jayden！很高兴认识你！ (Nǐ hǎo, Jayden! Hěn gāoxìng rènshí nǐ!) 

Hi Jayden! Nice to meet you!


Streaming

In [24]:
config = dict(configurable=dict(thread_id="wabc123"))
query = "Hi, I am Jayden, tell me a joke"
language = "English"

input_messages = [HumanMessage(query)]
stream_input = {"messages": input_messages, "language": language}
# With stream_mode="messages" each item unpacks into (chunk, metadata).
for chunk, metadata in app.stream(stream_input, config, stream_mode="messages"):
    # Skip anything that is not model output.
    if not isinstance(chunk, AIMessage):
        continue
    # "|" marks the boundary between streamed chunks in the printed text.
    print(chunk.content, end="|")

Hi| Jayden! Here's a joke for you:

Why don't| scientists trust atoms?

Because they make up everything!
|