In [1]:
import os
from dotenv import load_dotenv

load_dotenv()

# LangChain / OpenAI configuration
def configure_environment(env=os.environ):
    """Validate the API keys and toggle LangSmith tracing accordingly.

    ``load_dotenv()`` has already copied the ``.env`` entries into
    ``os.environ``, so the keys never need to be re-assigned. Writing
    ``os.environ[k] = os.getenv(k)`` raises ``TypeError`` when the key is
    absent, because environment values must be strings and ``os.getenv``
    returns ``None`` for a missing key.

    Args:
        env: Mutable mapping to read and update; defaults to ``os.environ``.

    Returns:
        The names of the required keys that are missing or empty, in the
        order ``LANGCHAIN_API_KEY``, ``OPENAI_API_KEY``.
    """
    missing = [
        key for key in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY") if not env.get(key)
    ]
    # Only turn tracing on when a LangSmith key is actually available.
    env["LANGCHAIN_TRACING_V2"] = (
        "false" if "LANGCHAIN_API_KEY" in missing else "true"
    )
    return missing


missing_keys = configure_environment()
if missing_keys:
    # Surface the problem here instead of failing later with an opaque error.
    print(f"Warning: missing environment variables: {', '.join(missing_keys)}")

In [2]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage


# Instantiate the OpenAI chat model with its default settings
model = ChatOpenAI()

# Send a single human message; the returned AIMessage is the cell output
greeting = HumanMessage(content="Hi! I'm Bob")
model.invoke([greeting])

AIMessage(content='Hello Bob! How can I assist you today?', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 12, 'total_tokens': 22}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-6a430ecb-7192-47c9-84a8-0f12c98f7d47-0', usage_metadata={'input_tokens': 12, 'output_tokens': 10, 'total_tokens': 22})

The model on its own has no concept of state: each call sees only the messages passed to it. For example, if you ask a follow-up question:


In [3]:
# Only this one message is sent; nothing from the earlier exchange is included.
model.invoke([HumanMessage(content="What is my name?")])

AIMessage(content="I'm sorry, I don't have access to personal information such as your name.", response_metadata={'token_usage': {'completion_tokens': 17, 'prompt_tokens': 12, 'total_tokens': 29}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-8ebb0444-a33c-4a60-a6c4-b0b2a15195dc-0', usage_metadata={'input_tokens': 12, 'output_tokens': 17, 'total_tokens': 29})

In [4]:
from langchain_core.messages import AIMessage

# Replay the whole exchange so the model can see the earlier introduction
conversation = [
    HumanMessage(content="Hi! I'm Bob"),
    AIMessage(content="Hello Bob! How can I assist you today?"),
    HumanMessage(content="What's my name?"),
]
model.invoke(conversation)

AIMessage(content='Your name is Bob. How can I help you today, Bob?', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 35, 'total_tokens': 49}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-add76a05-948c-45ce-b5fd-128135d5bc59-0', usage_metadata={'input_tokens': 35, 'output_tokens': 14, 'total_tokens': 49})

# Message History

The history is a list of messages that the model has seen so far. We can use this history to provide context to the model.


In [5]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

# In-memory registry of chat histories, keyed by session id
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation inside ``store``.

    Returns:
        The history object stored under ``session_id``; a new, empty
        ``ChatMessageHistory`` is registered when the id is not yet known.
    """
    try:
        return store[session_id]
    except KeyError:
        history = ChatMessageHistory()
        store[session_id] = history
        return history


# Wrap the model with RunnableWithMessageHistory; get_session_history supplies
# the per-session history object for each call.
with_message_history = RunnableWithMessageHistory(model, get_session_history)

Create a config and pass it to the `Runnable`.


In [6]:
# The session id selects which stored history this call belongs to
config = {"configurable": {"session_id": "abc2"}}

first_turn = [HumanMessage(content="Hi! I'm Bob")]
response = with_message_history.invoke(first_turn, config=config)

response.content

'Hello Bob! How can I assist you today?'

In [7]:
# A different session id ("abc3") gets its own, initially empty history from
# get_session_history, so nothing said under "abc2" is available here.
other_session = {"configurable": {"session_id": "abc3"}}
response = with_message_history.invoke(
    [HumanMessage(content="What's my name?")],
    config=other_session,
)

response.content

"I'm sorry, I do not have access to that information."

# Prompt templates

Prompt templates are a way to provide the model with a structured input.


`MessagesPlaceholder` is used to create a variable that represents a list of messages.


In [8]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# A bare placeholder expands to the message list supplied for "history".
history_placeholder = MessagesPlaceholder("history")
formatted_history = history_placeholder.format_messages(
    history=[
        ("system", "You are an AI assistant."),
        ("human", "Hello!"),
    ]
)

# Inside a chat template the placeholder sits between the fixed messages.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful assistant."),
        MessagesPlaceholder("history"),
        ("human", "{question}"),
    ]
)

# Keep this as the last expression so the notebook displays the formatted
# messages; computing them mid-cell without binding them shows nothing.
formatted_history

In [9]:
# Fixed system instruction, followed by the running conversation
system_message = (
    "system",
    "You are a helpful assistant. Answer all questions to the best of your ability.",
)
prompt = ChatPromptTemplate.from_messages(
    [system_message, MessagesPlaceholder(variable_name="messages")]
)

# Pipe the rendered prompt into the model
chain = prompt | model

The placeholder's list of messages is named `messages`, and the prompt is chained to the model with the `|` operator.


In [10]:
# The "messages" entry fills the placeholder of the same name in the prompt
chain_input = {"messages": [HumanMessage(content="hi! I'm bob")]}
response = chain.invoke(chain_input)

response.content

'Hello, Bob! How can I assist you today?'

Now, we pass `chain` to the History Runnable to provide the model with the context of the conversation.


In [11]:
# Rebind the history wrapper, this time around the prompt | model chain.
with_message_history = RunnableWithMessageHistory(chain, get_session_history)

In [12]:
# Session id shared by the next two calls, so they use the same history.
config = {"configurable": {"session_id": "abc5"}}

In [13]:
# First turn of the session: introduce the user
introduction = [HumanMessage(content="Hi! I'm Jim")]
response = with_message_history.invoke(introduction, config=config)

response.content

'Hello, Jim! How can I assist you today?'

In [14]:
# Second turn under the same config: the stored history carries the name
name_question = [HumanMessage(content="What's my name?")]
response = with_message_history.invoke(name_question, config=config)

response.content

'Your name is Jim.'

Let's create a new template. It takes an additional input, `language`, which must be supplied whenever the chain is invoked.


In [15]:
# Same template as before, with a {language} variable in the system message
system_message = (
    "system",
    "You are a helpful assistant. Answer all questions to the best of your ability in {language}.",
)
prompt = ChatPromptTemplate.from_messages(
    [system_message, MessagesPlaceholder(variable_name="messages")]
)

# Rebuild the chain so it uses the new prompt
chain = prompt | model

In [16]:
# Both template inputs must be supplied: the messages and the target language
chain_input = {
    "messages": [HumanMessage(content="hi! I'm bob")],
    "language": "traditional chinese",
}
response = chain.invoke(chain_input)

response.content

'你好，Bob！有什麼我可以幫忙的嗎？'

Now we wrap the `chain` with the new template. Since there are two inputs (`messages` & `language`), we need to specify the input messages key.


In [17]:
# The chain input is now a dict with several keys, so name the key that
# carries the chat messages.
with_message_history = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="messages",
)

In [18]:
# Fresh session for the language-aware chain
config = {"configurable": {"session_id": "abc11"}}

first_turn = {
    "messages": [HumanMessage(content="hi! I'm Clement")],
    "language": "chinese",
}
response = with_message_history.invoke(first_turn, config=config)

response.content

'你好！很高兴认识你，我是你的助手。有什么可以帮到你的吗？'

In [19]:
# Follow-up in the same session; "language" has to be passed on every call
second_turn = {
    "messages": [HumanMessage(content="What's my name?")],
    "language": "chinese",
}
response = with_message_history.invoke(second_turn, config=config)

response.content

'你的名字是 Clement。'

# Trim the messages

The history can grow extremely long. We need to trim the messages before passing them to the LLM so that they do not overflow its context window.


In [20]:
from langchain_core.messages import SystemMessage, trim_messages

# Sample conversation used to exercise the trimmer
messages = [
    SystemMessage(content="you're a good assistant"),
    HumanMessage(content="hi! I'm bob"),
    AIMessage(content="hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="nice"),
    HumanMessage(content="whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="thanks!"),
    AIMessage(content="no problem!"),
    HumanMessage(content="Hi! having fun?"),
    AIMessage(content="Yes! I'm having a great time!"),
]

# Trimming runnable; the model itself is passed as the token counter.
# NOTE(review): 73 is a budget chosen for this demo - confirm before reuse.
trimmer = trim_messages(
    strategy="last",
    max_tokens=73,
    token_counter=model,
    include_system=True,
    start_on="human",
    allow_partial=False,
)

trimmer.invoke([*messages, HumanMessage(content="What's my name?")])

[SystemMessage(content="you're a good assistant"),
 HumanMessage(content='whats 2 + 2'),
 AIMessage(content='4'),
 HumanMessage(content='thanks!'),
 AIMessage(content='no problem!'),
 HumanMessage(content='Hi! having fun?'),
 AIMessage(content="Yes! I'm having a great time!"),
 HumanMessage(content="What's my name?")]

In [21]:
from operator import itemgetter

from langchain_core.runnables import RunnablePassthrough

# Replace the incoming "messages" with their trimmed version, then run the
# prompt and the model as before.
trim_step = RunnablePassthrough.assign(messages=itemgetter("messages") | trimmer)
chain = trim_step | prompt | model

chain_input = {
    "messages": [*messages, HumanMessage(content="what's my name?")],
    "language": "English",
}
response = chain.invoke(chain_input)
response.content

"I'm sorry, but I don't have access to your personal information."

In [22]:
# Ask about something that appears in the later part of the sample conversation
chain_input = {
    "messages": [*messages, HumanMessage(content="what math problem did i ask")],
    "language": "English",
}
response = chain.invoke(chain_input)
response.content

'You asked "what\'s 2 + 2?"'

Wrap the trimming chain with message history and start a new session.


In [23]:
# New session id for the trimming chain
config = {"configurable": {"session_id": "abc20"}}

# Rebind the history wrapper around the chain that trims its input messages
with_message_history = RunnableWithMessageHistory(
    chain, get_session_history, input_messages_key="messages"
)

In [24]:
# Send the sample conversation plus a new question as this turn's input
first_turn = {
    "messages": [*messages, HumanMessage(content="whats my name?")],
    "language": "English",
}
response = with_message_history.invoke(first_turn, config=config)

response.content

"I'm sorry, I don't have access to your name. How can I assist you today?"

In [25]:
# Follow-up in the same session, this time sending only the new question
second_turn = {
    "messages": [HumanMessage(content="what math problem did i ask?")],
    "language": "English",
}
response = with_message_history.invoke(second_turn, config=config)

response.content

"You haven't asked a math problem yet. Feel free to ask any math-related questions you may have!"

# Streaming

An important UX consideration for chatbot applications is streaming. All chains expose a `.stream` method, and those that use message history are no different.


In [41]:
# Session used for the streaming demo
config = {"configurable": {"session_id": "abc15"}}

stream_input = {
    "messages": [HumanMessage(content="hi! I'm todd. tell me a joke")],
    "language": "English",
}
# Print each chunk as it arrives, without inserting newlines between chunks
for chunk in with_message_history.stream(stream_input, config=config):
    print(chunk.content, end="")

Sure, here's a joke for you:

Why don't scientists trust atoms?

Because they make up everything!

In [42]:
# Stream a follow-up question in the same session
stream_input = {
    "messages": [HumanMessage(content="What's my name?")],
    "language": "English",
}
for chunk in with_message_history.stream(stream_input, config=config):
    print(chunk.content, end="")

I apologize for the mistake earlier. I do not have access to personal information about you, so I do not know your name. How can I assist you today?