In [None]:
from typing import List
from dotenv import load_dotenv

from typing import Generator
from operator import itemgetter
from langchain_core.messages import trim_messages
from langchain_core.output_parsers import StrOutputParser
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableWithMessageHistory, RunnablePassthrough

# Load variables from a local .env file into the process environment
# (presumably API / LangSmith tracing keys — confirm against the project's .env).
load_dotenv()

In [None]:
from langchain_ollama import ChatOllama

In [None]:
# Prompt layout: system instructions -> prior conversation -> newest user turn.
template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent literals are concatenated into a single prompt string.
            "You are a helpful assistant '{llm_name}' who responds to questions "
            "in not more than 20 sentences. "
            "You can use markdown and code blocks to format your answers. "
            "You can also use emojis to make your answers more engaging. "
            "Please be concise and clear in your responses.",
        ),
        # Filled with the list of messages passed under the "chat_history" key.
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{new_input}"),
    ]
)

# Chat model served through Ollama.
llm = ChatOllama(model="qwen2.5:14b")

# Converts the model's message output into a plain string.
parser = StrOutputParser()

### Stateless:

In [None]:
# Stateless chain: nothing is remembered between calls, so the caller has to
# supply the whole prior conversation every time.
stateless_chain = template | llm | parser

prior_turns = [
    HumanMessage(content="What is the capital of France?"),
    AIMessage(content="The capital of France is Paris."),
    HumanMessage(content="Germany?"),
    AIMessage(content="The capital of Germany is Berlin."),
]

stateless_chain.invoke(
    {"llm_name": "Random", "chat_history": prior_turns, "new_input": "India?"}
)

### Stateful:
- If you open LangSmith, don't worry that the system prompt is missing from the trimmer's output.
- The system prompt is supplied by the `ChatPromptTemplate`,
- whereas the trimmer only operates on the chat history returned by `get_session_history`.
- The system prompt is therefore injected after trimming; you can confirm this by inspecting the input of the `ChatOllama` step.

In [None]:
# Same prompt, model and parser as in the Stateless section, re-declared here
# so the objects used by the stateful chain are defined next to it.
template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            # Adjacent literals are concatenated into a single prompt string.
            "You are a helpful assistant '{llm_name}' who responds to questions "
            "in not more than 20 sentences. "
            "You can use markdown and code blocks to format your answers. "
            "You can also use emojis to make your answers more engaging. "
            "Please be concise and clear in your responses.",
        ),
        # Receives the conversation history under the "chat_history" key.
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{new_input}"),
    ]
)
llm = ChatOllama(model="qwen2.5:14b")
parser = StrOutputParser()

In [None]:
# Module-level history object; get_session_history() returns this same
# instance on every call, so all turns in this notebook share one conversation.
lc_chat_hist = ChatMessageHistory()

def get_session_history() -> BaseChatMessageHistory:
    """Return the notebook-wide chat history, creating it on first use.

    The function takes no session id, so every caller shares the single
    module-level ``lc_chat_hist`` object.

    Returns:
        The shared history instance. A new ``ChatMessageHistory`` is created
        (and stored in ``lc_chat_hist``) when the global is undefined or None.
    """
    global lc_chat_hist
    # The previous check, `if not "lc_chat_hist"`, tested a non-empty string
    # literal and was therefore never true. Look the name up in the module
    # globals instead, which also covers the case where it was never defined.
    # `is None` is used deliberately rather than truthiness so that an existing
    # (possibly empty) history object is never replaced.
    if globals().get("lc_chat_hist") is None:
        lc_chat_hist = ChatMessageHistory()
    return lc_chat_hist

In [None]:
# Trimmer: bounds how much conversation history is forwarded to the prompt.
trimmer = trim_messages(
    token_counter=llm,       # the chat model is used to count tokens
    max_tokens=100,          # token budget for the retained history
    strategy="last",         # keep the most recent part of the conversation
    start_on=HumanMessage,   # retained history must begin with a human turn
    include_system=True,
    allow_partial=True,
)

In [None]:
# Stateful chain:
#   1. Overwrite `chat_history` with its trimmed version, so the prompt only
#      ever sees the history that fits the trimmer's token budget.
#   2. Render the prompt, call the model, and parse the reply into a string.
# The trimmed list is assigned straight back to `chat_history` (the key the
# prompt's MessagesPlaceholder reads); no intermediate `messages` key is needed.
chain = (
    RunnablePassthrough.assign(chat_history=itemgetter("chat_history") | trimmer)
    | template
    | llm
    | parser
)

# Wrap the chain with message-history handling: the history returned by
# get_session_history is supplied under `chat_history`, and the user's text is
# read from `new_input`.
llm_with_history = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=get_session_history,
    input_messages_key="new_input",
    history_messages_key="chat_history",
)

In [None]:
# Run the chain (streaming):
def get_resp(new_input: str, llm_name: str = "ToddLLM") -> Generator[str, None, None]:
    """Stream the assistant's reply to ``new_input`` piece by piece.

    Args:
        new_input: The user's message, passed to the chain as ``new_input``.
        llm_name: Value substituted for ``{llm_name}`` in the system prompt.

    Yields:
        Each chunk produced by ``llm_with_history.stream``, in order.
    """
    # Delegate directly to the underlying stream instead of re-yielding each
    # item by hand; the call is still made lazily, on first iteration.
    yield from llm_with_history.stream(
        {"new_input": new_input, "llm_name": llm_name}
    )


# Turn 1: print each streamed chunk as soon as it arrives.
for chunk in get_resp("Hello world! How are you?", "ToddLLM"):
    print(chunk, end="", flush=True)

In [None]:
# Turn 2: ask for a name that has not been given yet.
for chunk in get_resp("What was my name?"):
    print(chunk, end="", flush=True)

In [None]:
# Turn 3: tell the assistant the user's name.
for chunk in get_resp("Nope, I am Bhushan! What about you?"):
    print(chunk, end="", flush=True)

In [None]:
# Turn 4: check that the name from the previous turn is recalled.
for chunk in get_resp("Repeat my name?"):
    print(chunk, end="", flush=True)

In [None]:
# Turn 5: closing message.
for chunk in get_resp("Finally my chatbot is working with trimmer."):
    print(chunk, end="", flush=True)

In [None]:
# Display the messages accumulated in the shared history object.
lc_chat_hist.messages