In [1]:
from constants import LANGSMITH_API_KEY
import os
os.environ["LANGSMITH_TRACING"]='true'
os.environ["LANGSMITH_API_KEY"]= LANGSMITH_API_KEY


In [None]:
from constants import GEMINI_API_KEY
import os
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"]= GEMINI_API_KEY

from langchain.chat_models import init_chat_model
model=init_chat_model(
    "gemini-2.0-flash",
    model_provider='google_genai',
    GOOGLE_API_KEY='GEMINI_API_KEY')

#ChatModels are instances of LangChain "Runnables", 
# which means they expose a standard interface for interacting 
# with them. To just simply call the model, 
# we can pass in a list of messages to the .invoke method.

from langchain_core.messages import HumanMessage
model.invoke([HumanMessage(content="heyy!! im sumanthh!")])

#We can see that it doesn't take the previous conversation turn 
# into context, and cannot answer the question. 
# This makes for a terrible chatbot experience!
#To get around this, we need to pass the entire conversation 
# history into the model.

from langchain_core.messages import AIMessage
model.invoke([
    HumanMessage(content='heyy!! im sumanthhh'),
    AIMessage(content='hello sumanthhh!!,how can i assist you today?!'),
    HumanMessage(content='whats my name?'),
])

#


Unexpected argument 'GOOGLE_API_KEY' provided to ChatGoogleGenerativeAI.
                GOOGLE_API_KEY was transferred to model_kwargs.
                Please confirm that GOOGLE_API_KEY is what you intended.
  return _init_chat_model_helper(


AIMessage(content='Your name is sumanthhh, as you told me.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--cebfbf33-697c-4b96-aac9-6b6796972eba-0', usage_metadata={'input_tokens': 23, 'output_tokens': 13, 'total_tokens': 36, 'input_token_details': {'cache_read': 0}})

LangGraph has a built-in persistence layer, implemented through 
checkpointers. When you compile graph with a checkpointer, 
the checkpointer saves a checkpoint of the graph state at  every super-step. Those checkpoints are saved to a thread, 
which can be accessed after graph execution. 
Because threads allow access to graph's state after execution,
several powerful capabilities including human-in-the-loop, 
memory, time travel, and fault-tolerance are all possible. 

# Short-term memory (thread-level persistence)enables agents to track multi-turn conversations.
# long-term memory (cross-thread persistence) to store user-specific or application-specific data across conversations. This is useful for applications like chatbots, where you want to remember user preferences or other information.

In [4]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

#new graph
workflow=StateGraph(state_schema=MessagesState)
#function that calls the model
def call_model(state:MessagesState):
    response=model.invoke(state['messages'])
    return {'messages': response}

#defining the node in graph
workflow.add_edge(START,'model')
workflow.add_node('model',call_model)
#adding memory
memory=MemorySaver()
app=workflow.compile(checkpointer=memory)

In [5]:
## We now need to create a config that we pass into the 
# runnable every time. This config contains information 
# that is not part of the input directly, but is still 
# useful. In this case, we want to include a thread_id. 
# This should look like:
config={'configurable':{'thread_id':"asdf1234"}}
# This enables us to support multiple conversation threads with 
# a single application, a common requirement when your 
# application has multiple users.

#invoking the application
query="heyy!!"
input_messages=[HumanMessage(query)]
output=app.invoke({"messages":input_messages},config)
output['messages'][-1].pretty_print()


Hey there! What's up? 😊


### NOTE:
# If we change the config to reference a different thread id the it will start a whole new convo so previous stuff it wont remember 
# So we need to make sure we are referencing the same thread id for the same conversation


# Prompt templates
Prompt Templates help to turn raw user information into a format that the LLM can work with. In this case, the raw user input is just a message, which we are passing to the LLM

In [6]:
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder

prompt_template=ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "you talk like a doctor and answer all questions to the best of ability.",

        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

We can now update our application to incorporate this template:

In [9]:
#new graph
workflow=StateGraph(state_schema=MessagesState)
#function that calls the model
def call_model(state:MessagesState):
    prompt=prompt_template.invoke(state)
    response=model.invoke(prompt)
    return {'messages': response}

#defining the node in graph
workflow.add_edge(START,'model')
workflow.add_node('model',call_model)
#adding memory
memory=MemorySaver()
app=workflow.compile(checkpointer=memory)

# We now need to create a config that we pass into the 
# runnable every time. This config contains information 
# that is not part of the input directly, but is still 
# useful. In this case, we want to include a thread_id. 
# This should look like:
config={'configurable':{'thread_id':"asgf1234"}}
# This enables us to support multiple conversation threads with 
# a single application, a common requirement when your 
# application has multiple users.

#invoking the application
query="heyy!!"
input_messages=[HumanMessage(query)]
output=app.invoke({"messages":input_messages},config)
output['messages'][-1].pretty_print()


Hello there! How can I assist you today? Please tell me what's on your mind, and I'll do my best to provide you with some helpful information and guidance. Remember, this is not a substitute for a proper medical examination, but I can offer some general advice.


Managing Conversation History
One important concept to understand when building chatbots is how to manage conversation history. If left unmanaged, the list of messages will grow unbounded and potentially overflow the context window of the LLM. Therefore, it is important to add a step that limits the size of the messages you are passing in.

Importantly, you will want to do this BEFORE the prompt template but AFTER you load previous messages from Message History.

# We can do this by adding a simple step in front of the prompt that modifies the ***messages*** key appropriately, and then wrap that new chain in the Message History class.

In [11]:
from langchain_core.messages import SystemMessage, trim_messages

trim=trim_messages(
    max_tokens=83,
    strategy="last",
    token_counter=model,
    include_system=True,
    allow_partial=False,
    start_on="human",
)

messages=[
    SystemMessage(content="you are a worlds best dearmatologist"),
    HumanMessage(content="heyyy!!"),
    AIMessage("hello"),
    HumanMessage("i have acne on face"),
    AIMessage("you can use salsylic acid(mild)for acne reduction"),
]

trim.invoke(messages)

[SystemMessage(content='you are a worlds best dearmatologist', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='heyyy!!', additional_kwargs={}, response_metadata={}),
 AIMessage(content='hello', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='i have acne on face', additional_kwargs={}, response_metadata={}),
 AIMessage(content='you can use salsylic acid(mild)for acne reduction', additional_kwargs={}, response_metadata={})]

In [12]:
#new graph
workflow=StateGraph(state_schema=MessagesState)
#function that calls the model
def call_model(state:MessagesState):
    trimmed_messages=trim.invoke(state["messages"])
    prompt=prompt_template.invoke(state)
    response=model.invoke(prompt)
    return {'messages': response}

#defining the node in graph
workflow.add_edge(START,'model')
workflow.add_node('model',call_model)
#adding memory
memory=MemorySaver()
app=workflow.compile(checkpointer=memory)

# We now need to create a config that we pass into the 
# runnable every time. This config contains information 
# that is not part of the input directly, but is still 
# useful. In this case, we want to include a thread_id. 
# This should look like:
config={'configurable':{'thread_id':"asgf1234"}}
# This enables us to support multiple conversation threads with 
# a single application, a common requirement when your 
# application has multiple users.

#invoking the application
query="heyy!!"
input_messages=[HumanMessage(query)]
output=app.invoke({"messages":input_messages},config)
output['messages'][-1].pretty_print()


Hello there. How can I assist you today? Is there something specific you'd like to discuss regarding your health or well-being? I'm here to listen and offer any information or guidance I can.


since we have trimmed the messages so the past messages it will not remeber but it will remember the last few messages

## Streaming
Now we've got a functioning chatbot. However, one really important UX consideration for chatbot applications is streaming. LLMs can sometimes take a while to respond, and so in order to improve the user experience one thing that most applications do is stream back each token as it is generated. This allows the user to see progress.