In [None]:
import os

from dotenv import load_dotenv

In [2]:
load_dotenv("./config/.env")

True

In [3]:
print(os.environ["LLM_ID"])

meta-llama/Meta-Llama-3-8B-Instruct


### Chat Model

In [4]:
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id=os.environ["LLM_ID"],
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
) # type: ignore

chat_model = ChatHuggingFace(llm=llm)

In [5]:
from langchain_core.messages import HumanMessage

chat_model.invoke([HumanMessage(content="Hi! I'm Bob")])

AIMessage(content="Hi Bob, what's up? Is there something I can help you with or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 23, 'prompt_tokens': 40, 'total_tokens': 63}, 'model_name': 'meta-llama/Meta-Llama-3-8B-Instruct', 'system_fingerprint': '', 'finish_reason': 'stop', 'logprobs': None}, id='run--ccc202b4-c228-4baa-8c21-4ff06e699b8a-0', usage_metadata={'input_tokens': 40, 'output_tokens': 23, 'total_tokens': 63})

In [6]:
chat_model.invoke("What's my name?")

AIMessage(content="I'm not aware of any information about your name. I'm a large language model, I don't have the ability to retain information about individual users or their personal details. Each time you interact with me, it's a new conversation and I don't have any prior knowledge about you. If you'd like to share your name with me, I'd be happy to chat with you!", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 79, 'prompt_tokens': 40, 'total_tokens': 119}, 'model_name': 'meta-llama/Meta-Llama-3-8B-Instruct', 'system_fingerprint': '', 'finish_reason': 'stop', 'logprobs': None}, id='run--cc64e407-bb6e-41e0-af05-fc706ad2b935-0', usage_metadata={'input_tokens': 40, 'output_tokens': 79, 'total_tokens': 119})

In [7]:
from langchain_core.messages import AIMessage

chat_model.invoke(
    [
        HumanMessage(content="Hi! I'm Bob"),
        AIMessage(content="Hello Bob! How can I assist you today?"),
        HumanMessage(content="What's my name?")
    ]
)

AIMessage(content='Your name is Bob.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 65, 'total_tokens': 71}, 'model_name': 'meta-llama/Meta-Llama-3-8B-Instruct', 'system_fingerprint': '', 'finish_reason': 'stop', 'logprobs': None}, id='run--d7a1a740-81ed-4fb8-8d8d-8aa40d68b55f-0', usage_metadata={'input_tokens': 65, 'output_tokens': 6, 'total_tokens': 71})

### Message Persistence

In [8]:
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# define new graph
workflow = StateGraph(state_schema=MessagesState)

# define the function that calls the model
def call_model(state: MessagesState):
    response = chat_model.invoke(state["messages"])
    return {"messages": response}

# define the single node in graph
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [10]:
config = {"configurable": {"thread_id": "abc123"}}

In [None]:
query = "Hi! I'm Bob"

output = app.invoke({"messages": [HumanMessage(query)]}, config) # type: ignore
output["messages"][-1].pretty_print()


Welcome back, Bob. It looks like you've already introduced yourself. What's on your mind today?


In [None]:
for out in output["messages"]:
    print(out.pretty_print())

##

Hi! I'm Bob
None
##
##

Nice to meet you, Bob! Is there something I can help you with or would you like to chat?
None
##
##

Hi! I'm Bob
None
##
##

Welcome back, Bob. It looks like you've already introduced yourself. What's on your mind today?
None
##


In [None]:
query = "What's my name?"
output = app.invoke({"messages": [HumanMessage(query)]}, config) # type: ignore
output["messages"][-1].pretty_print()


Your name is Bob!


### Prompt Template

In [16]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system", 
            "You talk like a pirate. Answer all questions to the best of your ability",
        ),
        MessagesPlaceholder(variable_name="messages")
    ]
)

In [18]:
prompt_template.pretty_print()


You talk like a pirate. Answer all questions to the best of your ability


[33;1m[1;3m{messages}[0m


In [19]:
workflow = StateGraph(state_schema=MessagesState)

def call_model(state: MessagesState):
    prompt = prompt_template.invoke(state) # type:ignore
    response = chat_model.invoke(prompt)
    return {"messages": response}

workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [20]:
config = {"configurable": {"thread_id": "abc456"}}
query = "Hi! I'm Jim"

output = app.invoke({"messages": [HumanMessage(query)]}, config) # type: ignore
output["messages"][-1].pretty_print()


Ahoy, Jim me lad! Welcome aboard me ship! I be Captain Blackbeak, the scurviest pirate to ever sail the Seven Seas. What brings ye to these fair waters?


In [21]:
query = "Tell me yours and my name"

output = app.invoke({"messages": [HumanMessage(query)]}, config) # type: ignore
output["messages"][-1].pretty_print()


Ye be wantin' to know yer name and mine, eh? Alright then, matey. Yer name be Jim, and mine be Captain Blackbeak.


In [23]:
for out in output["messages"]:
    print(out.pretty_print())


Hi! I'm Jim
None

Ahoy, Jim me lad! Welcome aboard me ship! I be Captain Blackbeak, the scurviest pirate to ever sail the Seven Seas. What brings ye to these fair waters?
None

Tell me yours and my name
None

Ye be wantin' to know yer name and mine, eh? Alright then, matey. Yer name be Jim, and mine be Captain Blackbeak.
None


In [25]:
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful assistant. Answer all questions to the best of your ability in {language}",
        ),
        MessagesPlaceholder(variable_name="messages"),
    ]
)

In [26]:
from typing import Sequence

from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from typing_extensions import Annotated, TypedDict

class State(TypedDict):
    messages: Annotated[Sequence[BaseMessage], add_messages]
    language: str

workflow = StateGraph(state_schema=State)

def call_model(state=State):
    prompt = prompt_template.invoke(state) # type: ignore
    response = chat_model.invoke(prompt)
    return {"messages": [response]}

workflow.add_edge(START, "model")
workflow.add_node("model", call_model) # type: ignore

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [28]:
config = {"configurable": {"thread_id": "abc789"}}
query = "Hi! I'm Bob."
language = "Hindi"

input_messages = [HumanMessage(query)]
output = app.invoke(
    {"messages": input_messages, "language": language},
    config, # type: ignore
)
output["messages"][-1].pretty_print()


नमस्ते बॉब, आप कैसे हैं? (नमस्ते बॉब, आप कैसे हैं?) मैं आपकी मदद करने के लिए तैयार हूँ।


In [29]:
query = "What is my name?"

input_messages = [HumanMessage(query)]
output = app.invoke(
    {"messages": input_messages}, # type: ignore
    config, # type: ignore
) 
output["messages"][-1].pretty_print()


आपका नाम बॉब है।


### Manage Conversation History
with trim_messages

In [39]:
from langchain_core.messages import SystemMessage, trim_messages

trimmer = trim_messages(
    max_tokens=45,
    strategy="last",
    token_counter=chat_model,
    include_system=True,
    allow_partial=False,
    start_on="human"
)

messages = [
    SystemMessage(content="You are a good assistant"),
    HumanMessage(content="Hi!, I'm Bob"),
    AIMessage(content="Hi!"),
    HumanMessage(content="I like vanilla ice cream"),
    AIMessage(content="Nice"),
    HumanMessage(content="Whats 2 + 2"),
    AIMessage(content="4"),
    HumanMessage(content="Thanks"),
    AIMessage(content="No problem!"),
    HumanMessage(content="Having fun?"),
    AIMessage(content="Yes!"),
]

trimmer.invoke(messages)

[SystemMessage(content='You are a good assistant', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Nice', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Whats 2 + 2', additional_kwargs={}, response_metadata={}),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Thanks', additional_kwargs={}, response_metadata={}),
 AIMessage(content='No problem!', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Having fun?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Yes!', additional_kwargs={}, response_metadata={})]

In [40]:
workflow = StateGraph(state_schema=State)

def call_model(state: State):
    trim_messages = trimmer.invoke(state["messages"])
    prompt = prompt_template.invoke(
        {"messages": trim_messages, "language": state["language"]}
    )
    response = chat_model.invoke(prompt)
    return {"messages": [response]}

workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

In [41]:
config = {"configurable": {"thread_id": "abc345"}}
query = "What is my name?"
language = "English"

input_messages = messages + [HumanMessage(query)]
output = app.invoke(
    {"messages": input_messages, "language": language},
    config # type: ignore
)
output["messages"][-1].pretty_print()


You didn't tell me your name. I'm happy to chat with you, but I don't have any information about your identity.


In [42]:
input_messages

[SystemMessage(content='You are a good assistant', additional_kwargs={}, response_metadata={}, id='a7b1ec6a-03c5-4641-a7e5-fe2c61ea00c3'),
 HumanMessage(content="Hi!, I'm Bob", additional_kwargs={}, response_metadata={}, id='f44d7880-3646-447b-8db9-d93e902dd90c'),
 AIMessage(content='Hi!', additional_kwargs={}, response_metadata={}, id='9fa0b36b-63de-4e30-b72b-17233299d95c'),
 HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}, id='3c0f8a4e-2c7b-4269-908f-2c7e2246c1a3'),
 AIMessage(content='Nice', additional_kwargs={}, response_metadata={}, id='6e685151-4d16-4f36-aee5-88a363dc997d'),
 HumanMessage(content='Whats 2 + 2', additional_kwargs={}, response_metadata={}, id='0ab36279-d99f-43f1-8808-ad59a4e4653d'),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}, id='7d5b445d-094b-416e-bf2a-8951351d57da'),
 HumanMessage(content='Thanks', additional_kwargs={}, response_metadata={}, id='f3adf636-b70a-4101-9322-84e4eb0a8961'),
 AIMessage(

In [46]:
config = {"configurable": {"thread_id": "abc567"}}
query = "What math problem did I ask?"
language = "English"

input_messages = messages + [HumanMessage(query)]
output = app.invoke(
    {"messages": input_messages, "language": language},
    config # type: ignore
)
output["messages"][-1].pretty_print()


You asked me to solve the math problem 2 + 2.


In [47]:
input_messages
# config

[SystemMessage(content='You are a good assistant', additional_kwargs={}, response_metadata={}, id='a7b1ec6a-03c5-4641-a7e5-fe2c61ea00c3'),
 HumanMessage(content="Hi!, I'm Bob", additional_kwargs={}, response_metadata={}, id='f44d7880-3646-447b-8db9-d93e902dd90c'),
 AIMessage(content='Hi!', additional_kwargs={}, response_metadata={}, id='9fa0b36b-63de-4e30-b72b-17233299d95c'),
 HumanMessage(content='I like vanilla ice cream', additional_kwargs={}, response_metadata={}, id='3c0f8a4e-2c7b-4269-908f-2c7e2246c1a3'),
 AIMessage(content='Nice', additional_kwargs={}, response_metadata={}, id='6e685151-4d16-4f36-aee5-88a363dc997d'),
 HumanMessage(content='Whats 2 + 2', additional_kwargs={}, response_metadata={}, id='0ab36279-d99f-43f1-8808-ad59a4e4653d'),
 AIMessage(content='4', additional_kwargs={}, response_metadata={}, id='7d5b445d-094b-416e-bf2a-8951351d57da'),
 HumanMessage(content='Thanks', additional_kwargs={}, response_metadata={}, id='f3adf636-b70a-4101-9322-84e4eb0a8961'),
 AIMessage(

### Streaming

In [49]:
config = {"configurable": {"thread_id": "abc098"}}
query = "Hi I'm Todd, please tell me a joke."
language = "English"

input_messages = [HumanMessage(query)]
for chunk, metadata in app.stream(
    {"messages": input_messages, "language": language},
    config, # type: ignore
    stream_mode="messages"
):
    if isinstance(chunk, AIMessage):
        print(chunk.content, end="")

Nice to meet you, Todd. Here's a joke for you:

What do you call a fake noodle?

An impasta!

I hope that made you laugh. Do you want to hear another one?

In [50]:
query = "Please tell me another joke."

input_messages = [HumanMessage(query)]
for chunk, metadata in app.stream(
    {"messages": input_messages, "language": language},
    config, # type: ignore
    stream_mode="messages"
):
    if isinstance(chunk, AIMessage):
        print(chunk.content, end="")

A man walked into a library and asked the librarian, "Do you have any books on Pavlov's dogs and Schrödinger's cat?"

The librarian replied, "It rings a bell, but I'm not sure if it's here or not."