In [1]:
import getpass
import os
from langchain_groq import ChatGroq



In [2]:
from dotenv import load_dotenv
import os
# Load variables from a local .env file into os.environ so the API-key cells
# below can read them. The cell output shows the call's return value.
load_dotenv()

True

In [4]:
# Expose the Groq key under GROQ_API_KEY, the variable ChatGroq is expected to read.
# Lookup order: the `groqe_api_key` entry loaded from .env (original behaviour),
# then an already-exported GROQ_API_KEY, then an interactive prompt. This replaces
# the bare KeyError raised when .env is missing or does not contain the entry.
# NOTE(review): `groqe_api_key` may be a misspelling of `groq_api_key` — confirm against .env.
groq_key = os.environ.get("groqe_api_key") or os.environ.get("GROQ_API_KEY")
if not groq_key:
    # getpass keeps the key out of the notebook source and out of the cell output.
    groq_key = getpass.getpass("Enter your Groq API key: ")
os.environ["GROQ_API_KEY"] = groq_key

In [5]:
# Chat model client shared by every later cell. No api_key is passed here, so the
# client is expected to pick up GROQ_API_KEY from the environment set above.
model = ChatGroq(model="llama3-8b-8192")

LangSmith

Many of the applications you build with LangChain will contain multiple steps with multiple invocations of LLM calls. As these applications get more and more complex, it becomes crucial to be able to inspect what exactly is going on inside your chain or agent. The best way to do this is with LangSmith.

In [6]:
# Switch on LangSmith tracing for the LangChain calls made in this kernel session.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [7]:
# Expose the LangSmith key under LANGCHAIN_API_KEY.
# Lookup order: the `langchain_api_key` entry loaded from .env (original behaviour),
# then an already-exported LANGCHAIN_API_KEY, then an interactive prompt. This
# replaces the bare KeyError raised when .env does not contain the entry.
langsmith_key = os.environ.get("langchain_api_key") or os.environ.get("LANGCHAIN_API_KEY")
if not langsmith_key:
    # getpass keeps the key out of the notebook source and out of the cell output.
    langsmith_key = getpass.getpass("Enter your LangSmith API key: ")
os.environ["LANGCHAIN_API_KEY"] = langsmith_key

In [8]:
from langchain_core.messages import HumanMessage, SystemMessage

In [9]:
# Conversation to send: a system instruction followed by the user's text.
system_msg = SystemMessage(content="Translate the following from English into Italian")
user_msg = HumanMessage(content="hi!")
messages = [system_msg, user_msg]

In [12]:
# Send the system + human message list; the returned AIMessage is shown as the cell output.
model.invoke(messages)

AIMessage(content='Ciao!', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 24, 'total_tokens': 28, 'completion_time': 0.003333333, 'prompt_time': 0.000968974, 'queue_time': 0.014314095999999998, 'total_time': 0.004302307}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_179b0f92c9', 'finish_reason': 'stop', 'logprobs': None}, id='run-627b06be-fbbf-4673-a4f1-15ba4264a7c0-0', usage_metadata={'input_tokens': 24, 'output_tokens': 4, 'total_tokens': 28})

In [13]:
# A bare string is accepted as input as well.
model.invoke("Hello")

AIMessage(content="Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 11, 'total_tokens': 37, 'completion_time': 0.021666667, 'prompt_time': 0.000104749, 'queue_time': 0.013489569, 'total_time': 0.021771416}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_179b0f92c9', 'finish_reason': 'stop', 'logprobs': None}, id='run-ed23e0a0-ddfb-465f-9ab6-2d15afda2608-0', usage_metadata={'input_tokens': 11, 'output_tokens': 26, 'total_tokens': 37})

In [14]:
# The same greeting expressed as a list of role/content dictionaries.
model.invoke([{"role": "user", "content": "Hello"}])

AIMessage(content="Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 11, 'total_tokens': 37, 'completion_time': 0.021666667, 'prompt_time': 0.001978539, 'queue_time': 0.012743271, 'total_time': 0.023645206}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_6a6771ae9c', 'finish_reason': 'stop', 'logprobs': None}, id='run-29f74671-d44b-4c8d-8195-eafe1998ad6f-0', usage_metadata={'input_tokens': 11, 'output_tokens': 26, 'total_tokens': 37})

In [16]:
# The same greeting wrapped in an explicit HumanMessage object.
model.invoke([HumanMessage("Hello")])

AIMessage(content="Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 11, 'total_tokens': 37, 'completion_time': 0.021666667, 'prompt_time': 0.00010711, 'queue_time': 0.01365166, 'total_time': 0.021773777}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_179b0f92c9', 'finish_reason': 'stop', 'logprobs': None}, id='run-9c2b9148-ecd3-4ecf-9ffc-f1152b2936dc-0', usage_metadata={'input_tokens': 11, 'output_tokens': 26, 'total_tokens': 37})

In [17]:
# Stream the reply piece by piece; the "|" terminator makes each chunk boundary
# visible in the printed output.
token_stream = model.stream(messages)
for token in token_stream:
    print(token.content, end="|")

|C|iao|!||

Prompt Templates

Right now we are passing a list of messages directly into the language model. Where does this list of messages come from? Usually, it is constructed from a combination of user input and application logic. This application logic usually takes the raw user input and transforms it into a list of messages ready to pass to the language model. Common transformations include adding a system message or formatting a template with the user input.

Prompt templates are a concept in LangChain designed to assist with this transformation. They take in raw user input and return data (a prompt) that is ready to pass into a language model.

In [19]:
from langchain_core.prompts import ChatPromptTemplate

In [20]:
# System prompt text with a {language} placeholder, filled in when the template is invoked.
system_template = "Translate the following from English into {language}"

In [22]:
# Hard-coded system + user message pair (the prompt template in this section
# produces the same two messages from variables).
messages = [
    SystemMessage(content="Translate the following from English into Italian"),
    HumanMessage(content="hi!"),
]

In [29]:
# Keep the reply in a variable so its fields can be inspected in the next cell.
inv = model.invoke(messages)

In [30]:
# Show only the text of the reply, without the response metadata.
inv.content

'Ciao!'

In [21]:

# Two-message chat template: the system text takes {language}, the user turn takes {text}.
template_parts = [
    ("system", system_template),
    ("user", "{text}"),
]
prompt_template = ChatPromptTemplate.from_messages(template_parts)

In [24]:
# Fill both template variables; the resulting ChatPromptValue is displayed as the cell output.
template_inputs = {"language": "Italian", "text": "hi!"}
prompt = prompt_template.invoke(template_inputs)
prompt

ChatPromptValue(messages=[SystemMessage(content='Translate the following from English into Italian', additional_kwargs={}, response_metadata={}), HumanMessage(content='hi!', additional_kwargs={}, response_metadata={})])

In [25]:
# Unpack the ChatPromptValue into its list of message objects.
prompt.to_messages()

[SystemMessage(content='Translate the following from English into Italian', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='hi!', additional_kwargs={}, response_metadata={})]

In [26]:
# The prompt value is passed to the model directly, without converting it to messages first.
response = model.invoke(prompt)

In [27]:
# Display the full AIMessage, including its token-usage metadata.
response

AIMessage(content='Ciao!', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 24, 'total_tokens': 28, 'completion_time': 0.003333333, 'prompt_time': 0.004597852, 'queue_time': 0.246443386, 'total_time': 0.007931185}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_6a6771ae9c', 'finish_reason': 'stop', 'logprobs': None}, id='run-ad0fd162-1240-4358-8f5d-10b402be5955-0', usage_metadata={'input_tokens': 24, 'output_tokens': 4, 'total_tokens': 28})

In [28]:
# Print only the translated text.
print(response.content)

Ciao!


Prompt Templates

Prompt templates help to translate user input and parameters into instructions for a language model. This can be used to guide a model's response, helping it understand the context and generate relevant and coherent language-based output.

String PromptTemplates

These prompt templates are used to format a single string, and generally are used for simpler inputs. For example, a common way to construct and use a PromptTemplate is as follows:

In [32]:
from langchain_core.prompts import PromptTemplate

# Single-string template with one variable, {topic}.
joke_template_text = "Tell me a joke about {topic}"
prompt_template = PromptTemplate.from_template(joke_template_text)

# Substitute the topic to obtain a prompt value that can be passed to the model.
prompt = prompt_template.invoke({"topic": "cats"})

In [34]:
# Run the formatted string prompt through the chat model and keep the reply.
inv = model.invoke(prompt)

In [35]:
# Show only the text of the reply.
inv.content

'Why did the cat join a band?\n\nBecause it wanted to be the purr-cussionist!'

ChatPromptTemplates

These prompt templates are used to format a list of messages. A ChatPromptTemplate is itself built from a list of message templates, one per message. For example, a common way to construct and use a ChatPromptTemplate is as follows:

In [36]:
from langchain_core.prompts import ChatPromptTemplate

# Chat template with a fixed system message and a user message parameterised by {topic}.
chat_messages = [
    ("system", "You are a helpful assistant"),
    ("user", "Tell me a joke about {topic}"),
]
prompt_template = ChatPromptTemplate(chat_messages)

# Substitute the topic to obtain the prompt value used in the next cell.
prompt = prompt_template.invoke({"topic": "cats"})

In [37]:
# Send the formatted chat prompt to the model; the AIMessage is shown as the cell output.
model.invoke(prompt)


AIMessage(content='Why did the cat join a band?\n\nBecause it wanted to be the purr-cussionist!', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 21, 'prompt_tokens': 26, 'total_tokens': 47, 'completion_time': 0.0175, 'prompt_time': 0.000981874, 'queue_time': 0.013428216, 'total_time': 0.018481874}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_179b0f92c9', 'finish_reason': 'stop', 'logprobs': None}, id='run-0a6acc1e-865f-4b30-ac95-06fc25258e9b-0', usage_metadata={'input_tokens': 26, 'output_tokens': 21, 'total_tokens': 47})

MessagesPlaceholder

This prompt template is responsible for adding a list of messages in a particular place. In the above ChatPromptTemplate, we saw how we could format two messages, each one a string. But what if we wanted the user to pass in a list of messages that we would slot into a particular spot? This is how you use MessagesPlaceholder.

In [38]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage

In [39]:
# Chat template whose second slot is filled by a caller-supplied list of messages
# passed under the "msgs" key.
prompt_template = ChatPromptTemplate(
    [
        ("system", "You are a helpful assistant"),
        MessagesPlaceholder(variable_name="msgs"),
    ]
)


In [41]:
# Messages that fill the "msgs" slot of the template.
history = [HumanMessage(content="hi!")]
inv = prompt_template.invoke({"msgs": history})

In [42]:
# Send the formatted prompt (system message plus the injected messages) to the model.
model.invoke(inv)

AIMessage(content="Hi there! It's great to meet you! Is there something I can help you with or would you like to chat?", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 22, 'total_tokens': 48, 'completion_time': 0.021666667, 'prompt_time': 0.000812355, 'queue_time': 0.013517813, 'total_time': 0.022479022}, 'model_name': 'llama3-8b-8192', 'system_fingerprint': 'fp_179b0f92c9', 'finish_reason': 'stop', 'logprobs': None}, id='run-2e73d724-c460-4d35-a3ce-0be361d258a9-0', usage_metadata={'input_tokens': 22, 'output_tokens': 26, 'total_tokens': 48})

In [43]:
# Same template as the MessagesPlaceholder version, but the message slot is declared
# with the ("placeholder", "{msgs}") tuple form instead of a MessagesPlaceholder object.
prompt_template = ChatPromptTemplate([
    ("system", "You are a helpful assistant"),
    ("placeholder", "{msgs}") # <-- This is the changed part
])

In [45]:
# With the placeholder tuple form, the plain string "hi!" comes back as a
# HumanMessage (see the cell output).
prompt_template.invoke({"msgs": ["hi!"]})

ChatPromptValue(messages=[SystemMessage(content='You are a helpful assistant', additional_kwargs={}, response_metadata={}), HumanMessage(content='hi!', additional_kwargs={}, response_metadata={})])

The key methods of a chat model are:

invoke: The primary method for interacting with a chat model. It takes a list of messages (or a plain string or prompt value, which is converted to messages) as input and returns a single AIMessage as output.
stream: A method that allows you to stream the output of a chat model as it is generated.
batch: A method that allows you to batch multiple requests to a chat model together for more efficient processing.
bind_tools: A method that allows you to bind a tool to a chat model for use in the model's execution context.
with_structured_output: A wrapper around the invoke method for models that natively support structured output.

Inputs and outputs

Modern LLMs are typically accessed through a chat model interface that takes messages as input and returns messages as output. Messages are typically associated with a role (e.g., "system", "human", "assistant") and one or more content blocks that contain text or potentially multimodal data (e.g., images, audio, video).

LangChain supports two message formats to interact with chat models:

LangChain Message Format: LangChain's own message format, which is used by default and is used internally by LangChain.
OpenAI's Message Format: the role/content dictionary format used by OpenAI's chat API, e.g. {"role": "user", "content": "Hello"}.

Standard parameters

Many chat models have standardized parameters that can be used to configure the model:

Parameter	Description

model	The name or identifier of the specific AI model you want to use (e.g., "gpt-3.5-turbo" or "gpt-4").
temperature	Controls the randomness of the model's output. A higher value (e.g., 1.0) makes responses more creative, while a lower value (e.g., 0.0) makes them more deterministic and focused.
timeout	The maximum time (in seconds) to wait for a response from the model before canceling the request. Ensures the request doesn’t hang indefinitely.
max_tokens	Limits the total number of tokens (words and punctuation) in the response. This controls how long the output can be.
stop	Specifies stop sequences that indicate when the model should stop generating tokens. For example, you might use specific strings to signal the end of a response.
max_retries	The maximum number of attempts the system will make to resend a request if it fails due to issues like network timeouts or rate limits.
api_key	The API key required for authenticating with the model provider. This is usually issued when you sign up for access to the model.
base_url	The URL of the API endpoint where requests are sent. This is typically provided by the model's provider and is necessary for directing your requests.
rate_limiter	An optional BaseRateLimiter to space out requests to avoid exceeding rate limits. See rate-limiting below for more details.

Context window

A chat model's context window refers to the maximum size of the input sequence the model can process at one time. While the context windows of modern LLMs are quite large, they still present a limitation that developers must keep in mind when working with chat models.