In [6]:
#Initializing an LLM and Setting up Langsmith

import os
from dotenv import load_dotenv

# Load variables from a local .env file into os.environ.
load_dotenv()

# Fail fast with a readable message when a required key is missing. Assigning
# os.getenv(...) back into os.environ would instead raise an opaque
# "TypeError: str expected, not NoneType" for an unset key.
for required_key in ("GROQ_API_KEY", "LANGCHAIN_API_KEY"):
    if not os.getenv(required_key):
        raise RuntimeError(
            f"{required_key} is not set; add it to your .env file or environment."
        )

# LangSmith tracing configuration.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "Langchain Tutorial"

from langchain_groq import ChatGroq

# Chat model shared by the chains and pipelines defined further down.
llm = ChatGroq(model="openai/gpt-oss-20b")

# Conversational Memory :
> *Conversational Memory* allows our chatbots and agents to remember previous interactions within a conversation. <br>

Types of Conversational Memory:
1. Conversation Buffer Memory
2. Conversation Buffer Window Memory
3. Conversation Summary Memory
4. Conversation Summary Buffer Memory

## 1. Conversation Buffer Memory

*ConversationBufferMemory* is the simplest form of conversational memory: it is literally just a place where we store messages, which we then feed into our LLM. <br> <br> 
Let's start with LangChain's original ConversationBufferMemory object, we are setting return_messages=True to return the messages as a list of ChatMessage objects — unless using a non-chat model we would always set this to True as without it the messages are passed as a direct string which can lead to unexpected behavior from chat LLMs.

In [10]:
from langchain.memory import ConversationBufferMemory

# return_messages=True makes the history come back as a list of message objects
# instead of one formatted string.
memory = ConversationBufferMemory(return_messages= True)

There are several ways that we can add messages to our memory, using the save_context method we can add a user query (via the input key) and the AI's response (via the output key). So, to create the following conversation: <br>

User: Hi, my name is James <br>
AI: Hey James, what's up? I'm an AI model called Zeta. <br>
User: I'm researching the different types of conversational memory. <br>
AI: That's interesting, what are some examples? <br>
User: I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory. <br>
AI: That's interesting, what's the difference? <br>
User: Buffer memory just stores the entire conversation, right? <br>
AI: That makes sense, what about ConversationBufferWindowMemory? <br>
User: Buffer window memory stores the last k messages, dropping the rest. <br>
AI: Very cool! <br>
We do:

In [11]:
# The scripted conversation, one (user message, AI response) pair per exchange.
scripted_exchanges = [
    ("Hi, my name is James", "Hey James, what's up? I'm an AI model called Zeta."),
    ("I'm researching the different types of conversational memory.", "That's interesting, what are some examples?"),
    ("I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", "That's interesting, what's the difference?"),
    ("Buffer memory just stores the entire conversation, right?", "That makes sense, what about ConversationBufferWindowMemory?"),
    ("Buffer window memory stores the last k messages, dropping the rest.", "Very cool!"),
]

# save_context receives the user query under "input" and the AI reply under "output".
for user_text, ai_text in scripted_exchanges:
    memory.save_context({"input": user_text}, {"output": ai_text})

Before using the memory, we need to load in any variables for that memory type — in this case, there are none, so we just pass an empty dictionary:

In [12]:
# This memory type takes no input variables, so an empty dict is passed.
memory.load_memory_variables({})

{'history': [HumanMessage(content='Hi, my name is James', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Hey James, what's up? I'm an AI model called Zeta.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's interesting, what are some examples?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content="I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's interesting, what's the difference?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}, response_metadata={}),
  HumanMess

In [13]:
from pprint import pprint
# Pretty-print just the message list stored under the "history" key.
pprint(memory.load_memory_variables({})["history"])

[HumanMessage(content='Hi, my name is James', additional_kwargs={}, response_metadata={}),
 AIMessage(content="Hey James, what's up? I'm an AI model called Zeta.", additional_kwargs={}, response_metadata={}),
 HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}),
 AIMessage(content="That's interesting, what are some examples?", additional_kwargs={}, response_metadata={}),
 HumanMessage(content="I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", additional_kwargs={}, response_metadata={}),
 AIMessage(content="That's interesting, what's the difference?", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Buffer 

With that, we've created our buffer memory. Before feeding it into our LLM let's quickly view the alternative method for adding messages to our memory. With this other method, we pass individual user and AI messages via the add_user_message and add_ai_message methods. To reproduce what we did above, we do:

In [14]:
# Start from a fresh buffer so this cell reproduces the conversation on its own.
memory = ConversationBufferMemory(return_messages=True)

# The same scripted conversation, written as (user message, AI response) turns.
conversation_turns = [
    ("Hi, my name is James", "Hey James, what's up? I'm an AI model called Zeta."),
    ("I'm researching the different types of conversational memory.", "That's interesting, what are some examples?"),
    ("I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", "That's interesting, what's the difference?"),
    ("Buffer memory just stores the entire conversation, right?", "That makes sense, what about ConversationBufferWindowMemory?"),
    ("Buffer window memory stores the last k messages, dropping the rest.", "Very cool!"),
]

# Append each turn straight onto the underlying chat history, user first, then AI.
for user_text, ai_text in conversation_turns:
    memory.chat_memory.add_user_message(user_text)
    memory.chat_memory.add_ai_message(ai_text)

memory.load_memory_variables({})

{'history': [HumanMessage(content='Hi, my name is James', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Hey James, what's up? I'm an AI model called Zeta.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's interesting, what are some examples?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content="I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's interesting, what's the difference?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}, response_metadata={}),
  HumanMess

The outcome is exactly the same in either case. To pass this onto our LLM, we need to create a ConversationChain object — which is already deprecated in favor of the RunnableWithMessageHistory class, which we will cover in a moment.

In [16]:
from langchain.chains import ConversationChain

# verbose=True prints the fully formatted prompt each time the chain runs.
chain = ConversationChain(llm=llm, memory=memory, verbose=True)

  chain = ConversationChain(


In [17]:
# Ask for a detail that only appears in the stored conversation history.
chain.invoke({"input": "What is My Name Again?"})



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[HumanMessage(content='Hi, my name is James', additional_kwargs={}, response_metadata={}), AIMessage(content="Hey James, what's up? I'm an AI model called Zeta.", additional_kwargs={}, response_metadata={}), HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}), AIMessage(content="That's interesting, what are some examples?", additional_kwargs={}, response_metadata={}), HumanMessage(content="I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", additional_kwargs={}, response_metadata={}), AIMessage(content="That's interesting, what's the differ

{'input': 'What is My Name Again?',
 'history': [HumanMessage(content='Hi, my name is James', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Hey James, what's up? I'm an AI model called Zeta.", additional_kwargs={}, response_metadata={}),
  HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's interesting, what are some examples?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content="I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's interesting, what's the difference?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}

### ConversationBufferMemory with RunnableWithMessageHistory

As mentioned, the ConversationBufferMemory type is due for deprecation. Instead, we can use the RunnableWithMessageHistory class to implement the same functionality.

When implementing RunnableWithMessageHistory we will use LangChain Expression Language (LCEL) and for this we need to define our prompt template and LLM components. Our llm has already been defined, so now we just define a ChatPromptTemplate object.

In [19]:
from langchain.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    ChatPromptTemplate
)

system_prompt = "You are a helpful assistant called Zeta."

# Message order: system instruction, then the prior conversation (filled in
# under the "history" variable), then the new user query.
prompt_messages = [
    SystemMessagePromptTemplate.from_template(system_prompt),
    MessagesPlaceholder(variable_name="history"),
    HumanMessagePromptTemplate.from_template("{query}"),
]
prompt_template = ChatPromptTemplate.from_messages(prompt_messages)

We can link our prompt_template and our llm together to create a pipeline via LCEL.

In [20]:
# LCEL pipe: the formatted prompt is passed on to the LLM.
pipeline = prompt_template | llm

To add message history, our pipeline needs to be wrapped in a RunnableWithMessageHistory object. This object requires a few input parameters. One of those is get_session_history, which requires a function that returns a ChatMessageHistory object based on a session ID. We define this function ourselves:

In [21]:
from langchain_core.chat_history import InMemoryChatMessageHistory

# Maps each session ID to its chat history; lives only as long as the kernel.
chat_map = {}


def get_chat_history(session_id: str) -> InMemoryChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``chat_map``.

    Returns:
        The history object stored for that session.
    """
    history = chat_map.get(session_id)
    if history is None:
        # First time this session ID is seen: start an empty history for it.
        history = InMemoryChatMessageHistory()
        chat_map[session_id] = history
    return history

We also need to tell our runnable which variable name to use for the chat history (i.e. `history`) and which to use for the user's query (i.e. `query`).

In [22]:
from langchain_core.runnables.history import RunnableWithMessageHistory

# Wrap the pipeline so that history is looked up per session via get_chat_history.
# The two key names match the variables used in the prompt template.
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    input_messages_key="query",
    history_messages_key="history",
    get_session_history=get_chat_history,
)

Now we invoke our runnable, passing the session ID via the `config` parameter:

In [24]:
# The session ID goes under the "configurable" key of the run config. This is
# the documented form, and the one the windowed-history example later in this
# notebook uses; a bare top-level "session_id" is not a standard config key.
response = pipeline_with_history.invoke(
    {
        "query" : "Hi, My name is Ankur and I am GenAI Developer."
    },
    config = {"configurable": {"session_id" : "1"}}
)

In [25]:
# Show only the text of the model's reply.
pprint(response.content)

('Nice to meet you again, Ankur! 😊  \n'
 'What’s on your mind today? Are you working on a particular project, need '
 'help with a code snippet, or curious about the latest trends in generative '
 'AI? Let me know how I can assist!')


In [26]:
# Same session as the previous call, so the earlier introduction is in the history.
# The session ID goes under the "configurable" key, which is the documented form;
# a bare top-level "session_id" is not a standard config key.
ans = pipeline_with_history.invoke(
    {"query": "What is My name and what do I do?"},
    config = {"configurable": {"session_id" : "1"}}
)

pprint(ans.content)

('You’re **Ankur**, and you’re a **GenAI Developer**—someone who builds, '
 'experiments with, and deploys generative AI models and applications.')


## 2. Conversation Buffer Window Memory

The ConversationBufferWindowMemory type is similar to ConversationBufferMemory, but only keeps track of the last k interactions (each interaction being one human message and one AI message). There are a few reasons why we would want to keep only the most recent messages:

* More messages mean more tokens are sent with each request, more tokens increases latency and cost.

* LLMs tend to perform worse when given more tokens, making them more likely to deviate from instructions, hallucinate, or "forget" information provided to them. Conciseness is key to high performing LLMs.

* If we keep all messages we will eventually hit the LLM's context window limit, by adding a window size k we can ensure we never hit this limit.

The buffer window solves many problems that we encounter with the standard buffer memory, while still being a very simple and intuitive form of conversational memory.

In [2]:
from langchain.memory import ConversationBufferWindowMemory

# Windowed variant of the buffer memory; k limits how much recent history is returned.
memory = ConversationBufferWindowMemory(k=4, return_messages=True)

  memory = ConversationBufferWindowMemory(k=4, return_messages=True)


We populate this memory using the same methods as before:

In [3]:
# Scripted conversation as (user message, AI response) turns.
window_demo_turns = [
    ("Hi, my name is James", "Hey James, what's up? I'm an AI model called Zeta."),
    ("I'm researching the different types of conversational memory.", "That's interesting, what are some examples?"),
    ("I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", "That's interesting, what's the difference?"),
    ("Buffer memory just stores the entire conversation, right?", "That makes sense, what about ConversationBufferWindowMemory?"),
    ("Buffer window memory stores the last k messages, dropping the rest.", "Very cool!"),
]

# Replay every turn into the memory, user message first, then the AI reply.
for user_text, ai_text in window_demo_turns:
    memory.chat_memory.add_user_message(user_text)
    memory.chat_memory.add_ai_message(ai_text)

# Only the windowed part of the conversation is returned here.
memory.load_memory_variables({})["history"]

[HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}),
 AIMessage(content="That's interesting, what are some examples?", additional_kwargs={}, response_metadata={}),
 HumanMessage(content="I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", additional_kwargs={}, response_metadata={}),
 AIMessage(content="That's interesting, what's the difference?", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Buffer window memory stores the last k messages, dropping the rest.', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Very cool!', additional_kwargs={}, response_metadata={})]

In [4]:
from langchain.chains import ConversationChain

# Same chain type as before, now backed by the windowed memory.
# verbose=True prints the formatted prompt on each run.
chain = ConversationChain(llm=llm, memory=memory, verbose=True)

  chain = ConversationChain(


In [5]:
# NOTE(review): the prose after this cell discusses the model forgetting the
# user's *name*, but this query asks for an age. Confirm which was intended.
chain.invoke({"input":"what is my age?"})



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3mThe following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
[HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}), AIMessage(content="That's interesting, what are some examples?", additional_kwargs={}, response_metadata={}), HumanMessage(content="I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", additional_kwargs={}, response_metadata={}), AIMessage(content="That's interesting, what's the difference?", additional_kwargs={}, response_metadata={}), HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}), AIMessage(content='That mak

{'input': 'what is my age?',
 'history': [HumanMessage(content="I'm researching the different types of conversational memory.", additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's interesting, what are some examples?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content="I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.", additional_kwargs={}, response_metadata={}),
  AIMessage(content="That's interesting, what's the difference?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Buffer window memory stores the last k messages, dropping the rest.', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Very cool!', additional_kwargs={}

The reason our LLM can no longer remember our name is that we have set the k parameter to 4, meaning that only the last 4 interactions (8 messages) are passed to the LLM. As we can see above, this does not include the first message, where we introduced ourselves.

Based on the agent forgetting our name, we might wonder why we would ever use this memory type compared to the standard buffer memory. Well, as with most things in AI, it is always a trade-off. Here we are able to support much longer conversations, use fewer tokens, and improve latency — but these come at the cost of forgetting non-recent messages.

### Conversation Buffer Window Memory with Runnable With Message History
To implement this memory type using the RunnableWithMessageHistory class, we can use the same approach as before. We define our prompt_template and llm as before, and then wrap our pipeline in a RunnableWithMessageHistory object.

For the window feature, we need to define a custom version of the InMemoryChatMessageHistory class that removes any messages beyond the last k messages.

In [8]:
from pydantic import BaseModel, Field
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.messages import BaseMessage

class BufferWindowMessageHistory(BaseChatMessageHistory, BaseModel):
    """Chat history that retains only the most recent ``k`` messages.

    Note that ``k`` counts individual messages here, not human/AI pairs.
    """

    # Messages currently retained, oldest first.
    messages: list[BaseMessage] = Field(default_factory=list)
    # Maximum number of messages to retain; 0 retains nothing.
    k: int = Field(default_factory=int)

    def __init__(self, k: int):
        """Create an empty history with a window of ``k`` messages.

        Args:
            k: Maximum number of messages to retain.

        Raises:
            ValueError: If ``k`` is negative.
        """
        super().__init__(k=k)
        # Checked after model initialisation so the validated value is compared.
        # A negative window would make the slice in add_messages drop messages
        # from the front instead of keeping the most recent ones.
        if self.k < 0:
            raise ValueError(f"k must be non-negative, got {k}")
        print(f"Initializing BufferWindowMessageHistory with k={k}")

    def add_messages(self, messages: list[BaseMessage]) -> None:
        """Add messages to the history, removing any messages beyond
        the last `k` messages.

        Args:
            messages: New messages to append, in chronological order.
        """
        combined = [*self.messages, *messages]
        # `combined[-0:]` is the whole list (because -0 == 0), so a window of
        # zero has to be handled explicitly to retain nothing.
        self.messages = combined[-self.k:] if self.k > 0 else []

    def clear(self) -> None:
        """Clear the history."""
        self.messages = []

In [11]:
# Session ID -> windowed chat history. Rebinding the name discards any sessions
# stored by earlier cells.
chat_map = {}


def get_chat_history(session_id: str, k: int = 4) -> BufferWindowMessageHistory:
    """Return the windowed history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying the conversation in ``chat_map``.
        k: Window size for a newly created history. It is not applied to a
            session that already exists.

    Returns:
        The history object stored for that session.
    """
    print(f"get_chat_history called with session_id={session_id} and k={k}")
    try:
        return chat_map[session_id]
    except KeyError:
        # Unknown session ID: start a new history with the requested window.
        history = BufferWindowMessageHistory(k=k)
        chat_map[session_id] = history
        return history

In [12]:
from langchain.prompts import (
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
    ChatPromptTemplate
)

system_prompt = "You are a helpful assistant called Zeta."

# Build the prompt piece by piece: system instruction, stored conversation
# (the "history" variable), and finally the incoming user query.
system_message = SystemMessagePromptTemplate.from_template(system_prompt)
history_slot = MessagesPlaceholder(variable_name="history")
user_message = HumanMessagePromptTemplate.from_template("{query}")

prompt_template = ChatPromptTemplate.from_messages(
    [system_message, history_slot, user_message]
)

# LCEL pipe: the formatted prompt is passed on to the LLM.
pipeline = prompt_template | llm

In [16]:
from langchain_core.runnables import ConfigurableFieldSpec
from langchain_core.runnables.history import RunnableWithMessageHistory

# Wrap the pipeline with history handling. Each ConfigurableFieldSpec declares a
# value that can be supplied per call under config["configurable"]; the ids
# match the parameter names of get_chat_history (session_id and k).
pipeline_with_history = RunnableWithMessageHistory(
    pipeline,
    input_messages_key="query",
    history_messages_key="history",
    get_session_history=get_chat_history,
    history_factory_config=[
        ConfigurableFieldSpec(
            id="session_id",
            name="Session ID",
            description="The Session ID to use for the chat history",
            annotation=str,
            default="id_default",
        ),
        ConfigurableFieldSpec(
            id="k",
            name="k",
            description="The number of messages to keep in the history",
            annotation=int,
            default=4,
        ),
    ],
)

Now we invoke our runnable, this time passing a k parameter via the config parameter.

In [21]:
# First turn of session "1"; both session_id and k are supplied through the
# "configurable" section of the run config.
response = pipeline_with_history.invoke(
    {"query": "Hi, my name is Ankur and I am a GenAI Developer."},
    config={"configurable": {"session_id": "1", "k": 4}}
)

get_chat_history called with session_id=1 and k=4
Initializing BufferWindowMessageHistory with k=4


In [22]:
# Print only the text of the model's reply.
print(response.content)

Hello Ankur! 👋 Nice to meet a fellow GenAI developer. How can I assist you today? Whether you’re working on a new model, fine‑tuning, deployment, or just looking for some best‑practice tips, I’m here to help.


In [23]:
# Second turn in the same session, so the earlier introduction is still inside
# the retained window.
response = pipeline_with_history.invoke(
    {"query": "Write an Introduction for me."},
    config={"configurable": {"session_id": "1", "k": 4}}
)
print(response.content)

get_chat_history called with session_id=1 and k=4
**Introduction**

Hi, I’m Ankur — a GenAI Developer passionate about building intelligent, data‑driven solutions that bridge the gap between cutting‑edge research and real‑world impact. With a solid foundation in machine learning, natural language processing, and cloud‑native deployment, I specialize in designing, fine‑tuning, and scaling generative AI models for a variety of industries—from customer‑service chatbots to creative content generators and beyond.

I thrive on tackling complex problems, optimizing model performance, and ensuring that AI systems are both ethical and highly performant. When I’m not writing code or experimenting with the latest transformer architectures, you’ll find me mentoring junior engineers, contributing to open‑source projects, or exploring the newest research papers in generative AI.

Let’s connect and explore how we can turn ideas into intelligent, scalable solutions!


We can also modify the messages that are stored in memory by modifying the records inside the chat_map dictionary directly.

In [None]:
# Obtain the "id_k4" session through get_chat_history so it is created when it
# does not exist yet. Indexing chat_map["id_k4"] directly raises KeyError on a
# fresh kernel, because no earlier cell has used this session ID.
history_k4 = get_chat_history("id_k4", k=4)
history_k4.clear()  # clear the history

# manually insert history
history_k4.add_user_message("Hi, my name is James")
history_k4.add_ai_message("I'm an AI model called Zeta.")
history_k4.add_user_message("I'm researching the different types of conversational memory.")
history_k4.add_ai_message("That's interesting, what are some examples?")
history_k4.add_user_message("I've been looking at ConversationBufferMemory and ConversationBufferWindowMemory.")
history_k4.add_ai_message("That's interesting, what's the difference?")
history_k4.add_user_message("Buffer memory just stores the entire conversation, right?")
history_k4.add_ai_message("That makes sense, what about ConversationBufferWindowMemory?")
history_k4.add_user_message("Buffer window memory stores the last k messages, dropping the rest.")
history_k4.add_ai_message("Very cool!")

from pprint import pprint
# Only the messages that fit inside the window remain.
pprint(history_k4.messages)

[HumanMessage(content='Buffer memory just stores the entire conversation, right?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='That makes sense, what about ConversationBufferWindowMemory?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='Buffer window memory stores the last k messages, dropping the rest.', additional_kwargs={}, response_metadata={}),
 AIMessage(content='Very cool!', additional_kwargs={}, response_metadata={})]


In [31]:
# Query the manually populated "id_k4" session. The message that introduced
# the name was pushed out of its 4-message window.
pipeline_with_history.invoke(
    {"query": "what is my name again?"},
    config={"configurable": {"session_id": "id_k4", "k": 4}}
)

get_chat_history called with session_id=id_k4 and k=4


AIMessage(content='I don’t have that information stored. Could you tell me your name again?', additional_kwargs={'reasoning_content': 'We have a conversation: user says "Buffer window memory stores the last k messages, dropping the rest." Then assistant says "Very cool!" Then user asks "what is my name again?" Assistant says "I’m not sure what you’re called—could you let me know your name?" Then user repeats: "what is my name again?" The assistant hasn\'t been given any name. The user might be testing memory. According to the system instruction: The assistant should not claim to have memory. We can politely say we don\'t have that info. We can ask them again. We should not claim to remember. We can say we don\'t know.'}, response_metadata={'token_usage': {'completion_tokens': 155, 'prompt_tokens': 148, 'total_tokens': 303, 'completion_time': 0.138507529, 'prompt_time': 0.009551516, 'queue_time': 0.052551584, 'total_time': 0.148059045}, 'model_name': 'openai/gpt-oss-20b', 'system_finger

In [38]:
# Inspect which messages the "id_k4" session currently retains.
pprint(chat_map["id_k4"].messages)

[HumanMessage(content='what is my name again?', additional_kwargs={}, response_metadata={}),
 AIMessage(content='I’m not sure what you’re called—could you let me know your name?', additional_kwargs={'reasoning_content': 'The user asks: "what is my name again?" They likely want their name. We need to check if we have the user\'s name. In prior conversation, the user hasn\'t given their name. There\'s no mention. So we should respond that we don\'t know. According to policy: If we don\'t know the user\'s name, we should say we don\'t know. We should not guess. So we should respond that we don\'t know.'}, response_metadata={'token_usage': {'completion_tokens': 112, 'prompt_tokens': 146, 'total_tokens': 258, 'completion_time': 0.103248967, 'prompt_time': 0.013081489, 'queue_time': 3.796570634, 'total_time': 0.116330456}, 'model_name': 'openai/gpt-oss-20b', 'system_fingerprint': 'fp_ebaf47239f', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--a0176968-3023-