In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Get started with Vertex AI Memory Bank - LangGraph

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/agent-engine/memory/get_started_with_memory_bank_langgraph.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fgemini%2Fagent-engine%2Fmemory%2Fget_started_with_memory_bank_langgraph.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/gemini/agent-engine/memory/get_started_with_memory_bank_langgraph.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/agent-engine/memory/get_started_with_memory_bank_langgraph.ipynb">
      <img width="32px" src="https://www.svgrepo.com/download/217753/github.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/agent-engine/memory/get_started_with_memory_bank_langgraph.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/agent-engine/memory/get_started_with_memory_bank_langgraph.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/agent-engine/memory/get_started_with_memory_bank_langgraph.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/agent-engine/memory/get_started_with_memory_bank_langgraph.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/agent-engine/memory/get_started_with_memory_bank_langgraph.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>

| Author(s) |
| --- |
| [Ivan Nardini](https://github.com/inardini) |

## Overview

This tutorial demonstrates how to build a conversational agent using **LangGraph** with the **Vertex AI Memory Bank API** to provide the agent with long-term memory. This enables the agent to personalize responses based on information learned from prior conversations.

### What you'll learn

By the end of this tutorial, you will be able to:

  * Provision a Vertex AI Agent Engine to use Memory Bank.
  * Define a LangGraph agent state and graph structure.
  * Create a graph node that retrieves memories, generates personalized responses, and stores new information.
  * Build and run a conversational agent that maintains context across sessions.

### Why this is important

While standard chatbot frameworks can manage short-term conversation history, they often fail to retain information across different sessions. LangGraph allows you to build robust, stateful applications by defining agent workflows as graphs. Integrating it with a dedicated long-term memory service like Vertex AI Memory Bank unlocks the ability to create truly personalized and context-aware agents that build a continuous understanding of their users over time.


## Get started

### Install required packages


In [None]:
%pip install --upgrade --quiet google-cloud-aiplatform>=1.100.0 langgraph langchain-core langchain-google-vertexai

### Authenticate your notebook environment (Colab only)

If you're running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# `google.colab` is only present in sys.modules when running inside Colab,
# so this cell does nothing in any other environment.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

### Set Google Cloud project information and environment

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [None]:
# Use the environment variable if the user doesn't provide Project ID.
import os
import uuid
import vertexai

# Project configuration
PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    # Fall back to the environment. Default to "" instead of wrapping the
    # lookup in str(): str(None) is the truthy string "None", which would slip
    # past the check below so the ValueError could never be raised.
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "")
    if not PROJECT_ID:
        raise ValueError("Project ID not found. Please set it in the form above or as the GOOGLE_CLOUD_PROJECT environment variable.")

# Region comes from GOOGLE_CLOUD_REGION when set, otherwise "us-central1".
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

# Agent configuration
MODEL_NAME = "gemini-2.5-flash"
# A fresh random user ID is generated every time this cell runs, so re-running
# the cell starts with a different Memory Bank scope.
USER_ID = f"user_{uuid.uuid4()}"

print(f"Project: {PROJECT_ID}")
print(f"Location: {LOCATION}")
print(f"Session User ID: {USER_ID}")

# Initialize Vertex AI client
# This `client` is the object the later cells use for every
# `client.agent_engines.*` call (create, create_memory, retrieve_memories,
# generate_memories, delete).
client = vertexai.Client(
    project=PROJECT_ID,
    location=LOCATION,
)

### Import libraries

Import the necessary libraries for building our conversational agent with LangGraph and Vertex AI.

In [None]:
from typing import TypedDict, Annotated
from functools import partial
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_google_vertexai import ChatVertexAI
from langgraph.graph import StateGraph, START, END, add_messages

### Helper functions

These helper functions will manage the user-facing chat loop and process a single turn of the conversation with our LangGraph agent.


In [None]:
def run_single_turn(user_input: str, user_id: str) -> str:
    """Send one user message through the compiled graph and return the reply.

    The graph is streamed with a state holding the new human message and the
    user ID; the user ID is also passed as the ``thread_id`` in the run config.

    Args:
        user_input: Text typed by the user for this turn.
        user_id: Identifier placed in the state and used as ``thread_id``.

    Returns:
        The ``content`` of the last message in the most recent streamed update
        that carried messages, or ``None`` if no update carried any.
    """
    initial_state = {
        "messages": [HumanMessage(content=user_input)],
        "user_id": user_id,
    }
    run_config = {"configurable": {"thread_id": user_id}}

    latest_reply = None
    for event in compiled_graph.stream(initial_state, run_config):
        # Each event maps node names to that node's state update.
        for node_update in event.values():
            node_messages = node_update.get("messages")
            if node_messages:
                latest_reply = node_messages[-1].content

    return latest_reply


def chat_loop(user_id: str) -> None:
    """Run an interactive console chat until the user asks to stop.

    Lines are read with ``input`` and surrounding whitespace is ignored.
    Entering ``quit``, ``exit`` or ``bye`` (any casing) prints a farewell and
    ends the loop. Blank lines are skipped instead of being sent to the agent.
    Every other line is passed to ``run_single_turn`` and a non-empty reply is
    printed.

    Args:
        user_id: Identifier forwarded unchanged to ``run_single_turn``.
    """
    print("\nStarting chat. Type 'exit' or 'quit' to end.")
    print("Every message will be automatically stored in memory.\n")

    while True:
        # Strip so that input such as "exit " is still recognised as a command.
        user_input = input("\nYou: ").strip()
        if not user_input:
            # Nothing to answer or store for an empty line; prompt again.
            continue
        if user_input.lower() in ("quit", "exit", "bye"):
            print("\nAssistant: Thank you for chatting. Have a great day!")
            break

        response = run_single_turn(user_input, user_id)
        if response:
            print(f"\nAssistant: {response}")

## Creating the Agent Engine with Memory Bank

Vertex AI Memory Bank is part of the Vertex AI Agent Engine, a managed service that enables developers to deploy, manage, and scale AI agents.

To use Memory Bank, you first create a new Agent Engine instance, which provides the APIs to store and retrieve user-specific memories.


In [None]:
# Create an Agent Engine instance; no arguments are passed to `create()`.
# Its resource name (`agent_engine.api_resource.name`) is what the Memory Bank
# calls in the following cells pass as `name`.
agent_engine = client.agent_engines.create()
print(f"Created Agent Engine: {agent_engine.api_resource.name}")

## (Optional) Initialize user memory

You can optionally pre-seed the Memory Bank with initial facts about a user. This demonstrates how to store information that the agent can reference from the very first interaction.


In [None]:
# Store one hand-written fact for this user, scoped by `user_id`, so there is
# something to retrieve before any conversation has taken place.
import_initial_memory_op = client.agent_engines.create_memory(
    name=agent_engine.api_resource.name,
    fact=f"The user's name is {USER_ID} and they have a passion for learning new things.",
    scope={"user_id": USER_ID},
)
# The created memory is read from the returned object's `response` attribute.
memory = import_initial_memory_op.response
print("Created initial memory:")
print(f"  Fact: {memory.fact}")
print(f"  User ID: {memory.scope.get('user_id')}")
print(f"  Created at: {memory.create_time}")

In [None]:
# Read back what is stored for this user to confirm the seeded fact is there.
memories = client.agent_engines.retrieve_memories(
    name=agent_engine.api_resource.name,
    scope={"user_id": USER_ID},
)

# Use a distinct loop name: the items here are expected to expose the stored
# memory as `.memory`, so reusing `memory` would overwrite the object created
# in the previous cell with a value of a different shape.
for i, retrieved in enumerate(memories, 1):
    if hasattr(retrieved, 'memory') and hasattr(retrieved.memory, 'fact'):
        print(f"{i}. {retrieved.memory.fact}")

## Building the LangGraph Agent

Now, we will construct our conversational agent. The agent will be designed to:

1.  Retrieve relevant memories before generating a response.
2.  Use these memories to personalize its conversation.
3.  Automatically store each conversation turn to build its long-term memory.


### Define the Agent State

The `State` object is a central concept in LangGraph. It holds the data that persists and is passed between the nodes in our graph. For this agent, the state will track the conversation `messages` and the `user_id`.

In [None]:
class State(TypedDict):
    """Agent state definition.

    The data handed to the graph node on each run: the conversation messages
    and the ID of the user the conversation belongs to.
    """
    # Conversation messages. Annotated with LangGraph's `add_messages`;
    # presumably this merges messages returned by a node into the list rather
    # than replacing it — confirm against the LangGraph documentation.
    messages: Annotated[list, add_messages]
    # User identifier; the chatbot node uses it as the Memory Bank scope.
    user_id: str

### Create the Core Logic: The Chatbot Node

This node contains the main logic of our agent. For each turn, it performs a sequence of actions:

1.  **Retrieve Memories**: It queries the Memory Bank API for memories scoped to the current user. With semantic search (the default), it retrieves the memories most relevant to the user's latest message; otherwise, it uses simple retrieval to fetch all of the user's memories.
2.  **Construct Prompt**: It injects the retrieved memories into the system prompt, providing the LLM with personalized context.
3.  **Generate Response**: It calls the Gemini model with the context-enhanced prompt to generate a response.
4.  **Store New Memories**: It sends the latest user message and agent response back to Memory Bank to be processed and stored for future conversations.

This "always store" approach ensures the agent continuously learns from every interaction.

In [None]:
def chatbot_node(state: State, agent_engine_name: str, use_semantic_search: bool = True) -> dict:
    """Answer the latest user message with Memory Bank context, then store the turn.

    Steps, in order:
      1. Retrieve memories scoped to ``state["user_id"]``, either by similarity
         to the latest message or with simple retrieval.
      2. Inject the retrieved facts into a system prompt.
      3. Invoke the chat model on the system prompt plus ``state["messages"]``.
      4. Send the user message and the model reply to ``generate_memories``
         and wait for that call to complete before returning.

    Args:
        state: Graph state; ``messages[-1]`` is treated as the user's message.
        agent_engine_name: Resource name passed as ``name`` to the Memory Bank
            calls.
        use_semantic_search: When true, retrieve up to 10 memories ranked by
            similarity to the user message; otherwise use simple retrieval.

    Returns:
        A state update of the form ``{"messages": [response]}`` containing the
        model's reply.

    Raises:
        ValueError: If the content of the latest message is not a string.
    """

    user_id = state["user_id"]
    user_message = state["messages"][-1].content

    # The text is used both as a search query and as a memory event, so it
    # must be a plain string.
    if not isinstance(user_message, str):
        raise ValueError("User message must be a string")

    # Only the retrieval-mode argument differs between the two strategies.
    if use_semantic_search:
        # Rank memories by relevance to the user's current message.
        retrieval_params = {
            "similarity_search_params": {
                "search_query": user_message,
                "top_k": 10,  # Retrieve top 10 most relevant memories
            }
        }
    else:
        # Retrieve all memories
        retrieval_params = {"simple_retrieval_params": {}}

    memories = client.agent_engines.retrieve_memories(
        name=agent_engine_name,
        scope={"user_id": user_id},
        **retrieval_params,
    )

    # Keep only results that actually carry a fact.
    memory_facts = [
        memory.memory.fact
        for memory in memories
        if hasattr(memory, 'memory') and hasattr(memory.memory, 'fact')
    ]

    # Same bullet list for both modes; only the heading changes.
    if memory_facts:
        if use_semantic_search:
            heading = "Relevant memories from previous conversations:"
        else:
            heading = "Previous conversation history:"
        memory_context = heading + "\n" + "\n".join(f"- {fact}" for fact in memory_facts)
    else:
        memory_context = "No previous conversations found."

    # Create system prompt with memory context
    system_message = SystemMessage(
        content=f"""You are a helpful assistant with perfect memory.

        {memory_context}

        Instructions:
        - Use the context to personalize responses
        - Naturally reference past conversations when relevant
        - Build upon previous knowledge about the user
        - If using semantic search, the memories shown are the most relevant to the current query"""
    )

    # Generate response using LLM
    messages = [system_message] + state["messages"]
    llm = ChatVertexAI(model=MODEL_NAME, project=PROJECT_ID, location=LOCATION)
    response = llm.invoke(messages)

    # Store the conversation turn in memory. The call is made for its side
    # effect only, so its return value is not kept. `wait_for_completion` is
    # set, presumably so the stored memories are available to the next turn.
    client.agent_engines.generate_memories(
        name=agent_engine_name,
        direct_contents_source={
            "events": [{
                "content": {
                    "role": "user",
                    "parts": [{"text": user_message}],
                }
            }, {
                "content": {
                    "role": "model",
                    "parts": [{"text": response.content}],
                }
            }]
        },
        scope={"user_id": user_id},
        config={"wait_for_completion": True},
    )
    return {"messages": [response]}


### Build and Compile the Conversation Graph

LangGraph uses a state graph to define the conversational flow. Our graph is simple: it has a single `chatbot` node that processes each turn. We connect the `START` of the graph to our node and the node's output to the `END`.

We use `functools.partial` to bind the `agent_engine_name` and `use_semantic_search` arguments to our node function ahead of time.

In [None]:
# Pre-bind the Memory Bank settings so the node only needs to be given the state.
chatbot_with_memory = partial(
    chatbot_node,
    agent_engine_name=agent_engine.api_resource.name,
    use_semantic_search=True,
)

# Wire a linear graph: START -> "chatbot" -> END.
graph_builder = StateGraph(State)
graph_builder.add_node("chatbot", chatbot_with_memory)
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("chatbot", END)

# `compiled_graph` is the object that `run_single_turn` streams.
compiled_graph = graph_builder.compile()

## Interacting with the Agent

Now, let's interact with our memory-enabled agent. Try the following phrases to see how the agent builds and uses its memory.

```
You: Hi, I work as an agent engineer
You: I love hiking and have a dog named Max
You: I'm working on a recommendation system project
You: What do you remember about me?
You: Bye
```


In [None]:
# Start the interactive session for the user ID generated in the setup cell.
# This call blocks until 'quit', 'exit' or 'bye' is entered.
chat_loop(USER_ID)

## Check for Stored Memories

After the conversation, we can retrieve all stored memories to see what the agent has learned about the user. This demonstrates that the information has been successfully processed and persisted by Vertex AI Memory Bank.


In [None]:
print("Stored Memories")
# Materialize the results once. The loop below consumes the returned iterable,
# so counting it again afterwards is unreliable if it is lazy or paged; a list
# can be printed and counted safely.
memories = list(
    client.agent_engines.retrieve_memories(
        name=agent_engine.api_resource.name,
        scope={"user_id": USER_ID},
    )
)

for memory in memories:
    if hasattr(memory, 'memory') and hasattr(memory.memory, 'fact'):
        print(f"{memory.memory.fact}")

# Show total memory count
print(f"\nTotal memories stored: {len(memories)}")

## Cleaning up

To avoid charges, delete the agent engine when you're done experimenting.


In [None]:
# Set to False to skip deletion and keep the Agent Engine.
delete_engine = True

if delete_engine:
    # NOTE(review): `force=True` presumably also removes resources nested under
    # the engine (such as stored memories) — confirm in the SDK documentation.
    client.agent_engines.delete(name=agent_engine.api_resource.name, force=True)