In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Get started with Sessions and Memory Bank for ADK agents in Google Kubernetes Engine

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/agents/gke/agents_with_memory/get_started_with_memory_for_adk_in_gke.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fagents%2Fgke%2Fagents_with_memory%2Fget_started_with_memory_for_adk_in_gke.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/agents/gke/agents_with_memory/get_started_with_memory_for_adk_in_gke.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/agents/gke/agents_with_memory/get_started_with_memory_for_adk_in_gke.ipynb">
      <img width="32px" src="https://www.svgrepo.com/show/512317/github-142.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>Share to:</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/agents/gke/agents_with_memory/get_started_with_memory_for_adk_in_gke.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/agents/gke/agents_with_memory/get_started_with_memory_for_adk_in_gke.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/agents/gke/agents_with_memory/get_started_with_memory_for_adk_in_gke.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/agents/gke/agents_with_memory/get_started_with_memory_for_adk_in_gke.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/agents/gke/agents_with_memory/get_started_with_memory_for_adk_in_gke.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>

| Author(s) |
| --- |
| [Vlad Kolesnikov](https://github.com/vladkol) |

## Overview

This tutorial demonstrates how to build agents with short-term and long-term memory using
ADK with [Vertex AI Agent Engine Sessions](https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/sessions/overview?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog) service and
[Vertex AI Agent Engine Memory Bank](https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/memory-bank/overview?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog),
and deploy them to [Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/concepts/kubernetes-engine-overview?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog) (GKE).

This tutorial will cover:
* Registering agents with Vertex AI Agent Engine.
* Storing ADK session data with Vertex AI Sessions.
* Generating memories with ADK and Agent Engine Memory Bank.
* Retrieving memories with ADK and Agent Engine Memory Bank.
* Deploying agents to GKE.

## Get started

### Install ADK, the Vertex AI SDK, and other required packages


In [None]:
# Install or upgrade ADK (`google-adk`) and the Vertex AI SDK (`google-cloud-aiplatform`).
%pip install google-adk google-cloud-aiplatform --upgrade --quiet

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project.

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog).

Make sure you have installed [`kubectl` and necessary plugins](https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog).

In [None]:
import os

# fmt: off
PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
LOCATION = "us-central1" # @param {type: "string", placeholder: "us-central1", isTemplate: true}
# fmt: on

# Stop early while the project id is empty or still the template placeholder.
if PROJECT_ID == "[your-project-id]" or not PROJECT_ID:
    raise ValueError("Please specify your Project Id.")

# An empty location falls back to the default region.
LOCATION = LOCATION or "us-central1"

# Environment variables for ADK: use Vertex AI with this project and location.
os.environ.update(
    {
        "GOOGLE_GENAI_USE_VERTEXAI": "TRUE",
        "GOOGLE_CLOUD_PROJECT": PROJECT_ID,
        "GOOGLE_CLOUD_LOCATION": LOCATION,
    }
)

### Authenticate with your Google Cloud account

Depending on your Jupyter environment, you may have to manually authenticate. Run the cell below.

In [None]:
!gcloud auth print-identity-token -q &> /dev/null || gcloud auth login --project="{PROJECT_ID}" --update-adc --quiet

### Enable APIs

Enable the GKE, Cloud Build, Artifact Registry, and Vertex AI APIs.

In [None]:
# Enable the GKE (container), Cloud Build, Artifact Registry, and Vertex AI (aiplatform) APIs for the project.
!gcloud services enable container.googleapis.com cloudbuild.googleapis.com artifactregistry.googleapis.com aiplatform.googleapis.com --project="{PROJECT_ID}"

### Initialize Vertex AI Client

In [None]:
import vertexai

# Vertex AI client bound to the configured project and location.
client = vertexai.Client(project=PROJECT_ID, location=LOCATION)  # type: ignore

## ADK Sessions as short-term memory

Every user interaction with an ADK agent gets a session, and that session is managed by the ADK BaseSessionService.
Each session acts as a history of a multi-turn conversation, just like a chat,
and contains important fields, like the session ID, user ID,
event history (the conversation thread), and the [state](https://google.github.io/adk-docs/sessions/state/).

By default, the ADK uses `InMemorySessionService`, and writes session data in memory. This means that if ADK's runner is shut down, all session data is lost.
And if you're running a scaled, production-grade agent with ADK, with multiple instances of your ADK agent,
you can't guarantee that user requests will always hit the same instance.
This means that if request 1 goes to instance A, and request 2 goes to instance B, instance B won't have the in-memory session state stored inside instance A.

So for production-grade agents, you should store session data outside of the agent's runtime.
ADK provides two ways of doing this:

* `DatabaseSessionService`: stores session data in a SQL database, like SQLite, MySQL, or PostgreSQL.
* `VertexAiSessionService`: a scalable fully-managed [Agent Engine Sessions service](https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/sessions/overview?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog).

This notebook shows how to get started with `VertexAiSessionService`.

## Long-term Memory for ADK Agents

ADK offers a foundation for long-term memory services - an interface for creating and searching for memories. The [`BaseMemoryService`](https://github.com/google/adk-python/blob/main/src/google/adk/memory/base_memory_service.py) defines the interface for managing this searchable, long-term knowledge store. Its primary responsibilities are:

* Ingesting Information (`add_session_to_memory`): Taking the contents of a (usually completed) Session and adding relevant information to the long-term knowledge store.
* Searching Information (`search_memory`): Allowing an agent (typically via a Tool) to query the knowledge store and retrieve relevant snippets or context based on a search query.

ADK also provides 2 built-in memory tools:

* `PreloadMemoryTool`: Always retrieve memory at the beginning of each turn (similar to a callback).
* `LoadMemoryTool`: Retrieve memory when your agent decides it would be helpful.

By default, ADK uses [`InMemoryMemoryService`](https://github.com/google/adk-python/blob/main/src/google/adk/memory/in_memory_memory_service.py).
It persists **all** information in-memory, and just like `InMemorySessionService`, if ADK's runner is shut down, all of that data is lost.
With in-memory sessions, the events are not condensed, and information is not extracted from them.

Another Memory Service that ADK supports is [`VertexAiMemoryBankService`](https://github.com/google/adk-python/blob/main/src/google/adk/memory/vertex_ai_memory_bank_service.py).
Vertex AI Memory Bank stores extracted memories, and only persists *meaningful* information. Not all conversations will result in generated memories. Additionally, the information will be condensed to individual, self-contained memories.

### Getting started with `VertexAiSessionService` and `VertexAiMemoryBankService`

To get started with Agent Engine Sessions and Memory Bank, you need to first create an Agent Engine. This only takes a few seconds.

After that, you can start using the session service and Memory Bank provided by Vertex AI through the respective ADK components.

In [None]:
from google.adk.memory import VertexAiMemoryBankService
from google.adk.sessions import VertexAiSessionService

# Display name of the Agent Engine resource created below.
AGENT_NAME = "weather_agent"

# Create an Agent Engine resource with a Memory Bank configuration.
# NOTE(review): `generation_config.model` is presumably the model Memory Bank uses
# to generate memories — confirm against the Memory Bank documentation.
agent_engine = client.agent_engines.create(
    config={
        "display_name": AGENT_NAME,
        "context_spec": {
            "memory_bank_config": {
                "generation_config": {
                    "model": f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/gemini-2.5-flash"
                }
            }
        },
    }
)

# The Agent Engine id is the last path segment of the resource name.
agent_engine_id = agent_engine.api_resource.name.split("/")[-1]

# Both ADK services are configured with the same project, location, and Agent Engine id.
session_service = VertexAiSessionService(
    project=PROJECT_ID, location=LOCATION, agent_engine_id=agent_engine_id
)
memory_service = VertexAiMemoryBankService(
    project=PROJECT_ID, location=LOCATION, agent_engine_id=agent_engine_id
)

print(f"Agent Engine Id: {agent_engine_id}")

Notice that we don't deploy the agent to the Agent Engine service.
Instead, we only register an Agent Engine resource so that Vertex AI Session Service and Memory Bank can use it to store their respective sessions and memories
in association with the agent.

## Using Sessions and Memory in ADK agents

Now, let's link our agent to Sessions and Memory, so that memories can be fetched by your agent and used for inference. This allows your agent to remember information from prior sessions in a new, empty session.

You can use ADK's built-in tools to fetch memories and incorporate them in the prompt.
When using the built-in memory tools, it's important to provide both a memory tool when creating your Agent and a memory service when defining your Runner.
Similarly, the Runner also needs a session service.

The code below demonstrates a simple weather agent with a special callback (the `after_agent_callback` argument of `Agent`) that generates memories after each agent turn.
The `add_session_to_memory` function takes care of that.

We're also using `PreloadMemoryTool`, so the retrieved memories will be appended to the System Instructions.

In [None]:
%%writefile agent.py

from typing import Optional

from google.adk.agents import Agent
from google.adk.agents.callback_context import CallbackContext
from google.adk.tools.preload_memory_tool import PreloadMemoryTool
from google.genai import types


def get_weather(city: str) -> dict:
    """Retrieves the current weather report for a specified city.

    The lookup ignores letter case and surrounding whitespace. Only
    "New York" has a report in this demo tool.

    Args:
        city (str): The name of the city for which to retrieve the weather report.

    Returns:
        dict: On success, a dict with "status" set to "success" and a "report"
            text; otherwise a dict with "status" set to "error" and an
            "error_message" text.
    """
    # Normalize the name so inputs such as " New York " or "NEW YORK" still match.
    normalized_city = city.strip().lower()
    if normalized_city == "new york":
        return {
            "status": "success",
            "report": (
                "The weather in New York is sunny with a temperature of 25 degrees"
                " Celsius (77 degrees Fahrenheit)."
            ),
        }

    # Echo the city exactly as it was given so the caller sees its own input.
    return {
        "status": "error",
        "error_message": f"Weather information for '{city}' is not available.",
    }



async def add_session_to_memory(
    callback_context: CallbackContext,
) -> Optional[types.Content]:
    """Send the current session to the memory service, when one is configured.

    Does nothing if the callback context has no `_invocation_context`
    attribute or if the invocation context has no memory service.
    Always returns None.
    """
    if not hasattr(callback_context, "_invocation_context"):
        return None

    ctx = callback_context._invocation_context
    if not ctx.memory_service:
        return None

    await ctx.memory_service.add_session_to_memory(ctx.session)
    return None


# Weather agent: uses `get_weather` and `PreloadMemoryTool` as tools, and runs
# `add_session_to_memory` as its after-agent callback.
root_agent = Agent(
    name="weather_agent",
    model="gemini-2.5-flash",
    description="Agent to answer questions about weather in a city.",
    instruction="You are a helpful agent who can answer user questions about weather in a city.",
    # PreloadMemoryTool will be automatically executed by ADK.
    tools=[get_weather, PreloadMemoryTool()],
    after_agent_callback=add_session_to_memory,
)


## Running the Agent

To run the agent, we need an instance of ADK Runner.

In [None]:
from agent import root_agent  # type: ignore
from google.adk.runners import Runner
from google.genai import types

# User id under which the test sessions are created.
USER_ID = "user"

# The runner gets both a session service and a memory service, and uses the
# agent's name as the app name.
runner = Runner(
    app_name=root_agent.name,  # type: ignore
    agent=root_agent,
    session_service=session_service,
    memory_service=memory_service,
)


async def call_agent(query, runner):
    """Send one query to the agent in a brand-new session and print the final response.

    A new session is created through the notebook-level `session_service` on
    every call, under `USER_ID` and the agent's name.

    Args:
        query: Text of the user message.
        runner: ADK `Runner` used to execute the agent.
    """
    session = await session_service.create_session(
        app_name=root_agent.name,  # type: ignore
        user_id=USER_ID,
    )
    content = types.Content(role="user", parts=[types.Part(text=query)])

    # Use the async API: iterating the synchronous `runner.run` generator here
    # would block the event loop that is running this coroutine.
    async for event in runner.run_async(
        user_id=session.user_id, session_id=session.id, new_message=content
    ):
        if not event.is_final_response():
            continue
        # A final event without content parts has nothing to print; skip it
        # instead of failing on the index lookup.
        if event.content and event.content.parts:
            final_response = event.content.parts[0].text
            print("\nAgent Response: ", final_response)

`call_agent` is a function that helps us quickly test the agent.

Notice that we create a new session every time we make a request.
Therefore, the underlying LLM will only have access to the user's query itself **and long-term memories loaded by `PreloadMemoryTool`**.

First, let's ask the agent a question it knows the answer to.

In [None]:
# New York is the one city `get_weather` has a report for.
await call_agent("What's the weather in New York?", runner)

Now, ask a question it cannot answer.

In [None]:
# `get_weather` has no report for Seattle.
await call_agent("What's the weather in Seattle?", runner)

Since the agent only "knows" about New York weather, it cannot answer the question.

But we can indicate our preference for this situation (Seattle weather).

In [None]:
# State a preference in its own session; the agent's after-agent callback passes
# the session to the memory service.
await call_agent(
    "Whenever asked about weather in Seattle, answer that it's raining as usual.",
    runner,
)

Now, try again with Seattle.

In [None]:
# Ask again in a fresh session: the earlier preference is only reachable through
# long-term memory, not through conversation history.
await call_agent("What's the weather in Seattle?", runner)

# Print a Google Cloud console link to this Agent Engine's memories.
print(
    f"\nAgent Memories are here: https://console.cloud.google.com/vertex-ai/agents/locations/{LOCATION}/agent-engines/{agent_engine_id}/memories?project={PROJECT_ID}"
)

And you got the answer!

Memory Bank generated a memory that carries across sessions.

## Deploying the Agent to GKE

### Create a GKE cluster

First, create a GKE cluster
and configure gcloud CLI credentials to use `kubectl` with it.

> If creating a GKE Standard cluster, make sure [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) is enabled. Workload Identity is enabled by default in an Autopilot cluster.

In [None]:
CLUSTER_NAME = "weather-agent-cluster"

# Create an Autopilot cluster (`create-auto`), then fetch its credentials for `kubectl`.
!gcloud container clusters create-auto {CLUSTER_NAME} --project {PROJECT_ID} --region {LOCATION}
!gcloud container clusters get-credentials {CLUSTER_NAME} --project {PROJECT_ID} --region {LOCATION}

### Assign necessary permissions to the cluster's Service Account

In [None]:
# Kubernetes service account that receives the role; the member string below
# places it in the `default` namespace.
AGENT_SERVICE_ACCOUNT="default"

# `!command` output is captured as a list of lines; keep the first line as the project number.
PROJECT_NUMBER = !gcloud projects describe {PROJECT_ID} --format="value(projectNumber)"
if isinstance(PROJECT_NUMBER, list):
    PROJECT_NUMBER = PROJECT_NUMBER[0]

# Grant `roles/aiplatform.user` to that service account through its Workload Identity principal.
!gcloud projects add-iam-policy-binding projects/{PROJECT_ID} \
    --role=roles/aiplatform.user \
    --member=principal://iam.googleapis.com/projects/{PROJECT_NUMBER}/locations/global/workloadIdentityPools/{PROJECT_ID}.svc.id.goog/subject/ns/default/sa/{AGENT_SERVICE_ACCOUNT}

Once the cluster is ready, deploying your agent to GKE is a matter of running a single `adk deploy gke`
command.

For using Sessions and Memory Bank, the important parameters are:

```bash
--session_service_uri="agentengine://AGENT_ENGINE_ID"
--memory_service_uri="agentengine://AGENT_ENGINE_ID"
```

Run the command below to deploy the agent.

> Parameter `--with_ui` is added to make testing
the agent easier with ADK Web UI.
>
> Normally, instead of ADK Web UI, you should only expose [ADK REST API endpoint](https://google.github.io/adk-docs/get-started/testing/#api-endpoints)
or [A2A](https://google.github.io/adk-docs/a2a/quickstart-exposing/) endpoint (`--a2a` parameter).

In [None]:
SERVICE_NAME="weather-agent"

# Deploy the agent code from the current directory (`.`) to the cluster.
# The session and memory service URIs both point at the same Agent Engine id.
!adk deploy gke --project {PROJECT_ID} --region {LOCATION} \
    --cluster_name {CLUSTER_NAME} \
    --service_name {SERVICE_NAME} \
    --session_service_uri=agentengine://{agent_engine_id} \
    --memory_service_uri=agentengine://{agent_engine_id} \
    --app_name {AGENT_NAME} \
    --with_ui \
    .

Once the deployment is complete, **before continuing**, ensure your agent's pods are in the Running state. 

> It may take a few minutes for the pod to get running.

In [None]:
# List the pods labeled `app=<SERVICE_NAME>` to check their status.
!kubectl get pods -l=app={SERVICE_NAME}

Find the External IP: Get the public IP address for your agent's service.

> It may take a few minutes before an external IP address is assigned.

In [None]:
!kubectl get svc {SERVICE_NAME} -o=jsonpath={{.status.loadBalancer.ingress[0].ip}}

You can now open the Agent's Web UI using the `http://IP_ADDRESS` URL,
where **IP_ADDRESS** is the service's IP address you retrieved with the previous command.

## Cleaning up

It's always a best practice in cloud development to clean up resources you no longer need to avoid incurring unexpected costs.

In [None]:
delete_resources = True

if delete_resources:
    # Delete Agent Engine resource
    client.agent_engines.delete(
        name=f"projects/{PROJECT_ID}/locations/{LOCATION}/reasoningEngines/{agent_engine_id}",
        force=True,
    )
    # Delete Cloud Run service
    !gcloud container clusters delete {CLUSTER_NAME} --project $PROJECT_ID --region $LOCATION

## References

* [Agent Development Kit documentation](https://google.github.io/adk-docs/).
* [Vertex AI Agent Engine Memory Bank documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/memory-bank/overview?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog).
* [Vertex AI Agent Engine Sessions documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/sessions/overview?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog).
* [Google Kubernetes Engine documentation](https://cloud.google.com/kubernetes-engine/docs/concepts/kubernetes-engine-overview?utm_campaign=CDR_0xc245fc42_default_b452364298&utm_medium=external&utm_source=blog).
* [Deploying ADK agents to GKE](https://google.github.io/adk-docs/deploy/gke/).
