In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Get started with Vertex AI Memory Bank

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/agents/agent_engine/memory_bank/get_started_with_memory_bank.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> Open in Colab
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fgenerative-ai%2Fmain%2Fagents%2Fagent_engine%2Fmemory_bank%2Fget_started_with_memory_bank.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> Open in Colab Enterprise
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/generative-ai/main/agents/agent_engine/memory_bank/get_started_with_memory_bank.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> Open in Vertex AI Workbench
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/generative-ai/blob/main/agents/agent_engine/memory_bank/get_started_with_memory_bank.ipynb">
      <img width="32px" src="https://raw.githubusercontent.com/primer/octicons/refs/heads/main/icons/mark-github-24.svg" alt="GitHub logo"><br> View on GitHub
    </a>
  </td>
</table>

<div style="clear: both;"></div>

| Authors |
| --- |
| [Kimberly Milam](https://github.com/klmilam) |
| [Ivan Nardini](https://github.com/inardini) |

## Overview

This notebook is a hands-on guide to mastering **Vertex AI Memory Bank**, a service for building stateful, context-aware conversational AI agents. You will learn how to give your agent a persistent, long-term memory, allowing it to recall guest preferences and past interactions across multiple sessions to provide truly personalized hospitality experiences. We will apply these concepts to a practical, real-world hotel scenario: building a sophisticated hotel concierge assistant.

By the end of this tutorial, you will not only understand the core concepts of Memory Bank but also know how to apply them to build an assistant that remembers guest preferences, dietary restrictions, room preferences, and maintains context across conversations to deliver exceptional, personalized service.

Here's a high-level overview of the steps we'll take:

* **Initial Setup**: We will begin with the fundamentals, configuring a new Memory Bank instance and learning how to create guest sessions to store and retrieve conversation history.
* **Basic Memory Operations**: We will explore how to generate memories from conversations and retrieve them to understand what the system remembers about each guest.
* **Real-World Application**: We will see how to use these memories to personalize guest interactions when they return to the hotel.
* **Resource Management**: Finally, we will address essential operational aspects by properly cleaning up resources.

## Get started


### Install Vertex AI SDK and required packages

First, let's install the Vertex AI SDK.

**Note**: This will install the SDK. Colab may prompt you to restart the runtime after installation. This is expected behavior.

In [None]:
# Install (or upgrade) the Vertex AI SDK; --quiet trims pip's output.
%pip install --upgrade --quiet google-cloud-aiplatform

### Authenticate your notebook environment

If you are running this notebook in **Google Colab**, run the cell below to authenticate your account.

In [None]:
import sys

# Only authenticate when the google.colab package is already loaded in this
# interpreter, which is how this notebook detects a Colab runtime. In any other
# environment the cell does nothing.
if "google.colab" in sys.modules:
    # Imported inside the guard because the package is only expected in Colab.
    from google.colab import auth

    # Per the notebook text, this authenticates the Colab user's account.
    auth.authenticate_user()

### Set Google Cloud project information

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).


In [None]:
import os

import vertexai

# fmt: off
PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
# fmt: on

# When the form field is empty or still holds the placeholder, fall back to the
# GOOGLE_CLOUD_PROJECT environment variable.
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# Fail fast when no project is available. Wrapping the lookup in str() would
# turn a missing variable into the literal string "None", which would then be
# passed to the client and embedded in the model resource paths built later.
if not PROJECT_ID:
    raise ValueError(
        "No Google Cloud project configured. Set PROJECT_ID in this cell or "
        "define the GOOGLE_CLOUD_PROJECT environment variable."
    )

# Region comes from GOOGLE_CLOUD_REGION, defaulting to us-central1.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

# Initialize the Vertex AI client; every later cell talks to the service
# through this object.
client = vertexai.Client(project=PROJECT_ID, location=LOCATION)

print("✅ Vertex AI client initialized!")
print(f"   Project: {PROJECT_ID}")
print(f"   Location: {LOCATION}")

### Import libraries

We're importing standard Python libraries and, importantly, several class-based types from the Vertex AI SDK.

To make the code more readable, we're creating shorter aliases for these long class names. This is a common Python practice that helps keep our code clean and concise without sacrificing the benefits of using the typed classes.

In [None]:
import datetime
import os
import uuid
import warnings

# NOTE: this is a blanket filter. It hides every warning category for the rest
# of the kernel session, deprecation notices included.
warnings.filterwarnings("ignore")

# Import class-based types for Memory Bank
from vertexai import types

# The SDK type names are long, so each one gets a short alias that the rest of
# the notebook uses.

# Basic configuration types
MemoryBankConfig = types.ReasoningEngineContextSpecMemoryBankConfig
SimilaritySearchConfig = (
    types.ReasoningEngineContextSpecMemoryBankConfigSimilaritySearchConfig
)
GenerationConfig = types.ReasoningEngineContextSpecMemoryBankConfigGenerationConfig

# Advanced configuration types
TtlConfig = types.ReasoningEngineContextSpecMemoryBankConfigTtlConfig
GranularTtlConfig = (
    types.ReasoningEngineContextSpecMemoryBankConfigTtlConfigGranularTtlConfig
)
CustomizationConfig = types.MemoryBankCustomizationConfig
MemoryTopic = types.MemoryBankCustomizationConfigMemoryTopic
ManagedMemoryTopic = types.MemoryBankCustomizationConfigMemoryTopicManagedMemoryTopic
CustomMemoryTopic = types.MemoryBankCustomizationConfigMemoryTopicCustomMemoryTopic
GenerateMemoriesExample = types.MemoryBankCustomizationConfigGenerateMemoriesExample
ConversationSource = (
    types.MemoryBankCustomizationConfigGenerateMemoriesExampleConversationSource
)
ConversationSourceEvent = (
    types.MemoryBankCustomizationConfigGenerateMemoriesExampleConversationSourceEvent
)
ExampleGeneratedMemory = (
    types.MemoryBankCustomizationConfigGenerateMemoriesExampleGeneratedMemory
)
ManagedTopicEnum = types.ManagedTopicEnum

print("✅ Libraries imported successfully!")

### Define helper function for displaying memories

This helper function provides consistent formatting when displaying generated memories throughout the tutorial.

In [None]:
def display_generated_memories(operation, client, title="Generated Memories"):
    """Print the memories produced by a memory-generation operation.

    Each generated entry that was not deleted is fetched again through
    ``client.agent_engines.memories.get`` so that its ``fact`` text can be
    shown. Entries whose action is ``"DELETED"``, or that carry no memory
    reference, are skipped; they still count toward the total printed in the
    header and keep their position number.

    Args:
        operation: The result from client.agent_engines.memories.generate().
            Only ``operation.response.generated_memories`` is read.
        client: The Vertex AI client instance used to fetch each memory.
        title: Title to display above the memories. Its lower-cased form is
            used in the message printed when there is nothing to show.

    Returns:
        None. All output is written to stdout.
    """
    response = operation.response
    generated_memories = response.generated_memories if response else None

    # Guard clause: a missing response or an empty list both mean "nothing found".
    if not generated_memories:
        print(f"\n📭 No {title.lower()} found")
        return

    print(f"\n✅ {title}: {len(generated_memories)}\n")

    for i, gen_memory in enumerate(generated_memories, 1):
        if gen_memory.action == "DELETED" or not gen_memory.memory:
            continue

        try:
            full_memory = client.agent_engines.memories.get(
                name=gen_memory.memory.name
            )
            # Anything that is not CREATED is shown with the "updated" icon.
            action_icon = "🆕" if gen_memory.action == "CREATED" else "🔄"
            print(f"   {action_icon} {i}. {full_memory.fact}")
        except Exception as e:
            # Best-effort display: report the failure and move on to the next
            # memory instead of aborting the whole listing.
            print(f"   ⚠️ Could not retrieve memory: {e}")


print("✅ Helper function defined!")

## Create your Hotel Concierge Memory Bank

Now let's set up the foundation for our hotel concierge assistant by creating an Agent Engine with Memory Bank capabilities.

### Create Agent Engine with Memory Bank configuration

The AgentEngine resource acts as the top-level container for your Memory Bank instance. To create one, we need to provide a configuration.

Here, MemoryBankConfig has two key parts:

1. **`similarity_search_config`**: This specifies the **embedding model** used for similarity searches. When a guest asks "What did I request last time?", this model helps find the most relevant memories. We're using `text-embedding-005`, which is excellent for English conversations. If you expect multilingual guests, consider `text-multilingual-embedding-002`.

2. **`generation_config`**: This defines the **LLM** that will extract and consolidate memories from conversations. The default, `gemini-2.5-flash`, is a fast and capable model perfect for this task. It reads conversations and intelligently extracts key facts like dietary preferences or room temperature preferences.


In [None]:
print("🧠 Creating Memory Bank configuration for hotel concierge...\n")

# Model IDs are defined once so the resource paths and the printed summary
# below cannot drift apart.
EMBEDDING_MODEL = "text-embedding-005"
GENERATION_MODEL = "gemini-2.5-flash"

# Both models are addressed by a fully qualified resource path under the
# current project and location.
publisher_models_path = (
    f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models"
)

basic_memory_config = MemoryBankConfig(
    # Which embedding model to use for similarity search
    similarity_search_config=SimilaritySearchConfig(
        embedding_model=f"{publisher_models_path}/{EMBEDDING_MODEL}"
    ),
    # Which LLM to use for extracting memories from conversations
    generation_config=GenerationConfig(
        model=f"{publisher_models_path}/{GENERATION_MODEL}"
    ),
)

print("✅ Memory Bank configuration created!")
print(f"   Embedding model: {EMBEDDING_MODEL}")
print(f"   Generation model: {GENERATION_MODEL}")

Now, we create the AgentEngine resource. By default, Memory Bank is enabled when you create an Agent Engine. This call provisions the necessary backend infrastructure to store and retrieve guest memories.


In [None]:
print("\n🛠️ Creating Agent Engine with Memory Bank...\n")
print("⏳ This provisions the backend infrastructure for guest memory storage...")

# Create the Agent Engine, passing the Memory Bank configuration built in the
# previous cell under context_spec.memory_bank_config.
agent_engine = client.agent_engines.create(
    config={"context_spec": {"memory_bank_config": basic_memory_config}}
)

# The resource name is what every later call (sessions, memories, delete) uses
# to address this Agent Engine.
agent_engine_name = agent_engine.api_resource.name

print("\n✅ Agent Engine created successfully!")
print(f"   Resource Name: {agent_engine_name}")

## Store a guest conversation

Now let's simulate a real hotel check-in conversation and see how Memory Bank extracts and stores important guest information.


### Create a Session for a guest

A **Session** is a chronological log of a single interaction between a guest and your concierge agent. It's the raw material from which memories are made.

Each session is linked to a `user_id` (in this case, the guest's identifier). This allows the agent to recall information for a specific guest across different stays and interactions. Think of it as the guest's profile ID in your hotel system.

**Important**: Vertex AI Agent Engine Sessions is not the only supported conversation source. You can also [provide the source conversation directly in JSON format](https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/memory-bank/generate-memories#json-format) if you're using a different session storage system.

In [None]:
print("💬 Creating a session for guest check-in...\n")

# Generate a unique guest identifier: a readable prefix followed by the first
# four characters of a random UUID.
guest_id = "guest_emma_" + str(uuid.uuid4())[:4]

# Create a session for this guest under the Agent Engine created earlier.
session = client.agent_engines.sessions.create(
    name=agent_engine_name,
    user_id=guest_id,
    config={"display_name": f"Check-in conversation for {guest_id}"},
)

# The session's resource name is read from the create result's response; later
# cells use it to append events and to generate memories.
session_name = session.response.name

print("✅ Session created successfully!")
print(f"   Session: {session_name}")
print(f"   Guest ID: {guest_id}")

### Define the guest conversation

This is the raw conversational data we'll use. It's a simple list of dictionaries, each representing a turn in the dialogue between the guest and the hotel concierge.

This conversation contains valuable information about the guest's preferences that we want to remember for future stays. Notice how the guest mentions dietary restrictions, room preferences, and special requests.

As you read this conversation, think about what a good concierge would remember:
- The guest's name (Emma)
- Dietary restrictions (vegetarian, lactose intolerant)
- Room preferences (high floor, away from elevator, quiet)
- Special requests (extra pillows, white noise machine)
- Temperature preferences (cooler room, 68°F)

In [None]:
# A realistic hotel check-in conversation
guest_conversation = [
    {
        "role": "user",
        "message": "Hi! I'm Emma Chen. I have a reservation for a deluxe room for three nights.",
    },
    {
        "role": "model",
        "message": "Welcome to our hotel, Ms. Chen! I have your reservation right here. Let me get you checked in.",
    },
    {
        "role": "user",
        "message": "Great! Just so you know, I'm vegetarian and lactose intolerant, so please note that for room service and the breakfast buffet.",
    },
    {
        "role": "model",
        "message": "Absolutely, I've noted your dietary preferences. Our kitchen will be informed, and we have excellent vegetarian options.",
    },
    {
        "role": "user",
        "message": "Thank you! Also, I prefer a high floor room, away from the elevator. I'm a light sleeper, so somewhere quiet would be ideal.",
    },
    {
        "role": "model",
        "message": "Of course! I'll assign you a corner room on the 12th floor. It's one of our quietest locations.",
    },
    {
        "role": "user",
        "message": "Perfect! One more thing - can I get extra pillows and a white noise machine? And I prefer the room temperature around 68¬∞F.",
    },
    {
        "role": "model",
        "message": "Certainly! I'll have housekeeping bring extra pillows and a white noise machine to your room, and I'll set the thermostat to 68¬∞F before your arrival.",
    },
]

print("‚úÖ Guest conversation defined!")
print(f"   {len(guest_conversation)} conversation turns")

### Add conversation to the session

Here, we loop through our conversation and append each turn as an event to the session we created earlier. This persists the conversation history, making it available for memory generation.

Memory Bank needs the complete conversation stored in a session before it can analyze and extract memories. Each turn is timestamped and attributed to either the guest (user) or the concierge (model).

In [None]:
print("⬆️ Adding conversation to session...\n")

# Append each turn to the session as one event. The turn's position in the
# list doubles as its invocation ID.
for invocation_id, turn in enumerate(guest_conversation):
    client.agent_engines.sessions.events.append(
        name=session_name,
        author=guest_id,  # Required by Sessions
        invocation_id=str(invocation_id),  # Required by Sessions
        timestamp=datetime.datetime.now(
            tz=datetime.timezone.utc
        ),  # Required by Sessions
        config={
            "content": {"role": turn["role"], "parts": [{"text": turn["message"]}]}
        },
    )

    # Echo the turn so the reader can follow along: person icon for the guest,
    # robot icon for the concierge.
    icon = "👤" if turn["role"] == "user" else "🤖"
    print(f"{icon} {turn['message']}")

print("\n✅ Conversation successfully added to session!")

## Generate memories from the conversation

Now let's see what the AI automatically remembers from this conversation.


### Extract and consolidate guest memories

This is the core of memory generation. The generate method kicks off an operation that performs two main steps.

This is how Memory Bank works:

1. **Extraction**: The generation model (gemini-2.5-flash) reads the conversation and extracts key facts. With the default configuration, it looks for information that matches pre-defined **Managed Topics** like:
   - `USER_PERSONAL_INFO`: Personal details like the guest's name
   - `USER_PREFERENCES`: Stated preferences about rooms, amenities, temperature, etc.
   - `EXPLICIT_INSTRUCTIONS`: Specific requests the guest made

2. **Consolidation**: Memory Bank intelligently merges new facts with existing memories (if the guest has stayed before), avoiding duplicates and resolving contradictions. For example, if a guest previously preferred 70°F but now says 68°F, the system updates the memory.


**Note**: The `wait_for_completion=True` flag makes this a blocking call, which is useful for this tutorial. In production, you would typically set it to `False` to run in the background and poll for results later.

In [None]:
print("🧠 Analyzing conversation and extracting memories...\n")
print(
    "⏳ The AI is reading the conversation and identifying important facts to remember..."
)

# Generate memories from the session. With wait_for_completion set to True the
# call blocks until generation finishes, so `operation` already carries the
# response when the next cell reads it.
operation = client.agent_engines.memories.generate(
    name=agent_engine_name,
    vertex_session_source={"session": session_name},
    config={"wait_for_completion": True},
)

print("\n✅ Memory extraction complete!")

### Display extracted memories

Let's see what the AI remembered from Emma's check-in conversation.

Memory Bank will have extracted structured facts about:
- Guest identity and personal information
- Dietary preferences and restrictions
- Room location preferences
- Temperature preferences
- Special amenity requests

Each memory is a concise, factual statement that can be used to personalize future interactions.

In [None]:
# Print the memories produced by the generation call above via the shared helper.
display_generated_memories(
    operation,
    client,
    title="Guest Preferences Extracted",
)

## Retrieve and use guest memories

Now let's see how to retrieve these memories and use them to personalize the guest experience.

### Retrieve all memories for a guest

Let's retrieve all the memories we just created for our guest. The simplest method is scope-based retrieval.

A "scope" is a set of key-value pairs that defines a collection of memories. By providing `{"user_id": guest_id}`, we're asking for all memories that belong to this specific guest. This is like querying a database: "Give me all records where user_id = 'guest_emma_1234'"

When a guest returns or calls the front desk, you want to quickly access everything you know about their preferences.

In [None]:
print(f"📚 Retrieving all memories for guest: {guest_id}...\n")

# Simple retrieval - get all memories for this guest. The scope selects the
# memories stored under this guest's user_id.
results = client.agent_engines.memories.retrieve(
    name=agent_engine_name, scope={"user_id": guest_id}
)

# Materialize the results so they can be counted and then listed.
all_memories = list(results)

print(f"✅ Found {len(all_memories)} memories for this guest:\n")

for i, retrieved_memory in enumerate(all_memories, 1):
    print(f"   {i}. {retrieved_memory.memory.fact}")

## Putting everything together

Using Vertex AI Memory Bank, when Emma returns for another stay, we can instantly recall her preferences and provide exceptional, personalized service. Instead of asking Emma to repeat all her preferences, we proactively set up her room exactly how she likes it.


In [None]:
print("=" * 80)
print("SCENARIO: Guest Returns for Second Stay")
print("=" * 80)
print("\n📅 Two weeks later, Emma returns to the hotel...\n")

# Simulate a return visit conversation
print("👤 Hi! I'm Emma Chen, checking in again.")
print("🤖 Welcome back, Ms. Chen! Let me pull up your preferences...\n")

# Retrieve Emma's memories
print("🔍 Retrieving guest preferences from previous stay...")
results = client.agent_engines.memories.retrieve(
    name=agent_engine_name, scope={"user_id": guest_id}
)

memories = list(results)

print(f"✅ Found {len(memories)} preferences on file\n")

# Display how we'll use these memories
print("🤖 Perfect! Based on your previous stay, I have:")
print()

for memory in memories:
    fact = memory.memory.fact
    # Lower-case once so every keyword check below is case-insensitive.
    fact_lower = fact.lower()

    # Simple keyword routing for the demo: the first matching category wins,
    # and anything unmatched is printed as a generic note.
    if "vegetarian" in fact_lower or "lactose" in fact_lower:
        print(f"   🥗 Dietary: {fact}")
        print("      → I've already notified the kitchen and breakfast staff")
    elif "floor" in fact_lower or "quiet" in fact_lower or "elevator" in fact_lower:
        print(f"   🏨 Room: {fact}")
        print("      → I've assigned you the same corner room on floor 12")
    elif "temperature" in fact_lower or "68" in fact:
        print(f"   🌡️  Climate: {fact}")
        print("      → Thermostat pre-set before your arrival")
    elif "pillow" in fact_lower or "white noise" in fact_lower:
        print(f"   🛏️  Amenities: {fact}")
        print("      → Already placed in your room")
    else:
        print(f"   ℹ️  {fact}")

    print()

## Advanced retrieval: Similarity Search

In the previous examples, we retrieved **all** memories for a guest using scope-based retrieval. But what if you only want the most **relevant** memories for a specific question? This is where **similarity search** shines.


### Understanding the difference: Scope vs. Similarity

Imagine Emma has stayed at your hotel 10 times and has 50+ memories stored. When the front desk needs to answer "What are Emma's dietary restrictions?", they don't need all 50 memories—just the 2-3 most relevant ones about food preferences.

This is the motivation for similarity search over memories. To recap:

- **Scope-based retrieval**: Returns ALL memories matching a scope (e.g., all memories for user_id = "guest_emma_1234")
- **Similarity search**: Returns only the TOP K most relevant memories for a specific query using semantic similarity

Let's see this in action by adding more conversation history for Emma.

#### Add more guest interactions

Emma calls the front desk with questions about spa services and breakfast options. Let's record this conversation.

In [None]:
print("📞 Emma calls the front desk with some questions...\n")

# Additional conversation turns, in the same role/message shape as the
# check-in conversation.
additional_conversation = [
    {
        "role": "user",
        "message": "Hi, I'd like to book a spa treatment tomorrow. Do you have deep tissue massage? I prefer morning appointments, around 9 AM.",
    },
    {
        "role": "model",
        "message": "Absolutely! I can book you a deep tissue massage at 9 AM tomorrow. I've noted your preference for morning spa appointments.",
    },
    {
        "role": "user",
        "message": "Great! Also, what vegetarian options do you have for breakfast? I need dairy-free choices because of my lactose intolerance.",
    },
    {
        "role": "model",
        "message": "We have several vegetarian and dairy-free options: oat milk smoothie bowls, avocado toast, fresh fruit platters, and coconut yogurt parfaits.",
    },
]

# Add these turns to the same session. Numbering continues from the length of
# the first conversation so invocation IDs do not repeat within the session.
for invocation_id_counter, turn in enumerate(
    additional_conversation, start=len(guest_conversation)
):
    client.agent_engines.sessions.events.append(
        name=session_name,
        author=guest_id,
        invocation_id=str(invocation_id_counter),
        timestamp=datetime.datetime.now(tz=datetime.timezone.utc),
        config={
            "content": {"role": turn["role"], "parts": [{"text": turn["message"]}]}
        },
    )

    # Person icon for the guest, robot icon for the concierge.
    icon = "👤" if turn["role"] == "user" else "🤖"
    print(f"{icon} {turn['message']}")

print("\n✅ Additional conversation added to session!")

#### Generate new memories from extended conversation

Now let's generate memories from this extended conversation. Memory Bank will analyze the entire session and extract new facts while consolidating them with existing memories.

This demonstrates Memory Bank's consolidation capability—it won't create duplicate memories. Instead, it merges new information with what it already knows about Emma.


In [None]:
print("\n🧠 Generating memories from extended conversation...\n")
print("⏳ Analyzing the full conversation history and consolidating memories...")

# Generate additional memories from the same session, which now also holds the
# front-desk call. wait_for_completion makes the call block until it finishes.
operation = client.agent_engines.memories.generate(
    name=agent_engine_name,
    vertex_session_source={"session": session_name},
    config={"wait_for_completion": True},
)

print("\n✅ Memory generation complete!")

In [None]:
# Print the result of the second generation run via the shared helper.
display_generated_memories(
    operation,
    client,
    title="New Memories Generated",
)

#### Compare scope-based vs. similarity-based retrieval

Now let's compare both retrieval methods side-by-side to see the difference.

In [None]:
print("COMPARISON: Scope-Based vs. Similarity-Based Retrieval")

# Method 1: Scope-based retrieval (returns ALL memories)
print("\n📊 Method 1: Scope-Based Retrieval")
print("Query: Get all memories for Emma")
print("-" * 80)

all_results = client.agent_engines.memories.retrieve(
    name=agent_engine_name, scope={"user_id": guest_id}
)

all_memories = list(all_results)
print(f"\n✅ Found {len(all_memories)} total memories:\n")

for i, mem in enumerate(all_memories, 1):
    print(f"   {i}. {mem.memory.fact}")

# Method 2: Similarity search (returns only RELEVANT memories)
# The query is defined once so the printed text always matches what is sent.
dietary_query = "What are Emma's dietary restrictions?"

print("\n🔍 Method 2: Similarity Search")
print(f"Query: {dietary_query}")
print("-" * 80)

search_results = client.agent_engines.memories.retrieve(
    name=agent_engine_name,
    scope={"user_id": guest_id},
    similarity_search_params={
        "search_query": dietary_query,
        "top_k": 3,  # Get top 3 most relevant memories
    },
)

relevant_memories = list(search_results)
print(f"\n✅ Found {len(relevant_memories)} relevant memories:\n")

for i, mem in enumerate(relevant_memories, 1):
    # Fall back to "N/A" when a result carries no distance attribute.
    distance = getattr(mem, "distance", "N/A")
    print(f"   {i}. {mem.memory.fact}")
    print(f"      → Relevance distance: {distance} (lower = more relevant)")

### Use similarity search for specific questions

Let's test similarity search with several specific front desk questions.

In real-world scenarios, hotel staff need quick, targeted answers. Similarity search makes this efficient by retrieving only what matters for each specific question.

**Understanding Similarity Search Parameters**

| Parameter | Description | Example Value |
|-----------|-------------|---------------|
| `search_query` | The natural language question or topic | "What are the guest's room preferences?" |
| `top_k` | Maximum number of results to return | 3 (returns top 3 most relevant) |
| `distance` | Returned with each result (not an input): the distance between the query and memory embeddings | Automatically calculated (lower = more similar) |

In [None]:
print("\n🏨 Testing Similarity Search with Common Front Desk Questions\n")

# Test with various specific queries
search_queries = [
    "What are Emma's room location preferences?",
    "What are Emma's dietary restrictions?",
    "What spa services does Emma prefer?",
    "What temperature does Emma like in her room?",
]

for query in search_queries:
    print(f'\n❓ Query: "{query}"')
    print("-" * 80)

    # Perform similarity search, limited to this guest's memories by the scope.
    results = client.agent_engines.memories.retrieve(
        name=agent_engine_name,
        scope={"user_id": guest_id},
        similarity_search_params={
            "search_query": query,
            "top_k": 2,  # Get top 2 most relevant memories
        },
    )

    memories = list(results)

    if memories:
        print("\n🎯 Most Relevant Memories:")
        for i, mem in enumerate(memories, 1):
            # Fall back to "N/A" when a result carries no distance attribute.
            distance = getattr(mem, "distance", "N/A")
            # Only numeric distances can take the fixed four-decimal format;
            # anything else (such as the "N/A" fallback) is printed as-is.
            if isinstance(distance, (int, float)):
                distance_text = f"{distance:.4f}"
            else:
                distance_text = f"{distance}"
            print(f"   {i}. {mem.memory.fact}")
            print(f"      (Distance: {distance_text})")
    else:
        print("   No relevant memories found")

    print()

print("✅ Similarity search enables fast, targeted information retrieval!")

### Best practices: When to use each retrieval method

**Use Scope-Based Retrieval When**:
- You need a complete profile of the guest
- You're displaying all preferences in a dashboard
- The number of memories is small
- You want to ensure nothing is missed

**Use Similarity Search When**:
- You're answering a specific question
- The guest has many memories
- You need fast, targeted responses
- You're building a conversational agent that needs context-aware responses

## Cleaning up

To avoid incurring charges to your Google Cloud account for the resources used in this tutorial, delete the Agent Engine we created.


In [None]:
print("🧹 Cleaning up resources...\n")

# Set to False to keep the Agent Engine (and its memories) for further
# experimentation.
delete_agent_engine = True

if delete_agent_engine:
    # Delete the agent engine and all its memories
    client.agent_engines.delete(name=agent_engine_name, force=True)

    print("✅ Agent Engine deleted successfully!")
    print("   All guest memories have been removed")
else:
    # Print the resource name so it can be deleted later.
    print("⏭️  Skipping cleanup - Agent Engine will remain active")
    print(f"   Resource: {agent_engine_name}")
    print("\n⚠️  Remember to delete this resource later to avoid charges")

## Congratulations!

You've completed the "Get Started with Vertex AI Memory Bank" tutorial!

You now have the foundational knowledge to build context-aware, personalized AI agents that remember user preferences across sessions. The hotel concierge scenario you built here can be adapted to countless use cases where personalization and memory matter.

**What's Next?**
- Explore advanced Memory Bank features in our intermediate tutorials
- Check out the [Memory Bank documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/memory-bank)
- Join the [Google Cloud AI community](https://discuss.google.dev/c/google-cloud/14) to share your projects
