In [None]:
# @title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(folder, name)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Memory Management - Part 2 - Memory

Memory is a service that provides long-term knowledge storage for your agents. The key distinction:

Session = Short-term memory (single conversation)

Memory = Long-term knowledge (across multiple conversations)

Configure your Gemini API Key 


In [None]:
#pip install google-adk
import os
from kaggle_secrets import UserSecretsClient

# Read the Gemini key from Kaggle secrets and expose it through the environment.
try:
    secrets_client = UserSecretsClient()
    GOOGLE_API_KEY = secrets_client.get_secret("GOOGLE_API_KEY")
    os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY
    print("‚úÖ Gemini API key setup complete.")
except Exception as err:
    # Best effort: report the problem and let the rest of the cell finish.
    print(
        f"üîë Authentication Error: Please make sure you have added 'GOOGLE_API_KEY' to your Kaggle secrets. Details: {err}"
    )

In [None]:
from google.adk.agents import LlmAgent
from google.adk.models.google_llm import Gemini
from google.adk.runners import Runner
from google.adk.sessions import InMemorySessionService
from google.adk.memory import InMemoryMemoryService
from google.adk.tools import load_memory, preload_memory
from google.genai import types

# Reached only when every import in this cell resolved without raising.
print("‚úÖ ADK components imported successfully.")

Helper Function

In [None]:
async def run_session(
    runner_instance: Runner, user_queries: list[str] | str, session_id: str = "default"
):
    """Send one or more user queries through a runner and print the final replies.

    Uses the notebook-level ``session_service``, ``APP_NAME`` and ``USER_ID``.

    Args:
        runner_instance: Runner whose ``run_async`` is used to drive the agent.
        user_queries: A single query string, or a list of queries sent in order.
        session_id: ID of the session to create; if creation fails, the session
            with this ID is fetched instead.
    """
    print(f"\n### Session: {session_id}")

    # Create the session, or fall back to fetching it when creation fails
    # (presumably because the ID is already in use).
    try:
        session = await session_service.create_session(
            app_name=APP_NAME, user_id=USER_ID, session_id=session_id
        )
    except Exception:
        # `except Exception` instead of a bare `except:` so that task cancellation,
        # KeyboardInterrupt and SystemExit are not swallowed by the fallback.
        session = await session_service.get_session(
            app_name=APP_NAME, user_id=USER_ID, session_id=session_id
        )

    # Accept a single string as shorthand for a one-element list
    if isinstance(user_queries, str):
        user_queries = [user_queries]

    # Send the queries one at a time, in order
    for query in user_queries:
        print(f"\nUser > {query}")
        query_content = types.Content(role="user", parts=[types.Part(text=query)])

        # Consume the event stream; only final responses that carry parts are shown
        async for event in runner_instance.run_async(
            user_id=USER_ID, session_id=session.id, new_message=query_content
        ):
            if event.is_final_response() and event.content and event.content.parts:
                text = event.content.parts[0].text
                # Skip missing text and the literal string "None"
                if text and text != "None":
                    print(f"Model: > {text}")


# Confirmation printed once the helper definition above has been executed.
print("‚úÖ Helper functions defined.")

Configure Retry Options

In [None]:
# Retry policy passed to the Gemini model via `retry_options`.
retry_config = types.HttpRetryOptions(
    http_status_codes=[429, 500, 503, 504],  # HTTP errors that trigger a retry
    initial_delay=1,
    exp_base=7,  # delay multiplier
    attempts=5,  # maximum retry attempts
)

Memory Workflow

To integrate Memory into your Agents, there are three high-level steps.

Three-step integration process:

Initialize → Create a MemoryService and provide it to your agent via the Runner
Ingest → Transfer session data to memory using add_session_to_memory()
Retrieve → Search stored memories using search_memory()

Initialize MemoryService

Initialize Memory

ADK provides multiple MemoryService implementations through the BaseMemoryService interface:

InMemoryMemoryService - Built-in service for prototyping and testing (keyword matching, no persistence)
VertexAiMemoryBankService - Managed cloud service with LLM-powered consolidation and semantic search
Custom implementations - You can build your own using databases, though managed services are recommended

In [None]:
# ADK's built-in Memory Service, intended for development and testing
memory_service = InMemoryMemoryService()

Add Memory to Agent

In [None]:
# Identifiers shared by the cells of this notebook
APP_NAME = "MemoryDemoApp"
USER_ID = "demo_user"

# Build the model first, then hand it to the agent
demo_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)
user_agent = LlmAgent(
    name="MemoryDemoAgent",
    model=demo_model,
    instruction="Answer user questions in simple words.",
)

print("‚úÖ Agent created")

Create Runner
Now provide both Session and Memory services to the Runner.

Key configuration:

The Runner requires both services to enable memory functionality:

session_service → Manages conversation threads and events
memory_service → Provides long-term knowledge storage
Both services work together: Sessions capture conversations, Memory stores knowledge for retrieval across sessions.

In [None]:
# Session service: handles conversations
session_service = InMemorySessionService()

# Runner wired with BOTH services. APP_NAME is used instead of a repeated string
# literal so the runner's app name cannot drift from the one run_session() passes
# when it creates or fetches sessions.
runner = Runner(
    agent=user_agent,
    app_name=APP_NAME,
    session_service=session_service,
    memory_service=memory_service,  # the memory service is now available to the runner
)

print("‚úÖ Agent and Runner created with memory support!")

💡 Configuration vs. Usage: Adding memory_service to the Runner makes memory available to your agent, but doesn't automatically use it. You must explicitly:

Ingest data using add_session_to_memory()
Enable retrieval by giving your agent memory tools (load_memory or preload_memory)

Memory starts empty, so you must manually transfer important Session data using add_session_to_memory(). Sessions store full raw history, but none becomes long-term memory automatically. Managed services extract key facts, while InMemoryMemoryService stores everything. First, create a session by talking to the agent.

In [None]:
# User tells agent about their favorite color
await run_session(
    runner,
    "My favorite color is blue-green. Can you write a Haiku about it?",
    "conversation-01",  # Session ID
)

Verify that the conversation was captured in the session.

In [None]:
session = await session_service.get_session(
    app_name=APP_NAME,
    user_id=USER_ID,
    session_id="convertation-01"
)

#Let's see what's in the session
print("üìù Session contains:")
for event in session.events:
    text = (
        event.content.parts[0].text[:60]
        if event.content and event.content.parts
        else "(empty)"
    )
    print(f"  {event.content.role}: {text}...")

Now we transfer it to memory

In [None]:
# This is the key method!
# Hands the `session` object fetched in the previous cell to memory_service.
await memory_service.add_session_to_memory(session)

print("‚úÖ Session added to memory!")

5.1 Memory Retrieval in ADK
ADK has two memory retrieval tools: load_memory (reactive) and preload_memory (proactive).
load_memory searches only when the agent decides—efficient but the agent might forget.
preload_memory searches every turn—guaranteed context but less efficient.
It’s like looking up notes only when needed vs. rereading all notes before every question.

5.2 Add Load Memory Tool
To use the reactive pattern, you add the built-in load_memory tool to the agent’s tool list. No custom code is needed—just include it in the tools array.

In [None]:
# Rebuild the agent, this time with the load_memory tool so it can search
# memory whenever it decides to.
memory_tools = [load_memory]
user_agent = LlmAgent(
    name="MemoryDemoAgent",
    model=Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config),
    instruction="Answer user questions in simple words. Use load_memory tool if you need to recall past conversations.",
    tools=memory_tools,
)

print("‚úÖ Agent with load_memory tool created.")

Update the Runner and Test

In [None]:
# Create a new runner with the updated agent
runner = Runner(
    agent=user_agent,
    app_name=APP_NAME,
    session_service=session_service,
    memory_service=memory_service,
)

await run_session(runner, "What is my favorite color?", "color-test")

Complete Manual Workflow Test

In [None]:
await run_session(runner, "My birthday is on March 15th.", "birthday-session-01")

Manually saving the session to memory 

In [None]:
# Manually save the session to memory
birthday_session = await session_service.get_session(
    app_name=APP_NAME, user_id=USER_ID, session_id="birthday-session-01"
)

await memory_service.add_session_to_memory(birthday_session)

print("‚úÖ Birthday session saved to memory!")

Here's the crucial test: we'll start a completely new session with a different session ID and ask the agent to recall the birthday.

In [None]:
# Test retrieval in a NEW session
await run_session(
    runner, "When is my birthday?", "birthday-session-02"  # Different session ID
)

Agent receives: "When is my birthday?"
Agent recognizes: This requires past conversation context
Agent calls: load_memory("birthday")
Memory returns: Previous conversation containing "March 15th"
Agent responds: "Your birthday is on March 15th"

Manual Memory Search

In [None]:
# Search for color preferences
search_response = await memory_service.search_memory(
    app_name=APP_NAME,
    user_id=USER_ID,
    query="What is the user's favorite color?"
)

print("üîç Search Results:")
print(f"  Found {len(search_response.memories)} relevant memories")
print()

for memory in search_response.memories:
    if memory.content and memory.content.parts:
        text = memory.content.parts[0].text[:80]
        print(f"  [{memory.author}]: {text}...")

How Search Works
InMemoryMemoryService (this notebook):

Method: Keyword matching
Example: "favorite color" matches because those exact words exist
Limitation: "preferred hue" won't match
VertexAiMemoryBankService (Day 5):

Method: Semantic search via embeddings
Example: "preferred hue" WILL match "favorite color"
Advantage: Understands meaning, not just keywords
You'll explore semantic search in Day 5!

Automating Memory Storage


In [None]:
async def auto_save_to_memory(callback_context):
    """Pass the current session to the memory service.

    Reads the invocation context carried by ``callback_context`` and awaits
    ``add_session_to_memory`` on that context's memory service, giving it the
    context's session. Returns nothing.
    """
    invocation = callback_context._invocation_context
    memory_backend = invocation.memory_service
    await memory_backend.add_session_to_memory(invocation.session)


# Confirmation printed once the callback definition above has been executed.
print("‚úÖ Callback created.")

Create an Agent: Callback and PreLoad Memory Tool
Now create an agent that combines:

Automatic storage: after_agent_callback saves conversations
Automatic retrieval: preload_memory loads memories
This creates a fully automated memory system with zero manual intervention.

In [None]:
# Agent that retrieves with preload_memory and saves through the callback
auto_memory_model = Gemini(model="gemini-2.5-flash-lite", retry_options=retry_config)
auto_memory_agent = LlmAgent(
    name="AutoMemoryAgent",
    model=auto_memory_model,
    instruction="Answer user questions.",
    after_agent_callback=auto_save_to_memory,  # saves after each turn
    tools=[preload_memory],
)

print("‚úÖ Agent created with automatic memory saving!")

After every agent response → callback triggers
Session data → transferred to memory
No manual add_session_to_memory() calls needed

Create a Runner and Test The Agent
Time to test! Create a Runner with the auto-memory agent, connecting the session and memory services.

In [None]:
# Runner for the auto-save agent: ties the automated agent to the
# session and memory services created earlier (Section 3).
auto_runner = Runner(
    memory_service=memory_service,
    session_service=session_service,
    app_name=APP_NAME,
    agent=auto_memory_agent,  # the agent with callback + preload_memory
)

print("‚úÖ Runner created.")

In [None]:
# Test 1: Tell the agent about a gift (first conversation)
# The callback will automatically save this to memory when the turn completes
await run_session(
    auto_runner,
    "I gifted a new toy to my nephew on his 1st birthday!",
    "auto-save-test",
)

# Test 2: Ask about the gift in a NEW session (second conversation)
# The agent should retrieve the memory using preload_memory and answer correctly
await run_session(
    auto_runner,
    "What did I gift my nephew?",
    "auto-save-test-2",  # Different session ID - proves memory works across sessions!
)

Memory Consolidation = Extracting only important facts while discarding conversational noise.

Before (Raw Storage):

User: "My favorite color is BlueGreen. I also like purple. 
       Actually, I prefer BlueGreen most of the time."
Agent: "Great! I'll remember that."
User: "Thanks!"
Agent: "You're welcome!"

→ Stores ALL 4 messages (redundant, verbose)
After (Consolidation):

Extracted Memory: "User's favorite color: BlueGreen"

→ Stores 1 concise fact


7.3 How Consolidation Works (Conceptual)
The pipeline:

1. Raw Session Events
   ↓
2. LLM analyzes conversation
   ↓
3. Extracts key facts
   ↓
4. Stores concise memories
   ↓
5. Merges with existing memories (deduplication)
Example transformation:

Input:  "I'm allergic to peanuts. I can't eat anything with nuts."

Output: Memory {
  allergy: "peanuts, tree nuts"
  severity: "avoid completely"
}
Natural language → Structured, actionable data.