In [None]:
from pathlib import Path
import sys
import numpy as np

from google.adk.agents import Agent
from google.adk.models.google_llm import Gemini
from google.adk.tools.function_tool import FunctionTool
from google.genai import types
from google.adk.apps.app import App
from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner

# Confirmation marker: reaching this line means every import above resolved.
print("✅ Imports loaded")


# RAG Vaccine Information System

This notebook implements a Retrieval-Augmented Generation (RAG) system for vaccine information using Google's Gemini AI. The system:
- Ingests documents from PDFs and websites
- Creates a searchable vector database using embeddings
- Provides an AI agent that answers questions using retrieved context

## 1. Setup and Configuration

Import required libraries and configure API access.

## 2. Initialize Knowledge Base

Create the RAG knowledge base instance.


In [None]:
# Put the project root (the directory that contains the "vaxtalk" package)
# on sys.path so that the "vaxtalk.config" and "vaxtalk.rag.rag" imports
# below resolve. The root is taken to be the parent of the current working
# directory, so the notebook must be launched from a direct child of the root.
project_root = Path.cwd().parent
vaxtalk_dir = project_root / "vaxtalk"

# Insert the resolved path only once so re-running this cell does not keep
# growing sys.path.
project_root_path = str(project_root.resolve())
if project_root_path not in sys.path:
    sys.path.insert(0, project_root_path)

# Project-local imports: these must run after the sys.path adjustment above.
from vaxtalk.config import load_env_variables, get_env_variable
from vaxtalk.rag.rag import RagKnowledgeBase

print("✅ Project imports loaded")


In [None]:
# Populate the environment through the project's config helper
# (presumably from a .env file -- confirm in vaxtalk.config).
load_env_variables()

# Read the Google API key by name; the key itself is never printed.
GOOGLE_API_KEY = get_env_variable("GOOGLE_API_KEY")
print("✅ API key loaded")

In [None]:
# Create the RAG knowledge base instance used by every later cell.
# cache_dir is a relative path, so it presumably resolves against the
# notebook's working directory (i.e. a "cache" folder next to it) -- verify
# against RagKnowledgeBase.
rag_kb = RagKnowledgeBase(
    api_key=GOOGLE_API_KEY,
    cache_dir="../cache",
)

print("✅ Knowledge base initialized")


In [None]:
# Build the knowledge base from the PDFs under vaxtalk/docs and from the
# website rooted at root_url.
# max_pages / max_depth presumably bound the website crawl, and use_cache=True
# presumably reuses previously stored results -- confirm in RagKnowledgeBase.
rag_kb.build_knowledge_base(
    pdf_folder=vaxtalk_dir / "docs",
    root_url="https://www.serviziterritoriali-asstmilano.it/servizi/vaccinazioni/",
    max_pages=10,
    max_depth=2,
    use_cache=True,
)

# Summarize what was built so the reader can sanity-check the result.
stats = rag_kb.get_stats()
print("\n📊 Knowledge Base Stats:")
print(f"  Chunks: {stats['num_chunks']}")
print(f"  Embedding shape: {stats['embedding_shape']}")


### Clear Cache (Optional)

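Uncomment the `rag_kb.clear_cache()` call in the cell below and run it if you need to rebuild the knowledge base from scratch. As written, the call is commented out, so running the cell does nothing.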


In [None]:
# Uncomment the next line to clear the cache and force a full rebuild
# rag_kb.clear_cache()


## 4. Configure ADK Agent

Set up the Google ADK agent with the retrieval tool.


In [None]:
# Retry policy for the model's HTTP calls: at most 3 attempts, retrying only
# on rate-limit (429) and server-side (500/503/504) responses.
retry_config = types.HttpRetryOptions(
    attempts=3,
    initial_delay=1,
    http_status_codes=[429, 500, 503, 504],
)

# Wrap the knowledge base's retrieve method so the agent can call it as a tool.
# The prompt below refers to this tool by the name `retrieve`.
rag_tool = FunctionTool(rag_kb.retrieve)

# Agent instruction: answer only from retrieved content and cite its sources.
prompt = """You are a helpful assistant for vaccine information.
You have access to a knowledge base containing official documents and web pages about vaccinations.
    
When the user asks a question:
1. Use the `retrieve` tool to find relevant information.
2. Answer the question based ONLY on the information returned by the tool.
3. If the tool returns no information, or the information is not pertinent, say you don't have that information.
4. Always cite the sources provided in the tool output.
5. Be concise but thorough in your responses.
"""

# Assemble the agent: Gemini model with the retry policy, the instruction
# above, and the retrieval tool as its only tool.
rag_agent = Agent(
    name="RAG_Vaccine_Informer",
    model=Gemini(
        model="gemini-2.5-flash-lite",
        retry_options=retry_config,
    ),
    instruction=prompt,
    tools=[rag_tool],
    output_key="rag_output",
)

print("✅ RAG Agent configured")


In [None]:
# Session service that keeps conversation state for the runner
# (in-memory, so presumably not persisted across kernel restarts).
session_service = InMemorySessionService()

# Application wrapper with the RAG agent as its root agent.
application = App(
    name="VaccineInfoRAG",
    root_agent=rag_agent,
)

# Runner that executes queries against the application using the session
# service created above.
runner = Runner(
    app=application,
    session_service=session_service,
)

print("✅ Application ready")


## 6. Testing

Test the RAG system with queries.

### Direct Retrieval Test


In [None]:
# Call the knowledge base directly, bypassing the agent, and show the
# top-3 retrieval result for a sample question.
test_query = "What is the policy for vaccinating pregnant women?"
result = rag_kb.retrieve(test_query, k=3)

# Echo the query followed by one blank line, then the raw retrieval output.
print("Query: " + test_query, end="\n\n")
print(result)


### Agent Query Test


In [None]:
# Query using the ADK agent
response = await runner.run_debug("What is the policy for vaccinating pregnant women?")
