# Simple Vertex AI RAG Engine

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ThamuMnyulwa/mkdocs_rag/blob/main/notebooks/02_simple_vertex_ai_rag.ipynb)

## Managed RAG in Minutes

**Time**: 10-15 minutes | **Cost**: ~$0.50 | **Prerequisites**: GCP account

Learn the "golden path" - Vertex AI RAG Engine with managed infrastructure.

## Step 1: Install & Authenticate

In [None]:
# Install the Vertex AI SDK and the Cloud Storage client library (-q keeps pip output quiet).
!pip install -q google-cloud-aiplatform google-cloud-storage

In [None]:
# Authenticate the current Colab user.
# NOTE: google.colab is only importable inside a Colab runtime.
from google.colab import auth
auth.authenticate_user()
print("‚úÖ Authenticated!")

## Step 2: Configure Project

You may be prompted to `Allow this notebook to access your Google credentials`; if so, click `Allow`.

In [None]:
import os

# Colab form fields: replace the placeholder project ID before running.
PROJECT_ID = "your-project-id"  # @param {type:"string"}
# Region later passed to vertexai.init().
LOCATION = "europe-west1"  # @param {type:"string"}

# Expose the project through the environment and make it the active gcloud project.
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID
!gcloud config set project {PROJECT_ID}

# Echo the settings so a wrong value is easy to spot.
print(f"‚úÖ Project: {PROJECT_ID}")
print(f"‚úÖ Location: {LOCATION}")

## Step 3: Enable APIs

In [None]:
# Enable the Vertex AI API on the active gcloud project.
!gcloud services enable aiplatform.googleapis.com
# NOTE: this message is printed even if the command above failed; the
# pre-flight check cell that follows verifies the API is actually enabled.
print("‚úÖ Vertex AI API enabled!")

## Pre-Flight Checks

Let's verify your environment is ready.


In [None]:
# Check if APIs are enabled
import subprocess

def check_api_enabled(service):
    """Return True if ``service`` shows up in gcloud's list of enabled services.

    Runs ``gcloud services list --enabled`` filtered on ``service`` and looks
    for the service name in the command's standard output.

    Args:
        service: API service name, e.g. ``"aiplatform.googleapis.com"``.

    Returns:
        True when the name is found in the output. False when it is not
        found, or when gcloud cannot be started (for example, it is not
        installed) or does not finish within 10 seconds.
    """
    # Pass an argument list instead of a shell string so the service name is
    # never interpreted by a shell.
    command = [
        "gcloud", "services", "list", "--enabled",
        f"--filter={service}",
        "--format=value(name)",
    ]
    try:
        result = subprocess.run(
            command, capture_output=True, text=True, timeout=10
        )
    except (OSError, subprocess.SubprocessError):
        # OSError: gcloud missing or not executable.
        # SubprocessError: includes TimeoutExpired.
        # KeyboardInterrupt is deliberately NOT swallowed.
        return False
    return service in result.stdout

print("üîç Checking required APIs...\n")

# Service name -> human-readable label for every API this notebook needs.
required_apis = {
    "aiplatform.googleapis.com": "Vertex AI API",
}

# Probe each API once, printing one status line per service and remembering
# whether every probe succeeded.
all_enabled = True
for api, name in required_apis.items():
    enabled = check_api_enabled(api)
    all_enabled = all_enabled and enabled
    status = "‚ùå" if not enabled else "‚úÖ"
    print(f"{status} {name} ({api})")

# Final verdict for the whole pre-flight check.
if all_enabled:
    print("\n‚úÖ All required APIs are enabled!")
else:
    print("\n‚ö†Ô∏è  Some APIs are not enabled. Run the API enablement step.")


## Step 4: Initialize Vertex AI

In [None]:
# Vertex AI SDK, its RAG module, and the Gemini model/tool classes used by
# the later cells.
import vertexai
from vertexai import rag
from vertexai.generative_models import GenerativeModel, Tool

# Point the SDK at the project and region configured in Step 2.
vertexai.init(project=PROJECT_ID, location=LOCATION)
print("‚úÖ Vertex AI initialized!")

## Step 5: Create RAG Corpus

In [None]:
# Create the RAG corpus, configured to embed documents with text-embedding-005.
# The configuration is assembled bottom-up: endpoint -> embedding config ->
# vector DB config -> corpus.
embedding_endpoint = rag.VertexPredictionEndpoint(
    publisher_model="publishers/google/models/text-embedding-005"
)
embedding_model_config = rag.RagEmbeddingModelConfig(
    vertex_prediction_endpoint=embedding_endpoint
)
vector_db_config = rag.RagVectorDbConfig(
    rag_embedding_model_config=embedding_model_config
)

corpus = rag.create_corpus(
    display_name="demo-corpus",
    backend_config=vector_db_config,
)

# corpus.name is the identifier the later cells pass to the import and
# retrieval calls.
print(f"‚úÖ Corpus created: {corpus.name}")
print(f"üìä Using embedding model: text-embedding-005")

## Step 6: Prepare Documents

Choose one of the following options to get your documents ready:
1. **Upload from notebook** - Upload files directly from Colab/local
2. **Google Drive** - Import from Google Drive
3. **Direct GCS reference** - Use files already in GCS bucket


In [None]:
# Stage the source documents and build `paths`, the list of URIs that the
# import step hands to the RAG corpus.
#
# `upload_method` selects where the documents come from:
#   "notebook" - pick local files in the browser and copy them to GCS
#   "gdrive"   - copy matching files from a mounted Google Drive folder to GCS
#   "direct"   - reference files that already exist (no upload)

from google.colab import files
from google.cloud import storage
import io

BUCKET_NAME = "your-bucket-name"  # @param {type:"string"}
GCS_FOLDER = "rag-documents"  # @param {type:"string"}

upload_method = "notebook"  # @param ["notebook", "gdrive", "direct"]

if upload_method == "notebook":
    print("üì§ Uploading files from notebook...")

    # Create GCS client
    storage_client = storage.Client(project=PROJECT_ID)
    bucket = storage_client.bucket(BUCKET_NAME)

    # files.upload() returns a mapping of filename -> file content.
    uploaded = files.upload()
    gcs_paths = []

    for filename, content in uploaded.items():
        blob_path = f"{GCS_FOLDER}/{filename}"
        blob = bucket.blob(blob_path)
        blob.upload_from_string(content, content_type='application/octet-stream')
        gcs_path = f"gs://{BUCKET_NAME}/{blob_path}"
        gcs_paths.append(gcs_path)
        print(f"‚úÖ Uploaded: {filename} ‚Üí {gcs_path}")

    paths = gcs_paths
    print(f"\n‚úÖ {len(paths)} file(s) uploaded to GCS!")

elif upload_method == "gdrive":
    print("üìÅ Importing from Google Drive...")

    from pathlib import Path

    from google.colab import drive
    drive.mount('/content/drive')

    drive_folder = "/content/drive/MyDrive/your-folder"  # @param {type:"string"}
    # "raw" keeps this a Python list when edited through the Colab form; a
    # "string" field would turn it into a str and break the matching below.
    file_extensions = [".pdf", ".txt", ".docx", ".md"]  # @param {type:"raw"}

    drive_path = Path(drive_folder)
    if not drive_path.is_dir():
        # Fail loudly instead of silently importing zero files.
        raise FileNotFoundError(f"Drive folder not found: {drive_folder}")

    # Lower-cased suffixes so that e.g. "REPORT.PDF" matches ".pdf".
    wanted_suffixes = tuple(ext.lower() for ext in file_extensions)

    storage_client = storage.Client(project=PROJECT_ID)
    bucket = storage_client.bucket(BUCKET_NAME)

    gcs_paths = []
    # Walk the tree once; sorting only makes the upload log deterministic.
    for file_path in sorted(drive_path.rglob("*")):
        if not file_path.is_file():
            continue
        if not file_path.name.lower().endswith(wanted_suffixes):
            continue
        # Keep the path relative to the chosen folder so same-named files in
        # different subfolders do not overwrite each other in GCS.
        relative_name = file_path.relative_to(drive_path).as_posix()
        blob_path = f"{GCS_FOLDER}/{relative_name}"
        blob = bucket.blob(blob_path)
        blob.upload_from_filename(str(file_path))
        gcs_path = f"gs://{BUCKET_NAME}/{blob_path}"
        gcs_paths.append(gcs_path)
        print(f"‚úÖ Uploaded: {relative_name} ‚Üí {gcs_path}")

    paths = gcs_paths
    print(f"\n‚úÖ {len(paths)} file(s) imported from Google Drive!")

elif upload_method == "direct":
    print("üîó Using direct GCS references...")
    # For multiple files, use comma-separated paths or a list
    # Example: "gs://bucket/file1.pdf,gs://bucket/file2.pdf"
    paths_input = "gs://your-bucket/sample.pdf"  # @param {type:"string"}

    # The form yields a string; a list assigned by hand is accepted as well.
    if isinstance(paths_input, str):
        paths = [p.strip() for p in paths_input.split(",") if p.strip()]
    else:
        paths = paths_input if isinstance(paths_input, list) else [paths_input]

    print(f"‚úÖ Using {len(paths)} GCS path(s):")
    for p in paths:
        print(f"   - {p}")

else:
    # Previously any unknown value silently behaved like "direct".
    raise ValueError(
        f"Unknown upload_method {upload_method!r}; "
        "expected 'notebook', 'gdrive' or 'direct'."
    )

if not paths:
    # Nothing was selected or matched; the import step has nothing to work on.
    print("WARNING: no documents were selected - re-run this cell before importing.")


## Step 7: Import Documents to RAG Corpus


In [None]:
# Import the staged documents into the RAG corpus.
# `paths` is the list of document URIs built in Step 6.

if not paths:
    # An empty import would fail with a far less helpful API error.
    raise ValueError("No document paths to import - run Step 6 first.")

print(f"üìö Importing {len(paths)} file(s) into corpus...")

# `vertexai.rag` takes chunking options through a TransformationConfig; it
# does not accept bare chunk_size / chunk_overlap keyword arguments.
import_response = rag.import_files(
    corpus.name,
    paths=paths,
    transformation_config=rag.TransformationConfig(
        chunking_config=rag.ChunkingConfig(
            chunk_size=512,
            chunk_overlap=100,
        ),
    ),
)

print("‚úÖ Documents imported!")
# Report the counts from the response so partial failures are visible
# instead of being hidden behind an unconditional success message.
print(f"   Imported: {import_response.imported_rag_files_count}")
print(f"   Skipped:  {import_response.skipped_rag_files_count}")
print(f"   Failed:   {import_response.failed_rag_files_count}")
if import_response.failed_rag_files_count:
    print("WARNING: some files failed to import - check their paths and formats.")


## Step 8: Create RAG Tool


In [None]:
# Expose the corpus to Gemini as a retrieval tool, built in small steps:
# retrieval settings -> corpus reference -> RAG store -> tool.
retrieval_config = rag.RagRetrievalConfig(top_k=5)  # presumably: results returned per query
corpus_resource = rag.RagResource(rag_corpus=corpus.name)
rag_store = rag.VertexRagStore(
    rag_resources=[corpus_resource],
    rag_retrieval_config=retrieval_config,
)
rag_tool = Tool.from_retrieval(retrieval=rag.Retrieval(source=rag_store))

print("‚úÖ RAG tool created!")


## Step 9: Initialize Gemini with RAG


In [None]:
# Model: gemini-2.0-flash-001.
# If it is unavailable in your project, gemini-1.5-flash-001 is the suggested
# fallback. Note that projects created after April 29, 2025 may need prior
# Gemini usage.
model_settings = {
    "model_name": "gemini-2.0-flash-001",
    # Attaching the RAG tool lets the model retrieve from the corpus.
    "tools": [rag_tool],
    # Instruct the model to answer from the retrieved context only.
    "system_instruction": "Answer using ONLY the provided context.",
}
model = GenerativeModel(**model_settings)

print("‚úÖ Model ready with RAG!")


## Step 10: Ask Questions!


In [None]:
# Colab form field: the question to send to the RAG-enabled model.
question = "What are the key features?"  # @param {type:"string"}

# Send the question to the model, then print the question and the reply text.
response = model.generate_content(question)
print(f"‚ùì {question}\n")
print(f"üí° {response.text}")


## Step 11: Interactive Q&A


In [None]:
def ask(q):
    """Send ``q`` to the RAG-enabled ``model`` and return the reply text."""
    reply = model.generate_content(q)
    return reply.text

# Sample questions to run through the model one after another.
questions = [
    "What is this document about?",
    "What are the main points?",
    "Are there any specific recommendations?"
]

# Rule printed under each answer to separate the Q&A pairs.
separator = "=" * 60
for prompt in questions:
    print(f"\n‚ùì {prompt}")
    print(f"üí° {ask(prompt)}\n" + separator)


## Cleanup (Optional)

In [None]:
# Optional: uncomment the next line to delete the corpus and avoid ongoing charges.
# rag.delete_corpus(corpus.name)

In [None]:
# rag_tool = Tool.from_retrieval(
#     retrieval=rag.Retrieval(
#         source=rag.VertexRagStore(
#             rag_resources=[rag.RagResource(rag_corpus=corpus.name)],
#             rag_retrieval_config=rag.RagRetrievalConfig(top_k=5)
#         )
#     )
# )

# print("‚úÖ RAG tool created!")



In [None]:
# # Note: Using gemini-2.0-flash-001
# # If you encounter issues, try gemini-1.5-flash-001
# # Note: New projects after April 29, 2025 may need prior Gemini usage

# model = GenerativeModel(
#     model_name="gemini-2.0-flash-001",
#     tools=[rag_tool],
#     system_instruction="Answer using ONLY the provided context."
# )

# print("‚úÖ Model ready with RAG!")

In [None]:
# question = "What are the key features?"  # @param {type:"string"}

# response = model.generate_content(question)
# print(f"‚ùì {question}\n")
# print(f"üí° {response.text}")

In [None]:
# def ask(q):
#     return model.generate_content(q).text

# # Try multiple questions
# questions = [
#     "What is this document about?",
#     "What are the main points?",
#     "Are there any specific recommendations?"
# ]

# for q in questions:
#     print(f"\n‚ùì {q}")
#     print(f"üí° {ask(q)}\n" + "="*60)

## Next Steps
- Notebook 3: Document AI for complex PDFs
- Notebook 4: Production scaling patterns

In [None]:
# Optional: uncomment the next line to delete the corpus and avoid ongoing charges.
# rag.delete_corpus(corpus.name)