In [9]:
# Install required libraries (run once per Colab/Jupyter session)
!pip install --upgrade google-cloud-bigquery --quiet
!pip install --upgrade google-generativeai --quiet

In [10]:
# Import necessary modules
import getpass
import os

from google.cloud import bigquery
import google.generativeai as genai
from IPython.display import display, Markdown


In [11]:
# --------------------------- #
#     MODEL CONFIGURATION     #
# --------------------------- #

# Identifier of the Gemini model that initialize_model() will load
MODEL_NAME = "gemini-2.5-pro-preview-06-05"

# Safety settings: every listed harm category shares the same blocking threshold
SAFETY_SETTINGS = dict.fromkeys(
    (
        'HARM_CATEGORY_HARASSMENT',
        'HARM_CATEGORY_HATE_SPEECH',
        'HARM_CATEGORY_SEXUALLY_EXPLICIT',
        'HARM_CATEGORY_DANGEROUS_CONTENT',
    ),
    'BLOCK_MEDIUM_AND_ABOVE',
)

# Instructions placed at the top of every prompt: answer only from the
# supplied content, otherwise reply with a fixed apology.
SYSTEM_PROMPT = """
You are a helpful assistant. Answer the user's question using only the content provided.
If the answer is not present in the content, say "Sorry, I don't have that information."
"""


In [12]:
def configure_gemini():
    """
    Configures the Gemini API using an environment variable or a hidden prompt.

    The key is read from the ``GEMINI_API_KEY`` environment variable. If that
    is unset or blank, the user is prompted via ``getpass`` so the typed key is
    not echoed into the cell output (and therefore not saved with the notebook).

    Returns:
        bool: True if ``genai.configure`` was called successfully; False if no
        key was supplied or configuration raised (the error is printed).
    """
    try:
        # Treat a missing or whitespace-only environment value as "not set".
        api_key = (os.environ.get("GEMINI_API_KEY") or "").strip()
        if not api_key:
            # getpass hides the typed value; input() would echo the secret
            # into the notebook output.
            api_key = getpass.getpass("🔐 Enter your Gemini API key: ").strip()

        if not api_key:
            raise ValueError("Gemini API key is required.")

        genai.configure(api_key=api_key)
        print("✅ Gemini API configured successfully.")
        return True
    except Exception as e:
        print(f"❌ Gemini configuration failed: {e}")
        return False


def initialize_model():
    """
    Builds the Gemini GenerativeModel from the notebook-level configuration.

    Returns:
        The model created from MODEL_NAME and SAFETY_SETTINGS, or None if
        construction raised (the error is printed).
    """
    try:
        model_options = {
            "model_name": MODEL_NAME,
            "safety_settings": SAFETY_SETTINGS,
        }
        gemini = genai.GenerativeModel(**model_options)
        print(f"✅ Gemini model '{MODEL_NAME}' initialized.")
    except Exception as err:
        print(f"❌ Failed to initialize model: {err}")
        gemini = None
    return gemini


In [13]:
def initialize_bigquery_client(project_id: str):
    """
    Creates a BigQuery client bound to the given GCP project.

    Args:
        project_id: GCP project ID passed to ``bigquery.Client``.

    Returns:
        The new client, or None if construction raised (the error is printed).
    """
    try:
        bq = bigquery.Client(project=project_id)
        print(f"✅ BigQuery client initialized for project: {project_id}")
    except Exception as err:
        print(f"❌ BigQuery initialization failed: {err}")
        bq = None
    return bq


In [14]:
def retrieve_context_from_bigquery(user_input, bq_client):
    """
    Executes a BigQuery vector search to find the most relevant content.

    The user's text is sent as a named query parameter (``@user_input``)
    instead of being spliced into the SQL text, so apostrophes or other SQL
    syntax in the question cannot break or alter the query.

    Args:
        user_input (str): The question to embed and search for.
        bq_client: BigQuery client used to run the query.

    Returns:
        str: Top-matching document content, or an empty string if there is no
        match or the query fails (the error is printed).
    """
    # Plain (non-f) string: the only dynamic value is bound via @user_input.
    query = """
        SELECT
            query.query,
            base.content
        FROM
            VECTOR_SEARCH(
                TABLE `CustomerReview.customer_reviews_embedded`,
                'ml_generate_embedding_result',
                (
                    SELECT
                        ml_generate_embedding_result,
                        content AS query
                    FROM
                        ML.GENERATE_EMBEDDING(
                            MODEL `CustomerReview.Embeddings`,
                            (SELECT @user_input AS content)
                        )
                ),
                top_k => 1,
                options => '{"fraction_lists_to_search": 0.01}'
            );
        """
    try:
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ScalarQueryParameter("user_input", "STRING", user_input)
            ]
        )
        rows = bq_client.query(query, job_config=job_config).result()

        # top_k => 1, so only the first row (if any) is of interest.
        top_row = next(iter(rows), None)
        if top_row is None:
            return ""

        # Guard against a NULL content column so the return type stays str.
        return top_row.content or ""

    except Exception as e:
        print(f"\n⚠️ BigQuery Error: {e}\n")
        return ""


In [15]:
def get_rag_response(user_input, context, model):
    """
    Asks Gemini to answer a question from the retrieved BigQuery context.

    The prompt is SYSTEM_PROMPT followed by labelled "Content", "Question"
    and "Answer" sections.

    Returns:
        str: The model's reply with surrounding whitespace removed, or a fixed
        apology if anything raised while building or sending the prompt.
    """
    try:
        # Sections are joined with single newlines; the trailing "\n" inside a
        # section yields the blank line that separates it from the next label.
        sections = (
            f"\n{SYSTEM_PROMPT}\n",
            "Content:",
            f"{context}\n",
            "Question:",
            f"{user_input}\n",
            "Answer:",
        )
        reply = model.generate_content("\n".join(sections))
        return reply.text.strip()

    except Exception as err:
        print(f"❌ Gemini error: {err}")
        return "Sorry, I couldn't process your request. Please try again."


In [16]:
# --------------------------- #
#        MAIN EXECUTION       #
# --------------------------- #

# GCP project that hosts the BigQuery dataset (replace with your own project ID)
PROJECT_ID = "qwiklabs-gcp-02-79b6e8a77529"

# Bring up BigQuery first, then Gemini; the chat loop needs both to be ready.
bq_client = initialize_bigquery_client(PROJECT_ID)

if not configure_gemini():
    print("❌ Gemini API setup failed.")
else:
    model = initialize_model()

    if not (model and bq_client):
        print("❌ Setup incomplete. Check model or BigQuery client.")
    else:
        # Welcome banner (three lines followed by a blank line).
        print(
            "\n--- 🤖 BigQuery-RAG Chatbot is ready! ---",
            "💬 Ask any question based on indexed content.",
            "🛑 Type 'quit' or 'exit' to end the session.\n",
            sep="\n",
        )

        while True:
            try:
                user_input = input("👤 You: ").strip()

                # Blank line: ask again without querying anything.
                if not user_input:
                    print("⚠️ Please type a valid question.")
                    continue

                # Explicit exit commands end the session.
                if user_input.lower() in ("quit", "exit"):
                    print("\n👋 Chatbot: Goodbye! Stay curious. 🚀")
                    break

                # Retrieval: fetch the single best-matching document.
                context = retrieve_context_from_bigquery(user_input, bq_client)
                if not context:
                    print("🤖 Chatbot: Sorry, I couldn't find relevant content.\n")
                    continue

                display(Markdown(f"📚 **Context Retrieved:**\n```\n{context}\n```"))

                # Generation: let Gemini answer using only that context.
                response = get_rag_response(user_input, context, model)
                display(Markdown(f"**🤖 Gemini Answer:** {response}"))

            except KeyboardInterrupt:
                # Interrupting the kernel stops the chat without a traceback.
                print("\n🛑 Session interrupted.")
                break


✅ BigQuery client initialized for project: qwiklabs-gcp-02-79b6e8a77529
🔐 Enter your Gemini API key: [REDACTED]
✅ Gemini API configured successfully.
✅ Gemini model 'gemini-2.5-pro-preview-06-05' initialized.

--- 🤖 BigQuery-RAG Chatbot is ready! ---
💬 Ask any question based on indexed content.
🛑 Type 'quit' or 'exit' to end the session.

👤 You: When was Aurora Bay founded?


📚 **Context Retrieved:**
```
Question: When was Aurora Bay founded? Answer: Aurora Bay was founded in 1901 by a group of fur traders who recognized the region’s strategic coastal location.
```

**🤖 Gemini Answer:** 1901

👤 You: When are the town council meetings held?


📚 **Context Retrieved:**
```
Question: When are the town council meetings held? Answer: Town council meetings are held every second Tuesday of the month at 6 PM in the Town Hall conference room. Meetings are open to the public.
```

**🤖 Gemini Answer:** Town council meetings are held every second Tuesday of the month at 6 PM in the Town Hall conference room.

👤 You: When is the annual Salmon Derby?


📚 **Context Retrieved:**
```
Question: When is the annual Salmon Derby? Answer: The annual Salmon Derby takes place in early July, usually spanning three days. It attracts anglers from across the region.
```

**🤖 Gemini Answer:** The annual Salmon Derby takes place in early July, usually spanning three days.

👤 You: exit

👋 Chatbot: Goodbye! Stay curious. 🚀
