<a href="https://colab.research.google.com/github/Asaad972/CollabFirstNoteBook/blob/main/HW03_GIRAFFE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# CELL 1: Package Installation & Setup
import importlib.util, sys, subprocess

def ensure(pkg, import_name=None):
    """Checks if a package is installed; if not, installs it."""
    name = import_name or pkg
    if importlib.util.find_spec(name) is None:
        print(f"Installing {pkg}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", pkg])

# 1. Core Data & ML
ensure("pandas", "pandas")
ensure("numpy", "numpy")
ensure("sentence-transformers", "sentence_transformers")
ensure("faiss-cpu", "faiss")
ensure("pymupdf", "fitz")       # For PDF reading (if used later)
ensure("transformers", "transformers")

# 2. NLP
ensure("nltk", "nltk")

# 3. UI & Visualization
ensure("ipywidgets", "ipywidgets")
ensure("matplotlib", "matplotlib")

# 4. Cloud & Database
ensure("firebase-admin", "firebase_admin")

print("‚úÖ Dependencies ready")

In [None]:
# CELL 2: Imports & Resources (Run once)

# --- Standard Library ---
import io
import re
import sys
import json
import base64
import textwrap
import requests
from datetime import datetime, timezone
from collections import defaultdict

# --- Data Science & Math ---
import numpy as np
import pandas as pd

# --- Visualization & UI ---
import ipywidgets as widgets
from IPython.display import display, clear_output
import matplotlib.pyplot as plt
from PIL import Image

# --- Machine Learning & AI ---
import faiss
import google.generativeai as genai
from transformers import pipeline
from sentence_transformers import SentenceTransformer

# --- Firebase & Cloud ---
import firebase_admin
from firebase_admin import credentials, firestore
from google.colab import userdata


# --- Time ---
from datetime import datetime, timezone
import time

# --- NLP (NLTK) ---
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from nltk.tokenize import word_tokenize

# --- Download NLTK Resources ---
# Using quiet=True to keep output clean
nltk.download("stopwords", quiet=True)
nltk.download("punkt", quiet=True)
nltk.download("wordnet", quiet=True)
nltk.download("omw-1.4", quiet=True)
nltk.download("punkt_tab", quiet=True)

print("‚úÖ Imports ready + NLTK resources downloaded")

In [None]:
# CELL 3: Store Classes (Vector Store + Inverted Index)
# =====================================================

# Vector Store (Your original code, unchanged)
class SimpleVectorStore:
    """Simple in-memory vector store (fallback)"""

    def __init__(self):
        self.documents = []
        self.embeddings = []   # list of numpy arrays
        self.metadatas = []
        self.ids = []
        print(" SimpleVectorStore initialized")

    def add(self, embeddings, documents, metadatas, ids):
        # Ensure numpy arrays
        embeddings = [np.asarray(e, dtype=np.float32) for e in embeddings]
        self.embeddings.extend(embeddings)
        self.documents.extend(documents)
        self.metadatas.extend(metadatas)
        self.ids.extend(ids)
        print(f" Added {len(documents)} documents to simple vector store")

    def query(self, query_embeddings, n_results=5):
        if not self.embeddings:
            return {'ids': [[]], 'documents': [[]], 'metadatas': [[]], 'distances': [[]]}

        q = np.asarray(query_embeddings[0], dtype=np.float32)
        E = np.vstack(self.embeddings)  # shape: (N, d)

        # cosine similarity without sklearn
        q_norm = np.linalg.norm(q) + 1e-12
        E_norm = np.linalg.norm(E, axis=1) + 1e-12
        sims = (E @ q) / (E_norm * q_norm)

        top_idx = np.argsort(sims)[::-1][:n_results]

        return {
            'ids': [[self.ids[i] for i in top_idx]],
            'documents': [[self.documents[i] for i in top_idx]],
            'metadatas': [[self.metadatas[i] for i in top_idx]],
            'distances': [[float(1 - sims[i]) for i in top_idx]]  # distance-like
        }

    def count(self):
        return len(self.documents)


# Inverted Index (UPDATED with Tutorial Logic)
class InvertedIndexStore:
    """
    Updated Structure: term -> {doc_id: frequency}
    Includes 'search' method for ranking by Matches + Frequency
    """

    def __init__(self):
        # Maps term -> dictionary of {doc_id: count}
        self.term_to_doc_freqs = defaultdict(lambda: defaultdict(int))
        print(" InvertedIndexStore initialized (Frequency Aware)")

    def add_occurrence(self, term: str, doc_id: str, count: int = 1):
        # Store the frequency
        self.term_to_doc_freqs[term][doc_id] += count

    def get_docids(self, term: str):
        return sorted(list(self.term_to_doc_freqs.get(term, {}).keys()))

    def count_terms(self) -> int:
        return len(self.term_to_doc_freqs)

    def to_required_format(self):
        # [{"term": ..., "DocIDs": [...]}, ...]
        return [{"term": t, "DocIDs": sorted(list(doc_freqs.keys()))}
                for t, doc_freqs in sorted(self.term_to_doc_freqs.items())]

    def search(self, query_words, num_results=5):
        """
        Rank docs by:
        1. 'matches': Number of unique query terms found
        2. 'total_freq': Total count of those terms
        """
        if not query_words:
            return []

        # doc_id -> {'matches': 0, 'total_freq': 0}
        doc_scores = defaultdict(lambda: {'matches': 0, 'total_freq': 0})

        for word in query_words:
            if word in self.term_to_doc_freqs:
                for doc_id, freq in self.term_to_doc_freqs[word].items():
                    doc_scores[doc_id]['matches'] += 1
                    doc_scores[doc_id]['total_freq'] += freq

        # Convert to list
        ranked_results = [
            (doc_id, scores['matches'], scores['total_freq'])
            for doc_id, scores in doc_scores.items()
        ]

        # SORT: Matches (Desc) -> Total Frequency (Desc)
        ranked_results.sort(key=lambda x: (x[1], x[2]), reverse=True)

        return ranked_results[:num_results]

print(" Store classes + Inverted index classes defined")

In [None]:
# CELL 4: Core setup (custom stopwords + stemming + embedding model + FAISS)

# We remove these words because they are very frequent function words (articles, prepositions, pronouns).
# They usually do not add topic meaning, but they increase index size and add noise to retrieval.
CUSTOM_STOPWORDS = {
    "the","a","an","and","or","but",
    "to","of","in","on","at","for","from","by","with","as",
    "is","are","was","were","be","been","being",
    "this","that","these","those",
    "it","its","they","them","their","we","our","you","your",
    "i","me","my","he","him","his","she","her",
    "not","no","do","does","did","doing"
}

# Chops off word endings to find the "root" (stem)
stemmer = PorterStemmer()

def preprocess_text(text: str):
    """
    Returns list of terms for indexing:
    - lowercase
    - tokenize
    - keep alphabetic tokens only
    - remove custom stopwords
    - apply stemming
    """
    text = text.lower()
    tokens = word_tokenize(text)
    terms = []
    for tok in tokens:
        if tok.isalpha() and tok not in CUSTOM_STOPWORDS:
            terms.append(stemmer.stem(tok))
    return terms

# --- Embedding model (for semantic retrieval) ---
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# --- FAISS index (stores embeddings for doc-level retrieval) ---
faiss_index = None
vector_dim = None

# Parallel stores (FAISS row -> doc data)
vector_doc_ids = []   # doc_id
vector_texts = []     # full doc text

print(" Core setup ready (custom stopwords + stemming + embeddings + FAISS)")

In [None]:
# --- NEW CELL: GEMINI SETUP (Universal Fix) ---

# Your encoded key
encoded_str = "CkFJemFTeUJ3TjVpV2tleUJsUmRmVl9YdlljM2ZQZHA5bERSOFJ5SQo="

# 1. Decode
decoded_bytes = base64.b64decode(encoded_str)

# 2. Convert to string and STRIP whitespace/newlines
MY_API_KEY = decoded_bytes.decode("utf-8").strip()


genai.configure(api_key=MY_API_KEY)

# 1. DYNAMICALLY FIND A WORKING MODEL
print("üîÑ Connecting to Google API to find valid models...")
valid_model_name = ""

try:
    for m in genai.list_models():
        if 'generateContent' in m.supported_generation_methods:
            # We prefer 1.5 Flash, but we will take ANYTHING that works
            if "flash" in m.name:
                valid_model_name = m.name
                break # Found the best one, stop looking
            elif "gemini-pro" in m.name and not valid_model_name:
                valid_model_name = m.name

    if not valid_model_name:
        # Fallback if the loop found nothing
        valid_model_name = "models/gemini-pro"

    print(f"‚úÖ FOUND VALID MODEL: {valid_model_name}")

except Exception as e:
    print(f"‚ùå Error listing models: {e}")
    print("Defaulting to 'models/gemini-pro'")
    valid_model_name = "models/gemini-pro"


def ask_gemini(context, user_question):
    if not context: return "No relevant info found."

    prompt = f"""
    Answer based ONLY on this context:
    {context}

    Question: {user_question}
    """

    try:
        # Use the variable we found earlier
        model = genai.GenerativeModel(valid_model_name)
        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        return f"Error: {e}"

print("‚úÖ Setup Complete. Ready to run RAG.")

In [None]:
# CELL 5: Wikipedia source links (seed documents for the corpus)

wiki_links = [
    "https://en.wikipedia.org/wiki/Plant_disease",
    "https://en.wikipedia.org/wiki/Plant_pathology",
    "https://en.wikipedia.org/wiki/Fungus",
    "https://en.wikipedia.org/wiki/Bacterial_wilt",
    "https://en.wikipedia.org/wiki/Powdery_mildew"
]

print("Wikipedia links used:")
for i, link in enumerate(wiki_links, 1):
    print(f"{i}. {link}")


In [None]:
# CELL 6: Load documents from Wikipedia (API fetch + normalization + metadata)

WIKI_API = "https://en.wikipedia.org/w/api.php"

# Wikipedia blocks requests without a proper User-Agent sometimes
HEADERS = {
    "User-Agent": "HW02-Cloud-RAG/1.0 (student project; contact: abrahem.sadekk@gmail.com)"
}

# Extract the actual topic from a messy link
def title_from_wiki_url(url: str) -> str:
    if "/wiki/" not in url:
        raise ValueError(f"Unsupported Wikipedia URL: {url}")
    title = url.split("/wiki/", 1)[1]
    title = title.split("#", 1)[0]      # remove anchors
    title = title.replace("_", " ")
    return title

# This is the "Worker Bee" function. It talks to Wikipedia's servers
def fetch_page_extract_by_title(title: str):
    # This dictionary tells Wikipedia exactly what you want
    params = {
        "action": "query",
        "format": "json",
        "prop": "extracts|info",
        "titles": title,
        "inprop": "url",
        "explaintext": True,
        "redirects": 1,   # follow redirects
        "origin": "*"     # helps in some environments
    }
    r = requests.get(WIKI_API, params=params, headers=HEADERS, timeout=30)
    r.raise_for_status()

    pages = r.json()["query"]["pages"]
    page = next(iter(pages.values()))

    # Handle missing page
    if "missing" in page:
        return {"pageid": None, "title": title, "url": "", "text": ""}

    return {
        "pageid": page.get("pageid"),
        "title": page.get("title", title),
        "url": page.get("fullurl", ""),
        "text": page.get("extract", "")
    }

def slugify(s: str) -> str:
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "-", s)
    return s.strip("-")

def load_docs_from_wiki_links(wiki_links):
    docs = {}
    docs_meta = {}

    for url in wiki_links:
        title = title_from_wiki_url(url)
        data = fetch_page_extract_by_title(title)

        text = (data.get("text") or "").strip()
        if not text:
            print(f"Empty/blocked page: {title} | {url}")
            continue

        doc_id = f"wiki_{slugify(data['title'])}"
        docs[doc_id] = text
        docs_meta[doc_id] = {
            "title": data["title"],
            "url": data.get("url") or url,
            "source": "wikipedia",
            "pageid": data.get("pageid"),
        }

        print(f"Loaded: {data['title']} -> {doc_id} | chars={len(text)}")

    return docs, docs_meta

docs, docs_meta = load_docs_from_wiki_links(wiki_links)
print("Docs loaded:", len(docs))


In [None]:
# CELL 7: Firebase Initialization (Hybrid Safe Mode)

# --- 1. Public Configuration (Safe to share) ---
# We keep the standard info visible so the code is easy to understand.
config = {
  "type": "service_account",
  "project_id": "hw02-cloud-inverted-index",
  "private_key_id": "437db7abaab45e69cf2bf0c22aa8c2e23cbbc71e",
  "client_email": "firebase-adminsdk-fbsvc@hw02-cloud-inverted-index.iam.gserviceaccount.com",
  "client_id": "105185385505390955098",
  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
  "token_uri": "https://oauth2.googleapis.com/token",
  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-fbsvc%40hw02-cloud-inverted-index.iam.gserviceaccount.com",
  "universe_domain": "googleapis.com"
}

# --- 2. Private Key (Hidden) ---
scrambled_key = "LS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tCk1JSUV2QUlCQURBTkJna3Foa2lHOXcwQkFRRUZBQVNDQktZd2dnU2lBZ0VBQW9JQkFRQzVTWERrU0NNYmJ2bTMKOTNWbzFvOVpRTUwwRUdwbDNhaUdaekl6Y29ZYUk2S2FmNjk3NkxuRkxjdyt3M2RmZ09JVDZPTWdtV3FuU2FGeApYR0FsQnZ4Z2t4ekFoWUhveEk1Um9abjl5TnYzYitoQXJXam5GN2ZXak13ZXluUkVCdmRBNExzZ0VxUU1XWHVRCkQxUlMrMXo0WG02ZTFjZUtPOVB4VkpCMXo3dEdTQk1KTjBWOGJHMmFKMHR4bzF3RzNacm1yYk1kZ1hJVHdrUGYKa2lCSnpwME12c2ovZndvZ3l5WmZBR3JVVTlScS8vU2lBQ1pwMnhFWXNLL1BjOERFU0ZoMUtPK3k1ZDlxNGM1SQp6S3FNRGRJQkc2V1VBSGZnbHhvRFRlbzRoNENnZ0wvcXUrS3hZdWxmeDEydEpPa1hKZzUzYlJGY2lKOGtROW5BCk9rQXNtZTJkQWdNQkFBRUNnZ0VBRFByUEZMN1U3c1FNYkUzQ2hOQ2JCQ2FjUVpxd3lXZ0l1VG1iYzYwdkpiK2YKVVhGbWFxaTM4czh0Z3F3UXZiajZuV2h3R01XR2lpZUhUcmlvNTQ4Z3VPYzFXV3RBMlh5RGQ4WjVVaVR5KzlkMApEcXZYTUhFaDZMNitRZDN1M1NFYnl3aXpNeUQ3S3Y1TndKN0NTbm5mWG1ySEZ3dGt5aE04MnFnUTRwL2x2NXVJClpSSWZRWnl3cTBTUkJRai9vL0lKdFVVR1A0TFFCUmkzWDd0ZTFXeFhPeHF0TjNuUHhNQ2NRR3g4UmxVeVVJemoKRmd3SllaRlZZSHhMbUcwaXdnQkdiSmJIQ0ZaSUNCQXZpNVZoWTVERXRYcm4vdE44MG9nQWFOS0ViT2lmcG5MWApvc09BRW56Y1NPRWNkbmEvcFgzNXdvUHVyZDFNcytlV3JpNUZNQjdqb1FLQmdRRGtuUzFNS0VITWVBU25OMkxCCmJaZTc1K0JzdUl0UHkzVk9USGh1WklXUXFHSmFONkRLSmhUOW9pazUyb2REMHFvQVJjMVh6VXM2VWdzSDZ4OHUKRCtGeTlXUUFqME9qUkg3VUF1VTAzRkZCMnNXOVFDbGNhMUFONzl5T0dvcGNZUlRFb0pJalBRSndmbEE3bkM2VQprZ1RsK3djdVNKaFpkRk9hY3prTFEyNGlXUUtCZ1FEUGU1Rkp6cDhMRlFTdWdvU1lVTTBjaWVLb2oyUjBzK0Q2CnJmM1dwMkZ2ZEhzeDc4cXFBWDVKUHB5YVgrMXRpUXZCclVTOUExaUc0Vkc3Q0pjV0E4M2RKSG9SWHNkb1BPYnUKUGRLcGpDYnd0dVBuckZ5N0dnR1NhaWZhUi9sdUlKMDJ6eGNoL0VWVVFwUlZPUms3QmhJV3E3TmlaR2M4TWtyRgpYUjlhWEZCVTVRS0JnRjU0ZlNGOWVVTlBUVXowWEVEbVVzOTVrSW9jOEtTMnhQRG9OTlFaZ2dBM05QMW5BM0RGCnIrTG53ZldBVW1rNmdybStIbzdyN094YXZ1ZzB4eHUzd0VoTEUxb1AyYmw4TXBUVjVYV2tuWWVES2pkOGJoc2MKMVdZTStxMVdWbHE2VzJTdG5mWWwzZjR5bEdFdHR5bjU5VUE4TGNsNGdreGsvNjlSY2Y4dmpERnhBb0dBS2RwZgpRR2d4cE9ha2Z4OU01L3pFbzFFZEs2dGhORGxrMUt4c1cvUi9yeC9zQ2ZLNUN2b3FJMVJCK3RJRzd1V0tQWk5hCkhsYWljUExhcmNQWjFsTUdIK25QeGRrOG1FWlF2eFl4ZklvTkFObWp0NFFKWUtTcVZJS2RiMmE5WmYybU9Qd2wKU25HOCtuWkR2YjA2M2JFbnpQTHR5SmRBUytCSlBPNi8rRlpPemhFQ2dZQTZKU051Tk81UVpqSGx0cUtmeFZNWgo1UHFULzVoS2c5K1Y0elhLTzhvcjhxRkFOYUFQdTBtVEwwN2dSa3Fvem1TM25aeUJ5SzAvczBKK2J4SXhKcWJzCmNUSm1OeDkxejdwSFl0NE1TWnhvQU94dm1UaTlGWlMrRlVnM0tJUEpKVGJTYlBiZHBmQk5GZGhNOXpOZjRwc2UKQ250QVhOQlNDZW5yUXNIKzNMNXRiUT09Ci0tLS0tRU5EIFBSSVZBVEUgS0VZLS0tLS0K"

try:
    # We unlock the key and add it to the config
    config["private_key"] = base64.b64decode(scrambled_key).decode('utf-8')

    # Initialize Firebase
    cred = credentials.Certificate(config)

    if not firebase_admin._apps:
        firebase_admin.initialize_app(cred)

    db = firestore.client()
    print("Firestore connected successfully to:", db.project)

except Exception as e:
    print(f"Error: {e}")

In [None]:
# CELL 8: Build Index (Inverted Index + SMART CHUNKING)

# ==========================================
# PART 1: Build Inverted Index (Local Logic)
# ==========================================
# We always build this locally because it's fast and relies on the full text
inv_index = InvertedIndexStore()
print("Building Inverted Index...")

for doc_id, text in docs.items():
    terms = preprocess_text(text)
    term_counts = defaultdict(int)
    for t in terms:
        term_counts[t] += 1
    for t, count in term_counts.items():
        inv_index.add_occurrence(t, doc_id, count)

print(f" Inverted index built. Unique terms: {inv_index.count_terms()}")


# ==========================================
# PART 2: Smart Chunking (Cloud Check)
# ==========================================
# Goal: Get chunks from Firebase if they exist; otherwise create & upload them.

collection_name = "chunks"
col = db.collection(collection_name)

vector_doc_ids = []  # Maps row_id -> doc_id
vector_texts = []    # Maps row_id -> chunk text

# Check if we already have data in the cloud
# We fetch just 1 document to check existence
is_cloud_ready = len(list(col.limit(1).stream())) > 0

if is_cloud_ready:
    print(f"‚òÅÔ∏è Found chunks in Firestore collection '{collection_name}'. Downloading...")

    # Download everything from chunks collection
    # Note: For huge datasets, you would use pagination. For this homework, it's fine.
    all_chunks = col.stream()

    for doc in all_chunks:
        data = doc.to_dict()
        vector_texts.append(data['text'])
        vector_doc_ids.append(data['doc_id'])

    print(f"‚úÖ Downloaded {len(vector_texts)} chunks from Cloud.")

else:
    print(f"‚ö†Ô∏è No chunks found in Cloud. Generating locally & Uploading...")

    # --- Local Chunking Logic ---
    chunk_size = 500
    overlap = 50

    batch = db.batch()
    ops = 0
    chunk_counter = 0

    for doc_id, text in docs.items():
        # Sliding window split
        for i in range(0, len(text), chunk_size - overlap):
            chunk = text[i:i + chunk_size]
            if len(chunk) < 30: continue

            # Add to local lists (for FAISS)
            vector_texts.append(chunk)
            vector_doc_ids.append(doc_id)

            # Add to Firestore Batch (for Persistence)
            chunk_ref = col.document(f"chunk_{chunk_counter}")
            batch.set(chunk_ref, {
                "chunk_id": chunk_counter,
                "doc_id": doc_id,
                "text": chunk,
                "len": len(chunk)
            })

            chunk_counter += 1
            ops += 1

            # Commit batch if full
            if ops >= 400:
                batch.commit()
                print(f"  Uploaded batch of 400 chunks...")
                batch = db.batch()
                ops = 0

    # Final commit
    if ops > 0:
        batch.commit()

    print(f"‚úÖ Generated & Uploaded {len(vector_texts)} chunks to Firestore.")


# ==========================================
# PART 3: Build FAISS Index
# ==========================================
# Whether we got data from Cloud or Local, we now build the vector index
print("Building FAISS index...")

emb = embed_model.encode(vector_texts, convert_to_numpy=True, normalize_embeddings=True).astype("float32")

vector_dim = emb.shape[1]
faiss_index = faiss.IndexFlatIP(vector_dim)
faiss_index.add(emb)

print(f"‚úÖ FAISS built. Total Vectors: {faiss_index.ntotal}")

In [None]:
# CELL 9: Upload inverted index to Firestore (Safe Mode)

def upload_inverted_index(inv_index, db_client, collection_name="inverted_index", batch_size=400):
    """
    Uploads the inverted index to Firestore.
    SAFE MODE: Checks if collection is not empty before uploading.
    """
    col = db_client.collection(collection_name)

    # --- SAFETY CHECK ---
    # We try to get just 1 document to see if the collection exists
    existing_docs = list(col.limit(1).stream())
    if len(existing_docs) > 0:
        print(f"‚ö†Ô∏è Collection '{collection_name}' already contains data. Skipping upload to prevent overwrite.")
        return
    # --------------------

    # Ensure your inv_index object has this method.
    records = inv_index.to_required_format()

    batch = db_client.batch()
    ops = 0

    print(f"Starting upload of {len(records)} terms...")

    for r in records:
        term = r["term"]
        doc_ids = r["DocIDs"]

        # Sanitize ID: Firestore IDs cannot contain '/'
        safe_term = term.replace("/", "_")
        # Limit length to 1500 bytes (Firestore limit per ID)
        doc_id = safe_term[:1500]

        ref = col.document(doc_id)

        # We use .set() here because we already confirmed the collection was empty.
        batch.set(ref, {
            "term": term,         # Store original term inside the document
            "doc_ids": doc_ids,
            "df": len(doc_ids),
        })

        ops += 1
        if ops >= batch_size:
            batch.commit()
            print(f"  committed batch of {batch_size}...")
            batch = db_client.batch()
            ops = 0

    if ops > 0:
        batch.commit()

    print(f"‚úÖ Uploaded {len(records)} terms to Firestore collection '{collection_name}'")

# Execute the upload passing the 'db' from Cell 8
upload_inverted_index(inv_index, db)

In [None]:
# CELL 10: Upload Wikipedia document metadata to Firestore (documents collection)

def upload_wiki_meta(docs_meta, collection_name="documents", batch_size=400):
    """
    Uploads Wikipedia document metadata to Firestore.

    Each document is stored as:
      documents/{doc_id}

    Stored fields:
      - doc_id  : your internal document ID (e.g., wiki_plant-disease)
      - title   : Wikipedia page title
      - url     : Wikipedia page URL
      - source  : "wikipedia"
      - pageid  : Wikipedia page id (if available)

    This does NOT upload the full article text; it only uploads metadata.
    """
    col = db.collection(collection_name)

    batch = db.batch()
    ops = 0

    for doc_id, meta in docs_meta.items():
        ref = col.document(doc_id)
        batch.set(ref, {
            "doc_id": doc_id,
            "title": meta.get("title", ""),
            "url": meta.get("url", ""),
            "source": meta.get("source", "wikipedia"),
            "pageid": meta.get("pageid", None),
        }, merge=True)

        ops += 1
        if ops >= batch_size:
            batch.commit()
            batch = db.batch()
            ops = 0

    if ops > 0:
        batch.commit()

    print(f"Uploaded {len(docs_meta)} wiki docs to '{collection_name}'")

# Upload metadata for the loaded Wikipedia docs
upload_wiki_meta(docs_meta)


In [None]:
# CELL 11: Retrieval (Chunk-Aware)

def retrieve_top_docs(query: str, top_k: int = 5):
    """
    Retrieves the most relevant CHUNKS (paragraphs) from the database.
    """
    if faiss_index is None or faiss_index.ntotal == 0:
        return "FAISS index is empty. Build vectors first."

    # Embed query
    q_emb = embed_model.encode(
        [query],
        convert_to_numpy=True,
        normalize_embeddings=True
    ).astype("float32")

    # Search FAISS
    distances, indices = faiss_index.search(q_emb, top_k)

    lines = []
    lines.append(f"Query: '{query}'")
    lines.append("Context found in Knowledge Base:")
    lines.append("=" * 60)

    for rank, idx in enumerate(indices[0], start=1):
        if idx == -1: continue

        # The 'idx' now points to a CHUNK, not a full document
        doc_id = vector_doc_ids[idx]
        chunk_text = vector_texts[idx]

        # Get metadata
        meta = docs_meta.get(doc_id, {})
        title = meta.get("title", doc_id)
        score = float(distances[0][rank - 1])

        lines.append(f"--- Result {rank} (Source: {title} | Score: {score:.4f}) ---")
        lines.append(chunk_text) # This is the specific 500-char chunk
        lines.append("\n")

    return "\n".join(lines)

print("‚úÖ Retrieval function updated (Chunk-Aware)")

In [None]:
# CELL 12: RAG-style output (retrieval + "enriched" answer without OpenAI)
# We will: retrieve top docs, then produce a simple enriched response by extracting key sentences.

def split_sentences(text: str):
    # simple sentence split (good enough for baseline)
    parts = re.split(r'(?<=[.!?])\s+', re.sub(r"\s+", " ", text).strip())
    return [s for s in parts if len(s) > 30]

def rag_answer_without_llm(query: str, top_k: int = 3, max_sentences_per_doc: int = 2):
    if faiss_index is None or faiss_index.ntotal == 0:
        return "FAISS index is empty. Build vectors first."

    q_emb = embed_model.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
    distances, indices = faiss_index.search(q_emb, top_k)

    lines = []
    lines.append(f"Query: {query}")
    lines.append("=" * 60)

    # Retrieval section
    lines.append("Top retrieved documents:")
    retrieved = []
    for rank, idx in enumerate(indices[0], start=1):
        if idx == -1:
            continue
        doc_id = vector_doc_ids[idx]
        title = docs_meta.get(doc_id, {}).get("title", "")
        score = float(distances[0][rank - 1])
        retrieved.append((doc_id, title, score))
        lines.append(f"{rank}) {doc_id} | {title} | similarity: {score:.4f}")
    lines.append("=" * 60)

    # Enriched response (extractive, no LLM)
    lines.append("Enriched response (extractive, no LLM):")
    q_terms = set(preprocess_text(query))

    for doc_id, title, score in retrieved:
        text = docs[doc_id]
        sents = split_sentences(text)

        # score sentences by overlap with query terms (stems)
        scored = []
        for s in sents:
            s_terms = set(preprocess_text(s))
            overlap = len(q_terms & s_terms)
            if overlap > 0:
                scored.append((overlap, s))

        scored.sort(key=lambda x: x[0], reverse=True)
        best = [s for _, s in scored[:max_sentences_per_doc]]

        lines.append(f"- Source: {doc_id} | {title}")
        if best:
            for b in best:
                lines.append(f"  ‚Ä¢ {b}")
        else:
            lines.append("  ‚Ä¢ (No strong matching sentences found)")
        lines.append("-" * 60)

    return "\n".join(lines)

print(" RAG-style (no OpenAI) function ready")


In [None]:
# CELL 13: RAG Logic Wrapper

def get_rag_response(query: str):
    """
    Core RAG logic: Retrieves docs and generates an answer.
    Call this function from your GUI (Screen E) or any other interface.
    """
    # 1. Retrieve top docs (using the function from previous cells)
    context = retrieve_top_docs(query, top_k=3)

    # 2. Safety check: Ensure the index isn't empty
    if "FAISS index is empty" in context:
        return "‚ö†Ô∏è Error: Index is empty. Please run the vector build cell first."

    # 3. Generate Answer using Gemini
    return ask_gemini(context, query)

print("‚úÖ RAG Logic ready. Use 'get_rag_response(query)' to get answers.")

In [None]:
##CHATBOT Gemini

In [None]:
# CELL 01: Define "Patterns" (System Instructions)

# We tell Gemini how to behave to mimic the NLTK patterns
system_instruction = """
You are a helpful chatbot.
- If user says 'hi' or 'hello', answer: 'Hello there!'
- If user asks 'what is your name', answer: 'I am a Gemini Chatbot.'
- If user asks 'how are you', answer: 'I am doing well, thank you!'
- Otherwise, answer helpfully and concisely.
"""

print("‚úÖ CELL 01: Patterns defined.")

In [None]:
# CELL 02: Build the Chatbot

# We use the 'valid_model_name' from your existing setup code
model = genai.GenerativeModel(
    valid_model_name,
    system_instruction=system_instruction
)

# Start the chat session (equivalent to initializing NLTK Chat)
chat_session = model.start_chat(history=[])

print(f"‚úÖ CELL 02: Chatbot built using {valid_model_name}.")

In [None]:
#FROM NOW ON. ASAAD'S PART

In [None]:
# CELL 001: Firebase Initialization
import firebase_admin
from firebase_admin import credentials, firestore
from google.colab import userdata
import json
import requests
import pandas as pd

# Check if Firebase is already running to avoid re-initialization error
if not firebase_admin._apps:
    try:
        # Use Colab Secrets instead of a file path
        key_content = userdata.get('FIREBASE_KEY')
        key_dict = json.loads(key_content)
        cred = credentials.Certificate(key_dict)
        firebase_admin.initialize_app(cred)
        print("‚úÖ Firebase app initialized using Colab secrets.")
    except Exception as e:
        print(f"‚ùå Error initializing Firebase: {e}")

# Get the client (works even if initialized in previous cells)
try:
    db = firestore.client()
    print("‚úÖ Connected to Firestore in project:", db.project)
except:
    db = None
    print("‚ö†Ô∏è DB not connected.")

In [None]:
# CELL 002: IoT Fetcher
BASE_URL = "https://server-cloud-v645.onrender.com"

def fetch_history(feed: str, limit: int = 30) -> pd.DataFrame:
    """Fetch IoT history from course server. Returns DataFrame with created_at,value."""
    try:
        resp = requests.get(f"{BASE_URL}/history", params={"feed": feed, "limit": int(limit)}, timeout=10)
        resp.raise_for_status()
        data = resp.json()
        if "data" not in data: return pd.DataFrame()

        df = pd.DataFrame(data["data"])
        df["created_at"] = pd.to_datetime(df["created_at"], errors="coerce")
        df["value"] = pd.to_numeric(df["value"], errors="coerce")
        df = df.dropna(subset=["created_at", "value"]).sort_values("created_at")
        return df
    except Exception as e:
        print(f"Error fetching IoT: {e}")
        return pd.DataFrame()

print("‚úÖ IoT Fetcher ready.")

In [None]:
# CELL 003-A: UI Helper Functions (REQUIRED)
import ipywidgets as widgets
from IPython.display import display, clear_output

def create_card(title, subtitle, content_list):
    """Creates a visible card-like container for the UI."""
    header = widgets.HTML(f"""
    <div style="margin-bottom: 10px;">
        <h3 style="margin: 0; color: #333;">{title}</h3>
        <span style="font-size: 12px; color: #777;">{subtitle}</span>
    </div>
    <hr style="border: 0; border-top: 1px solid #eee; margin: 10px 0;">
    """)
    card_items = [header] + content_list
    card = widgets.VBox(
        card_items,
        layout=widgets.Layout(
            width='98%', border='1px solid #ddd', padding='15px',
            margin='10px 0', border_radius='8px', background_color='#fafafa'
        )
    )
    return card

print("‚úÖ UI Helpers defined.")

In [None]:
# =========================
# FULL GAMIFICATION BLOCK (XP always on top + lock name + edit button + xp persists by name)
# paste this in ONE cell, run it AFTER firebase init (db exists)
# then in your tabs cell use: app = widgets.VBox([header, xp_box, tabs])
# =========================

import ipywidgets as widgets
from IPython.display import display, clear_output
from datetime import datetime, timezone

# -------------------------
# LEVEL + PROGRESS
# -------------------------
def level_from_xp(xp: int) -> int:
    lvl = 1
    need = 100
    total = 0
    while xp >= total + need:
        total += need
        lvl += 1
        need += 50
    return lvl

def xp_progress(xp: int):
    lvl = level_from_xp(xp)
    total = 0
    need = 100
    for _ in range(1, lvl):
        total += need
        need += 50
    start_xp = total
    end_xp = total + need
    into = xp - start_xp
    span = max(1, end_xp - start_xp)
    pct = int((into / span) * 100)
    left = end_xp - xp
    return lvl, pct, left

# -------------------------
# FIRESTORE HELPERS
# -------------------------
def get_user_doc(uid: str):
    return db.collection("leaderboard").document(uid)

def get_or_create_profile(uid: str, name: str):
    ref = get_user_doc(uid)
    doc = ref.get()
    if not doc.exists:
        ref.set({"name": name, "xp": 0, "updated_at": datetime.now(timezone.utc)})
    else:
        ref.update({"name": name, "updated_at": datetime.now(timezone.utc)})
    return ref

# -------------------------
# XP UI (xp_box)
# -------------------------
name_input = widgets.Text(
    value="Asaad",
    description="Name:",
    style={'description_width': '60px'},
    layout=widgets.Layout(width="320px")
)

init_btn = widgets.Button(description="Start Session", layout=widgets.Layout(width="170px", height="40px"))
init_btn.add_class("btn-primary")

edit_btn = widgets.Button(description="‚úèÔ∏è", layout=widgets.Layout(width="50px", height="40px"))
edit_btn.disabled = True

xp_bar = widgets.IntProgress(value=0, min=0, max=100, description="XP:", bar_style="")
xp_label = widgets.HTML("<b>Level 1</b> | 0 XP")
xp_hint = widgets.HTML("")

# global current user id (persists by name after Start Session)
USER_ID = None

def normalize_user_id(name: str) -> str:
    # stable id: "Asaad Bdarneh" -> "asaad_bdarneh"
    s = (name or "").strip().lower()
    if not s:
        s = "guest"
    s = s.replace(" ", "_")
    # keep it firestore-friendly
    keep = []
    for ch in s:
        if ch.isalnum() or ch in ["_", "-"]:
            keep.append(ch)
    return "".join(keep)[:60] or "guest"

def update_xp_ui(xp: int, reason: str = ""):
    xp = int(xp)
    lvl, pct, left = xp_progress(xp)
    xp_bar.value = pct
    xp_label.value = f"<b>Level {lvl}</b> | {xp} XP"
    if reason:
        xp_hint.value = f"<span style='color:#64748b'>next level in {left} xp</span> <span style='color:#4f46e5'>(+{reason})</span>"
    else:
        xp_hint.value = f"<span style='color:#64748b'>next level in {left} xp</span>"

def lock_name():
    name_input.disabled = True
    init_btn.disabled = True
    edit_btn.disabled = False

def unlock_name():
    name_input.disabled = False
    init_btn.disabled = False
    edit_btn.disabled = True

def init_profile(_=None):
    global USER_ID
    nm = (name_input.value or "").strip()
    if not nm:
        nm = "guest"

    USER_ID = normalize_user_id(nm)

    # create/read profile
    get_or_create_profile(USER_ID, nm)
    doc = get_user_doc(USER_ID).get()
    xp = int((doc.to_dict() or {}).get("xp", 0))

    update_xp_ui(xp, "session")
    lock_name()

def on_edit(_=None):
    # allow changing name (this will switch to another profile when you press Start Session again)
    unlock_name()

init_btn.on_click(init_profile)
edit_btn.on_click(on_edit)

xp_box = widgets.VBox([
    widgets.HBox([edit_btn, name_input, init_btn, xp_label]),
    xp_bar,
    xp_hint
])

# -------------------------
# AWARD XP (call this everywhere)
# -------------------------
def award_xp(amount: int, reason: str = ""):
    global USER_ID
    try:
        # if user didn't start session yet, auto-start with current name
        if not USER_ID:
            nm = (name_input.value or "").strip() or "guest"
            USER_ID = normalize_user_id(nm)
            get_or_create_profile(USER_ID, nm)
            lock_name()

        ref = get_user_doc(USER_ID)
        doc = ref.get()

        if not doc.exists:
            nm = (name_input.value or "").strip() or USER_ID
            ref.set({"name": nm, "xp": 0, "updated_at": datetime.now(timezone.utc)})
            doc = ref.get()

        current = int((doc.to_dict() or {}).get("xp", 0))
        new_xp = current + int(amount)

        ref.update({"xp": new_xp, "updated_at": datetime.now(timezone.utc)})
        update_xp_ui(new_xp, reason)
    except Exception as e:
        print("xp update failed:", e)

# optional: show as empty until user starts session
update_xp_ui(0, "")


In [None]:
# ---------------------------------------------------------------------
# SCREEN G ‚Äî LEADERBOARD (standalone cell)
# ---------------------------------------------------------------------
leader_out = widgets.Output()

refresh_lb = widgets.Button(
    description="Refresh Leaderboard",
    layout=widgets.Layout(width="220px", height="44px")
)
refresh_lb.add_class("btn-primary")

def _render_leaderboard_cards(rows):
    cards = "<div style='display:flex; flex-direction:column; gap:12px;'>"
    for r in rows:
        rank = r["rank"]
        name = r["name"]
        lvl = r["level"]
        xp = r["xp"]
        pct = r["pct"]

        medal = "ü•á" if rank == 1 else ("ü•à" if rank == 2 else ("ü•â" if rank == 3 else ""))
        bg = "#eef2ff" if rank <= 3 else "#ffffff"
        border = "#c7d2fe" if rank <= 3 else "#e2e8f0"

        cards += f"""
        <div style="
            background:{bg};
            border:1px solid {border};
            border-radius:16px;
            padding:14px 16px;
            display:flex;
            align-items:center;
            gap:16px;
        ">
            <div style="
                width:40px; height:40px;
                border-radius:12px;
                background:#4f46e5;
                color:white;
                font-weight:900;
                display:flex;
                align-items:center;
                justify-content:center;
            ">
                {rank}
            </div>

            <div style="flex:1;">
                <div style="font-weight:800; color:#0f172a;">
                    {medal} {name}
                </div>
                <div style="color:#64748b; font-size:13px;">
                    Level {lvl} ‚Ä¢ {xp} XP
                </div>

                <div style="margin-top:6px; height:8px; background:#e2e8f0; border-radius:999px;">
                    <div style="width:{pct}%; height:8px; background:#4f46e5; border-radius:999px;"></div>
                </div>
            </div>

            <div style="text-align:right; min-width:70px;">
                <div style="font-weight:900;">{xp}</div>
                <div style="font-size:12px; color:#64748b;">XP</div>
            </div>
        </div>
        """
    cards += "</div>"
    return cards

def load_leaderboard(_=None):
    with leader_out:
        clear_output()
        try:
            docs = (
                db.collection("leaderboard")
                .order_by("xp", direction="DESCENDING")
                .limit(10)
                .stream()
            )

            rows = []
            rank = 1
            for d in docs:
                data = d.to_dict() or {}
                xp = int(data.get("xp", 0))
                lvl, pct, _ = xp_progress(xp)
                rows.append({
                    "rank": rank,
                    "name": data.get("name", d.id),
                    "level": lvl,
                    "xp": xp,
                    "pct": pct
                })
                rank += 1

            if not rows:
                display(widgets.HTML(
                    "<div style='color:#64748b'>No leaderboard data yet.</div>"
                ))
                return

            display(widgets.HTML(_render_leaderboard_cards(rows)))

        except Exception as e:
            print("leaderboard error:", e)

refresh_lb.on_click(load_leaderboard)
load_leaderboard()

screenG = create_card(
    "Leaderboard",
    "Top users by XP.",
    [refresh_lb, leader_out]
)
unlock_name()



In [None]:
# =========================
# Q4 Big Data (Week 9) ‚Äî Spark MapReduce on Firestore IoT DB

!pip -q install pyspark

from pyspark.sql import SparkSession
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt

spark = SparkSession.builder.appName("Week9_Spark_MapReduce_IoT").getOrCreate()

# -------------------------
# 1) Load data from Firestore (DB)
# -------------------------
COLL = "iot_history"

docs = []
for doc in db.collection(COLL).stream():
    d = doc.to_dict() or {}

    # safe parse
    soil = float(d.get("soil", 0))
    hum  = float(d.get("humidity", 0))
    temp = float(d.get("temperature", 0))

    ts = d.get("timestamp", None)
    # Firestore Timestamp -> python datetime
    if ts is not None and hasattr(ts, "to_datetime"):
        ts = ts.to_datetime()

    docs.append({
        "timestamp": ts,
        "soil": soil,
        "humidity": hum,
        "temperature": temp
    })

print("docs loaded from firestore:", len(docs))
if len(docs) == 0:
    raise Exception("No data in Firestore iot_history. Click Fetch Data first (and wait for a save).")

# -------------------------
# 2) Spark RDD
# -------------------------
rdd = spark.sparkContext.parallelize(docs)

# -------------------------
# 3) MapReduce
#    Example analysis: MIN/MAX for each metric
# -------------------------
def mapper(rec):
    # Map: emit key-value pairs (metric, (minCandidate, maxCandidate))
    return [
        ("temperature", (rec["temperature"], rec["temperature"])),
        ("humidity",    (rec["humidity"],    rec["humidity"])),
        ("soil",        (rec["soil"],        rec["soil"]))
    ]

def reducer(a, b):
    # Reduce: combine to (min, max)
    return (min(a[0], b[0]), max(a[1], b[1]))

result = dict(rdd.flatMap(mapper).reduceByKey(reducer).collect())

print("\nSpark MapReduce result (min,max):")
print(result)

# -------------------------
#    Slice example: Temperature over time (time-series)
# -------------------------
df = pd.DataFrame(docs).dropna(subset=["timestamp"]).sort_values("timestamp")

plt.figure(figsize=(10,4))
plt.plot(df["timestamp"], df["temperature"], marker="o")
plt.title("Temperature Over Time (slice by time)")
plt.xlabel("Time")
plt.ylabel("Temperature (¬∞C)")
plt.xticks(rotation=25)
plt.tight_layout()
plt.show()

print("\nFor Word: take a screenshot of the graph and paste it.")

plt.show()


In [None]:
# CELL 006: Main App Assembly (Updated with Smart Saving)

import pandas as pd
import ipywidgets as widgets
from transformers import pipeline
import requests
import io
from PIL import Image
from datetime import datetime, timezone
import time # Added for the throttling logic

# Global variable to track the last time we saved to Firebase
# We set it to 0 so it saves immediately on the very first click
last_iot_save_time = 0

BASE_URL = "https://server-cloud-v645.onrender.com"

def fetch_history(feed: str, limit: int = 30) -> pd.DataFrame:
    """Fetch IoT history from course server. Returns DataFrame with created_at,value."""
    try:
        resp = requests.get(f"{BASE_URL}/history", params={"feed": feed, "limit": int(limit)}, timeout=120)
        resp.raise_for_status()
        data = resp.json()
        if "data" not in data:
            raise ValueError(f"Server error: {data}")

        df = pd.DataFrame(data["data"])
        df["created_at"] = pd.to_datetime(df["created_at"], errors="coerce")
        df["value"] = pd.to_numeric(df["value"], errors="coerce")
        df = df.dropna(subset=["created_at", "value"]).sort_values("created_at")
        return df
    except Exception as e:
        print(f"Fetch error: {e}")
        return pd.DataFrame()

# ---------------------------------------------------------------------
# 1. MODEL SETUP
# ---------------------------------------------------------------------
MODEL_ID = "linkanjarad/mobilenet_v2_1.0_224-plant-disease-identification"
# load hugging face model here
print("‚è≥ Loading AI Model...")
clf = pipeline("image-classification", model=MODEL_ID)

# ---------------------------------------------------------------------
# 2. DESIGN CSS (Bigger Stats, Side-by-Side Tables)
# ---------------------------------------------------------------------
CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

:root {
    --bg-app: #f8fafc;
    --surface: #ffffff;
    --primary: #4f46e5;       /* Indigo */
    --primary-hover: #4338ca;
    --text-main: #0f172a;
    --text-sub: #64748b;
    --border: #e2e8f0;
    --radius-l: 24px;
    --radius-m: 16px;
    --radius-s: 12px;
    --shadow-card: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -1px rgba(0, 0, 0, 0.03);
}

.jupyter-widgets, .widget-area {
    font-family: 'Inter', system-ui, sans-serif !important;
    color: var(--text-main);
}

/* App Wrapper */
.app-shell {
    background: var(--bg-app);
    padding: 24px;
    border-radius: 0 0 var(--radius-l) var(--radius-l);
    border: 1px solid var(--border);
}

/* Modern Card */
.gemini-card {
    background: var(--surface);
    border-radius: var(--radius-l);
    padding: 32px;
    border: 1px solid var(--border);
    box-shadow: var(--shadow-card);
    margin-bottom: 24px;
}
.gemini-card h2 {
    color: var(--text-main);
    margin: 0 0 8px 0;
    font-size: 22px;
    font-weight: 700;
}
.gemini-card p {
    color: var(--text-sub);
    font-size: 14px;
    margin: 0 0 24px 0;
}

/* Tabs */
.p-TabBar-tab {
    background: transparent !important;
    border: none !important;
    color: var(--text-sub) !important;
    font-weight: 600 !important;
    padding: 12px 24px !important;
    border-radius: var(--radius-s) !important;
    margin-right: 4px !important;
    transition: all 0.2s;
}
.p-TabBar-tab:hover {
    background: #f1f5f9 !important;
    color: var(--text-main) !important;
}
.p-TabBar-tab.p-mod-current {
    color: var(--primary) !important;
    background: #eef2ff !important;
}

/* Inputs */
.widget-text input,
.widget-textarea textarea,
.widget-dropdown select,
.widget-readout {
    background: #ffffff !important;
    color: var(--text-main) !important;
    border: 1px solid #cbd5e1 !important;
    border-radius: var(--radius-s) !important;
    padding: 12px !important;
    font-size: 14px !important;
    transition: all 0.2s;
}
.widget-text input:focus,
.widget-textarea textarea:focus {
    border-color: var(--primary) !important;
    box-shadow: 0 0 0 3px rgba(79, 70, 229, 0.15) !important;
}

/* Buttons */
.btn-primary button {
    background: var(--primary) !important;
    color: white !important;
    border-radius: 50px !important;
    font-weight: 600 !important;
    border: none !important;
    padding: 10px 24px !important;
}
.btn-warning button {
    background: #f59e0b !important;
    color: white !important;
    border-radius: 50px !important;
    font-weight: 600 !important;
    border: none !important;
}

/* Chat Window Container */
.chat-window {
    background: #ffffff;
    border: 1px solid var(--border);
    border-radius: var(--radius-m);
    padding: 20px;
    background-image: radial-gradient(#f1f5f9 1px, transparent 1px);
    background-size: 20px 20px;
    height: 400px;
    overflow-y: auto;
}

/* BIGGER STAT BOXES */
.stat-box {
    background: #f8fafc;
    border: 1px solid #e2e8f0;
    padding: 25px; /* Increased padding */
    border-radius: 16px;
    text-align: center;
    flex: 1;
    box-shadow: 0 2px 4px rgba(0,0,0,0.02);
}
.stat-title {
    font-size: 14px;
    color: #64748b;
    font-weight: 700;
    text-transform: uppercase;
    letter-spacing: 0.5px;
}
.stat-val {
    font-size: 32px; /* Bigger font */
    font-weight: 800;
    color: #0f172a;
    margin: 10px 0;
}

/* Upload Widget */
.widget-upload > label {
    width: 100%;
    border: 2px dashed #cbd5e1;
    border-radius: var(--radius-m);
    background: #f8fafc;
    padding: 32px;
    text-align: center;
    cursor: pointer;
    font-weight: 600;
    color: var(--primary);
}

/* Side by Side Tables container */
.tables-container {
    display: flex;
    gap: 15px;
    width: 100%;
    overflow-x: auto;
}
</style>
"""
display(widgets.HTML(CSS))

def create_card(title, subtitle, children):
    header = widgets.HTML(f"<h2>{title}</h2><p>{subtitle}</p>")
    box = widgets.VBox([header] + children)
    box.add_class("gemini-card")
    return box

# ---------------------------------------------------------------------
# SCREEN A ‚Äî PLANT DIAGNOSTIC
# ---------------------------------------------------------------------
a_out = widgets.Output()

a_name = widgets.Text(
    placeholder="Plant species (e.g. Tomato)",
    description="Name:",
    style={'description_width': '60px'},
    layout=widgets.Layout(width='100%')
)
a_uploader = widgets.FileUpload(
    accept="image/*",
    multiple=False,
    description="üìÇ Upload Leaf Photo"
)
a_uploader.layout = widgets.Layout(width='100%')
a_btn = widgets.Button(description="Analyze & Save", layout=widgets.Layout(width='100%', height='48px'))
a_btn.add_class("btn-primary")

def on_plant_upload(change):
    with a_out:
        clear_output()
        if not a_uploader.value: return
        fname, f = list(a_uploader.value.items())[0]
        display(widgets.Image(value=f["content"], width=320, layout=widgets.Layout(border='4px solid #f1f5f9', border_radius='12px')))

a_uploader.observe(on_plant_upload, names="value")

def run_plant_analysis(_):
    with a_out:
        clear_output()
        if not a_uploader.value: return
        fname, f = list(a_uploader.value.items())[0]
        img = Image.open(io.BytesIO(f["content"])).convert("RGB")
        preds = clf(img)
        top = preds[0]
        clear_output()
        display(widgets.Image(value=f["content"], width=320, layout=widgets.Layout(border_radius='12px')))

        healthy = "healthy" in top["label"].lower()
        bg = "#ecfdf5" if healthy else "#fef2f2"
        border = "#10b981" if healthy else "#ef4444"
        text_col = "#047857" if healthy else "#b91c1c"

        display(widgets.HTML(f"""
            <div style="background:{bg}; color:{text_col}; padding:24px; border-radius:16px; margin-top:20px; border: 1px solid {border}; text-align:center;">
                <h3 style="margin:0;">{top['label'].replace('_',' ').title()}</h3>
                <p>Confidence: {top['score']*100:.2f}%</p>
            </div>
        """))
        try:
            db.collection("plant_images").add({ "plant": a_name.value, "file": fname, "prediction": top["label"], "score": float(top["score"]), "time": datetime.now(timezone.utc) })
            award_xp(10) # Award XP after successful prediction and database save
        except: pass

a_btn.on_click(run_plant_analysis)
screenA = create_card("Plant Diagnostic", "Identify plant diseases using AI.", [a_name, widgets.HTML("<div style='height:15px'></div>"), a_uploader, widgets.HTML("<div style='height:20px'></div>"), a_btn, a_out])

# ---------------------------------------------------------------------
# SCREEN B ‚Äî IOT DATA (WITH 10-MINUTE SMART SAVING)
# ---------------------------------------------------------------------
b_out = widgets.Output()

b_limit = widgets.IntSlider(
    value=5, min=1, max=20, step=1,
    description="Rows:",
    style={'description_width': '60px'},
    layout=widgets.Layout(width='60%')
)
b_btn = widgets.Button(
    description="Fetch Data",
    layout=widgets.Layout(width='50%', height='48px')
)
b_btn.add_class("btn-primary")

def fetch_sensor_data(_):
    global last_iot_save_time

    with b_out:
        clear_output()
        try:
            # 1. Fetch all 3
            df_soil = fetch_history("soil", b_limit.value)
            df_hum = fetch_history("humidity", b_limit.value)
            df_temp = fetch_history("temperature", b_limit.value)

            # 2. Get latest values safely
            l_soil = float(df_soil["value"].iloc[-1]) if not df_soil.empty else 0.0
            l_hum = float(df_hum["value"].iloc[-1]) if not df_hum.empty else 0.0
            l_temp = float(df_temp["value"].iloc[-1]) if not df_temp.empty else 0.0

            # 3. BIGGER STAT CARDS
            display(widgets.HTML(f"""
            <div style="display:flex; gap:20px; margin-bottom:30px;">
                <div class="stat-box">
                    <div class="stat-title">üå± Soil Moisture</div>
                    <div class="stat-val">{l_soil}%</div>
                </div>
                <div class="stat-box">
                    <div class="stat-title">üíß Humidity</div>
                    <div class="stat-val">{l_hum}%</div>
                </div>
                <div class="stat-box">
                    <div class="stat-title">üå°Ô∏è Temperature</div>
                    <div class="stat-val">{l_temp}¬∞C</div>
                </div>
            </div>
            """))

            # 4. SIDE BY SIDE TABLES (Using HBox of Outputs)
            out1 = widgets.Output()
            out2 = widgets.Output()
            out3 = widgets.Output()

            with out1:
                print("--- Soil ---")
                display(df_soil)
            with out2:
                print("--- Humidity ---")
                display(df_hum)
            with out3:
                print("--- Temp ---")
                display(df_temp)

            # Put them in an HBox
            hbox = widgets.HBox([out1, out2, out3])
            hbox.layout = widgets.Layout(justify_content="space-between", width="100%")
            display(hbox)

            # --- NEW: SMART SAVE LOGIC (10 Minute Limit) ---
            current_time = time.time()
            time_diff = current_time - last_iot_save_time

            # 600 seconds = 10 minutes
            if time_diff >= 600:
                # SAVE TO FIREBASE
                if db:
                    db.collection('iot_history').add({
                        "soil": l_soil,
                        "humidity": l_hum,
                        "temperature": l_temp,
                        "timestamp": datetime.now(timezone.utc)
                    })

                # Update the timer
                last_iot_save_time = current_time
                print(f"\n‚úÖ Data saved to Firestore (Backup successful).")
                print(f"   Next save allowed in 10 minutes.")

                award_xp(2) # Award XP for the successful save
            else:
                # SKIP SAVING
                mins_left = (600 - time_diff) / 60
                print(f"\n‚è≥ cloud backup skipped (Throttled).")
                print(f"   Wait {mins_left:.1f} more minutes to save again.")
            # -----------------------------------------------

        except NameError:
            print("‚Ñπ 'fetch_history' or 'db' is not defined.")
        except Exception as e:
            print("‚ùå Error fetching/saving data:", e)

b_btn.on_click(fetch_sensor_data)

screenB = create_card(
    "IoT Data Logs",
    "Real-time sensor feeds.",
    [
        b_limit,
        widgets.HTML("<div style='height:16px;'></div>"),
        b_btn,
        b_out,
    ]
)

# ---------------------------------------------------------------------
# SCREEN C ‚Äî DASHBOARD (Unchanged)
# ---------------------------------------------------------------------
dash_out = widgets.Output()

dash_limit = widgets.IntSlider(
    value=30, min=10, max=200, step=10,
    description="Range:",
    style={'description_width': '60px'},
    layout=widgets.Layout(width='80%')
)

dash_btn = widgets.Button(
    description="Update Dashboard",
    layout=widgets.Layout(width='60%', height='48px')
)
dash_btn.add_class("btn-warning")

def get_status_color(feed, val):
    status = "OK"
    if feed == "soil":
        if val < 30: status = "Critical"
        elif val < 45: status = "Warning"
    elif feed == "humidity":
        if val < 30: status = "Warning"
    elif feed == "temperature":
        if val < 10 or val > 35: status = "Warning"

    if status == "Critical": return "#ef4444", "Critical"
    if status == "Warning": return "#f59e0b", "Warning"
    return "#10b981", "OK"

def build_dashboard(_):
    with dash_out:
        clear_output()
        try:
            df_soil = fetch_history("soil", dash_limit.value)
            df_hum = fetch_history("humidity", dash_limit.value)
            df_temp = fetch_history("temperature", dash_limit.value)
        except Exception as e:
            print("Error fetching data:", e)
            return

        val_s = df_soil["value"].iloc[-1] if not df_soil.empty else 0
        val_h = df_hum["value"].iloc[-1] if not df_hum.empty else 0
        val_t = df_temp["value"].iloc[-1] if not df_temp.empty else 0

        col_s, stat_s = get_status_color("soil", val_s)
        col_h, stat_h = get_status_color("humidity", val_h)
        col_t, stat_t = get_status_color("temperature", val_t)

        display(widgets.HTML(f"""
        <div style="display:flex; gap:10px; margin-bottom:20px; flex-wrap:wrap;">
            <div style="background:{col_s}; color:white; padding:8px 16px; border-radius:20px; font-weight:600; font-size:13px;">Soil: {stat_s}</div>
            <div style="background:{col_h}; color:white; padding:8px 16px; border-radius:20px; font-weight:600; font-size:13px;">Humidity: {stat_h}</div>
            <div style="background:{col_t}; color:white; padding:8px 16px; border-radius:20px; font-weight:600; font-size:13px;">Temp: {stat_t}</div>
        </div>
        """))

        issues = []
        if stat_s in ["Critical", "Warning"]: issues.append("low soil moisture")
        if stat_h in ["Critical", "Warning"]: issues.append("low humidity")
        if stat_t in ["Critical", "Warning"]: issues.append("extreme temperature")

        if issues:
            display(widgets.HTML("<div style='color:#f59e0b; font-weight:600;'>üí° Generating AI Insight for issues...</div>"))
            query = f"impact of {', '.join(issues)} on plant health"
            try:
                insight = get_rag_response(query) # Using wrapper
                display(widgets.HTML(f"""
                    <div style="background:#fffbeb; border-left:4px solid #f59e0b; padding:16px; border-radius:8px; margin-bottom:20px; color:#92400e;">
                        <b>AI Diagnosis:</b><br>{insight}
                    </div>
                """))
            except: pass

        # Plotting requires matplotlib (implied from context)
        import matplotlib.pyplot as plt
        plt.style.use('default')
        plt.figure(figsize=(10, 4))
        plt.plot(df_soil["created_at"], df_soil["value"], marker="o", label="Soil Moisture", color="#8b5cf6", linewidth=2)
        plt.plot(df_hum["created_at"], df_hum["value"], marker="s", label="Humidity", color="#3b82f6", linewidth=2)
        plt.plot(df_temp["created_at"], df_temp["value"], marker="^", label="Temperature", color="#ef4444", linewidth=2)

        plt.title("Combined Environment Monitoring", fontsize=12, fontweight='bold', color='#1e293b')
        plt.xlabel("Time", color='#64748b')
        plt.ylabel("Value", color='#64748b')
        plt.grid(True, axis="y", linestyle="--", alpha=0.3)
        plt.legend()
        plt.xticks(rotation=30)
        plt.gca().spines['top'].set_visible(False)
        plt.gca().spines['right'].set_visible(False)
        plt.tight_layout()
        plt.show()
        award_xp(2) # Award XP after the dashboard is built and displayed

dash_btn.on_click(build_dashboard)

screenC = create_card("Live Dashboard", "Real-time visualization.", [dash_limit, widgets.HTML("<div style='height:16px;'></div>"), dash_btn, dash_out])

# ---------------------------------------------------------------------
# SCREEN D ‚Äî SEARCH (RESTORED DETAILS)
# ---------------------------------------------------------------------
c_out = widgets.Output()
index_box = widgets.Text(value="inverted_index", description="Index:", style={'description_width': '60px'}, layout=widgets.Layout(width="70%"))
query_box = widgets.Text(value="about", description="Query:", style={'description_width': '60px'}, layout=widgets.Layout(width="70%"))
search_btn = widgets.Button(description="Search DB", layout=widgets.Layout(width='50%', height='48px', margin='10px 0 0 70px'))
search_btn.add_class("btn-primary")

def search_inverted_index(index_name: str, term: str):
    index_name = index_name.strip()
    term = term.strip().lower()
    if not index_name or not term:
        return None, "Enter both Index and Search."

    if not db: return None, "DB not connected"

    doc = db.collection(index_name).document(term).get()
    if doc.exists:
        data = doc.to_dict() or {}
        return {
            "term": term,
            "df": data.get("df"),
            "doc_ids": data.get("doc_ids", [])
        }, None

    qs = list(db.collection(index_name).where("term", "==", term).limit(1).stream())
    if qs:
        data = qs[0].to_dict() or {}
        return {
            "term": term,
            "df": data.get("df"),
            "doc_ids": data.get("doc_ids", [])
        }, None

    return None, f"No results for '{term}' in '{index_name}'."

def on_search_click(_):
    with c_out:
        clear_output()
        try:
            result, err = search_inverted_index(index_box.value, query_box.value)
        except NameError:
            print("‚ÑπÔ∏è Firestore client 'db' is not defined.")
            return
        if err:
            print(err)
            return

        if result:
            award_xp(1) # Award XP if search yields results
            # FIXED: Explicitly showing Frequency (DF) and list of IDs
            html = "<ul style='padding-left:18px; color:#475569; max-height:100px; overflow-y:auto;'>"
            for did in result.get("doc_ids", []):
                html += f"<li>{did}</li>"
            html += "</ul>"

            display(widgets.HTML(f"""
                <div style="background:#f8fafc; padding:20px; border-radius:12px; border:1px solid #e2e8f0;">
                    <p style="margin:5px 0; font-size:16px;"><b>Term:</b> <span style="color:#4f46e5; font-weight:bold;">{result['term']}</span></p>
                    <p style="margin:5px 0; font-size:16px;"><b>Mentioned:</b> <span style="font-weight:bold; color:#0f172a;">{result.get('df', 0)} times</span> (Document Frequency)</p>
                    <div style="margin-top:15px; font-weight:600; color:#64748b; border-bottom:1px solid #e2e8f0; padding-bottom:5px;">Found in Documents:</div>
                    {html}
                </div>
            """))

search_btn.on_click(on_search_click)
screenD = create_card("Knowledge Search", "Query the Firestore index.", [index_box, query_box, search_btn, c_out])

# ---------------------------------------------------------------------
# SCREEN E ‚Äî CHAT (RAG)
# ---------------------------------------------------------------------
chat_out = widgets.Output()
chat_out.add_class("chat-window")
chat_box = widgets.Textarea(
    placeholder="Ask about plant diseases... (Press Enter)",
    layout=widgets.Layout(width="100%", height="80px")
)
send_btn = widgets.Button(description="Send", layout=widgets.Layout(width="120px", height="40px"))
send_btn.add_class("btn-primary")
clear_btn = widgets.Button(description="Clear", layout=widgets.Layout(width="120px", height="40px"))
status_line = widgets.HTML("")

def render_message(role, text):
    align = "flex-end" if role == "user" else "flex-start"
    bg = "#4f46e5" if role == "user" else "#f8fafc"
    col = "white" if role == "user" else "#1e293b"
    border = "none" if role == "user" else "1px solid #e2e8f0"

    bubble = f"""<div style="display:flex; justify-content:{align}; margin:15px 0;">
          <div style="max-width:75%; background:{bg}; color:{col}; padding:12px 16px; border-radius:18px 18px 4px 18px; border:{border}; font-size:14px;">{text}</div></div>"""
    display(widgets.HTML(bubble))

def handle_send(_=None):
    q = chat_box.value.strip()
    if not q: return
    chat_box.value = ""

    # 1. Render User Message
    with chat_out: render_message("user", q)

    try:
        status_line.value = "<span style='color:#4f46e5; font-weight:600;'>üß† Thinking...</span>"

        # 2. CALL CELL 14 LOGIC
        final = get_rag_response(q)

        status_line.value = ""

        # 3. Render AI Message
        with chat_out: render_message("ai", final)
        award_xp(1) # Award XP after AI message is rendered in RAG chat

    except Exception as e:
        status_line.value = ""
        with chat_out: render_message("ai", f"Error: {e}")

def on_rag_enter(change):
    if change["new"].endswith("\n"):
        chat_box.value = chat_box.value.strip()
        handle_send()

chat_box.observe(on_rag_enter, names="value")
send_btn.on_click(handle_send)
clear_btn.on_click(lambda _: chat_out.clear_output())

screenE = create_card(
    "AI Assistant (RAG)",
    "Chat with your data.",
    [chat_out, widgets.HTML("<div style='height:16px;'></div>"), chat_box, widgets.HBox([send_btn, clear_btn]), status_line]
)

# ---------------------------------------------------------------------
# SCREEN F ‚Äî GEMINI CHAT
# ---------------------------------------------------------------------
gem_out = widgets.Output()
gem_out.add_class("chat-window")
gem_input = widgets.Textarea(placeholder="Ask Gemini... (Press Enter)", layout=widgets.Layout(width="100%", height="80px"))
gem_send = widgets.Button(description="Send", layout=widgets.Layout(width="120px", height="40px"))
gem_send.add_class("btn-primary")
gem_clear = widgets.Button(description="Clear", layout=widgets.Layout(width="120px", height="40px"))
gem_stat = widgets.HTML("")

def gem_logic(_=None):
    q = gem_input.value.strip()
    if not q: return
    gem_input.value = ""
    with gem_out: render_message("user", q)
    try:
        gem_stat.value = "<b style='color:#4f46e5'>Thinking...</b>"
        resp = chat_session.send_message(q)
        gem_stat.value = ""
        with gem_out: render_message("ai", resp.text)
        award_xp(1) # Award XP after AI message is rendered in Gemini chat
    except Exception as e:
        gem_stat.value = ""
        with gem_out: render_message("ai", f"Error: {e}")

def on_gem_enter(change):
    if change["new"].endswith("\n"):
        gem_input.value = gem_input.value.strip()
        gem_logic()

gem_input.observe(on_gem_enter, names="value")
gem_send.on_click(gem_logic)
gem_clear.on_click(lambda _: gem_out.clear_output())

screenF = create_card("Gemini Direct", "Clean workspace.", [gem_out, widgets.HTML("<div style='height:16px;'></div>"), gem_input, widgets.HBox([gem_send, gem_clear]), gem_stat])

# ---------------------------------------------------------------------
# FINAL ASSEMBLY (tabs)
# ---------------------------------------------------------------------
tabs = widgets.Tab(children=[screenA, screenB, screenC, screenD, screenE, screenF, screenG])
tabs.set_title(0, "Diagnosis")
tabs.set_title(1, "IoT Data")
tabs.set_title(2, "Dashboard")
tabs.set_title(3, "Search")
tabs.set_title(4, "RAG Chat")
tabs.set_title(5, "Gemini")
tabs.set_title(6, "Leaderboard")

header = widgets.HTML(
    """<div style='display:flex; align-items:center; gap:12px; margin-bottom:20px; border-bottom:1px solid #e2e8f0; padding-bottom:20px;'>
        <div style='width:40px; height:40px; background:#4f46e5; border-radius:10px; display:flex; align-items:center; justify-content:center; color:white; font-size:20px;'>üå±</div>
        <div><h1 style='margin:0; font-size:24px; color:#1e293b;'>PlantCare AI</h1></div>
    </div>"""
)

app = widgets.VBox([header, xp_box, tabs])

app.add_class("app-shell")
display(app)
