#  Vertex Inference Unified

This notebook uses the unified `google-genai` library (imported as `from google import genai`). It supports:
- **Vertex AI Backend:** Uploads videos to GCS during the 'Prepare' step.
- **Gemini API Backend:** Uploads videos using the **File API** during the 'Prepare' step.

**Pipeline Steps:**
1.  **Import Libraries & Configure.**
2.  **Config** - Set up the configuration for the pipeline, including the model, model config, prompts, and prompt config.
2.  **Initialize Clients:** Set up AI client and Storage client.
3.  **(Only the first time) Fetch Dataset:** Downloads metadata from HuggingFace.
4.  **(Only the first time on each API type) Download, Extract & Prepare Videos:** Downloads, extracts, uploads (GCS/File API). Updates metadata.
5.  **Bulk Inference (Async):** Performs inference using pre-uploaded video resources.
6.  **Single Prompt Testing (UI):** Allows interactive testing of a video with custom prompts.

### Notes:

1. After switching from Vertex to Gemini and vice versa, be sure to follow the steps:
    - Run all cells in order to re-upload the videos to the correct storage backend. You can set the `SKIP_DOWNLOAD_ZIP` and `SKIP_EXTRACT` flags to `True` to skip the download and extraction steps; only the upload step is needed.

2. Files uploaded through the Gemini API's file client have an expiry time of 1 day or so. Once they expire, follow the steps above to re-upload them.

## Import Libraries

In [None]:
# Cell 1: Imports (Corrected for `google.genai`)
import os
import csv
import json
import logging
import time
import random
import requests
import datetime
import zipfile
import math
import sys
import asyncio
from typing import Dict, List, Optional, Set, Tuple, Any
from collections import defaultdict
from pathlib import Path
import shutil
import subprocess
import tempfile
import fractions

# Google Cloud & AI Libraries (Unified SDK)
try:
    import google.genai as genai
    from google.genai import types
    from google.genai import errors as genai_errors
    from google.api_core import exceptions as api_core_exceptions
    # GCS Client (Optional, for Vertex Mode)
    try:
        from google.cloud import storage
        GCS_AVAILABLE = True
    except ImportError:
        print("INFO: google-cloud-storage not found. Vertex AI GCS operations unavailable.")
        storage = None
        GCS_AVAILABLE = False
    print("`google.genai` SDK and helpers imported successfully.")
except ImportError as e:
     print(f"ERROR: Failed to import Google libraries: {e}. Install: pip install google-genai google-api-core google-cloud-storage")
     genai = None; types = None; genai_errors = None; api_core_exceptions = None
     storage = None; GCS_AVAILABLE = False
     raise ImportError("FATAL: `google.genai` or `google-api-core` SDK not found.")

# Data Handling & Progress
from datasets import load_dataset
import pandas as pd
from tqdm.notebook import tqdm

# UI Elements
import ipywidgets as widgets
from IPython.display import display, Markdown, HTML, clear_output

# Async in Notebook
import nest_asyncio
nest_asyncio.apply()

## Config Settings

In [None]:
# --- GCP Configuration ---
# PROJECT_ID = "your_google_cloud_project_id" # Your Google Cloud Project ID (Needed for GCS and Vertex AI mode)
# LOCATION = "your_google_cloud_region"      # Your Google Cloud Region (Needed for Vertex AI mode)
# GCS_BUCKET = "your_gcs_bucket" # Your GCS bucket name (Needed for video storage), required for Vertex AI mode

PROJECT_ID = "tiktokllm" # Your Google Cloud Project ID (Needed for GCS and Vertex AI mode)
LOCATION = "us-central1"      # Your Google Cloud Region (Needed for Vertex AI mode)
GCS_BUCKET = "seekdeep-ml-storage" # Your GCS bucket name (Needed for video storage)

# --- Choose Backend Mode ---
# Set USE_VERTEX to True to use the Vertex AI backend (requires ADC or service account auth).
# Set USE_VERTEX to False to use the Gemini API backend (requires an API key, see below).
USE_VERTEX = False  # <-- CHANGE THIS TO True TO USE VERTEX AI

# --- Gemini API Key (Only required if USE_VERTEX is False) ---
# SECURITY: never hardcode a real API key in the notebook -- it ends up in version
# control and in shared copies of the file. The key is read from the GEMINI_API_KEY
# environment variable instead. When that variable is unset or empty, the
# placeholder below is used, which the validation and client-initialisation cells
# treat as "no explicit key" and replace with the GOOGLE_API_KEY environment variable.
# NOTE(review): a literal key was previously committed on this line; it should be
# considered leaked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "YOUR_API_KEY_HERE"



# --- File Paths ---
DATASET_CSV = "dataset.csv"               # Input dataset metadata from HuggingFace
METADATA_FILE = "video_metadata_vertex_inj_correct_mcq.csv" if USE_VERTEX else "video_metadata_non_vertex_inj_correct_mcq.csv"      # One metadata file per backend. Stores video info: video_id, local_path, gcs_uri (Vertex) / file_api_name (Gemini API), status, question data
RESULTS_FILE = "results_noncot_full_inference.csv"              # Output file for inference predictions
DOWNLOADS_DIR = "downloads"               # Directory for downloaded zip file
EXTRACTED_VIDEOS_DIR = "extracted_videos" # Directory storing extracted .mp4 files locally
SPEED_VIDEOS_DIR = "speed_videos"         # Stores sped up/slowed down videos (one sub-directory per speed factor)
HF_CACHE_DIR = "./hf_cache"               # Cache directory for HuggingFace datasets

# --- Step 1: Fetch Dataset Configuration ---
HF_DATASET_NAME = "lmms-lab/AISG_Challenge" # HuggingFace dataset identifier
HF_DATASET_SPLIT = "test"                 # Dataset split to use
SKIP_FETCH = False                        # Set True to skip fetching if DATASET_CSV exists

# --- Step 2: Download & Prepare Videos Configuration ---
VIDEO_ZIP_URL = "https://huggingface.co/datasets/lmms-lab/AISG_Challenge/resolve/main/Benchmark-AllVideos-HQ-Encoded-challenge.zip?download=true"
ZIP_FILE_NAME = "all_videos.zip"
SKIP_DOWNLOAD_ZIP = True                 # Set True to skip downloading if zip exists
SKIP_EXTRACT = True                      # Set True to skip extraction if videos exist locally
SKIP_PREPARE = False                      # Set True to skip video preparation (GCS upload for Vertex / File API upload for Gemini API, metadata update)
MAX_VIDEOS_TO_PROCESS = None              # Limit videos for testing (e.g., 5), None for all
UPLOAD_BATCH_SIZE_GCS = 10                # Batch size for GCS uploads (Vertex mode only)

# --- Inference Configuration ---
# Choose a model name compatible with your selected method (Vertex AI or Gemini API)

# Examples:

# Vertex AI: gemini-2.0-flash, gemini-2.0-flash-lite, gemini-2.0-pro-exp-02-05, gemini-2.0-flash-thinking-exp-01-21
# Rate limits: https://cloud.google.com/vertex-ai/generative-ai/docs/quotas#gemini-2.0-flash
# Basically 500 requests per minute for 2.0-flash and 2.0-flash-lite (unlimited), 10 requests per minute for 2.0-pro-exp-02-05, gemini-2.5-flash-preview-04-17

# Gemini API: gemini-2.0-flash, gemini-2.0-flash-lite, gemini-2.0-flash-thinking-exp-01-21, gemini-2.5-pro-exp-03-25
# Rate limits: https://ai.google.dev/gemini-api/docs/rate-limits#tier-1
# For free tier: 30 requests per minute for 2.0-flash and 2.0-flash-lite, 10 requests per minute for 2.0-pro-exp-02-05
# For tier-1: 2000 requests per minute for 2.0-flash and 2.0-flash-lite (have to pay), 10 requests per minute for 2.0-pro-exp-02-05 and gemini-2.0-flash-thinking-exp-01-21, gemini-2.5-flash-preview-04-17

# Playback speed multiplier applied to the videos: 1.0 = normal speed, 0.5 = half speed, etc.
VIDEO_SPEED_FACTOR = 0.5

# --- Setup Derived Paths & Directories ---
# Speed-adjusted videos are kept in a sub-directory named after the speed factor,
# so outputs for different factors never overwrite each other.
zip_file_path = Path(DOWNLOADS_DIR).joinpath(ZIP_FILE_NAME)
extracted_videos_path = Path(EXTRACTED_VIDEOS_DIR)
speed_videos_path = Path(SPEED_VIDEOS_DIR).joinpath(str(VIDEO_SPEED_FACTOR))

# Create every working directory up front (no error if it already exists).
for _working_dir in (Path(DOWNLOADS_DIR), extracted_videos_path, speed_videos_path, Path(HF_CACHE_DIR)):
    _working_dir.mkdir(parents=True, exist_ok=True)

# --- Logging Configuration ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)])
logger = logging.getLogger(__name__)

# --- Configuration Validation & Display --- #
# Each problem is logged as an error and summarised in a banner at the end; the
# cell itself never raises, so later cells are responsible for failing hard.
warnings_found = False
if USE_VERTEX:
    # Reject both placeholder spellings: the hyphenated ones historically checked
    # here and the underscore ones used in the template comments of the config cell.
    if not PROJECT_ID or PROJECT_ID in ("your-gcp-project-id", "your_google_cloud_project_id"):
        logger.error("Vertex AI mode requires PROJECT_ID to be set.")
        warnings_found = True
    if not LOCATION or LOCATION == "your_google_cloud_region":
        logger.error("Vertex AI mode requires LOCATION to be set.")
        warnings_found = True
    if not GCS_BUCKET or GCS_BUCKET in ("your-gcs-bucket-name", "your_gcs_bucket"):
        logger.error("Vertex AI mode requires GCS_BUCKET for video uploads.")
        warnings_found = True
    if not GCS_AVAILABLE:
        logger.error("Vertex AI mode requires 'google-cloud-storage', but it's not installed.")
        warnings_found = True
else: # Gemini API Mode
    # An explicit key wins; the placeholder means "fall back to the GOOGLE_API_KEY env var".
    effective_api_key = GEMINI_API_KEY if GEMINI_API_KEY != "YOUR_API_KEY_HERE" else os.environ.get("GOOGLE_API_KEY")
    if not effective_api_key:
        logger.error("Gemini API mode requires GEMINI_API_KEY or GOOGLE_API_KEY environment variable.")
        warnings_found = True
    else:
        # GEMINI_API_KEY is deliberately left untouched here. Overwriting it with a
        # human-readable marker made the client-initialisation cell send that marker
        # to the API as if it were the key.
        logger.info("Gemini API mode configured. Videos will be uploaded via File API.")

if warnings_found:
     print("\n\n************************* WARNING *************************")
     print("Configuration errors detected above. Execution might fail.")
     print("***********************************************************\n")

# Main Model Selection (Innovation 1)
Agentic CoT - Chain of Thought + Summary

## 1. Select CoT Model and Summary Model

### CoT output models

In [None]:
# To See All Available Models, Go into the CoT_ouput_models.py file

from models.CoT_ouput_models import get_cot_model

# Names accepted by get_cot_model; the index used below picks one of them.
CoT_model_list = ["gemini-2.0-flash", "gemini-2.0-flash-fine-tuning", 
                  "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]

# Index 3 selects "gemini-2.5-pro-preview-03-25". The call returns the model name,
# its prompts/config, and the rate-limit / retry / concurrency settings as one tuple.
MODEL_NAME, SYSTEM_PROMPT, PROMPT_TEMPLATES, CONFIG, REQUESTS_PER_MINUTE, MAX_RETRIES, MAX_ASYNC_WORKERS = get_cot_model(CoT_model_list[3])

### Summary Model Selection

In [None]:
# To see all available models, go into the Summary_models.py file
from models.Summary_models import get_summary_model
# Names accepted by get_summary_model; the index used below picks one of them.
summary_model_list = ["gemini-2.0-flash-ver1", "gemini-2.0-flash-ver2", "gemini-2.0-flash-ver3"]

# Index 0 selects "gemini-2.0-flash-ver1". These QUESTION_* names are separate from
# the MODEL_NAME / SYSTEM_PROMPT / CONFIG names set by the other selection cells.
QUESTION_MODEL_NAME, QUESTION_SYSTEM_PROMPT, QUESTION_CONFIG = get_summary_model(summary_model_list[0])

## 2. Select NonCoT Model with Instruction Prompt for Direct Answer

In [None]:
# To see all available models, go into the NonCoT_output_models.py file
# NonCoT models are used for Bulk Inference
from models.NonCoT_output_models import get_non_cot_model
non_cot_model_list = ["gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-exp-03-25"]

# NOTE: this rebinds MODEL_NAME, SYSTEM_PROMPT, PROMPT_TEMPLATES, CONFIG,
# REQUESTS_PER_MINUTE, MAX_RETRIES and MAX_ASYNC_WORKERS -- the same names the CoT
# selection cell assigns. Whichever of the two cells ran last determines the model
# used afterwards; run only the one you need (or re-run it) before inference.
MODEL_NAME, SYSTEM_PROMPT, PROMPT_TEMPLATES, CONFIG, REQUESTS_PER_MINUTE, MAX_RETRIES, MAX_ASYNC_WORKERS  = get_non_cot_model(non_cot_model_list[0])

# Basic Initialization

## Initialize Google Cloud Clients

In [None]:
storage_client = None
ai_client = None

# Values of GEMINI_API_KEY that mean "no explicit key configured". The last entry
# is the display marker the config cell may have written into GEMINI_API_KEY when
# the key came from the environment; it must never be sent to the API as a key.
_UNSET_API_KEY_VALUES = (None, "", "YOUR_API_KEY_HERE", "(Loaded from GOOGLE_API_KEY env var)")

# --- Initialize Generative AI Client (`google.genai`) --- #
display(Markdown("### Initializing Generative AI Client (`google.genai`)"))
try:
    if USE_VERTEX:
        display(Markdown(f"Vertex AI backend (Project: {PROJECT_ID}, Loc: {LOCATION})..."))
        if not PROJECT_ID or not LOCATION or PROJECT_ID == "your-gcp-project-id":
            raise ValueError("PROJECT_ID/LOCATION invalid for Vertex AI.")
        # Initialize Client for Vertex
        ai_client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)
        display(Markdown("✅ Vertex AI Client Initialized."))
    else: # Gemini API Mode
        display(Markdown("Gemini API backend (using API Key)..."))
        # An explicitly configured key wins; otherwise fall back to GOOGLE_API_KEY.
        configured_key = None if GEMINI_API_KEY in _UNSET_API_KEY_VALUES else GEMINI_API_KEY
        effective_api_key = configured_key or os.environ.get("GOOGLE_API_KEY")
        if not effective_api_key:
            raise ValueError("Gemini API Key required but not found.")
        # Initialize Client for Gemini API
        ai_client = genai.Client(api_key=effective_api_key, vertexai=False)
        display(Markdown("✅ Gemini API Client Initialized."))
except ValueError as ve:
    display(Markdown(f"❌ **Config Error:** {ve}"))
    ai_client = None
except Exception as e:
    display(Markdown(f"❌ **AI Client Error:** {e}."))
    logger.error("AI Client Init Failed", exc_info=True)
    ai_client = None

# --- Initialize Storage Client --- #
# Vertex mode stores videos in GCS; Gemini API mode uses the AI client's File API handle.
if USE_VERTEX:
    display(Markdown("### Initializing GCS Client (Vertex Mode Only)"))
    if not GCS_AVAILABLE:
        display(Markdown("❌ GCS lib missing."))
        raise RuntimeError("Missing GCS lib.")
    if not GCS_BUCKET or GCS_BUCKET == "your-gcs-bucket-name":
        display(Markdown("❌ GCS_BUCKET needed."))
        raise ValueError("GCS_BUCKET required.")
    try:
        storage_client = storage.Client(project=PROJECT_ID)
        if not storage_client.bucket(GCS_BUCKET).exists():
            display(Markdown(f"⚠️ GCS Bucket `{GCS_BUCKET}` inaccessible."))
        else:
            display(Markdown(f"✅ GCS Client Initialized (Bucket: '{GCS_BUCKET}')."))
    except Exception as e:
        display(Markdown(f"❌ **GCS Client Error:** {e}."))
        logger.error("GCS Client Init Failed", exc_info=True)
        # A broken GCS client is only fatal when the prepare (upload) step will run.
        if not SKIP_PREPARE:
            raise RuntimeError("GCS client failed.") from e
        display(Markdown("⚠️ GCS client failed, but skipping prep."))
else:
    display(Markdown("### Initializing Gemini API Client (File API)"))
    try:
        # Fails with AttributeError when ai_client is None (AI client init failed above).
        storage_client = ai_client.files
    except Exception as e:
        display(Markdown(f"❌ **Gemini File API Client Error:** {e}."))
        logger.error("Gemini API Client Init Failed", exc_info=True)
    else:
        # Report success only when the handle was actually obtained.
        display(Markdown("✅ Gemini File API Client Initialized."))

# --- Final Checks --- #
if ai_client is None:
    raise RuntimeError("AI client failed.")
if USE_VERTEX and storage_client is None and not SKIP_PREPARE:
    raise RuntimeError("GCS client failed for Vertex prep.")
display(Markdown("✅ Client initialization complete."))


## Utility Functions

In [None]:
# --- File/Data Handling ---
def load_processed_qids(filename: str) -> Set[str]:
    """Return the set of question IDs already present in a results CSV.

    Args:
        filename: Path of the results CSV; only its ``qid`` column is read,
            as strings.

    Returns:
        The distinct non-null ``qid`` values, or an empty set when the file
        does not exist or cannot be read (the read failure is logged as a
        warning and otherwise ignored).
    """
    if not Path(filename).is_file():
        return set()

    seen_qids: Set[str] = set()
    try:
        qid_frame = pd.read_csv(
            filename,
            usecols=['qid'],
            dtype={'qid': str},
            on_bad_lines='warn',
        )
        seen_qids = set(qid_frame['qid'].dropna().unique())
        logger.info(f"Loaded {len(seen_qids)} processed QIDs from {filename}")
    except Exception as e:
        logger.warning(f"Could not read QIDs from {filename}: {e}. Assuming zero processed.")
    return seen_qids

def download_file_with_progress(url: str, destination: Path):
    """Stream ``url`` to ``destination`` while showing a tqdm progress bar.

    The body is fetched in 1 MiB chunks. When the server reports a
    ``content-length`` and the number of bytes written differs from it, the
    download is treated as failed.

    Args:
        url: Address to download from.
        destination: Local file path to write to.

    Raises:
        Exception: Any request, I/O or size-mismatch error is logged and
            re-raised; the partially written file is removed first.
    """
    logger.info(f"Downloading {url} to {destination}...")
    try:
        response = requests.get(url, stream=True, timeout=600)
        response.raise_for_status()
        # 0 means the server did not announce a size, so no size check is possible.
        expected_bytes = int(response.headers.get('content-length', 0))
        chunk_bytes = 1024 * 1024
        with open(destination, 'wb') as out_file:
            with tqdm(
                desc=f"Downloading {destination.name}",
                total=expected_bytes,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as progress:
                for chunk in response.iter_content(chunk_bytes):
                    written = out_file.write(chunk)
                    progress.update(written)
        # The bar's counter still holds the total bytes written after it is closed.
        if expected_bytes != 0 and progress.n != expected_bytes:
            destination.unlink(missing_ok=True)
            raise RuntimeError(f"Download size mismatch for {destination.name}.")
        logger.info(f"Successfully downloaded {destination}")
    except Exception as e:
        destination.unlink(missing_ok=True)
        logger.error(f"Download failed for {url}: {e}")
        raise

def extract_zip(zip_path: Path, extract_to: Path):
    """Extract a zip archive into ``extract_to`` with a progress bar.

    Entries under ``__MACOSX/`` and entries ending in ``.DS_Store`` are skipped.

    Args:
        zip_path: Archive to read.
        extract_to: Directory that receives the extracted members.

    Raises:
        Exception: Any error while opening or extracting is logged and re-raised.
    """
    logger.info(f"Extracting {zip_path.name} to {extract_to}...")
    try:
        with zipfile.ZipFile(zip_path, 'r') as archive:
            wanted = [
                entry
                for entry in archive.namelist()
                if not (entry.startswith('__MACOSX/') or entry.endswith('.DS_Store'))
            ]
            with tqdm(total=len(wanted), desc=f"Extracting {zip_path.name}") as progress:
                for entry in wanted:
                    archive.extract(member=entry, path=extract_to)
                    progress.update(1)
        logger.info(f"Successfully extracted {zip_path} to {extract_to}")
    except Exception as e:
        logger.error(f"Extraction error: {e}")
        raise
    
def move_videos_to_main_directory(base_path):
    """Flatten ``base_path``: move every nested ``.mp4`` file directly into it.

    Files that already sit in ``base_path`` and files whose name starts with
    ``._`` are left where they are. A failed move is logged and counted, and
    the remaining files are still processed.

    Args:
        base_path: Directory (a ``Path``) to search recursively and move into.
    """
    logger.info(f"Moving all videos to main directory: {base_path}")

    # Snapshot the matches first so the moves do not disturb the directory walk.
    nested_videos = [
        candidate
        for candidate in base_path.glob('**/*.mp4')
        if candidate.parent != base_path and not candidate.name.startswith('._')
    ]

    moved_count, failed_count = 0, 0
    for file_path in nested_videos:
        target = base_path / file_path.name
        try:
            shutil.move(str(file_path), str(target))
        except Exception as e:
            logger.error(f"Error moving {file_path}: {e}")
            failed_count += 1
        else:
            moved_count += 1
            # Periodic progress line, once per 50 successful moves.
            if moved_count % 50 == 0:
                logger.info(f"Moved {moved_count} videos so far...")

    logger.info(f"Moved {moved_count} videos to main directory. Failed: {failed_count}")
    

def create_or_update_metadata(metadata_path: str, dataset_df: pd.DataFrame, video_updates: Dict[str, Dict]):
    """Create the metadata CSV if needed and apply per-video updates to it.

    When ``metadata_path`` does not exist, the metadata starts as a copy of
    ``dataset_df`` with the update columns added (``status`` set to
    ``'pending'``). Otherwise the existing CSV is loaded. Every row whose
    ``video_id`` appears in ``video_updates`` then receives the supplied
    values; a missing value in an update leaves the existing value in place.

    Args:
        metadata_path: CSV file to create or rewrite.
        dataset_df: Source rows used only when the file does not exist yet.
            Must contain ``video_id`` and ``qid``.
        video_updates: Mapping ``video_id -> {column: value}`` where the
            columns are any of ``local_path``, ``gcs_uri``, ``file_api_name``
            and ``status``.

    Raises:
        ValueError: If the metadata lacks ``video_id`` or ``qid``.
        Exception: Any other failure is logged and re-raised.
    """
    try:
        required_cols = ['video_id', 'qid']
        update_cols = ['local_path', 'gcs_uri', 'file_api_name', 'status']
        dtype_map = {'video_id': str, 'qid': str} # Ensure IDs are strings

        if not Path(metadata_path).is_file():
            logger.info(f"Creating metadata file: {metadata_path}")
            meta_df = dataset_df.copy()
            for col in update_cols:
                meta_df[col] = pd.NA
            meta_df['status'] = 'pending'
        else:
            logger.debug(f"Loading existing metadata: {metadata_path}")
            meta_df = pd.read_csv(metadata_path, dtype=dtype_map)
            for col in update_cols: # Add missing update columns if needed
                if col not in meta_df.columns:
                    meta_df[col] = pd.NA

        if not all(col in meta_df.columns for col in required_cols):
            raise ValueError(f"Metadata missing required columns ({required_cols}).")

        # One row per video_id, with only the update columns that were supplied.
        updates_df = pd.DataFrame.from_dict(video_updates, orient='index')
        updates_df.index.name = 'video_id'
        updates_df = updates_df.reset_index()
        updates_df['video_id'] = updates_df['video_id'].astype(str)
        merge_cols = ['video_id'] + [col for col in update_cols if col in updates_df.columns]
        updates_to_merge = updates_df[merge_cols].drop_duplicates(subset=['video_id'], keep='last')

        # Left merge keeps one output row per metadata row, in the same order (the
        # updates are unique per video_id). Overlapping columns from the updates
        # get the '_update' suffix.
        merged_df = pd.merge(meta_df, updates_to_merge, on='video_id', how='left', suffixes=('', '_update'))

        for col in update_cols:
            update_col_name = col + '_update'
            if update_col_name in merged_df.columns:
                # Prefer the update, fall back to the existing value. Assign by
                # POSITION (.to_numpy()): merged_df has a fresh 0..n-1 index, so
                # assigning the Series directly would align on index labels and
                # silently write NaN when meta_df's index is not 0..n-1
                # (e.g. a filtered or sliced dataset_df).
                meta_df[col] = merged_df[update_col_name].fillna(merged_df[col]).to_numpy()

        meta_df.to_csv(metadata_path, index=False, encoding='utf-8')
        logger.info(f"Metadata file '{metadata_path}' updated with {len(video_updates)} video records.")

    except Exception as e:
        logger.error(f"Error updating metadata {metadata_path}: {e}", exc_info=True)
        raise

def load_metadata_for_inference(metadata_file: str = METADATA_FILE) -> Dict[str, List[Dict]]:
    """Load the metadata rows that are ready for inference, grouped by video.

    A row is usable when both its ``video_id`` and its uploaded-resource
    column are non-empty. The resource column is ``gcs_uri`` when
    ``USE_VERTEX`` is true and ``file_api_name`` otherwise.

    Args:
        metadata_file: Metadata CSV to read; every column is loaded as a
            string and missing values become ``''``.

    Returns:
        ``{video_id: [row_dict, ...]}``. An empty dict is returned when the
        file is missing, a needed column is absent, no row is usable, or
        reading fails (failures are logged, not raised).
    """
    if not Path(metadata_file).is_file():
        return {}

    resource_col = 'gcs_uri' if USE_VERTEX else 'file_api_name'
    try:
        table = pd.read_csv(metadata_file, dtype=str).fillna('')
        if not {'video_id', resource_col}.issubset(table.columns):
            logger.error(f"Metadata missing 'video_id' or '{resource_col}'.")
            return {}

        # Empty strings are falsy, so astype(bool) flags the filled-in cells.
        has_video_id = table['video_id'].astype(bool)
        has_resource = table[resource_col].astype(bool)
        usable = table[has_video_id & has_resource]
        if not len(usable):
            logger.warning(f"No videos found with '{resource_col}' in {metadata_file}. Check Step 4.")
            return {}

        questions_by_video = {
            video_id: rows.to_dict('records')
            for video_id, rows in usable.groupby('video_id')
        }
        logger.info(f"Loaded {len(questions_by_video)} videos ({len(usable)} questions) with valid IDs for inference.")
        return questions_by_video
    except Exception as e:
        logger.error(f"Error loading metadata for inference: {e}", exc_info=True)
        return {}

# --- Upload/Verification Helpers ---
def upload_to_gcs(storage_client, bucket_name: str, source_file_path: Path, destination_blob_name: str) -> Optional[str]:
    """Upload a local file to a GCS bucket.

    Args:
        storage_client: GCS client used to reach the bucket.
        bucket_name: Name of the target bucket.
        source_file_path: Local file to upload.
        destination_blob_name: Object name to create inside the bucket.

    Returns:
        The ``gs://bucket/blob`` URI on success. ``None`` when GCS support is
        unavailable, the client is ``None``, the source file does not exist,
        or the upload raises (the error is logged).
    """
    can_upload = GCS_AVAILABLE and storage_client is not None and source_file_path.is_file()
    if not can_upload:
        return None
    try:
        bucket = storage_client.bucket(bucket_name)
        bucket.blob(destination_blob_name).upload_from_filename(str(source_file_path))
        gcs_uri = f"gs://{bucket_name}/{destination_blob_name}"
        logger.debug(f"GCS OK: {source_file_path} -> {gcs_uri}")
    except Exception as e:
        logger.error(f"GCS Fail: {source_file_path}. Error: {e}")
        return None
    return gcs_uri

def upload_via_file_api(storage_client, local_path: Path, display_name: str) -> Optional[str]:
    """Upload a local file through the Gemini File API.

    Args:
        storage_client: File API handle exposing ``upload`` (elsewhere in this
            notebook it is ``ai_client.files``).
        local_path: Local file to upload.
        display_name: Human-readable name attached to the uploaded file. It is
            forwarded in the upload config; when empty, no config is sent.

    Returns:
        The ``name`` of the uploaded file resource, or ``None`` when the
        client is ``None``, the file does not exist, or the upload raises
        (the error is logged with its traceback).
    """
    if storage_client is None or not local_path.is_file():
        return None
    try:
        logger.debug(f"Uploading {local_path} via File API...")
        upload_kwargs = {"file": local_path}
        if display_name:
            # Previously this parameter was accepted but silently dropped.
            upload_kwargs["config"] = {"display_name": display_name}
        uploaded_file = storage_client.upload(**upload_kwargs)
        logger.info(f"File API OK: {local_path} -> {uploaded_file.name}")
        return uploaded_file.name
    except Exception as e:
        logger.error(f"File API Fail: {local_path}. Error: {e}", exc_info=True)
        return None

def verify_gcs_file_exists(storage_client, gcs_uri: str) -> bool:
    """Check whether the object behind a ``gs://`` URI exists.

    Args:
        storage_client: GCS client used for the lookup.
        gcs_uri: URI of the object to check.

    Returns:
        ``True`` when the object exists. ``False`` when it does not (a
        warning is logged), when GCS support is unavailable, the client is
        ``None``, the URI is empty, or the lookup raises (an error is logged).
    """
    ready = GCS_AVAILABLE and storage_client is not None and bool(gcs_uri)
    if not ready:
        return False
    try:
        blob = storage.Blob.from_string(gcs_uri, client=storage_client)
        found = blob.exists()
        if not found:
            logger.warning(f"GCS verify failed: {gcs_uri}")
        return found
    except Exception as e:
        logger.error(f"Error verifying GCS {gcs_uri}: {e}")
        return False

def verify_file_api_resource_exists(storage_client, file_api_name: str) -> bool:
    """Check that a File API resource can still be fetched by name.

    Args:
        storage_client: File API handle exposing ``get``.
        file_api_name: Resource name to look up.

    Returns:
        ``True`` when the lookup succeeds. ``False`` when the client or the
        name is missing/empty, or when the lookup raises (the error is logged).
    """
    if not (storage_client and file_api_name):
        return False
    try:
        # Synchronous lookup; only success or failure matters, not the result.
        storage_client.get(name=file_api_name)
    except Exception as e:
        logger.error(f"Error verifying File API {file_api_name}: {e}")
        return False
    return True

def verify_local_file_exists(local_path: str) -> bool:
    """Return whether ``local_path`` is a non-empty path to an existing file.

    A warning is logged whenever the result is ``False``.
    """
    found = bool(local_path) and Path(local_path).is_file()
    if found:
        return True
    logger.warning(f"Local verify failed: {local_path}")
    return False

# --- Prompt Building ---
def build_prompt(question_info: dict) -> str:
    """Build the prompt text for one question.

    The template is chosen from ``PROMPT_TEMPLATES`` by the question's
    ``question_type``, falling back to the ``"default"`` template for unknown
    or missing types. The template is formatted with the question text.

    Args:
        question_info: Question record. Reads ``question``, ``question_type``
            and ``question_prompt``; all three are optional.

    Returns:
        For single-answer multiple-choice questions: the formatted template
        followed by an extra ``"E. None of the above"`` option line. For all
        other types: the formatted template, a newline, and the stripped
        ``question_prompt`` (an empty string when it is missing or ``None``).
    """
    question = question_info.get("question", "")
    q_type = question_info.get("question_type", "default")
    # Look up the fallback lazily so a template set without a "default" entry
    # still works for the question types it does define.
    if q_type in PROMPT_TEMPLATES:
        template = PROMPT_TEMPLATES[q_type]
    else:
        template = PROMPT_TEMPLATES["default"]
    prompt_body = template.format(question=question).strip()

    if q_type == "Multiple-choice Question with a Single Correct Answer":
        return prompt_body + "\n" + "E. None of the above"

    # A missing or None question_prompt used to crash on .strip(); treat it as empty.
    extra_instructions = question_info.get("question_prompt") or ""
    return prompt_body + "\n" + str(extra_instructions).strip()

# --- Rate Limiter ---
class AsyncRateLimiter:
    """
    An asyncio-compatible token bucket rate limiter.

    The bucket starts full, refills continuously at ``rate / period`` tokens
    per second up to ``capacity``, and every ``acquire()`` consumes one token,
    waiting when fewer than one is available.

    Args:
        rate (int): The maximum number of requests allowed per period.
        period (float): The time period in seconds (default: 60 for RPM).
        capacity (int, optional): The maximum burst capacity. Defaults to `rate`.

    Raises:
        ValueError: If ``rate`` or ``period`` is not positive, or if the
            resulting capacity is below 1 (a bucket that can never hold a
            whole token would make ``acquire()`` wait forever).
    """
    def __init__(self, rate: int, period: float = 60.0, capacity: Optional[int] = None):
        if rate <= 0:
            raise ValueError("Rate must be positive")
        if period <= 0:
            raise ValueError("Period must be positive")
        effective_capacity = float(capacity if capacity is not None else rate)
        if effective_capacity < 1:
            raise ValueError("Capacity must be at least 1")

        self.rate = rate
        self.period = float(period)
        self.capacity = effective_capacity
        self._tokens = self.capacity # Start full
        self._last_refill_time = time.monotonic()
        self._lock = asyncio.Lock()

    def _get_tokens_per_second(self) -> float:
        """Return the refill speed in tokens per second."""
        return self.rate / self.period

    async def _refill(self):
        """Replenishes tokens based on elapsed time. Must be called under lock."""
        now = time.monotonic()
        elapsed = now - self._last_refill_time
        if elapsed > 0:
            tokens_to_add = elapsed * self._get_tokens_per_second()
            self._tokens = min(self.capacity, self._tokens + tokens_to_add)
            self._last_refill_time = now

    async def acquire(self):
        """
        Acquires a token, waiting if necessary.

        The lock is held only while the bucket is inspected or updated and is
        always released by ``async with`` before sleeping. A task cancelled
        while it waits therefore never leaves the lock in an inconsistent state.
        """
        while True:
            async with self._lock:
                await self._refill()
                if self._tokens >= 1:
                    # Consume a token
                    self._tokens -= 1.0
                    return
                # Time until the missing fraction of a token has been refilled.
                tokens_needed = 1.0 - self._tokens
                wait_time = tokens_needed / self._get_tokens_per_second()

            # Sleep outside the lock so other tasks can check the bucket meanwhile;
            # the loop then re-checks, since another task may have taken the token.
            logger.debug(f"Rate limit hit. Waiting for {wait_time:.3f}s for next token.")
            await asyncio.sleep(wait_time)


# Download, Extract & Prepare Dataset

## Fetch Dataset from HuggingFace

In [None]:
dataset_path = Path(DATASET_CSV)

# Either reuse the cached CSV (SKIP_FETCH and the file exists) or fetch the split
# from HuggingFace and cache it. Both branches bind `dataset_df` or raise, so no
# fallback loading is needed after the if/else.
if dataset_path.is_file() and SKIP_FETCH:
    logger.info(f"Dataset file '{DATASET_CSV}' exists and SKIP_FETCH is True. Skipping.")
    display(Markdown(f"✅ Skipping fetch: Found existing `{DATASET_CSV}`."))
    # Load the existing dataframe for use in Step 2
    try:
        dataset_df = pd.read_csv(dataset_path, dtype=str) # Load all as string initially
        logger.info(f"Loaded existing dataset from {DATASET_CSV} ({len(dataset_df)} rows).")
    except Exception as e:
        logger.error(f"Failed to load existing dataset file {DATASET_CSV}: {e}")
        display(Markdown(f"❌ Error loading existing `{DATASET_CSV}`: {e}. Please delete the file or set SKIP_FETCH=False."))
        raise
else:
    logger.info(f"Fetching dataset '{HF_DATASET_NAME}' (split: '{HF_DATASET_SPLIT}') from HuggingFace...")
    try:
        dataset = load_dataset(HF_DATASET_NAME, split=HF_DATASET_SPLIT, cache_dir=HF_CACHE_DIR)
        dataset_df = dataset.to_pandas()
        # Ensure key columns are strings
        for col in ['qid', 'video_id', 'question', 'question_type']:
            if col in dataset_df.columns:
                dataset_df[col] = dataset_df[col].astype(str)
        dataset_df.to_csv(dataset_path, index=False, encoding='utf-8')
        logger.info(f"Successfully fetched dataset and saved to {DATASET_CSV} ({len(dataset_df)} rows).")
        display(Markdown(f"✅ Dataset fetched and saved to `{DATASET_CSV}` ({len(dataset_df)} rows)."))
        display(dataset_df.head())
    except Exception as e:
        logger.error(f"Failed to fetch or save dataset: {e}", exc_info=True)
        display(Markdown(f"❌ **Error fetching dataset:** {e}. Check connection, dataset name/split, cache dir permissions."))
        # Chain the original exception so the root cause stays in the traceback.
        raise RuntimeError("Dataset fetching failed. Cannot continue.") from e

## Download, Extract, and Prepare Videos

Downloads, extracts, and uploads videos (to GCS or File API). Updates the metadata CSV (`METADATA_FILE`).

In [None]:
# Re-read the dataset CSV with every column as a string, replacing any
# `dataset_df` left in the kernel by an earlier cell. Requires DATASET_CSV to
# exist on disk already.
dataset_path = Path(DATASET_CSV)
dataset_df = pd.read_csv(dataset_path, dtype=str)

### Download Video Archive

In [15]:
display(Markdown("### Downloading Archive"))
# Download unless the archive is already on disk AND skipping was requested.
if not (zip_file_path.is_file() and SKIP_DOWNLOAD_ZIP):
    try:
        download_file_with_progress(VIDEO_ZIP_URL, zip_file_path)
        display(Markdown(f"✅ Downloaded: `{zip_file_path}`."))
    except Exception as e:
        display(Markdown(f"❌ **Download Error:** {e}."))
        # A failed download is tolerated only when both later steps are skipped too.
        if SKIP_EXTRACT and SKIP_PREPARE:
            display(Markdown("⚠️ Download failed, skipping steps."))
        else:
            raise RuntimeError("Download failed.")
else:
    display(Markdown(f"✅ Skipping download: Found `{zip_file_path}`."))

2025-04-25 15:03:10,209 - INFO - Successfully downloaded downloads/all_videos.zip


✅ Downloaded: `downloads/all_videos.zip`.

### Extract Video Archive

In [16]:
display(Markdown("### Extracting Archive"))
# Three cases: videos already extracted and skipping requested; no archive to
# extract; otherwise extract and flatten the directory layout.
if any(extracted_videos_path.glob('*.mp4')) and SKIP_EXTRACT:
    display(Markdown(f"✅ Skipping extraction: Files in `{extracted_videos_path}`."))
elif not zip_file_path.is_file():
    display(Markdown(f"❌ Cannot extract: `{zip_file_path}` missing."))
    # Without an archive the prepare step has nothing to work with.
    if SKIP_PREPARE:
        display(Markdown("⚠️ Extraction skipped (no zip)."))
    else:
        raise RuntimeError("Zip missing.")
else:
    try:
        extract_zip(zip_file_path, extracted_videos_path)
        # Move all videos to main directory
        move_videos_to_main_directory(extracted_videos_path)
        display(Markdown(f"✅ Extracted to `{extracted_videos_path}` and moved all videos to main directory."))
    except Exception as e:
        display(Markdown(f"❌ **Extraction Error:** {e}."))
        if SKIP_PREPARE:
            display(Markdown("⚠️ Extraction failed, skipping prep."))
        else:
            raise RuntimeError("Extraction failed.")

### Extracting Archive

2025-04-25 15:03:10,246 - INFO - Extracting all_videos.zip to extracted_videos...


Extracting all_videos.zip:   0%|          | 0/290 [00:00<?, ?it/s]

2025-04-25 15:03:16,417 - INFO - Successfully extracted downloads/all_videos.zip to extracted_videos
2025-04-25 15:03:16,419 - INFO - Moving all videos to main directory: extracted_videos
2025-04-25 15:03:16,424 - INFO - Moved 50 videos so far...
2025-04-25 15:03:16,426 - INFO - Moved 100 videos so far...
2025-04-25 15:03:16,429 - INFO - Moved 150 videos so far...
2025-04-25 15:03:16,431 - INFO - Moved 200 videos so far...
2025-04-25 15:03:16,433 - INFO - Moved 250 videos so far...
2025-04-25 15:03:16,434 - INFO - Moved 289 videos to main directory. Failed: 0


✅ Extracted to `extracted_videos` and moved all videos to main directory.

### Slow/Speed Up Videos
Losslessly changes the video speed while re-encoding the audio to maintain pitch. As
a result, it is very fast. It could be made faster still by using asyncio to run
ffmpeg concurrently.

In [17]:
async def run_subprocess(cmd, check=True, capture_output=False):
    """Run an external command asynchronously and wait for it to exit.

    Args:
        cmd: Sequence holding the program followed by its arguments. It is passed
            to ``asyncio.create_subprocess_exec``, so no shell is involved.
        check: When True, a non-zero exit status raises
            ``subprocess.CalledProcessError``.
        capture_output: When True, stdout is captured and returned; otherwise it
            is discarded.

    Returns:
        Tuple ``(stdout, stderr, returncode)``. ``stdout`` and ``stderr`` are
        ``bytes`` when the corresponding stream was piped and ``None`` when it
        was sent to DEVNULL.

    Raises:
        subprocess.CalledProcessError: If ``check`` is True and the command exits
            with a non-zero status. The captured stdout/stderr bytes are attached
            as ``output`` / ``stderr`` so callers can report them.
    """
    stdout_pipe = asyncio.subprocess.PIPE if capture_output else asyncio.subprocess.DEVNULL
    # stderr is also captured whenever check=True so a failure can carry its diagnostics.
    stderr_pipe = asyncio.subprocess.PIPE if check or capture_output else asyncio.subprocess.DEVNULL

    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=stdout_pipe,
        stderr=stderr_pipe
    )
    # communicate() drains the pipes while waiting, so a chatty child cannot block on a full pipe.
    stdout, stderr = await process.communicate()

    if check and process.returncode != 0:
        # CalledProcessError already renders the command and exit status itself; no
        # hand-built message is needed (joining cmd would fail for non-str arguments).
        raise subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)

    return stdout, stderr, process.returncode

async def process_single_video(vid_path, speed_videos_path, VIDEO_SPEED_FACTOR, semaphore):
    """Asynchronously write a speed-adjusted copy of one video.

    The video stream is not re-encoded: its H.264 bitstream is extracted and
    remuxed at ``original_fps * VIDEO_SPEED_FACTOR``. The audio track is
    re-encoded to AAC through chained ``atempo`` filters so its tempo changes by
    the same factor.

    Args:
        vid_path: ``pathlib.Path`` of the source video.
        speed_videos_path: ``pathlib.Path`` of the output directory; the result is
            written there under the source file's name.
        VIDEO_SPEED_FACTOR: Playback-speed multiplier. ``1.0`` copies the file
            unchanged; any other value must be a finite number greater than zero.
        semaphore: ``asyncio.Semaphore`` limiting how many videos are handled at once.

    Returns:
        ``'skipped'`` if the output file already exists, ``'processed'`` on
        success, or ``'error'`` when copying or an ffmpeg/ffprobe step fails
        (the failure is logged instead of raised).
    """

    def _report_error(message):
        # `logger` may be undefined if the logging setup cell was not run; fall back to print.
        try:
            logger.error(message)
        except NameError:
            print(message)

    vid_path_str = str(vid_path.resolve())
    out_path = speed_videos_path / vid_path.name
    out_path_str = str(out_path.resolve())

    async with semaphore: # Limit concurrency
        if out_path.is_file():
            return 'skipped'

        if VIDEO_SPEED_FACTOR == 1.0:
            try:
                # Use asyncio.to_thread for potentially blocking I/O
                await asyncio.to_thread(shutil.copy, vid_path_str, out_path_str)
                return 'processed'
            except Exception as e:
                _report_error(f"Error copying {vid_path.name}: {e}")
                return 'error'

        # --- Process video with speed change ---
        tf_bitstream_path = None
        tf_audio_path = None
        tf_final_path = None
        try:
            # Zero, negative or non-finite factors would never leave the atempo loops below.
            if not (math.isfinite(VIDEO_SPEED_FACTOR) and VIDEO_SPEED_FACTOR > 0):
                raise ValueError(f"VIDEO_SPEED_FACTOR must be a finite positive number, got {VIDEO_SPEED_FACTOR!r}")

            # Create the temporary files one at a time and record each path immediately,
            # so a failure part-way through still leaves every created file known to the
            # cleanup step. Each file is closed before ffmpeg writes to it.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".h264") as tf_b:
                tf_bitstream_name = tf_b.name
                tf_bitstream_path = Path(tf_bitstream_name)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".aac") as tf_a:
                tf_audio_name = tf_a.name
                tf_audio_path = Path(tf_audio_name)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tf_f:
                tf_final_name = tf_f.name
                tf_final_path = Path(tf_final_name)

            # Get original FPS (ffprobe prints r_frame_rate as a fraction such as "30000/1001")
            ffprobe_cmd = [
                "ffprobe", "-v", "error", "-select_streams", "v", "-of", "default=noprint_wrappers=1:nokey=1",
                "-show_entries", "stream=r_frame_rate", vid_path_str
            ]
            stdout, _, _ = await run_subprocess(ffprobe_cmd, check=True, capture_output=True)
            fps = float(fractions.Fraction(stdout.decode().strip()))
            new_fps = fps * VIDEO_SPEED_FACTOR

            # Extract and speed up audio. Each atempo stage is kept within [0.5, 2.0],
            # so larger changes are expressed as a chain of stages.
            factor = VIDEO_SPEED_FACTOR
            filter_parts = []
            while factor > 2.0:
                filter_parts.append("atempo=2.0")
                factor /= 2.0
            while factor < 0.5:
                filter_parts.append("atempo=0.5")
                factor /= 0.5
            if abs(factor - 1.0) > 1e-6:
                filter_parts.append(f"atempo={factor:.6f}")

            if not filter_parts:
                # Factor is within 1e-6 of 1.0: no tempo change needed, copy the audio as-is.
                audio_cmd = ["ffmpeg", "-y", "-i", vid_path_str, "-vn", "-c:a", "copy", tf_audio_name]
            else:
                audio_filter = ",".join(filter_parts)
                audio_cmd = ["ffmpeg", "-y", "-i", vid_path_str, "-vn", "-filter:a", audio_filter, "-c:a", "aac", "-b:a", "128k", tf_audio_name]
            await run_subprocess(audio_cmd, check=True)

            # Extract h264 bitstream
            extract_cmd = ["ffmpeg", "-y", "-i", vid_path_str, "-map", "0:v", "-c:v", "copy", "-bsf:v", "h264_mp4toannexb", tf_bitstream_name]
            await run_subprocess(extract_cmd, check=True)

            # Remux bitstream with new audio and FPS
            remux_cmd = ["ffmpeg", "-y", "-fflags", "+genpts", "-r", f"{new_fps:.6f}", "-i", tf_bitstream_name, "-i", tf_audio_name, "-map", "0:v", "-map", "1:a", "-c:v", "copy", "-c:a", "copy", tf_final_name]
            await run_subprocess(remux_cmd, check=True)

            # Move final file (use asyncio.to_thread)
            await asyncio.to_thread(shutil.move, tf_final_name, out_path_str)
            return 'processed'

        except Exception as e:
            err_msg = f"Error processing {vid_path.name}: {e}"
            # Include ffmpeg stderr if available
            if isinstance(e, subprocess.CalledProcessError) and e.stderr:
                err_msg += f"\nFFmpeg/FFprobe Stderr:\n{e.stderr.decode(errors='ignore')}"
            _report_error(err_msg)
            return 'error'
        finally:
            # Must be a plain function: asyncio.to_thread only calls it in a worker thread.
            # A coroutine function here would just create a never-awaited coroutine and
            # leave every temporary file on disk.
            def _cleanup():
                for tmp_path in (tf_bitstream_path, tf_audio_path, tf_final_path):
                    if tmp_path is None:
                        continue
                    try:
                        # tf_final is normally already moved away; missing_ok covers that.
                        tmp_path.unlink(missing_ok=True)
                    except OSError as cleanup_err:
                        # Best effort: a cleanup failure must not replace the job's result.
                        _report_error(f"Could not remove temporary file {tmp_path}: {cleanup_err}")

            # Run sync cleanup in thread only if paths were assigned
            if tf_bitstream_path or tf_audio_path or tf_final_path:
                await asyncio.to_thread(_cleanup)


# --- Main Cell Logic ---

async def run_processing(): # Wrap in an async function to use await
    """Speed-adjust every extracted video concurrently and print a summary.

    Starts one `process_single_video` task per `*.mp4` file found in
    `extracted_videos_path`, bounded by a semaphore of size `MAX_ASYNC_WORKERS`,
    and advances a progress bar as tasks finish.

    Raises:
        RuntimeError: If `dataset_df` is None.
    """

    def _emit(level, message):
        # Prefer the notebook logger; fall back to print when it is not defined.
        try:
            getattr(logger, level)(message)
        except NameError:
            print(message)

    display(Markdown("### Preparing Videos"))
    if dataset_df is None:
        raise RuntimeError("Dataset DF unavailable.")

    # The output directory must exist before any task writes into it.
    speed_videos_path.mkdir(parents=True, exist_ok=True)

    # The dataset's ID count is only reported; the work list comes from the files on disk.
    unique_ids = sorted(list(dataset_df['video_id'].dropna().unique()))
    _emit("info", f"Processing {len(unique_ids)} unique video IDs.")

    source_videos = list(extracted_videos_path.glob("*.mp4"))

    worker_cap = MAX_ASYNC_WORKERS
    _emit("info", f"Using concurrency limit: {worker_cap}")
    gate = asyncio.Semaphore(worker_cap)

    # Schedule one task per video; the shared semaphore throttles how many run at once.
    print(f"Preparing tasks for {len(source_videos)} videos...")
    jobs = [
        asyncio.create_task(
            process_single_video(source, speed_videos_path, VIDEO_SPEED_FACTOR, gate)
        )
        for source in source_videos
    ]

    # Collect statuses in completion order while driving the progress bar.
    print(f"Transforming {len(jobs)} Videos...")
    outcomes = []
    with tqdm(total=len(jobs), desc="Transforming Videos", unit="video") as pbar:
        for finished in asyncio.as_completed(jobs):
            try:
                outcomes.append(await finished)
            except Exception as exc:
                # process_single_video normally reports 'error' itself; this covers
                # a task that raised unexpectedly.
                _emit("error", f"Task for a video failed: {exc}")
                outcomes.append('error')
            finally:
                pbar.update(1)

    # Summarise how many videos ended in each status.
    tally = {status: outcomes.count(status) for status in ('processed', 'skipped', 'error')}

    print(f"\n\n{tally['skipped']} videos skipped, {tally['processed']} videos processed, {tally['error']} errors, {len(source_videos)} total.")

# --- Execute the async processing ---
# Notebook kernels allow `await` at the top level of a cell, so the coroutine is
# awaited directly here.
# If top-level await isn't enabled in your environment, you might need nest_asyncio
# or to drive the coroutine manually.
await run_processing()


### Preparing Videos

2025-04-25 15:03:16,580 - INFO - Processing 289 unique video IDs.
2025-04-25 15:03:16,581 - INFO - Using concurrency limit: 15
Preparing tasks for 289 videos...
Transforming 289 Videos...


Transforming Videos:   0%|          | 0/289 [00:00<?, ?video/s]

  del work_item
  del work_item
  del work_item
  del work_item
  del work_item
  del work_item
  del work_item
  del work_item
  del work_item
  del work_item




0 videos skipped, 289 videos processed, 0 errors, 289 total.


### Prepare and Upload Videos to GCS or File API

In [21]:
# --- Prepare Videos (Upload GCS/File API) & Update Metadata --- #
# Uploads each video found in `speed_videos_path` to the active backend (GCS when
# USE_VERTEX is truthy, otherwise the File API) and hands the per-video results to
# create_or_update_metadata after every batch.
# Set SKIP_PREPARE to True to skip this step if the videos were already uploaded
# (into the bucket in Vertex AI mode, or into the File API with a Gemini API key).
# NOTE(review): SKIP_PREPARE is assigned here, but the extraction cell earlier in the
# notebook already reads it - presumably it is also defined in the config cell; confirm.
SKIP_PREPARE = False
display(Markdown("### Preparing Videos & Updating Metadata"))
if SKIP_PREPARE:
    display(Markdown("✅ Skipping video preparation."))
elif storage_client is None:
     display(Markdown("❌ Cannot prepare: Client not ready.")); raise RuntimeError("Client missing.")
else:
    if dataset_df is None: raise RuntimeError("Dataset DF unavailable.")
    all_video_ids = sorted(list(dataset_df['video_id'].dropna().unique()))
    logger.info(f"Processing {len(all_video_ids)} unique video IDs.")

    # Optionally cap the run to the first MAX_VIDEOS_TO_PROCESS IDs (in sorted order).
    videos_to_process_ids = all_video_ids
    if MAX_VIDEOS_TO_PROCESS is not None:
        videos_to_process_ids = all_video_ids[:MAX_VIDEOS_TO_PROCESS]
        logger.info(f"Limiting to {len(videos_to_process_ids)} videos.")

    # Load existing metadata to check status
    # existing_statuses: video_id -> status string from a previous run.
    # resource_ids: video_id -> resource ID for the ACTIVE backend only (rows without one are dropped).
    existing_statuses = {}
    resource_ids = {}
    required_id_col = 'gcs_uri' if USE_VERTEX else 'file_api_name'
    if Path(METADATA_FILE).is_file():
        try:
            existing_df = pd.read_csv(METADATA_FILE, dtype=str)
            if 'video_id' in existing_df.columns and 'status' in existing_df.columns:
                existing_statuses = pd.Series(existing_df.status.values, index=existing_df.video_id).to_dict()
            if 'video_id' in existing_df.columns and required_id_col in existing_df.columns:
                 resource_ids = pd.Series(existing_df[required_id_col].values, index=existing_df.video_id).dropna().to_dict()
            logger.info("Checked existing metadata statuses/IDs.")
        # An unreadable metadata file is not fatal: every video is then treated as pending.
        except Exception as e: logger.warning(f"Could not load existing metadata: {e}")

    video_metadata_updates = {}
    processed_count, upload_failures, missing_local, skipped_count = 0, 0, 0, 0
    # UPLOAD_BATCH_SIZE_GCS sets the batch size for both backends, not only GCS.
    num_batches = math.ceil(len(videos_to_process_ids) / UPLOAD_BATCH_SIZE_GCS)
    prep_mode = "GCS Upload" if USE_VERTEX else "File API Upload"

    with tqdm(total=len(videos_to_process_ids), desc=f"Preparing ({prep_mode})") as pbar:
        for i in range(0, len(videos_to_process_ids), UPLOAD_BATCH_SIZE_GCS):
            batch_ids = videos_to_process_ids[i : i + UPLOAD_BATCH_SIZE_GCS]
            batch_num = (i // UPLOAD_BATCH_SIZE_GCS) + 1
            logger.info(f"Prep Batch {batch_num}/{num_batches}...")
            current_batch_updates = {}

            for video_id in batch_ids:
                pbar.set_postfix_str(f"ID: {video_id}")
                # Start from a pessimistic record; the branches below overwrite what they establish.
                update_data = {"local_path": None, "gcs_uri": None, "file_api_name": None, "status": "error_unknown"}
                local_video_path = speed_videos_path / f"{video_id}.mp4"
                current_status = existing_statuses.get(video_id, 'pending')
                existing_resource_id = resource_ids.get(video_id)
                is_already_processed = False

                # Check if already uploaded and verified
                # NOTE(review): either "uploaded_*" status is accepted regardless of the active
                # backend, and the old status is kept when verification succeeds - confirm that
                # later cells do not rely on the status matching the backend in use.
                if current_status in ['uploaded_gcs', 'uploaded_file_api'] and existing_resource_id:
                     verified = False
                     if USE_VERTEX: verified = verify_gcs_file_exists(storage_client, existing_resource_id)
                     else: verified = verify_file_api_resource_exists(storage_client, existing_resource_id)
                     if verified:
                         logger.debug(f"Skipping verified video {video_id} ('{current_status}').")
                         is_already_processed = True
                         skipped_count += 1
                         update_data.update({ # Ensure metadata is consistent
                             'local_path': str(local_video_path) if local_video_path.is_file() else None,
                             'status': current_status,
                             required_id_col: existing_resource_id
                         })
                     else:
                         logger.warning(f"Video {video_id} ({current_status}) needs re-processing (verification failed).")
                elif current_status != 'pending':
                     logger.debug(f"Video {video_id} has non-pending status '{current_status}' but no verified resource ID. Re-processing.")

                # Verified videos are recorded and counted without being uploaded again.
                if is_already_processed:
                    processed_count += 1
                    current_batch_updates[video_id] = update_data
                    pbar.update(1)
                    continue

                # Process if needed
                if local_video_path.is_file():
                    update_data["local_path"] = str(local_video_path)
                    resource_id_result = None
                    # Both upload helpers are treated as returning a falsy value on failure.
                    if USE_VERTEX:
                        blob_name = f"videos/{video_id}.mp4"
                        resource_id_result = upload_to_gcs(storage_client, GCS_BUCKET, local_video_path, blob_name)
                        if resource_id_result: update_data.update({"gcs_uri": resource_id_result, "status": "uploaded_gcs"})
                        else: update_data["status"] = "gcs_upload_failed"; upload_failures += 1
                    else: # Gemini API
                        resource_id_result = upload_via_file_api(storage_client, local_video_path, f"vid_{video_id}")
                        if resource_id_result: update_data.update({"file_api_name": resource_id_result, "status": "uploaded_file_api"})
                        else: update_data["status"] = "file_api_upload_failed"; upload_failures += 1
                else:
                    logger.warning(f"Local file missing: {local_video_path}")
                    missing_local += 1
                    update_data["status"] = "local_missing"

                # processed_count covers every video looked at, including failures and missing files.
                current_batch_updates[video_id] = update_data
                processed_count += 1
                pbar.update(1)

            # Update metadata after batch
            # A failed metadata update is logged and the run continues; the batch's results
            # are still kept in video_metadata_updates.
            if current_batch_updates:
                 try: create_or_update_metadata(METADATA_FILE, dataset_df, current_batch_updates)
                 except Exception as e: logger.error(f"Metadata update failed batch {batch_num}: {e}")
                 video_metadata_updates.update(current_batch_updates)

    # The completion banner is shown even if some uploads failed; the log line carries the counts.
    logger.info(f"Prep finished. Checked: {processed_count}, Skipped(verified): {skipped_count}, Missing Local: {missing_local}, Upload Failures: {upload_failures}")
    display(Markdown(f"✅ Video preparation complete. See logs. Metadata: `{METADATA_FILE}`."))


### Preparing Videos & Updating Metadata

2025-04-25 15:19:33,438 - INFO - Processing 289 unique video IDs.


Preparing (File API Upload):   0%|          | 0/289 [00:00<?, ?it/s]

2025-04-25 15:19:33,443 - INFO - Prep Batch 1/29...
2025-04-25 15:19:34,727 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:19:34,975 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwqj4_bj9QgW99MzYS_F9PnvFPLFj8zR0cHawx1q5n-8QvDStpcdclKOLN-Rr1Y7qD0hpkfYdxELqgVbFBapl47LBJ1-X_igVwqEOSVX9w&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:19:35,832 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwqj4_bj9QgW99MzYS_F9PnvFPLFj8zR0cHawx1q5n-8QvDStpcdclKOLN-Rr1Y7qD0hpkfYdxELqgVbFBapl47LBJ1-X_igVwqEOSVX9w&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:19:35,834 - INFO - File API OK: speed_videos/0.5/-HAFFvsDCr4.mp4 -> files/iu58ecavbqoz
2025-04-25 15:19:37,003 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:19:37,209 -

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:20:25,971 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:20:27,615 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwoSno4giZk8DIb_M7nhRATMqAzgjSVgTCGFYXeKnNpegIQT7wWY2AaMVMJQnn9pFhJeW_-qgau0PssXPrlARved3NlMKidH_Hx5NQMD5G4&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:20:27,616 - INFO - File API OK: speed_videos/0.5/3i4wmfNpqRg.mp4 -> files/q1t7bztbv7gn
2025-04-25 15:20:28,834 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:20:29,035 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwprfqq0_1Jl2t3SqR0mZ3xeont6wQ79rF2wyn5Yi4siZZhDwFMSif9o1vP7qIEe0jfS6TNEqW1Ls2pqKZbDGdkoOu6kPaUPARqx98SjTg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:20:30,490 - INFO - HTTP Request: POST https://generativelangua

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:20:52,313 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:20:53,768 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrTyFgyDUx7FQDlVVCslBKng9bVAsdfLjAIbZUxLKOj6xmCgkajvfReYKn9H8XewInvoywUnIQ0FpCajwk5JoysdahD9Z6nWlSIHCaVLg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:20:53,770 - INFO - File API OK: speed_videos/0.5/68kf4Xd63rc.mp4 -> files/lo61ln1q7g3d
2025-04-25 15:20:55,018 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:20:55,198 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwoQ64hDXGNZ-mSP0CMdSxn3pVzU3tFyxB_llBuLE1Pr5vvWMUEWZhianTZdjzTPhEOjv2dgbGLqtaQn11CYTSSOt6Yslho7T-iqO-Ce6-4&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:20:55,364 - INFO - HTTP Request: POST https://generativelangua

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:21:16,435 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:21:16,610 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpxTB4K6sdhyu-QsuNXNmBLfEIcxmdZscj4m3XP_wXyVl6XJUxqQDypQxc_IQWKV-UOKeNy_Y9B-l8o9hThmPfI8wsXL4I9toTi9OYOoN8&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:21:16,771 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpxTB4K6sdhyu-QsuNXNmBLfEIcxmdZscj4m3XP_wXyVl6XJUxqQDypQxc_IQWKV-UOKeNy_Y9B-l8o9hThmPfI8wsXL4I9toTi9OYOoN8&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:21:16,934 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpxTB4K6sdhyu-QsuNXNmBLfEIcxmdZscj4m3XP_wXyVl6XJUxqQDypQxc_IQWKV-UOKeNy_Y9B-l8o9hThmPfI8wsXL4I9toTi9OYOoN8&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:21:17,727 - IN

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:21:37,145 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:21:37,885 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwqpx07zq6kLeYrkdYxtw4LfL-T5WjOT6SUft5JAFoDTwTBvRQr0pChxABaaup1vHijTN4pF6vDfJZKGRTgyKSEFWwoH4frKVPV6qyGZHg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:21:37,887 - INFO - File API OK: speed_videos/0.5/Ai4fCLMGL9Y.mp4 -> files/vanulnnyg0f
2025-04-25 15:21:38,468 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:21:39,334 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwpxah9MHFFr19CmwUeDlLbAnEhwcEuVNDCoNGHqj3FSCPRB12_VvIt7W0ZVEHfSKkvwJ1bdR8R26_vtZOGQ_CllVLgPq5MUpoqp2VSo8HY&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:21:39,335 - INFO - File API OK: speed_videos/0.5/AoUnw7jcHhg.mp

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:22:00,357 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:22:00,519 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpFx8OXei-YBOaIV3zFyCONSwJJsjwxlmwP_fW5vnj9xoYQcHcl7g2vbv6mjGM6gVC1tyHgpN402KbDEYpqKkIAnY8F2VSfDsE81qdSLW4&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:22:01,311 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpFx8OXei-YBOaIV3zFyCONSwJJsjwxlmwP_fW5vnj9xoYQcHcl7g2vbv6mjGM6gVC1tyHgpN402KbDEYpqKkIAnY8F2VSfDsE81qdSLW4&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:22:01,312 - INFO - File API OK: speed_videos/0.5/CTYUD-TtBPA.mp4 -> files/tfezmd2gvvz2
2025-04-25 15:22:02,155 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:22:02,341 - INFO - HTTP Request: POST https://generativelangu

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:22:21,447 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:22:21,620 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwogGW5xEG5ALFNBxB93v-dJfrsqhw-P6BXWjG6PpZ5mV0YRRQUxrneV2yO1HYbKUHitjGRxNigRXviTEEuoYzUV23oAM59iuCW24_P_6pY&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:22:22,436 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwogGW5xEG5ALFNBxB93v-dJfrsqhw-P6BXWjG6PpZ5mV0YRRQUxrneV2yO1HYbKUHitjGRxNigRXviTEEuoYzUV23oAM59iuCW24_P_6pY&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:22:22,437 - INFO - File API OK: speed_videos/0.5/E2ILLN8TiPA.mp4 -> files/m5gnyqn9mr2h
2025-04-25 15:22:23,567 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:22:23,742 - INFO - HTTP Request: POST https://generativelangu

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:22:42,045 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:22:42,236 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwqp8U4ai-6CTGOM1CcPeSRV8arcwG96yFHF1E4LYG25n6ofOspxaQ0RhzIZWqh-WF63rJ_oDucLey6fo854Umtf9V24pmIlcsahwQpNtg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:22:43,653 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwqp8U4ai-6CTGOM1CcPeSRV8arcwG96yFHF1E4LYG25n6ofOspxaQ0RhzIZWqh-WF63rJ_oDucLey6fo854Umtf9V24pmIlcsahwQpNtg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:22:43,654 - INFO - File API OK: speed_videos/0.5/H-9qGWrW83M.mp4 -> files/vh16vlhn4b3x
2025-04-25 15:22:44,167 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:22:44,333 - INFO - HTTP Request: POST https://generativelanguag

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:23:01,447 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:23:02,320 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrH5SJjJA3FxyhNZMLfbxGim6B4mO_ImbZ8msv8uGxE-fhOMRfe34RNzRFMJkfSUvKh_0j3ixfmWGu6UF_RTkcvTYA-LOVa8cahlWnRGLk&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:23:02,321 - INFO - File API OK: speed_videos/0.5/JmEOYcf8wiE.mp4 -> files/jewa8fbqjx5w
2025-04-25 15:23:02,925 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:23:03,084 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwo0bkfbgGqPTLoPrilEbPDy8pEv1aIZQzwi1g7lPMTqfcqvPC5l3-ZILcOIjk4ZGIE_2a1gGGHQ-digxK0E1ycTppWuKhIUFDT443dLFg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:23:03,924 - INFO - HTTP Request: POST https://generativelangua

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:23:17,630 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:23:17,792 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrPetfcaRVaIo-8lNyXUu9EfoEHAkLfGV7Y0bQDEf5LO3eWZirdQQ6QK_9RoLErj56NoKAi-UwiWlr9BUqoHyoMDFU95w_4DJKQpiWHDw&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:23:18,608 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrPetfcaRVaIo-8lNyXUu9EfoEHAkLfGV7Y0bQDEf5LO3eWZirdQQ6QK_9RoLErj56NoKAi-UwiWlr9BUqoHyoMDFU95w_4DJKQpiWHDw&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:23:18,610 - INFO - File API OK: speed_videos/0.5/L9dnb-wfsnc.mp4 -> files/yza2i60kmsi2
2025-04-25 15:23:19,175 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:23:19,356 - INFO - HTTP Request: POST https://generativelanguag

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:23:40,367 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:23:41,882 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwp35P8GWtomfBVB-fwGNsITstJYev44z4pLTi7ExZSXqEJ24FASaVTnUw4e48h6XGRwjYX_YdUSS12faZlBM4qE-xBZ2KPUY4PuGa5UZ84&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:23:41,884 - INFO - File API OK: speed_videos/0.5/NK6UDnyZf4g.mp4 -> files/wzt290o2qamw
2025-04-25 15:23:42,424 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:23:42,602 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpZhWhp_dtpy9px7B0ZaUiBDPixdW9DAoNMDileR6sSwJgP0zstyY-DdjHuUSawn3X_FL-MSyu0Tep9RK3FNCykFbt7klwqQwVO5PzZqQ&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:23:43,340 - INFO - HTTP Request: POST https://generativelangua

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:24:04,464 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:24:04,653 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwptbudSbQkxjisfTO-VvWx0vuhJCZ9sw95vpof56xFw0bnou1OfhOKXE0n2_9B9YJ6GpdoWwkb09LMa9EFeLdxHdyAHzj0UoCScPHcxmQ&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:24:05,950 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwptbudSbQkxjisfTO-VvWx0vuhJCZ9sw95vpof56xFw0bnou1OfhOKXE0n2_9B9YJ6GpdoWwkb09LMa9EFeLdxHdyAHzj0UoCScPHcxmQ&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:24:05,951 - INFO - File API OK: speed_videos/0.5/PPLTAZ2pBK0.mp4 -> files/24pig69k28lr
2025-04-25 15:24:06,519 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:24:06,704 - INFO - HTTP Request: POST https://generativelanguag

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:24:26,853 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:24:27,059 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwqsT48tHA8K5NlDUHpYh3r49yWt83HtzmfMiEeF0hmzU69cTVEhww_xjHwoAYMxXbrbPAuYEONAdbwGcjcJDT-4zGXVYq3XBTWuL2QrWqA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:24:27,879 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwqsT48tHA8K5NlDUHpYh3r49yWt83HtzmfMiEeF0hmzU69cTVEhww_xjHwoAYMxXbrbPAuYEONAdbwGcjcJDT-4zGXVYq3XBTWuL2QrWqA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:24:27,881 - INFO - File API OK: speed_videos/0.5/RdDzFuX1b1s.mp4 -> files/n9uwjyy5k93p
2025-04-25 15:24:28,442 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:24:28,644 - INFO - HTTP Request: POST https://generativelangu

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:24:47,346 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:24:48,815 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwoYsB2rLIWV9jbVd6iSYftUEaWc-3Rb6FFL_nJLBFyuGDy9xQ9o7B7M1baUQ073acWyDivLtdXCRdQGintbm8_O4JfndWNUv0ZtZiREVLw&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:24:48,817 - INFO - File API OK: speed_videos/0.5/VoyvvpcrUgo.mp4 -> files/zer94gq1a936
2025-04-25 15:24:49,415 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:24:49,609 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwoIrMJ3c4H1vzw6zQScwU_Uw-KikDH34vvWNFITz4zWJJLVmx1QxaYncEPE02ry8gujPPO3I8U0ef57jgSmktHMlIf_s438CcGe06sbUPk&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:24:50,445 - INFO - HTTP Request: POST https://generativelangu

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:25:03,944 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:25:04,143 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwo8-J19c1D4Dg9si_PEdwbG3d9q2piS66YbJDN8s8w22h5uOKAp0JBL7JlNIVqV_JGFl8NycfE71kVAs_oNQeM_0Zgfng-D3X2YB9UgdiU&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:04,326 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwo8-J19c1D4Dg9si_PEdwbG3d9q2piS66YbJDN8s8w22h5uOKAp0JBL7JlNIVqV_JGFl8NycfE71kVAs_oNQeM_0Zgfng-D3X2YB9UgdiU&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:04,485 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwo8-J19c1D4Dg9si_PEdwbG3d9q2piS66YbJDN8s8w22h5uOKAp0JBL7JlNIVqV_JGFl8NycfE71kVAs_oNQeM_0Zgfng-D3X2YB9UgdiU&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:05,291 - IN

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:25:20,448 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:25:20,629 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwqTHV4Rl7zaXoIev7HAVLslCru8kS4p46w9ZVlXlUWJdxdE7AokrHrWCOLyRGPXDKPOGRlnpQ03siuh6JeTU-9a35WOmvMd66PSMsLdkA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:21,580 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwqTHV4Rl7zaXoIev7HAVLslCru8kS4p46w9ZVlXlUWJdxdE7AokrHrWCOLyRGPXDKPOGRlnpQ03siuh6JeTU-9a35WOmvMd66PSMsLdkA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:21,582 - INFO - File API OK: speed_videos/0.5/Z5fnnWTVTrI.mp4 -> files/p0wj1jenm575
2025-04-25 15:25:22,064 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:25:22,262 - INFO - HTTP Request: POST https://generativelanguag

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:25:37,839 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:25:38,805 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwplc7MlOEhNEwS_KP1zEMdjF4ndA5YJHyYbjCrw6ZaBUht0EuaW3cZjO9lzn21L3nHiQWFBxc_luUAKCml_HRBzpB3UBagg7b7nRM2v6w&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:38,806 - INFO - File API OK: speed_videos/0.5/_uSQXykxzT0.mp4 -> files/3b5m2fxndrr4
2025-04-25 15:25:39,427 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:25:39,649 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwqDxbz9qpemQ1V247Srw2LlPG_Yepo5GyrrlSVcLLu7DAcseVRZzDgOhzjWK_z9w_NifF5zf3UGDMqEEj4BieFvQlGQaW4YAz3ldaXiRw&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:40,558 - INFO - HTTP Request: POST https://generativelanguag

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:25:55,103 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:25:55,896 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrWxgjXSpeHcsguXpGAduxT20E51pDnZEsJ35IcbceY9V3wzzMkwSFtPeluiG8QeXIN1ObHKcKg6oTGrxAmjX3NdwtyBbtK2AN_hDbP_ak&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:55,898 - INFO - File API OK: speed_videos/0.5/budZcalJRO8.mp4 -> files/rdvltt9oyage
2025-04-25 15:25:56,523 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:25:56,706 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrGj26gnUK8vWNPLWhOFmc-bCFnYAnIYn7ratpXL68XZohPVDMK0jswsOuwAmDZikM2Cc8fWU4iCvNlpsgQBxXxxMiBEUBQ0f5t2DNj8Ns&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:25:56,862 - INFO - HTTP Request: POST https://generativelangu

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:26:16,185 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:26:17,612 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwqu_jbDi_yVR-KKIHs93c65zRo6PgxkJzT-WYZrnlzq071BdWG6-rHKfDmLnkb4_bLF1XWx7zLsFbAdqFnuaLGhav2tnCMEXDgpz8GGNg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:26:17,613 - INFO - File API OK: speed_videos/0.5/diEfe0tsJY8.mp4 -> files/df3tkz6rxlj5
2025-04-25 15:26:18,203 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:26:19,050 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpCPVU7pPinzKK9taEfI0XPtPNkP2ZjX_kylavSfppBwaXdzwf5EU8tPkOZG7tG0PR7zUpTUntPO-URREt4IJ6GyqArK_tzpFGQpAaPAIQ&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:26:19,052 - INFO - File API OK: speed_videos/0.5/eDcteTYALgI.m

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:26:34,722 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:26:34,888 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwr82-Kqq2n1oycSIUnRIj63kleuW0za_YOIZd89a4p_-KvXxA3KnrvKjkLmeighg8UReXPt7vHhxHSmxj9PyfJ1SI5gMm2intceR3vawA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:26:35,050 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwr82-Kqq2n1oycSIUnRIj63kleuW0za_YOIZd89a4p_-KvXxA3KnrvKjkLmeighg8UReXPt7vHhxHSmxj9PyfJ1SI5gMm2intceR3vawA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:26:35,735 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwr82-Kqq2n1oycSIUnRIj63kleuW0za_YOIZd89a4p_-KvXxA3KnrvKjkLmeighg8UReXPt7vHhxHSmxj9PyfJ1SI5gMm2intceR3vawA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:26:35,737 - INFO 

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:26:55,797 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:26:57,149 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwoLHnN-82O-XigdahxCd3GSU9-rvnMAzWLDN3PW_P8NJvcIx4thKvqqAwX2ALvwLbtg7_i5DlSBlv6v3LwIZij_69lfKC3vsNP8dRD0SA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:26:57,151 - INFO - File API OK: speed_videos/0.5/ir89-1OXw5U.mp4 -> files/syimd6t76zp2
2025-04-25 15:26:58,285 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:26:59,052 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwq16DsZCkM_KNiuQQ9h_fM0jv0PUn6EgtIBg8njHOGYenIzju7Twm20rVEaN4I4RolXMAkKTfge-dKHqYCz-6WJBnRfxBc9bg59qU53G74&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:26:59,054 - INFO - File API OK: speed_videos/0.5/j3cE7hoF5aI.m

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:27:17,711 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:27:17,897 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwr-1kRn7oWIESwjupg0MsIal6m93iPsd9ijyqivwa7oIzTx4ddtyGwoiBR78Du5dpSNcSRfVEiI7-2RLoPY1kX3RLWcRMgZL2PiTu3IHaY&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:27:18,735 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwr-1kRn7oWIESwjupg0MsIal6m93iPsd9ijyqivwa7oIzTx4ddtyGwoiBR78Du5dpSNcSRfVEiI7-2RLoPY1kX3RLWcRMgZL2PiTu3IHaY&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:27:18,736 - INFO - File API OK: speed_videos/0.5/lAmfy5J8iEA.mp4 -> files/5o6uq2jk4yjx
2025-04-25 15:27:19,324 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:27:19,489 - INFO - HTTP Request: POST https://generativelangu

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:27:38,058 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:27:38,548 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwonlY54-mYPCAqdY837yb1vqGX6JAoVEhAbY0s93Df9aBXmQSLDEdX89refMSRbVwq5ctCfeFQGoogLCnqJLuD_c01QzG28jf1K5BZi-Y4&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:27:39,438 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwonlY54-mYPCAqdY837yb1vqGX6JAoVEhAbY0s93Df9aBXmQSLDEdX89refMSRbVwq5ctCfeFQGoogLCnqJLuD_c01QzG28jf1K5BZi-Y4&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:27:39,440 - INFO - File API OK: speed_videos/0.5/nBaWVmaLh1A.mp4 -> files/pbabkcbnd48f
2025-04-25 15:27:40,021 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:27:40,351 - INFO - HTTP Request: POST https://generativelangu

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:27:54,040 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:27:54,826 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwqMT0rMJ357P351QSpJVUiCm-cxfj5dOmzWg8ngAaC4UjvF2jgO_PiC9qj-QbblJsxeK4C7OmegpfRG9iXvLxdz9lG0pjEUD9OiByu5OvI&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:27:54,827 - INFO - File API OK: speed_videos/0.5/pUIkzB8KebU.mp4 -> files/c6ctvif4oug1
2025-04-25 15:27:55,389 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:27:55,564 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrUZ5hzDwxOTB09ssZ-R9WXnq8OyXMyGKHhUnQOB29NedfbQnYXo1a4rVL4v1sfGD3Hwz1O1cpzodc3ktMlPKxA7Ra4ICcMduHOVumtCg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:27:56,326 - INFO - HTTP Request: POST https://generativelangua

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:28:11,203 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:28:11,923 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwoSpG50q_SpHmifsvgFEBc1exx3IuPWVMLCzcQEi0_nGZaqHtbRPh3lqlWbYLzsRKNtcoEqdpk6lDFKFGu1BI_u0XA4vHGXDo9cjHGDZA&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:28:11,925 - INFO - File API OK: speed_videos/0.5/shh15L-O8KE.mp4 -> files/1l8uw7j3fh5u
2025-04-25 15:28:12,412 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:28:13,159 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrPkpL3W-FTXkn5Cc1JgYqX3-b-1BRbLSj0OiFsmN6HEAHL9a7flH-L1B8fl8Rn-WiGSvAoId8hh2MpsT-iKGi7TmPhYulbMILvWaDI&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:28:13,161 - INFO - File API OK: speed_videos/0.5/sj81PWrerDk.mp4 

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:28:25,953 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:28:26,773 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpDIOtjWs6Zs3Kkvm2CpY8NChkpNIcLgBjWCGxfNZT6gwk90FkFXGAEby52G1e4uhMPgU25xJriPnEsNNY99pgGhxLE2ezyLaw1tOwgog&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:28:26,774 - INFO - File API OK: speed_videos/0.5/uXvcjTHtZIs.mp4 -> files/w0v75uy7bzno
2025-04-25 15:28:27,317 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:28:28,173 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwpkiwtHq3T_yRKsqrDpC0xpuzi9SwMQHc2iW4E7wYl_yLFXHLdOxKtofxe6ZTWHjyNDdCEE7IzkIPk9_-_q9UihCrwIxoaUmUnrSiCxug&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:28:28,175 - INFO - File API OK: speed_videos/0.5/uYIWSSQNV4w.mp

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:28:44,393 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:28:44,575 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrJVHEVifS7DzJ6Fn-dVqP5HivjqjTltFYxzbFJ4ixaiFHhOdenBh5mk9dfYdzK6c6_SB_T9vAA-opHcsT8icdLihtUfOlrJNO1VccwfAI&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:28:45,315 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrJVHEVifS7DzJ6Fn-dVqP5HivjqjTltFYxzbFJ4ixaiFHhOdenBh5mk9dfYdzK6c6_SB_T9vAA-opHcsT8icdLihtUfOlrJNO1VccwfAI&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:28:45,317 - INFO - File API OK: speed_videos/0.5/vz9D4nqhTyk.mp4 -> files/w3z8qmj6ea4y
2025-04-25 15:28:45,893 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:28:46,758 - INFO - HTTP Request: POST https://generativelangu

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


2025-04-25 15:29:03,329 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:29:04,178 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2VwrNkCvjZie8gdJaIWeyMAgyBOUTheILbbHd_g78Mt9WiUt0t9xibfd4AlNqezZlPam8Vr1fvyQJpAGmbD-bWFVRdJHRpxmTY5muQIflig&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:29:04,179 - INFO - File API OK: speed_videos/0.5/yFHKF8TjT5o.mp4 -> files/z5aht67pjkiz
2025-04-25 15:29:04,723 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files "HTTP/1.1 200 OK"
2025-04-25 15:29:05,503 - INFO - HTTP Request: POST https://generativelanguage.googleapis.com/upload/v1beta/files?upload_id=AAO2Vwq2MhAynrpwMVOrItmMJs1q1ldK-ovKp6LbQJZqI6mWXrCNR9Dubkf2oyRKPs8RjjxVY_WYUc4IWlL4-LqY8iVX9bKDZ2UoJvv3Ero8Uwg&upload_protocol=resumable "HTTP/1.1 200 OK"
2025-04-25 15:29:05,504 - INFO - File API OK: speed_videos/0.5/yQ2YZQhvc2c.m

  meta_df[col] = merged_df[update_col_name].fillna(merged_df[col])


✅ Video preparation complete. See logs. Metadata: `video_metadata_non_vertex_inj_correct_mcq.csv`.

# Testing UI

## Single Prompt Single Question Testing UI

In [23]:
def perform_inference_single_sync(question_info: Dict, client: Any) -> Dict[str, Any]:
    """Run one synchronous inference call for a single question/video pair.

    Args:
        question_info: Question record. Read keys: ``qid``, ``gcs_uri`` (used when
            ``USE_VERTEX`` is true) and ``file_api_name`` (used otherwise); the whole
            record is also passed to ``build_prompt``.
        client: google-genai client; ``client.files.get`` and
            ``client.models.generate_content`` are called on it.

    Returns:
        A dict with ``qid``, ``pred``, ``duration`` and ``status``. On success
        ``pred`` is the raw response object returned by ``generate_content`` and
        ``finish_reason`` is included; on failure ``pred`` is an ``"ERROR: ..."``
        string. Exceptions are converted into failure dicts rather than raised.
    """
    qid = question_info.get("qid", "?")
    prompt_text = build_prompt(question_info)
    gcs_uri = question_info.get("gcs_uri")
    file_api_name = question_info.get("file_api_name")
    start_time = time.time()

    def _failure(pred: str, status: str) -> Dict[str, Any]:
        # Uniform failure record, timed from the start of this call.
        return {"qid": qid, "pred": pred, "duration": time.time() - start_time, "status": status}

    # --- Build the request payload (prompt + video reference) ---
    try:
        if USE_VERTEX:
            if not gcs_uri:
                raise ValueError("Missing GCS URI.")
            video_part = types.Part.from_uri(mime_type="video/mp4", file_uri=gcs_uri)
        else:
            if not file_api_name:
                raise ValueError("Missing File API name.")
            try:
                # The File object itself is passed to the model as the video reference.
                video_part = client.files.get(name=file_api_name)
            except genai_errors.APIError as e:
                # google.genai.errors has no NotFoundError class; HTTP failures surface
                # as APIError (ClientError/ServerError) carrying the status in `.code`.
                if getattr(e, "code", None) == 404:
                    raise FileNotFoundError(f"File API '{file_api_name}' not found.") from e
                raise RuntimeError(f"Failed get File API obj: {e}") from e
            except Exception as e:
                raise RuntimeError(f"Failed get File API obj: {e}") from e

        question_content = types.Content(
            role="user",
            parts=[types.Part.from_text(text=prompt_text)]
        )
        contents = [question_content, video_part]
    except (ValueError, FileNotFoundError, RuntimeError) as e:
        logger.error(f"QID {qid} (Sync): Input Error - {e}")
        return {"qid": qid, "pred": f"ERROR: Input Fail - {e}", "duration": 0, "status": "Failed (Input)"}

    # --- Inference with exponential backoff on rate limiting ---
    for attempt in range(MAX_RETRIES + 1):
        try:
            response = client.models.generate_content(
                model=MODEL_NAME,
                contents=contents,
                config=CONFIG,
            )
            answer, reason, status = "ERROR", "UNKNOWN", "Success"
            try:
                # NOTE(review): `pred` carries the whole response object, not
                # `response.text`; kept as-is because the UI renders it directly.
                answer = response
                # finish_reason may be unset; guard so a valid response is not
                # discarded by the generic exception handler below.
                if response.candidates and response.candidates[0].finish_reason:
                    reason = response.candidates[0].finish_reason.name
            except ValueError as ve:
                status = "Blocked/Empty"
                answer = f"ERROR: {status}. ValueError: {ve}. "
            return {"qid": qid, "pred": answer, "duration": time.time() - start_time,
                    "finish_reason": reason, "status": status}
        except api_core_exceptions.ResourceExhausted as e:
            if attempt < MAX_RETRIES:
                time.sleep(INITIAL_BACKOFF_SECONDS * (2 ** attempt))
            else:
                return _failure(f"ERROR: Max Retries ({type(e).__name__}) - {e}", "Failed (Retries)")
        except genai_errors.APIError as e:
            # Rate limiting from the google-genai SDK arrives as APIError with code 429,
            # so it must be retried here rather than treated as a terminal API error.
            if getattr(e, "code", None) == 429:
                if attempt < MAX_RETRIES:
                    time.sleep(INITIAL_BACKOFF_SECONDS * (2 ** attempt))
                    continue
                return _failure(f"ERROR: Max Retries ({type(e).__name__}) - {e}", "Failed (Retries)")
            return _failure(f"ERROR: GenAI APIError - {e}", "Failed (API Error)")
        except Exception as e:
            return _failure(f"ERROR: Unexpected - {e}", "Failed (Unexpected)")

    # Fallback: only reachable if the loop body never ran (e.g. negative MAX_RETRIES).
    return _failure("ERROR: Unknown after retries", "Failed (Unknown)")

# --- UI setup: load question metadata and build the selection widgets --- #
ui_video_questions = {}
try:
    ui_video_questions = load_metadata_for_inference(METADATA_FILE)
except Exception as e:
    display(Markdown(f"❌ UI Load Error: {e}"))

# Dropdown entries: a placeholder first, then one "<video_id> (<n>q)" entry per video.
video_options = [("Select video...", None)]
if ui_video_questions:
    video_options.extend(
        sorted((f"{vid} ({len(qs)}q)", vid) for vid, qs in ui_video_questions.items())
    )

video_selector = widgets.Dropdown(
    options=video_options,
    description='Video ID:',
    disabled=not ui_video_questions,  # nothing to pick if metadata failed to load
    style={'description_width': 'initial'},
)
question_selector = widgets.Dropdown(
    options=[("Select question...", None)],
    description='Question (QID):',
    disabled=True,  # enabled once a video with questions is selected
    layout=widgets.Layout(width='95%'),
    style={'description_width': 'initial'},
)
run_button = widgets.Button(
    description='Run Inference',
    disabled=True,  # enabled once a question is selected
    button_style='primary',
    icon='play',
)
output_area = widgets.Output()

# --- Widget interaction logic (callbacks invoke the synchronous inference function) --- #
def on_video_selected(change):
    """Reset the question dropdown when the selected video changes.

    Clears the question selector, disables the run button and clears the output
    area, then repopulates the question selector with the questions of the newly
    selected video (only those that have a ``qid``).

    Args:
        change: ipywidgets change dict; ``change['new']`` is the selected video id
            (``None`` for the placeholder entry).
    """
    selected_video_id = change['new']
    question_selector.options = [("Select question...", None)]
    question_selector.value = None
    question_selector.disabled = True
    run_button.disabled = True
    output_area.clear_output()
    if selected_video_id and selected_video_id in ui_video_questions:
        questions = ui_video_questions[selected_video_id]
        # Sort on the label only: sorting the raw (label, dict) tuples would fall back
        # to comparing the dicts when two labels tie, which raises TypeError.
        # str(... or '') keeps a missing/None/non-string question from breaking the slice.
        question_options = sorted(
            (
                (f"{q.get('qid', 'N/A')}: {str(q.get('question') or '')[:80]}...", q)
                for q in questions
                if q.get('qid')
            ),
            key=lambda option: option[0],
        )
        if question_options:
            question_selector.options = [("Select question...", None)] + question_options
            question_selector.disabled = False

def on_question_selected(change):
    """Show a short summary of the chosen question and toggle the run button.

    Args:
        change: ipywidgets change dict; ``change['new']`` is the selected question
            record (``None`` for the placeholder entry).
    """
    chosen = change['new']
    run_button.disabled = chosen is None
    output_area.clear_output()
    if not chosen:
        return

    with output_area:
        display(Markdown("### Selected Info"))
        # The resource column depends on which backend the videos were uploaded to.
        id_col = 'gcs_uri' if USE_VERTEX else 'file_api_name'
        summary_fields = {
            'qid': chosen.get('qid'),
            'question': chosen.get('question'),
            f'Resource ({id_col})': chosen.get(id_col),
        }
        display(pd.Series(summary_fields).to_frame('Value'))

def on_run_button_clicked(b):
    """Verify the selected video resource, run inference and render the result.

    All output is written into ``output_area``. The run button is disabled while
    the handler runs and re-enabled on every exit path.

    Args:
        b: The clicked button (unused; required by the ``on_click`` signature).
    """
    # Local import: needed only to escape text embedded in the HTML blocks below.
    from html import escape as _html_escape

    run_button.disabled = True
    output_area.clear_output()
    with output_area:
        if not video_selector.value or not question_selector.value:
            display(Markdown("❌ Select video & question."))
            run_button.disabled = False
            return
        if ai_client is None:
            display(Markdown("❌ AI Client not ready."))
            run_button.disabled = False
            return

        q_info = question_selector.value
        qid = q_info.get('qid')
        resource_col = 'gcs_uri' if USE_VERTEX else 'file_api_name'
        resource_id = q_info.get(resource_col)

        display(Markdown(f"### Running QID: {qid}"))
        display(Markdown(f"--- Verifying Resource --- ({'GCS' if USE_VERTEX else 'File API'}) ---"))
        verified = False
        if not resource_id:
            display(Markdown(f"❌ Error: Missing '{resource_col}' ID."))
        elif USE_VERTEX:
            verified = verify_gcs_file_exists(storage_client, resource_id)
        else:
            # NOTE(review): the File API check is handed `storage_client`, not
            # `ai_client` — confirm this is the object the helper expects.
            verified = verify_file_api_resource_exists(storage_client, resource_id)

        if not verified:
            display(Markdown("❌ Verification failed."))
            run_button.disabled = False
            return
        display(Markdown(f"✅ Resource Verified: {resource_id}"))

        display(Markdown("Video Preview:"))
        video_path = Path(extracted_videos_path) / f"{video_selector.value}.mp4"
        if video_path.is_file():
            video_widget = widgets.Video.from_file(video_path, width=400, height=300)
            display(video_widget)

        display(Markdown("### Inference Details"))
        display(Markdown(f"**Video ID:** {video_selector.value} | **QID:** {qid}"))
        display(Markdown(f"**Resource ID ({resource_col}):** {resource_id}"))

        prompt = build_prompt(q_info)
        display(Markdown("### Prompt"))
        display(Markdown(f"**Question:** {q_info.get('question', '')}"))
        display(Markdown(f"**Question Type:** {q_info.get('question_type', 'default')}"))
        display(Markdown(f"**Prompt Template:** {PROMPT_TEMPLATES.get(q_info.get('question_type', 'default'), PROMPT_TEMPLATES['default'])}"))
        display(Markdown(f"**System Prompt:** {SYSTEM_PROMPT}"))
        # Escape before embedding in HTML so '<', '>' and '&' in the prompt are shown
        # literally instead of being parsed as markup.
        display(HTML(f"<pre style='white-space: pre-wrap; border: 1px solid #000; padding: 10px;'>{_html_escape(str(prompt))}</pre>"))
        display(Markdown("--- Performing Inference (Sync) ---"))

        inference_result = perform_inference_single_sync(q_info, ai_client)

        display(Markdown("--- Result ---"))
        if inference_result and isinstance(inference_result, dict):
            status, duration, answer, reason = (
                inference_result.get("status", "?"), inference_result.get('duration', -1),
                inference_result.get('pred', ''), inference_result.get('finish_reason', 'N/A')
            )
            display(Markdown(f"**Status:** {status} | **Duration:** {duration:.2f}s | **Reason:** {reason}"))
            display(Markdown("**Response:**"))
            # `pred` may be a response object whose text form contains angle-bracketed
            # fragments (e.g. enum reprs); unescaped, the browser would drop them as tags.
            display(HTML(f"<pre style='white-space: pre-wrap; border: 1px solid #000; padding: 10px;'>{_html_escape(str(answer))}</pre>"))
        else:
            display(Markdown("❌ Invalid result."))

    run_button.disabled = False

# Wire each callback to its widget, then render the UI top to bottom.
video_selector.observe(on_video_selected, names='value')
question_selector.observe(on_question_selected, names='value')
run_button.on_click(on_run_button_clicked)

display(Markdown("### Select Video & Question"))
display(video_selector, question_selector, run_button, output_area)

2025-04-25 17:38:37,492 - INFO - Loaded 289 videos (1500 questions) with valid IDs for inference.


### Select Video & Question

Dropdown(description='Video ID:', options=(('Select video...', None), ('-HAFFvsDCr4 (5q)', '-HAFFvsDCr4'), ('-…

Dropdown(description='Question (QID):', disabled=True, layout=Layout(width='95%'), options=(('Select question.…

Button(button_style='primary', description='Run Inference', disabled=True, icon='play', style=ButtonStyle())

Output()

## MCQ Prompt chaining Testing UI - turn by turn Format

### MCQ Prompt chaining with CoCoT reasoning with summary

In [25]:
def sort_heuristics(qs: list[dict]) -> list[dict]:
    """
    Return a new list of questions ordered by question type:

    1) "Correctly-led Open-ended Question"
    2) "Multiple-choice Question with a Single Correct Answer"
    3) every other type (including a missing ``question_type``)
    4) "Wrongly-led Open-ended Question"

    Within each group the original relative order is kept; the input list is
    not modified.
    """
    def _group_rank(question: dict) -> int:
        kind = question.get("question_type", "")
        if kind == "Correctly-led Open-ended Question":
            return 0
        if kind == "Multiple-choice Question with a Single Correct Answer":
            return 1
        if kind == "Wrongly-led Open-ended Question":
            return 3
        return 2

    # sorted() is stable, so ties keep their input order.
    return sorted(qs, key=_group_rank)


# ──────────────────────────────────────────────────────────────────────────────
# 1⃣  Request renderer – handles list + File objects safely
# ──────────────────────────────────────────────────────────────────────────────
def _render_request(contents: list, turn_idx: int):
    """
    Pretty-print the payload sent to the model as an HTML box.

    Each item of ``contents`` (nested lists are flattened one level) becomes one line:
    • ``types.Part`` with text  -> ``video_desc: <text>``
    • ``types.Part`` without text -> ``[video/mp4]``
    • objects with a ``parts`` attribute (``types.Content``) -> ``<role>: <text of first part>``
    • anything else (e.g. a File object returned by ``client.files.get``) -> ``[file]``

    Args:
        contents: The request payload list passed to ``generate_content``.
        turn_idx: Turn number shown in the box header.
    """
    # Local import: needed only to escape text embedded in the HTML below.
    from html import escape as _html_escape

    # Flatten one level in case an element is itself a list of Parts.
    flat: list[Any] = []
    for item in contents:
        if isinstance(item, list):
            flat.extend(item)
        else:
            flat.append(item)

    lines = []
    for c in flat:
        if isinstance(c, types.Part):
            # Text Part vs. non-text (video reference) Part.
            if getattr(c, "text", None):
                lines.append(f"video_desc: {c.text}")
            else:
                lines.append("[video/mp4]")
        elif hasattr(c, "parts"):                           # types.Content
            role = getattr(c, "role", "?")
            # Guard against a Content with no parts (None or empty list).
            parts = c.parts or []
            txt = (getattr(parts[0], "text", "") or "") if parts else ""
            lines.append(f"{role}: {txt}")
        else:                                               # plain File
            lines.append("[file]")

    # Escape so prompt/answer text containing '<', '>' or '&' is shown literally
    # instead of being interpreted as markup inside the <pre> block.
    joined_lines = _html_escape('\n'.join(lines))
    display(HTML(
        f"<div style='border:1px dashed #999;padding:8px;margin:8px 0;'>"
        f"<strong>Turn {turn_idx} – Request sent to model:</strong>"
        f"<pre style='white-space:pre-wrap;margin:4px 0 0;'>{joined_lines}</pre>"
        f"</div>"
    ))

# ──────────────────────────────────────────────────────────────────────────────
# 2⃣  Sync chat routine – video sent every turn
# ──────────────────────────────────────────────────────────────────────────────
def run_chat_for_video_sync(video_id: str, qs: list[Dict], client: Any) -> None:
    """Ask all questions of one video as a multi-turn chat and display each turn.

    Questions are ordered with ``sort_heuristics``. Every turn sends the video
    reference, the accumulated chat history and the new prompt to ``MODEL_NAME``;
    a non-empty answer is then sent together with the prompt to
    ``QUESTION_MODEL_NAME`` to obtain a summary answer. Prompt, answer and summary
    are rendered as HTML boxes, and the turn is appended to the chat history.

    Args:
        video_id: Identifier of the video (not used inside this function).
        qs: Question records for the video; the first one supplies ``gcs_uri`` or
            ``file_api_name`` depending on ``USE_VERTEX``.
        client: google-genai client used for ``files.get`` and ``generate_content``.
    """
    # Local import: needed only to escape text embedded in the HTML below.
    from html import escape as _html_escape

    qs = sort_heuristics(qs)
    if not qs:
        display(Markdown("❌ No questions for this video."))
        return

    # All questions share one video; resolve its reference from the first record.
    first = qs[0]
    try:
        if USE_VERTEX:
            if not first.get("gcs_uri"):
                raise ValueError("Missing GCS URI.")
            video_part = types.Part.from_uri(mime_type="video/mp4", file_uri=first["gcs_uri"])
        else:
            if not first.get("file_api_name"):
                raise ValueError("Missing File API name.")
            video_part = client.files.get(name=first["file_api_name"])
    except Exception as e:
        display(Markdown(f"❌ Video resource error: {e}"))
        return

    chat: list = []

    for idx, q in enumerate(qs, 1):
        prompt_text = build_prompt(q)
        user_msg = types.Content(role="user",
                                 parts=[types.Part.from_text(text=prompt_text)])
        # The video is re-sent on every turn, ahead of the history.
        contents = [video_part] + chat + [user_msg]

        _render_request(contents, idx)

        # Reset per turn so a failed or skipped summary never shows a stale value
        # from the previous turn (or raises NameError on the first turn).
        summary_answer = "N/A"
        try:
            rsp = client.models.generate_content(model=MODEL_NAME,
                                                 contents=contents,
                                                 config=CONFIG)
            # rsp.text can be None when the response carries no text.
            answer = (rsp.text or "").strip()
        except Exception as e:
            answer = f"ERROR: {e}"
        else:
            if answer:
                # Summarise in its own try block so a summary failure does not
                # overwrite the answer that was already obtained.
                try:
                    summary_content = types.Content(
                        role="user",
                        parts=[
                            types.Part.from_text(text=prompt_text),
                            types.Part.from_text(text=answer),
                        ])
                    summary_rsp = client.models.generate_content(model=QUESTION_MODEL_NAME,
                                                                 contents=summary_content,
                                                                 config=QUESTION_CONFIG)
                    summary_answer = (summary_rsp.text or "").strip()
                except Exception as e:
                    summary_answer = f"ERROR: {e}"

        # Escape model/prompt text so '<', '>' and '&' are displayed literally.
        display(HTML(
            f"<div style='border:1px solid #000;margin:8px 0;padding:10px;'>"
            f"<b>Q{idx}:</b> {_html_escape(str(prompt_text))}</div>"
        ))
        display(HTML(
            f"<div style='border:1px solid #000;margin:0 0 12px;"
            f"padding:10px;background:#f9f9f9;'><b>CoT:</b> {_html_escape(answer)}</div>"
        ))
        display(HTML(
            f"<div style='border:1px solid #000;margin:0 0 12px;"
            f"padding:10px;background:#f9f9f9;'><b>Summary Answer:</b> {_html_escape(summary_answer)}</div>"
        ))

        # Record the turn; an empty answer is stored as a placeholder so the
        # model message always has text.
        chat.extend([
            user_msg,
            types.Content(role="model",
                          parts=[types.Part.from_text(text=answer or "…")])
        ])

# ──────────────────────────────────────────────────────────────────────────────
# 3⃣  UI – select a single video then run the chat routine
# ──────────────────────────────────────────────────────────────────────────────
# Load the per-video question metadata; on failure the UI is still built, just empty.
# NOTE(review): ui_video_questions, video_selector, run_button and output_area
# rebind the names used by the single-question UI cell, whose callbacks look these
# globals up at call time — confirm the two UIs are not meant to be used together.
ui_video_questions = {}
try:
    ui_video_questions = load_metadata_for_inference(METADATA_FILE)
except Exception as e:
    display(Markdown(f"❌ UI Load Error: {e}"))

# One dropdown entry per video, sorted by video id. The label separates the count
# from "q" with a narrow no-break space (U+202F), written here as an explicit escape.
_chat_video_entries = [
    (f"{vid} ({len(ui_video_questions[vid])}\u202fq)", vid)
    for vid in sorted(ui_video_questions)
]
video_selector = widgets.Dropdown(
    options=[("Select video…", None), *_chat_video_entries],
    description="Video ID:",
    style={'description_width': 'initial'}
)
run_button = widgets.Button(
    description="Run Inference",
    disabled=True,  # enabled once a video is selected
    button_style="primary",
    icon="play",
)
output_area = widgets.Output()

def _on_video_change(c):
    """Enable the run button only when a real video is selected; clear old output."""
    no_video_chosen = c["new"] is None
    run_button.disabled = no_video_chosen
    output_area.clear_output()

def _on_run_click(_):
    """Run the multi-turn chat routine for the selected video inside the output area."""
    run_button.disabled = True
    output_area.clear_output()
    with output_area:
        vid = video_selector.value
        if vid is None:
            display(Markdown("❌ Choose a video."))
            run_button.disabled = False
            return
        if ai_client is None:
            display(Markdown("❌ AI client not ready."))
            run_button.disabled = False
            return

        display(Markdown(f"## Video `{vid}`"))
        # Show a local preview when the extracted file is available.
        if Path(extracted_videos_path, f"{vid}.mp4").is_file():
            preview_file = Path(extracted_videos_path, f"{vid}.mp4")
            display(widgets.Video.from_file(preview_file, width=400, height=300))
        display(Markdown("---"))
        run_chat_for_video_sync(vid, ui_video_questions[vid], ai_client)
    run_button.disabled = False

# Hook the callbacks up to their widgets.
run_button.on_click(_on_run_click)
video_selector.observe(_on_video_change, names="value")

# Render the heading followed by the widgets, top to bottom.
display(Markdown("### Select a video and run inference"))
display(video_selector, run_button, output_area)

2025-04-25 17:39:33,163 - INFO - Loaded 289 videos (1500 questions) with valid IDs for inference.


### Select a video and run inference

Dropdown(description='Video ID:', options=(('Select video…', None), ('-HAFFvsDCr4 (5\u202fq)', '-HAFFvsDCr4'),…

Button(button_style='primary', description='Run Inference', disabled=True, icon='play', style=ButtonStyle())

Output()