In [33]:
# CELL 1: IMPORTS AND INITIAL SETUP
# ===================================================

# This script processes medication history videos and extracts structured information 
# using Vertex AI Gemini models with sequential few-shot learning examples.
# Enhanced to capture timestamps and screenshots where drugs are most clearly visible.

# Standard libraries
import os
import json
import re
import tempfile
import subprocess
import time
import random
from pathlib import Path
from datetime import datetime

# Cloud and API libraries
from google.cloud import aiplatform
from google.cloud import storage
import vertexai
from vertexai.generative_models import GenerativeModel, Part, GenerationConfig

# Document processing
from docx import Document
from docx.shared import Pt, Inches
from docx.enum.section import WD_ORIENTATION

# Utilities
from termcolor import colored
import imageio_ffmpeg

# Video processing libraries
import cv2
import numpy as np
from PIL import Image, ImageFilter

In [None]:

# ===================================================
# CELL 2: ENVIRONMENT AND CONFIGURATION SETUP
# ===================================================

# Check environment and set credentials
# The two prints below only echo the working directory and its contents so the
# reader can see where relative paths (credentials file, output folders) resolve.
print(os.listdir())
print(os.getcwd())
# Placeholder value: must be replaced with the path of a real credentials file
# before running. The exists() result is only printed, not enforced.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "YOUR_CREDENTIALS_PATH"
print(os.path.exists("YOUR_CREDENTIALS_PATH"))

# Initialize Vertex AI
aiplatform.init(
    project='YOUR_PROJECT_ID',
    location='us-central1'
)
# NOTE(review): this message is printed unconditionally after init(); nothing in
# this cell actually checks that the credentials are valid.
print("Authentication Successful")

# Initialize Vertex AI again (keeping this to maintain exact functionality)
# Same project/location as the aiplatform.init() call above, this time through
# the vertexai package that provides GenerativeModel.
vertexai.init(project="YOUR_PROJECT_ID", location="us-central1")

# Set up model configurations
# NOTE(review): both handles are created with the same model id, so
# `flash_model` is currently a second handle on the same model as `vision_model`.
vision_model = GenerativeModel("gemini-2.5-pro-preview-06-05")  # Initialize the multimodal model
flash_model = GenerativeModel("gemini-2.5-pro-preview-06-05")

# Set generation configuration
# temperature=0 is used for both configs below.
generation_config = GenerationConfig(
    temperature=0,
    max_output_tokens=16000,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0
)

# NOTE(review): currently holds exactly the same values as generation_config;
# kept as a separate object so the two can be tuned independently.
verification_config = GenerationConfig(
    temperature=0,
    max_output_tokens=16000,
    top_p=0.95,
    frequency_penalty=0,
    presence_penalty=0
)

# Configure bucket and retry settings
# Bucket names are placeholders; VIDEO_PREFIX is the folder prefix used when
# listing videos in the primary bucket.
bucket_name = "YOUR_PRIMARY_BUCKET"
VIDEO_PREFIX = "complete_videos/"  # Added to target complete videos folder
few_shot_bucket = "YOUR_EXAMPLES_BUCKET"
MAX_RETRIES = 5
BACKOFF_INITIAL_DELAY = 2  # seconds; used as the default initial delay of api_call_with_backoff

# Date the medication history was conducted
# Evaluated once when this cell runs, so this is the notebook run date
# (dd/mm/yyyy), not a date read from the recording.
date_conducted = datetime.now().strftime("%d/%m/%Y")

# Verify ffmpeg installation
ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
print(f"Using ffmpeg from: {ffmpeg_path}")

# Configure output directory structure
# Everything the notebook writes lives under OUTPUT_DIR (relative to the
# working directory printed above).
OUTPUT_DIR = "Combined_Processing_Output"
PROMPTS_DIR = os.path.join(OUTPUT_DIR, "prompts")
LOGS_DIR = os.path.join(OUTPUT_DIR, "logs")
EXAMPLES_DIR = os.path.join(OUTPUT_DIR, "examples")
SCREENSHOTS_DIR = os.path.join(OUTPUT_DIR, "screenshots")

# Create all directories
# exist_ok=True makes this cell safe to re-run.
os.makedirs(OUTPUT_DIR, exist_ok=True)
os.makedirs(PROMPTS_DIR, exist_ok=True)
os.makedirs(LOGS_DIR, exist_ok=True)
os.makedirs(EXAMPLES_DIR, exist_ok=True)
os.makedirs(SCREENSHOTS_DIR, exist_ok=True)

print(f"Outputs will be saved to: {OUTPUT_DIR}")

# Create a log file to capture all conversation
LOG_FILE = os.path.join(LOGS_DIR, "full_conversation_log.txt")
def log_message(message):
    """Append a timestamped line to LOG_FILE and echo the raw message to stdout.

    The file line is prefixed with ``[YYYY-MM-DD HH:MM:SS]``; the printed copy
    is the message exactly as passed in.
    """
    with open(LOG_FILE, "a", encoding="utf-8") as log_file:
        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        entry = f"[{stamp}] {message}\n"
        log_file.write(entry)
    print(message)

# Create a dedicated conversation logger for API calls
CONVERSATION_LOG_FILE = os.path.join(LOGS_DIR, "api_conversation_log.txt")
def log_conversation(role, content, api_call_id=None):
    """Append one conversation turn to CONVERSATION_LOG_FILE.

    Args:
        role: "user", "system" or "assistant". Any other value writes no turn
            body (only the optional banner and the trailing blank line).
        content: Text of the turn; each line is written indented by two spaces.
        api_call_id: When truthy, a banner identifying the API call is written
            before the turn.
    """
    with open(CONVERSATION_LOG_FILE, "a", encoding="utf-8") as f:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Optional banner that separates one API call from the next
        if api_call_id:
            rule = '=' * 80
            f.write(f"\n\n{rule}\n" + f"API CALL #{api_call_id} - {timestamp}\n" + f"{rule}\n\n")

        # Pick the heading for this role; unknown roles get no heading or body
        if role in ("user", "system"):
            heading = f"[{timestamp}] {role.upper()}: \n"
        elif role == "assistant":
            heading = f"[{timestamp}] MODEL RESPONSE: \n"
        else:
            heading = None

        if heading is not None:
            f.write(heading)
            # Indent every content line for readability
            f.writelines(f"  {line}\n" for line in content.split('\n'))

        f.write("\n")  # Blank line after every turn

log_message(f"Combined script started. Output directory: {OUTPUT_DIR}")


In [35]:
# ===================================================
# CELL 3: PROMPT DEFINITIONS
# ===================================================

# Medication History Extraction Prompt
# A single prompt string built from adjacent string literals (implicit
# concatenation inside the parentheses). It walks the model through five steps
# and asks for one JSON object with pharmacist/patient details and a
# "medications" list.
#
# NOTE: extract_instructions() slices this text between the literal markers
# "Step 1:" and "Final Directive", so those exact phrases (including
# capitalisation) must survive any rewording.
HISTORY_EXTRACTION_PROMPT = (
    "You are tasked with transcribing and extracting structured information from a medication history interview conducted by a clinical pharmacist. The source material may include audio-only or video recordings of a conversation between a pharmacist and a patient. Your objective is to identify and document all relevant medication-related information conveyed during the interaction. This includes both spoken dialogue and any visible written materials (e.g., medication labels, packaging, or charts) captured in the video. The information must be extracted and formatted precisely according to the step-by-step instructions outlined below."

    # Step 1: who is speaking and patient identifiers
    "\n\n Step 1: Document the following patient and pharmacist details:"
    "\n     - The pharmacist's name."
    "\n     - The patient's name (first and last name), date of birth (dd/mm/yyyy), and any medicine allergies (document the generic medicine name and the reaction. If a patient reports no allergies, specify this as 'Nil known')."

    # Step 2: one entry per regular medication
    "\n\n Step 2: Document a complete and accurate list of the patient's regular medications, including:"
    "\n     - The generic name of the medicine (example: metformin, ramipril, epoetin lambda). If the medicine is in a combination, ensure that you return both medicines separated by a forward slash. You do not need to include the brand name or salt forms in your response."
    "\n     - The medicine strength and form (example: 500 mg tablets, 200 mg/5 mL syrup, 8000 unit injection). If the medicine is in a combination ensure you return both strengths separated by a forward slash. If the medicine is a slow-release or controlled-release preparation, please ensure you also document this."
    "\n     - The directions for each medicine, expressed as the number of units and the frequency of the medicine taken. (example: One tablet every morning, one tablet twice a day, one tablet at night). Do not include any other details."
    "\n     - What the patient reports taking each of the medicines for (the indication). Document this using medical nomenclature. (example: hypertension, diabetes, infection)."
    
    # Step 3: free-text clinical notes per medication
    "\n\n Step 3: Document any clinical information related to the medication, including:"
    "\n     - If the patient crushes or modifies the tablet/capsule (example: 'the patient crushes the tablet and mixes it with yogurt')."
    "\n     - If specific dosing times are mentioned (example: 'the patient takes the medicine at 8 AM and 8 PM, as instructed by their cardiologist')."
    "\n     - If the patient requires assistance to administer the medicine (example: 'the patient's wife administers the injection')."
    "\n     - If the patient has difficulty handling medication due to dexterity issues (e.g., 'The patient uses a pill cutter to split the tablet before taking it')."
    "\n     - If there are recent medicine changes or discrepancies clarified by the pharmacist (example: 'The patient stated that the medicine was started this week')."
    "\n     - If there is any associated monitoring with the medicine (example: 'The patient has their calcium monitored each week by their family physician')."
    "\n     - If no additional details are relevant, return '(missing)'."

    # Step 4: provenance of each entry (audio, visual, or both)
        "\n\nStep 4: Document the source of the information:"
    "\n     - 'audio only' when information was only spoken by the patient or pharmacist."
    "\n     - 'visual only' when information was only found on the medication box or dispense label."
    "\n     - 'audio+visual' when information is identical in both audio and visual sources."
    "\n     - If there is ambiguity between the audio and visual cues, include both interpretations in your response using the keys 'audio' and 'visual' (e.g., 'audio: <value>' and 'visual: <value>')."

    # Step 5: the JSON schema the rest of the notebook reads
    # (keys: pharmacist, name, dob, allergies, medications[...]).
    "\n\nStep 5: Return the extracted information in the following structured JSON format:"
    '\n{\n  "pharmacist": "<pharmacist_name>",\n  "name": "<patient_name>",\n  "dob": "<date_of_birth>",\n  "allergies": "<allergies>",\n  "medications": [\n    {\n      "medicine": "<generic_name>",\n      "strength_and_form": "<strength_and_form>",\n      "dose": "<dose_summary>",\n      "indication": "<indication_per_patient>",\n      "clinical_notes": "<clinical_notes_for_this_medication>",\n      "source": "<audio only|visual only|audio+visual>"\n    },\n    ...\n  ]\n}'
    # NOTE(review): two "Final Directive" paragraphs follow. The second one asks
    # for 'audio' and 'visual' verbatim segments for each field, which the Step 5
    # schema above has no keys for — confirm which directive is intended and
    # drop the other.
    "\n\nFinal Directive: Please ensure that your final output is strictly the JSON object described above, with no additional commentary."

    "\n\nFinal Directive: Please ensure that your final output strictly matches the JSON object described above, capturing 'audio' and 'visual' verbatim segments for each field, even if marked as 'missing'. Do not include any additional commentary or explanations."
)

# Medication timestamp prompt (two timestamps per medication).
# Asks the model for the two clearest label views of each medication and for a
# one-line answer per medication in the shape that parse_timestamp_response()
# looks for: "Medication Name: <name>, Clearest Timestamps: <t1> and <t2>".
# The fallback sentence "No clear medication labels found." is also matched
# literally by the parser, so keep it unchanged.
MEDICATION_TIMESTAMP_PROMPT = (
    "You are tasked with reviewing a recorded medication history interview between a clinical pharmacist and a patient. Your goal is to identify the exact timestamps when medication dispensing labels—visible on boxes, bottles, or other packaging—are most clearly shown in the recording. These timestamps will be used for documentation and verification purposes."

    "\n\n Step 1: Determine the two clearest timestamps for each medication"
    "\n For each medication identified in the recording, locate two timestamps where:"
    "\n     - The dispensing label is fully visible and unobstructed"
    "\n     - The medication name, including both brand and generic names (if shown), is clearly visible"
    "\n     - Label details such as strength, directions, and patient name are legible"
    "\n     - The image quality is suitable for accurate transcription"
    "\n     - The two timestamps should capture the clearest possible views"

    "\n\n Step 2: Report your findings using the following format"
    "\n     - Format the time expressed as MM:SS (example: 12:34)"
    "\n     - Round each timestamp to the nearest second where label clarity is highest"
    # Fixed: the placeholder previously read "[Name ... or state 'Unknown if
    # illegible’," — an unbalanced quote (with a stray typographic ’) and a
    # bracket that was never closed, which made the expected format ambiguous.
    "\n     - Report your findings as Medication Name: [Name as shown on label, or 'Unknown' if illegible], Clearest Timestamps: [First timestamp] and [Second timestamp]"

    "\n\n Final directive"
    "\n If no medication containers with clearly readable labels are visible in the recording, respond with exactly:"
    "\n No clear medication labels found."
    "\n Do not include any guesses or assumptions based on unclear or partial label views."
)


# Verbatim transcription prompt.
# Built from adjacent string literals; asks for a speaker-tagged
# ('pharmacist:' / 'patient:') word-for-word transcript with no summarising,
# and '[unclear]'-style marking of partially audible words.
FULL_TRANSCRIPTION_PROMPT = (
    "You are tasked with transcribing a recorded medication history interview between a clinical pharmacist and a patient. Your goal is to provide a complete and accurate verbatim transcription of all spoken dialogue for clinical documentation purposes."

    # Step 1: what must be captured
    "\n\n Step 1: Transcribe all spoken words in the exact sequence they occur, this includes:"
    "\n     - Every question asked by the pharmacist"
    "\n     - Every response from the patient"
    "\n     - All clarifications, corrections, or follow-up statements by either party"
    "\n     - Medication names, dosages, and directions exactly as spoken, even if mispronounced"

    # Step 2: fidelity rules (no silent corrections)
    "\n\n Step 2: Ensure accuracy of the transcribed record"
    "\n     - Do not correct mispronunciations or incomplete terms"
    "\n     - Include all numerical details such as dose, timing, and dates"
    "\n     - Accurately transcribe all medical conditions or terminology"
    "\n     - If a word is partially audible, include your best interpretation followed by the word 'unclear' in square brackets"

    # Step 3: output layout
    "\n\n Step 3: Use speaker tags for clarity"
    "\n     - Begin each new line with either 'pharmacist' or 'patient' followed by a colon, depending on who is speaking"
    "\n     - Start a new line for every change in speaker"
    "\n     - Do not insert any of your own commentary or interpretations"

    "\n\n Final directive"
    "\n Return only the verbatim transcript of the dialogue with clear speaker tags."
    "\n Do not summarize, paraphrase, or omit any part of the conversation."
    "\n Precision and completeness are essential for clinical accuracy."
)

# Recorded in the run log so it is visible that this cell has been executed.
log_message("Prompts initialized")


Prompts initialized


In [36]:

# ===================================================
# CELL 4: UTILITY FUNCTIONS
# ===================================================

def api_call_with_backoff(func, max_retries=MAX_RETRIES, initial_delay=BACKOFF_INITIAL_DELAY):
    """Call ``func`` and retry with exponential backoff whenever it raises.

    Args:
        func: Zero-argument callable that performs the API request.
        max_retries: Total number of attempts. Defaults to the configured
            MAX_RETRIES. Values below 1 are treated as 1 so that ``func`` is
            always invoked at least once (previously such values skipped the
            call and silently returned None).
        initial_delay: Seconds to wait after the first failure; the wait is
            doubled after every further failure.

    Returns:
        Whatever ``func`` returns on its first successful attempt.

    Raises:
        Exception: Re-raises the exception from the final attempt unchanged.
    """
    attempts = max(1, max_retries)
    delay = initial_delay
    for attempt in range(1, attempts + 1):
        try:
            return func()
        except Exception as e:
            error_str = str(e)
            log_message(f"API error on attempt {attempt}: {error_str}")

            if attempt == attempts:
                # Out of attempts: report and let the caller see the original error
                log_message(colored(f"❌ API error after {attempts} retries: {error_str}", "red"))
                raise

            # Add up to 10% jitter to avoid thundering herd
            sleep_time = delay * (1 + random.random() * 0.1)

            # The status code is only used to choose the wording of the log line;
            # every exception type is retried the same way.
            if "429" in error_str:
                message = f"⏳ Rate limit hit. Waiting {sleep_time:.1f}s before retry {attempt}/{attempts}..."
            elif "500" in error_str:
                message = f"⚠️ Internal server error. Waiting {sleep_time:.1f}s before retry {attempt}/{attempts}..."
            else:
                message = f"⚠️ API error. Waiting {sleep_time:.1f}s before retry {attempt}/{attempts}..."

            log_message(colored(message, "yellow"))
            time.sleep(sleep_time)
            delay *= 2

def clean_bad_encoding(s):
    """Repair UTF-8 text that was mis-decoded as Latin-1 (e.g. 'cafÃ©' -> 'café').

    The repair is only applied when the whole string round-trips cleanly
    (Latin-1 encode, then strict UTF-8 decode). Any string that does not fit
    that pattern is returned unchanged. The previous implementation used the
    'replace' error handler in both directions, which damaged text that was
    already correct: 'café' became 'caf\ufffd' and characters outside Latin-1
    such as '–' became '?'.

    Args:
        s: The string to inspect.

    Returns:
        The repaired string, or ``s`` itself when no repair applies.
    """
    try:
        return s.encode('latin1').decode('utf-8')
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Not mojibake (or only partly so): leave the text exactly as it is
        return s

def clean_json(data):
    """Recursively run clean_bad_encoding over every string value in JSON-like data.

    Lists and dicts are rebuilt (dict keys are kept as they are, only values
    are visited); any other non-string value is returned untouched.
    """
    if isinstance(data, str):
        return clean_bad_encoding(data)
    if isinstance(data, list):
        return list(map(clean_json, data))
    if isinstance(data, dict):
        cleaned = {}
        for key, value in data.items():
            cleaned[key] = clean_json(value)
        return cleaned
    return data

def load_few_shot_examples():
    """Read examples4a.json from the working directory and return its cleaned content.

    Returns:
        The parsed JSON after clean_json(), or an empty list when the file is
        missing or any other error occurs while loading (both cases are logged).
    """
    examples_file = "examples4a.json"
    try:
        with open(examples_file, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
        cleaned = clean_json(parsed)
        log_message(f"Loaded {len(cleaned)} examples from examples4a.json")
        return cleaned
    except FileNotFoundError:
        log_message(colored("⚠️ examples4a.json file not found. Running without examples.", "yellow"))
    except Exception as e:
        log_message(colored(f"❌ Error loading examples4a.json: {e}", "red"))
    # Reached only after one of the handlers above ran
    return []

def get_mime_type(uri):
    """Map a file URI to a MIME type using its (case-insensitive) extension.

    Recognised: .png, .jpg/.jpeg and .mov. Every other extension, including
    none at all, falls back to "video/mp4".
    """
    known_suffixes = (
        ((".png",), "image/png"),
        ((".jpg", ".jpeg"), "image/jpeg"),
        ((".mov",), "video/quicktime"),
    )
    lowered = uri.lower()
    for suffixes, mime_type in known_suffixes:
        if lowered.endswith(suffixes):
            return mime_type
    return "video/mp4"

def list_videos_in_bucket(bucket_name, prefix=""):
    """Return gs:// URIs of the video blobs under ``prefix`` in ``bucket_name``.

    A blob counts as a video when its name ends (case-insensitively) with
    .mp4, .mov, .avi or .mkv. Any error while listing is logged and an empty
    list is returned instead.
    """
    video_suffixes = (".mp4", ".mov", ".avi", ".mkv")
    try:
        client = storage.Client()
        listing = client.bucket(bucket_name).list_blobs(prefix=prefix)
        found = []
        for blob in listing:
            if blob.name.lower().endswith(video_suffixes):
                found.append(f"gs://{bucket_name}/{blob.name}")
        log_message(f"Found {len(found)} videos in gs://{bucket_name}/{prefix}")
        return found
    except Exception as e:
        log_message(colored(f"❌ Error listing videos in bucket: {e}", "red"))
        return []

def extract_instructions():
    """Return the step-by-step section of HISTORY_EXTRACTION_PROMPT.

    The section runs from the first "Step 1:" up to (not including) the first
    "Final Directive" that follows it, with surrounding whitespace trimmed.
    The slice is taken verbatim, so the text keeps the space after "Step 1:"
    (the earlier strip-then-prefix approach produced "Step 1:Document ...")
    and is not cut short if "Step 1:" happens to occur again inside the section.

    Returns:
        The instruction text, starting with "Step 1:".

    Raises:
        ValueError: If either marker is missing, or "Final Directive" does not
            occur after "Step 1:".
    """
    start_key = "Step 1:"
    end_key = "Final Directive"

    start = HISTORY_EXTRACTION_PROMPT.find(start_key)
    # Only look for the end marker after the start marker
    end = HISTORY_EXTRACTION_PROMPT.find(end_key, start + len(start_key)) if start != -1 else -1

    if start == -1 or end == -1:
        raise ValueError("Instructional section could not be found. Please check formatting.")

    return HISTORY_EXTRACTION_PROMPT[start:end].strip()

def create_verifier_prompt():
    """Build the Part 2 (verification) prompt.

    The prompt consists of fixed verification instructions, followed by the
    Part 1 instruction section returned by extract_instructions(), and ends
    with a lead-in line after which the caller is expected to append the
    extracted JSON.

    Changes from the earlier wording:
      * a blank line now separates "...provided in part 1." from the embedded
        instructions (they were previously glued together);
      * the two consecutive "Here is the extracted JSON ..." sentences were
        collapsed into the single closing lead-in.

    Returns:
        The prompt text, ending with "Here is the extracted JSON to verify:\\n".

    Raises:
        ValueError: Propagated from extract_instructions() when the Part 1
            instruction section cannot be located.
    """
    instructional_section = extract_instructions()

    verifier_prompt = (
        "You are a medication history verification agent. Your task is to review a structured medication history that was previously extracted from a pharmacist–patient interview recorded as audio or video. Your responsibility is to ensure that each item in the medication history is fully and accurately supported by the recording."

        "\n\n This is Part 2 of a two-step process. In Part 1, structured information—such as medication names, strengths, doses, and indications—was extracted directly from the recording. In this Part 2 task, you must critically verify the accuracy of the medication history against the original recording, correct any inaccuracies, and ensure the final medication history is returned in the JSON format specified in Part 1."

        "\n\n Below is the step-by-step process for verifying the accuracy of the extracted medication history."

        "\n\n Step 1: Verify the accuracy of the extracted medication information"
        "\n   Review each piece of information in the extracted medication history and confirm it matches the recording. For each medication, you must check:"
        "\n     - Medicine name: Ensure the medication name matches exactly what was mentioned in the recording"
        "\n     - Strength and form: Verify the medication strength and form (tablets, syrup, etc.) are correct"
        "\n     - Dose instructions: Check that the dosing frequency and timing match the recording"
        "\n     - Indication: Confirm the reason for taking the medication is accurately recorded"
        "\n     - Clinical notes: Verify any additional details about how the patient takes or handles the medication"

        "\n\n If any inaccuracies are identified, you must correct them. Here are some examples:"
        "\n     - Wrong medicine name: If the medication history shows 'simvastatin' but the recording clearly states 'atorvastatin', correct the entry to 'atorvastatin'"
        "\n     - Wrong dosing: If the medication history shows 'one tablet in the morning' but the recording states 'one tablet at night', change the entry to match the recording"
        "\n     - Missing medicines: If the patient mentioned taking paracetamol when needed, but this doesn't appear in the medication history, add it as a new medication entry"

        "\n\n Step 2: Check for missing or duplicate medications"
        "\n     - Missing medications: Ensure all medications mentioned by the patient are included in the final medication history"
        "\n     - Duplicate entries: If the same medication appears more than once in the medication history, combine the entries into a single, complete entry"

        "\n\n Step 3: Return the corrected medication history"
        "\n   Use the exact same JSON structure and field names as specified in Part 1. If any information cannot be verified from the recording, mark that field as \"(missing)\". Return only the corrected JSON object with no additional text or explanations."

        "\n\nFinal directive"

        "\n\n Always strictly base your review on the recording provided. Never "
        "invent, infer, or guess missing information. If you cannot verify a "
        "field with certainty, mark it as '(missing)'. Apply this rule "
        "consistently across all fields including medicine names, strengths, "
        "forms, dosages, indications, and clinical notes."

        # Adjacent literals are joined first, so this is
        # (fixed text) + instructional_section + (closing lead-in).
        "\n\nFor reference, these are the original instructions provided in part 1.\n\n"
        + instructional_section +
        "\n\nHere is the extracted JSON to verify:\n"
    )

    return verifier_prompt

# Parsing function for the two-timestamps-per-medication response
def parse_timestamp_response(response_text):
    """Parse the model's timestamp answer into one record per medication.

    Two line shapes are accepted (case-insensitive):
      1. "Medication Name: <name>, Clearest Timestamps: <t1> and <t2>"
      2. "<name>: Clearest Timestamps: <t1> and <t2>"  (tried only when shape 1
         matches nothing)
    Timestamps may be MM:SS or HH:MM:SS and may be wrapped in square brackets
    or markdown asterisks (models tend to echo the "[First timestamp]"
    placeholders or add bold markup); those wrappers are ignored.

    For shape 2 the name is taken from the same line only. The earlier
    pattern let the name span newlines, so any colon-free preamble line ended
    up inside the first medication name. Leading list markers ("-", "*", "1.")
    and surrounding brackets/asterisks are removed from the name.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        A list of dicts with keys "medication_name", "timestamp1" and
        "timestamp2" (timestamps as strings). Empty when nothing could be
        parsed or the model reported that no labels were found.
    """
    time_re = r"((?:\d{1,2}:)?\d{1,2}:\d{2})"
    opener = r"[\[*\s]*"   # optional "[", "*" or whitespace before a timestamp
    closer = r"[\]*\s]*"   # optional "]", "*" or whitespace after a timestamp
    stamps_re = r"Clearest Timestamps:" + opener + time_re + closer + r"and" + opener + time_re

    # Pattern 1: "Medication Name: <name>, Clearest Timestamps: <time1> and <time2>"
    pattern1 = re.compile(r"Medication Name:[*\s]*(.*?),\s*" + stamps_re, re.IGNORECASE)

    # Pattern 2: "<medication_name>: Clearest Timestamps: <time1> and <time2>"
    # [^:\n] keeps the name on a single line.
    pattern2 = re.compile(r"([^:\n]+?):[*\s]*" + stamps_re, re.IGNORECASE)

    def clean_name(raw_name):
        """Drop list markers and bracket/bold wrappers around a medication name."""
        name = raw_name.strip()
        name = re.sub(r"^(?:[-•*]+|\d+[.)])\s+", "", name)
        return name.strip("[]* ")

    # Try pattern 1 first, fall back to pattern 2
    matches = pattern1.findall(response_text)
    pattern_used = "Pattern 1 (with 'Medication Name:')"
    if not matches:
        matches = pattern2.findall(response_text)
        pattern_used = "Pattern 2 (direct medication name)"

    parsed_data = []
    for raw_name, first_stamp, second_stamp in matches:
        med_name = clean_name(raw_name)
        parsed_data.append({
            "medication_name": med_name if med_name else "Unknown",
            "timestamp1": first_stamp.strip(),
            "timestamp2": second_stamp.strip()
        })

    if not parsed_data and "Not found" not in response_text and "No medication" not in response_text and "No clear medication labels found" not in response_text:
        # Neither pattern matched and the model did not use a known "nothing found" phrase
        log_message(colored(f"⚠️ Could not parse any structured timestamps from response: {response_text[:300]}...", "yellow"))
    elif not parsed_data:
        log_message(colored(f"ℹ️ Model indicated no medication labels found: {response_text[:300]}", "blue"))
    else:
        log_message(colored(f"✅ Successfully parsed {len(parsed_data)} medication timestamps using {pattern_used}", "green"))
        for i, data in enumerate(parsed_data):
            log_message(colored(f"  {i+1}. {data['medication_name']}: {data['timestamp1']} and {data['timestamp2']}", "cyan"))

    return parsed_data

def time_str_to_milliseconds(time_str):
    """Convert an "HH:MM:SS" or "MM:SS" string into milliseconds.

    Raises:
        ValueError: If a field is not an integer, or the string does not have
            exactly two or three colon-separated fields.
    """
    fields = [int(piece) for piece in time_str.split(':')]
    if len(fields) not in (2, 3):
        raise ValueError(f"Invalid time format (expected HH:MM:SS or MM:SS): {time_str}")
    if len(fields) == 2:
        # MM:SS -> treat the hour part as zero
        fields.insert(0, 0)
    hours, minutes, seconds = fields
    total_seconds = hours * 3600 + minutes * 60 + seconds
    return total_seconds * 1000

def extract_and_save_frame(video_path, timestamp_ms, output_path, apply_sharpening=True):
    """Grab the frame at ``timestamp_ms`` from a video and save it as an image.

    Args:
        video_path: Path of the video file opened with OpenCV.
        timestamp_ms: Position to seek to, in milliseconds.
        output_path: Where the image is written (format follows the extension).
        apply_sharpening: When True an unsharp mask is applied before saving.

    Returns:
        True when the image was written; False when the video could not be
        opened, no frame could be read, or processing/saving raised.
    """
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        log_message(colored(f"❌ Error: Could not open video {video_path}", "red"))
        return False

    # Seek, read a single frame, then release the capture straight away
    capture.set(cv2.CAP_PROP_POS_MSEC, timestamp_ms)
    ok, bgr_frame = capture.read()
    capture.release()

    if not ok or bgr_frame is None:
        log_message(colored(f"⚠️ Failed to extract frame at {timestamp_ms}ms from {video_path}", "yellow"))
        return False

    try:
        # OpenCV delivers BGR; PIL expects RGB
        image = Image.fromarray(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))

        if apply_sharpening:
            image = image.filter(ImageFilter.UnsharpMask(radius=1, percent=150))
            image.save(output_path)
            saved_note = f"🖼️ Screenshot saved (sharpened): {output_path}"
        else:
            image.save(output_path)
            saved_note = f"🖼️ Screenshot saved (original): {output_path}"
        log_message(colored(saved_note, "green"))
        return True
    except Exception as e:
        log_message(colored(f"❌ Error saving/processing frame {output_path}: {e}", "red"))
        return False

# Frame extraction: two frames per timestamp
def extract_frame_pair(video_path, timestamp_ms, med_name_for_file, timestamp_str, video_dir, pair_index, timestamp_index):
    """Save two sharpened screenshots for one timestamp and return their paths.

    The first frame is taken at ``timestamp_ms``; the second 967 ms later,
    which corresponds to "frame 30" of that second if the video runs at 30 fps
    (29 frames * 33.33 ms). Files are written into ``video_dir``; only frames
    that were actually saved appear in the returned list.
    """
    compact_time = timestamp_str.replace(':', '')
    # (label used in the file name, offset from the requested timestamp in ms)
    frame_plan = (("frame1", 0), ("frame30", 967))

    saved_paths = []
    for frame_label, offset_ms in frame_plan:
        filename = f"step3_med_{med_name_for_file}_time_{compact_time}_ts{timestamp_index}_{frame_label}_{pair_index}.png"
        target_path = os.path.join(video_dir, filename)
        if extract_and_save_frame(video_path, timestamp_ms + offset_ms, target_path, apply_sharpening=True):
            saved_paths.append(target_path)

    return saved_paths

def create_excel_report(verified_data, video_dir, case_number):
    """Write the verified medication history of one case to an .xlsx file.

    Layout: row 1 holds the case label, rows 2-4 the patient name, date of
    birth and allergies, followed by one five-row block per medication
    (Medication, Strength and form, Dose, Indication per patient, Clinical
    notes) with a blank row between blocks.

    Field values are normalised before being written so that a single odd
    value cannot abort the whole report: a missing or null field becomes
    '(missing)', lists are joined with ", ", dicts are rendered as
    "key: value; ...". For the indication and clinical-notes rows the text
    '(missing)' is shown as 'Missing'. Medication entries that are not dicts
    are skipped with a warning.

    Args:
        verified_data: Dict following the extraction JSON schema (keys read
            here: name, dob, allergies, medications).
        video_dir: Directory the workbook is saved into.
        case_number: Used in the sheet title, the first row and the file name.

    Returns:
        The path of the saved workbook, or None when openpyxl is not installed
        or any error occurred (both are logged).
    """
    try:
        # openpyxl is optional: without it the report is skipped, not fatal
        try:
            import openpyxl
            from openpyxl.styles import Font, Alignment, PatternFill, Border, Side
        except ImportError:
            log_message(colored(f"    ⚠️ openpyxl not available. Skipping Excel report creation.", "yellow"))
            log_message(colored(f"    💡 Install with: pip install openpyxl to enable Excel export", "blue"))
            return None

        wb = openpyxl.Workbook()
        ws = wb.active
        ws.title = f"Case_{case_number}"

        # Styles
        header_font = Font(name='Calibri', size=11, bold=True)
        regular_font = Font(name='Calibri', size=11)
        left_alignment = Alignment(horizontal='left', vertical='center')
        thin_border = Border(
            left=Side(style='thin'),
            right=Side(style='thin'),
            top=Side(style='thin'),
            bottom=Side(style='thin')
        )
        # Header fill color (light blue)
        header_fill = PatternFill(start_color='D9E1F2', end_color='D9E1F2', fill_type='solid')

        ws.column_dimensions['A'].width = 20
        ws.column_dimensions['B'].width = 25

        def as_cell_value(value):
            """Turn a JSON value into something that can be stored in a cell."""
            if value is None:
                return '(missing)'
            if isinstance(value, (str, int, float, bool)):
                return value
            if isinstance(value, (list, tuple)):
                return ", ".join(str(item) for item in value)
            if isinstance(value, dict):
                return "; ".join(f"{key}: {item}" for key, item in value.items())
            return str(value)

        def missing_as_word(value):
            """Show the '(missing)' placeholder as 'Missing' (case-insensitive match)."""
            if isinstance(value, str) and value.lower() == '(missing)':
                return 'Missing'
            return value

        def write_row(row, label, value, boxed):
            """Write a label/value pair into columns A/B; boxed rows get fill, border and alignment."""
            label_cell = ws[f'A{row}']
            value_cell = ws[f'B{row}']
            label_cell.value = label
            label_cell.font = header_font
            value_cell.value = value
            value_cell.font = regular_font
            if boxed:
                label_cell.fill = header_fill
                for cell in (label_cell, value_cell):
                    cell.border = thin_border
                    cell.alignment = left_alignment

        # Case information section
        row = 1
        ws[f'A{row}'] = f'Case {case_number}:'
        ws[f'A{row}'].font = header_font
        row += 1

        patient_rows = (
            ('Patient name:', 'name'),
            ('Patient date of birth:', 'dob'),
            ('ADR/Allergy:', 'allergies'),
        )
        for label, key in patient_rows:
            write_row(row, label, as_cell_value(verified_data.get(key, '(missing)')), boxed=False)
            row += 1
        row += 1  # Extra space before medications

        # Medications section: (row label, JSON key, show '(missing)' as 'Missing')
        medication_rows = (
            ('Medication', 'medicine', False),
            ('Strength and form', 'strength_and_form', False),
            ('Dose', 'dose', False),
            ('Indication per patient', 'indication', True),
            ('Clinical notes', 'clinical_notes', True),
        )
        medications = verified_data.get('medications') or []
        written = 0
        for medication in medications:
            if not isinstance(medication, dict):
                log_message(colored(f"    ⚠️ Skipping medication entry that is not an object: {medication!r}", "yellow"))
                continue

            # Blank row between medication blocks
            if written > 0:
                row += 1

            for label, key, reword_missing in medication_rows:
                value = as_cell_value(medication.get(key, '(missing)'))
                if reword_missing:
                    value = missing_as_word(value)
                write_row(row, label, value, boxed=True)
                row += 1
            written += 1

        # Save the Excel file
        excel_filename = f"Case_{case_number}_Medication_History.xlsx"
        excel_path = os.path.join(video_dir, excel_filename)
        wb.save(excel_path)

        log_message(colored(f"    📊 Excel report saved to {excel_path}", "green"))
        return excel_path

    except Exception as e:
        log_message(colored(f"    ❌ Error creating Excel report: {e}", "red"))
        return None


In [37]:

# ===================================================
# CELL 5: FEW-SHOT LEARNING FUNCTIONS
# ===================================================

def _run_few_shot_api_call(prompt_parts, response_file, response_label, call_label):
    """Send one few-shot prompt to the vision model and persist the reply.

    Args:
        prompt_parts: Content parts passed to ``vision_model.generate_content``.
        response_file: Path the response text is written to.
        response_label: Wording used in the "saved"/"preview" log lines
            (e.g. ``"Model response"``).
        call_label: Wording used in the failure log lines (e.g. ``"API call"``).

    Returns:
        True once ``api_call_with_backoff`` has returned a response, even if
        reading or saving its text fails afterwards; False if the call raised.
    """
    call_returned = False
    try:
        model_response = api_call_with_backoff(
            lambda: vision_model.generate_content(
                prompt_parts,
                generation_config=generation_config
            )
        )

        # The call counts as a success as soon as it returns, before .text is read.
        call_returned = True
        response_text = model_response.text

        log_conversation("assistant", response_text)

        with open(response_file, "w", encoding="utf-8") as f:
            f.write(response_text)

        log_message(colored(f"✅ {response_label} saved to {response_file}", "green"))
        log_message(colored(f"📝 {response_label} preview: {response_text[:100]}...", "cyan"))

    except Exception as e:
        # Best effort: a failed example call is logged and the run continues.
        log_message(colored(f"❌ {call_label} failed even after retries: {e}", "red"))
        log_conversation("system", f"ERROR: {call_label} failed after retries: {e}")

    # Fixed pause after every call, whether it succeeded or not.
    time.sleep(1)
    return call_returned


def build_sequential_few_shot_learning(examples, max_examples=1):
    """Build a sequential few-shot learning conversation.

    Replays the user turns of up to ``max_examples`` conversation examples
    against the vision model, saving prompts and responses under EXAMPLES_DIR
    and PROMPTS_DIR and appending to the conversation log.

    Args:
        examples: Iterable of example dicts; only those with a truthy
            ``"conversation_example"`` entry are used.
        max_examples: Maximum number of conversation examples to replay.

    Returns:
        A list of ``{"role", "content"}`` dicts holding the initial extraction
        prompt and the final instruction (model responses are logged and saved
        to disk but not added to this list), or ``[]`` when no conversation
        example is available.
    """
    conversation_examples = [ex for ex in examples if ex.get("conversation_example", False)]
    examples_to_use = conversation_examples[:max_examples]

    if not examples_to_use:
        log_message(colored("⚠️ No conversation examples found in examples4a.json.", "yellow"))
        return []

    log_message(colored(f"Building sequential few-shot learning with {len(examples_to_use)} conversation examples", "cyan"))

    # Initialize conversation log file for this session: append a session
    # banner to an existing log, otherwise create the log with a header.
    if os.path.exists(CONVERSATION_LOG_FILE):
        with open(CONVERSATION_LOG_FILE, "a", encoding="utf-8") as f:
            f.write(f"\n\n{'#'*80}\n")
            f.write(f"NEW FEW-SHOT LEARNING SESSION STARTED: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"{'#'*80}\n\n")
    else:
        with open(CONVERSATION_LOG_FILE, "w", encoding="utf-8") as f:
            f.write("MEDICATION HISTORY EXTRACTION API CONVERSATION LOG\n")
            f.write(f"Created: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"{'#'*80}\n\n")

    # Make sure the prompt output directory exists before anything is written
    # into it (the example/turn directories below are created the same way).
    os.makedirs(PROMPTS_DIR, exist_ok=True)

    conversation = [{"role": "user", "content": HISTORY_EXTRACTION_PROMPT}]
    log_conversation("user", HISTORY_EXTRACTION_PROMPT, api_call_id="initial_prompt")

    with open(os.path.join(PROMPTS_DIR, "initial_prompt.txt"), "w", encoding="utf-8") as f:
        f.write(HISTORY_EXTRACTION_PROMPT)

    api_call_count = 0
    success_count = 0

    # Process each conversation example
    for i, example in enumerate(examples_to_use, 1):
        log_message(colored(f"\n🔄 Processing conversation example {i}/{len(examples_to_use)}", "cyan"))
        dialogue = example.get("dialogue", [])

        example_dir = os.path.join(EXAMPLES_DIR, f"example_{i}")
        os.makedirs(example_dir, exist_ok=True)

        dialogue_file = os.path.join(example_dir, "dialogue.json")
        with open(dialogue_file, "w", encoding="utf-8") as f:
            json.dump(dialogue, f, indent=2)
        log_message(colored(f"💾 Saved example dialogue to {dialogue_file}", "green"))

        for j, turn in enumerate(dialogue, 1):
            # Only user turns are replayed; other roles are skipped.
            if turn.get("role") != "user":
                continue

            content = turn.get("content", "")
            media = turn.get("media", [])
            expected_output = turn.get("expected_output")

            log_message(colored(f"\n🔄 Processing dialogue turn {j} - Content: {content[:50]}...", "cyan"))

            turn_dir = os.path.join(example_dir, f"turn_{j}")
            os.makedirs(turn_dir, exist_ok=True)

            # Keep a human-readable record of what this turn contained.
            turn_file = os.path.join(turn_dir, "prompt.txt")
            with open(turn_file, "w", encoding="utf-8") as f:
                f.write(content)
                if media:
                    f.write("\n\nMedia files:\n")
                    for m in media:
                        f.write(f"- {m}\n")
                if expected_output:
                    f.write("\n\nExpected output:\n")
                    f.write(json.dumps(expected_output, indent=2))

            # Make API call if this turn has media
            if media:
                prompt_parts = [Part.from_text(content)]

                for media_uri in media:
                    mime_type = get_mime_type(media_uri)
                    prompt_parts.append(Part.from_uri(uri=media_uri, mime_type=mime_type))

                api_call_count += 1
                call_id = f"{i}.{j}"
                log_message(colored(f"📞 API Call #{api_call_count} (ID: {call_id}): Showing media", "yellow"))

                log_conversation("user", content, api_call_id=call_id)
                for media_uri in media:
                    log_conversation("system", f"[Included media file: {media_uri}]")

                if _run_few_shot_api_call(
                    prompt_parts,
                    os.path.join(turn_dir, "response.txt"),
                    "Model response",
                    "API call",
                ):
                    success_count += 1

            # Process expected output if present
            if expected_output:
                expected_output_prompt = f"{content}\n\n{json.dumps(expected_output, indent=2)}"
                prompt_parts = [Part.from_text(expected_output_prompt)]

                api_call_count += 1
                call_id = f"{i}.{j}_output"
                log_message(colored(f"📞 API Call #{api_call_count} (ID: {call_id}): Showing expected output format", "yellow"))

                log_conversation("user", expected_output_prompt, api_call_id=call_id)

                if _run_few_shot_api_call(
                    prompt_parts,
                    os.path.join(turn_dir, "output_response.txt"),
                    "Output response",
                    "Output API call",
                ):
                    success_count += 1

    # Add final instruction
    final_instruction = "Now I will give you a new medication history video to process. Extract the information using the exact JSON format you've learned from the examples."
    conversation.append({"role": "user", "content": final_instruction})

    log_conversation("user", final_instruction, api_call_id="final_instruction")

    with open(os.path.join(PROMPTS_DIR, "final_instruction.txt"), "w", encoding="utf-8") as f:
        f.write(final_instruction)

    conversation_file = os.path.join(PROMPTS_DIR, "sequential_conversation_history.json")
    with open(conversation_file, "w", encoding="utf-8") as f:
        json.dump(conversation, f, indent=2, ensure_ascii=False)
    log_message(colored(f"💾 Saved sequential conversation history to {conversation_file}", "green"))

    log_message(colored(f"✨ Few-shot learning completed: {success_count}/{api_call_count} API calls succeeded", "cyan"))

    return conversation

In [38]:
# ===================================================
# CELL 6: PROCESSING FUNCTIONS FOR EACH TASK
# ===================================================

def extract_medication_history(gcs_uri, video_dir, idx):
    """Task 1: Extract medication history for one video.

    Sends HISTORY_EXTRACTION_PROMPT, a follow-up instruction and the video at
    ``gcs_uri`` to the vision model in a single request, stores the raw reply
    in ``video_dir`` and tries to parse a JSON object out of it.

    Returns:
        A dict with a ``success`` flag. On success it also carries ``data`` and
        ``raw_response``; on a JSON parse failure ``error`` and
        ``raw_response``; on any other exception only ``error``.
    """
    log_message(colored(f"  📋 Step 1: Extracting medication history for video {idx}...", "cyan"))

    try:
        request_parts = [
            Part.from_text(HISTORY_EXTRACTION_PROMPT),
            Part.from_text("Now I will give you a new medication history video to process. Extract the information using the exact JSON format you've learned from the examples."),
            Part.from_uri(uri=gcs_uri, mime_type=get_mime_type(gcs_uri)),
        ]

        log_conversation("user", HISTORY_EXTRACTION_PROMPT, api_call_id=f"extract_{idx}")
        log_conversation("user", "Now I will give you a new medication history video to process.")
        log_conversation("system", f"[Included video file: {gcs_uri}]")

        model_reply = api_call_with_backoff(
            lambda: vision_model.generate_content(
                request_parts,
                generation_config=generation_config
            )
        )

        raw_text = model_reply.text.strip()
        log_conversation("assistant", raw_text)

        # Keep the untouched model output on disk regardless of parse outcome.
        reply_path = os.path.join(video_dir, "step1_extraction_response.txt")
        with open(reply_path, "w", encoding="utf-8") as reply_file:
            reply_file.write(raw_text)
        log_message(colored(f"    💾 Extraction response saved to {reply_path}", "green"))

        # Strip markdown fences, then narrow to the outermost {...} span if any.
        json_candidate = re.sub(r"```json\n|\n```", "", raw_text)
        brace_span = re.search(r'(\{.*\})', json_candidate, re.DOTALL)
        if brace_span:
            json_candidate = brace_span.group(1)

        try:
            data = json.loads(json_candidate)
        except json.JSONDecodeError as parse_error:
            log_message(colored(f"    ❌ JSON parsing error: {parse_error}", "red"))
            with open(os.path.join(video_dir, "step1_extraction_failed.txt"), "w", encoding="utf-8") as failed_file:
                failed_file.write(raw_text)
            return {"success": False, "error": f"JSON parsing error: {parse_error}", "raw_response": raw_text}
        else:
            # Persist the parsed structure for the later steps.
            json_path = os.path.join(video_dir, "step1_extracted_data.json")
            with open(json_path, "w", encoding="utf-8") as json_file:
                json.dump(data, json_file, indent=2)
            log_message(colored(f"    ✅ Extracted data saved to {json_path}", "green"))

            return {"success": True, "data": data, "raw_response": raw_text}

    except Exception as e:
        log_message(colored(f"    ❌ Extraction error: {e}", "red"))
        return {"success": False, "error": str(e)}

def validate_medication_history(gcs_uri, video_dir, idx, extracted_data):
    """Task 2: Validate extracted medication history against the video.

    Appends ``extracted_data`` (as indented JSON) to the verifier prompt, sends
    it with the video to ``flash_model`` and parses the JSON object in the
    reply. On success the verified data is saved and an Excel report is built.

    Returns:
        A dict with a ``success`` flag. On success it also carries ``data``,
        ``raw_response`` and ``excel_path``; on a JSON parse failure ``error``
        and ``raw_response``; on any other exception only ``error``.
    """
    log_message(colored(f"  🔍 Step 2: Validating medication history for video {idx}...", "cyan"))

    try:
        verifier_prompt = create_verifier_prompt() + json.dumps(extracted_data, indent=2)

        request_parts = [
            Part.from_text(verifier_prompt),
            Part.from_uri(uri=gcs_uri, mime_type=get_mime_type(gcs_uri)),
        ]

        # Keep a copy of the exact prompt that was sent.
        with open(os.path.join(video_dir, "step2_verification_prompt.txt"), "w", encoding="utf-8") as prompt_out:
            prompt_out.write(verifier_prompt)

        log_conversation("user", verifier_prompt, api_call_id=f"verify_{idx}")
        log_conversation("system", f"[Included video file: {gcs_uri}]")

        model_reply = api_call_with_backoff(
            lambda: flash_model.generate_content(
                request_parts,
                generation_config=verification_config
            ),
            max_retries=3
        )

        raw_text = model_reply.text.strip()
        log_conversation("assistant", raw_text)

        # Store the unmodified reply before attempting to parse it.
        reply_path = os.path.join(video_dir, "step2_validation_response.txt")
        with open(reply_path, "w", encoding="utf-8") as reply_file:
            reply_file.write(raw_text)
        log_message(colored(f"    💾 Validation response saved to {reply_path}", "green"))

        # Prefer the outermost {...} span; otherwise just strip markdown fences.
        brace_span = re.search(r'(\{.*\})', raw_text, re.DOTALL)
        json_candidate = (
            brace_span.group(1)
            if brace_span
            else re.sub(r"```json\n|\n```", "", raw_text)
        )

        try:
            verified_data = json.loads(json_candidate)
        except json.JSONDecodeError as parse_error:
            log_message(colored(f"    ❌ JSON parsing error in validation: {parse_error}", "red"))
            return {"success": False, "error": f"JSON parsing error: {parse_error}", "raw_response": raw_text}
        else:
            verified_path = os.path.join(video_dir, "step2_verified_data.json")
            with open(verified_path, "w", encoding="utf-8") as verified_file:
                json.dump(verified_data, verified_file, indent=2)
            log_message(colored(f"    ✅ Verified data saved to {verified_path}", "green"))

            # The spreadsheet is built from the verified (not the raw) data.
            excel_path = create_excel_report(verified_data, video_dir, idx)

            return {"success": True, "data": verified_data, "raw_response": raw_text, "excel_path": excel_path}

    except Exception as e:
        log_message(colored(f"    ❌ Validation error: {e}", "red"))
        return {"success": False, "error": str(e)}

# MODIFIED EXTRACT_TIMESTAMPS FUNCTION FOR TWO TIMESTAMPS AND FRAME PAIRS
def extract_timestamps(gcs_uri, video_dir, idx):
    """Task 3: Extract two timestamps per medication and grab frames at each.

    Asks the vision model for timestamps, parses them with
    ``parse_timestamp_response``, downloads the video to a temporary file and
    calls ``extract_frame_pair`` for ``timestamp1`` and ``timestamp2`` of every
    parsed medication. The temporary video is removed afterwards.

    Returns:
        A dict with a ``success`` flag. With a usable reply it also carries
        ``timestamps``, ``frames`` and ``raw_response`` (both lists empty when
        nothing was parsed); otherwise it carries ``error``.
    """
    log_message(colored(f"  ⏰ Step 3: Extracting timestamps for video {idx}...", "cyan"))

    try:
        video_part = Part.from_uri(uri=gcs_uri, mime_type=get_mime_type(gcs_uri))
        request_parts = [MEDICATION_TIMESTAMP_PROMPT, video_part]

        response = api_call_with_backoff(
            lambda: vision_model.generate_content(
                request_parts,
                generation_config=generation_config
            )
        )

        # Guard: nothing usable came back from the model.
        if not (response and response.text):
            log_message(colored(f"    ⚠️ No timestamp response received", "yellow"))
            return {"success": False, "error": "No response received"}

        raw_response = response.text.strip()

        reply_path = os.path.join(video_dir, "step3_timestamp_response.txt")
        with open(reply_path, "w", encoding="utf-8") as reply_file:
            reply_file.write(raw_response)
        log_message(colored(f"    💾 Timestamp response saved to {reply_path}", "green"))

        parsed_timestamps = parse_timestamp_response(raw_response)

        # Guard: a reply without parseable timestamps still counts as success.
        if not parsed_timestamps:
            log_message(colored(f"    ℹ️ No timestamps parsed", "blue"))
            return {"success": True, "timestamps": [], "frames": [], "raw_response": raw_response}

        medication_count = len(parsed_timestamps)
        total_expected_frames = medication_count * 2 * 2  # 2 timestamps per med * 2 frames per timestamp
        log_message(colored(f"    🖼️ Found {medication_count} medications with 2 timestamps each, extracting {total_expected_frames} frames...", "yellow"))

        # Reserve a local temp path; the file is filled by the download below.
        storage_client = storage.Client()
        with tempfile.NamedTemporaryFile(suffix=Path(gcs_uri).suffix, delete=False) as tmp_video_file:
            local_video_path = tmp_video_file.name

        try:
            # gs://bucket_name/path -> bucket name is the third '/'-separated piece.
            bucket_name = gcs_uri.split('/')[2]
            blob_gcs_path = gcs_uri.replace(f"gs://{bucket_name}/", "")
            storage_client.bucket(bucket_name).blob(blob_gcs_path).download_to_filename(local_video_path)

            extracted_frames = []
            for med_number, ts_data in enumerate(parsed_timestamps, start=1):
                try:
                    med_name_for_file = re.sub(r'[\W_]+', '', ts_data["medication_name"])[:50]

                    # Same handling for both timestamps; frames already
                    # collected are kept if a later timestamp fails.
                    for slot, ts_key in ((1, "timestamp1"), (2, "timestamp2")):
                        ts_text = ts_data[ts_key]
                        ts_ms = time_str_to_milliseconds(ts_text)
                        extracted_frames.extend(
                            extract_frame_pair(
                                local_video_path,
                                ts_ms,
                                med_name_for_file,
                                ts_text,
                                video_dir,
                                med_number,
                                slot
                            )
                        )

                except ValueError as ve:
                    log_message(colored(f"    ❌ Invalid time format for medication {med_number}: {ve}", "red"))
                except Exception as frame_ex:
                    log_message(colored(f"    ❌ Error during frame extraction for medication {med_number}: {frame_ex}", "red"))

            # Persist everything gathered in this step next to the other outputs.
            summary_payload = {
                "parsed_timestamps": parsed_timestamps,
                "extracted_frames": extracted_frames,
                "raw_response": raw_response
            }
            with open(os.path.join(video_dir, "step3_timestamp_data.json"), "w", encoding="utf-8") as data_file:
                json.dump(summary_payload, data_file, indent=2)

            log_message(colored(f"    ✅ Extracted {len(extracted_frames)} frames total", "green"))
            return {"success": True, "timestamps": parsed_timestamps, "frames": extracted_frames, "raw_response": raw_response}

        finally:
            # Always remove the downloaded copy, even on failure.
            if os.path.exists(local_video_path):
                os.remove(local_video_path)

    except Exception as e:
        log_message(colored(f"    ❌ Timestamp extraction error: {e}", "red"))
        return {"success": False, "error": str(e)}

def extract_transcription(gcs_uri, video_dir, idx):
    """Task 4: Extract the full conversation transcription of one video.

    Sends FULL_TRANSCRIPTION_PROMPT with the video to the vision model and
    writes the stripped reply to ``step4_full_transcription.txt`` in
    ``video_dir``.

    Returns:
        ``{"success": True, "transcription": <text>}`` when the model replied
        with text, otherwise ``{"success": False, "error": <message>}``.
    """
    log_message(colored(f"  🗣️ Step 4: Extracting transcription for video {idx}...", "cyan"))

    try:
        video_part = Part.from_uri(uri=gcs_uri, mime_type=get_mime_type(gcs_uri))
        request_parts = [FULL_TRANSCRIPTION_PROMPT, video_part]

        response = api_call_with_backoff(
            lambda: vision_model.generate_content(
                request_parts,
                generation_config=generation_config
            )
        )

        # Guard: bail out early when the model returned nothing usable.
        if not (response and response.text):
            log_message(colored(f"    ⚠️ No transcription response received", "yellow"))
            return {"success": False, "error": "No response received"}

        transcription_text = response.text.strip()

        transcription_path = os.path.join(video_dir, "step4_full_transcription.txt")
        with open(transcription_path, "w", encoding="utf-8") as out_file:
            out_file.write(transcription_text)
        log_message(colored(f"    ✅ Transcription saved to {transcription_path}", "green"))

        return {"success": True, "transcription": transcription_text}

    except Exception as e:
        log_message(colored(f"    ❌ Transcription error: {e}", "red"))
        return {"success": False, "error": str(e)}


In [39]:

# ===================================================
# CELL 7: MAIN PROCESSING FUNCTION
# ===================================================

def process_all_videos():
    """Run all four tasks on every video found in the bucket.

    Loads the few-shot examples, builds the few-shot context (its result is
    only checked for being non-empty in this function), lists the videos and,
    for each one, runs extraction, validation (only after a successful
    extraction with data), timestamp extraction and transcription. Per-video
    results are written to each video directory; the combined list is written
    to OUTPUT_DIR and summarised with ``create_summary_report``.

    Returns:
        The list of per-video result dicts, or None when processing is aborted
        because there are no examples, no conversation history or no videos.
    """
    # Few-shot examples are a hard prerequisite.
    few_shot_examples = load_few_shot_examples()
    if not few_shot_examples:
        log_message(colored("❌ No examples found. Cannot proceed with sequential learning approach.", "red"))
        return

    log_message(colored("🔄 Building few-shot learning context...", "cyan"))
    conversation_history = build_sequential_few_shot_learning(few_shot_examples)
    if not conversation_history:
        log_message(colored("❌ Failed to build conversation history. Aborting.", "red"))
        return

    video_uris = list_videos_in_bucket(bucket_name, VIDEO_PREFIX)
    if not video_uris:
        log_message(colored("❌ No videos found to process.", "red"))
        return

    total_videos = len(video_uris)
    log_message(colored(f"🎬 Starting processing of {total_videos} videos", "green"))

    all_results = []

    for idx, gcs_uri in enumerate(video_uris, start=1):
        log_message(colored(f"\n📹 Processing video {idx}/{total_videos}: {gcs_uri}", "magenta"))

        # Each video gets its own output directory, numbered by position.
        video_dir = os.path.join(OUTPUT_DIR, f"History_{idx}")
        os.makedirs(video_dir, exist_ok=True)

        video_result = {
            "video_uri": gcs_uri,
            "video_index": idx,
            "video_filename": Path(gcs_uri).name.split('.')[0],
            "video_directory": video_dir,
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "step1_extraction": None,
            "step2_validation": None,
            "step3_timestamps": None,
            "step4_transcription": None,
            "overall_success": False
        }

        try:
            # Task 1: extraction
            extraction_result = extract_medication_history(gcs_uri, video_dir, idx)
            video_result["step1_extraction"] = extraction_result

            # Task 2: validation needs successfully extracted, non-empty data
            can_validate = extraction_result.get("success") and extraction_result.get("data")
            if can_validate:
                video_result["step2_validation"] = validate_medication_history(
                    gcs_uri, video_dir, idx, extraction_result["data"]
                )
            else:
                log_message(colored(f"  ⚠️ Skipping validation due to extraction failure", "yellow"))
                video_result["step2_validation"] = {"success": False, "error": "Extraction failed, skipping validation"}

            # Tasks 3 and 4 do not depend on the earlier steps
            timestamp_result = extract_timestamps(gcs_uri, video_dir, idx)
            video_result["step3_timestamps"] = timestamp_result

            transcription_result = extract_transcription(gcs_uri, video_dir, idx)
            video_result["step4_transcription"] = transcription_result

            # Any one of extraction / timestamps / transcription succeeding is
            # enough; validation is not part of this flag.
            video_result["overall_success"] = (
                extraction_result.get("success", False)
                or timestamp_result.get("success", False)
                or transcription_result.get("success", False)
            )

            video_result_path = os.path.join(video_dir, "processing_result.json")
            with open(video_result_path, "w", encoding="utf-8") as result_file:
                json.dump(video_result, result_file, indent=2, ensure_ascii=False)
            log_message(colored(f"  💾 Video result saved to {video_result_path}", "green"))

        except Exception as e:
            log_message(colored(f"  ❌ Error processing video {idx}: {e}", "red"))
            video_result["error"] = str(e)

        all_results.append(video_result)

        # Pause between videos, but not after the last one.
        if idx < total_videos:
            log_message(colored(f"⏱️ Waiting 3 seconds before processing next video...", "yellow"))
            time.sleep(3)

    # Combined results for the whole run
    final_results_path = os.path.join(OUTPUT_DIR, "final_processing_results.json")
    with open(final_results_path, "w", encoding="utf-8") as results_file:
        json.dump(all_results, results_file, indent=2, ensure_ascii=False)
    log_message(colored(f"💾 Final results saved to {final_results_path}", "green"))

    create_summary_report(all_results)

    return all_results

def _format_rate(count, total):
    """Return ``"count/total (pct%)"`` with one decimal; the percentage is 0.0 when total is 0."""
    pct = (count / total * 100) if total else 0.0
    return f"{count}/{total} ({pct:.1f}%)"


def _step_succeeded(result, step_key):
    """Return True when the step entry stored under ``step_key`` reports a truthy ``success``."""
    # A step key can be absent (or None) when processing stopped before reaching it.
    step = result.get(step_key) or {}
    return bool(step.get("success", False))


def _success_rate_rows(all_results):
    """Return ``(label, "count/total (pct%)")`` pairs for each step plus the overall flag."""
    total = len(all_results)
    steps = [
        ("Step 1 (Extraction)", "step1_extraction"),
        ("Step 2 (Validation)", "step2_validation"),
        ("Step 3 (Timestamps)", "step3_timestamps"),
        ("Step 4 (Transcription)", "step4_transcription"),
    ]
    rows = []
    for label, step_key in steps:
        count = sum(1 for r in all_results if _step_succeeded(r, step_key))
        rows.append((label, _format_rate(count, total)))
    overall = sum(1 for r in all_results if r.get("overall_success", False))
    rows.append(("Overall Success", _format_rate(overall, total)))
    return rows


def _video_status_rows(result):
    """Return ``(label, mark)`` pairs for one video, where mark is '✅' or '❌'."""
    validation = result.get("step2_validation") or {}
    checks = [
        ("Extraction", _step_succeeded(result, "step1_extraction")),
        ("Validation", _step_succeeded(result, "step2_validation")),
        # The Excel report counts as present when validation recorded a non-empty path.
        ("Excel Report", bool(validation.get("excel_path"))),
        ("Timestamps", _step_succeeded(result, "step3_timestamps")),
        ("Transcription", _step_succeeded(result, "step4_transcription")),
        ("Overall", bool(result.get("overall_success"))),
    ]
    return [(label, '✅' if ok else '❌') for label, ok in checks]


def create_summary_report(all_results):
    """Create a summary report of all processing.

    Writes ``processing_summary.txt`` and then ``processing_summary.docx`` into
    ``OUTPUT_DIR``. Both contain the number of videos, per-step success rates and
    a per-video pass/fail listing.

    Args:
        all_results: list of per-video result dicts. Each may carry
            ``video_filename``, ``video_directory``, ``overall_success`` and the
            step entries ``step1_extraction`` .. ``step4_transcription`` (dicts
            with a ``success`` flag; ``step2_validation`` may also carry
            ``excel_path``). Missing keys are reported as failures / "unknown".

    Returns:
        None. Failures are logged through ``log_message`` rather than raised; a
        failure in the Word export does not affect the text report.
    """
    try:
        # Compute everything up front so a bad record cannot leave a half-written file.
        total = len(all_results)
        generated_at = datetime.now().strftime('%d/%m/%Y %H:%M:%S')
        rate_rows = _success_rate_rows(all_results)
        videos = [
            (
                r.get('video_filename', 'unknown'),
                r.get('video_directory', 'unknown'),
                _video_status_rows(r),
            )
            for r in all_results
        ]

        summary_path = os.path.join(OUTPUT_DIR, "processing_summary.txt")
        # Explicit UTF-8: the report contains ✅/❌, which the platform default
        # encoding may not be able to represent.
        with open(summary_path, "w", encoding="utf-8") as f:
            f.write("Combined Medication History Processing Summary\n")
            f.write("=" * 50 + "\n\n")
            f.write(f"Date: {generated_at}\n")
            f.write(f"Total videos processed: {total}\n\n")

            f.write("Success Rates:\n")
            for label, rate in rate_rows:
                f.write(f"- {label}: {rate}\n")
            f.write("\n")

            f.write("Individual Video Results:\n")
            f.write("-" * 30 + "\n")
            for i, (filename, directory, status_rows) in enumerate(videos, 1):
                f.write(f"\nVideo {i}: {filename}\n")
                f.write(f"  Directory: {directory}\n")
                for label, mark in status_rows:
                    f.write(f"  {label}: {mark}\n")

        log_message(colored(f"📄 Summary report saved to {summary_path}", "green"))

        # Also create a Word version (best effort: a failure here is only a warning)
        try:
            doc = Document()
            doc.add_heading("Combined Medication History Processing Summary", 0)

            doc.add_paragraph(f"Generated on: {generated_at}")
            doc.add_paragraph(f"Total videos processed: {total}")

            doc.add_heading("Success Rates", 1)
            for label, rate in rate_rows:
                doc.add_paragraph(f"{label}: {rate}")

            doc.add_heading("Individual Video Results", 1)
            for i, (filename, directory, status_rows) in enumerate(videos, 1):
                doc.add_heading(f"Video {i}: {filename}", 2)
                doc.add_paragraph(f"Directory: {directory}")
                for label, mark in status_rows:
                    doc.add_paragraph(f"{label}: {mark}")

            summary_doc_path = os.path.join(OUTPUT_DIR, "processing_summary.docx")
            doc.save(summary_doc_path)
            log_message(colored(f"📄 Word summary saved to {summary_doc_path}", "green"))

        except Exception as e:
            log_message(colored(f"⚠️ Error creating Word summary: {e}", "yellow"))

    except Exception as e:
        log_message(colored(f"❌ Error creating summary report: {e}", "red"))


In [None]:

# ===================================================
# CELL 8: MAIN EXECUTION
# ===================================================

if __name__ == "__main__":
    # Entry point: run the whole pipeline, report how many videos succeeded, and
    # on an unexpected failure log the traceback and persist it under LOGS_DIR.
    try:
        # Print header
        log_message(colored("\n" + "="*80, "cyan"))
        log_message(colored("  COMBINED MEDICATION HISTORY PROCESSING", "cyan"))
        log_message(colored("  4-Step Sequential Processing: Extract → Validate → Timestamps → Transcribe", "cyan"))
        log_message(colored("="*80 + "\n", "cyan"))

        # Run the combined processing
        results = process_all_videos()

        # Final summary
        if results:
            # A video counts as successful when its "overall_success" flag is truthy.
            successful_videos = sum(1 for r in results if r.get("overall_success", False))
            log_message(colored(f"\n✅ PROCESSING COMPLETED", "green"))
            log_message(colored(f"📊 Successfully processed {successful_videos}/{len(results)} videos", "green"))
            log_message(colored(f"📁 All outputs saved in: {OUTPUT_DIR}", "green"))
        else:
            log_message(colored("⚠️ No videos were processed successfully", "yellow"))

    except Exception as e:
        log_message(colored(f"\n❌ CRITICAL ERROR IN MAIN EXECUTION: {e}", "red"))
        import traceback
        traceback_str = traceback.format_exc()
        log_message(colored(f"Traceback:\n{traceback_str}", "red"))

        # Save the error to a file. This is best effort: a problem writing the log
        # must not raise from inside the handler and hide the original failure.
        error_file = os.path.join(LOGS_DIR, "error_log.txt")
        try:
            os.makedirs(LOGS_DIR, exist_ok=True)
            # Explicit UTF-8 so non-ASCII exception text cannot fail to encode.
            with open(error_file, "w", encoding="utf-8") as f:
                f.write(f"Error timestamp: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
                f.write(f"Error message: {str(e)}\n\n")
                f.write(f"Traceback:\n{traceback_str}")
            log_message(colored(f"Error details saved to {error_file}", "red"))
        except OSError as write_error:
            log_message(colored(f"⚠️ Could not write error log to {error_file}: {write_error}", "yellow"))
