In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [10]:
# ==============================================================================
# CELL 1: Install Dependencies and Setup
# ==============================================================================
import subprocess
import sys
import os
import warnings
import re

# Suppress warnings
os.environ['PYGAME_HIDE_SUPPORT_PROMPT'] = "hide"
warnings.filterwarnings('ignore')

def install_packages():
    """Install all required packages"""
    packages = [
        "PyMuPDF==1.23.26",
        "opencv-python-headless", 
        "google-generativeai",
        "edge-tts",
        "langchain",
        "moviepy",
        "Pillow"
    ]
    
    print("Installing required packages...")
    for package in packages:
        try:
            subprocess.run([sys.executable, "-m", "pip", "install", package], 
                          check=True, capture_output=True)
            print(f"✅ {package}")
        except:
            print(f"⚠️ Failed to install {package}")

install_packages()
print("✅ Cell 1 Complete - Dependencies installed")

Installing required packages...
✅ PyMuPDF==1.23.26
✅ opencv-python-headless
✅ google-generativeai
✅ edge-tts
✅ langchain
✅ moviepy
✅ Pillow
✅ Cell 1 Complete - Dependencies installed


In [11]:
# ==============================================================================
# CELL 2: Enhanced PDF Image Extractor (Fixed)
# ==============================================================================

import io
import numpy as np
from PIL import Image, ImageDraw
import subprocess
import sys

def install_and_import_pymupdf():
    """Install and properly import PyMuPDF"""
    try:
        # Clean install
        subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", "PyMuPDF", "fitz"], 
                      capture_output=True, check=False)
        subprocess.run([sys.executable, "-m", "pip", "install", "PyMuPDF==1.23.26"], check=True)
        
        # Clear module cache
        for module_name in list(sys.modules.keys()):
            if 'fitz' in module_name.lower():
                del sys.modules[module_name]
        
        import fitz
        
        # Test functionality
        test_doc = fitz.open()
        test_doc.close()
        print("✅ PyMuPDF installed and working")
        return fitz
        
    except Exception as e:
        print(f"PyMuPDF installation failed: {e}")
        return None

class EnhancedPDFImageExtractor:
    def __init__(self, min_figure_size=5000):  # Reduced threshold to capture more images
        self.min_figure_size = min_figure_size
        self.fitz = install_and_import_pymupdf()
        
    def is_meaningful_image(self, pix):
        """Check if image is meaningful (not just background/decorative)"""
        if pix.width < 50 or pix.height < 50:
            return False
        
        # Check if image has sufficient complexity
        area = pix.width * pix.height
        return area >= self.min_figure_size
        
    def extract_all_images(self, pdf_path, output_dir="all_extracted_images"):
        """Extract ALL images from PDF with better filtering"""
        if not self.fitz:
            print("PyMuPDF not available")
            return [], []
        
        os.makedirs(output_dir, exist_ok=True)
        figures, metadata = [], []
        processed_xrefs = set()  # Track processed images to avoid duplicates
        
        try:
            doc = self.fitz.open(pdf_path)
            print(f"Processing {len(doc)} pages for image extraction...")
            
            for page_num in range(len(doc)):
                page = doc[page_num]
                
                # Method 1: Get all images on the page
                image_list = page.get_images(full=True)
                print(f"Page {page_num + 1}: Found {len(image_list)} potential images")
                
                for img_index, img in enumerate(image_list):
                    try:
                        xref = img[0]
                        
                        # Skip if already processed
                        if xref in processed_xrefs:
                            continue
                        processed_xrefs.add(xref)
                        
                        # Get image data
                        base_image = doc.extract_image(xref)
                        image_bytes = base_image["image"]
                        image_ext = base_image["ext"]
                        
                        # Create PIL image
                        img_pil = Image.open(io.BytesIO(image_bytes))
                        
                        # Check if image is meaningful
                        if img_pil.width * img_pil.height < self.min_figure_size:
                            continue
                            
                        # Convert to RGB if needed
                        if img_pil.mode != 'RGB':
                            img_pil = img_pil.convert('RGB')
                        
                        # Save image
                        save_path = os.path.join(output_dir, f"page{page_num+1}_img{len(figures)+1}.png")
                        img_pil.save(save_path, "PNG", quality=95)
                        
                        figures.append(save_path)
                        metadata.append({
                            "page": page_num + 1,
                            "method": "direct_extract",
                            "size": (img_pil.width, img_pil.height),
                            "original_format": image_ext
                        })
                        
                        print(f"  ✅ Saved: {os.path.basename(save_path)} ({img_pil.width}x{img_pil.height})")
                        
                    except Exception as e:
                        print(f"  ⚠️ Error with image {img_index + 1}: {e}")
                        continue
                
                # Method 2: Render page regions with drawings/charts
                # This captures vector graphics that might not be in image_list
                try:
                    # Get page as image at high resolution
                    mat = self.fitz.Matrix(2.0, 2.0)  # 2x zoom
                    pix = page.get_pixmap(matrix=mat)
                    
                    # Convert to PIL
                    img_data = pix.tobytes("png")
                    page_img = Image.open(io.BytesIO(img_data))
                    
                    # If this page has few extracted images but is large, save the page render
                    page_images_count = len([m for m in metadata if m["page"] == page_num + 1])
                    if page_images_count < 2 and page_img.width > 800:  # Likely contains charts/diagrams
                        save_path = os.path.join(output_dir, f"page{page_num+1}_fullrender.png")
                        page_img.save(save_path, "PNG", quality=95)
                        
                        figures.append(save_path)
                        metadata.append({
                            "page": page_num + 1,
                            "method": "page_render",
                            "size": (page_img.width, page_img.height),
                            "original_format": "rendered"
                        })
                        print(f"  ✅ Saved page render: {os.path.basename(save_path)}")
                    
                    pix = None
                    
                except Exception as e:
                    print(f"  ⚠️ Error rendering page {page_num + 1}: {e}")
            
            doc.close()
            
        except Exception as e:
            print(f"Error in image extraction: {e}")
            
        print(f"\n🎯 TOTAL: {len(figures)} images extracted from PDF")
        return figures, metadata

# Initialize and run enhanced extraction
pdf_path = "/kaggle/input/edu-video/1-s2.0-S0957417424008327-main.pdf"

if os.path.exists(pdf_path):
    extractor = EnhancedPDFImageExtractor(min_figure_size=3000)  # Lower threshold
    figures, metadata = extractor.extract_all_images(pdf_path)
    print(f"\n🎉 Extracted {len(figures)} figures total")
    
    # Show breakdown
    methods = {}
    for m in metadata:
        method = m["method"]
        methods[method] = methods.get(method, 0) + 1
    
    for method, count in methods.items():
        print(f"  - {method}: {count} images")
        
else:
    print(f"❌ PDF not found: {pdf_path}")
    figures, metadata = [], []

print("✅ Cell 2 Complete - Enhanced image extraction finished")

Collecting PyMuPDF==1.23.26
  Using cached PyMuPDF-1.23.26-cp311-none-manylinux2014_x86_64.whl.metadata (3.4 kB)
Using cached PyMuPDF-1.23.26-cp311-none-manylinux2014_x86_64.whl (4.4 MB)
Installing collected packages: PyMuPDF
Successfully installed PyMuPDF-1.23.26
✅ PyMuPDF installed and working
Processing 13 pages for image extraction...
Page 1: Found 3 potential images
  ✅ Saved: page1_img1.png (236x298)
  ✅ Saved: page1_img2.png (248x271)
  ✅ Saved: page1_img3.png (119x119)
Page 2: Found 0 potential images
  ✅ Saved page render: page2_fullrender.png
Page 3: Found 0 potential images
  ✅ Saved page render: page3_fullrender.png
Page 4: Found 1 potential images
  ✅ Saved: page4_img6.png (1697x563)
  ✅ Saved page render: page4_fullrender.png
Page 5: Found 1 potential images
  ✅ Saved: page5_img8.png (63x59)
  ✅ Saved page render: page5_fullrender.png
Page 6: Found 0 potential images
  ✅ Saved page render: page6_fullrender.png
Page 7: Found 0 potential images
  ✅ Saved page render: page7_

In [15]:
# ==============================================================================
# CELL 3: Enhanced Image Explanation Generation (FIXED for Full Model Names)
# ==============================================================================

import google.generativeai as genai
import re

# Configure Gemini API
GEMINI_API_KEY = "AIzaSyD9AjDpmfsLWRhz5olrBmL3OLkEsqq6QcU"
genai.configure(api_key=GEMINI_API_KEY)

def list_available_models():
    """List supported Gemini models for debugging"""
    print("🔍 Listing available Gemini models...")
    available_models = []
    for model in genai.list_models():
        if 'generateContent' in model.supported_generation_methods:
            available_models.append(model.name)
            print(f"  - {model.name} (supports vision: {model.supported_generation_methods})")
    print(f"\n📋 Total supported models: {len(available_models)}")
    return available_models

def get_image_explanations(figures, metadata):
    """Generate educational explanations for extracted figures using a stable 2025 Gemini model"""
    # List models first for debugging
    available_models = list_available_models()
    
    # Choose a stable 2025 model (fallback chain with full model names)
    model_name = None
    if 'models/gemini-2.5-flash' in available_models:
        model_name = 'gemini-2.5-flash'
    elif 'models/gemini-2.0-flash' in available_models:
        model_name = 'gemini-2.0-flash'
    elif 'models/gemini-1.5-flash' in available_models:
        model_name = 'gemini-1.5-flash'
    else:
        # Fallback to the first available vision-capable model
        for m in available_models:
            if 'gemini' in m and 'generateContent' in genai.list_models().get(m, {}).supported_generation_methods:
                model_name = m.replace('models/', '')  # Strip 'models/' prefix
                break
        if not model_name:
            raise ValueError("No suitable Gemini model found!")
    
    print(f"🤖 Using model: {model_name}")
    
    diagrams = []
    model = genai.GenerativeModel(model_name)
    
    print(f"🔍 Generating explanations for {len(figures)} figures...")
    
    for i, fig_path in enumerate(figures):
        try:
            with Image.open(fig_path) as image:
                # Prompt for educational explanation suitable for narration and display
                prompt = """
                Analyze this image and provide a clear, educational explanation in 2-3 sentences.
                First, briefly transcribe any key visible text (titles, labels, main data) to reference.
                Then explain:
                1. What type of visualization this is (chart, diagram, graph, etc.)
                2. What key information or concept it presents
                3. Any important data trends or relationships shown
                
                Make it suitable for an educational video narration and on-screen text. Be specific, informative, and engaging.
                Output as a single cohesive paragraph.
                """
                
                response = model.generate_content([prompt, image])
                explanation = response.text.strip()
                
                # Clean up for display and audio
                explanation = re.sub(r'[*#]+', '', explanation)  # Remove markdown
                explanation = re.sub(r'\n+', ' ', explanation)  # Single line for audio
                explanation_display = re.sub(r'\s+', ' ', explanation).strip()  # Clean for text overlay
                
                diagrams.append({
                    "image_path": fig_path, 
                    "explanation_audio": explanation,  # For TTS
                    "explanation_display": explanation_display,  # For on-screen text below image
                    "page": metadata[i]["page"] if i < len(metadata) else 1
                })
                
                print(f"✅ Figure {i+1} (Page {diagrams[-1]['page']}): {explanation_display[:100]}...")
                
        except Exception as e:
            print(f"⚠️ Error processing {fig_path}: {e}")
            # Add fallback explanation
            page_num = metadata[i]["page"] if i < len(metadata) else i+1
            fallback_explanation = f"This figure from page {page_num} illustrates key visual information related to the research findings discussed in this section."
            diagrams.append({
                "image_path": fig_path,
                "explanation_audio": fallback_explanation,
                "explanation_display": fallback_explanation,
                "page": page_num
            })
    
    return diagrams

# Generate explanations
if figures:
    diagrams = get_image_explanations(figures, metadata)
    print(f"\n✅ Generated {len(diagrams)} explanations")
else:
    diagrams = []
    print("No figures to explain")

print("✅ Cell 3 Complete - Image explanations generated")

🔍 Listing available Gemini models...
  - models/gemini-2.5-pro-preview-03-25 (supports vision: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent'])
  - models/gemini-2.5-flash-preview-05-20 (supports vision: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent'])
  - models/gemini-2.5-flash (supports vision: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent'])
  - models/gemini-2.5-flash-lite-preview-06-17 (supports vision: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent'])
  - models/gemini-2.5-pro-preview-05-06 (supports vision: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent'])
  - models/gemini-2.5-pro-preview-06-05 (supports vision: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent'])
  - models/gemini-2.5-pro (supports vision: ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateCont

In [16]:
# ==============================================================================
# CELL 4: Skip PDF Text Extraction (Image-Centric Approach)
# ==============================================================================

# Skipping main PDF text extraction as we're focusing on per-image explanations for narration
print("ℹ️ Skipping PDF text extraction - narration will use image-specific explanations")
pdf_content = ""  # Not used
chunks = []  # Not used for audio

print("✅ Cell 4 Complete - PDF text extraction skipped")

ℹ️ Skipping PDF text extraction - narration will use image-specific explanations
✅ Cell 4 Complete - PDF text extraction skipped


In [17]:
# ==============================================================================
# CELL 5: Skip Text Chunking (Image-Centric Approach)
# ==============================================================================

# Skipping text chunking as we're using per-image explanations for narration
print("ℹ️ Skipping text chunking - narration segments will use individual image explanations")
# Set chunks to explanations for compatibility (one per image)
if 'diagrams' in locals():
    chunks = [d['explanation_audio'] for d in diagrams]
else:
    chunks = []

print(f"✅ Cell 5 Complete - Text chunking skipped; {len(chunks)} explanation segments ready")

ℹ️ Skipping text chunking - narration segments will use individual image explanations
✅ Cell 5 Complete - Text chunking skipped; 19 explanation segments ready


In [18]:
# ==============================================================================
# CELL 6: Professional Audio Generation from Image Explanations (FIXED)
# ==============================================================================

import asyncio
import edge_tts

async def professional_text_to_speech(text, voice="en-US-JennyNeural", rate="-5%", pitch="+0Hz"):
    """Convert text to professional narration using Edge TTS"""
    # Clean text for speech
    speech_text = text.strip()
    
    # Remove any remaining formatting artifacts
    speech_text = re.sub(r'[*#_`]', '', speech_text)  # Markdown
    speech_text = re.sub(r'http[s]?://\S+', 'website link', speech_text)  # URLs
    speech_text = re.sub(r'\b[A-Z]{2,}\b', lambda m: m.group().lower(), speech_text)  # ACRONYMS to lowercase
    
    # Improve pronunciation of common terms
    replacements = {
        'et al.': 'and colleagues',
        'i.e.': 'that is',
        'e.g.': 'for example',
        'vs.': 'versus',
        'etc.': 'and so on',
        'COVID-19': 'COVID nineteen',
        'AI': 'artificial intelligence',
        'ML': 'machine learning',
        'IoT': 'Internet of Things',
        'API': 'A P I'
    }
    
    for old, new in replacements.items():
        speech_text = speech_text.replace(old, new)
    
    # Generate speech
    tts = edge_tts.Communicate(speech_text, voice=voice, rate=rate, pitch=pitch)
    audio_stream = b""
    async for chunk in tts.stream():
        if chunk["type"] == "audio":
            audio_stream += chunk["data"]
    
    return audio_stream

async def generate_explanation_audio(diagrams, output_dir="explanation_audio"):
    """Generate audio files for each image's explanation"""
    os.makedirs(output_dir, exist_ok=True)
    audio_paths = []
    
    # Professional voice options
    voices = [
        "en-US-JennyNeural",    # Clear, professional female voice
        "en-US-GuyNeural",      # Clear, professional male voice  
        "en-US-AriaNeural",     # Warm, engaging female voice
        "en-US-DavisNeural"     # Confident male voice
    ]
    
    selected_voice = voices[0]  # Use Jenny by default
    print(f"🎙️ Using voice: {selected_voice}")
    print(f"🔊 Generating audio from explanations for {len(diagrams)} images...")
    
    for i, diagram in enumerate(diagrams):
        explanation = diagram.get('explanation_audio', '').strip()
        if not explanation:
            continue
            
        out_path = os.path.join(output_dir, f"image_{i+1:02d}_explanation.mp3")
        print(f"🔄 Processing image {i+1}/{len(diagrams)}...")
        
        try:
            audio_bytes = await professional_text_to_speech(
                explanation, 
                voice=selected_voice, 
                rate="-5%",  # Slightly slower for clarity
                pitch="+0Hz"
            )
            
            with open(out_path, "wb") as f:
                f.write(audio_bytes)
            
            audio_paths.append(out_path)
            
            # Calculate estimated duration (rough estimate)
            word_count = len(explanation.split())
            estimated_duration = word_count / 2.5  # ~2.5 words per second
            print(f"✅ Audio saved: {os.path.basename(out_path)} (~{estimated_duration:.1f}s)")
            
        except Exception as e:
            print(f"⚠️ Error generating audio for image {i+1}: {e}")
            continue
    
    return audio_paths

# Generate explanation audio files
if diagrams:
    audio_files = await generate_explanation_audio(diagrams)
    print(f"\n🔊 Generated {len(audio_files)} explanation audio files")
    
    # Calculate total estimated duration
    total_words = sum(len(d.get('explanation_audio', '').split()) for d in diagrams)
    total_duration = total_words / 2.5 / 60  # Convert to minutes
    print(f"📊 Estimated total video duration: {total_duration:.1f} minutes")
else:
    audio_files = []
    print("No diagrams available for audio generation")

print("✅ Cell 6 Complete - Explanation audio generation finished")

🎙️ Using voice: en-US-JennyNeural
🔊 Generating audio from explanations for 19 images...
🔄 Processing image 1/19...
✅ Audio saved: image_01_explanation.mp3 (~32.4s)
🔄 Processing image 2/19...
✅ Audio saved: image_02_explanation.mp3 (~28.8s)
🔄 Processing image 3/19...
✅ Audio saved: image_03_explanation.mp3 (~24.4s)
🔄 Processing image 4/19...
✅ Audio saved: image_04_explanation.mp3 (~28.4s)
🔄 Processing image 5/19...
✅ Audio saved: image_05_explanation.mp3 (~38.4s)
🔄 Processing image 6/19...
✅ Audio saved: image_06_explanation.mp3 (~27.2s)
🔄 Processing image 7/19...
✅ Audio saved: image_07_explanation.mp3 (~31.6s)
🔄 Processing image 8/19...
✅ Audio saved: image_08_explanation.mp3 (~27.6s)
🔄 Processing image 9/19...
✅ Audio saved: image_09_explanation.mp3 (~36.0s)
🔄 Processing image 10/19...
✅ Audio saved: image_10_explanation.mp3 (~32.8s)
🔄 Processing image 11/19...
✅ Audio saved: image_11_explanation.mp3 (~32.8s)
🔄 Processing image 12/19...
✅ Audio saved: image_12_explanation.mp3 (~30.4

In [19]:
# ==============================================================================
# CELL 7: Professional Video Generation Class (FIXED for Per-Image Explanation Sync)
# ==============================================================================

from moviepy.editor import *
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import os

class ProfessionalVideoGenerator:
    def __init__(self, width=1920, height=1080, fps=30):  # Full HD, smooth playback
        self.width = width
        self.height = height
        self.fps = fps
        self.bg_color = (15, 23, 42)  # Professional dark blue
        self.text_color = (248, 250, 252)  # Off-white
        self.accent_color = (59, 130, 246)  # Blue accent
        
    def load_professional_font(self, fontsize, bold=False):
        """Load professional font with multiple fallbacks"""
        font_paths = [
            "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf" if bold else "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
            "/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf" if bold else "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf",
            "/System/Library/Fonts/Helvetica.ttc",
            "/Windows/Fonts/arial.ttf"
        ]
        
        for font_path in font_paths:
            try:
                return ImageFont.truetype(font_path, fontsize)
            except:
                continue
        
        return ImageFont.load_default()
    
    def wrap_text_professional(self, text, font, max_width):
        """Professional text wrapping with better line breaks"""
        words = text.split()
        lines = []
        current_line = []
        
        for word in words:
            test_line = ' '.join(current_line + [word])
            try:
                bbox = font.getbbox(test_line)
                line_width = bbox[2] - bbox[0]
            except:
                line_width = len(test_line) * (font.size * 0.6)
            
            if line_width <= max_width:
                current_line.append(word)
            else:
                if current_line:
                    lines.append(' '.join(current_line))
                current_line = [word]
        
        if current_line:
            lines.append(' '.join(current_line))
        
        return lines
    
    def create_title_slide(self, title, duration=4):
        """Create professional title slide"""
        def make_title_frame(t):
            img = Image.new('RGB', (self.width, self.height), self.bg_color)
            draw = ImageDraw.Draw(img)
            
            # Add gradient-like effect
            for i in range(self.height//3):
                alpha = int(255 * (i / (self.height//3)) * 0.1)
                color = (self.bg_color[0] + alpha//4, self.bg_color[1] + alpha//4, self.bg_color[2] + alpha//2)
                draw.rectangle([0, i*3, self.width, i*3+3], fill=color)
            
            # Title
            title_font = self.load_professional_font(72, bold=True)
            title_lines = self.wrap_text_professional(title, title_font, self.width - 200)
            
            total_title_height = len(title_lines) * 80
            start_y = (self.height - total_title_height) // 2
            
            for i, line in enumerate(title_lines):
                try:
                    bbox = draw.textbbox((0, 0), line, font=title_font)
                    text_width = bbox[2] - bbox[0]
                except:
                    text_width = len(line) * 40
                
                x = (self.width - text_width) // 2
                y = start_y + i * 80
                
                # Text shadow
                draw.text((x+3, y+3), line, fill=(0, 0, 0, 128), font=title_font)
                # Main text
                draw.text((x, y), line, fill=self.text_color, font=title_font)
            
            # Accent line
            line_y = start_y + total_title_height + 30
            draw.rectangle([self.width//4, line_y, 3*self.width//4, line_y+5], fill=self.accent_color)
            
            return np.array(img)
        
        return VideoClip(make_title_frame, duration=duration)
    
    def create_image_slide(self, image_path, explanation_display, duration, image_number):
        """Create slide for one image with explanation displayed below"""
        def make_image_frame(t):
            img = Image.new('RGB', (self.width, self.height), self.bg_color)
            draw = ImageDraw.Draw(img)
            
            # Image number in top-right corner
            num_font = self.load_professional_font(24)
            num_text = f"Figure {image_number}"
            draw.text((self.width - 200, 30), num_text, fill=self.accent_color, font=num_font)
            
            # Load and process image (upper 60% of screen)
            try:
                with Image.open(image_path) as content_img:
                    if content_img.mode != 'RGB':
                        content_img = content_img.convert('RGB')
                    
                    # Calculate scaling to fit in upper 60% of screen
                    available_width = self.width - 100
                    available_height = int(self.height * 0.6) - 50
                    
                    # Scale image proportionally
                    img_ratio = content_img.width / content_img.height
                    area_ratio = available_width / available_height
                    
                    if img_ratio > area_ratio:
                        # Image is wider - fit to width
                        new_width = available_width
                        new_height = int(available_width / img_ratio)
                    else:
                        # Image is taller - fit to height
                        new_height = available_height
                        new_width = int(available_height * img_ratio)
                    
                    content_img_resized = content_img.resize((new_width, new_height), Image.Resampling.LANCZOS)
                    
                    # Center image in upper area
                    img_x = (self.width - new_width) // 2
                    img_y = 80 + (available_height - new_height) // 2
                    
                    # Add subtle border and shadow
                    shadow_offset = 8
                    shadow_color = (0, 0, 0, 60)
                    
                    # Shadow
                    shadow_img = Image.new('RGBA', (new_width, new_height), shadow_color)
                    img.paste(shadow_img, (img_x + shadow_offset, img_y + shadow_offset), shadow_img)
                    
                    # Border
                    border_color = (71, 85, 105)
                    draw.rectangle([img_x-3, img_y-3, img_x+new_width+3, img_y+new_height+3], 
                                 outline=border_color, width=3)
                    
                    # Paste main image
                    img.paste(content_img_resized, (img_x, img_y))
                    
            except Exception as e:
                print(f"Error loading image {image_path}: {e}")
                # Fallback: show placeholder
                placeholder_font = self.load_professional_font(36)
                placeholder_text = f"Figure {image_number}: Image not available"
                placeholder_color = (100, 100, 100)
                
                # Draw placeholder rectangle
                placeholder_width = 600
                placeholder_height = 400
                placeholder_x = (self.width - placeholder_width) // 2
                placeholder_y = 150
                
                draw.rectangle([placeholder_x, placeholder_y, 
                              placeholder_x + placeholder_width, 
                              placeholder_y + placeholder_height], 
                             fill=placeholder_color, outline=border_color, width=2)
                
                # Center text in placeholder
                try:
                    bbox = draw.textbbox((0, 0), placeholder_text, font=placeholder_font)
                    text_width = bbox[2] - bbox[0]
                    text_height = bbox[3] - bbox[1]
                except:
                    text_width = len(placeholder_text) * 20
                    text_height = 36
                
                text_x = placeholder_x + (placeholder_width - text_width) // 2
                text_y = placeholder_y + (placeholder_height - text_height) // 2
                draw.text((text_x, text_y), placeholder_text, fill=self.text_color, font=placeholder_font)
            
            # Explanation area (lower 35% of screen) - displayed below image
            desc_start_y = int(self.height * 0.68)
            desc_area_height = self.height - desc_start_y - 50
            
            # Background panel for explanation
            panel_margin = 40
            panel_color = (30, 41, 59)
            draw.rectangle([panel_margin, desc_start_y, self.width - panel_margin, self.height - 30], 
                         fill=panel_color, outline=(71, 85, 105), width=2)
            
            # Explanation text
            if explanation_display and explanation_display.strip():
                expl_font = self.load_professional_font(28)  # Font for explanation text
                expl_lines = self.wrap_text_professional(explanation_display, expl_font, self.width - 120)
                
                line_height = 38
                total_text_height = len(expl_lines) * line_height
                
                # Center text vertically in explanation area
                text_start_y = desc_start_y + (desc_area_height - total_text_height) // 2 + 15
                
                for i, line in enumerate(expl_lines):
                    y_pos = text_start_y + i * line_height
                    # Add subtle text shadow for readability
                    draw.text((62, y_pos + 2), line, fill=(0, 0, 0, 100), font=expl_font)
                    draw.text((60, y_pos), line, fill=self.text_color, font=expl_font)
            
            return np.array(img)
        
        return VideoClip(make_image_frame, duration=duration)
    
    def create_end_slide(self, duration=3):
        """Create professional end slide"""
        def make_end_frame(t):
            img = Image.new('RGB', (self.width, self.height), self.bg_color)
            draw = ImageDraw.Draw(img)
            
            # Gradient effect
            for i in range(self.height//2):
                alpha = int(255 * (1 - i / (self.height//2)) * 0.15)
                color = (self.bg_color[0] + alpha//3, self.bg_color[1] + alpha//3, self.bg_color[2] + alpha//2)
                draw.rectangle([0, i*2, self.width, i*2+2], fill=color)
            
            # Thank you message
            end_font = self.load_professional_font(64, bold=True)
            end_text = "Thank You for Watching"
            
            try:
                bbox = draw.textbbox((0, 0), end_text, font=end_font)
                text_width = bbox[2] - bbox[0]
            except:
                text_width = len(end_text) * 35
            
            x = (self.width - text_width) // 2
            y = (self.height - 70) // 2
            
            # Text with shadow
            draw.text((x+4, y+4), end_text, fill=(0, 0, 0, 128), font=end_font)
            draw.text((x, y), end_text, fill=self.text_color, font=end_font)
            
            # Decorative elements
            accent_y = y + 100
            draw.rectangle([self.width//4, accent_y, 3*self.width//4, accent_y+6], fill=self.accent_color)
            
            return np.array(img)
        
        return VideoClip(make_end_frame, duration=duration)
    
    def generate_professional_video(self, diagrams, audio_files, 
                                   output_path="professional_image_explanation_video.mp4", 
                                   title="PDF Images with Explanations"):
        """Generate video with one segment per image: image + explanation below + synced audio"""
        
        print(f"🎬 Generating professional video...")
        print(f"  🖼️ Images: {len(diagrams)}")
        print(f"  🔊 Audio files: {len(audio_files)}")
        
        all_clips = []
        
        # 1. Title slide
        print("🎯 Creating title slide...")
        title_clip = self.create_title_slide(title, duration=4)
        all_clips.append(title_clip)
        
        # 2. One content segment per image
        for i, (diagram, audio_file) in enumerate(zip(diagrams, audio_files)):
            print(f"🔄 Processing image {i+1}/{len(diagrams)}...")
            
            try:
                # Load audio to get exact duration
                audio_clip = AudioFileClip(audio_file)
                segment_duration = audio_clip.duration
                
                # Create video clip for this image with explanation
                image_clip = self.create_image_slide(
                    diagram["image_path"],
                    diagram["explanation_display"],
                    segment_duration,
                    i + 1
                )
                
                # Sync audio to video
                synced_clip = image_clip.set_audio(audio_clip)
                all_clips.append(synced_clip)
                print(f"  ✅ Image {i+1} segment created (duration: {segment_duration:.1f}s)")
                
            except Exception as e:
                print(f"  ❌ Error creating image {i+1}: {e}")
                continue
        
        # 3. End slide
        print("🎯 Creating end slide...")
        end_clip = self.create_end_slide(duration=3)
        all_clips.append(end_clip)
        
        if not all_clips:
            print("❌ No clips were created!")
            return None
        
        print("🔄 Assembling final video...")
        try:
            # Combine all clips
            final_video = concatenate_videoclips(all_clips, method="compose")
            
            # Calculate total duration
            total_duration = sum(clip.duration for clip in all_clips)
            print(f"📊 Total video duration: {total_duration/60:.1f} minutes")
            
            # Export with high quality settings
            print("💾 Exporting video (this may take a while)...")
            
            export_params = {
                'fps': self.fps,
                'codec': 'libx264',
                'preset': 'medium',  # Good quality/speed balance
                'audio_codec': 'aac',
                'bitrate': '4000k',  # High quality
                'audio_bitrate': '192k',
                'temp_audiofile': 'temp-audio.m4a',
                'remove_temp': True,
                'verbose': False,
                'logger': None,
                'threads': 4
            }
            
            try:
                final_video.write_videofile(output_path, **export_params)
                print(f"✅ High-quality video exported successfully!")
                
            except Exception as export_error:
                print(f"⚠️ High-quality export failed: {export_error}")
                print("🔄 Trying with fallback settings...")
                
                # Fallback export settings
                fallback_params = {
                    'fps': 24,
                    'codec': 'libx264',
                    'preset': 'fast',
                    'bitrate': '2000k',
                    'verbose': False,
                    'logger': None
                }
                
                final_video.write_videofile(output_path, **fallback_params)
                print(f"✅ Video exported with fallback settings!")
            
            # File info
            if os.path.exists(output_path):
                file_size = os.path.getsize(output_path) / (1024 * 1024)
                print(f"📁 Final video: {output_path}")
                print(f"📏 File size: {file_size:.1f} MB")
                print(f"🎥 Resolution: {self.width}x{self.height}")
                print(f"⏱️ Duration: {total_duration/60:.1f} minutes")
            
        except Exception as e:
            print(f"❌ Video assembly failed: {e}")
            return None
        
        finally:
            # Cleanup
            print("🧹 Cleaning up...")
            for clip in all_clips:
                try:
                    clip.close()
                except:
                    pass
            
            try:
                final_video.close()
            except:
                pass
        
        return output_path

# Initialize the generator (but don't generate yet - wait for Cell 8)
video_generator = ProfessionalVideoGenerator()

print("✅ Cell 7 Complete - Professional video generator class created")

✅ Cell 7 Complete - Professional video generator class created


In [20]:
# ==============================================================================
# CELL 8: Generate Final Professional Image Explanation Video
# ==============================================================================

def create_professional_explanation_video():
    """Create the complete professional video with image explanations and synced audio"""
    
    print("🎬 STARTING IMAGE EXPLANATION VIDEO GENERATION")
    print("=" * 60)
    
    # Verify we have all required components
    missing_components = []
    
    if not diagrams:
        missing_components.append("explanations/diagrams")
    if not audio_files:
        missing_components.append("audio files")
    if not figures:
        missing_components.append("figures")
    
    if missing_components:
        print(f"❌ Missing components: {', '.join(missing_components)}")
        return None
    
    # Component summary
    print(f"📊 COMPONENT SUMMARY:")
    print(f"  🖼️ Images: {len(figures)}")
    print(f"  📝 Explanations: {len(diagrams)}")
    print(f"  🔊 Audio files: {len(audio_files)}")
    
    # Verify audio files exist
    valid_audio_files = []
    for audio_file in audio_files:
        if os.path.exists(audio_file):
            valid_audio_files.append(audio_file)
            # Get file size for verification
            size_mb = os.path.getsize(audio_file) / (1024 * 1024)
            print(f"  ✅ {os.path.basename(audio_file)} ({size_mb:.1f} MB)")
        else:
            print(f"  ⚠️ Missing: {audio_file}")
    
    if not valid_audio_files:
        print("❌ No valid audio files found!")
        return None
    
    # Verify diagram/image files exist
    valid_diagrams = []
    for diagram in diagrams:
        if os.path.exists(diagram["image_path"]):
            valid_diagrams.append(diagram)
        else:
            print(f"  ⚠️ Missing image: {diagram['image_path']}")
    
    print(f"  📁 Valid diagrams: {len(valid_diagrams)}")
    
    # Show estimated video duration
    total_segments = min(len(valid_diagrams), len(valid_audio_files))
    estimated_content_duration = sum(len(d.get('explanation_audio', '').split()) for d in valid_diagrams[:total_segments]) / 2.5  # words per second
    estimated_total_duration = estimated_content_duration + 7  # + title + end slides
    
    print(f"  ⏱️ Estimated duration: {estimated_total_duration/60:.1f} minutes")
    print()
    
    # Initialize professional video generator
    print("🎯 Initializing Professional Video Generator...")
    video_generator = ProfessionalVideoGenerator(
        width=1920, 
        height=1080, 
        fps=30
    )
    
    # Generate the professional video
    print("🚀 Starting video generation process...")
    output_path = video_generator.generate_professional_video(
        diagrams=valid_diagrams[:len(valid_audio_files)],  # Match to available audio
        audio_files=valid_audio_files,
        output_path="professional_image_explanation_video.mp4",
        title="PDF Images with AI Explanations"
    )
    
    return output_path

def verify_final_video(video_path):
    """Verify the final video was created successfully"""
    if not video_path:
        print("❌ Video generation failed - no output path returned")
        return False
    
    if not os.path.exists(video_path):
        print(f"❌ Video file not found: {video_path}")
        return False
    
    # Get file info
    file_size_mb = os.path.getsize(video_path) / (1024 * 1024)
    
    print("🎉 VIDEO GENERATION SUCCESSFUL!")
    print("=" * 50)
    print(f"📁 File: {video_path}")
    print(f"💾 Size: {file_size_mb:.1f} MB")
    print(f"🎥 Resolution: 1920x1080 (Full HD)")
    print(f"🎬 Format: MP4 (H.264)")
    
    # Verify it's not corrupted (basic check)
    if file_size_mb > 5:  # Should be at least 5MB for a real video
        print("✅ File appears to be valid")
        print("🎯 Ready for download and viewing!")
        return True
    else:
        print("⚠️ File seems too small - may be corrupted")
        return False

# Execute the complete video generation pipeline
print("🎬 PROFESSIONAL IMAGE EXPLANATION VIDEO GENERATOR")
print("=" * 60)
print("Starting complete video generation pipeline...")
print()

try:
    # Generate the video
    final_video_path = create_professional_explanation_video()
    
    if final_video_path:
        # Verify the result
        success = verify_final_video(final_video_path)
        
        if success:
            print("\n🎊 PIPELINE COMPLETED SUCCESSFULLY!")
            print("Your professional image explanation video is ready!")
        else:
            print("\n⚠️ Pipeline completed but video may have issues")
    else:
        print("\n❌ PIPELINE FAILED")
        print("Please check the error messages above")

except Exception as e:
    print(f"\n💥 UNEXPECTED ERROR: {e}")
    print("Pipeline failed with exception")

print("\n✅ Cell 8 Complete - Video generation pipeline executed")

🎬 PROFESSIONAL IMAGE EXPLANATION VIDEO GENERATOR
Starting complete video generation pipeline...

🎬 STARTING IMAGE EXPLANATION VIDEO GENERATION
📊 COMPONENT SUMMARY:
  🖼️ Images: 19
  📝 Explanations: 19
  🔊 Audio files: 19
  ✅ image_01_explanation.mp3 (0.2 MB)
  ✅ image_02_explanation.mp3 (0.2 MB)
  ✅ image_03_explanation.mp3 (0.1 MB)
  ✅ image_04_explanation.mp3 (0.2 MB)
  ✅ image_05_explanation.mp3 (0.2 MB)
  ✅ image_06_explanation.mp3 (0.2 MB)
  ✅ image_07_explanation.mp3 (0.2 MB)
  ✅ image_08_explanation.mp3 (0.2 MB)
  ✅ image_09_explanation.mp3 (0.3 MB)
  ✅ image_10_explanation.mp3 (0.2 MB)
  ✅ image_11_explanation.mp3 (0.2 MB)
  ✅ image_12_explanation.mp3 (0.2 MB)
  ✅ image_13_explanation.mp3 (0.3 MB)
  ✅ image_14_explanation.mp3 (0.2 MB)
  ✅ image_15_explanation.mp3 (0.3 MB)
  ✅ image_16_explanation.mp3 (0.3 MB)
  ✅ image_17_explanation.mp3 (0.2 MB)
  ✅ image_18_explanation.mp3 (0.3 MB)
  ✅ image_19_explanation.mp3 (0.2 MB)
  📁 Valid diagrams: 19
  ⏱️ Estimated duration: 10.2 minu