# 🎬 Video Feature Extraction Tool

**All-in-One Notebook** - Complete video analysis solution with all code embedded.

## Features:
- **Shot Cut Detection**: Counts hard cuts using frame-to-frame pixel analysis
- **Motion Analysis**: Computes average motion magnitude via optical flow
- **Text Detection (OCR)**: Detects text presence and extracts keywords
- **Object/Person Detection**: Estimates person vs object dominance using YOLOv8

---

## 1. Install Dependencies

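Uncomment the `pip` line below and run this cell once to install the required Python packages. Text detection additionally needs the Tesseract OCR binary installed and available on your system `PATH` (for example `brew install tesseract` on macOS); `pip` installs only the `pytesseract` wrapper.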

In [2]:
# Uncomment and run to install dependencies
# !pip install numpy opencv-python pillow pytesseract ultralytics pyyaml tqdm lapx -q

## 2. Core Implementation

All the video feature extraction code is embedded below.

In [3]:
# ============================================================================
# IMPORTS
# ============================================================================

import json
import logging
import sys
import time
from abc import ABC, abstractmethod
from collections import Counter
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple

import cv2
import numpy as np

# Optional imports
try:
    import pytesseract
    TESSERACT_AVAILABLE = True
except ImportError:
    TESSERACT_AVAILABLE = False
    pytesseract = None

try:
    from ultralytics import YOLO
    YOLO_AVAILABLE = True
except ImportError:
    YOLO_AVAILABLE = False
    YOLO = None

# Report which optional backends could be imported.
# NOTE: the Tesseract line only reflects whether the `pytesseract` Python package
# imported; it does not check that the Tesseract binary itself is installed.
print("✅ Core imports loaded")
print(f"   OpenCV: {cv2.__version__}")
print(f"   Tesseract: {'Available' if TESSERACT_AVAILABLE else 'Not installed'}")
print(f"   YOLO: {'Available' if YOLO_AVAILABLE else 'Not installed'}")

✅ Core imports loaded
   OpenCV: 4.12.0
   Tesseract: Available
   YOLO: Available


In [4]:
# ============================================================================
# CUSTOM EXCEPTIONS
# ============================================================================

class VideoFeatureExtractorError(Exception):
    """Base exception for all Video Feature Extractor errors.

    Attributes:
        message: Human-readable description of the failure.
        details: Machine-readable context for the failure (always a dict).
    """

    def __init__(self, message: str, details: Optional[dict] = None):
        super().__init__(message)
        self.message = message
        self.details = details or {}


class VideoNotFoundError(VideoFeatureExtractorError):
    """Raised when the specified video file does not exist.

    Attributes:
        video_path: The path that was looked up.
    """

    def __init__(self, video_path: str):
        super().__init__(
            f"Video file not found: {video_path}",
            {"video_path": video_path},
        )
        self.video_path = video_path


class VideoOpenError(VideoFeatureExtractorError):
    """Raised when a video file cannot be opened.

    Attributes:
        video_path: The path that could not be opened.
        reason: Optional explanation appended to the message, or None.
    """

    def __init__(self, video_path: str, reason: Optional[str] = None):
        message = f"Unable to open video: {video_path}"
        if reason:
            message += f" - {reason}"
        super().__init__(message, {"video_path": video_path, "reason": reason})
        self.video_path = video_path
        self.reason = reason


class InvalidFeatureError(VideoFeatureExtractorError):
    """Raised when an invalid feature name is requested.

    Attributes:
        invalid_features: Requested names that are not valid, sorted.
        valid_features: All accepted feature names, sorted.
    """

    def __init__(self, requested: list, valid: set):
        # Sort (by string form) so the message is stable from run to run;
        # formatting the raw sets would depend on hash ordering.
        invalid = sorted(set(requested) - set(valid), key=str)
        accepted = sorted(valid, key=str)
        super().__init__(
            f"Invalid feature(s): {invalid}. Valid: {accepted}",
            {"invalid": invalid, "valid": accepted},
        )
        self.invalid_features = invalid
        self.valid_features = accepted

print("✅ Exception classes defined")

✅ Exception classes defined


In [5]:
# ============================================================================
# CONFIGURATION
# ============================================================================

@dataclass
class ShotCutConfig:
    """Configuration for shot cut detection."""
    # Feature toggle. NOTE(review): no code visible here reads this flag - confirm it is honored.
    enabled: bool = True
    # Compare every Nth frame; values of 1 or less compare every frame.
    frame_step: int = 1
    # Mean absolute grayscale difference between compared frames above which a cut is flagged.
    diff_threshold: float = 30.0
    # Minimum number of frames that must separate two reported cuts.
    min_gap_frames: int = 5

@dataclass
class MotionConfig:
    """Configuration for motion analysis."""
    # Feature toggle. NOTE(review): no code visible here reads this flag - confirm it is honored.
    enabled: bool = True
    # Sample every Nth frame; optical flow is computed between consecutive sampled frames.
    frame_step: int = 2
    # The remaining fields are forwarded, in this order, as the positional
    # arguments of cv2.calcOpticalFlowFarneback.
    pyr_scale: float = 0.5
    levels: int = 3
    winsize: int = 15
    iterations: int = 3
    poly_n: int = 5
    poly_sigma: float = 1.2

@dataclass
class TextDetectionConfig:
    """Configuration for OCR text detection."""
    # Feature toggle. NOTE(review): no code visible here reads this flag - confirm it is honored.
    enabled: bool = True
    # Run OCR on every Nth frame; values of 1 or less process every frame.
    frame_step: int = 15
    # Minimum per-word confidence reported by the OCR engine for a word to count as text.
    min_confidence: float = 70.0
    # Passed as the `lang` argument to pytesseract.image_to_data.
    language: str = "eng"

@dataclass
class ObjectDetectionConfig:
    """Configuration for YOLO object detection."""
    # Feature toggle. NOTE(review): no code visible here reads this flag - confirm it is honored.
    enabled: bool = True
    # Run detection on every Nth frame; values of 1 or less process every frame.
    frame_step: int = 15
    # Passed as the `conf` argument when running the model.
    confidence_threshold: float = 0.5
    # Threshold intended for non-maximum suppression.
    # NOTE(review): confirm that the detection code actually forwards this value to the model.
    nms_threshold: float = 0.4
    # Model size suffix inserted into the weights name "yolov8{model_size}.pt".
    model_size: str = "n"  # n, s, m, l, x
    # Selects the "cuda" device when True, otherwise "cpu".
    use_gpu: bool = False

@dataclass
class ExtractorConfig:
    """Main configuration container."""
    # One settings object per feature; each gets its own fresh default instance.
    shot_cut: ShotCutConfig = field(default_factory=ShotCutConfig)
    motion: MotionConfig = field(default_factory=MotionConfig)
    text_detection: TextDetectionConfig = field(default_factory=TextDetectionConfig)
    object_detection: ObjectDetectionConfig = field(default_factory=ObjectDetectionConfig)
    # NOTE(review): no code visible here reads log_level - confirm where logging is configured.
    log_level: str = "INFO"

print("✅ Configuration classes defined")

✅ Configuration classes defined


In [6]:
# ============================================================================
# VIDEO UTILITIES
# ============================================================================

@dataclass
class VideoMetadata:
    """Basic properties of a video file."""
    path: str
    width: int
    height: int
    fps: float
    total_frames: int
    duration_seconds: float
    codec: str
    file_size_bytes: int

    def to_dict(self) -> dict:
        """Return the metadata as a plain dictionary.

        Width and height are nested under ``"resolution"`` and the duration
        is rounded to two decimal places.
        """
        resolution = dict(width=self.width, height=self.height)
        rounded_duration = round(self.duration_seconds, 2)
        return dict(
            path=self.path,
            resolution=resolution,
            fps=self.fps,
            total_frames=self.total_frames,
            duration_seconds=rounded_duration,
            codec=self.codec,
            file_size_bytes=self.file_size_bytes,
        )

def validate_video_file(video_path: Path) -> Path:
    """Return the resolved path of *video_path*, or raise if it is not a file.

    Raises:
        VideoNotFoundError: If the resolved path is not an existing regular file.
    """
    resolved = Path(video_path).resolve()
    if resolved.is_file():
        return resolved
    # The error carries the path exactly as the caller supplied it.
    raise VideoNotFoundError(str(video_path))

def get_video_metadata(video_path: Path) -> VideoMetadata:
    """Extract comprehensive metadata from a video file.

    Args:
        video_path: Path to the video file

    Returns:
        VideoMetadata populated from the properties OpenCV reports and the
        file size on disk

    Raises:
        VideoNotFoundError: If the path is not an existing file
        VideoOpenError: If OpenCV cannot open the file
    """
    path = validate_video_file(video_path)

    capture = cv2.VideoCapture(str(path))
    if not capture.isOpened():
        raise VideoOpenError(str(path))

    try:
        width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = capture.get(cv2.CAP_PROP_FPS)
        total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

        # The FOURCC packs four characters into one integer, lowest byte first.
        fourcc = int(capture.get(cv2.CAP_PROP_FOURCC))
        raw_codec = "".join(chr((fourcc >> (8 * i)) & 0xFF) for i in range(4))
        # An unknown FOURCC (0) decodes to NUL characters, which str.strip()
        # leaves in place; drop anything non-printable so the codec is clean text.
        codec = "".join(ch for ch in raw_codec if ch.isprintable()).strip()

        # Guard against a missing frame rate or frame count instead of
        # reporting an infinite or negative duration.
        duration = total_frames / fps if fps > 0 and total_frames > 0 else 0.0
        file_size = path.stat().st_size

        return VideoMetadata(
            path=str(path),
            width=width,
            height=height,
            fps=fps,
            total_frames=total_frames,
            duration_seconds=duration,
            codec=codec,
            file_size_bytes=file_size
        )
    finally:
        capture.release()

print("✅ Video utilities defined")

✅ Video utilities defined


In [7]:
# ============================================================================
# SHOT CUT DETECTION
# ============================================================================

def detect_shot_cuts(
    video_path: Path,
    config: ShotCutConfig,
    verbose: bool = True
) -> Dict[str, Any]:
    """
    Detect hard cuts in video by measuring mean pixel differences.

    Each sampled frame is converted to grayscale and compared with the
    previously sampled frame. A cut is recorded when the mean absolute
    difference exceeds ``config.diff_threshold`` and at least
    ``config.min_gap_frames`` frames have passed since the last recorded cut.

    Args:
        video_path: Path to the video file
        config: Shot cut detection configuration
        verbose: Whether to print progress

    Returns:
        Dictionary with the cut count, the first 100 cut frame indices and
        the settings that were used

    Raises:
        VideoOpenError: If the video cannot be opened
    """
    if verbose:
        print("🎬 Detecting shot cuts...")

    capture = cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        raise VideoOpenError(str(video_path))

    try:
        cuts: List[int] = []
        frame_idx = 0
        # Start one full gap in the past so the first comparison may be a cut.
        last_cut_frame = -config.min_gap_frames
        prev_gray = None

        while True:
            ok, frame = capture.read()
            if not ok:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if prev_gray is not None:
                mean_diff = float(np.mean(cv2.absdiff(gray, prev_gray)))
                far_enough = (frame_idx - last_cut_frame) >= config.min_gap_frames
                if mean_diff > config.diff_threshold and far_enough:
                    cuts.append(frame_idx)
                    last_cut_frame = frame_idx

            prev_gray = gray
            frame_idx += 1

            # Frame stepping: skip the frames between samples. Stop as soon as
            # the stream is exhausted so frame_idx never counts frames that do
            # not exist.
            if config.frame_step > 1:
                for _ in range(config.frame_step - 1):
                    if not capture.grab():
                        break
                    frame_idx += 1
    finally:
        capture.release()

    if verbose:
        print(f"   ✓ Found {len(cuts)} cuts")

    return {
        "shot_cut_count": len(cuts),
        # Only the first 100 indices are returned to keep the output small.
        "cut_frames": cuts[:100],
        "frame_step_used": config.frame_step,
        "mean_diff_threshold": config.diff_threshold,
        "min_gap_frames": config.min_gap_frames,
    }

print("✅ Shot cut detection defined")

✅ Shot cut detection defined


In [8]:
# ============================================================================
# MOTION ANALYSIS
# ============================================================================

def analyze_motion(
    video_path: Path,
    config: MotionConfig,
    verbose: bool = True
) -> Dict[str, Any]:
    """
    Analyze motion in video using Farneback optical flow.

    Dense optical flow is computed between consecutive *sampled* frames and
    the mean flow magnitude of each pair is recorded. Because the flow spans
    ``config.frame_step`` source frames, the reported magnitudes grow with
    the frame step and are only comparable between runs that use the same
    step.

    Args:
        video_path: Path to the video file
        config: Motion analysis configuration
        verbose: Whether to print progress

    Returns:
        Dictionary with motion statistics (all zero when fewer than two
        frames could be sampled)

    Raises:
        VideoOpenError: If the video cannot be opened
    """
    if verbose:
        print("🏃 Analyzing motion...")

    capture = cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        raise VideoOpenError(str(video_path))

    try:
        magnitudes: List[float] = []
        prev_gray = None

        while True:
            ok, frame = capture.read()
            if not ok:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            if prev_gray is not None:
                flow = cv2.calcOpticalFlowFarneback(
                    prev_gray, gray, None,
                    config.pyr_scale,
                    config.levels,
                    config.winsize,
                    config.iterations,
                    config.poly_n,
                    config.poly_sigma,
                    0
                )
                # Convert the per-pixel (dx, dy) flow into magnitudes.
                mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
                magnitudes.append(float(np.mean(mag)))

            prev_gray = gray

            # Frame stepping: skip the frames between samples, stopping as
            # soon as the stream is exhausted.
            if config.frame_step > 1:
                for _ in range(config.frame_step - 1):
                    if not capture.grab():
                        break
    finally:
        capture.release()

    # Calculate statistics
    if magnitudes:
        avg_motion = float(np.mean(magnitudes))
        max_motion = float(np.max(magnitudes))
        min_motion = float(np.min(magnitudes))
        std_motion = float(np.std(magnitudes))
    else:
        avg_motion = max_motion = min_motion = std_motion = 0.0

    if verbose:
        print(f"   ✓ Avg motion: {avg_motion:.4f}")

    return {
        "average_motion_magnitude": round(avg_motion, 4),
        "max_motion_magnitude": round(max_motion, 4),
        "min_motion_magnitude": round(min_motion, 4),
        "motion_std": round(std_motion, 4),
        "motion_samples": len(magnitudes),
        "frame_step_used": config.frame_step,
    }

print("✅ Motion analysis defined")

✅ Motion analysis defined


In [9]:
# ============================================================================
# TEXT DETECTION (OCR)
# ============================================================================

def _text_unavailable_result(error: str) -> Dict[str, Any]:
    """Build the result returned when OCR cannot run at all."""
    return {
        "text_present_ratio": 0.0,
        "frames_with_text": 0,
        "total_frames_evaluated": 0,
        "keywords_top10": [],
        "available": False,
        "error": error
    }


def _collect_confident_words(
    data: Dict[str, Any],
    min_confidence: float,
    keywords: Counter
) -> bool:
    """Count confident OCR words into *keywords*; return True if any was found."""
    has_text = False
    for word, conf in zip(data.get("text", []), data.get("conf", [])):
        if not word or word.isspace():
            continue
        try:
            conf_val = float(conf)
        except (ValueError, TypeError):
            continue

        if conf_val >= min_confidence:
            has_text = True
            cleaned = word.strip().lower()
            # Only alphanumeric tokens of two or more characters become keywords.
            if len(cleaned) >= 2 and cleaned.isalnum():
                keywords[cleaned] += 1
    return has_text


def detect_text(
    video_path: Path,
    config: TextDetectionConfig,
    verbose: bool = True
) -> Dict[str, Any]:
    """
    Detect text in video frames using Tesseract OCR.

    Every ``config.frame_step``-th frame is binarized and passed to
    Tesseract. A frame counts as containing text when at least one word
    reaches ``config.min_confidence``.

    Args:
        video_path: Path to the video file
        config: Text detection configuration
        verbose: Whether to print progress

    Returns:
        Dictionary with text detection results. When OCR is unavailable the
        dictionary has ``available`` set to False and an ``error`` message.
        Otherwise ``ocr_failures`` reports how many sampled frames could not
        be processed by the OCR engine.

    Raises:
        VideoOpenError: If the video cannot be opened
    """
    if not TESSERACT_AVAILABLE:
        if verbose:
            print("📝 Text detection: Tesseract not installed, skipping...")
        return _text_unavailable_result("Tesseract OCR not installed")

    # Check if tesseract binary is available
    try:
        pytesseract.get_tesseract_version()
    except pytesseract.TesseractNotFoundError:
        if verbose:
            print("📝 Text detection: Tesseract binary not in PATH, skipping...")
            print("   Please install Tesseract OCR and add it to your system PATH.")
            print("   macOS: brew install tesseract")
            print("   Windows: https://github.com/UB-Mannheim/tesseract/wiki")
        return _text_unavailable_result("Tesseract binary not found in PATH")

    if verbose:
        print("📝 Detecting text (OCR)...")

    capture = cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        raise VideoOpenError(str(video_path))

    try:
        frames_evaluated = 0
        frames_with_text = 0
        ocr_failures = 0
        keywords: Counter = Counter()

        while True:
            ok, frame = capture.read()
            if not ok:
                break

            frames_evaluated += 1

            # Preprocess frame
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (3, 3), 0)
            _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            # Run OCR. A failure on one frame is tolerated (best effort) but
            # counted, and must NOT skip the frame stepping below - otherwise
            # a persistent OCR error would make every single frame get decoded.
            try:
                data = pytesseract.image_to_data(
                    binary,
                    output_type=pytesseract.Output.DICT,
                    lang=config.language
                )
            except Exception:
                ocr_failures += 1
                data = None

            if data is not None and _collect_confident_words(
                data, config.min_confidence, keywords
            ):
                frames_with_text += 1

            # Frame stepping
            if config.frame_step > 1:
                for _ in range(config.frame_step - 1):
                    if not capture.grab():
                        break
    finally:
        capture.release()

    ratio = frames_with_text / frames_evaluated if frames_evaluated > 0 else 0.0
    top_keywords = [word for word, _ in keywords.most_common(10)]

    if verbose:
        print(f"   ✓ Text in {frames_with_text}/{frames_evaluated} frames sampled ({ratio:.1%})")
        if ocr_failures:
            print(f"   ⚠ OCR failed on {ocr_failures} sampled frame(s)")

    return {
        "text_present_ratio": round(ratio, 4),
        "frames_with_text": frames_with_text,
        "total_frames_evaluated": frames_evaluated,
        "keywords_top10": top_keywords,
        "unique_words": len(keywords),
        "frame_step_used": config.frame_step,
        "min_confidence": config.min_confidence,
        "ocr_failures": ocr_failures,
        "available": True,
    }

print("✅ Text detection defined")

✅ Text detection defined


In [10]:
# ============================================================================
# OBJECT/PERSON DETECTION (YOLO)
# ============================================================================

# Global model cache: the loaded model and the weights name it was loaded from
_yolo_model = None
_yolo_model_name = None

def detect_objects(
    video_path: Path,
    config: ObjectDetectionConfig,
    verbose: bool = True
) -> Dict[str, Any]:
    """
    Detect objects and people in video using YOLOv8.
    Uses tracking to count unique persons.

    Every ``config.frame_step``-th frame is passed to the model's tracker.
    Detections of class id 0 are counted as persons, everything else as
    objects. The loaded model is cached at module level and reloaded only
    when ``config.model_size`` asks for different weights.

    Args:
        video_path: Path to the video file
        config: Object detection configuration
        verbose: Whether to print progress

    Returns:
        Dictionary with object detection results. When the ultralytics
        package is missing the dictionary has ``available`` set to False
        and an ``error`` message.

    Raises:
        VideoOpenError: If the video cannot be opened
    """
    global _yolo_model, _yolo_model_name

    if not YOLO_AVAILABLE:
        if verbose:
            print("🎯 Object detection: YOLO not installed, skipping...")
        return {
            "persons_detected": 0,
            "unique_persons": 0,
            "objects_detected": 0,
            "person_ratio": 0.0,
            "object_ratio": 0.0,
            "dominant_category": "unknown",
            "frames_evaluated": 0,
            "available": False,
            "error": "ultralytics package not installed"
        }

    if verbose:
        print("🎯 Detecting objects with YOLOv8 (Tracking enabled)...")

    # Load model (cached). The cache is keyed on the weights name so that a
    # different model_size is not silently served by a previously loaded model
    # while "model_used" reports the new one.
    model_name = f"yolov8{config.model_size}.pt"
    if _yolo_model is None or _yolo_model_name != model_name:
        if verbose:
            print(f"   Loading model: {model_name}")
        _yolo_model = YOLO(model_name)
        _yolo_model_name = model_name

    model = _yolo_model

    capture = cv2.VideoCapture(str(video_path))
    if not capture.isOpened():
        raise VideoOpenError(str(video_path))

    try:
        persons_detections = 0
        objects_detections = 0
        unique_person_ids = set()
        class_counts: Dict[str, int] = {}
        frames_evaluated = 0

        while True:
            ok, frame = capture.read()
            if not ok:
                break

            frames_evaluated += 1

            # Run YOLO inference with tracking
            # persist=True is important for tracking objects between frames
            # NOTE(review): the model is cached across calls, so tracker state
            # may carry over from a previously analyzed video - confirm whether
            # the tracker should be reset per video.
            results = model.track(
                frame,
                persist=True,
                conf=config.confidence_threshold,
                # Forward the configured NMS threshold; it was previously
                # declared in the config but never applied.
                iou=config.nms_threshold,
                verbose=False,
                device="cuda" if config.use_gpu else "cpu"
            )

            # Process detections
            for result in results:
                boxes = result.boxes
                if boxes is None:
                    continue

                for box in boxes:
                    class_id = int(box.cls[0])
                    class_name = model.names[class_id]

                    class_counts[class_name] = class_counts.get(class_name, 0) + 1

                    if class_id == 0:  # Person class
                        persons_detections += 1
                        # Get tracking ID if available
                        if box.id is not None:
                            unique_person_ids.add(int(box.id[0]))
                    else:
                        objects_detections += 1

            # Frame stepping
            if config.frame_step > 1:
                for _ in range(config.frame_step - 1):
                    if not capture.grab():
                        break
    finally:
        capture.release()

    # Calculate ratios based on total detections for presence
    total = persons_detections + objects_detections
    person_ratio = persons_detections / total if total > 0 else 0.0
    object_ratio = objects_detections / total if total > 0 else 0.0

    if persons_detections > objects_detections:
        dominant = "person"
    elif objects_detections > persons_detections:
        dominant = "object"
    else:
        dominant = "tie"

    # Most frequently detected classes first
    sorted_classes = dict(sorted(class_counts.items(), key=lambda x: x[1], reverse=True))
    unique_persons_count = len(unique_person_ids)

    if verbose:
        print(f"   ✓ Total Detections: {persons_detections} persons, {objects_detections} objects")
        print(f"   ✓ Unique Persons Tracked: {unique_persons_count}")

    return {
        "persons_detected": persons_detections,
        "unique_persons": unique_persons_count,
        "objects_detected": objects_detections,
        "person_ratio": round(person_ratio, 4),
        "object_ratio": round(object_ratio, 4),
        "dominant_category": dominant,
        "class_distribution": sorted_classes,
        "top_classes": list(sorted_classes.keys())[:10],
        "frames_evaluated": frames_evaluated,
        "frame_step_used": config.frame_step,
        "confidence_threshold": config.confidence_threshold,
        "model_used": f"yolov8{config.model_size}",
        "available": True,
    }

print("✅ Object detection defined (with tracking)")

✅ Object detection defined


In [11]:
# ============================================================================
# MAIN EXTRACTOR CLASS
# ============================================================================

class VideoFeatureExtractor:
    """
    Main class for extracting features from video files.

    Example:
        extractor = VideoFeatureExtractor()
        results = extractor.extract("video.mp4", features=["cuts", "motion"])
    """

    VERSION = "2.0.0"
    AVAILABLE_FEATURES = {"cuts", "motion", "text", "objects"}

    def __init__(self, config: Optional[ExtractorConfig] = None):
        """Create an extractor; a default configuration is used when none is given."""
        self.config = config or ExtractorConfig()

    def check_availability(self) -> Dict[str, bool]:
        """Check which features are available."""
        return {
            "cuts": True,  # Always available (OpenCV)
            "motion": True,  # Always available (OpenCV)
            "text": self._check_tesseract(),
            "objects": YOLO_AVAILABLE,
        }

    def _check_tesseract(self) -> bool:
        """Check if the pytesseract package and the Tesseract binary are both usable."""
        if not TESSERACT_AVAILABLE:
            return False
        try:
            pytesseract.get_tesseract_version()
            return True
        except Exception:
            # Any failure to query the binary means OCR cannot run. Unlike a
            # bare except, this lets KeyboardInterrupt/SystemExit propagate.
            return False

    def extract(
        self,
        video_path: str | Path,
        features: Optional[List[str]] = None,
        include_metadata: bool = True,
        verbose: bool = True
    ) -> Dict[str, Any]:
        """
        Extract features from a video file.

        Args:
            video_path: Path to the video file
            features: List of features to extract (default: all features,
                in alphabetical order)
            include_metadata: Whether to include video metadata
            verbose: Whether to print progress

        Returns:
            Dictionary with all extraction results

        Raises:
            VideoNotFoundError: If the video file does not exist
            InvalidFeatureError: If an unknown feature name is requested
            VideoOpenError: If a feature extractor cannot open the video
        """
        # Monotonic clock: immune to wall-clock adjustments during the run.
        start_time = time.perf_counter()

        # Validate video
        video_path = validate_video_file(Path(video_path))

        # Determine features. Sorting makes "features_requested" deterministic;
        # iterating the set directly depends on hash ordering.
        if features is None:
            features = sorted(self.AVAILABLE_FEATURES)

        # Validate features
        if set(features) - self.AVAILABLE_FEATURES:
            raise InvalidFeatureError(features, self.AVAILABLE_FEATURES)

        if verbose:
            print(f"\n{'='*60}")
            print(f"🎬 Video Feature Extraction v{self.VERSION}")
            print(f"{'='*60}")
            print(f"📁 Video: {video_path.name}")
            print(f"📋 Features: {', '.join(features)}")
            print(f"{'='*60}\n")

        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # tzinfo is dropped again so the "...Z" output format is unchanged.
        utc_now = datetime.now(timezone.utc).replace(tzinfo=None)

        # Initialize output
        output: Dict[str, Any] = {
            "video_path": str(video_path),
            "extraction_timestamp": utc_now.isoformat() + "Z",
            "features_requested": features,
            "extractor_version": self.VERSION,
        }

        # Add metadata (best effort: a failure is reported, not raised)
        if include_metadata:
            try:
                metadata = get_video_metadata(video_path)
                output["video_metadata"] = metadata.to_dict()
            except Exception as e:
                output["video_metadata"] = {"error": str(e)}

        # Extract features
        results: Dict[str, Any] = {}

        if "cuts" in features:
            results["shot_cut_detection"] = detect_shot_cuts(
                video_path, self.config.shot_cut, verbose
            )

        if "motion" in features:
            results["motion_analysis"] = analyze_motion(
                video_path, self.config.motion, verbose
            )

        if "text" in features:
            results["text_detection"] = detect_text(
                video_path, self.config.text_detection, verbose
            )

        if "objects" in features:
            results["object_person_dominance"] = detect_objects(
                video_path, self.config.object_detection, verbose
            )

        output["results"] = results

        # Calculate processing time
        elapsed = time.perf_counter() - start_time
        output["processing_time_seconds"] = round(elapsed, 2)

        if verbose:
            print(f"\n{'='*60}")
            print(f"✅ Extraction complete in {elapsed:.2f} seconds")
            print(f"{'='*60}")

        return output

    def extract_to_json(
        self,
        video_path: str | Path,
        output_path: str | Path | None = None,
        features: Optional[List[str]] = None,
        pretty: bool = True,
        verbose: bool = True
    ) -> str:
        """Extract features and return/save as JSON.

        Args:
            video_path: Path to the video file
            output_path: Where to write the JSON; nothing is written when omitted
            features: List of features to extract (default: all features)
            pretty: Indent the JSON by two spaces when True
            verbose: Whether to print progress

        Returns:
            The JSON document as a string
        """
        results = self.extract(video_path, features, verbose=verbose)

        indent = 2 if pretty else None
        json_output = json.dumps(results, indent=indent, default=str)

        if output_path:
            # Explicit encoding so the file does not depend on the platform default.
            Path(output_path).write_text(json_output, encoding="utf-8")
            if verbose:
                print(f"💾 Saved to: {output_path}")

        return json_output

print("✅ VideoFeatureExtractor class defined")
print(f"\n🎉 All components loaded! Ready to extract features.")

✅ VideoFeatureExtractor class defined

🎉 All components loaded! Ready to extract features.


---

## 3. Usage Examples

Now let's use the extractor to analyze videos!

In [12]:
# Build an extractor with the default configuration
extractor = VideoFeatureExtractor()

# Report, feature by feature, whether its backend can be used
print("📋 Feature Availability:")
print("-" * 40)
for feature, available in extractor.check_availability().items():
    if available:
        status = "✅ Available"
    else:
        status = "❌ Not available"
    print(f"   {feature:10} {status}")

📋 Feature Availability:
----------------------------------------
   cuts       ✅ Available
   motion     ✅ Available
   text       ❌ Not available
   objects    ✅ Available


In [13]:
# Location of the clip to analyse - UPDATE THIS TO YOUR VIDEO
VIDEO_PATH = Path("videoplayback.mp4")

if not VIDEO_PATH.exists():
    print(f"❌ Video not found: {VIDEO_PATH}")
    print("   Please update VIDEO_PATH to point to your video file.")
else:
    print(f"✅ Video found: {VIDEO_PATH}")

    # Show the basic properties of the clip
    metadata = get_video_metadata(VIDEO_PATH)
    print(f"\n📹 Video Info:")
    print(f"   Resolution: {metadata.width}x{metadata.height}")
    print(f"   Duration:   {metadata.duration_seconds:.2f}s")
    print(f"   FPS:        {metadata.fps}")
    print(f"   Frames:     {metadata.total_frames}")

✅ Video found: videoplayback.mp4

📹 Video Info:
   Resolution: 360x640
   Duration:   11.70s
   FPS:        30.0
   Frames:     351


### 3.1 Extract Shot Cuts and Motion (Fast)

In [14]:
# Run only the two OpenCV-based features (no OCR or YOLO needed)
if VIDEO_PATH.exists():
    results = extractor.extract(VIDEO_PATH, features=["cuts", "motion"])

    # Pull out both result sections up front, then print them
    cuts = results["results"]["shot_cut_detection"]
    motion = results["results"]["motion_analysis"]

    print("\n📊 RESULTS:")
    print("-" * 40)

    print(f"\n🎬 Shot Cuts:")
    print(f"   Count: {cuts['shot_cut_count']}")

    print(f"\n🏃 Motion:")
    print(f"   Average: {motion['average_motion_magnitude']:.4f}")
    print(f"   Max:     {motion['max_motion_magnitude']:.4f}")
    print(f"   Std Dev: {motion['motion_std']:.4f}")


🎬 Video Feature Extraction v2.0.0
📁 Video: videoplayback.mp4
📋 Features: cuts, motion

🎬 Detecting shot cuts...
   ✓ Found 0 cuts
🏃 Analyzing motion...
   ✓ Avg motion: 0.9316

✅ Extraction complete in 2.64 seconds

📊 RESULTS:
----------------------------------------

🎬 Shot Cuts:
   Count: 0

🏃 Motion:
   Average: 0.9316
   Max:     2.8623
   Std Dev: 0.5769


### 3.2 Object Detection with YOLOv8

In [15]:
# Run object detection only (the model weights may be downloaded on first run)
if VIDEO_PATH.exists():
    object_results = extractor.extract(VIDEO_PATH, features=["objects"])

    obj = object_results["results"]["object_person_dominance"]

    # Handle the "backend missing" case first, then the normal report
    if not obj.get("available", True):
        print(f"   ⚠️ {obj.get('error', 'Not available')}")
    else:
        print("\n🎯 Object Detection Results:")
        print("-" * 40)
        print(f"   Persons:  {obj['persons_detected']}")
        print(f"   Objects:  {obj['objects_detected']}")
        print(f"   Dominant: {obj['dominant_category'].upper()}")

        if obj.get('class_distribution'):
            print("\n   📦 Top Classes:")
            # The distribution is already ordered by count, so take the first five
            for cls, count in list(obj['class_distribution'].items())[:5]:
                print(f"      {cls}: {count}")


🎬 Video Feature Extraction v2.0.0
📁 Video: videoplayback.mp4
📋 Features: objects

🎯 Detecting objects with YOLOv8...
   Loading model: yolov8n.pt
   ✓ 24 persons, 0 objects detected

✅ Extraction complete in 0.57 seconds

🎯 Object Detection Results:
----------------------------------------
   Persons:  24
   Objects:  0
   Dominant: PERSON

   📦 Top Classes:
      person: 24


### 3.3 Custom Configuration

In [16]:
# Build a configuration that overrides three defaults:
#   - shot cuts: lower difference threshold (lower = more sensitive)
#   - motion: larger frame step (skips more frames, so it runs faster)
#   - objects: higher confidence required for a detection
custom_config = ExtractorConfig(
    shot_cut=ShotCutConfig(diff_threshold=20.0),
    motion=MotionConfig(frame_step=5),
    object_detection=ObjectDetectionConfig(confidence_threshold=0.7),
)

# Extractor that uses the customised settings
custom_extractor = VideoFeatureExtractor(custom_config)

print("✅ Custom configuration created")
print(f"   Shot cut threshold: {custom_config.shot_cut.diff_threshold}")
print(f"   Motion frame step:  {custom_config.motion.frame_step}")
print(f"   Object confidence:  {custom_config.object_detection.confidence_threshold}")

✅ Custom configuration created
   Shot cut threshold: 20.0
   Motion frame step:  5
   Object confidence:  0.7


In [17]:
# Repeat the fast extraction using the customised extractor
if VIDEO_PATH.exists():
    custom_results = custom_extractor.extract(VIDEO_PATH, features=["cuts", "motion"])

    print("\n📊 Custom Config Results:")
    print(
        f"   Cuts found: {custom_results['results']['shot_cut_detection']['shot_cut_count']}"
    )
    print(
        f"   Motion avg: {custom_results['results']['motion_analysis']['average_motion_magnitude']:.4f}"
    )


🎬 Video Feature Extraction v2.0.0
📁 Video: videoplayback.mp4
📋 Features: cuts, motion

🎬 Detecting shot cuts...
   ✓ Found 0 cuts
🏃 Analyzing motion...
   ✓ Avg motion: 1.9809

✅ Extraction complete in 1.09 seconds

📊 Custom Config Results:
   Cuts found: 0
   Motion avg: 1.9809


### 3.4 Export to JSON

In [18]:
# Write the extraction results to a JSON file and preview the start of it
if VIDEO_PATH.exists():
    output_file = "video_analysis_results.json"

    json_output = extractor.extract_to_json(
        VIDEO_PATH,
        features=["cuts", "motion"],
        output_path=output_file,
        pretty=True,
    )

    print(f"\n📄 JSON Preview (first 500 chars):")
    preview_end = 500
    print(json_output[:preview_end] + "...")


🎬 Video Feature Extraction v2.0.0
📁 Video: videoplayback.mp4
📋 Features: cuts, motion

🎬 Detecting shot cuts...
   ✓ Found 0 cuts
🏃 Analyzing motion...
   ✓ Avg motion: 0.9316

✅ Extraction complete in 2.55 seconds
💾 Saved to: video_analysis_results.json

📄 JSON Preview (first 500 chars):
{
  "video_path": "/Users/ridam/Desktop/White Panda/videoplayback.mp4",
  "extraction_timestamp": "2026-01-14T09:38:57.626928Z",
  "features_requested": [
    "cuts",
    "motion"
  ],
  "extractor_version": "2.0.0",
  "video_metadata": {
    "path": "/Users/ridam/Desktop/White Panda/videoplayback.mp4",
    "resolution": {
      "width": 360,
      "height": 640
    },
    "fps": 30.0,
    "total_frames": 351,
    "duration_seconds": 11.7,
    "codec": "h264",
    "file_size_bytes": 877848
  },
...


### 3.5 Full Extraction (All Features)

In [19]:
# Run every feature that the extractor reports as available.
if VIDEO_PATH.exists():
    # Mapping of feature name -> availability flag
    availability = extractor.check_availability()
    available_features = [
        feature_name
        for feature_name, is_available in availability.items()
        if is_available
    ]

    print(f"Running features: {available_features}")

    full_results = extractor.extract(VIDEO_PATH, features=available_features)

Running features: ['cuts', 'motion', 'objects']

🎬 Video Feature Extraction v2.0.0
📁 Video: videoplayback.mp4
📋 Features: cuts, motion, objects

🎬 Detecting shot cuts...
   ✓ Found 0 cuts
🏃 Analyzing motion...
   ✓ Avg motion: 0.9316
🎯 Detecting objects with YOLOv8...
   ✓ 24 persons, 0 objects detected

✅ Extraction complete in 3.01 seconds


In [20]:
# Display a compact summary of the full extraction: the video file name, the
# processing time, and up to five fields from each feature's result.
if VIDEO_PATH.exists():
    print("\n" + "=" * 60)
    print("📊 COMPLETE EXTRACTION SUMMARY")
    print("=" * 60)

    # Path(...).name applies the platform's own separator rules, so the file
    # name is extracted correctly on Windows as well as POSIX.
    print(f"\n📁 Video: {Path(full_results['video_path']).name}")
    print(f"⏱️  Time:  {full_results['processing_time_seconds']}s")

    for key, data in full_results['results'].items():
        print(f"\n{'─' * 50}")
        # Turn a result key such as "motion_analysis" into "Motion Analysis"
        print(f"📋 {key.replace('_', ' ').title()}:")

        if 'error' in data:
            # A feature that failed reports its error instead of values
            print(f"   ⚠️ {data['error']}")
        else:
            # Show only the first five fields to keep the summary short
            for k, v in list(data.items())[:5]:
                if isinstance(v, float):
                    print(f"   {k}: {v:.4f}")
                elif isinstance(v, list):
                    # Collections are summarised by size, not dumped
                    print(f"   {k}: {len(v)} items")
                elif isinstance(v, dict):
                    print(f"   {k}: {len(v)} entries")
                else:
                    print(f"   {k}: {v}")


📊 COMPLETE EXTRACTION SUMMARY

📁 Video: videoplayback.mp4
⏱️  Time:  3.01s

──────────────────────────────────────────────────
📋 Shot Cut Detection:
   shot_cut_count: 0
   cut_frames: 0 items
   frame_step_used: 1
   mean_diff_threshold: 30.0000
   min_gap_frames: 5

──────────────────────────────────────────────────
📋 Motion Analysis:
   average_motion_magnitude: 0.9316
   max_motion_magnitude: 2.8623
   min_motion_magnitude: 0.2001
   motion_std: 0.5769
   motion_samples: 175

──────────────────────────────────────────────────
📋 Object Person Dominance:
   persons_detected: 24
   objects_detected: 0
   person_ratio: 1.0000
   object_ratio: 0.0000
   dominant_category: person


---

## 4. Error Handling

In [21]:
# Demo 1: a path that does not exist should surface VideoNotFoundError
missing_video = "/nonexistent/video.mp4"
try:
    extractor.extract(missing_video, verbose=False)
except VideoNotFoundError as not_found:
    print(f"✅ VideoNotFoundError caught: {not_found.message}")

# Demo 2: an unknown feature name should surface InvalidFeatureError
# NOTE(review): unlike the other cells, this one does not check that
# VIDEO_PATH exists — confirm which error extract() raises in that case.
unknown_features = ["invalid"]
try:
    extractor.extract(VIDEO_PATH, features=unknown_features, verbose=False)
except InvalidFeatureError as bad_feature:
    print(f"✅ InvalidFeatureError caught: {bad_feature.message}")

✅ VideoNotFoundError caught: Video file not found: /nonexistent/video.mp4
✅ InvalidFeatureError caught: Invalid feature(s): {'invalid'}. Valid: {'objects', 'motion', 'cuts', 'text'}


---

## 5. Cleanup

In [22]:
# Clean up temporary files created by the examples above.
temp_files = ["video_analysis_results.json"]

for temp_name in temp_files:
    # Attempt the delete directly instead of checking first: this avoids the
    # gap between an existence check and the removal, and needs only pathlib.
    try:
        Path(temp_name).unlink()
    except FileNotFoundError:
        # Nothing to remove (e.g. the export cell was skipped)
        continue
    print(f"🗑️ Removed: {temp_name}")

print("\n✅ Cleanup complete!")

🗑️ Removed: video_analysis_results.json

✅ Cleanup complete!


---

## 📚 Quick Reference

### Available Features
| Feature | Description | Requirements |
|---------|-------------|-------------|
| `cuts` | Shot cut detection | OpenCV |
| `motion` | Motion analysis | OpenCV |
| `text` | OCR text detection | pytesseract + Tesseract |
| `objects` | Object/person detection | ultralytics (YOLOv8) |

### Configuration Options
```python
# Create a config object, then override individual fields as needed
config = ExtractorConfig()

# Shot cut settings
config.shot_cut.diff_threshold = 30.0  # Lower = more sensitive
config.shot_cut.min_gap_frames = 5  # presumably the minimum frame gap between reported cuts — TODO confirm

# Motion settings
config.motion.frame_step = 2  # Higher = faster

# Object detection settings
config.object_detection.confidence_threshold = 0.5  # presumably detections below this score are ignored — TODO confirm
config.object_detection.model_size = "n"  # n, s, m, l, x
```