In [None]:
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import mediapipe as mp
import time
from collections import deque
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
import threading
import queue

In [None]:
class SwinTransformerBlock(nn.Module):
    """Pre-norm transformer block: self-attention followed by an MLP.

    Each sub-layer is wrapped in a residual connection and preceded by a
    LayerNorm. ``window_size`` is forwarded to ``WindowAttention`` and
    ``shift_size`` is only stored; this block itself performs no window
    partitioning or shifting.

    Args:
        dim: Channel width of the token embeddings.
        num_heads: Number of attention heads.
        window_size: Passed through to the attention module.
        shift_size: Stored on the instance; not used in ``forward``.
    """

    def __init__(self, dim, num_heads, window_size=7, shift_size=0):
        super().__init__()
        self.dim, self.num_heads = dim, num_heads
        self.window_size, self.shift_size = window_size, shift_size

        # Sub-modules are created in the same order as before so parameter
        # registration (and therefore state_dict layout) is unchanged.
        self.norm1 = nn.LayerNorm(dim)
        self.attn = WindowAttention(dim, window_size, num_heads)
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = MLP(dim, int(dim * 4))

    def forward(self, x):
        """Apply attention and MLP sub-layers to ``x`` of shape (batch, tokens, dim)."""
        # Residual around normalised self-attention.
        x = x + self.attn(self.norm1(x))
        # Residual around the normalised feed-forward network.
        return x + self.mlp(self.norm2(x))

In [None]:
class WindowAttention(nn.Module):
    """Multi-head scaled dot-product self-attention.

    ``window_size`` is stored on the instance but ``forward`` does not
    partition the input: attention is computed across all ``N`` tokens of
    the sequence it receives.

    Args:
        dim: Channel width of the input tokens; must be divisible by
            ``num_heads``.
        window_size: Stored for reference only.
        num_heads: Number of attention heads.

    Raises:
        ValueError: If ``num_heads`` is not positive or does not divide
            ``dim`` (previously this only surfaced later as an opaque
            reshape error inside ``forward``).
    """

    def __init__(self, dim, window_size, num_heads):
        super().__init__()
        if num_heads <= 0 or dim % num_heads != 0:
            raise ValueError(
                f"dim ({dim}) must be divisible by a positive num_heads ({num_heads})"
            )
        self.dim = dim
        self.window_size = window_size
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # Standard 1/sqrt(d_k) scaling of the attention logits.
        self.scale = head_dim ** -0.5

        # Single projection producing queries, keys and values together.
        self.qkv = nn.Linear(dim, dim * 3)
        self.proj = nn.Linear(dim, dim)

    def forward(self, x):
        """Return attended tokens with the same (B, N, C) shape as ``x``."""
        B, N, C = x.shape
        head_dim = C // self.num_heads
        # (B, N, 3*C) -> (3, B, heads, N, head_dim)
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, head_dim).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]

        # Attention weights over the token axis for every head.
        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)

        # Merge heads back into the channel dimension.
        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        return x

class MLP(nn.Module):
    """Two-layer feed-forward network: Linear -> GELU -> Linear.

    The output has the same trailing dimension as the input
    (``in_features``); ``hidden_features`` is the width of the middle layer.
    """

    def __init__(self, in_features, hidden_features):
        super().__init__()
        # Expand to the hidden width, then project back to the input width.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, in_features)

    def forward(self, x):
        """Apply the expand / activate / contract pipeline to ``x``."""
        return self.fc2(F.gelu(self.fc1(x)))

In [None]:
class SwingTransformerVideoProcessor(nn.Module):
    """Transformer encoder over per-frame facial-landmark feature vectors.

    Two inference paths are provided by ``forward``:

    * sequence mode - landmarks are embedded, a learned positional embedding
      is added, the stack of ``SwinTransformerBlock`` layers is applied, and
      two heads produce 7 emotion logits plus a softmax weighting over the
      time steps;
    * single-frame mode - a small MLP maps one landmark vector directly to
      7 emotion logits and a constant placeholder is returned in place of
      attention weights.

    Args:
        landmark_dim: Size of the flattened landmark vector for one frame.
        embed_dim: Token embedding width used inside the transformer.
        num_layers: Number of transformer blocks.
        num_heads: Attention heads per block.
        sequence_length: Maximum number of time steps; longer inputs are
            truncated to their first ``sequence_length`` steps.
    """

    def __init__(self, landmark_dim=2, embed_dim=128, num_layers=4, num_heads=8, sequence_length=30):
        super().__init__()
        self.embed_dim = embed_dim
        self.sequence_length = sequence_length
        self.landmark_dim = landmark_dim

        # Embedding layer for landmarks
        self.landmark_embed = nn.Linear(landmark_dim, embed_dim)
        self.pos_embed = nn.Parameter(torch.randn(1, sequence_length, embed_dim))

        # Swin Transformer blocks
        self.blocks = nn.ModuleList([
            SwinTransformerBlock(embed_dim, num_heads) for _ in range(num_layers)
        ])

        self.norm = nn.LayerNorm(embed_dim)

        # Output heads for different tasks
        self.emotion_head = nn.Linear(embed_dim, 7)  # 7 basic emotions
        self.attention_head = nn.Linear(embed_dim, 1)  # One score per time step

        # Simple emotion classifier for single frame analysis
        self.single_frame_emotion = nn.Sequential(
            nn.Linear(landmark_dim, embed_dim),
            nn.ReLU(),
            nn.Linear(embed_dim, embed_dim // 2),
            nn.ReLU(),
            nn.Linear(embed_dim // 2, 7)
        )

        # Initialize weights
        self._initialize_weights()

    def _initialize_weights(self):
        """Xavier-initialise every ``nn.Linear`` weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, landmarks, single_frame_mode=False):
        """Predict emotion logits (and a weighting) from landmark features.

        Args:
            landmarks: In sequence mode a 3-D tensor (batch, steps, features)
                or a 4-D tensor (batch, steps, num_landmarks, coords) that is
                flattened per step. In single-frame mode a (batch, features)
                tensor, or a 3-D tensor that is first reduced to 2-D.
            single_frame_mode: Select the single-frame MLP path.

        Returns:
            ``(emotions, attention_weights)``. ``emotions`` has 7 logits per
            batch element. In sequence mode ``attention_weights`` is a
            softmax over the (possibly truncated) time axis; in single-frame
            mode it is a constant 0.5 tensor of shape
            (batch, features // 2).

        Raises:
            ValueError: In sequence mode, if ``landmarks`` is neither 3-D
                nor 4-D.
        """
        if single_frame_mode:
            if landmarks.dim() == 3:
                # Reduce a 3-D input to 2-D: take the last slice along dim 1
                # when dim 1 is longer than dim 2, otherwise average dim 1.
                if landmarks.shape[1] > landmarks.shape[2]:
                    landmarks = landmarks[:, -1, :]
                else:
                    landmarks = landmarks.mean(dim=1)

            emotions = self.single_frame_emotion(landmarks)
            # Placeholder weights, allocated with the input's device and
            # dtype so the result can be used next to ``emotions`` without
            # an extra transfer.
            attention_weights = landmarks.new_full(
                (landmarks.shape[0], landmarks.shape[-1] // 2), 0.5
            )
            return emotions, attention_weights

        # Handle different input shapes
        if landmarks.dim() == 3:
            # (batch_size, sequence_length, features)
            x = landmarks
        elif landmarks.dim() == 4:
            # (batch_size, sequence_length, num_landmarks, coords):
            # flatten the landmarks of each time step into one vector.
            batch_size, num_steps, num_points, num_coords = landmarks.shape
            x = landmarks.reshape(batch_size, num_steps, num_points * num_coords)
        else:
            raise ValueError(f"Unexpected landmark tensor shape: {landmarks.shape}")
        total_steps = x.shape[1]

        # Embed landmarks
        if x.shape[-1] != self.landmark_dim:
            # Feature width differs from the configured one: project it.
            # NOTE(review): this layer is created on first use, so it is
            # absent from any optimizer or checkpoint built beforehand and
            # keeps default (untrained) weights - confirm this is intended.
            if not hasattr(self, 'input_proj'):
                proj = nn.Linear(x.shape[-1], self.landmark_dim).to(x.device)
                if x.is_floating_point():
                    # Follow the input precision (e.g. after model.double()).
                    proj = proj.to(x.dtype)
                self.input_proj = proj
            x = self.input_proj(x)

        x = self.landmark_embed(x)

        # Add positional embedding (handle variable sequence lengths)
        seq_len = min(total_steps, self.sequence_length)
        x = x[:, :seq_len, :] + self.pos_embed[:, :seq_len, :]

        # Apply Swin Transformer blocks
        for block in self.blocks:
            x = block(x)

        x = self.norm(x)

        # Get predictions and attention weights
        emotions = self.emotion_head(x.mean(dim=1))  # Global average pooling
        attention_weights = torch.softmax(self.attention_head(x).squeeze(-1), dim=1)

        return emotions, attention_weights

In [None]:
class FacialLandmarkProcessor:
    """Wrap a MediaPipe FaceMesh instance and return a fixed subset of points.

    The subset is a hand-picked list of 68 mesh indices grouped into jaw,
    eyebrows, nose, eyes and mouth, mirroring the size of the dlib 68-point
    layout.
    """

    def __init__(self):
        self.mp_face_mesh = mp.solutions.face_mesh
        mesh_options = dict(
            static_image_mode=False,
            max_num_faces=1,
            refine_landmarks=True,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        )
        self.face_mesh = self.mp_face_mesh.FaceMesh(**mesh_options)

        # Mesh indices per facial region; concatenated below in this order.
        jaw = [10, 338, 297, 332, 284, 251, 389, 356, 454, 323, 361, 288, 397, 365, 379, 378, 400]  # 17
        right_eyebrow = [70, 63, 105, 66, 107]  # 5
        left_eyebrow = [296, 334, 293, 300, 276]  # 5
        nose = [1, 2, 5, 4, 6, 19, 94, 125, 141]  # 9
        right_eye = [33, 7, 163, 144, 145, 153]  # 6
        left_eye = [362, 398, 384, 385, 386, 387]  # 6
        mouth = [61, 84, 17, 314, 405, 320, 307, 375, 321, 308, 324, 318, 402, 317, 14, 87, 178, 88, 95, 78]  # 20

        # Key facial landmark indices (68 points in total).
        self.key_landmarks = (
            jaw + right_eyebrow + left_eyebrow + nose + right_eye + left_eye + mouth
        )

    def extract_landmarks(self, image):
        """Return the key landmarks of the first detected face in pixels.

        Args:
            image: Frame in BGR channel order (it is converted to RGB before
                being handed to FaceMesh).

        Returns:
            ``np.ndarray`` with one ``[x, y]`` row per key landmark, scaled
            by the image width and height, or ``None`` when no face is
            detected.
        """
        results = self.face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        if not results.multi_face_landmarks:
            return None

        mesh_points = results.multi_face_landmarks[0].landmark
        h, w = image.shape[:2]

        # Indices beyond the returned mesh are skipped rather than raising.
        return np.array([
            [mesh_points[idx].x * w, mesh_points[idx].y * h]
            for idx in self.key_landmarks
            if idx < len(mesh_points)
        ])

In [None]:
class RealTimeVideoProcessor:
    """Real-time video processing with Swing Transformer"""

    def __init__(self, sequence_length=30):
        """Build the landmark extractor, the model and the drawing tables.

        The model is constructed with freshly initialised weights and put in
        eval mode; no checkpoint is loaded here.

        Args:
            sequence_length: Number of normalised landmark frames kept in
                the rolling buffer and passed to the model as its maximum
                sequence length.
        """
        self.sequence_length = sequence_length
        self.landmark_processor = FacialLandmarkProcessor()

        # Initialize Swing Transformer
        self.model = SwingTransformerVideoProcessor(
            landmark_dim=136,  # 68 landmarks * 2 coordinates = 136 features
            embed_dim=128,
            num_layers=4,
            num_heads=8,
            sequence_length=sequence_length
        )
        self.model.eval()

        # Landmark sequence buffer (oldest frame is dropped once full)
        self.landmark_buffer = deque(maxlen=sequence_length)

        # Emotion labels, index-aligned with the 7 model outputs
        self.emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

        # Colors for the facial regions. These are drawn with OpenCV onto
        # BGR frames, so every tuple is (blue, green, red). The previous
        # values were written in RGB order, which rendered the "red" jaw as
        # blue and the "yellow" eyes as cyan.
        self.region_colors = {
            'jaw': (50, 50, 255),       # Bright Red
            'eyebrows': (50, 255, 50),  # Bright Green
            'nose': (255, 50, 50),      # Bright Blue
            'eyes': (50, 255, 255),     # Bright Yellow
            'mouth': (255, 50, 255),    # Bright Magenta
        }

        # Index ranges of each region within the 68-point landmark array
        self.regions = {
            'jaw': list(range(0, 17)),
            'eyebrows': list(range(17, 27)),
            'nose': list(range(27, 36)),
            'eyes': list(range(36, 48)),
            'mouth': list(range(48, 68))
        }

    def normalize_landmarks(self, landmarks):
        """Normalize landmarks to [-1, 1] range"""
        if landmarks is None:
            return None

        # Center around mean
        center = landmarks.mean(axis=0)
        centered = landmarks - center

        # Scale to [-1, 1]
        scale = np.max(np.abs(centered))
        if scale > 0:
            normalized = centered / scale
        else:
            normalized = centered

        return normalized

    def draw_landmarks_with_features(self, image, landmarks, attention_weights=None, active_features=None):
        """Draw landmarks with feature highlighting - ENHANCED VERSION"""
        if landmarks is None:
            return image

        # Create overlay
        overlay = image.copy()

        # Draw landmarks by region with LARGER sizes
        for region_name, indices in self.regions.items():
            color = self.region_colors[region_name]

            for i, idx in enumerate(indices):
                if idx < len(landmarks):
                    x, y = int(landmarks[idx][0]), int(landmarks[idx][1])

                    # Determine point size based on attention/importance - MUCH LARGER
                    base_radius = 6  # Increased from 3
                    if attention_weights is not None and idx < len(attention_weights):
                        # Scale radius based on attention weight
                        attention_val = float(attention_weights[idx])
                        radius = max(4, int(6 + attention_val * 8))  # Larger range
                    else:
                        radius = base_radius

                    # Draw outer circle (border) for better visibility
                    cv2.circle(overlay, (x, y), radius + 2, (255, 255, 255), 2)  # White border
                    # Draw main point
                    cv2.circle(overlay, (x, y), radius, color, -1)

        # Connect landmarks within regions with thicker lines
        self.draw_connections(overlay, landmarks)

        # Blend overlay with original image
        alpha = 0.5  # Increased opacity for better visibility
        image = cv2.addWeighted(overlay, alpha, image, 1 - alpha, 0)

        return image

    def draw_connections(self, image, landmarks):
        """Draw connections between landmarks with thicker lines"""
        if landmarks is None or len(landmarks) < 68:
            return

        line_thickness = 1  # Increased from 1

        # Jaw line connections
        jaw_indices = self.regions['jaw']
        for i in range(len(jaw_indices) - 1):
            if jaw_indices[i] < len(landmarks) and jaw_indices[i+1] < len(landmarks):
                pt1 = tuple(map(int, landmarks[jaw_indices[i]]))
                pt2 = tuple(map(int, landmarks[jaw_indices[i+1]]))
                cv2.line(image, pt1, pt2, self.region_colors['jaw'], line_thickness)

        # Eye connections (simplified)
        eye_regions = [(36, 42), (42, 48)]  # Right and left eye ranges
        for start, end in eye_regions:
            for i in range(start, end - 1):
                if i < len(landmarks) and i + 1 < len(landmarks):
                    pt1 = tuple(map(int, landmarks[i]))
                    pt2 = tuple(map(int, landmarks[i + 1]))
                    cv2.line(image, pt1, pt2, self.region_colors['eyes'], line_thickness)

        # Mouth connections
        mouth_indices = self.regions['mouth']
        for i in range(len(mouth_indices) - 1):
            if mouth_indices[i] < len(landmarks) and mouth_indices[i+1] < len(landmarks):
                pt1 = tuple(map(int, landmarks[mouth_indices[i]]))
                pt2 = tuple(map(int, landmarks[mouth_indices[i+1]]))
                cv2.line(image, pt1, pt2, self.region_colors['mouth'], line_thickness)

    def draw_info_panel(self, image, emotions, attention_weights, feature_count, buffer_status=""):
        """Render a 400-pixel-wide status panel and attach it to ``image``.

        Args:
            image: Frame the panel is attached to (on its right side).
            emotions: ``None``, a tensor of logits shaped (batch, 7) whose
                first row is used, or a 1-D sequence of 7 logits.
            attention_weights: ``None``, a tensor, an ndarray or another
                1-D sequence of weights.
                NOTE(review): entries are looked up by landmark index when
                averaging per region; indices beyond the array count as
                0.1 - confirm the caller supplies per-landmark weights.
            feature_count: Number shown as "Active Features: n/68".
            buffer_status: Free-text status line.

        Returns:
            New array: ``image`` and the panel stacked horizontally.
        """
        panel_width = 400
        line_height = 25
        font = cv2.FONT_HERSHEY_SIMPLEX
        white = (255, 255, 255)
        grey = (128, 128, 128)
        rule_color = (100, 100, 100)

        # Very dark panel with a thin border, same height as the frame.
        panel = np.full((image.shape[0], panel_width, 3), 20, dtype=np.uint8)
        cv2.rectangle(panel, (0, 0), (panel_width - 1, panel.shape[0] - 1), rule_color, 2)

        def draw_bar(y, fraction, full_width, color):
            # Grey track of ``full_width`` pixels with a proportional fill.
            cv2.rectangle(panel, (15, y + 8), (15 + full_width, y + 16), (50, 50, 50), -1)
            filled = int(fraction * full_width)
            if filled > 0:
                cv2.rectangle(panel, (15, y + 8), (15 + filled, y + 16), color, -1)

        def draw_rule(y):
            # Horizontal section separator.
            cv2.line(panel, (10, y), (panel_width - 10, y), rule_color, 1)

        # Title banner. Scalar assignment also works for frames shorter
        # than 40 pixels, where copying a fixed 40-row block would fail.
        panel[0:40, :] = 60
        cv2.putText(panel, "SWING TRANSFORMER ANALYSIS", (10, 25), font, 0.7, white, 2)
        y_offset = 60

        # Buffer status
        status_color = (0, 255, 255) if "Ready" in buffer_status else (255, 255, 0)
        cv2.putText(panel, f"Status: {buffer_status}", (10, y_offset), font, 0.5, status_color, 1)
        y_offset += line_height

        # Feature count
        cv2.putText(panel, f"Active Features: {feature_count}/68", (10, y_offset),
                    font, 0.5, (0, 255, 255), 1)
        y_offset += line_height + 10

        draw_rule(y_offset)
        y_offset += 20

        # Emotion predictions section
        cv2.putText(panel, "EMOTION PREDICTIONS:", (10, y_offset), font, 0.6, white, 2)
        y_offset += 30

        if emotions is not None:
            # Convert logits to probabilities; ``.cpu()`` keeps this working
            # when the model runs on an accelerator.
            if isinstance(emotions, torch.Tensor):
                emotion_probs = F.softmax(emotions, dim=1)[0].detach().cpu().numpy()
            else:
                emotion_probs = F.softmax(torch.tensor(emotions), dim=0).numpy()

            # Most likely emotion first.
            ranked = sorted(zip(self.emotion_labels, emotion_probs),
                            key=lambda pair: pair[1], reverse=True)

            for label, prob in ranked:
                # Color coding based on probability
                if prob > 0.4:
                    color = (0, 255, 0)  # High confidence - Green
                elif prob > 0.2:
                    color = (0, 255, 255)  # Medium confidence - Yellow
                else:
                    color = grey  # Low confidence - Gray

                cv2.putText(panel, f"{label}: {prob:.3f}", (15, y_offset), font, 0.5, color, 1)
                draw_bar(y_offset, prob, 250, color)
                y_offset += 28
        else:
            cv2.putText(panel, "No emotions detected", (15, y_offset), font, 0.5, grey, 1)
            y_offset += line_height

        y_offset += 10
        draw_rule(y_offset)
        y_offset += 20

        # Attention visualization section
        cv2.putText(panel, "FEATURE ATTENTION:", (10, y_offset), font, 0.6, white, 2)
        y_offset += 30

        if attention_weights is not None:
            if isinstance(attention_weights, torch.Tensor):
                att_weights = attention_weights.detach().cpu().numpy()
            else:
                att_weights = np.asarray(attention_weights)

            # Mean weight per facial region.
            region_attention = {
                region_name: np.mean(
                    [att_weights[i] if i < len(att_weights) else 0.1 for i in indices]
                )
                for region_name, indices in self.regions.items()
            }

            # Highest-weighted region first.
            sorted_regions = sorted(region_attention.items(),
                                    key=lambda pair: pair[1], reverse=True)

            for region_name, att_val in sorted_regions:
                color = self.region_colors[region_name]
                cv2.putText(panel, f"{region_name.upper()}: {att_val:.3f}", (15, y_offset),
                            font, 0.5, color, 1)
                draw_bar(y_offset, att_val, 200, color)
                y_offset += 28
        else:
            cv2.putText(panel, "Computing attention...", (15, y_offset), font, 0.5, grey, 1)

        # Combine panel with main image
        return np.hstack([image, panel])

    def process_frame(self, frame):
        """Process a single frame - ENHANCED VERSION"""
        # Extract landmarks
        landmarks = self.landmark_processor.extract_landmarks(frame)

        if landmarks is not None:
            # Normalize landmarks
            normalized_landmarks = self.normalize_landmarks(landmarks)

            # Add to buffer
            if normalized_landmarks is not None:
                self.landmark_buffer.append(normalized_landmarks)

            # Process with transformer
            emotions = None
            attention_weights = None
            buffer_status = f"Buffer: {len(self.landmark_buffer)}/{self.sequence_length}"

            if len(self.landmark_buffer) >= 5:  # Start predictions with fewer frames
                # Prepare input tensor
                current_buffer = list(self.landmark_buffer)
                
                if len(current_buffer) >= self.sequence_length:
                    # Full sequence analysis
                    sequence = np.array(current_buffer)
                    sequence_flat = sequence.reshape(sequence.shape[0], -1)
                    sequence_tensor = torch.FloatTensor(sequence_flat).unsqueeze(0)

                    with torch.no_grad():
                        emotions, attention_weights = self.model(sequence_tensor, single_frame_mode=False)
                        attention_weights = attention_weights[0].numpy()
                    
                    buffer_status = "Full Sequence Analysis"
                else:
                    # Single frame analysis for early predictions
                    current_landmarks = normalized_landmarks.flatten()
                    landmarks_tensor = torch.FloatTensor(current_landmarks).unsqueeze(0)
                    
                    with torch.no_grad():
                        emotions, attention_weights = self.model(landmarks_tensor, single_frame_mode=True)
                        if isinstance(attention_weights, torch.Tensor):
                            attention_weights = attention_weights[0].numpy()
                    
                    buffer_status = f"Single Frame Analysis ({len(current_buffer)} frames)"

            else:
                buffer_status = f"Collecting frames... ({len(self.landmark_buffer)}/5)"

            # Draw landmarks and info
            frame_with_landmarks = self.draw_landmarks_with_features(
                frame, landmarks, attention_weights
            )

            # Add info panel
            feature_count = len(landmarks) if landmarks is not None else 0
            final_frame = self.draw_info_panel(
                frame_with_landmarks, emotions, attention_weights, feature_count, buffer_status
            )

            return final_frame

        else:
            # No face detected
            no_face_panel = self.draw_info_panel(
                frame, None, None, 0, "No face detected"
            )
            return no_face_panel

        return frame

In [None]:

# Colab-specific helper functions
def process_uploaded_video(uploaded_file_path, output_name="processed_video.mp4", max_frames=300):
    """Run ``main`` on an uploaded video, writing the result under /content.

    Args:
        uploaded_file_path: Path of the input video.
        output_name: File name of the annotated video inside ``/content``.
        max_frames: Upper bound on the number of frames processed.

    Returns:
        Whatever ``main`` returns for these arguments.
    """
    return main(uploaded_file_path, f"/content/{output_name}", max_frames)

def process_sample_frames(video_path, num_frames=50):
    """Preview the pipeline on the first ``num_frames`` frames.

    No output video is written; the call is forwarded to ``main`` with
    ``output_path=None``.
    """
    print(f"Processing first {num_frames} frames for quick preview...")
    return main(video_path, output_path=None, max_frames=num_frames)

In [None]:
def main(video_path=None, output_path=None, max_frames=None):
    """Annotate a video file frame by frame and preview the last frames.

    Args:
        video_path: Path of the input video. ``None`` prints a hint and
            returns immediately.
        output_path: Optional path of the annotated output video (mp4v).
            The output is 400 pixels wider than the input because of the
            info panel.
        max_frames: Optional cap on the number of frames processed.

    Returns:
        List with at most the last 16 annotated frames (BGR arrays), or
        ``None`` when no path was given or the video could not be opened.
    """
    # Cheap validation first, so the model is only built for usable input.
    if video_path is None:
        print("Please provide a video file path")
        return

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        print(f"Error: Could not open video file: {video_path}")
        return

    panel_width = 400      # must match the panel drawn by the processor
    preview_size = 16      # frames kept for the 4x4 preview grid
    fallback_fps = 30.0    # used when the container reports no frame rate

    processor = None
    out = None
    frame_count = 0
    # Bounded buffers: the last frames and their 1-based frame numbers.
    recent_frames = deque(maxlen=preview_size)
    recent_numbers = deque(maxlen=preview_size)

    try:
        print("Initializing Swing Transformer Video Processor...")
        processor = RealTimeVideoProcessor(sequence_length=30)

        # Get video properties. The frame rate is kept as a float so that
        # rates such as 29.97 are not truncated.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):  # also catches NaN
            print(f"Warning: video reports no frame rate, assuming {fallback_fps} FPS")
            fps = fallback_fps
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        print(f"Video properties: {width}x{height}, {fps:.2f} FPS, {total_frames} frames")

        # Setup output video writer if output path is provided
        if output_path:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width + panel_width, height))
            if out.isOpened():
                print(f"Output will be saved to: {output_path}")
            else:
                print(f"Warning: could not open output video for writing: {output_path}")
                out.release()
                out = None

        print("Processing video frames...")

        while True:
            ret, frame = cap.read()
            if not ret:
                print("Finished processing all frames")
                break

            frame_count += 1
            processed_frame = processor.process_frame(frame)

            # Frame counter; the total is omitted when it is unknown.
            if total_frames > 0:
                counter_text = f"Frame: {frame_count}/{total_frames}"
            else:
                counter_text = f"Frame: {frame_count}"
            cv2.putText(processed_frame, counter_text, (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)

            if out is not None:
                out.write(processed_frame)

            recent_frames.append(processed_frame.copy())
            recent_numbers.append(frame_count)

            # Progress update every 30 frames.
            if frame_count % 30 == 0:
                emotion_state = 'Active' if len(processor.landmark_buffer) >= 5 else 'Buffering'
                if total_frames > 0:
                    progress = (frame_count / total_frames) * 100
                    print(f"Progress: {progress:.1f}% ({frame_count}/{total_frames}) - Emotions: {emotion_state}")
                else:
                    # Some containers report 0 frames; avoid dividing by it.
                    print(f"Progress: {frame_count} frames - Emotions: {emotion_state}")

            if max_frames and frame_count >= max_frames:
                print(f"Reached maximum frame limit: {max_frames}")
                break
    finally:
        # Release native resources even when processing raised.
        cap.release()
        if out is not None:
            out.release()
        if processor is not None:
            processor.landmark_processor.face_mesh.close()

    if out is not None:
        print(f"Output video saved successfully: {output_path}")

    print(f"Video processing completed. Processed {frame_count} frames.")

    processed_frames = list(recent_frames)

    # Display sample frames in Colab
    if processed_frames:
        print("\nDisplaying sample processed frames:")

        # 4x4 grid, large enough for the info panel to stay readable.
        fig, axes = plt.subplots(4, 4, figsize=(24, 18))
        axes = axes.flatten()

        for ax, number, frame in zip(axes, recent_numbers, processed_frames):
            # Convert BGR to RGB for matplotlib
            ax.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            # Title carries the real position of the frame in the video.
            ax.set_title(f'Frame {number}', fontsize=12)

        # Axes are hidden on used and unused cells alike.
        for ax in axes:
            ax.axis('off')

        plt.tight_layout()
        plt.show()

    return processed_frames

In [None]:
if __name__ == "__main__":
    # Default run: annotate a video that was uploaded to the Colab runtime.
    processed_frames = process_uploaded_video("/content/video2.mp4")

    # Alternatives:
    #   quick test on a few frames
    #     processed_frames = process_sample_frames("/content/your_video.mp4", num_frames=100)
    #   full run with an explicit output path
    #     processed_frames = main("/content/input_video.mp4", "/content/output_video.mp4")

    # Short usage reminder, printed after the run.
    print("\n".join([
        "To use this code in Colab:",
        "1. Upload your video file",
        "2. Call: process_uploaded_video('/content/your_video_file.mp4')",
        "3. Or for quick preview: process_sample_frames('/content/your_video_file.mp4', 50)",
    ]))