In [9]:
# Install all required packages
!pip install opencv-python numpy easyocr ultralytics



In [10]:
# Import all required libraries
import cv2
import numpy as np
import easyocr
import torch
import json
import re
from collections import Counter
from ultralytics import YOLO

# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Motion Analysis:** *Quantify the average motion using Optical Flow.*

In [11]:
def analyze_motion(video_path: str) -> float:
    """
    Quantify average motion in a video using dense (Farneback) optical flow.

    Every consecutive pair of frames is converted to grayscale, the optical
    flow between the two is computed, and the mean flow magnitude of that
    pair is recorded. The result is the mean of those per-pair means.

    Args:
        video_path: Path to the video file

    Returns:
        Average motion magnitude across all consecutive frame pairs, or 0.0
        when no second frame could be read.

    Raises:
        ValueError: If the video cannot be opened or its first frame cannot
            be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Cannot open video file: {video_path}")

        ret, prev_frame = cap.read()
        if not ret:
            raise ValueError("Cannot read first frame from video")

        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        motion_magnitudes = []

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Dense optical flow between the previous and the current frame
            flow = cv2.calcOpticalFlowFarneback(
                prev_gray, gray, None,
                pyr_scale=0.5, levels=3, winsize=15,
                iterations=3, poly_n=5, poly_sigma=1.2, flags=0
            )

            # Only the magnitude of the (dx, dy) field is needed; the angle
            # component is discarded.
            magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
            motion_magnitudes.append(np.mean(magnitude))
            prev_gray = gray
    finally:
        # Always give the capture handle back, including when a conversion or
        # the flow computation raises part-way through the video.
        cap.release()

    return float(np.mean(motion_magnitudes)) if motion_magnitudes else 0.0

# **Text Detection (OCR):** *Determine the text_present_ratio and extract keywords.*

In [12]:
def clean_words(text: str) -> list:
    """Lowercase the text and return its alphabetic runs of 3+ letters, in order."""
    kept = []
    for match in re.finditer(r"[A-Za-z]+", text.lower()):
        token = match.group(0)
        # Runs shorter than three letters are dropped
        if len(token) < 3:
            continue
        kept.append(token)
    return kept


def analyze_text_ocr_easyocr(
    video_path: str,
    ocr_frame_step: int = 10,
    languages: list = None,
    min_conf: float = 0.5,
) -> dict:
    """
    Analyze text presence in video using EasyOCR.

    Every `ocr_frame_step`-th frame (starting with the first) is passed to an
    EasyOCR reader. A sampled frame counts as "containing text" when at least
    one recognized string is non-blank and has confidence >= `min_conf`.

    Args:
        video_path: Path to video file
        ocr_frame_step: Process every Nth frame (default: 10); must be >= 1
        languages: List of language codes (default: ['en'])
        min_conf: Minimum confidence threshold (default: 0.5)

    Returns:
        Dictionary with:
            "file": the given video path
            "text_present_ratio": sampled frames with text / sampled frames
                (0.0 when no frame was sampled)
            "keywords": unique cleaned words in order of first appearance

    Raises:
        ValueError: If `ocr_frame_step` is smaller than 1 or the video cannot
            be opened.
    """
    if ocr_frame_step < 1:
        raise ValueError(f"ocr_frame_step must be >= 1, got {ocr_frame_step}")

    languages = languages or ['en']

    cap = cv2.VideoCapture(video_path)
    try:
        # Validate the video before building the OCR reader, so an unreadable
        # path fails without first loading the recognition model.
        if not cap.isOpened():
            raise ValueError(f"Cannot open video file: {video_path}")

        use_gpu = torch.cuda.is_available()
        print(f"Using GPU: {use_gpu}")
        reader = easyocr.Reader(languages, gpu=use_gpu)

        ocr_total_frames = 0
        ocr_text_frames = 0
        all_words = []
        frame_idx = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_idx % ocr_frame_step == 0:
                ocr_total_frames += 1
                results = reader.readtext(frame, detail=True)

                # Keep only confident, non-blank recognitions
                valid_texts = [
                    text for (bbox, text, conf) in results
                    if conf >= min_conf and text.strip()
                ]

                if valid_texts:
                    ocr_text_frames += 1
                    for text in valid_texts:
                        all_words.extend(clean_words(text))

            frame_idx += 1
    finally:
        # Release the capture even if the reader fails on some frame
        cap.release()

    text_present_ratio = (
        ocr_text_frames / ocr_total_frames if ocr_total_frames > 0 else 0.0
    )
    # Deduplicate while preserving first-seen order; word counts are not used
    keywords = list(dict.fromkeys(all_words))

    return {
        "file": video_path,
        "text_present_ratio": float(text_present_ratio),
        "keywords": keywords,
    }

# **Shot Cut Detection:** *Calculate the number of "hard cuts."*

In [13]:
def detect_hard_cuts(
    video_path: str,
    diff_threshold: float = 30.0,
    min_scene_length: int = 5
) -> dict:
    """
    Detect hard cuts (abrupt shot changes) in a video.

    Consecutive frames are compared in grayscale; a cut is recorded at a
    frame when the mean absolute pixel difference to the previous frame
    exceeds `diff_threshold` and at least `min_scene_length` frames have
    passed since the last recorded cut.

    Args:
        video_path: Path to video file
        diff_threshold: Threshold on mean abs diff (0-255) to detect a cut
        min_scene_length: Minimum frames between cuts to avoid flicker detection

    Returns:
        Dictionary with:
            "file": the given video path
            "hard_cuts": number of detected cuts
            "cut_frames": frame indices of the cuts (the first frame of the
                video has index 0, so the earliest possible cut is index 1)

    Raises:
        ValueError: If the video cannot be opened or its first frame cannot
            be read.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise ValueError(f"Cannot open video file: {video_path}")

        ret, prev_frame = cap.read()
        if not ret:
            raise ValueError("Cannot read first frame from video")

        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        cut_frames = []
        frame_idx = 1
        # Start "one scene length in the past" so the spacing rule never
        # blocks the first detected cut.
        last_cut_frame = -min_scene_length

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            diff_score = float(np.mean(cv2.absdiff(gray, prev_gray)))

            # Detect hard cut
            if diff_score > diff_threshold and (frame_idx - last_cut_frame) >= min_scene_length:
                cut_frames.append(frame_idx)
                last_cut_frame = frame_idx

            prev_gray = gray
            frame_idx += 1
    finally:
        # Release the capture even when a frame conversion or diff raises
        cap.release()

    return {
        "file": video_path,
        "hard_cuts": len(cut_frames),
        "cut_frames": cut_frames,
    }

# **Object vs. Person Dominance:** *Calculate the ratio of people to objects detected using a pre-trained YOLO model.*

In [14]:
def analyze_person_object_dominance(
    video_path: str,
    model_path: str = "yolov8x.pt",
    det_frame_step: int = 10,
    conf_thres: float = 0.5,
) -> dict:
    """
    Analyze person vs object dominance in video using YOLO.

    Detection runs on every `det_frame_step`-th frame (starting with the
    first). Each detection with confidence >= `conf_thres` is counted as a
    person when its class name is "person", and as an object otherwise.
    Counts are per detection, accumulated over all sampled frames.

    Args:
        video_path: Path to video file
        model_path: YOLO model weights (e.g., 'yolov8x.pt')
        det_frame_step: Run detection on every Nth frame; must be >= 1
        conf_thres: Minimum confidence for counting a detection

    Returns:
        Dictionary with:
            "file": the given video path
            "person_detections" / "object_detections": accumulated counts
            "person_ratio" / "object_ratio": share of all counted detections
                (both 0.0 when nothing was detected)
            "person_to_object_ratio": persons / objects, or None when no
                object was detected

    Raises:
        ValueError: If `det_frame_step` is smaller than 1 or the video cannot
            be opened.
    """
    if det_frame_step < 1:
        raise ValueError(f"det_frame_step must be >= 1, got {det_frame_step}")

    cap = cv2.VideoCapture(video_path)
    try:
        # Validate the video first so an unreadable path fails without
        # loading (and possibly downloading) the model weights.
        if not cap.isOpened():
            raise ValueError(f"Cannot open video file: {video_path}")

        model = YOLO(model_path)

        person_detections = 0
        object_detections = 0
        frame_idx = 0

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Run YOLO on sampled frames
            if frame_idx % det_frame_step == 0:
                results = model(frame, verbose=False)

                if results:
                    boxes = results[0].boxes
                    if boxes is not None:
                        for box in boxes:
                            if float(box.conf[0]) >= conf_thres:
                                class_name = model.names[int(box.cls[0])]
                                if class_name == "person":
                                    person_detections += 1
                                else:
                                    object_detections += 1

            frame_idx += 1
    finally:
        # Release the capture even when inference raises on some frame
        cap.release()

    # Calculate ratios
    total_detections = person_detections + object_detections
    person_ratio = person_detections / total_detections if total_detections > 0 else 0.0
    object_ratio = object_detections / total_detections if total_detections > 0 else 0.0
    # None (rather than infinity) signals "no objects to compare against"
    person_to_object_ratio = person_detections / object_detections if object_detections > 0 else None

    return {
        "file": video_path,
        "person_detections": person_detections,
        "object_detections": object_detections,
        "person_ratio": float(person_ratio),
        "object_ratio": float(object_ratio),
        "person_to_object_ratio": person_to_object_ratio,
    }

In [15]:
def extract_video_features(
    video_path: str,
    ocr_frame_step: int = 10,
    det_frame_step: int = 10,
    yolo_model: str = "yolov8x.pt",
    languages: list = None,
    min_conf: float = 0.5,
    conf_thres: float = 0.5,
) -> dict:
    """
    Extract comprehensive features from a video file.

    This function combines all feature extraction methods:
    - Hard cuts detection
    - Motion analysis (optical flow)
    - Text detection (OCR)
    - Person vs object dominance (YOLO)

    Each analysis opens and reads the video on its own, so the file is read
    four times in total.

    Args:
        video_path: Path to the video file
        ocr_frame_step: Sample every Nth frame for OCR (default: 10)
        det_frame_step: Sample every Nth frame for YOLO detection (default: 10)
        yolo_model: YOLO model to use (default: "yolov8x.pt")
        languages: OCR language codes (default: ['en'])
        min_conf: Minimum OCR confidence for a text to count (default: 0.5)
        conf_thres: Minimum YOLO confidence for a detection to count
            (default: 0.5)

    Returns:
        Dictionary containing all extracted features: "file", "hard_cuts",
        "cut_frames", "average_motion", "text_present_ratio", "keywords",
        "person_detections", "object_detections", "person_ratio",
        "object_ratio" and "person_to_object_ratio".
    """
    print(f"Processing video: {video_path}\n")

    # Extract features from different analyses
    print("1. Detecting hard cuts...")
    cut_info = detect_hard_cuts(video_path)

    print("2. Analyzing motion...")
    avg_motion = analyze_motion(video_path)

    print("3. Detecting text with OCR...")
    ocr_info = analyze_text_ocr_easyocr(
        video_path,
        ocr_frame_step=ocr_frame_step,
        languages=languages or ['en'],
        min_conf=min_conf,
    )

    print("4. Analyzing person vs object dominance...")
    dominance_info = analyze_person_object_dominance(
        video_path,
        model_path=yolo_model,
        det_frame_step=det_frame_step,
        conf_thres=conf_thres,
    )

    # Combine all features into a single dictionary
    features = {
        "file": video_path,
        "hard_cuts": cut_info["hard_cuts"],
        "cut_frames": cut_info["cut_frames"],
        "average_motion": float(avg_motion),
        "text_present_ratio": ocr_info["text_present_ratio"],
        "keywords": ocr_info["keywords"],
        "person_detections": dominance_info["person_detections"],
        "object_detections": dominance_info["object_detections"],
        "person_ratio": dominance_info["person_ratio"],
        "object_ratio": dominance_info["object_ratio"],
        "person_to_object_ratio": dominance_info["person_to_object_ratio"],
    }

    print("\n Feature extraction complete!")
    return features

In [16]:
# Example usage: run the full pipeline on one video, show the result and
# persist it as JSON next to the video.
if __name__ == "__main__":
    # Update these paths to your video file and desired output location
    video_path = "/content/drive/MyDrive/Test_Video/new.mp4"
    output_path = "/content/drive/MyDrive/Test_Video/video_features.json"

    # Run every analysis on the video
    features = extract_video_features(video_path)

    # Serialize once; the same text is both displayed and written to disk
    features_json = json.dumps(features, indent=2)
    banner_rule = "=" * 60

    # Display results
    print("\n" + banner_rule, "EXTRACTED FEATURES", banner_rule, sep="\n")
    print(features_json)

    # Save to JSON file
    with open(output_path, "w") as out_file:
        out_file.write(features_json)

    print(f"\n Features saved to: {output_path}")

Processing video: /content/drive/MyDrive/Test_Video/new.mp4

1. Detecting hard cuts...
2. Analyzing motion...
3. Detecting text with OCR...
Using GPU: True
4. Analyzing person vs object dominance...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt': 100% ━━━━━━━━━━━━ 130.5MB 335.6MB/s 0.4s

 Feature extraction complete!

EXTRACTED FEATURES
{
  "file": "/content/drive/MyDrive/Test_Video/new.mp4",
  "hard_cuts": 3,
  "cut_frames": [
    832,
    1150,
    1348
  ],
  "average_motion": 0.23675572872161865,
  "text_present_ratio": 0.9863945578231292,
  "keywords": [
    "english",
    "thank",
    "you",
    "father",
    "used",
    "always",
    "tell",
    "something",
    "speeches",
    "alwaya",
    "which",
    "want",
    "share",
    "with",
    "that",
    "fit",
    "inside",
    "glass",
    "slipper",
    "why",
    "know",
    "like",
    "were",
    "told",
    "cinderella",
    "did",
    "dosyde",
    "dosyoe",
    "wh