# Motion Analysis Pipeline

## Overview
This Jupyter notebook implements a distributed video motion analysis pipeline. It processes videos in parallel across multiple instances, extracting motion-related features using optical flow and scene detection techniques.

### Key Features
- Distributed processing across multiple VMs
- Optical flow analysis for motion tracking
- Scene detection and shot analysis
- Circular statistics for motion direction
- Entropy-based feature extraction

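### Prerequisites
The next cell installs the required packages (`scenedetect`, `opencv-python`, `scipy`) and imports the libraries used throughout the notebook. `numpy` and `pandas` are imported but not installed by that cell, so they must already be available.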


In [None]:
!pip install scenedetect
!pip install opencv-python
!pip install scipy

import numpy as np
import pandas as pd
import cv2
import sys
from pathlib import Path
from math import atan2, pi
from scipy.stats import entropy
from scenedetect import detect, AdaptiveDetector



### Motion Features Extracted
- Motion magnitude (mean, std, volatility)
- Motion direction (mean, std, volatility)
- Spatial entropy (mean, std, volatility)
- Information entropy (mean, std, volatility)
- Shot detection rate (shots per minute)

### Process Flow
1. Split video list across instances
2. Process each video (instances run in parallel; within an instance the videos are handled one after another):
   - Extract frames at specified intervals
   - Calculate optical flow
   - Compute motion statistics
   - Detect scene transitions
3. Save results to instance-specific CSV files
4. Merge results in post-processing

### Usage
Set `num_instances` to the number of VMs and give each VM a distinct `instance_id` from `0` to `num_instances - 1`. Each instance writes its results to its own `motion_data_{instance_id}.csv` file; these files are merged afterwards.

In [None]:


def circular_mean_std(angles):
    """Return the circular mean and circular standard deviation of angles.

    Args:
        angles: Array-like of angles in radians. Multi-dimensional input is
            treated as a flat collection of angles.

    Returns:
        Tuple ``(mean_angle, angular_deviation)``. ``mean_angle`` is the
        direction of the summed unit vectors, shifted by ``2*pi`` when
        negative so it is never below zero. ``angular_deviation`` is
        ``sqrt(-2 * ln(R))`` where ``R`` is the mean resultant length; it is
        ``0`` when all angles coincide and ``inf`` when they cancel exactly.
        Empty input yields ``(nan, nan)``.
    """
    angles = np.asarray(angles, dtype=float)
    if angles.size == 0:
        # No direction can be defined without data.
        return float('nan'), float('nan')

    sin_sum = np.sum(np.sin(angles))
    cos_sum = np.sum(np.cos(angles))

    mean_angle = atan2(sin_sum, cos_sum)
    if mean_angle < 0:
        mean_angle += 2 * pi

    # Rounding can push R a few ulps above 1 when the angles coincide, which
    # would make the log positive and the square root NaN; clip to [0, 1].
    resultant_length = np.clip(np.hypot(sin_sum, cos_sum) / angles.size, 0.0, 1.0)
    with np.errstate(divide='ignore'):  # R == 0 legitimately maps to inf
        spread = -2.0 * np.log(resultant_length)
    # abs() only normalises the -0.0 produced when R == 1.
    angular_deviation = np.sqrt(np.abs(spread))
    return mean_angle, angular_deviation

def circular_volatility(angles):
    """Return the standard deviation of wrapped step-to-step angle changes.

    Consecutive differences of ``angles`` (radians) are mapped through
    ``arctan2(sin, cos)`` so that a step across the 0 / 2*pi seam counts as a
    small change rather than a jump of almost a full turn.
    """
    steps = np.diff(angles)
    # Wrap each step into the principal range before measuring spread.
    wrapped_steps = np.arctan2(np.sin(steps), np.cos(steps))
    return np.std(wrapped_steps)

def compute_entropy(image):
    """Return the Shannon entropy, in bits, of an array's value histogram.

    The flattened values are binned into 256 unit-width bins covering
    ``[0, 256]``; the normalised histogram is then passed to
    ``scipy.stats.entropy`` with base 2.
    """
    flat_values = image.ravel()
    density, _bin_edges = np.histogram(
        flat_values,
        bins=256,
        range=(0, 256),
        density=True,
    )
    return entropy(density, base=2)

def detect_shots(video_path):
    """Return the average number of detected shots per minute for a video.

    Args:
        video_path: Path (or string) of the video file.

    Returns:
        Number of scenes found by ``AdaptiveDetector`` divided by the video
        duration in minutes, or ``0`` when the reported frame count or frame
        rate is not positive (for example when the file cannot be read).
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        fps = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()

    # A zero frame rate would raise ZeroDivisionError below, and without a
    # positive duration there is no rate to report, so skip the (expensive)
    # scene detection entirely.
    if fps <= 0 or frame_count <= 0:
        return 0

    duration = frame_count / fps
    scene_list = detect(str(video_path), AdaptiveDetector())
    return len(scene_list) / (duration / 60)

def process_video(video_path, sampling_interval=1):
    """Extract motion, entropy and shot-rate features from one video.

    Frames are sampled every ``fps * sampling_interval`` frames, starting half
    a step into the video. Dense Farneback optical flow is computed between
    each sampled frame and the previously used frame (the first frame of the
    video for the first sample), and per-frame statistics are aggregated.

    Args:
        video_path: ``pathlib.Path`` of the video file.
        sampling_interval: Spacing between sampled frames, in seconds.

    Returns:
        Dict of features keyed by column name, or ``None`` when the video
        cannot be read or yields no samples. With a single sample the
        ``*_volatility`` values are NaN because there are no differences.

    Note:
        The per-frame direction is the circular mean of the flow angles.
        An arithmetic mean is wrong across the 0 / 2*pi seam: flow pointing
        along angle 0 would average to roughly pi.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        step = fps * sampling_interval
        # An unreadable file reports fps == 0; np.arange raises on a zero
        # step, so bail out before building the sampling grid.
        if step <= 0:
            return None
        sample_frames = np.arange(step // 2, frame_count, step)

        ret, prev_frame = cap.read()
        if not ret:
            return None
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

        magnitudes, angles, spatial_entropies, info_entropies = [], [], [], []

        for idx in sample_frames:
            cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            flow = cv2.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
            mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])

            # Angles are circular data: average them on the unit circle.
            frame_direction, _ = circular_mean_std(ang.ravel())

            magnitudes.append(np.mean(mag))
            angles.append(frame_direction)
            spatial_entropies.append(compute_entropy(gray))
            info_entropies.append(compute_entropy(mag))

            prev_gray = gray
    finally:
        # Release the capture on every exit path, including exceptions.
        cap.release()

    if not magnitudes:
        return None

    angle_mean, angle_std = circular_mean_std(np.array(angles))
    avg_shots_per_min = detect_shots(video_path)

    return {
        'video_id': video_path.stem,
        'file_name': video_path.stem,
        'motion_magnitude_mean': np.mean(magnitudes),
        'motion_magnitude_std': np.std(magnitudes),
        'motion_magnitude_volatility': np.std(np.diff(magnitudes)),
        'motion_direction_mean': angle_mean,
        'motion_direction_std': angle_std,
        'motion_direction_volatility': circular_volatility(np.array(angles)),
        'spatial_entropy_mean': np.mean(spatial_entropies),
        'spatial_entropy_std': np.std(spatial_entropies),
        'spatial_entropy_volatility': np.std(np.diff(spatial_entropies)),
        'info_entropy_mean': np.mean(info_entropies),
        'info_entropy_std': np.std(info_entropies),
        'info_entropy_volatility': np.std(np.diff(info_entropies)),
        'avg_shots_per_min': avg_shots_per_min
    }

def split_video_list(video_files, num_splits):
    """Partition ``video_files`` into ``num_splits`` near-equal chunks.

    Delegates to ``np.array_split``, so the chunks come back as a list of
    arrays, and trailing chunks may be shorter (or empty) when the input
    does not divide evenly.
    """
    chunks = np.array_split(video_files, indices_or_sections=num_splits)
    return chunks

def _read_processed_ids(csv_file):
    """Return the ``video_id`` values stored in *csv_file* as a set of strings."""
    if not csv_file.exists():
        return set()
    try:
        # Force string dtype: with type inference, a column of all-numeric
        # ids is parsed as integers and never matches ``Path.stem``.
        ids = pd.read_csv(csv_file, usecols=['video_id'], dtype={'video_id': str})['video_id']
    except pd.errors.EmptyDataError:
        # A zero-byte file (e.g. from an interrupted run) holds no results.
        return set()
    return set(ids.dropna())


def process_videos_subset(video_files, csv_path, motion_data_path, sampling_interval=1):
    """Process a subset of videos, skipping those already recorded.

    Args:
        video_files: Iterable of ``pathlib.Path`` objects to process.
        csv_path: Per-instance results CSV; new rows are appended here.
        motion_data_path: Second results CSV that is only consulted to find
            videos that were already processed.
        sampling_interval: Forwarded to ``process_video``.

    A video is skipped when its stem appears in the ``video_id`` column of
    either CSV. Each successful result is appended to ``csv_path``
    immediately, so an interrupted run can be resumed.
    """
    processed_videos = _read_processed_ids(csv_path) | _read_processed_ids(motion_data_path)

    for video in video_files:
        if video.stem in processed_videos:
            continue

        result = process_video(video, sampling_interval)
        if result:
            # Write the header when starting a new file or refilling an
            # empty one, so the CSV always stays readable.
            write_header = not csv_path.exists() or csv_path.stat().st_size == 0
            pd.DataFrame([result]).to_csv(csv_path, mode='a', header=write_header, index=False)
            # Remember the id so a duplicate entry in video_files is not
            # processed and appended a second time.
            processed_videos.add(video.stem)



In [None]:
# Manually specify parameters
num_instances = 5  # Adjust this to match the number of VMs
instance_id = 4  # Manually change this for each VM (0, 1, 2, ...)

folder_path = Path('../../YouTube_Downloader/Complete_Downloads')
# glob() makes no ordering guarantee; sort so that every VM derives the same
# partition and no video is assigned to two instances or to none.
all_videos = sorted(folder_path.glob('*.mp4'))
csv_path = Path(f"motion_data_{instance_id}.csv")
motion_data_path = Path("motion_data.csv")

# Split videos and select the subset for this instance
video_chunks = split_video_list(all_videos, num_instances)

# Reject negative ids as well: they would index from the end of the list and
# silently duplicate another instance's work.
if 0 <= instance_id < len(video_chunks):
    process_videos_subset(video_chunks[instance_id], csv_path, motion_data_path, sampling_interval=10)
else:
    print(f"instance_id {instance_id} is outside 0..{len(video_chunks) - 1}; nothing processed")
