In [None]:
import cv2
import mediapipe as mp
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from pathlib import Path

In [9]:

video_paths=r"C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM"

In [None]:
PoseLandmark = mp.solutions.pose.PoseLandmark
mp_pose = mp.solutions.pose
np.set_printoptions(precision=4, suppress=True)

In [None]:
def compute_angle(a, b, c):
    a, b, c = np.array(a), np.array(b), np.array(c)
    ba = a - b
    bc = c - b
    cos_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-6)
    angle = np.arccos(np.clip(cos_angle, -1.0, 1.0))
    return np.degrees(angle)


In [None]:
def detect_heel_strike(prev_y, current_y, threshold=0.005):
    if prev_y is None:
        return False
    velocity = current_y - prev_y
    return -threshold < velocity < threshold

In [None]:
def compute_stride_length(prev_pos, curr_pos):
    if prev_pos is None:
        return 0.0
    prev = np.array(prev_pos)
    curr = np.array(curr_pos)
    return float(np.linalg.norm(curr - prev))

In [None]:
def extract_frame_landmarks(landmark_list):
    d = {}
    for idx, lm_enum in enumerate(PoseLandmark):
        name = lm_enum.name
        lm = landmark_list[idx]
        d[f"{name}_x"] = float(lm.x)
        d[f"{name}_y"] = float(lm.y)
        d[f"{name}_z"] = float(lm.z)
        d[f"{name}_visibility"] = float(lm.visibility)

    # Calculate joint angles (with error handling)
    try:
        d['left_knee_angle'] = compute_angle([d['LEFT_HIP_x'], d['LEFT_HIP_y']], [d['LEFT_KNEE_x'], d['LEFT_KNEE_y']], [d['LEFT_ANKLE_x'], d['LEFT_ANKLE_y']])
        d['right_knee_angle'] = compute_angle([d['RIGHT_HIP_x'], d['RIGHT_HIP_y']], [d['RIGHT_KNEE_x'], d['RIGHT_KNEE_y']], [d['RIGHT_ANKLE_x'], d['RIGHT_ANKLE_y']])
        d['left_hip_angle'] = compute_angle([d['LEFT_SHOULDER_x'], d['LEFT_SHOULDER_y']], [d['LEFT_HIP_x'], d['LEFT_HIP_y']], [d['LEFT_KNEE_x'], d['LEFT_KNEE_y']])
        d['right_hip_angle'] = compute_angle([d['RIGHT_SHOULDER_x'], d['RIGHT_SHOULDER_y']], [d['RIGHT_HIP_x'], d['RIGHT_HIP_y']], [d['RIGHT_KNEE_x'], d['RIGHT_KNEE_y']])
    except Exception:
        # If landmarks missing, set angles to NaN
        d['left_knee_angle'] = float('nan')
        d['right_knee_angle'] = float('nan')
        d['left_hip_angle'] = float('nan')
        d['right_hip_angle'] = float('nan')
    
    return d



In [None]:
def initialize_video_processing(video_path):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)
    return cap, fps, pose

print('initialize_video_processing ready')


In [None]:
def process_video_for_gait(video_path, cycles_per_video=5):
    cap, fps, pose = initialize_video_processing(video_path)
    frame_number = 0
    gait_count = 0
    recording_cycle = False
    current_cycle = []
    prev_left_heel_y = None
    prev_left_heel_pos = None
    all_rows = []
    
    # Get total frames and duration for temporal context
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    video_duration = total_frames / fps if fps > 0 else 0

    while cap.isOpened() and gait_count < cycles_per_video:
        ret, frame = cap.read()
        if not ret:
            break
        frame_number += 1
        
        # Calculate timestamp for this frame
        timestamp = frame_number / fps
        
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(frame_rgb)
        if results.pose_landmarks:
            lm = results.pose_landmarks.landmark
            row = extract_frame_landmarks(lm)
            
            # Add temporal information
            row['frame_idx'] = frame_number
            row['timestamp'] = timestamp
            row['fps'] = fps
            row['video_duration'] = video_duration
            row['total_frames'] = total_frames

            left_heel_y = row.get('LEFT_HEEL_y')
            left_heel_pos = [row.get('LEFT_HEEL_x'), row.get('LEFT_HEEL_y')]

            if detect_heel_strike(prev_left_heel_y, left_heel_y):
                if not recording_cycle:
                    recording_cycle = True
                    current_cycle = []
                    cycle_start_frame = frame_number
                    cycle_start_time = timestamp
                    prev_left_heel_pos = left_heel_pos
                else:
                    recording_cycle = False
                    gait_count += 1
                    cycle_end_frame = frame_number
                    cycle_end_time = timestamp
                    stride_length = compute_stride_length(prev_left_heel_pos, left_heel_pos)
                    step_time_sec = cycle_end_time - cycle_start_time
                    cycle_frame_count = cycle_end_frame - cycle_start_frame
                    
                    # Add cycle-level temporal features to all frames in this cycle
                    for r in current_cycle:
                        r['stride_length'] = stride_length
                        r['step_time_sec'] = step_time_sec
                        r['cycle_frame_count'] = cycle_frame_count
                        r['cycle_start_time'] = cycle_start_time
                        r['cycle_end_time'] = cycle_end_time
                    all_rows.extend(current_cycle)

            prev_left_heel_y = left_heel_y

            if recording_cycle:
                current_cycle.append(row)

    cap.release()
    pose.close()
    return all_rows

In [None]:
def extract_gait_features_to_csv(video_paths, output_csv='gait_features.csv', cycles_per_video=5, allowed_exts=None):
    allowed_exts = allowed_exts or ['.mp4', '.avi', '.mov', '.mkv']

    if isinstance(video_paths, (str, Path)):
        video_paths = [video_paths]

    all_rows = []
    video_items = []

    # Flexible video discovery function: depth-first until videos are found
    def find_videos_recursively(path, max_depth=10, current_depth=0):
        """Recursively find video files, returning videos found in the first directory that contains them.
        This prefers videos closer to the provided path and only goes deeper when necessary."""
        videos_found = []
        path = Path(path)

        if not path.exists() or current_depth > max_depth:
            return videos_found

        if path.is_file() and path.suffix.lower() in allowed_exts:
            return [path]

        if path.is_dir():
            # Check for videos directly in this folder
            direct_videos = [child for child in sorted(path.iterdir()) if child.is_file() and child.suffix.lower() in allowed_exts]
            if direct_videos:
                return direct_videos

            # Otherwise, check subdirectories (depth-first)
            for subdir in sorted([c for c in path.iterdir() if c.is_dir()]):
                vids = find_videos_recursively(subdir, max_depth, current_depth + 1)
                if vids:
                    # Return as soon as we find videos in a deeper folder
                    return vids

        return videos_found

    # Discover videos for all provided roots
    for p in video_paths:
        video_items.extend(find_videos_recursively(p))

    if not video_items:
        print('No video files found in provided paths. Writing empty CSV.')
        out_dir = Path('Datasets')
        out_dir.mkdir(parents=True, exist_ok=True)
        df = pd.DataFrame(all_rows)
        out_path = out_dir / output_csv
        df.to_csv(out_path, index=False)
        print(f'Saved {len(df)} rows to {out_path}')
        return df

    # Group videos by the folder where they were found (deepest folder)
    videos_by_folder = {}
    for v in video_items:
        parent = Path(v).parent
        videos_by_folder.setdefault(parent, []).append(v)

    for folder, vids in videos_by_folder.items():
        folder_name = folder.name or ''
        for video_path in vids:
            try:
                rows = process_video_for_gait(str(video_path), cycles_per_video=cycles_per_video)
            except Exception as e:
                print(f'Error processing {video_path}: {e}')
                continue
            for r in rows:
                r['gait_pattern'] = folder_name
            all_rows.extend(rows)

    # Build DataFrame and save to Datasets/
    out_dir = Path('Datasets')
    out_dir.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame(all_rows)
    out_path = out_dir / output_csv
    df.to_csv(out_path, index=False)
    print(f'Saved {len(df)} rows to {out_path}')
    return df

In [12]:

df= extract_gait_features_to_csv(video_paths, output_csv='test_empty_restored.csv')
print('Rows:', len(df))
print('Datasets files:', [p.name for p in Path('Datasets').glob('*')])


Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Early Level\001_KOA_01_EL.MOV: name 'initialize_video_processing' is not defined
Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Early Level\001_KOA_02_EL.MOV: name 'initialize_video_processing' is not defined
Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Early Level\002_KOA_01_EL.MOV: name 'initialize_video_processing' is not defined
Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Moderate Disease\006_KOA_01_MD.MOV: name 'initialize_video_processing' is not defined
Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Moderate Disease\014_KOA_02_MD.MOV: name 'initialize_video_processing' is not defined
Error processing C:\Users\Josemiles\Desk

In [13]:
# Test the enhanced flexible navigation
print("=== Testing Enhanced Flexible Navigation ===")

# Test 1: Use the enhanced function with flexible navigation
print("\n--- Test 1: Enhanced function with aggregated features ---")
df_enhanced = extract_enhanced_gait_features_to_csv(
    video_paths=video_paths,
    output_csv='enhanced_flexible_navigation_test.csv',
    include_raw_frames=False,  # Aggregated features
    cycles_per_video=3  # Fewer cycles for faster testing
)

print(f"Enhanced extraction result: {len(df_enhanced)} video summaries")
if len(df_enhanced) > 0:
    print("Columns:", list(df_enhanced.columns))
    if 'disorder_label' in df_enhanced.columns:
        print("Disorder labels found:", df_enhanced['disorder_label'].value_counts().to_dict())
    if 'total_frames' in df_enhanced.columns:
        print("Total frames per video:", df_enhanced[['video_id', 'total_frames', 'extracted_frames']].head())

print("\n--- Available CSV files in Datasets ---")
datasets_dir = Path('Datasets')
if datasets_dir.exists():
    csv_files = list(datasets_dir.glob('*.csv'))
    for csv_file in csv_files:
        size_kb = csv_file.stat().st_size / 1024
        print(f"  {csv_file.name}: {size_kb:.1f} KB")

=== Testing Enhanced Flexible Navigation ===

--- Test 1: Enhanced function with aggregated features ---
Processing video 1/11: 001_KOA_01_EL.MOV
Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Early Level\001_KOA_01_EL.MOV: name 'initialize_video_processing' is not defined
Processing video 2/11: 001_KOA_02_EL.MOV
Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Early Level\001_KOA_02_EL.MOV: name 'initialize_video_processing' is not defined
Processing video 3/11: 002_KOA_01_EL.MOV
Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Early Level\002_KOA_01_EL.MOV: name 'initialize_video_processing' is not defined
Processing video 4/11: 006_KOA_01_MD.MOV
Error processing C:\Users\Josemiles\Desktop\Datasets\After\KOA-PD-NM\Knee Osteoarthritis\Knee Osteoarthritis Moderate Disease\006_KOA_01_MD.MOV: name 'initialize

In [14]:
# Quick validation of navigation results
print("=== Navigation Results Summary ===")

# Check if any videos were found and processed
datasets_path = Path('Datasets')
if datasets_path.exists():
    csv_files = list(datasets_path.glob('*.csv'))
    latest_csv = max(csv_files, key=lambda f: f.stat().st_mtime) if csv_files else None
    
    if latest_csv:
        print(f"Latest CSV: {latest_csv.name}")
        
        # Read and show key info about the latest results
        try:
            df_check = pd.read_csv(latest_csv)
            print(f"Rows processed: {len(df_check)}")
            
            if 'disorder_label' in df_check.columns:
                disorder_counts = df_check['disorder_label'].value_counts()
                print(f"Gait patterns found: {disorder_counts.to_dict()}")
            
            if 'total_frames' in df_check.columns and 'extracted_frames' in df_check.columns:
                print("Frame extraction summary:")
                for _, row in df_check[['video_id', 'total_frames', 'extracted_frames', 'extraction_efficiency']].head(3).iterrows():
                    print(f"  {row['video_id']}: {row['extracted_frames']}/{row['total_frames']} frames ({row['extraction_efficiency']:.2%})")
                    
        except Exception as e:
            print(f"Error reading CSV: {e}")
    else:
        print("No CSV files found")
else:
    print("Datasets directory not found")

=== Navigation Results Summary ===
Latest CSV: enhanced_flexible_navigation_test.csv
Error reading CSV: No columns to parse from file


In [None]:
def compute_temporal_features(df_video):
    """
    Compute temporal gait features for a single video (all frames belong to same disorder).
    This creates summary statistics that capture temporal dynamics while maintaining 
    video-level coherence for musculoskeletal disorder classification.
    """
    if len(df_video) == 0:
        return {}
        
    temporal_features = {}
    
    # Video-level temporal info
    temporal_features['extracted_frames'] = len(df_video)  # Frames with detected gait cycles
    temporal_features['video_duration'] = df_video['video_duration'].iloc[0] if 'video_duration' in df_video else 0
    temporal_features['avg_fps'] = df_video['fps'].iloc[0] if 'fps' in df_video else 30
    
    # Get actual total frames from video metadata
    if 'total_frames' in df_video and len(df_video) > 0:
        temporal_features['total_frames'] = int(df_video['total_frames'].iloc[0])
    else:
        # Fallback: calculate from duration and fps
        if temporal_features['video_duration'] > 0 and temporal_features['avg_fps'] > 0:
            temporal_features['total_frames'] = int(temporal_features['video_duration'] * temporal_features['avg_fps'])
        else:
            temporal_features['total_frames'] = temporal_features['extracted_frames']
    
    # Calculate extraction efficiency
    if temporal_features['total_frames'] > 0:
        temporal_features['extraction_efficiency'] = temporal_features['extracted_frames'] / temporal_features['total_frames']
    else:
        temporal_features['extraction_efficiency'] = 0.0
    
    # Gait cycle statistics
    if 'step_time_sec' in df_video:
        step_times = df_video['step_time_sec'].dropna()
        if len(step_times) > 0:
            temporal_features['avg_step_time'] = step_times.mean()
            temporal_features['std_step_time'] = step_times.std()
            temporal_features['cadence_steps_per_min'] = 60.0 / step_times.mean() if step_times.mean() > 0 else 0
    
    # Joint angle temporal dynamics (capture movement patterns)
    angle_cols = [col for col in df_video.columns if 'angle' in col]
    for angle_col in angle_cols:
        if angle_col in df_video:
            angles = df_video[angle_col].dropna()
            if len(angles) > 1:
                temporal_features[f'{angle_col}_mean'] = angles.mean()
                temporal_features[f'{angle_col}_std'] = angles.std()
                temporal_features[f'{angle_col}_range'] = angles.max() - angles.min()
                # Temporal derivative (rate of change)
                angle_diff = angles.diff().dropna()
                if len(angle_diff) > 0:
                    temporal_features[f'{angle_col}_velocity_mean'] = angle_diff.mean()
                    temporal_features[f'{angle_col}_velocity_std'] = angle_diff.std()
    
    # Stride characteristics
    if 'stride_length' in df_video:
        strides = df_video['stride_length'].dropna()
        if len(strides) > 0:
            temporal_features['avg_stride_length'] = strides.mean()
            temporal_features['std_stride_length'] = strides.std()
            temporal_features['stride_consistency'] = 1.0 - (strides.std() / strides.mean()) if strides.mean() > 0 else 0
    
    # Temporal symmetry (left vs right limb timing)
    left_heel_y = df_video.get('LEFT_HEEL_y', pd.Series())
    right_heel_y = df_video.get('RIGHT_HEEL_y', pd.Series())
    if len(left_heel_y) > 1 and len(right_heel_y) > 1:
        left_movement = left_heel_y.diff().dropna()
        right_movement = right_heel_y.diff().dropna()
        if len(left_movement) > 0 and len(right_movement) > 0:
            # Cross-correlation for temporal symmetry
            min_len = min(len(left_movement), len(right_movement))
            left_move_norm = (left_movement[:min_len] - left_movement[:min_len].mean()) / left_movement[:min_len].std()
            right_move_norm = (right_movement[:min_len] - right_movement[:min_len].mean()) / right_movement[:min_len].std()
            temporal_features['limb_symmetry'] = np.corrcoef(left_move_norm, right_move_norm)[0, 1] if min_len > 1 else 0
    
    return temporal_features

print('temporal feature computation ready')

In [None]:
def extract_enhanced_gait_features_to_csv(video_paths, output_csv='enhanced_gait_features.csv', 
                                       cycles_per_video=5, allowed_exts=None, include_raw_frames=False):
    """
    Enhanced gait feature extraction with temporal coherence for musculoskeletal disorder analysis.

    Args:
        video_paths: List of video file/folder paths
        output_csv: Output CSV filename
        cycles_per_video: Number of gait cycles to extract per video
        allowed_exts: Video file extensions to process
        include_raw_frames: If True, save frame-by-frame data; if False, save aggregated features per video

    Returns:
        DataFrame with extracted features
    """
    allowed_exts = allowed_exts or ['.mp4', '.avi', '.mov', '.mkv']

    if isinstance(video_paths, (str, Path)):
        video_paths = [video_paths]

    all_rows = []
    video_items = []

    # Collect video files with flexible navigation
    def find_videos_recursively(path, max_depth=10, current_depth=0):
        """Recursively find video files, returning videos found in the first directory that contains them."""
        videos_found = []
        path = Path(path)

        if not path.exists() or current_depth > max_depth:
            return videos_found

        if path.is_file() and path.suffix.lower() in allowed_exts:
            return [path]

        if path.is_dir():
            direct_videos = [child for child in sorted(path.iterdir()) if child.is_file() and child.suffix.lower() in allowed_exts]
            if direct_videos:
                return direct_videos

            for subdir in sorted([c for c in path.iterdir() if c.is_dir()]):
                vids = find_videos_recursively(subdir, max_depth, current_depth + 1)
                if vids:
                    return vids

        return videos_found

    for p in video_paths:
        video_items.extend(find_videos_recursively(p))

    if not video_items:
        print('No video files found in provided paths. Writing empty CSV.')
        out_dir = Path('Datasets')
        out_dir.mkdir(parents=True, exist_ok=True)
        df = pd.DataFrame()
        out_path = out_dir / output_csv
        df.to_csv(out_path, index=False)
        print(f'Saved empty CSV to {out_path}')
        return df

    # Group videos by the folder where they were found and process
    videos_by_folder = {}
    for v in video_items:
        parent = Path(v).parent
        videos_by_folder.setdefault(parent, []).append(v)

    for video_idx, (folder, vids) in enumerate(videos_by_folder.items()):
        folder_name = folder.name or 'unknown'
        print(f'Processing folder {video_idx+1}/{len(videos_by_folder)}: {folder} (label: {folder_name})')
        for video_path in vids:
            print(f'  - {video_path.name}')
            try:
                frame_rows = process_video_for_gait(str(video_path), cycles_per_video=cycles_per_video)
                if not frame_rows:
                    print(f'    No gait cycles detected in {video_path.name}')
                    continue

                # Add video-level metadata to each frame
                for row in frame_rows:
                    row['video_id'] = video_path.stem
                    row['video_path'] = str(video_path)
                    row['disorder_label'] = folder_name
                    row['gait_pattern'] = folder_name  # Backwards compatibility

                if include_raw_frames:
                    all_rows.extend(frame_rows)
                else:
                    df_video = pd.DataFrame(frame_rows)
                    temporal_features = compute_temporal_features(df_video)

                    video_summary = {
                        'video_id': video_path.stem,
                        'video_path': str(video_path),
                        'disorder_label': folder_name,
                        'gait_pattern': folder_name,
                        'total_gait_cycles': len(df_video) // max(1, len(df_video['cycle_start_time'].dropna().unique())),
                        **temporal_features
                    }
                    all_rows.append(video_summary)

            except Exception as e:
                print(f'Error processing {video_path}: {e}')
                continue

    # Save results
    out_dir = Path('Datasets')
    out_dir.mkdir(parents=True, exist_ok=True)
    df = pd.DataFrame(all_rows)
    out_path = out_dir / output_csv
    df.to_csv(out_path, index=False)

    print(f'Saved {len(df)} rows to {out_path}')
    print(f'Data type: {"Frame-by-frame" if include_raw_frames else "Video-level aggregated"}')
    if 'disorder_label' in df.columns:
        print(f'Disorder distribution: {df["disorder_label"].value_counts().to_dict()}')

    return df

print('enhanced gait feature extraction ready')

enhanced gait feature extraction ready


In [None]:
# Example 1: Extract aggregated temporal features (one row per video)
# This is ideal for traditional ML classifiers - each video becomes one sample with its disorder label
print("=== Example 1: Video-level aggregated features ===")
# df_aggregated = extract_enhanced_gait_features_to_csv(
#     video_paths=['path/to/video_folder/'],  # Folder with labeled videos
#     output_csv='aggregated_gait_features.csv',
#     include_raw_frames=False  # One row per video with temporal summary features
# )

print("=== Example 2: Frame-by-frame temporal sequences ===")
# This preserves full temporal information for sequence models (RNN/LSTM)
# df_sequences = extract_enhanced_gait_features_to_csv(
#     video_paths=['path/to/video_folder/'],
#     output_csv='temporal_gait_sequences.csv', 
#     include_raw_frames=True  # Multiple rows per video, preserving time order
# )

print("=== Example 3: Test with existing empty folder ===")
# Test on empty list (should create empty CSV)
df_test = extract_enhanced_gait_features_to_csv(
    video_paths=[],
    output_csv='test_enhanced_empty.csv',
    include_raw_frames=False
)
print(f"Test result: {len(df_test)} rows")