# DAY 1

## Import the Required Libraries

In [1]:
import os
import cv2
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
from scipy.signal import resample

import mediapipe as mp
from mediapipe.python.solutions.drawing_utils import draw_landmarks
from mediapipe.python.solutions.drawing_styles import get_default_pose_landmarks_style

import yt_dlp

from fastdtw import fastdtw
from scipy.spatial.distance import euclidean
from dtaidistance import dtw

## 1. Keypoint Extraction from the Video

In [2]:
def extract_keypoints_from_video(video_path, output_csv, max_frames=None, start_sec=None, end_sec=None, debug_dir=None):
    """Run MediaPipe Pose on a video and write one CSV row of landmarks per frame.

    The CSV has a ``frame`` column followed by ``x{i}, y{i}, z{i}`` for each of
    the 33 pose landmarks. Frames in which no pose is detected are written as
    a row of zeros so the row count always matches the number of frames
    processed.

    Args:
        video_path: Path of the video file to read.
        output_csv: Destination CSV path; its parent directory is created if
            the path has one.
        max_frames: Optional cap on the number of frames written. ``None`` or
            ``0`` means no limit.
        start_sec: Optional start of the time window in seconds; earlier
            frames are skipped. ``None`` means start at the first frame.
        end_sec: Optional end of the time window in seconds; reading stops at
            the first frame later than this. ``None`` means read to the end.
        debug_dir: Optional directory; when given, every 30th processed frame
            is saved there as a JPEG with the detected pose drawn on it.

    Raises:
        IOError: If the video cannot be opened.
        ValueError: If the video reports a non-positive frame rate, which
            makes per-frame timestamps impossible to compute.
    """
    num_landmarks = 33

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            raise IOError(f"Could not open video: {video_path}")

        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps or fps <= 0:
            raise ValueError(f"Video reports an invalid FPS ({fps}): {video_path}")
        total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        duration = total_frames / fps

        print(f"\n[INFO] Processing video: {video_path}")
        print(f"[INFO] FPS: {fps:.2f}, Total Frames: {int(total_frames)}, Duration: {duration:.2f}s")

        # dirname is '' for a bare filename, and os.makedirs('') raises.
        output_dir = os.path.dirname(output_csv)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        if debug_dir:
            os.makedirs(debug_dir, exist_ok=True)

        frame_idx = 0
        saved_idx = 0
        first_frame_time, last_frame_time = None, None

        mp_pose = mp.solutions.pose
        # Context managers guarantee the pose graph and the CSV are closed
        # even if processing fails part-way through.
        with mp_pose.Pose() as pose, open(output_csv, mode='w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            header = ['frame']
            for i in range(num_landmarks):
                header += [f'x{i}', f'y{i}', f'z{i}']
            writer.writerow(header)

            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                # Timestamp of the frame just read (index taken before increment).
                time_sec = frame_idx / fps
                frame_idx += 1

                # Compare against None so that a bound of 0 seconds is honoured.
                if start_sec is not None and time_sec < start_sec:
                    continue
                if end_sec is not None and time_sec > end_sec:
                    break
                if max_frames and saved_idx >= max_frames:
                    break

                if first_frame_time is None:
                    first_frame_time = time_sec
                last_frame_time = time_sec

                # OpenCV decodes to BGR; the pose model is fed RGB.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                result = pose.process(frame_rgb)

                # The 'frame' column is the index among written rows, not the
                # index in the source video.
                row = [saved_idx]
                if result.pose_landmarks:
                    for lm in result.pose_landmarks.landmark:
                        row += [lm.x, lm.y, lm.z]
                else:
                    row += [0.0] * (num_landmarks * 3)
                writer.writerow(row)

                if debug_dir and saved_idx % 30 == 0:
                    debug_frame = frame.copy()
                    if result.pose_landmarks:
                        draw_landmarks(
                            debug_frame,
                            result.pose_landmarks,
                            mp_pose.POSE_CONNECTIONS,
                            landmark_drawing_spec=get_default_pose_landmarks_style()
                        )
                    debug_path = os.path.join(debug_dir, f"frame_{saved_idx}_pose.jpg")
                    cv2.imwrite(debug_path, debug_frame)

                saved_idx += 1
    finally:
        cap.release()

    print(f"[INFO] Keypoints saved to {output_csv}")
    print(f"[INFO] Frames processed: {saved_idx}")
    if first_frame_time is None:
        # No frame fell inside the requested window; there is no range to report.
        print("[INFO] Time range: no frames were processed\n")
    else:
        print(f"[INFO] Time range: {first_frame_time:.2f}s to {last_frame_time:.2f}s\n")



## 2. DTW Score Comparison 

In [3]:
def load_keypoints_from_csv(csv_file):
    """Load a keypoint CSV into a 2-D float array, one row per frame.

    The first line is treated as a header and skipped, and the first column
    of every data row (the frame index) is dropped. Blank lines are ignored
    so a trailing newline or stray empty row cannot produce a ragged array.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        ``np.ndarray`` of shape ``(n_frames, n_values)``; an empty array of
        shape ``(0,)`` when the file has no data rows.

    Raises:
        ValueError: If a data cell cannot be parsed as a float.
    """
    keypoints = []
    with open(csv_file, 'r', newline='') as f:
        reader = csv.reader(f)
        # Default of None keeps a completely empty file from raising StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                continue  # csv.reader yields [] for blank lines
            keypoints.append([float(value) for value in row[1:]])
    return np.array(keypoints)


def compute_dtw_distance(seq1, seq2):
    """Compute the FastDTW alignment between two sequences.

    Frames are compared with the Euclidean distance.

    Args:
        seq1: First sequence, passed straight to ``fastdtw``.
        seq2: Second sequence, passed straight to ``fastdtw``.

    Returns:
        The ``(distance, path)`` pair produced by ``fastdtw``.
    """
    total_cost, warp_path = fastdtw(seq1, seq2, dist=euclidean)
    return total_cost, warp_path

## 3. Preprocess Keypoints

In [4]:


def preprocess_keypoints(csv_path, selected_indices):
    """Build a root-centred, scale-normalised 2-D pose sequence from a keypoint CSV.

    For every row of the CSV the (x, y) coordinates of the selected landmarks
    are gathered, translated so the first selected landmark sits at the
    origin, and divided by the L2 norm of the resulting vector. Rows whose
    centred vector is all zeros are left as zeros instead of being divided.

    Args:
        csv_path: Path of a CSV whose first column is a frame index and whose
            remaining columns are laid out as ``x0, y0, z0, x1, y1, z1, ...``.
        selected_indices: Landmark indices to keep; the first one is used as
            the root for centring.

    Returns:
        ``np.ndarray`` of shape ``(n_frames, 2 * len(selected_indices))``.

    Raises:
        IndexError: If ``selected_indices`` is empty or refers to a landmark
            that is not present in the CSV.
    """
    df = pd.read_csv(csv_path)
    # Drop the leading frame column positionally; the rest are coordinates.
    coords = df.iloc[:, 1:].to_numpy(dtype=float)

    landmark_idx = np.asarray(selected_indices, dtype=int)
    if landmark_idx.size == 0:
        raise IndexError("selected_indices must contain at least one landmark index")

    # Interleave the x and y column positions: x_a, y_a, x_b, y_b, ...
    xy_columns = np.column_stack((landmark_idx * 3, landmark_idx * 3 + 1)).ravel()
    poses = coords[:, xy_columns]

    # Subtract the root landmark's (x, y) from every selected landmark.
    root_xy = poses[:, :2]
    centered = poses - np.tile(root_xy, landmark_idx.size)

    # Per-frame scale normalisation; a zero norm means nothing to scale.
    norms = np.linalg.norm(centered, axis=1, keepdims=True)
    safe_norms = np.where(norms == 0, 1.0, norms)
    return centered / safe_norms


def align_and_resample(seq1, seq2):
    """Resample both sequences to the length of the shorter one.

    Each sequence is passed through ``scipy.signal.resample`` with the
    shorter length as the target sample count.

    Args:
        seq1: First sequence (anything with ``len`` accepted by ``resample``).
        seq2: Second sequence.

    Returns:
        A ``(seq1_resampled, seq2_resampled)`` tuple of equal-length arrays.
    """
    target_len = min(map(len, (seq1, seq2)))
    return tuple(resample(seq, target_len) for seq in (seq1, seq2))

# Main Pipeline for Running Day 1 Deliverables

In [5]:
def download_youtube_video(url, output_path="video.mp4"):
    """Download a video with yt-dlp, requesting MP4 output.

    This is best-effort: any exception raised while downloading is reported
    on stdout and swallowed.

    Args:
        url: Video URL handed to yt-dlp.
        output_path: Output template/path given to yt-dlp.

    Returns:
        ``output_path`` on success, ``None`` if the download failed.
    """
    try:
        with yt_dlp.YoutubeDL({
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
            'outtmpl': output_path,
            'merge_output_format': 'mp4',
            'quiet': False,
        }) as downloader:
            downloader.download([url])
        print(f"[INFO] Downloaded video to: {output_path}")
    except Exception as err:
        print(f"[ERROR] Failed to download {url}: {err}")
        return None
    return output_path

In [None]:

if __name__ == "__main__":
    # MediaPipe Pose landmark indices for the left-side joints compared here:
    # shoulder (11), elbow (13), wrist (15), hip (23), knee (25), ankle (27).
    SELECTED_INDICES = [11, 13, 15, 23, 25, 27]
    OUTPUT_DIR = "outputs"
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # Only this window of video 2 is compared against video 1.
    VIDEO2_START_SEC = 21
    VIDEO2_END_SEC = 31

    video1_url = "https://www.youtube.com/watch?v=V0UDCppaxEw"
    video2_url = "https://www.youtube.com/watch?v=zC_F23Rxu-A"

    video1_path = "video1.mp4"
    video2_path = "video2.mp4"
    output_csv1 = os.path.join(r"../data", "video1_keypoints.csv")
    output_csv2 = os.path.join(r"../data", "video2_keypoints.csv")

    # Download only what is missing, and stop with a clear message if a video
    # is still unavailable rather than failing later inside OpenCV.
    for video_url, video_path in ((video1_url, video1_path), (video2_url, video2_path)):
        if not os.path.exists(video_path):
            download_youtube_video(video_url, video_path)
        if not os.path.exists(video_path):
            raise RuntimeError(f"Video not available at {video_path} (source: {video_url})")

    extract_keypoints_from_video(video1_path, output_csv1, debug_dir="debug_frames/video1")
    extract_keypoints_from_video(
        video2_path,
        output_csv2,
        start_sec=VIDEO2_START_SEC,
        end_sec=VIDEO2_END_SEC,
        debug_dir="debug_frames/video2",
    )

    print("[INFO] Preprocessing keypoints...")
    seq1 = preprocess_keypoints(output_csv1, SELECTED_INDICES)
    seq2 = preprocess_keypoints(output_csv2, SELECTED_INDICES)

    # Reuse the notebook's helpers instead of repeating their bodies inline.
    seq1, seq2 = align_and_resample(seq1, seq2)
    min_len = len(seq1)

    dtw_distance, dtw_path = compute_dtw_distance(seq1, seq2)
    print(f"DTW Distance Score: {round(dtw_distance, 2)}")



[INFO] Processing video: video1.mp4
[INFO] FPS: 30.07, Total Frames: 435, Duration: 14.47s
[INFO] Keypoints saved to ../data\video1_keypoints.csv
[INFO] Frames processed: 434
[INFO] Time range: 0.00s to 14.40s


[INFO] Processing video: video2.mp4
[INFO] FPS: 30.00, Total Frames: 1431, Duration: 47.70s
[INFO] Keypoints saved to ../data\video2_keypoints.csv
[INFO] Frames processed: 301
[INFO] Time range: 21.00s to 31.00s

[INFO] Preprocessing keypoints...
DTW Distance Score: 140.83


: 