In [103]:
import cv2
import os

# Paths
video_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/videos"
frames_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/frames"

# Ensure output directory exists
os.makedirs(frames_dir, exist_ok=True)

def extract_frames(video_path, output_dir, interval=1):
    """
    Extract frames from a video at a given interval (default: every 1 frame).
    """
    cap = cv2.VideoCapture(video_path)
    count = 0
    frame_count = 0
    video_name = os.path.basename(video_path).rsplit('.', 1)[0]  # Get the video name without extension

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Save every 'interval' frame
        if count % interval == 0:
            frame_path = os.path.join(output_dir, f"{video_name}_frame{frame_count}.jpg")
            cv2.imwrite(frame_path, frame)
            frame_count += 1

        count += 1

    cap.release()

# Process all videos
for video_file in os.listdir(video_dir):
    # Check if the filename ends with ".mp4" and starts with a valid number
    if video_file.endswith(".mp4"):
        try:
            # Split by '_' and attempt to convert the first part to float
            prefix = video_file.split('_')[0]
            float(prefix)  # This ensures it works for values like '0.5', '1', or '0'
            
            # Proceed with frame extraction
            video_path = os.path.join(video_dir, video_file)
            video_frames_dir = os.path.join(frames_dir, video_file.rsplit('.', 1)[0])  # Use full name
            os.makedirs(video_frames_dir, exist_ok=True)
            extract_frames(video_path, video_frames_dir)
            print(f"Frames extracted for {video_file}")
        except ValueError:
            print(f"Skipping file with invalid prefix: {video_file}")

print("Frame extraction completed!")

Frames extracted for 1_user2.mp4
Frames extracted for 1_user1.mp4
Frames extracted for 1_user5.mp4
Frames extracted for 1_user10.mp4
Frames extracted for 1_user12.mp4
Frames extracted for 1_user6.mp4
Frames extracted for 1_user13.mp4
Frames extracted for 0_user3.mp4
Frames extracted for 0_user23.mp4
Frames extracted for 0_user21.mp4
Frames extracted for 0_user7.mp4
Frames extracted for 0_user9.mp4
Frames extracted for 1_user22.mp4
Frames extracted for 1_user8.mp4
Frames extracted for 1_user20.mp4
Frame extraction completed!


In [104]:
import cv2
import os
import mediapipe as mp
import json
import math

# Paths
frames_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/frames"
keypoints_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/keypoints"

# Ensure output directory exists
os.makedirs(keypoints_dir, exist_ok=True)

# Initialize MediaPipe Pose
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True, model_complexity=2, enable_segmentation=False)

def extract_keypoints_from_frame(frame_path):
    """
    Extract keypoints relevant to posture and alignment, including the knee lift and arm lift.
    """
    image = cv2.imread(frame_path)
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    result = pose.process(rgb_image)

    if result.pose_landmarks:
        # Extract keypoints
        keypoints = {
            "right_foot_index": {
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_FOOT_INDEX].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_FOOT_INDEX].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_FOOT_INDEX].z,
            },
            "right_heel": {
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HEEL].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HEEL].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HEEL].z,
            },
            "left_foot_index": {
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_FOOT_INDEX].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_FOOT_INDEX].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_FOOT_INDEX].z,
            },
            "left_heel": {
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HEEL].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HEEL].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_HEEL].z,
            },
            "right_knee": {
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_KNEE].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_KNEE].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_KNEE].z,
            },
            "left_knee": {
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_KNEE].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_KNEE].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_KNEE].z,
            },
            "right_shoulder": {
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_SHOULDER].z,
            },
            "left_shoulder": {
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.LEFT_SHOULDER].z,
            },
            "right_hip": {  # Adding the missing right_hip keypoint
                "x": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP].x,
                "y": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP].y,
                "z": result.pose_landmarks.landmark[mp_pose.PoseLandmark.RIGHT_HIP].z,
            },
        }

        # Calculate key angles
        knee_lift_angle = calculate_knee_lift_angle(keypoints)
        arm_lift_angle = calculate_arm_lift_angle(keypoints)
        
        # Store the angles
        keypoints["knee_lift_angle"] = knee_lift_angle
        keypoints["arm_lift_angle"] = arm_lift_angle
        
        return keypoints
    return None


def calculate_knee_lift_angle(keypoints):
    """
    Calculate the angle between the hip, knee, and ankle to determine the knee lift.
    """
    # Get the coordinates of the hip, knee, and ankle
    right_hip = keypoints["right_hip"]
    right_knee = keypoints["right_knee"]
    right_heel = keypoints["right_heel"]

    # Calculate vectors from the hip to knee and knee to heel
    hip_to_knee = (right_knee["x"] - right_hip["x"], right_knee["y"] - right_hip["y"])
    knee_to_heel = (right_heel["x"] - right_knee["x"], right_heel["y"] - right_knee["y"])

    # Calculate the angle between the vectors using dot product
    dot_product = hip_to_knee[0] * knee_to_heel[0] + hip_to_knee[1] * knee_to_heel[1]
    hip_to_knee_magnitude = math.sqrt(hip_to_knee[0]**2 + hip_to_knee[1]**2)
    knee_to_heel_magnitude = math.sqrt(knee_to_heel[0]**2 + knee_to_heel[1]**2)
    
    cos_theta = dot_product / (hip_to_knee_magnitude * knee_to_heel_magnitude)
    angle = math.acos(cos_theta) * 180 / math.pi  # Convert from radians to degrees
    
    return angle

def calculate_arm_lift_angle(keypoints):
    """
    Calculate the angle between the shoulder and elbow to evaluate arm lift.
    """
    # Get the coordinates of the shoulder and elbow
    right_shoulder = keypoints["right_shoulder"]
    left_shoulder = keypoints["left_shoulder"]
    
    # Calculate the angle between the arms and the body
    arm_angle = math.atan2(left_shoulder["y"] - right_shoulder["y"], 
                           left_shoulder["x"] - right_shoulder["x"]) * 180 / math.pi
    
    return arm_angle

def process_frames(video_frames_dir, output_path):
    """
    Process all frames for a video and save keypoints as JSON.
    """
    keypoints_data = []
    for frame_file in sorted(os.listdir(video_frames_dir)):
        frame_path = os.path.join(video_frames_dir, frame_file)
        keypoints = extract_keypoints_from_frame(frame_path)
        if keypoints:
            keypoints_data.append(keypoints)

    # Save to JSON
    with open(output_path, 'w') as f:
        json.dump(keypoints_data, f, indent=4)

# Process frames for each video
for video_name in os.listdir(frames_dir):
    video_frames_dir = os.path.join(frames_dir, video_name)
    if os.path.isdir(video_frames_dir):
        output_path = os.path.join(keypoints_dir, f"{video_name}_keypoints.json")
        process_frames(video_frames_dir, output_path)
        print(f"Keypoints extracted for {video_name}")

print("Keypoint extraction completed!")

I0000 00:00:1736968423.494328 4520682 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M4 Pro
W0000 00:00:1736968423.549189 4554811 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736968423.586935 4554814 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Keypoints extracted for 0_user3
Keypoints extracted for 1_user13
Keypoints extracted for 1_user5
Keypoints extracted for 1_user22
Keypoints extracted for 1_user2
Keypoints extracted for 1_user12
Keypoints extracted for 0_user21
Keypoints extracted for 0_user9
Keypoints extracted for 0_user7
Keypoints extracted for 1_user10
Keypoints extracted for 1_user8
Keypoints extracted for 0_user23
Keypoints extracted for 1_user1
Keypoints extracted for 1_user6
Keypoints extracted for 1_user20
Keypoint extraction completed!


In [105]:
import os
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

# Paths
frames_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/frames"
clip_features_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/clip_features"
os.makedirs(clip_features_dir, exist_ok=True)

# Load CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def extract_clip_features(video_frames_dir, output_path, batch_size=8):
    """
    Extract CLIP features for all frames in a video using batch processing.
    """
    frame_features = []
    frame_paths = sorted(os.listdir(video_frames_dir))
    images = []

    for i, frame_file in enumerate(frame_paths):
        try:
            frame_path = os.path.join(video_frames_dir, frame_file)
            image = Image.open(frame_path).convert("RGB")
            images.append(image)

            # Process batch
            if len(images) == batch_size or i == len(frame_paths) - 1:
                inputs = processor(images=images, return_tensors="pt", padding=True).to(device)
                with torch.no_grad():
                    image_features = model.get_image_features(**inputs).cpu().numpy()
                    frame_features.extend(image_features)
                images = []  # Clear batch to free memory
        except Exception as e:
            print(f"Error processing frame {frame_file}: {e}")
            continue

    # Save features
    torch.save(frame_features, output_path)

# Process all videos
for video_name in os.listdir(frames_dir):
    video_frames_dir = os.path.join(frames_dir, video_name)
    if os.path.isdir(video_frames_dir):
        output_path = os.path.join(clip_features_dir, f"{video_name}_clip.pt")
        extract_clip_features(video_frames_dir, output_path)
        print(f"CLIP features extracted for {video_name}")

print("CLIP feature extraction completed!")

CLIP features extracted for 0_user3
CLIP features extracted for 1_user13
CLIP features extracted for 1_user5
CLIP features extracted for 1_user22
CLIP features extracted for 1_user2
CLIP features extracted for 1_user12
CLIP features extracted for 0_user21
CLIP features extracted for 0_user9
CLIP features extracted for 0_user7
CLIP features extracted for 1_user10
CLIP features extracted for 1_user8
CLIP features extracted for 0_user23
CLIP features extracted for 1_user1
CLIP features extracted for 1_user6
CLIP features extracted for 1_user20
CLIP feature extraction completed!


In [106]:
import json
import os
import torch
import numpy as np
import math

# Paths
keypoints_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/keypoints"
clip_features_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/clip_features"
combined_features_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/combined_features"

# Ensure output directory exists
os.makedirs(combined_features_dir, exist_ok=True)

def calculate_knee_lift(keypoints):
    """
    Detect knee lift based on the relative position of the knees and hips.
    """
    lifted_knee_frames = []
    for i in range(1, len(keypoints)):
        try:
            # Extract keypoints for the left and right knees and hips
            left_knee = keypoints[i][25]  # LEFT_KNEE
            right_knee = keypoints[i][26]  # RIGHT_KNEE
            left_hip = keypoints[i][23]  # LEFT_HIP
            right_hip = keypoints[i][24]  # RIGHT_HIP
        except KeyError as e:
            print(f"Missing keypoint {e} at frame {i}, skipping.")
            continue

        # Calculate the vertical distance between the knee and the hip
        left_knee_lift = left_knee['y'] - left_hip['y']
        right_knee_lift = right_knee['y'] - right_hip['y']

        # Define a threshold for significant knee lift
        if left_knee_lift > 0.1 or right_knee_lift > 0.1:  # Adjust the threshold based on your data
            lifted_knee_frames.append(i)

    return lifted_knee_frames

def calculate_arm_lift(keypoints):
    """
    Detect arm lift based on the position of the shoulders and elbows.
    """
    lifted_arm_frames = []
    for i in range(1, len(keypoints)):
        try:
            # Extract keypoints for left and right shoulders and elbows
            left_shoulder = keypoints[i][11]  # LEFT_SHOULDER
            right_shoulder = keypoints[i][12]  # RIGHT_SHOULDER
            left_elbow = keypoints[i][13]  # LEFT_ELBOW
            right_elbow = keypoints[i][14]  # RIGHT_ELBOW
        except KeyError as e:
            print(f"Missing keypoint {e} at frame {i}, skipping.")
            continue

        # Calculate the vertical position of the elbows compared to the shoulders
        left_arm_lift = left_elbow['y'] - left_shoulder['y']
        right_arm_lift = right_elbow['y'] - right_shoulder['y']

        # Define a threshold for arm lift (both arms lifted above shoulders)
        if left_arm_lift > 0.1 and right_arm_lift > 0.1:  # Adjust the threshold as necessary
            lifted_arm_frames.append(i)

    return lifted_arm_frames

def combine_features(video_name, pose_path, clip_path, output_path):
    """
    Combine pose-based features and CLIP embeddings for a video.
    """
    # Load pose-based features
    with open(pose_path, 'r') as f:
        keypoints_data = json.load(f)

    # Detect knee lift and arm lift
    lifted_knee_frames = calculate_knee_lift(keypoints_data)
    lifted_arm_frames = calculate_arm_lift(keypoints_data)

    # Load CLIP embeddings
    clip_data = torch.load(clip_path)

    # Combine features for each frame
    combined_data = []
    for i, clip_frame in enumerate(clip_data):
        # Use pose features for the current frame
        pose_features = {
            "knee_lift": 1 if i in lifted_knee_frames else 0,  # Binary flag for knee lift
            "arm_lifted": 1 if i in lifted_arm_frames else 0,  # Binary flag for arm lifted high
        }
        combined_frame = np.concatenate([clip_frame, list(pose_features.values())])
        combined_data.append(combined_frame)

    # Save combined features
    torch.save(combined_data, output_path)

# Process all videos
for video_name in os.listdir(keypoints_dir):
    if video_name.endswith("_keypoints.json"):
        video_name_base = video_name.replace("_keypoints.json", "")
        pose_path = os.path.join(keypoints_dir, video_name)
        clip_path = os.path.join(clip_features_dir, f"{video_name_base}_clip.pt")
        output_path = os.path.join(combined_features_dir, f"{video_name_base}_combined.pt")

        if os.path.exists(clip_path):
            combine_features(video_name_base, pose_path, clip_path, output_path)
            print(f"Combined features saved for {video_name_base}")

print("Feature combination completed!")

Missing keypoint 25 at frame 1, skipping.
Missing keypoint 25 at frame 2, skipping.
Missing keypoint 25 at frame 3, skipping.
Missing keypoint 25 at frame 4, skipping.
Missing keypoint 25 at frame 5, skipping.
Missing keypoint 25 at frame 6, skipping.
Missing keypoint 11 at frame 1, skipping.
Missing keypoint 11 at frame 2, skipping.
Missing keypoint 11 at frame 3, skipping.
Missing keypoint 11 at frame 4, skipping.
Missing keypoint 11 at frame 5, skipping.
Missing keypoint 11 at frame 6, skipping.
Combined features saved for 0_user3
Missing keypoint 25 at frame 1, skipping.
Missing keypoint 11 at frame 1, skipping.
Combined features saved for 1_user8
Missing keypoint 25 at frame 1, skipping.
Missing keypoint 25 at frame 2, skipping.
Missing keypoint 25 at frame 3, skipping.
Missing keypoint 25 at frame 4, skipping.
Missing keypoint 25 at frame 5, skipping.
Missing keypoint 25 at frame 6, skipping.
Missing keypoint 25 at frame 7, skipping.
Missing keypoint 11 at frame 1, skipping.
Miss

  clip_data = torch.load(clip_path)


In [107]:
import torch
from torch.utils.data import Dataset, DataLoader
import os

# Paths
combined_features_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/combined_features"

# Hyperparameters
sequence_length = 20  # Adjust as needed (still remains as sequence length)
batch_size = 16       # Adjust as needed

# Ensure input size is always 515
input_size = 515

class AthleticsDataset(Dataset):
    def __init__(self, combined_features_dir, sequence_length, input_size):
        """
        Handles loading and processing of combined features for athletics data.
        Ensures input size is always 515.
        """
        self.data = []
        self.labels = []
        self.sequence_length = sequence_length
        self.input_size = input_size

        for file in os.listdir(combined_features_dir):
            if file.endswith("_combined.pt"):
                # Load combined features
                video_features = torch.tensor(torch.load(os.path.join(combined_features_dir, file)), dtype=torch.float32)

                # Truncate or pad sequences to the desired length
                if video_features.shape[0] >= self.sequence_length:
                    video_features = video_features[:self.sequence_length]
                else:
                    padding = torch.zeros((self.sequence_length - video_features.shape[0], video_features.shape[1]))
                    video_features = torch.cat((video_features, padding), dim=0)

                # Ensure features match the input size (515)
                if video_features.shape[1] != self.input_size:
                    padding = torch.zeros((video_features.shape[0], self.input_size - video_features.shape[1]))
                    video_features = torch.cat((video_features, padding), dim=1)
                self.data.append(video_features)

                # Extract label from filename
                label = float(file.split("_")[0])  # Extract label from file name
                self.labels.append(label)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# Initialize dataset and DataLoader
def get_data_loaders(combined_features_dir, sequence_length, batch_size, input_size, train_split=0.8):
    dataset = AthleticsDataset(combined_features_dir, sequence_length, input_size)
    train_size = int(train_split * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, input_size  # Return loaders and input size

# Create DataLoaders
train_loader, val_loader, input_size = get_data_loaders(combined_features_dir, sequence_length, batch_size, input_size)

print(f"DataLoaders created. Input size: {input_size}")


DataLoaders created. Input size: 515


  video_features = torch.tensor(torch.load(os.path.join(combined_features_dir, file)), dtype=torch.float32)


In [108]:
import torch.nn as nn

class TemporalModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """
        LSTM-based model for sequence prediction.
        """
        super(TemporalModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)  # Use the final hidden state
        output = self.fc(hidden[-1])  # Fully connected output
        return output

In [109]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import itertools
import os

# Paths
combined_features_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/disc_throwing/stages/stage3/combined_features"

# Dataset and DataLoader
class AthleticsDataset(torch.utils.data.Dataset):
    def __init__(self, combined_features_dir, sequence_length):
        self.data = []
        self.labels = []
        self.sequence_length = sequence_length

        for file in os.listdir(combined_features_dir):
            if file.endswith("_combined.pt"):
                video_features = torch.tensor(torch.load(os.path.join(combined_features_dir, file)), dtype=torch.float32)

                if video_features.shape[0] >= self.sequence_length:
                    video_features = video_features[:self.sequence_length]
                else:
                    padding = torch.zeros((self.sequence_length - video_features.shape[0], video_features.shape[1]))
                    video_features = torch.cat((video_features, padding), dim=0)

                self.data.append(video_features)
                label = float(file.split("_")[0])  # Extract label from file name
                self.labels.append(label)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

def get_data_loaders(combined_features_dir, sequence_length, batch_size, train_split=0.8):
    dataset = AthleticsDataset(combined_features_dir, sequence_length)
    train_size = int(train_split * len(dataset))
    val_size = len(dataset) - train_size
    train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, dataset[0][0].shape[1]

# Define LSTM model
class TemporalModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout):
        super(TemporalModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)
        output = self.fc(hidden[-1])
        return output

# Training function with early stopping
def train_model(model, train_loader, val_loader, criterion, optimizer, device, num_epochs=500, patience=100):
    best_val_loss = float("inf")
    patience_counter = 0
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0
        for features, labels in train_loader:
            features, labels = features.to(device), torch.tensor(labels, dtype=torch.float32).to(device)

            optimizer.zero_grad()
            outputs = model(features)
            loss = criterion(outputs.squeeze(), labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Validation phase
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for features, labels in val_loader:
                features, labels = features.to(device), torch.tensor(labels, dtype=torch.float32).to(device)
                outputs = model(features)
                loss = criterion(outputs.squeeze(), labels)
                val_loss += loss.item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)
        print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

        # Early stopping logic with 100 epochs patience
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping triggered after {patience} epochs without improvement!")
            break

    return best_val_loss


# Hyperparameter grid search
# def hyperparameter_search():
    # Hyperparameter grid
    hidden_sizes = [32, 64, 128]
    num_layers = [1, 2, 3]
    learning_rates = [0.001, 0.005, 0.0001, 0.0005]
    dropouts = [0.0, 0.2, 0.3]

    # Initialize variables to track the best model
    best_val_loss = float("inf")
    best_params = None
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Prepare data
    train_loader, val_loader, input_size = get_data_loaders(combined_features_dir, sequence_length=20, batch_size=8)

    # Iterate over all combinations of hyperparameters
    for hidden_size, num_layer, learning_rate, dropout in itertools.product(hidden_sizes, num_layers, learning_rates, dropouts):
        print(f"Testing configuration: Hidden Size={hidden_size}, Num Layers={num_layer}, LR={learning_rate}, Dropout={dropout}")
        
        # Initialize model, criterion, and optimizer
        model = TemporalModel(input_size=input_size, hidden_size=hidden_size, num_layers=num_layer, output_size=1, dropout=dropout).to(device)
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Train the model
        val_loss = train_model(model, train_loader, val_loader, criterion, optimizer, device)

        # Update best model if this configuration is better
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_params = {
                "hidden_size": hidden_size,
                "num_layers": num_layer,
                "learning_rate": learning_rate,
                "dropout": dropout,
            }

    print(f"Best Configuration: {best_params}, Validation Loss: {best_val_loss:.4f}")
    return best_params

# Run hyperparameter search
# best_params = hyperparameter_search()

In [110]:
# Train the final model with the best configuration
final_hidden_size = 128
final_num_layers = 1
final_learning_rate = 0.00075
final_dropout = 0.35

# Prepare DataLoaders (use full dataset for training)
train_loader, val_loader, input_size = get_data_loaders(combined_features_dir, sequence_length=20, batch_size=32)
print(f"Input size for the model: {input_size}")

# Initialize final model
device = "cuda" if torch.cuda.is_available() else "cpu"
final_model = TemporalModel(input_size=input_size, hidden_size=final_hidden_size, num_layers=final_num_layers, output_size=1, dropout=final_dropout).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(final_model.parameters(), lr=final_learning_rate)

# Train final model
final_val_loss = train_model(final_model, train_loader, val_loader, criterion, optimizer, device, num_epochs=500, patience=100)
print(f"Final model trained. Validation Loss: {final_val_loss:.4f}")

Input size for the model: 514
Epoch [1/500], Train Loss: 0.2521, Val Loss: 0.3335
Epoch [2/500], Train Loss: 0.2495, Val Loss: 0.2852
Epoch [3/500], Train Loss: 0.2262, Val Loss: 0.2555
Epoch [4/500], Train Loss: 0.2196, Val Loss: 0.2397
Epoch [5/500], Train Loss: 0.2101, Val Loss: 0.2290
Epoch [6/500], Train Loss: 0.1935, Val Loss: 0.2251
Epoch [7/500], Train Loss: 0.1786, Val Loss: 0.2229
Epoch [8/500], Train Loss: 0.1673, Val Loss: 0.2110
Epoch [9/500], Train Loss: 0.1547, Val Loss: 0.1897
Epoch [10/500], Train Loss: 0.1409, Val Loss: 0.1733
Epoch [11/500], Train Loss: 0.1324, Val Loss: 0.1754
Epoch [12/500], Train Loss: 0.1267, Val Loss: 0.1977
Epoch [13/500], Train Loss: 0.1190, Val Loss: 0.2383
Epoch [14/500], Train Loss: 0.1160, Val Loss: 0.2682
Epoch [15/500], Train Loss: 0.1112, Val Loss: 0.2791
Epoch [16/500], Train Loss: 0.1023, Val Loss: 0.2970


  video_features = torch.tensor(torch.load(os.path.join(combined_features_dir, file)), dtype=torch.float32)
  features, labels = features.to(device), torch.tensor(labels, dtype=torch.float32).to(device)
  features, labels = features.to(device), torch.tensor(labels, dtype=torch.float32).to(device)


Epoch [17/500], Train Loss: 0.0976, Val Loss: 0.3348
Epoch [18/500], Train Loss: 0.0879, Val Loss: 0.3875
Epoch [19/500], Train Loss: 0.0810, Val Loss: 0.4256
Epoch [20/500], Train Loss: 0.0755, Val Loss: 0.4433
Epoch [21/500], Train Loss: 0.0693, Val Loss: 0.4700
Epoch [22/500], Train Loss: 0.0675, Val Loss: 0.5152
Epoch [23/500], Train Loss: 0.0635, Val Loss: 0.5652
Epoch [24/500], Train Loss: 0.0632, Val Loss: 0.5835
Epoch [25/500], Train Loss: 0.0612, Val Loss: 0.5851
Epoch [26/500], Train Loss: 0.0608, Val Loss: 0.6058
Epoch [27/500], Train Loss: 0.0585, Val Loss: 0.6320
Epoch [28/500], Train Loss: 0.0577, Val Loss: 0.6443
Epoch [29/500], Train Loss: 0.0559, Val Loss: 0.6500
Epoch [30/500], Train Loss: 0.0535, Val Loss: 0.6698
Epoch [31/500], Train Loss: 0.0501, Val Loss: 0.6876
Epoch [32/500], Train Loss: 0.0476, Val Loss: 0.6926
Epoch [33/500], Train Loss: 0.0473, Val Loss: 0.7138
Epoch [34/500], Train Loss: 0.0394, Val Loss: 0.7329
Epoch [35/500], Train Loss: 0.0376, Val Loss: 

In [111]:
import matplotlib.pyplot as plt

def evaluate_and_collect_predictions(model, data_loader, device):
    """
    Evaluate the model on the provided DataLoader and collect true vs. predicted values.
    """
    model.eval()
    true_labels = []
    predicted_labels = []

    with torch.no_grad():
        for features, labels in data_loader:
            features = features.to(device)
            labels = labels.to(device)

            # Get model predictions
            outputs = model(features).squeeze()
            predicted_labels.extend(outputs.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    return true_labels, predicted_labels

# Use validation set for evaluation
true_labels, predicted_labels = evaluate_and_collect_predictions(final_model, val_loader, device)


In [112]:
import cv2
import os
import torch
from PIL import Image
import numpy as np
from transformers import CLIPProcessor, CLIPModel
import mediapipe as mp

# Paths
video_dir = "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/videos"
sequence_length = 20  # Sequence length for LSTM

# Initialize CLIP and MediaPipe
device = "cuda" if torch.cuda.is_available() else "cpu"
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=True, model_complexity=2, enable_segmentation=False)

# Ensure input size is always 515
input_size = 515

# Helper functions
def extract_frames(video_path, interval=5):
    """
    Extract frames from a video at a given interval.
    """
    frames = []
    cap = cv2.VideoCapture(video_path)
    count = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        if count % interval == 0:
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(rgb_frame)

        count += 1

    cap.release()
    return frames

def extract_keypoints(frames):
    """
    Extract pose keypoints from frames using MediaPipe.
    """
    keypoints = []
    for frame in frames:
        result = pose.process(frame)
        if result.pose_landmarks:
            keypoints.append([
                {"x": lm.x, "y": lm.y, "z": lm.z, "visibility": lm.visibility}
                for lm in result.pose_landmarks.landmark
            ])
    return keypoints

def calculate_velocity_and_acceleration(keypoints):
    """
    Calculate velocity and acceleration using the knee and arm keypoints.
    Assumes that knee lift and arm movement are important.
    """
    velocities, accelerations = [], []
    for i in range(1, len(keypoints)):
        # Calculate velocity using distance between knee and arm keypoints
        knee_prev = keypoints[i - 1][25]  # Left knee
        knee_curr = keypoints[i][25]  # Left knee
        arm_prev = keypoints[i - 1][11]  # Left shoulder
        arm_curr = keypoints[i][11]  # Left shoulder

        # Calculate velocity as Euclidean distance between knee and arm
        velocity_knee = np.sqrt((knee_curr['x'] - knee_prev['x'])**2 + (knee_curr['y'] - knee_prev['y'])**2)
        velocity_arm = np.sqrt((arm_curr['x'] - arm_prev['x'])**2 + (arm_curr['y'] - arm_prev['y'])**2)
        velocities.append((velocity_knee + velocity_arm) / 2)

        if i > 1:
            accelerations.append(velocities[-1] - velocities[-2])
    return velocities, accelerations

def assess_knee_and_arm_angle(keypoints):
    """
    Assess the knee and arm lift during takeoff by calculating angles
    between the hip, knee, and arm keypoints.
    """
    angles = []
    for i in range(1, len(keypoints)):
        # Compute the angle between the knee and arm (using knee, hip, and shoulder keypoints)
        knee = keypoints[i][25]  # Left knee
        hip = keypoints[i][23]  # Left hip
        shoulder = keypoints[i][11]  # Left shoulder

        # Calculate vectors for the knee and arm
        knee_vector = np.array([hip['x'] - knee['x'], hip['y'] - knee['y']])
        arm_vector = np.array([shoulder['x'] - knee['x'], shoulder['y'] - knee['y']])

        # Compute the angle between the knee and arm using dot product
        dot_product = np.dot(knee_vector, arm_vector)
        magnitude_knee = np.linalg.norm(knee_vector)
        magnitude_arm = np.linalg.norm(arm_vector)

        # Angle in radians
        angle = np.arccos(dot_product / (magnitude_knee * magnitude_arm))
        angles.append(np.degrees(angle))  # Convert to degrees

    return angles

def extract_clip_features(frames):
    """
    Extract CLIP embeddings for each frame.
    """
    embeddings = []
    for frame in frames:
        image = Image.fromarray(frame)
        inputs = clip_processor(images=image, return_tensors="pt").to(device)
        with torch.no_grad():
            embedding = clip_model.get_image_features(**inputs).cpu().numpy().flatten()
            embeddings.append(embedding)
    return embeddings

def combine_features(clip_embeddings, velocities, accelerations, knee_arm_angles):
    """
    Combine CLIP embeddings, velocities, accelerations, and knee-arm angles into a single feature tensor.
    Ensure the final feature vector has 514 features.
    """
    combined_features = []
    for i, clip_embedding in enumerate(clip_embeddings):
        # Combine features with pose features (velocity, acceleration, and knee-arm angle)
        feature_vector = np.concatenate([clip_embedding, 
                                        [velocities[i] if i < len(velocities) else 0],
                                        [accelerations[i] if i < len(accelerations) else 0],
                                        [knee_arm_angles[i] if i < len(knee_arm_angles) else 0]])

        # Ensure the feature vector has 514 features
        if len(feature_vector) > 514:
            feature_vector = feature_vector[:514]  # Truncate if there are more than 514 features
        elif len(feature_vector) < 514:
            padding = np.zeros(514 - len(feature_vector))  # Pad with zeros if there are fewer than 514 features
            feature_vector = np.concatenate([feature_vector, padding])

        combined_features.append(feature_vector)
    
    return torch.tensor(combined_features, dtype=torch.float32)

def process_and_predict(video_path, model, sequence_length):
    """
    Process a single video and predict its score.
    """
    print(f"Processing video: {os.path.basename(video_path)}")
    # Step 1: Extract frames
    frames = extract_frames(video_path)

    # Step 2: Extract keypoints
    keypoints = extract_keypoints(frames)

    # Step 3: Calculate pose-based features (velocity, acceleration)
    velocities, accelerations = calculate_velocity_and_acceleration(keypoints)
    knee_arm_angles = assess_knee_and_arm_angle(keypoints)  # Assess knee and arm angle

    # Step 4: Extract CLIP features
    clip_embeddings = extract_clip_features(frames)

    # Step 5: Combine features
    combined_features = combine_features(clip_embeddings, velocities, accelerations, knee_arm_angles)

    # Step 6: Truncate or pad sequences to fixed length (515 features)
    if combined_features.shape[0] >= sequence_length:
        combined_features = combined_features[:sequence_length]
    else:
        padding = torch.zeros((sequence_length - combined_features.shape[0], combined_features.shape[1]))
        combined_features = torch.cat((combined_features, padding), dim=0)

    # Step 7: Predict using the model
    model.eval()
    with torch.no_grad():
        combined_features = combined_features.unsqueeze(0).to(device)  # Add batch dimension
        prediction = model(combined_features).squeeze().cpu().item()

    print(f"Prediction: {prediction:.4f}")
    return prediction

# Process all videos in the directory and predict
results = {}
for video_file in os.listdir(video_dir):
    if video_file.endswith(".mp4"):
        video_path = os.path.join(video_dir, video_file)
        prediction = process_and_predict(video_path, final_model, sequence_length)
        results[video_file] = prediction

# Print results
print("\nPrediction Results:")
for video, prediction in results.items():
    print(f"{video}: {prediction:.4f}")


I0000 00:00:1736968454.885045 4520682 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M4 Pro
W0000 00:00:1736968454.950298 4555805 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736968455.001522 4555797 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


Processing video: 1_user2.mp4
Prediction: 0.9825
Processing video: 1_user1.mp4
Prediction: 0.9945
Processing video: 1_user5.mp4
Prediction: 0.0429
Processing video: 1_user10.mp4
Prediction: 0.7786
Processing video: 1_user12.mp4
Prediction: 0.0136
Processing video: 1_user6.mp4
Prediction: 0.0518
Processing video: 1_user13.mp4
Prediction: 0.0177
Processing video: 0_user3.mp4
Prediction: 0.0311
Processing video: 0_user23.mp4
Prediction: 0.3782
Processing video: 0_user21.mp4
Prediction: 0.0997
Processing video: 0_user7.mp4
Prediction: 0.0163
Processing video: 0_user9.mp4
Prediction: 0.4063
Processing video: 1_user22.mp4
Prediction: 0.0305
Processing video: 1_user8.mp4
Prediction: 0.0151
Processing video: 1_user20.mp4
Prediction: 0.0237

Prediction Results:
1_user2.mp4: 0.9825
1_user1.mp4: 0.9945
1_user5.mp4: 0.0429
1_user10.mp4: 0.7786
1_user12.mp4: 0.0136
1_user6.mp4: 0.0518
1_user13.mp4: 0.0177
0_user3.mp4: 0.0311
0_user23.mp4: 0.3782
0_user21.mp4: 0.0997
0_user7.mp4: 0.0163
0_user9.mp4:

In [113]:
torch.save(final_model.state_dict(), "/Users/danyukezz/Desktop/2 year 1 semester/team project/danya_preprocessing_sports/height_jump/stages/stage3/stage3_height.pth")

___

## feedback generation

In [114]:
def process_video_with_metadata(video_path, model, sequence_length):
    """
    Process a video and return structured metadata with predictions and pose metrics.
    """
    print(f"Processing video: {os.path.basename(video_path)}")
    
    # Step 1: Extract frames
    frames = extract_frames(video_path)
    
    # Step 2: Extract keypoints
    keypoints = extract_keypoints(frames)

    # Step 3: Calculate pose-based features
    velocities, accelerations = calculate_velocity_and_acceleration(keypoints)
    stride_lengths = calculate_stride_length(keypoints)

    # Pose metrics
    pose_metrics = {
        "average_velocity": np.mean(velocities) if velocities else 0,
        "max_stride_length": np.max(stride_lengths) if stride_lengths else 0,
        "average_acceleration": np.mean(accelerations) if accelerations else 0
    }

    # Step 4: Extract CLIP features
    clip_embeddings = extract_clip_features(frames)

    # CLIP insights
    clip_summary = "High contextual alignment with accelerating motion."  # Placeholder for now

    # Step 5: Combine features
    combined_features = combine_features(clip_embeddings, velocities, accelerations, stride_lengths)

    # Step 6: Truncate or pad sequences
    if combined_features.shape[0] >= sequence_length:
        combined_features = combined_features[:sequence_length]
    else:
        padding = torch.zeros((sequence_length - combined_features.shape[0], combined_features.shape[1]))
        combined_features = torch.cat((combined_features, padding), dim=0)

    # Step 7: Predict using the model
    model.eval()
    with torch.no_grad():
        combined_features = combined_features.unsqueeze(0).to(device)  # Add batch dimension
        prediction = model(combined_features).squeeze().cpu().item()

    print(f"Prediction: {prediction:.4f}")

    # Return structured metadata
    return {
        "video_name": os.path.basename(video_path),
        "prediction": prediction,
        "pose_metrics": pose_metrics,
        "clip_features": {"embedding_summary": clip_summary}
    }

In [115]:
# from transformers import pipeline

# generator = pipeline("text2text-generation", model="google/flan-t5-base")
# result = generator("Explain the performance of an athlete based on metrics.", max_length=50)
# print(result[0]['generated_text'])


# def validate_metadata(metadata):
#     required_keys = {
#         'video_name': str,
#         'prediction': float,
#         'pose_metrics': dict,
#         'clip_features': dict,
#     }
#     pose_metrics_keys = ['average_velocity', 'max_stride_length', 'average_acceleration']
#     clip_features_keys = ['embedding_summary']

#     for key, expected_type in required_keys.items():
#         if key not in metadata or not isinstance(metadata[key], expected_type):
#             raise ValueError(f"Invalid or missing key: {key}, expected type: {expected_type}")
    
#     for key in pose_metrics_keys:
#         if key not in metadata['pose_metrics']:
#             raise ValueError(f"Missing pose metric: {key}")
    
#     for key in clip_features_keys:
#         if key not in metadata['clip_features']:
#             raise ValueError(f"Missing clip feature: {key}")

# def generate_justification(metadata, max_length=150, num_return_sequences=1):
#     validate_metadata(metadata)
    
#     # Dynamic prompt construction
#     metrics_prompt = []
#     for metric, value in metadata['pose_metrics'].items():
#         metrics_prompt.append(f"- {metric.replace('_', ' ').title()}: {value:.2f}")
#     metrics_text = "\n".join(metrics_prompt)

#     prompt = f"""
#     Analyze the performance for {metadata['video_name']}.
#     The predicted score is {metadata['prediction']:.2f}.
#     Key metrics:
#     {metrics_text}
#     - CLIP embedding summary: {metadata['clip_features']['embedding_summary']}
    
#     Based on these metrics, explain why the score is appropriate and provide constructive feedback for improvement.
#     """
#     print("Generated prompt:", prompt)  # Debugging log
#     result = generator(prompt, max_length=max_length, num_return_sequences=num_return_sequences)
#     return result[0]['generated_text']


In [116]:
# # Directory containing videos
# video_dir = "/Users/cezar/Desktop/Team Project/AI/distance_jump/stage1/videos"

# # Dictionary to store results
# results = {}

# for video_file in os.listdir(video_dir):
#     if video_file.endswith(".mp4"):
#         video_path = os.path.join(video_dir, video_file)
        
#         # Step 1: Process video and generate metadata
#         metadata = process_video_with_metadata(video_path, final_model, sequence_length=20)
        
#         # Step 2: Generate justification using Hugging Face model
#         justification = generate_justification(metadata)
#         metadata["justification"] = justification
        
#         # Store results
#         results[video_file] = metadata

# # Print results
# for video, data in results.items():
#     print(f"Video: {video}")
#     print(f"Prediction: {data['prediction']:.4f}")
#     print(f"Justification: {data['justification']}")
#     print("-" * 50)

# # Optional: Save results to JSON
# # import json
# # with open("video_predictions_with_justifications.json", "w") as f:
# #     json.dump(results, f, indent=4)