In [1]:
# Required imports
import os
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
import json

# Paths
input_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage3/videos"
output_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage3/keypoint_data"

# MediaPipe Pose setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Helper function to extract keypoints
def extract_keypoints(video_path):
    cap = cv2.VideoCapture(video_path)
    keypoints = []
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(frame_rgb)
        
        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark
            # Extract keypoints for left leg (bracing) and right leg (push)
            left_leg = {
                "hip": [landmarks[mp_pose.PoseLandmark.LEFT_HIP].x, landmarks[mp_pose.PoseLandmark.LEFT_HIP].y],
                "knee": [landmarks[mp_pose.PoseLandmark.LEFT_KNEE].x, landmarks[mp_pose.PoseLandmark.LEFT_KNEE].y],
                "ankle": [landmarks[mp_pose.PoseLandmark.LEFT_ANKLE].x, landmarks[mp_pose.PoseLandmark.LEFT_ANKLE].y],
            }
            right_leg = {
                "hip": [landmarks[mp_pose.PoseLandmark.RIGHT_HIP].x, landmarks[mp_pose.PoseLandmark.RIGHT_HIP].y],
                "knee": [landmarks[mp_pose.PoseLandmark.RIGHT_KNEE].x, landmarks[mp_pose.PoseLandmark.RIGHT_KNEE].y],
                "ankle": [landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE].x, landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE].y],
            }
            keypoints.append({"left_leg": left_leg, "right_leg": right_leg})
    
    cap.release()
    return keypoints

# Process videos
os.makedirs(output_path, exist_ok=True)
for video_file in os.listdir(input_path):
    if video_file.endswith(".mp4"):
        video_path = os.path.join(input_path, video_file)
        keypoints = extract_keypoints(video_path)
        
        # Save keypoints to JSON
        output_file = os.path.join(output_path, video_file.replace(".mp4", ".json"))
        with open(output_file, "w") as f:
            json.dump(keypoints, f)
        print(f"Keypoints extracted and saved for {video_file}")

print("Keypoint extraction completed!")

I0000 00:00:1736689102.075765 8365806 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1736689102.152487 8383793 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736689102.166544 8383794 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736689102.182426 8383798 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Keypoints extracted and saved for 1_user2.mp4
Keypoints extracted and saved for 1_user1.mp4
Keypoints extracted and saved for 1_user4.mp4
Keypoints extracted and saved for 1_user5.mp4
Keypoints extracted and saved for 1_user12.mp4
Keypoints extracted and saved for 1_user7.mp4
Keypoints extracted and saved for 1_user6.mp4
Keypoints extracted and saved for 1_user13.mp4
Keypoints extracted and saved for 0_user3.mp4
Keypoints extracted and saved for 0_user10.mp4
Keypoints extracted and saved for 0_user9.mp4
Keypoints extracted and saved for 1_user22.mp4
Keypoints extracted and saved for 1_user23.mp4
Keypoints extracted and saved for 1_user21.mp4
Keypoints extracted and saved for 1_user8.mp4
Keypoints extracted and saved for 1_user20.mp4
Keypoints extracted and saved for 1_user19.mp4
Keypoint extraction completed!


### augmentation

In [3]:
import random

# Augmentation parameters
ROTATION_ANGLE = 10  # degrees
SCALE_FACTOR = 0.1  # 10%

# Helper functions for augmentations
def mirror_keypoints(keypoints):
    mirrored = []
    for frame in keypoints:
        left_leg = frame["left_leg"]
        right_leg = frame["right_leg"]
        # Swap left and right leg keypoints
        mirrored.append({"left_leg": right_leg, "right_leg": left_leg})
    return mirrored

def rotate_keypoints(keypoints, angle=ROTATION_ANGLE):
    angle_rad = np.radians(angle)
    cos_angle = np.cos(angle_rad)
    sin_angle = np.sin(angle_rad)
    rotated = []
    
    for frame in keypoints:
        new_frame = {}
        for leg in ["left_leg", "right_leg"]:
            new_leg = {}
            for joint, coords in frame[leg].items():
                x, y = coords
                # Apply rotation transformation
                x_new = cos_angle * x - sin_angle * y
                y_new = sin_angle * x + cos_angle * y
                new_leg[joint] = [x_new, y_new]
            new_frame[leg] = new_leg
        rotated.append(new_frame)
    
    return rotated

def scale_keypoints(keypoints, scale_factor=SCALE_FACTOR):
    scaled = []
    for frame in keypoints:
        new_frame = {}
        for leg in ["left_leg", "right_leg"]:
            new_leg = {}
            for joint, coords in frame[leg].items():
                x, y = coords
                # Apply scaling transformation
                x_new = x * (1 + scale_factor)
                y_new = y * (1 + scale_factor)
                new_leg[joint] = [x_new, y_new]
            new_frame[leg] = new_leg
        scaled.append(new_frame)
    
    return scaled

# Augment keypoints and save
augmented_output_path = os.path.join(output_path, "augmented")
os.makedirs(augmented_output_path, exist_ok=True)

for json_file in os.listdir(output_path):
    if json_file.endswith(".json"):
        json_path = os.path.join(output_path, json_file)
        
        with open(json_path, "r") as f:
            keypoints = json.load(f)
        
        # Apply augmentations
        augmented_keypoints = []
        augmented_keypoints += mirror_keypoints(keypoints)
        augmented_keypoints += rotate_keypoints(keypoints, angle=random.uniform(-ROTATION_ANGLE, ROTATION_ANGLE))
        augmented_keypoints += scale_keypoints(keypoints, scale_factor=random.uniform(-SCALE_FACTOR, SCALE_FACTOR))
        
        # Save augmented keypoints
        output_file = os.path.join(augmented_output_path, json_file.replace(".json", "_augmented.json"))
        with open(output_file, "w") as f:
            json.dump(augmented_keypoints, f)
        print(f"Augmented keypoints saved for {json_file}")

print("Data augmentation completed!")

Augmented keypoints saved for 1_user2.json
Augmented keypoints saved for 1_user12.json
Augmented keypoints saved for 1_user13.json
Augmented keypoints saved for 1_user8.json
Augmented keypoints saved for 1_user22.json
Augmented keypoints saved for 1_user4.json
Augmented keypoints saved for 1_user5.json
Augmented keypoints saved for 1_user19.json
Augmented keypoints saved for 0_user10.json
Augmented keypoints saved for 1_user23.json
Augmented keypoints saved for 1_user20.json
Augmented keypoints saved for 0_user3.json
Augmented keypoints saved for 1_user6.json
Augmented keypoints saved for 1_user7.json
Augmented keypoints saved for 1_user21.json
Augmented keypoints saved for 0_user9.json
Augmented keypoints saved for 1_user1.json
Data augmentation completed!


### create features in a csv file

In [4]:
import math

# Helper function to calculate velocity
def calculate_velocity(coord1, coord2, fps=30):
    dx = coord2[0] - coord1[0]
    dy = coord2[1] - coord1[1]
    distance = math.sqrt(dx**2 + dy**2)
    return distance * fps

# Helper function to calculate angle
def calculate_angle(a, b, c):
    ba = [a[0] - b[0], a[1] - b[1]]
    bc = [c[0] - b[0], c[1] - b[1]]
    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    angle = np.arccos(np.clip(cosine_angle, -1.0, 1.0))
    return np.degrees(angle)

# Process keypoints to extract features
feature_dataset = []
fps = 30  # Assuming 30 FPS for the videos

for json_file in os.listdir(augmented_output_path):
    if json_file.endswith(".json"):
        json_path = os.path.join(augmented_output_path, json_file)
        
        with open(json_path, "r") as f:
            keypoints = json.load(f)
        
        video_features = []
        for i in range(1, len(keypoints)):
            frame_features = {}
            
            # Bracing leg velocity (ankle)
            prev_left_ankle = keypoints[i - 1]["left_leg"]["ankle"]
            curr_left_ankle = keypoints[i]["left_leg"]["ankle"]
            frame_features["bracing_leg_velocity"] = calculate_velocity(prev_left_ankle, curr_left_ankle, fps)
            
            # Push leg knee angle
            hip = keypoints[i]["right_leg"]["hip"]
            knee = keypoints[i]["right_leg"]["knee"]
            ankle = keypoints[i]["right_leg"]["ankle"]
            frame_features["push_leg_knee_angle"] = calculate_angle(hip, knee, ankle)
            
            # Hop phase indicator (distance between ankles)
            left_ankle = keypoints[i]["left_leg"]["ankle"]
            right_ankle = keypoints[i]["right_leg"]["ankle"]
            frame_features["ankle_distance"] = math.sqrt((left_ankle[0] - right_ankle[0])**2 + (left_ankle[1] - right_ankle[1])**2)
            
            video_features.append(frame_features)
        
        # Append to dataset with label from filename
        label = int(json_file.split("_")[0])  # Extract score from filename
        for frame_feature in video_features:
            frame_feature["label"] = label
        feature_dataset.extend(video_features)

# Save feature dataset to CSV
feature_csv_path = os.path.join(output_path, "features_stage3.csv")
pd.DataFrame(feature_dataset).to_csv(feature_csv_path, index=False)
print(f"Features saved to {feature_csv_path}")


Features saved to /Users/cezar/Desktop/Team Project/AI/shotput_models/stage3/keypoint_data/features_stage3.csv


### create class weights to account for imbalancedness of the dataset

In [5]:
# Load the features dataset
features = pd.read_csv(feature_csv_path)

# Calculate class weights
labels = features["label"]
class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(labels),
    y=labels
)
class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}

print(f"Computed class weights: {class_weights_dict}")

Computed class weights: {0: 3.7777777777777777, 1: 0.576271186440678}


In [7]:
# Verify dataset shape
print("Original dataset shape:", X.shape)

# Adjust timesteps to fit features
num_features = X.shape[1]
timesteps = min(num_features, 10)  # Adjust timesteps to match feature count if necessary

# Ensure divisible shape for reshaping
if num_features % timesteps != 0:
    raise ValueError(f"Number of features ({num_features}) is not divisible by timesteps ({timesteps}). Please check data or reduce timesteps.")

# Reshape X for LSTM input: (samples, timesteps, features)
X = X.reshape(-1, timesteps, num_features // timesteps)
print("Reshaped dataset shape for LSTM:", X.shape)


Original dataset shape: (544, 3)
Reshaped dataset shape for LSTM: (544, 3, 1)


In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# Define model architecture
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=input_shape),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(32, return_sequences=False),
        BatchNormalization(),
        Dropout(0.3),
        Dense(16, activation="relu"),
        Dense(3, activation="softmax")  # 3 classes: 0, 0.5, 1
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

# Prepare input data
X = features.drop(columns=["label"]).values  # Drop label column
y = features["label"].values  # Labels

# Reshape X for LSTM input: (samples, timesteps, features per timestep)
timesteps = 1  
X = X.reshape(X.shape[0], timesteps, -1)

# Split data
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# input shape
input_shape = (X_train.shape[1], X_train.shape[2])
model = build_lstm_model(input_shape)


  super().__init__(**kwargs)


In [18]:
# Train model
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    class_weight=class_weights_dict,
    epochs=500,
    batch_size=16
)

Epoch 1/500
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6990 - loss: 0.4512 - val_accuracy: 0.8073 - val_loss: 0.7053
Epoch 2/500
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7233 - loss: 0.5131 - val_accuracy: 0.1927 - val_loss: 1.0349
Epoch 3/500
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6479 - loss: 0.4580 - val_accuracy: 0.7248 - val_loss: 0.4409
Epoch 4/500
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6939 - loss: 0.4880 - val_accuracy: 0.7431 - val_loss: 0.4705
Epoch 5/500
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6921 - loss: 0.4268 - val_accuracy: 0.7248 - val_loss: 0.5267
Epoch 6/500
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6915 - loss: 0.7184 - val_accuracy: 0.7615 - val_loss: 0.5673
Epoch 7/500
[1m28/28[0m [32m━━━

In [19]:
# Evaluate on the validation set
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=1)

# Print results
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 946us/step - accuracy: 0.9048 - loss: 0.3088
Validation Loss: 0.4324
Validation Accuracy: 0.8532


In [20]:
# Save the model
model.save("/Users/cezar/Desktop/Team Project/AI/shotput_models/stage3/shotput_stage3.keras")

### test un unseen data:

In [23]:
import cv2
import json
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp

# Load the trained model
model_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage3/shotput_stage3.keras"
model = load_model(model_path)

# MediaPipe Pose setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Helper function to extract keypoints from the video
def extract_keypoints(video_path):
    cap = cv2.VideoCapture(video_path)
    keypoints = []
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(frame_rgb)
        
        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark
            left_leg = {
                "hip": [landmarks[mp_pose.PoseLandmark.LEFT_HIP].x, landmarks[mp_pose.PoseLandmark.LEFT_HIP].y],
                "knee": [landmarks[mp_pose.PoseLandmark.LEFT_KNEE].x, landmarks[mp_pose.PoseLandmark.LEFT_KNEE].y],
                "ankle": [landmarks[mp_pose.PoseLandmark.LEFT_ANKLE].x, landmarks[mp_pose.PoseLandmark.LEFT_ANKLE].y],
            }
            right_leg = {
                "hip": [landmarks[mp_pose.PoseLandmark.RIGHT_HIP].x, landmarks[mp_pose.PoseLandmark.RIGHT_HIP].y],
                "knee": [landmarks[mp_pose.PoseLandmark.RIGHT_KNEE].x, landmarks[mp_pose.PoseLandmark.RIGHT_KNEE].y],
                "ankle": [landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE].x, landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE].y],
            }
            keypoints.append({"left_leg": left_leg, "right_leg": right_leg})
    
    cap.release()
    return keypoints

# Extract keypoints from the test video
test_video_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/test_videos/stage3.mp4"
keypoints = extract_keypoints(test_video_path)

# Feature extraction for the test video
def extract_features(keypoints):
    features = []
    for i in range(1, len(keypoints)):
        prev_left_ankle = keypoints[i - 1]["left_leg"]["ankle"]
        curr_left_ankle = keypoints[i]["left_leg"]["ankle"]
        left_velocity = np.linalg.norm(np.array(curr_left_ankle) - np.array(prev_left_ankle))
        
        right_leg = keypoints[i]["right_leg"]
        knee_angle = calculate_angle(right_leg["hip"], right_leg["knee"], right_leg["ankle"])
        
        left_ankle = keypoints[i]["left_leg"]["ankle"]
        right_ankle = keypoints[i]["right_leg"]["ankle"]
        ankle_distance = np.linalg.norm(np.array(left_ankle) - np.array(right_ankle))
        
        features.append([left_velocity, knee_angle, ankle_distance])
    
    return np.array(features)

# Function to calculate angles
def calculate_angle(a, b, c):
    ba = [a[0] - b[0], a[1] - b[1]]
    bc = [c[0] - b[0], c[1] - b[1]]
    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    return np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))

# Extract features
test_features = extract_features(keypoints)

# Reshape for LSTM input
test_features = test_features.reshape(1, test_features.shape[0], test_features.shape[1])  # Single sample

# Make predictions
predictions = model.predict(test_features)
predicted_class = np.argmax(predictions)

# Map prediction to score
score_mapping = {0: "0", 1: "0.5", 2: "1"}
predicted_score = score_mapping[predicted_class]

print(f"Predicted Score for the test video: {predicted_score}")

I0000 00:00:1736691046.921852 8365806 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro
W0000 00:00:1736691046.999221 8477213 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736691047.013831 8477218 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
Predicted Score for the test video: 0.5
