In [1]:
# Required imports
import os
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
import json

# Paths
input_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage4/videos"
output_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage4/keypoint_data"

# MediaPipe Pose setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Helper function to extract keypoints
def extract_keypoints(video_path):
    cap = cv2.VideoCapture(video_path)
    keypoints = []
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(frame_rgb)
        
        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark
            # Extract keypoints for push leg, torso, and arms
            push_leg = {
                "hip": [landmarks[mp_pose.PoseLandmark.RIGHT_HIP].x, landmarks[mp_pose.PoseLandmark.RIGHT_HIP].y],
                "knee": [landmarks[mp_pose.PoseLandmark.RIGHT_KNEE].x, landmarks[mp_pose.PoseLandmark.RIGHT_KNEE].y],
                "ankle": [landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE].x, landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE].y],
            }
            torso = {
                "right_shoulder": [landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].x, landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].y],
                "left_shoulder": [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].x, landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].y],
            }
            arms = {
                "right_elbow": [landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW].x, landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW].y],
                "left_elbow": [landmarks[mp_pose.PoseLandmark.LEFT_ELBOW].x, landmarks[mp_pose.PoseLandmark.LEFT_ELBOW].y],
                "right_wrist": [landmarks[mp_pose.PoseLandmark.RIGHT_WRIST].x, landmarks[mp_pose.PoseLandmark.RIGHT_WRIST].y],
                "left_wrist": [landmarks[mp_pose.PoseLandmark.LEFT_WRIST].x, landmarks[mp_pose.PoseLandmark.LEFT_WRIST].y],
            }
            keypoints.append({"push_leg": push_leg, "torso": torso, "arms": arms})
    
    cap.release()
    return keypoints

# Process videos
os.makedirs(output_path, exist_ok=True)
for video_file in os.listdir(input_path):
    if video_file.endswith(".mp4"):
        video_path = os.path.join(input_path, video_file)
        keypoints = extract_keypoints(video_path)
        
        # Save keypoints to JSON
        output_file = os.path.join(output_path, video_file.replace(".mp4", ".json"))
        with open(output_file, "w") as f:
            json.dump(keypoints, f)
        print(f"Keypoints extracted and saved for {video_file}")

print("Keypoint extraction completed!")

I0000 00:00:1736694042.941205 8926888 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
W0000 00:00:1736694043.044332 8927121 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736694043.075629 8927121 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736694043.091889 8927120 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Keypoints extracted and saved for 1_user5.mp4
Keypoints extracted and saved for 1_user7.mp4
Keypoints extracted and saved for 1_user6.mp4
Keypoints extracted and saved for 0_user2.mp4
Keypoints extracted and saved for 0_user3.mp4
Keypoints extracted and saved for 0_user1.mp4
Keypoints extracted and saved for 0_user4.mp4
Keypoints extracted and saved for 0_user21.mp4
Keypoints extracted and saved for 0_user20.mp4
Keypoints extracted and saved for 0_user10.mp4
Keypoints extracted and saved for 0_user12.mp4
Keypoints extracted and saved for 0_user9.mp4
Keypoints extracted and saved for 0_user13.mp4
Keypoints extracted and saved for 1_user22.mp4
Keypoints extracted and saved for 1_user23.mp4
Keypoints extracted and saved for 1_user8.mp4
Keypoints extracted and saved for 1_user19.mp4
Keypoint extraction completed!


In [3]:
import random

# Augmentation parameters
ROTATION_ANGLE = 10  # degrees
SCALE_FACTOR = 0.1  # 10%

# Helper functions for augmentations
def mirror_keypoints(keypoints):
    mirrored = []
    for frame in keypoints:
        push_leg = frame["push_leg"]
        torso = frame["torso"]
        arms = frame["arms"]
        
        # Swap left and right keypoints
        mirrored.append({
            "push_leg": {
                "hip": push_leg["hip"],  # Push leg remains the same
                "knee": push_leg["knee"],
                "ankle": push_leg["ankle"],
            },
            "torso": {
                "right_shoulder": torso["left_shoulder"],
                "left_shoulder": torso["right_shoulder"],
            },
            "arms": {
                "right_elbow": arms["left_elbow"],
                "left_elbow": arms["right_elbow"],
                "right_wrist": arms["left_wrist"],
                "left_wrist": arms["right_wrist"],
            },
        })
    return mirrored

def rotate_keypoints(keypoints, angle=ROTATION_ANGLE):
    angle_rad = np.radians(angle)
    cos_angle = np.cos(angle_rad)
    sin_angle = np.sin(angle_rad)
    rotated = []
    
    for frame in keypoints:
        new_frame = {}
        for body_part, joints in frame.items():
            new_frame[body_part] = {}
            for joint, coords in joints.items():
                x, y = coords
                # Apply rotation transformation
                x_new = cos_angle * x - sin_angle * y
                y_new = sin_angle * x + cos_angle * y
                new_frame[body_part][joint] = [x_new, y_new]
        rotated.append(new_frame)
    
    return rotated

def scale_keypoints(keypoints, scale_factor=SCALE_FACTOR):
    scaled = []
    for frame in keypoints:
        new_frame = {}
        for body_part, joints in frame.items():
            new_frame[body_part] = {}
            for joint, coords in joints.items():
                x, y = coords
                # Apply scaling transformation
                x_new = x * (1 + scale_factor)
                y_new = y * (1 + scale_factor)
                new_frame[body_part][joint] = [x_new, y_new]
            new_frame[body_part] = new_frame[body_part]
        scaled.append(new_frame)
    
    return scaled

# Augment keypoints and save
augmented_output_path = os.path.join(output_path, "augmented")
os.makedirs(augmented_output_path, exist_ok=True)

for json_file in os.listdir(output_path):
    if json_file.endswith(".json"):
        json_path = os.path.join(output_path, json_file)
        
        with open(json_path, "r") as f:
            keypoints = json.load(f)
        
        # Apply augmentations
        augmented_keypoints = []
        augmented_keypoints += mirror_keypoints(keypoints)
        augmented_keypoints += rotate_keypoints(keypoints, angle=random.uniform(-ROTATION_ANGLE, ROTATION_ANGLE))
        augmented_keypoints += scale_keypoints(keypoints, scale_factor=random.uniform(-SCALE_FACTOR, SCALE_FACTOR))
        
        # Save augmented keypoints
        output_file = os.path.join(augmented_output_path, json_file.replace(".json", "_augmented.json"))
        with open(output_file, "w") as f:
            json.dump(augmented_keypoints, f)
        print(f"Augmented keypoints saved for {json_file}")

print("Data augmentation completed!")

Augmented keypoints saved for 0_user21.json
Augmented keypoints saved for 0_user20.json
Augmented keypoints saved for 1_user8.json
Augmented keypoints saved for 1_user22.json
Augmented keypoints saved for 0_user1.json
Augmented keypoints saved for 1_user5.json
Augmented keypoints saved for 1_user19.json
Augmented keypoints saved for 0_user10.json
Augmented keypoints saved for 1_user23.json
Augmented keypoints saved for 0_user13.json
Augmented keypoints saved for 0_user3.json
Augmented keypoints saved for 1_user6.json
Augmented keypoints saved for 1_user7.json
Augmented keypoints saved for 0_user2.json
Augmented keypoints saved for 0_user12.json
Augmented keypoints saved for 0_user9.json
Augmented keypoints saved for 0_user4.json
Data augmentation completed!


In [4]:
import math

# Helper function to calculate velocity
def calculate_velocity(coord1, coord2, fps=30):
    dx = coord2[0] - coord1[0]
    dy = coord2[1] - coord1[1]
    return dx * fps, dy * fps

# Helper function to calculate angle
def calculate_angle(a, b, c):
    ba = [a[0] - b[0], a[1] - b[1]]
    bc = [c[0] - b[0], c[1] - b[1]]
    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    return np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))

# Process keypoints to extract features
feature_dataset = []
fps = 30  # Assuming 30 FPS for the videos

for json_file in os.listdir(augmented_output_path):
    if json_file.endswith(".json"):
        json_path = os.path.join(augmented_output_path, json_file)
        
        with open(json_path, "r") as f:
            keypoints = json.load(f)
        
        video_features = []
        for i in range(1, len(keypoints)):
            frame_features = {}
            
            # Push leg extension (velocity and distance)
            prev_hip = keypoints[i - 1]["push_leg"]["hip"]
            curr_hip = keypoints[i]["push_leg"]["hip"]
            vx, vy = calculate_velocity(prev_hip, curr_hip, fps)
            frame_features["push_leg_velocity_x"] = vx
            frame_features["push_leg_velocity_y"] = vy
            
            knee = keypoints[i]["push_leg"]["knee"]
            ankle = keypoints[i]["push_leg"]["ankle"]
            frame_features["knee_ankle_distance"] = math.sqrt((knee[0] - ankle[0])**2 + (knee[1] - ankle[1])**2)
            
            # Torso rotation (shoulder angle)
            right_shoulder = keypoints[i]["torso"]["right_shoulder"]
            left_shoulder = keypoints[i]["torso"]["left_shoulder"]
            frame_features["shoulder_angle"] = calculate_angle(left_shoulder, right_shoulder, [right_shoulder[0] + 1, right_shoulder[1]])  # Angle relative to horizontal
            
            # Arm involvement (elbow and wrist angles)
            right_elbow = keypoints[i]["arms"]["right_elbow"]
            right_wrist = keypoints[i]["arms"]["right_wrist"]
            frame_features["right_arm_angle"] = calculate_angle(right_shoulder, right_elbow, right_wrist)
            
            video_features.append(frame_features)
        
        # Append to dataset with label from filename
        label = int(json_file.split("_")[0])  # Extract score from filename
        for frame_feature in video_features:
            frame_feature["label"] = label
        feature_dataset.extend(video_features)

# Save feature dataset to CSV
feature_csv_path = os.path.join(output_path, "features_stage4.csv")
pd.DataFrame(feature_dataset).to_csv(feature_csv_path, index=False)
print(f"Features saved to {feature_csv_path}")

Features saved to /Users/cezar/Desktop/Team Project/AI/shotput_models/stage4/keypoint_data/features_stage4.csv


In [5]:
# Load the features dataset
features = pd.read_csv(feature_csv_path)

# Calculate class weights
labels = features["label"]
class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(labels),
    y=labels
)
class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}

# Print the computed class weights
print(f"Computed class weights: {class_weights_dict}")

Computed class weights: {0: 1.7388059701492538, 1: 0.7018072289156626}


In [59]:
# Define the LSTM model architecture
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=input_shape),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(32, return_sequences=False),
        BatchNormalization(),
        Dropout(0.3),
        Dense(16, activation="relu"),
        Dense(3, activation="softmax")  # 3 classes: 0, 0.5, 1
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.005),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

# Prepare the input data
X = features.drop(columns=["label"]).values  # Drop the label column
y = features["label"].values  # Labels

# Reshape X for LSTM input: (samples, timesteps, features per timestep)
timesteps = 1  # Use 1 timestep for each sample
X = X.reshape(X.shape[0], timesteps, -1)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Build the model
input_shape = (X_train.shape[1], X_train.shape[2])
model = build_lstm_model(input_shape)

  super().__init__(**kwargs)


In [60]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    class_weight=class_weights_dict,
    epochs=400,
    batch_size=32
)

Epoch 1/400
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.4517 - loss: 1.0251 - val_accuracy: 0.7128 - val_loss: 0.8939
Epoch 2/400
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7177 - loss: 0.6059 - val_accuracy: 0.7128 - val_loss: 0.8238
Epoch 3/400
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7686 - loss: 0.5211 - val_accuracy: 0.7340 - val_loss: 0.7944
Epoch 4/400
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7039 - loss: 0.5573 - val_accuracy: 0.7234 - val_loss: 0.7533
Epoch 5/400
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7341 - loss: 0.5129 - val_accuracy: 0.7128 - val_loss: 0.7399
Epoch 6/400
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7771 - loss: 0.4869 - val_accuracy: 0.7234 - val_loss: 0.7352
Epoch 7/400
[1m12/12[0m [32m━━

In [61]:
# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=1)

# Print results
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7147 - loss: 0.5965 
Validation Loss: 0.5404
Validation Accuracy: 0.7340


In [62]:
# Save the model
model_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage4/shotput_stage4.keras"
model.save(model_path)

In [65]:
import cv2
import json
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp

# Load the trained model
model_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage4/shotput_stage4.keras"
model = load_model(model_path)

# MediaPipe Pose setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Helper function to extract keypoints from the video
def extract_keypoints(video_path):
    cap = cv2.VideoCapture(video_path)
    keypoints = []
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(frame_rgb)
        
        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark
            # Extract keypoints for push leg, torso, and arms
            push_leg = {
                "hip": [landmarks[mp_pose.PoseLandmark.RIGHT_HIP].x, landmarks[mp_pose.PoseLandmark.RIGHT_HIP].y],
                "knee": [landmarks[mp_pose.PoseLandmark.RIGHT_KNEE].x, landmarks[mp_pose.PoseLandmark.RIGHT_KNEE].y],
                "ankle": [landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE].x, landmarks[mp_pose.PoseLandmark.RIGHT_ANKLE].y],
            }
            torso = {
                "right_shoulder": [landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].x, landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].y],
                "left_shoulder": [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].x, landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].y],
            }
            arms = {
                "right_elbow": [landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW].x, landmarks[mp_pose.PoseLandmark.RIGHT_ELBOW].y],
                "right_wrist": [landmarks[mp_pose.PoseLandmark.RIGHT_WRIST].x, landmarks[mp_pose.PoseLandmark.RIGHT_WRIST].y],
            }
            keypoints.append({"push_leg": push_leg, "torso": torso, "arms": arms})
    
    cap.release()
    return keypoints

# Extract features for the new video
def extract_features(keypoints):
    features = []
    for i in range(1, len(keypoints)):
        frame_features = {}
        
        # Push leg extension features
        prev_hip = keypoints[i - 1]["push_leg"]["hip"]
        curr_hip = keypoints[i]["push_leg"]["hip"]
        frame_features["push_leg_velocity_x"] = curr_hip[0] - prev_hip[0]
        frame_features["push_leg_velocity_y"] = curr_hip[1] - prev_hip[1]
        
        knee = keypoints[i]["push_leg"]["knee"]
        ankle = keypoints[i]["push_leg"]["ankle"]
        frame_features["knee_ankle_distance"] = np.linalg.norm(np.array(knee) - np.array(ankle))
        
        # Torso rotation
        right_shoulder = keypoints[i]["torso"]["right_shoulder"]
        left_shoulder = keypoints[i]["torso"]["left_shoulder"]
        frame_features["shoulder_angle"] = np.arctan2(
            right_shoulder[1] - left_shoulder[1], 
            right_shoulder[0] - left_shoulder[0]
        )
        
        # Right arm involvement
        right_elbow = keypoints[i]["arms"]["right_elbow"]
        right_wrist = keypoints[i]["arms"]["right_wrist"]
        frame_features["right_arm_angle"] = np.arctan2(
            right_wrist[1] - right_elbow[1], 
            right_wrist[0] - right_elbow[0]
        )
        
        features.append(list(frame_features.values()))
    
    return np.array(features)

# Process the test video
test_video_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/test_videos/stage4.mp4"
keypoints = extract_keypoints(test_video_path)
test_features = extract_features(keypoints)

# Reshape features for LSTM input
test_features = test_features.reshape(1, test_features.shape[0], test_features.shape[1])

# Make predictions
predictions = model.predict(test_features)

# Print the prediction probabilities
print(f"Prediction probabilities: {predictions[0]}")

# Display the predicted class with the highest probability
predicted_class = np.argmax(predictions)
print(f"Predicted Class: {predicted_class}")



I0000 00:00:1736703146.511628 8926888 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro
W0000 00:00:1736703146.615475 9171420 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736703146.629771 9171420 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step
Prediction probabilities: [6.1803234e-01 3.8194066e-01 2.7035334e-05]
Predicted Class: 0
