In [1]:
# Required imports
import os
import cv2
import numpy as np
import pandas as pd
import mediapipe as mp
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
import json

# Paths
input_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage5/videos"
output_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage5/keypoint_data"

# MediaPipe Pose setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Helper function to extract keypoints and detect release moment
def extract_keypoints(video_path):
    cap = cv2.VideoCapture(video_path)
    keypoints = []
    distances = []
    release_frame = None
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(frame_rgb)
        
        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark
            
            # Extract relevant keypoints
            wrist = [landmarks[mp_pose.PoseLandmark.RIGHT_WRIST].x, landmarks[mp_pose.PoseLandmark.RIGHT_WRIST].y]
            neck = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].x, landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].y]
            shoulder = [landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].x, landmarks[mp_pose.PoseLandmark.RIGHT_SHOULDER].y]
            
            # Compute wrist-to-neck distance
            distance = np.linalg.norm(np.array(wrist) - np.array(neck))
            distances.append(distance)
            
            # Save keypoints for feature engineering
            keypoints.append({
                "wrist": wrist,
                "neck": neck,
                "shoulder": shoulder,
            })
    
    # Detect release frame: sudden increase in wrist-to-neck distance
    for i in range(1, len(distances)):
        if distances[i] > distances[i - 1] * 1.5:  # Threshold for sudden increase
            release_frame = i
            break
    
    cap.release()
    return keypoints, release_frame

# Process videos
os.makedirs(output_path, exist_ok=True)
for video_file in os.listdir(input_path):
    if video_file.endswith(".mp4"):
        video_path = os.path.join(input_path, video_file)
        keypoints, release_frame = extract_keypoints(video_path)
        
        # Save keypoints and release frame to JSON
        output_data = {"keypoints": keypoints, "release_frame": release_frame}
        output_file = os.path.join(output_path, video_file.replace(".mp4", ".json"))
        with open(output_file, "w") as f:
            json.dump(output_data, f)
        print(f"Keypoints and release frame saved for {video_file}")

print("Keypoint extraction and release detection completed!")

I0000 00:00:1736704017.027921 9179935 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro
W0000 00:00:1736704017.127493 9185621 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736704017.142395 9185621 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736704017.158495 9185626 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


Keypoints and release frame saved for 1_user2.mp4
Keypoints and release frame saved for 1_user5.mp4
Keypoints and release frame saved for 1_user12.mp4
Keypoints and release frame saved for 1_user7.mp4
Keypoints and release frame saved for 1_user6.mp4
Keypoints and release frame saved for 1_user13.mp4
Keypoints and release frame saved for 0_user3.mp4
Keypoints and release frame saved for 0_user19.mp4
Keypoints and release frame saved for 0_user4.mp4
Keypoints and release frame saved for 0_user23.mp4
Keypoints and release frame saved for 0_user10.mp4
Keypoints and release frame saved for 0_user9.mp4
Keypoints and release frame saved for 1_user22.mp4
Keypoints and release frame saved for 0.5_user1.mp4
Keypoints and release frame saved for 1_user21.mp4
Keypoints and release frame saved for 1_user8.mp4
Keypoints and release frame saved for 1_user20.mp4
Keypoint extraction and release detection completed!


In [4]:
import random

# Augmentation parameters
ROTATION_ANGLE = 10  # degrees
SCALE_FACTOR = 0.1  # 10%

# Helper functions for augmentations
def mirror_keypoints(keypoints):
    mirrored = []
    for frame in keypoints:
        mirrored.append({
            "wrist": frame["wrist"],  # Mirroring is not needed for the wrist (dominant hand remains the same)
            "neck": frame["neck"],    # Neck remains unaffected
            "shoulder": frame["shoulder"],  # Shoulder remains unaffected
        })
    return mirrored

def rotate_keypoints(keypoints, angle=ROTATION_ANGLE):
    angle_rad = np.radians(angle)
    cos_angle = np.cos(angle_rad)
    sin_angle = np.sin(angle_rad)
    rotated = []
    
    for frame in keypoints:
        new_frame = {}
        for joint, coords in frame.items():
            x, y = coords
            # Apply rotation transformation
            x_new = cos_angle * x - sin_angle * y
            y_new = sin_angle * x + cos_angle * y
            new_frame[joint] = [x_new, y_new]
        rotated.append(new_frame)
    
    return rotated

def scale_keypoints(keypoints, scale_factor=SCALE_FACTOR):
    scaled = []
    for frame in keypoints:
        new_frame = {}
        for joint, coords in frame.items():
            x, y = coords
            # Apply scaling transformation
            x_new = x * (1 + scale_factor)
            y_new = y * (1 + scale_factor)
            new_frame[joint] = [x_new, y_new]
        scaled.append(new_frame)
    
    return scaled

# Augment keypoints and maintain release frame
augmented_output_path = os.path.join(output_path, "augmented")
os.makedirs(augmented_output_path, exist_ok=True)

for json_file in os.listdir(output_path):
    if json_file.endswith(".json"):
        json_path = os.path.join(output_path, json_file)
        
        with open(json_path, "r") as f:
            data = json.load(f)
        
        keypoints = data["keypoints"]
        release_frame = data["release_frame"]
        
        # Apply augmentations
        augmented_keypoints = []
        augmented_keypoints += mirror_keypoints(keypoints)
        augmented_keypoints += rotate_keypoints(keypoints, angle=random.uniform(-ROTATION_ANGLE, ROTATION_ANGLE))
        augmented_keypoints += scale_keypoints(keypoints, scale_factor=random.uniform(-SCALE_FACTOR, SCALE_FACTOR))
        
        # Save augmented keypoints and release frame
        augmented_data = {"keypoints": augmented_keypoints, "release_frame": release_frame}
        output_file = os.path.join(augmented_output_path, json_file.replace(".json", "_augmented.json"))
        with open(output_file, "w") as f:
            json.dump(augmented_data, f)
        print(f"Augmented data saved for {json_file}")

print("Data augmentation completed!")

Augmented data saved for 1_user2.json
Augmented data saved for 1_user12.json
Augmented data saved for 0.5_user1.json
Augmented data saved for 1_user13.json
Augmented data saved for 1_user8.json
Augmented data saved for 1_user22.json
Augmented data saved for 1_user5.json
Augmented data saved for 0_user10.json
Augmented data saved for 1_user20.json
Augmented data saved for 0_user3.json
Augmented data saved for 1_user6.json
Augmented data saved for 1_user7.json
Augmented data saved for 1_user21.json
Augmented data saved for 0_user23.json
Augmented data saved for 0_user19.json
Augmented data saved for 0_user9.json
Augmented data saved for 0_user4.json
Data augmentation completed!


In [6]:
import math

# Helper function to calculate distance
def calculate_distance(point1, point2):
    return np.linalg.norm(np.array(point1) - np.array(point2))

# Helper function to calculate angle
def calculate_angle(a, b, c):
    ba = [a[0] - b[0], a[1] - b[1]]
    bc = [c[0] - b[0], c[1] - b[1]]
    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
    return np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))

# Extract features for each video
feature_dataset = []

for json_file in os.listdir(augmented_output_path):
    if json_file.endswith(".json"):
        json_path = os.path.join(augmented_output_path, json_file)
        
        with open(json_path, "r") as f:
            data = json.load(f)
        
        keypoints = data["keypoints"]
        release_frame = data["release_frame"]
        
        video_features = []
        for i in range(len(keypoints)):
            frame_features = {}
            
            # Shot-neck proximity
            wrist = keypoints[i]["wrist"]
            neck = keypoints[i]["neck"]
            frame_features["shot_neck_distance"] = calculate_distance(wrist, neck)
            
            # Release angle (only at release frame)
            if i == release_frame and i > 0:
                prev_wrist = keypoints[i - 1]["wrist"]
                release_angle = math.degrees(math.atan2(wrist[1] - prev_wrist[1], wrist[0] - prev_wrist[0]))
                frame_features["release_angle"] = release_angle
            else:
                frame_features["release_angle"] = None
            
            video_features.append(frame_features)
        
        # Append label (from filename) to features
        label = float(json_file.split("_")[0])  # Extract score from filename
        for frame_feature in video_features:
            frame_feature["label"] = label
        feature_dataset.extend(video_features)

# Save features to CSV
feature_csv_path = os.path.join(output_path, "features_stage5.csv")
pd.DataFrame(feature_dataset).to_csv(feature_csv_path, index=False)
print(f"Features saved to {feature_csv_path}")

Features saved to /Users/cezar/Desktop/Team Project/AI/shotput_models/stage5/keypoint_data/features_stage5.csv


In [7]:
# Load the features dataset
features = pd.read_csv(feature_csv_path)

# Calculate class weights
labels = features["label"]
class_weights = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(labels),
    y=labels
)
class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}

# Print computed class weights
print(f"Computed class weights: {class_weights_dict}")


Computed class weights: {0: 0.5749440715883669, 1: 6.119047619047619, 2: 0.9113475177304965}


In [18]:
# Define the LSTM model architecture
def build_lstm_model(input_shape):
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=input_shape),
        BatchNormalization(),
        Dropout(0.3),
        LSTM(32, return_sequences=False),
        BatchNormalization(),
        Dropout(0.3),
        Dense(16, activation="relu"),
        Dense(3, activation="softmax")  # 3 classes: 0, 0.5, 1
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.005),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model

# Map labels (0 → 0, 0.5 → 1, 1 → 2)
label_mapping = {0: 0, 0.5: 1, 1: 2}
features["label"] = features["label"].map(label_mapping)

# Prepare the input data
X = features.drop(columns=["label"]).values  # Drop the label column
y = features["label"].values.astype(int)  # Labels


X = X.reshape(X.shape[0], 1, -1)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Build the model
input_shape = (X_train.shape[1], X_train.shape[2])
model = build_lstm_model(input_shape)

  y = features["label"].values.astype(int)  # Labels
  super().__init__(**kwargs)


In [19]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    class_weight=class_weights_dict,
    epochs=400,
    batch_size=16,
)


Epoch 1/400
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6953 - loss: 0.4332 - val_accuracy: 0.9484 - val_loss: 0.3232
Epoch 2/400
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9513 - loss: 0.1457 - val_accuracy: 0.9484 - val_loss: 0.3102
Epoch 3/400
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9398 - loss: 0.1608 - val_accuracy: 0.9484 - val_loss: 0.2971
Epoch 4/400
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9382 - loss: 0.1648 - val_accuracy: 0.9484 - val_loss: 0.2713
Epoch 5/400
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9459 - loss: 0.1624 - val_accuracy: 0.9484 - val_loss: 0.2316
Epoch 6/400
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9483 - loss: 0.1476 - val_accuracy: 0.9484 - val_loss: 0.2442
Epoch 7/400
[1m39/39[0m [32m━━━

In [20]:
# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=1)

# Print the results
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 995us/step - accuracy: 0.9533 - loss: 0.1564
Validation Loss: 0.1664
Validation Accuracy: 0.9484


In [21]:
model_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage5/shotput_stage5.keras"
model.save(model_path)

In [24]:
import cv2
import json
import numpy as np
from tensorflow.keras.models import load_model
import mediapipe as mp

# Load the trained model
model_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/stage5/shotput_stage5.keras"
model = load_model(model_path)

# MediaPipe Pose setup
mp_pose = mp.solutions.pose
pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5, min_tracking_confidence=0.5)

# Helper function to extract keypoints and detect release frame
def extract_keypoints(video_path):
    cap = cv2.VideoCapture(video_path)
    keypoints = []
    distances = []
    release_frame = None
    
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = pose.process(frame_rgb)
        
        if results.pose_landmarks:
            landmarks = results.pose_landmarks.landmark
            
            # Extract relevant keypoints
            wrist = [landmarks[mp_pose.PoseLandmark.RIGHT_WRIST].x, landmarks[mp_pose.PoseLandmark.RIGHT_WRIST].y]
            neck = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].x, landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].y]
            
            # Compute wrist-to-neck distance
            distance = np.linalg.norm(np.array(wrist) - np.array(neck))
            distances.append(distance)
            
            # Save keypoints for feature engineering
            keypoints.append({"wrist": wrist, "neck": neck})
    
    # Detect release frame: sudden increase in wrist-to-neck distance
    for i in range(1, len(distances)):
        if distances[i] > distances[i - 1] * 1.5:  # Threshold for sudden increase
            release_frame = i
            break
    
    cap.release()
    return keypoints, release_frame

# Extract features for the new video
def extract_features(keypoints, release_frame):
    features = []
    for i in range(len(keypoints)):
        frame_features = {}
        
        # Shot-neck proximity
        wrist = keypoints[i]["wrist"]
        neck = keypoints[i]["neck"]
        frame_features["shot_neck_distance"] = np.linalg.norm(np.array(wrist) - np.array(neck))
        
        # Release angle (only at release frame)
        if i == release_frame and i > 0:
            prev_wrist = keypoints[i - 1]["wrist"]
            release_angle = np.degrees(np.arctan2(
                wrist[1] - prev_wrist[1],
                wrist[0] - prev_wrist[0]
            ))
            frame_features["release_angle"] = release_angle
        else:
            frame_features["release_angle"] = 0.0  # Fallback value
        
        features.append([float(value) for value in frame_features.values()])  # Ensure float type
    
    return np.array(features)


# Process the test video
test_video_path = "/Users/cezar/Desktop/Team Project/AI/shotput_models/test_videos/stage5.mp4"
keypoints, release_frame = extract_keypoints(test_video_path)
test_features = extract_features(keypoints, release_frame)

# Reshape features for LSTM input
test_features = test_features.reshape(1, test_features.shape[0], test_features.shape[1])

# Make predictions
predictions = model.predict(test_features)
predicted_class = np.argmax(predictions, axis=1)

# Map predicted class back to score
inverse_label_mapping = {0: 0, 1: 0.5, 2: 1}
predicted_score = [inverse_label_mapping[cls] for cls in predicted_class]

print(f"Predicted Scores for the test video: {predicted_score}")


I0000 00:00:1736706025.650347 9179935 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 89.3), renderer: Apple M2 Pro
W0000 00:00:1736706025.732426 9252855 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1736706025.748085 9252855 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step
Predicted Scores for the test video: [1]
