In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight


In [2]:
# 📥 Step 1: Load the labeled pose data
pose_df = pd.read_csv('../data/output/pose_data.csv')  # Contains frame, x_0, y_0, ..., z_n
labels_df = pd.read_csv('../data/output/manual_labels.csv')  # Contains shot_id, start_frame, end_frame, label

In [3]:
# 📊 Step 2: Merge frame-level pose data with shot labels
def label_frames(pose_df, labels_df):
    pose_df['shot_label'] = None
    pose_df['shot_id'] = None
    for _, row in labels_df.iterrows():
        mask = pose_df['frame'].str.extract(r'(\d+)').astype(int)[0].between(row['start_frame'], row['end_frame'])
        pose_df.loc[mask, 'shot_label'] = row['label']
        pose_df.loc[mask, 'shot_id'] = row['shot_id']
    return pose_df.dropna(subset=['shot_label'])

pose_df = label_frames(pose_df, labels_df)
pose_df.sort_values('frame').head()

Unnamed: 0,frame,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,...,y_30,z_30,x_31,y_31,z_31,x_32,y_32,z_32,shot_label,shot_id
36,frame_1040.jpg,0.532451,0.505548,-0.016877,0.534196,0.499362,-0.024981,0.53442,0.499236,-0.024984,...,0.711076,0.158162,0.580615,0.753942,-0.069232,0.524729,0.717886,0.151545,lift,14
41,frame_1045.jpg,0.519448,0.523063,-0.03607,0.520617,0.516561,-0.045085,0.520931,0.516561,-0.045071,...,0.705332,0.103567,0.554431,0.736617,0.019096,0.556938,0.72554,0.104229,lift,14
61,frame_1075.jpg,0.480023,0.516813,0.083156,0.478009,0.511931,0.070471,0.476544,0.511896,0.070465,...,0.77044,0.096276,0.455797,0.800815,-0.068752,0.499527,0.769238,0.089794,lift,14
66,frame_1080.jpg,0.476525,0.498307,0.101022,0.474583,0.493857,0.091319,0.473382,0.493907,0.09131,...,0.763524,0.008639,0.451003,0.796475,-0.058405,0.508918,0.763343,-0.006937,net shot,15
68,frame_1085.jpg,0.469288,0.507486,0.112433,0.467671,0.503178,0.099324,0.466644,0.503481,0.099301,...,0.783611,0.038399,0.417613,0.785941,0.015477,0.529067,0.779752,0.032212,net shot,15


In [4]:
# 🧹 Step 3: Preprocessing
features = [col for col in pose_df.columns if col.startswith(('x_', 'y_', 'z_'))]
scaler = StandardScaler()
pose_df[features] = scaler.fit_transform(pose_df[features])
pose_df.head()



Unnamed: 0,frame,x_0,y_0,z_0,x_1,y_1,z_1,x_2,y_2,z_2,...,y_30,z_30,x_31,y_31,z_31,x_32,y_32,z_32,shot_label,shot_id
36,frame_1040.jpg,1.12903,-0.819311,-1.305596,1.1649,-0.848942,-1.293293,1.179418,-0.85639,-1.293186,...,-0.951415,0.894629,2.086723,-0.74793,-0.795214,0.62897,-0.934266,0.821329,lift,14
41,frame_1045.jpg,0.959999,-0.173457,-1.5978,0.989112,-0.218555,-1.606549,1.004792,-0.224124,-1.606195,...,-1.061928,0.331944,1.734574,-1.117427,0.248629,1.044217,-0.789374,0.371639,lift,14
61,frame_1075.jpg,0.447488,-0.403935,0.217336,0.437512,-0.388271,0.194024,0.430153,-0.394358,0.194156,...,0.190876,0.256801,0.408044,0.251766,-0.789547,0.304052,0.037758,0.234453,lift,14
66,frame_1080.jpg,0.40202,-1.086321,0.489339,0.393156,-1.050728,0.518865,0.38922,-1.050881,0.518982,...,0.057802,-0.646418,0.34357,0.159197,-0.667264,0.425122,-0.07383,-0.684876,net shot,15
68,frame_1085.jpg,0.307946,-0.747838,0.663069,0.303669,-0.709078,0.643608,0.301983,-0.70148,0.643506,...,0.444322,-0.339702,-0.105502,-0.065462,0.205855,0.684892,0.23677,-0.3128,net shot,15


In [5]:
def create_padded_sequences(df, max_len=92):
    X, y = [], []

    # Encode shot labels
    label_encoder = LabelEncoder()
    df['shot_label_enc'] = label_encoder.fit_transform(df['shot_label'])

    # Group by shot
    for shot_id, group in df.groupby('shot_id'):
        seq = group.sort_values(by='frame')[features].values
        label = group['shot_label_enc'].iloc[0]

        # Pad the sequence
        padded_seq = pad_sequences([seq], maxlen=max_len, dtype='float32', padding='post', truncating='post')[0]
        X.append(padded_seq)
        y.append(label)

    return np.array(X), to_categorical(y), label_encoder

# Parameters
max_len = 92  # The length to which all sequences will be padded
num_features = len(features)

# Run it
X, y, label_encoder = create_padded_sequences(pose_df, max_len=max_len)


  df['shot_label_enc'] = label_encoder.fit_transform(df['shot_label'])


In [6]:
# ✂️ Step 5: Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Optional: Compute class weights (only if you have class imbalance)
labels_train = np.argmax(y_train, axis=1)
class_weights_array = compute_class_weight('balanced', classes=np.unique(labels_train), y=labels_train)
class_weight_dict = dict(enumerate(class_weights_array))

# Step 6: Build the model
timesteps = X.shape[1]
features = X.shape[2]
model = Sequential([
    Input(shape=(timesteps, features)),
    LSTM(64, return_sequences=False, input_shape=(X.shape[1], X.shape[2])),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(y.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Early stopping callback
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Step 7: Train the model
model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=16,
    validation_data=(X_test, y_test),
    callbacks=[early_stop],
    class_weight=class_weight_dict
)

# 📈 Step 8: Evaluate and predict
loss, acc = model.evaluate(X_test, y_test)
print(f"Test accuracy: {acc:.2f}")

# 🧠 Predict some labels
preds = model.predict(X_test[:5])
pred_labels = label_encoder.inverse_transform(np.argmax(preds, axis=1))
true_labels = label_encoder.inverse_transform(np.argmax(y_test[:5], axis=1))
print("Predicted:", pred_labels)
print("True:", true_labels)

  super().__init__(**kwargs)


Epoch 1/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 183ms/step - accuracy: 0.1750 - loss: 1.8057 - val_accuracy: 0.6000 - val_loss: 1.6009
Epoch 2/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.4250 - loss: 1.7726 - val_accuracy: 0.6000 - val_loss: 1.5898
Epoch 3/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step - accuracy: 0.4667 - loss: 1.8320 - val_accuracy: 0.6000 - val_loss: 1.5771
Epoch 4/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step - accuracy: 0.4667 - loss: 1.8149 - val_accuracy: 0.6000 - val_loss: 1.5617
Epoch 5/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - accuracy: 0.4250 - loss: 1.7203 - val_accuracy: 0.6000 - val_loss: 1.5411
Epoch 6/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.4250 - loss: 1.6791 - val_accuracy: 0.6000 - val_loss: 1.5101
Epoch 7/50
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━

# To Do

- generate more training data (manual labeling)