In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

In [2]:
# Define folder paths for each batting shot category
folders = {
    "cover_drive": "./cover_drive",
    "pull_shot": "./pull_shot",
    "wrong_shot": "./wrong_shot"
}

In [3]:
def load_and_preprocess_data(folders):
    sequences = []
    labels = []
    
    for label, folder_path in enumerate(folders.values()):
        print(f"Processing folder: {folder_path}, Label: {label}")
        
        for file in os.listdir(folder_path):
            if file.endswith(".xlsx"):
                file_path = os.path.join(folder_path, file)
                
                try:
                    df = pd.read_excel(file_path)
                    # Extract features as sequences
                    sequence = df[["Time","LShoulder_Avg", "LElbow_Avg"]].values
                    sequences.append(sequence)
                    labels.append(label)
                except KeyError:
                    print(f"Missing columns in {file_path}, skipping this file.")
    
    return sequences, labels

In [4]:
def pad_sequences(sequences, max_length=None):
    if max_length is None:
        max_length = max(len(seq) for seq in sequences)
    
    padded_sequences = []
    for seq in sequences:
        if len(seq) > max_length:
            padded_sequences.append(seq[:max_length])
        else:
            padding_length = max_length - len(seq)
            padded_seq = np.pad(seq, ((0, padding_length), (0, 0)), mode='constant')
            padded_sequences.append(padded_seq)
    
    return np.array(padded_sequences)

In [5]:
def create_lstm_model(sequence_length, n_features, n_classes):
    model = Sequential([
        LSTM(64, input_shape=(sequence_length, n_features), return_sequences=True),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(n_classes, activation='softmax')  # softmax for multi-class
    ])
    
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    
    return model

In [6]:
# Load and preprocess data
sequences, labels = load_and_preprocess_data(folders)

Processing folder: ./cover_drive, Label: 0
Processing folder: ./pull_shot, Label: 1
Processing folder: ./wrong_shot, Label: 2


In [7]:
# Pad sequences to uniform length
padded_sequences = pad_sequences(sequences)

In [8]:
# Split features into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, 
    np.array(labels), 
    test_size=0.2, 
    random_state=42
)

In [9]:
# Normalize the features
scaler = StandardScaler()

In [10]:
# Reshape for scaling
X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])
X_test_reshaped = X_test.reshape(-1, X_test.shape[-1])

In [11]:
# Fit and transform
X_train_scaled = scaler.fit_transform(X_train_reshaped)
X_test_scaled = scaler.transform(X_test_reshaped)

In [12]:
# Reshape back to 3D
X_train_scaled = X_train_scaled.reshape(X_train.shape)
X_test_scaled = X_test_scaled.reshape(X_test.shape)

In [13]:
# Create and train the model
n_features = X_train.shape[2]
n_classes = len(np.unique(labels))
sequence_length = X_train.shape[1]

model = create_lstm_model(sequence_length, n_features, n_classes)

In [14]:
# Train the model
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=80,
    batch_size=32,
    verbose=1
)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


In [16]:
# Evaluate the model
y_pred = np.argmax(model.predict(X_test_scaled), axis=1)
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))


Confusion Matrix:
[[25  0  0]
 [24  0  0]
 [ 8  0  0]]

Classification Report:
              precision    recall  f1-score   support

           0       0.44      1.00      0.61        25
           1       0.00      0.00      0.00        24
           2       0.00      0.00      0.00         8

    accuracy                           0.44        57
   macro avg       0.15      0.33      0.20        57
weighted avg       0.19      0.44      0.27        57


Accuracy Score: 0.43859649122807015


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
