In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Define folder paths for each batting shot category
folders = {
    "cover_drive": "./cover_drive",
    "pull_shot": "./pull_shot",
    "wrong_shot": "./wrong_shot"
}

In [3]:
# Initialize data and labels
data = []
labels = []

In [4]:
# Load and preprocess data
for label, folder_path in enumerate(folders.values()):
    print(f"Processing folder: {folder_path}, Label: {label}")

    for file in os.listdir(folder_path):
        if file.endswith(".xlsx"):
            file_path = os.path.join(folder_path, file)
            
            # Load Excel data
            df = pd.read_excel(file_path)

            # Extract features (time, shoulder, elbow data)
            try:
                features = df[["Time", "LShoulder_Avg", "LElbow_Avg"]].values
                data.append(features)
                labels.append(label)
            except KeyError:
                print(f"Missing columns in {file_path}, skipping this file.")


Processing folder: ./cover_drive, Label: 0
Processing folder: ./pull_shot, Label: 1
Processing folder: ./wrong_shot, Label: 2


In [5]:
# Pad sequences to ensure equal lengths
max_length = max(len(seq) for seq in data)
data_padded = pad_sequences(data, maxlen=max_length, dtype='float32', padding='post')

In [6]:
# Convert labels to categorical (one-hot encoding)
labels_categorical = to_categorical(labels)

In [7]:
# Normalize data
scaler = StandardScaler()
data_normalized = np.array([scaler.fit_transform(seq) for seq in data_padded])

In [8]:
# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data_normalized, labels_categorical, test_size=0.3, random_state=42)

In [9]:
# Define the LSTM model
# model = Sequential([
#     LSTM(64, input_shape=(max_length, 3), return_sequences=False),
#     Dense(32, activation='relu'),
#     Dense(labels_categorical.shape[1], activation='softmax')
# ])

# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Define the LSTM model
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.3),
    LSTM(64),
    Dropout(0.3),
    Dense(y_train.shape[1], activation='softmax')
])

model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

In [10]:
# Define callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

In [11]:
# Train the model
# history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Train the model
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping, reduce_lr]
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50


In [12]:
# Save the trained model and scaler
model.save("lstm_batting_shot_classifier.h5")
np.save("lstm_scaler.npy", scaler.mean_)  # Save scaler mean for normalization
print("Model and scaler saved successfully!")

Model and scaler saved successfully!


In [13]:
# Evaluate the model
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)
y_true = np.argmax(y_test, axis=1)

In [86]:
print("Confusion Matrix:\n", confusion_matrix(y_true, y_pred))
print("\nClassification Report:\n", classification_report(y_true, y_pred))
print("\nAccuracy Score:", accuracy_score(y_true, y_pred))

Confusion Matrix:
 [[2 0 1]
 [4 0 0]
 [0 0 0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.33      0.67      0.44         3
           1       0.00      0.00      0.00         4
           2       0.00      0.00      0.00         0

    accuracy                           0.29         7
   macro avg       0.11      0.22      0.15         7
weighted avg       0.14      0.29      0.19         7


Accuracy Score: 0.2857142857142857


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [35]:
# Define label mapping for interpretation
label_map = {0: "square_cut", 1: "pull_shot", 2: "wrong_shot"}