In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

In [2]:
# Define folder paths for each batting shot category
folders = {
    "cover_drive": "./cover_drive",
    "pull_shot": "./pull_shot",
    "wrong_shot": "./wrong_shot"
}

In [3]:
# Initialize data and labels
data = []
labels = []

In [4]:
# Label mapping
label_map = {folder: label for label, folder in enumerate(folders.keys())}

In [5]:
# Load and preprocess data
for label, folder_path in enumerate(folders.values()):
    print(f"Processing folder: {folder_path}, Label: {label}")

    for file in os.listdir(folder_path):
        if file.endswith(".xlsx"):
            file_path = os.path.join(folder_path, file)
            
            # Load Excel data
            df = pd.read_excel(file_path)

            # Extract features (flattened time, shoulder, and elbow data)
            try:
                features = df[["Time", "LShoulder_Avg", "LElbow_Avg"]].values.flatten()
                data.append(features)
                labels.append(label)
            except KeyError:
                print(f"Missing columns in {file_path}, skipping this file.")

Processing folder: ./cover_drive, Label: 0
Processing folder: ./pull_shot, Label: 1
Processing folder: ./wrong_shot, Label: 2


In [6]:
# Padding to ensure equal-length features
max_length = max(len(row) for row in data)
data_padded = [np.pad(row, (0, max_length - len(row)), constant_values=0) for row in data]

In [7]:
# Convert to NumPy arrays
data_array = np.array(data_padded)
labels_array = np.array(labels)

In [8]:
# Normalize data
scaler = StandardScaler()
data_normalized = scaler.fit_transform(data_array)

In [9]:
# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data_normalized, labels_array, test_size=0.3, random_state=42)

In [10]:
# Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [11]:
# Evaluate the model
y_pred = model.predict(X_test)

In [12]:
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))

Confusion Matrix:
 [[36  5  0]
 [ 4 28  2]
 [ 1  4  5]]

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.88      0.88        41
           1       0.76      0.82      0.79        34
           2       0.71      0.50      0.59        10

    accuracy                           0.81        85
   macro avg       0.78      0.73      0.75        85
weighted avg       0.81      0.81      0.81        85


Accuracy Score: 0.8117647058823529


In [74]:
# Save the trained model and scaler for future use
joblib.dump(model, "batting_shot_classifier.pkl")
joblib.dump(scaler, "scaler.pkl")
print("Model and scaler saved successfully!")

Model and scaler saved successfully!


In [75]:
import os
import pandas as pd
import numpy as np
import joblib

In [76]:
# Load the trained model and scaler
model = joblib.load("batting_shot_classifier.pkl")
scaler = joblib.load("scaler.pkl")

In [77]:
# Define label mapping for interpretation
label_map = {0: "cover_drive", 1: "pull_shot", 2: "wrong_shot"}

In [81]:
# Path to the test Excel file
test_file_path = "./combined_Lsh_LEl_angles_Multiprocessing_BasedonTime_7.xlsx"  # Update with the actual path of your Excel file

In [82]:
# Function to preprocess and predict the category of the shot
def predict_shot(file_path):
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return
    
    try:
        # Load the Excel data
        df = pd.read_excel(file_path)

        # Check for necessary columns
        if not {"Time", "LShoulder_Avg", "LElbow_Avg"}.issubset(df.columns):
            print("The Excel file is missing required columns: 'Time', 'LShoulder_Avg', 'LElbow_Avg'")
            return

        # Extract features and flatten
        features = df[["Time", "LShoulder_Avg", "LElbow_Avg"]].values.flatten()

        # Pad features to match training data length
        max_length = scaler.mean_.shape[0]
        features_padded = np.pad(features, (0, max_length - len(features)), constant_values=0)

        # Normalize the features
        features_normalized = scaler.transform([features_padded])

        # Predict the category
        prediction = model.predict(features_normalized)
        predicted_label = label_map[prediction[0]]

        print(f"The predicted batting shot category is: {predicted_label}")
    except Exception as e:
        print(f"An error occurred while processing the file: {e}")


In [83]:
# Test the model with the Excel file
predict_shot(test_file_path)

The predicted batting shot category is: pull_shot
