In [26]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [27]:
# Step 1: Define paths for each folder
folders = {
    "square_cut": "./square_cut",
    "pull_shot": "./pull_shot",
    "wrong_shot": "./wrong_shot"
}

In [28]:
# Step 2: Load data and assign labels
def load_data_from_folders(folders):
    data = []
    labels = []
    for label, folder_path in enumerate(folders.values()):
        for file in os.listdir(folder_path):
            if file.endswith(".xlsx"):
                file_path = os.path.join(folder_path, file)
                df = pd.read_excel(file_path)
                
                # Include the 'Time' column as a feature
                features = df.values.flatten()
                data.append(features)
                labels.append(label)
    return np.array(data, dtype=object), np.array(labels)

In [29]:
# Load data
data, labels = load_data_from_folders(folders)

In [30]:
# Step 3: Preprocess data (e.g., pad sequences to same length)
from sklearn.preprocessing import StandardScaler

In [31]:
# Pad sequences with zeros for equal length (if necessary)
max_length = max(len(row) for row in data)
data_padded = np.array([np.pad(row, (0, max_length - len(row)), constant_values=0) for row in data])

In [32]:
# Normalize the data
scaler = StandardScaler()
data_normalized = scaler.fit_transform(data_padded)

In [33]:
# Step 4: Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data_normalized, labels, test_size=0.3, random_state=42)

In [34]:
# Step 5: Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

RandomForestClassifier(random_state=42)

In [35]:
# Step 6: Evaluate the model
y_pred = model.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nAccuracy Score:", accuracy_score(y_test, y_pred))

Confusion Matrix:
 [[0 1 0]
 [0 0 1]
 [0 1 0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       1.0
           1       0.00      0.00      0.00       1.0
           2       0.00      0.00      0.00       1.0

    accuracy                           0.00       3.0
   macro avg       0.00      0.00      0.00       3.0
weighted avg       0.00      0.00      0.00       3.0


Accuracy Score: 0.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
# Save the trained model
import joblib
joblib.dump(model, "batting_shot_classifier.pkl")
print("Model saved as 'batting_shot_classifier.pkl'")

Model saved as 'batting_shot_classifier.pkl'
