In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import matplotlib.pyplot as plt
from tensorflow.keras import layers, models
from tensorflow.keras.models import Model


In [3]:
folders = ["alcaraz", "dimitrov_thiem","nadal" ,"federer", "roland"]
base_path = "/content/drive/MyDrive/dataset/"

for folder in folders:
    shot_dir = os.path.join(base_path, folder, "shots")
    if not os.path.exists(shot_dir):
        print(f"{shot_dir} doesn't exist")
        continue
    print(f"Found shots directory: {shot_dir}")


Found shots directory: /content/drive/MyDrive/dataset/alcaraz/shots
Found shots directory: /content/drive/MyDrive/dataset/dimitrov_thiem/shots
Found shots directory: /content/drive/MyDrive/dataset/nadal/shots
Found shots directory: /content/drive/MyDrive/dataset/federer/shots
Found shots directory: /content/drive/MyDrive/dataset/roland/shots


In [4]:
!pip install pycaret[full]



In [5]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm

# Base path to your dataset
base_path = "/content/drive/MyDrive/dataset/"

# Initialize lists for training, validation, and testing data
X_train = []
y_train = []
X_val = []
y_val = []
X_test = []
y_test = []

# List of folders containing the datasets
folders = ["alcaraz", "dimitrov_thiem", "djoko_sock", "nadal", "federer", "roland"]

# Loop through each folder to load shot data
for folder in folders:
    shot_dir = os.path.join(base_path, folder, "shots")
    if not os.path.exists(shot_dir):
        print(f"{shot_dir} doesn't exist")
        continue

    print(f"Loading shots from {shot_dir}")

    # Loop through CSV files in the shots directory
    for shot_csv in tqdm(sorted(os.listdir(shot_dir))):
        csv_path = os.path.join(shot_dir, shot_csv)
        data = pd.read_csv(csv_path)

        # Special handling for "nadal" folder: invert x-coordinates
        if folder == "nadal":
            revert_data = data.copy()
            for feature in data.columns:
                if feature.endswith("_x"):  # Check if the feature is an x-coordinate
                    revert_data[feature] = 1 - data[feature]
            data = revert_data

        # Calculate split indices
        train_end_idx = int(0.8 * len(data))
        val_end_idx = int(0.9 * len(data))

        # Split the data into 80% training, 10% validation, and 10% testing
        data_train = data[:train_end_idx]
        data_val = data[train_end_idx:val_end_idx]
        data_test = data[val_end_idx:]

        # Extract features and labels for training, validation, and testing
        features_train = data_train.loc[:, data.columns != 'shot'].to_numpy()
        features_val = data_val.loc[:, data.columns != 'shot'].to_numpy()
        features_test = data_test.loc[:, data.columns != 'shot'].to_numpy()

        # Extend the lists with training, validation, and testing data
        X_train.extend(features_train)
        y_train.extend(data_train["shot"].to_numpy().flatten())
        X_val.extend(features_val)
        y_val.extend(data_val["shot"].to_numpy().flatten())
        X_test.extend(features_test)
        y_test.extend(data_test["shot"].to_numpy().flatten())

# Convert lists to NumPy arrays for training, validation, and testing
X_train = np.array(X_train)
y_train = np.array(y_train)
X_val = np.array(X_val)
y_val = np.array(y_val)
X_test = np.array(X_test)
y_test = np.array(y_test)

# Create DataFrames for training, validation, and testing
train_df = pd.DataFrame(X_train)
train_df['shot'] = y_train

val_df = pd.DataFrame(X_val)
val_df['shot'] = y_val

test_df = pd.DataFrame(X_test)
test_df['shot'] = y_test

# Display the shape of the datasets
print(f"Loaded {len(y_train)} shots for training")
print(f"Loaded {len(y_val)} shots for validation")
print(f"Loaded {len(y_test)} shots for testing")

# Optionally, you can now use these DataFrames with PyCaret
from pycaret.classification import *

# Initialize PyCaret with the training data
clf = setup(data=train_df, target='shot', session_id=123)

# You can now proceed with model training and evaluation in PyCaret


Loading shots from /content/drive/MyDrive/dataset/alcaraz/shots


100%|██████████| 129/129 [00:03<00:00, 40.48it/s]


Loading shots from /content/drive/MyDrive/dataset/dimitrov_thiem/shots


100%|██████████| 143/143 [00:04<00:00, 34.71it/s]


Loading shots from /content/drive/MyDrive/dataset/djoko_sock/shots


100%|██████████| 247/247 [00:04<00:00, 52.15it/s] 


Loading shots from /content/drive/MyDrive/dataset/nadal/shots


100%|██████████| 442/442 [00:11<00:00, 38.71it/s]


Loading shots from /content/drive/MyDrive/dataset/federer/shots


100%|██████████| 325/325 [00:05<00:00, 62.92it/s] 


Loading shots from /content/drive/MyDrive/dataset/roland/shots


100%|██████████| 64/64 [00:13<00:00,  4.82it/s]


Loaded 32400 shots for training
Loaded 4050 shots for validation
Loaded 4050 shots for testing


Unnamed: 0,Description,Value
0,Session id,123
1,Target,shot
2,Target type,Multiclass
3,Target mapping,"backhand: 0, forehand: 1, neutral: 2, serve: 3"
4,Original data shape,"(32400, 27)"
5,Transformed data shape,"(32400, 27)"
6,Transformed train set shape,"(22680, 27)"
7,Transformed test set shape,"(9720, 27)"
8,Numeric features,26
9,Preprocess,True


In [8]:
# Compare different models
best_model = compare_models()

# Create a model (e.g., Decision Tree)
dt_model = create_model('dt')

# Tune the model
tuned_dt_model = tune_model(dt_model)

# Finalize the model
final_model = finalize_model(tuned_dt_model)

# Evaluate the model
evaluate_model(final_model)

# Make predictions on the validation and test sets
val_predictions = predict_model(final_model, data=val_df)
test_predictions = predict_model(final_model, data=test_df)

# Display the predictions
print("Validation Predictions:")
print(val_predictions.head())

print("Test Predictions:")
print(test_predictions.head())

# Save the model for future use
save_model(final_model, 'final_tennis_shot_model')

print("Model saved successfully!")

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
et,Extra Trees Classifier,0.9663,0.9963,0.9663,0.9663,0.9662,0.9468,0.9468,2.635
xgboost,Extreme Gradient Boosting,0.9653,0.9959,0.9653,0.9654,0.9653,0.9454,0.9454,4.387
catboost,CatBoost Classifier,0.9633,0.9954,0.9633,0.9633,0.9633,0.9421,0.9421,45.34
knn,K Neighbors Classifier,0.9589,0.9928,0.9589,0.9589,0.9588,0.9351,0.9351,0.807
lightgbm,Light Gradient Boosting Machine,0.9585,0.9946,0.9585,0.9586,0.9584,0.9345,0.9345,10.031
rf,Random Forest Classifier,0.9562,0.994,0.9562,0.9563,0.9562,0.931,0.931,16.104
gbc,Gradient Boosting Classifier,0.9214,0.0,0.9214,0.9215,0.9213,0.8758,0.8759,98.578
qda,Quadratic Discriminant Analysis,0.8953,0.0,0.8953,0.9004,0.8959,0.8382,0.8402,0.17
dt,Decision Tree Classifier,0.8951,0.9168,0.8951,0.8952,0.895,0.8347,0.8348,1.582
lr,Logistic Regression,0.8737,0.0,0.8737,0.8741,0.8729,0.7991,0.7996,1.769


Processing:   0%|          | 0/69 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.903,0.9242,0.903,0.903,0.903,0.8474,0.8474
1,0.8968,0.9182,0.8968,0.8968,0.8968,0.8373,0.8373
2,0.8907,0.9119,0.8907,0.8906,0.8906,0.8276,0.8276
3,0.9012,0.922,0.9012,0.9014,0.9013,0.8446,0.8446
4,0.8937,0.9164,0.8937,0.8944,0.8939,0.8332,0.8332
5,0.9012,0.9209,0.9012,0.9013,0.9009,0.8439,0.844
6,0.8858,0.9079,0.8858,0.8855,0.8855,0.8194,0.8196
7,0.8924,0.9143,0.8924,0.8923,0.8923,0.8305,0.8305
8,0.8858,0.9096,0.8858,0.8863,0.8859,0.8203,0.8204
9,0.8999,0.9222,0.8999,0.9003,0.9,0.8429,0.8429


Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8783,0.9254,0.8783,0.8786,0.8783,0.8087,0.8087
1,0.8721,0.9176,0.8721,0.8726,0.8723,0.7991,0.7992
2,0.8726,0.9219,0.8726,0.8732,0.8727,0.8002,0.8003
3,0.8602,0.907,0.8602,0.8599,0.8598,0.7793,0.7794
4,0.8748,0.928,0.8748,0.8765,0.8751,0.8044,0.8048
5,0.8677,0.9229,0.8677,0.8683,0.8675,0.792,0.7922
6,0.8717,0.9227,0.8717,0.8717,0.8717,0.7982,0.7982
7,0.8607,0.9159,0.8607,0.8603,0.8605,0.7801,0.7802
8,0.8726,0.9228,0.8726,0.8726,0.8723,0.7986,0.7987
9,0.8774,0.9209,0.8774,0.8782,0.8776,0.8077,0.8078


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


Original model was better than the tuned model, hence it will be returned. NOTE: The display metrics are for the tuned model (not the original one).


interactive(children=(ToggleButtons(description='Plot Type:', icons=('',), options=(('Pipeline Plot', 'pipelin…

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Decision Tree Classifier,0.7506,0.8022,0.7506,0.7504,0.7504,0.6075,0.6076


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Decision Tree Classifier,0.5822,0.6648,0.5822,0.5809,0.5814,0.3389,0.3389


Validation Predictions:
          0         1         2         3         4         5         6  \
0  0.157650  0.577344  0.243084  0.466177  0.251609  0.581694  0.294856   
1  0.152841  0.619571  0.242811  0.507550  0.248012  0.581250  0.319128   
2  0.145394  0.644839  0.228342  0.527370  0.242511  0.597727  0.328471   
3  0.165021  0.504495  0.243792  0.406209  0.251582  0.560108  0.270734   
4  0.163170  0.521714  0.238282  0.435774  0.256287  0.574662  0.285564   

          7         8         9  ...        19        20        21        22  \
0  0.519324  0.330640  0.722920  ...  0.393899  0.625271  0.700870  0.883997   
1  0.628001  0.328828  0.725881  ...  0.402102  0.626860  0.749843  0.894025   
2  0.624552  0.355783  0.730472  ...  0.412968  0.631284  0.768440  0.903537   
3  0.417645  0.362852  0.656138  ...  0.372228  0.699400  0.574350  0.899364   
4  0.446502  0.366833  0.670524  ...  0.375838  0.697456  0.598495  0.896026   

         23        24        25      shot  p