# Trying out Classical Machine Learning methods to analyze the data

In this notebook. We visualize the different model performances. The models included are random forests, k-means clustering and baseline models

In [93]:
import os
import sys
import sklearn
import pandas as pd
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.dummy import DummyClassifier

opj = os.path.join

8 different preliminary data sets are imported. This .csv files contain the different punch types.

In [86]:
# Import the data.
cross = pd.read_csv("Data/Preprocessed Data/transformed_cross.csv", sep=",")
jab = pd.read_csv("Data/Preprocessed Data/transformed_jab.csv", sep=",")
lft_nopunch = pd.read_csv("Data/Preprocessed Data/transformed_lft_nopunch.csv", sep=",")
lh = pd.read_csv("Data/Preprocessed Data/transformed_lh.csv", sep=",")
lu = pd.read_csv("Data/Preprocessed Data/transformed_lu.csv", sep=",")
rght_nopunch = pd.read_csv("Data/Preprocessed Data/transformed_rght_nopunch.csv", sep=",")
rh = pd.read_csv("Data/Preprocessed Data/transformed_rh.csv", sep = ",")
ru = pd.read_csv("Data/Preprocessed Data/transformed_ru.csv", sep = ",") 

In [87]:
dfs = [cross, jab, lft_nopunch, lh, lu, rght_nopunch, rh, ru]
df_punching = pd.concat(dfs, ignore_index = True)
print(np.shape(df_punching))

(2052, 8)


In [88]:
mean_x = []
mean_y = []
mean_z = []
std_x = []
std_y = []
std_z = []
for df in dfs:
    mean_x.append(np.mean(df["X (m/s^2)"]))
    mean_y.append(np.mean(df["Y (m/s^2)"]))
    mean_z.append(np.mean(df["Z (m/s^2)"]))
    
    std_x.append(np.std(df["X (m/s^2)"]))
    std_y.append(np.std(df["Y (m/s^2)"]))
    std_z.append(np.std(df["Z (m/s^2)"]))

print(mean_x)
print(mean_y)
print(mean_z)

print(std_x)
print(std_y)
print(std_z)


[10.7462880794702, 9.0016059602649, 8.253933333333332, 13.399754966887418, 6.9975, 7.365608333333333, 18.569029801324504, 7.065897350993377]
[12.964622516556291, 11.265658940397351, 5.1141, 9.148586092715233, 10.104079470198675, 5.419225, 13.007394039735102, 12.761400662251654]
[5.375271523178808, 6.1075860927152315, 1.771875, 8.409072847682118, 14.624811258278143, 3.6738750000000002, 8.782211920529802, 18.992271523178808]
[2.3007999803243546, 2.098360015661925, 0.4780170626894214, 4.239533146962904, 1.5359042976661677, 0.7604847609237296, 8.320659194214375, 2.5769605086172964]
[5.558324134901821, 6.154472502794359, 0.6816157201825674, 4.227105228575339, 4.411668433205901, 0.9776028715050913, 7.044922006183568, 6.034994846977578]
[2.1084990375088686, 2.2238373487839262, 0.7223521943680473, 4.2962139170650335, 6.383054131098647, 0.7535496949162234, 5.562805724149111, 8.846297474783892]


In [89]:
# Split the data.
X = df_punching.drop(["Punch Type"], axis=1)
y = df_punching["Punch Type"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True)

In [90]:
# Train the model.
model = RandomForestClassifier(n_estimators=1000, random_state=42)
model.fit(X_train, y_train)

In [91]:
# Make predictions
y_pred = model.predict(X_test)

In [92]:
# Validate the model performance.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, zero_division=False)

print(f"Accuracy: {accuracy}")
print(f"classification_report: {report}")

Accuracy: 0.922077922077922
classification_report:               precision    recall  f1-score   support

       cross       0.92      0.94      0.93        97
         jab       0.90      0.87      0.89        86
 lft_nopunch       0.95      1.00      0.98        42
          lh       0.95      0.93      0.94        87
          lu       0.84      0.95      0.90        85
rght_nopunch       0.97      0.95      0.96        37
          rh       0.93      0.96      0.94        89
          ru       0.95      0.84      0.89        93

    accuracy                           0.92       616
   macro avg       0.93      0.93      0.93       616
weighted avg       0.92      0.92      0.92       616



In [94]:
baseline_model = DummyClassifier(strategy="uniform", random_state=42)
baseline_model.fit(X_train, y_train)

In [95]:
y_pred_baseline = baseline_model.predict(X_test)

In [96]:
accuracy_baseline = accuracy_score(y_test, y_pred_baseline)
report_baseline = classification_report(y_test, y_pred_baseline)

print(f"Baseline accuracy: {accuracy_baseline}")
print(report_baseline)

Baseline accuracy: 0.13636363636363635
              precision    recall  f1-score   support

       cross       0.20      0.16      0.18        97
         jab       0.12      0.09      0.11        86
 lft_nopunch       0.07      0.12      0.09        42
          lh       0.16      0.16      0.16        87
          lu       0.13      0.09      0.11        85
rght_nopunch       0.06      0.14      0.09        37
          rh       0.20      0.17      0.18        89
          ru       0.15      0.14      0.14        93

    accuracy                           0.14       616
   macro avg       0.13      0.13      0.13       616
weighted avg       0.15      0.14      0.14       616

