In [1]:
import os
import pickle
import warnings

import mediapipe as mp
import cv2
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix
# Silence all Python warnings from this point on.
# NOTE(review): this also hides scikit-learn warnings (e.g. convergence
# warnings) raised while training below - confirm that is intended.
warnings.filterwarnings('ignore')

# Drawing helpers
# Short aliases for MediaPipe's drawing utilities and pose solution modules.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

objc[47048]: Class CaptureDelegate is implemented in both /Users/jainilpatel/PycharmProjects/Exercise-Correction/.venv/lib/python3.9/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x31f248860) and /Users/jainilpatel/PycharmProjects/Exercise-Correction/.venv/lib/python3.9/site-packages/cv2/cv2.abi3.so (0x34955a480). One of the two will be used. Which one is undefined.
objc[47048]: Class CVWindow is implemented in both /Users/jainilpatel/PycharmProjects/Exercise-Correction/.venv/lib/python3.9/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x31ef90a68) and /Users/jainilpatel/PycharmProjects/Exercise-Correction/.venv/lib/python3.9/site-packages/cv2/cv2.abi3.so (0x34955a4d0). One of the two will be used. Which one is undefined.
objc[47048]: Class CVView is implemented in both /Users/jainilpatel/PycharmProjects/Exercise-Correction/.venv/lib/python3.9/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x31ef90a90) and /Users/jainilpatel/PycharmPr

### 1. Train Model

#### 1.1. Describe data and split dataset

In [2]:
def rescale_frame(frame, percent=50):
    '''
    Resize a frame to a percentage of its original size.

    Args:
        frame: Image whose ``shape`` starts with (height, width).
        percent: Target size as a percentage of the original width and height.

    Returns:
        The result of ``cv2.resize`` for the scaled (width, height), using
        ``cv2.INTER_AREA`` interpolation.
    '''
    src_height, src_width = frame.shape[0], frame.shape[1]
    # cv2.resize expects the target size as (width, height)
    target_size = (
        int(src_width * percent/ 100),
        int(src_height * percent/ 100),
    )
    return cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)


def describe_dataset(dataset_path: str):
    '''
    Load a CSV dataset and print a short summary of it.

    Prints the column headers, the row and column counts, the per-class counts
    of the "label" column, whether any value is missing, and the number of
    fully duplicated rows.

    Args:
        dataset_path: Path to a CSV file that contains a "label" column.

    Returns:
        The loaded pandas DataFrame, unmodified.
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")

    # Count duplicated rows straight from the boolean mask. A row-wise sum over
    # the duplicated rows is unnecessary and can fail with a TypeError when the
    # frame mixes string and numeric columns.
    duplicate_count = int(data.duplicated().sum())
    print(f"Duplicate Rows : {duplicate_count}")

    return data


def round_up_metric_results(results) -> list:
    '''Return the metric values (precision, recall, ...) rounded to 3 decimal places.'''
    return [round(value, 3) for value in results]

In [3]:
df = describe_dataset("./train.csv")

# Encode the string class labels as integers: "C" -> 0, "H" -> 1, "L" -> 2
for label_name, label_id in (("C", 0), ("H", 1), ("L", 2)):
    df.loc[df["label"] == label_name, "label"] = label_id

df.tail(3)

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'lef

Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
28625,0,0.358292,0.814457,-0.114346,0.999731,0.426534,0.752245,-0.265804,0.999718,0.427563,...,0.344237,0.648843,0.855987,0.943825,-0.025909,0.894483,0.816161,0.913611,0.311469,0.693028
28626,0,0.358111,0.814594,-0.119521,0.999727,0.42646,0.751355,-0.269261,0.999717,0.427519,...,0.337748,0.647176,0.856293,0.94391,-0.038278,0.895176,0.816513,0.913366,0.305406,0.692788
28627,0,0.357921,0.814615,-0.117429,0.999731,0.425952,0.751812,-0.265476,0.999726,0.427602,...,0.348348,0.649861,0.856048,0.944072,-0.039961,0.900096,0.812432,0.909268,0.319851,0.700159


In [4]:
# Separate the integer class vector from the feature matrix
# (the features are every column except "label")
y = df["label"].astype("int")
X = df.drop(columns="label")

In [5]:
# Standardise the features and keep the fitted scaler: it is reused to
# transform the test set and is pickled at the end of the notebook.
#
# The scaler is fit on the raw feature columns of `df` rather than on `X`,
# because this cell overwrites `X` with its scaled version. Fitting on `X`
# would make a second run of the cell refit the scaler on already-standardised
# data and silently corrupt the saved scaler.
#
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split in the next cell, so the held-out rows influence the scaling
# statistics. Consider fitting on the training split only.
sc = StandardScaler()
X = pd.DataFrame(sc.fit_transform(df.drop("label", axis=1)))

In [6]:
# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1234,
)
y_test.head(3)

17257    0
7466     2
23509    2
Name: label, dtype: int64

#### 1.2. Train models using Scikit-Learn and evaluate on the held-out split of the train set

In [7]:
# Fixed seed for the estimators that use randomness, so that the trained models
# and the reported metrics are reproducible across runs.
RANDOM_STATE = 1234

# (short name, unfitted estimator) pairs to train and compare
algorithms = [
    ("LR", LogisticRegression()),
    ("SVC", SVC(probability=True, random_state=RANDOM_STATE)),
    ("KNN", KNeighborsClassifier()),
    ("DTC", DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ("SGDC", CalibratedClassifierCV(SGDClassifier(random_state=RANDOM_STATE))),
    ("NB", GaussianNB()),
    ("RF", RandomForestClassifier(random_state=RANDOM_STATE)),
]

# Fitted estimators keyed by their short name
models = {}
# One tuple per model: (name, precision, accuracy, recall, f1, confusion matrix)
final_results = []

# Fixed label order so the per-class scores and confusion matrices line up
class_labels = [0, 1, 2]

for name, model in algorithms:
    trained_model = model.fit(X_train, y_train)
    models[name] = trained_model

    # Evaluate model on the held-out split
    model_results = trained_model.predict(X_test)

    # average=None yields one score per class instead of a single aggregate
    p_score = precision_score(y_test, model_results, average=None, labels=class_labels)
    a_score = accuracy_score(y_test, model_results)
    r_score = recall_score(y_test, model_results, average=None, labels=class_labels)
    f1_score_result = f1_score(y_test, model_results, average=None, labels=class_labels)
    cm = confusion_matrix(y_test, model_results, labels=class_labels)
    final_results.append((
        name,
        round_up_metric_results(p_score),
        a_score,
        round_up_metric_results(r_score),
        round_up_metric_results(f1_score_result),
        cm,
    ))


In [8]:
# Rank the models by the sum of their per-class F1 scores, best first
result_columns = ["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score", "Confusion Matrix"]
final_results.sort(key=lambda row: sum(row[4]), reverse=True)

pd.DataFrame(final_results, columns=result_columns)

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score,Confusion Matrix
0,RF,"[0.998, 1.0, 0.999]",0.999127,"[0.999, 1.0, 0.998]","[0.999, 1.0, 0.999]","[[1963, 0, 1], [0, 1825, 0], [4, 0, 1933]]"
1,KNN,"[0.998, 1.0, 0.998]",0.998778,"[0.998, 0.999, 0.998]","[0.998, 1.0, 0.998]","[[1961, 0, 3], [1, 1824, 0], [3, 0, 1934]]"
2,SVC,"[0.996, 1.0, 0.995]",0.997031,"[0.995, 0.999, 0.996]","[0.996, 1.0, 0.996]","[[1955, 0, 9], [1, 1824, 0], [7, 0, 1930]]"
3,DTC,"[0.996, 0.999, 0.997]",0.997555,"[0.998, 0.999, 0.995]","[0.997, 0.999, 0.996]","[[1960, 0, 4], [0, 1824, 1], [8, 1, 1928]]"
4,LR,"[0.993, 1.0, 0.994]",0.995634,"[0.994, 0.999, 0.994]","[0.994, 0.999, 0.994]","[[1953, 0, 11], [1, 1823, 1], [12, 0, 1925]]"
5,SGDC,"[0.994, 0.997, 0.993]",0.994761,"[0.991, 0.999, 0.994]","[0.993, 0.998, 0.994]","[[1946, 5, 13], [1, 1824, 0], [10, 1, 1926]]"
6,NB,"[0.817, 0.917, 0.941]",0.887531,"[0.872, 0.958, 0.837]","[0.843, 0.937, 0.886]","[[1712, 151, 101], [77, 1748, 0], [307, 8, 1622]]"


#### 1.3. Test set evaluation

In [9]:
test_df = describe_dataset("./test.csv")
# Shuffle the rows with a fixed seed so that re-running the notebook yields the
# same row order.
test_df = test_df.sample(frac=1, random_state=1234).reset_index(drop=True)

# Encode the string class labels as integers: "C" -> 0, "H" -> 1, "L" -> 2
test_df.loc[test_df["label"] == "C", "label"] = 0
test_df.loc[test_df["label"] == "H", "label"] = 1
test_df.loc[test_df["label"] == "L", "label"] = 2

test_x = test_df.drop("label", axis=1)
test_y = test_df["label"].astype("int")

# Reuse the scaler fitted earlier in the notebook (transform only, no refit)
test_x = pd.DataFrame(sc.transform(test_x))

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'lef

In [10]:
testset_final_results = []
class_labels = [0, 1, 2]

for model_name, fitted_model in models.items():
    # Predict on the scaled test set
    predictions = fitted_model.predict(test_x)

    # Per-class scores (average=None), reported in the fixed label order
    precision_per_class = precision_score(test_y, predictions, average=None, labels=class_labels)
    recall_per_class = recall_score(test_y, predictions, average=None, labels=class_labels)
    f1_per_class = f1_score(test_y, predictions, average=None, labels=class_labels)
    accuracy = accuracy_score(test_y, predictions)
    matrix = confusion_matrix(test_y, predictions, labels=class_labels)

    testset_final_results.append((
        model_name,
        round_up_metric_results(precision_per_class),
        accuracy,
        round_up_metric_results(recall_per_class),
        round_up_metric_results(f1_per_class),
        matrix,
    ))


# Rank the models by the sum of their per-class F1 scores, best first
testset_final_results.sort(key=lambda row: sum(row[4]), reverse=True)
pd.DataFrame(
    testset_final_results,
    columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score", "Confusion Matrix"],
)

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score,Confusion Matrix
0,LR,"[0.959, 1.0, 1.0]",0.985915,"[1.0, 0.975, 0.983]","[0.979, 0.987, 0.991]","[[234, 0, 0], [6, 235, 0], [4, 0, 231]]"
1,SGDC,"[0.966, 0.96, 1.0]",0.974648,"[0.957, 0.983, 0.983]","[0.961, 0.971, 0.991]","[[224, 10, 0], [4, 237, 0], [4, 0, 231]]"
2,SVC,"[0.914, 1.0, 1.0]",0.969014,"[1.0, 0.929, 0.979]","[0.955, 0.963, 0.989]","[[234, 0, 0], [17, 224, 0], [5, 0, 230]]"
3,RF,"[0.788, 1.0, 1.0]",0.911268,"[1.0, 0.988, 0.745]","[0.881, 0.994, 0.854]","[[234, 0, 0], [3, 238, 0], [60, 0, 175]]"
4,KNN,"[0.699, 1.0, 0.965]",0.853521,"[0.974, 0.888, 0.698]","[0.814, 0.941, 0.81]","[[228, 0, 6], [27, 214, 0], [71, 0, 164]]"
5,NB,"[0.902, 0.736, 0.946]",0.84507,"[0.632, 0.938, 0.962]","[0.744, 0.825, 0.954]","[[148, 73, 13], [15, 226, 0], [1, 8, 226]]"
6,DTC,"[0.722, 1.0, 0.738]",0.815493,"[0.722, 0.929, 0.791]","[0.722, 0.963, 0.764]","[[169, 0, 65], [16, 224, 1], [49, 0, 186]]"


#### 1.4. Dump models and input scaler using pickle

According to the test-set evaluation, several models perform well; the best are LR, SGDC and SVC. All trained models are saved together, and LR and SVC are also saved individually below.

In [11]:
# Make sure the output directory exists first: open(..., "wb") does not create
# missing parent directories and would fail with FileNotFoundError.
os.makedirs("./model", exist_ok=True)

# Persist every trained model in one file (dict: short name -> fitted estimator)
with open("./model/all_sklearn_jainil.pkl", "wb") as f:
    pickle.dump(models, f)

In [12]:
# Save the logistic-regression model on its own
with open("./model/LR_model_jainil.pkl", mode="wb") as lr_file:
    pickle.dump(models["LR"], lr_file)

In [13]:
# Save the support-vector classifier on its own
with open("./model/SVC_model_jainil.pkl", mode="wb") as svc_file:
    pickle.dump(models["SVC"], svc_file)

In [14]:
# Save the fitted StandardScaler so the same scaling can be applied
# wherever the pickled models are used
with open("./model/input_scaler_jainil.pkl", mode="wb") as scaler_file:
    pickle.dump(sc, scaler_file)