In [2]:
import mediapipe as mp
import cv2
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

# Drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

## 1. Set up important functions

In [3]:
def rescale_frame(frame, percent=50):
    '''
    Rescale a frame to a certain percentage compare to its original frame
    '''
    width = int(frame.shape[1] * percent/ 100)
    height = int(frame.shape[0] * percent/ 100)
    dim = (width, height)
    return cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)


def describe_dataset(dataset_path: str):
    '''
    Describe dataset
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")
    
    duplicate = data[data.duplicated()]
    print(f"Duplicate Rows : {len(duplicate.sum(axis=1))}")

    return data

## 2. Describe and process data

In [7]:
TRAIN_SET_PATH  = "./err.train.csv"
TEST_SET_PATH  = "./err.test.csv"

In [6]:
df = describe_dataset(TRAIN_SET_PATH)

df.tail(3)

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']
Number of rows: 9736 
Number of columns: 53

Labels: 
C    4931
L    4805
Name: label, dtype: int64

Missing values: 

Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
9733,L,0.669352,0.42572,-0.021043,0.999866,0.60697,0.398885,0.145805,0.999786,0.613258,...,-0.016324,0.986156,0.610678,0.771395,0.127577,0.989525,0.346186,0.798625,-0.023567,0.989879
9734,L,0.669114,0.39997,-0.023006,0.999879,0.604786,0.37543,0.148802,0.999794,0.613155,...,0.035773,0.986634,0.363347,0.770086,0.107243,0.989219,0.565096,0.81358,0.039849,0.990102
9735,L,0.669667,0.352629,-0.018747,0.999891,0.604143,0.331793,0.143837,0.999813,0.615891,...,-0.018783,0.985358,0.608766,0.783921,0.112437,0.97807,0.351742,0.788729,-0.047408,0.988161


In [8]:
# Extract features and class
X = df.drop("label", axis=1)
y = df["label"]

sc = StandardScaler()
X = pd.DataFrame(sc.fit_transform(X))

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)
y_test.head(3)

1739    C
643     C
9149    L
Name: label, dtype: object

## 3. Train & Evaluate Model

### 3.1. Train and evaluate model with train set

In [10]:
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score

In [11]:
algorithms =[("LR", LogisticRegression()),
         ("SVC", SVC()),
         ('KNN',KNeighborsClassifier()),
         ("DTC", DecisionTreeClassifier()),
         ("SGDC", SGDClassifier()),
         ("Ridge", RidgeClassifier()),
         ('RF', RandomForestClassifier()),]

models = {}
final_results = []

for name, model in algorithms:
    trained_model = model.fit(X_train, y_train)
    models[name] = trained_model

    # Evaluate model
    model_results = model.predict(X_test)

    p_score = precision_score(y_test, model_results, average="macro")
    a_score = accuracy_score(y_test, model_results)
    r_score = recall_score(y_test, model_results, average="micro")
    f1_score_result = f1_score(y_test, model_results, average=None, labels=["C", "L"])
    final_results.append(( name, p_score, a_score, r_score, f1_score_result ))


In [13]:
# Sort results by F1 score
final_results.sort(key=lambda k: k[4][0] + k[4][1], reverse=True)

pd.DataFrame(final_results, columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score"])

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score
0,SVC,1.0,1.0,1.0,"[1.0, 1.0]"
1,SGDC,1.0,1.0,1.0,"[1.0, 1.0]"
2,RF,1.0,1.0,1.0,"[1.0, 1.0]"
3,LR,0.999506,0.999487,0.999487,"[0.9995061728395062, 0.9994655264564404]"
4,KNN,0.999506,0.999487,0.999487,"[0.9995061728395062, 0.9994655264564404]"
5,DTC,0.997872,0.997947,0.997947,"[0.998019801980198, 0.9978678038379531]"
6,Ridge,0.997985,0.997947,0.997947,"[0.998025666337611, 0.997860962566845]"


### 3.2. Test set evaluation

In [14]:
test_df = describe_dataset(TEST_SET_PATH)
test_df = test_df.sample(frac=1).reset_index(drop=True)

test_x = test_df.drop("label", axis=1)
test_y = test_df["label"]

test_x = pd.DataFrame(sc.transform(test_x))

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']
Number of rows: 622 
Number of columns: 53

Labels: 
L    344
C    278
Name: label, dtype: int64

Missing values: Fal

In [15]:
testset_final_results = []

for name, model in models.items():
    # Evaluate model
    model_results = model.predict(test_x)

    p_score = precision_score(test_y, model_results, average="macro")
    a_score = accuracy_score(test_y, model_results)
    r_score = recall_score(test_y, model_results, average="macro")
    f1_score_result = f1_score(test_y, model_results, average=None, labels=["C", "L"])
    testset_final_results.append(( name, p_score, a_score, r_score, f1_score_result ))


testset_final_results.sort(key=lambda k: k[4][0] + k[4][1], reverse=True)
pd.DataFrame(testset_final_results, columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score"])

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score
0,LR,0.837255,0.733119,0.701439,"[0.5743589743589744, 0.8056206088992974]"
1,Ridge,0.808955,0.707395,0.674042,"[0.5235602094240838, 0.7888631090487239]"
2,KNN,0.65271,0.633441,0.604421,"[0.44660194174757284, 0.7259615384615383]"
3,SVC,0.727951,0.64791,0.610946,"[0.39999999999999997, 0.7508532423208191]"
4,SGDC,0.809353,0.659164,0.618705,"[0.3837209302325581, 0.7644444444444444]"
5,RF,0.78243,0.573955,0.523381,"[0.08934707903780069, 0.7219307450157397]"
6,DTC,0.635192,0.557878,0.506086,"[0.03508771929824561, 0.7132429614181439]"


## 4. Dump Models and Scaler

According to the evaluation above, LR, Ridge and KNN models would be chosen for more eval.

In [18]:
with open("./model/LR_err_model.pkl", "wb") as f:
    pickle.dump(models["LR"], f)

In [19]:
with open("./model/Ridge_err_model.pkl", "wb") as f:
    pickle.dump(models["Ridge"], f)

In [20]:
# Dump input scaler
with open("./model/err_input_scaler.pkl", "wb") as f:
    pickle.dump(sc, f)