In [1]:
import mediapipe as mp
import cv2
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

# Short aliases for the MediaPipe drawing utilities and pose solution
mp_drawing, mp_pose = mp.solutions.drawing_utils, mp.solutions.pose

## 1. Train Model

### 1.1. Describe data

In [2]:
def rescale_frame(frame, percent=50):
    '''
    Resize a frame to `percent` percent of its original width and height.

    The target size is computed from `frame.shape` (index 1 for the width,
    index 0 for the height), truncated to integers, and passed to
    `cv2.resize` with `cv2.INTER_AREA` interpolation.
    '''
    src_height, src_width = frame.shape[0], frame.shape[1]
    # cv2.resize is given the target size as (width, height)
    target_size = (
        int(src_width * percent / 100),
        int(src_height * percent / 100),
    )
    return cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)


def describe_dataset(dataset_path: str):
    '''
    Load a CSV dataset and print a short summary of it.

    The summary contains the column headers, the number of rows and columns,
    the per-class counts of the `label` column, whether any value is
    missing, and the number of duplicated rows.

    Args:
        dataset_path: Path of the CSV file to read. The file must contain a
            `label` column.

    Returns:
        The loaded `pandas.DataFrame`.

    Raises:
        KeyError: If the CSV has no `label` column.
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")

    # Count duplicates straight from the boolean mask. Summing the duplicated
    # rows across columns only to take their length mixes the string label
    # with the float features, which is wasteful and can fail on mixed dtypes.
    duplicate_count = int(data.duplicated().sum())
    print(f"Duplicate Rows : {duplicate_count}")

    return data

In [3]:
# Load the training set; describe_dataset also prints its summary
train_csv_path = "./stage.train.csv"
df = describe_dataset(train_csv_path)
df.head(3)

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']
Number of rows: 24244 
Number of columns: 53

Labels: 
D    8232
M    8148
I    7864
Name: label, dtype: int64

Missi

Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
0,M,0.496085,0.286904,-0.219098,0.999996,0.500287,0.360987,0.019479,0.999978,0.436462,...,-0.268695,0.996758,0.370391,0.893386,0.505172,0.931761,0.566927,1.005949,-0.382462,0.998906
1,M,0.496126,0.286918,-0.217849,0.999996,0.500281,0.360954,0.019995,0.999977,0.436466,...,-0.271191,0.996724,0.370344,0.89329,0.505325,0.931969,0.56704,1.005795,-0.384848,0.998902
2,M,0.496144,0.286921,-0.217039,0.999996,0.500279,0.360923,0.020068,0.999977,0.436469,...,-0.271365,0.996699,0.370316,0.893275,0.504931,0.931633,0.56704,1.005774,-0.384872,0.998894


### 1.2. Train and evaluate model with train set

In [4]:
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score

In [5]:
# Target: the class label of every row
y = df["label"]

# Features: every column except the label, standardized with a scaler that is
# kept in `sc` so the same transformation can be reapplied later.
# NOTE(review): the scaler is fit on the whole frame before the train/test
# split, so the held-out rows contribute to the scaling statistics.
sc = StandardScaler()
features_scaled = sc.fit_transform(df.drop("label", axis=1))
X = pd.DataFrame(features_scaled)

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
split_parts = train_test_split(X, y, test_size=0.2, random_state=1234)
X_train, X_test, y_train, y_test = split_parts

y_train.head(3)

8474    M
4197    I
9705    I
Name: label, dtype: object

In [7]:
# Seed passed to the estimators seeded below so that re-running the notebook
# from a fresh kernel is not affected by their random initialisation.
SEED = 1234

# (short name, unfitted estimator) pairs to compare
algorithms = [
    ("LR", LogisticRegression()),
    ("SVC", SVC(probability=True, random_state=SEED)),
    ("KNN", KNeighborsClassifier()),
    ("DTC", DecisionTreeClassifier(random_state=SEED)),
    ("SGDC", SGDClassifier(random_state=SEED)),
    ("Ridge", RidgeClassifier()),
    ("RF", RandomForestClassifier(random_state=SEED)),
]

models = {}
final_results = []

for name, model in algorithms:
    # Keep every fitted estimator so later cells can evaluate and pickle it
    trained_model = model.fit(X_train, y_train)
    models[name] = trained_model

    # Evaluate model on the held-out split
    model_results = model.predict(X_test)

    p_score = precision_score(y_test, model_results, average="macro")
    a_score = accuracy_score(y_test, model_results)
    r_score = recall_score(y_test, model_results, average="macro")
    # Per-class F1, in the order given by `labels`: I, M, D
    f1_score_result = f1_score(y_test, model_results, average=None, labels=["I", "M", "D"])
    final_results.append((name, p_score, a_score, r_score, f1_score_result))


# Rank by the sum of the F1 scores of the first two labels ("I" and "M");
# the "D" score is not part of the key.
# NOTE(review): confirm that leaving "D" out of the ranking is intended.
final_results.sort(key=lambda k: k[4][0] + k[4][1], reverse=True)

In [8]:
# One row per model, in the order produced by the sort above
result_columns = ["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score"]
pd.DataFrame(final_results, columns=result_columns)

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score
0,KNN,0.995486,0.995463,0.995497,"[0.998108448928121, 0.9936189608021876, 0.9947..."
1,SVC,0.992812,0.992782,0.992862,"[0.9977952755905511, 0.9893390191897654, 0.991..."
2,RF,0.993807,0.993813,0.993882,"[0.9955974842767296, 0.9908592321755028, 0.995..."
3,LR,0.989931,0.989895,0.990009,"[0.9959080893925086, 0.9850381679389313, 0.988..."
4,DTC,0.990719,0.99072,0.990807,"[0.9934024505183789, 0.9865935405240707, 0.992..."
5,SGDC,0.986811,0.986801,0.986927,"[0.990883370009431, 0.980440097799511, 0.98921..."
6,Ridge,0.970926,0.970097,0.96998,"[0.9709677419354839, 0.9567827130852341, 0.982..."


### 1.3. Evaluate models with test set

In [9]:
test_df = describe_dataset("./stage.test.csv")
# Shuffle the rows with a fixed seed so the order is the same on every run
test_df = test_df.sample(frac=1, random_state=1234).reset_index(drop=True)

test_x = test_df.drop("label", axis=1)
test_y = test_df["label"]

# Reuse the already-fitted scaler `sc`: transform only, no refit on the test data
test_x = pd.DataFrame(sc.transform(test_x))

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']
Number of rows: 1205 
Number of columns: 53

Labels: 
D    416
I    402
M    387
Name: label, dtype: int64

Missing v

In [10]:
testset_final_results = []

for model_name, fitted_model in models.items():
    # Predict once and derive every metric from the same predictions
    predictions = fitted_model.predict(test_x)

    # Per-class F1, in the order given by `labels`: I, M, D
    per_class_f1 = f1_score(test_y, predictions, average=None, labels=["I", "M", "D"])
    testset_final_results.append((
        model_name,
        precision_score(test_y, predictions, average="macro"),
        accuracy_score(test_y, predictions),
        recall_score(test_y, predictions, average="macro"),
        per_class_f1,
    ))


# Best first, ranked by the sum of the "I" and "M" F1 scores
testset_final_results = sorted(
    testset_final_results,
    key=lambda row: row[4][0] + row[4][1],
    reverse=True,
)
pd.DataFrame(
    testset_final_results,
    columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score"],
)

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score
0,Ridge,0.953408,0.951037,0.949563,"[0.9763387297633873, 0.9199457259158751, 0.954..."
1,SVC,0.95564,0.951867,0.950163,"[0.9492325855962219, 0.9194444444444445, 0.982..."
2,LR,0.952856,0.948548,0.946658,"[0.950354609929078, 0.9131652661064426, 0.9764..."
3,KNN,0.919799,0.915353,0.916588,"[0.9745454545454546, 0.875609756097561, 0.8941..."
4,SGDC,0.934096,0.921992,0.919275,"[0.9013452914798206, 0.8633720930232559, 0.992..."
5,RF,0.899183,0.893776,0.892272,"[0.90744920993228, 0.8246575342465754, 0.94206..."
6,DTC,0.831701,0.834855,0.83334,"[0.9178082191780821, 0.7285129604365621, 0.841..."


## 2. Dump Models

Ranked by the sum of the F1 scores of the `I` and `M` classes on the test set, the best models are, in order:
- Ridge
- SVC
- LR

In [11]:
# Persist the fitted logistic regression model
lr_model_path = "./model/sklearn/stage_LR_model.pkl"
with open(lr_model_path, "wb") as model_file:
    pickle.dump(models["LR"], model_file)

In [12]:
# Persist the fitted SVC model
svc_model_path = "./model/sklearn/stage_SVC_model.pkl"
with open(svc_model_path, "wb") as model_file:
    pickle.dump(models["SVC"], model_file)

In [13]:
# Persist the fitted ridge classifier
ridge_model_path = "./model/sklearn/stage_Ridge_model.pkl"
with open(ridge_model_path, "wb") as model_file:
    pickle.dump(models["Ridge"], model_file)

In [14]:
# Persist the fitted input scaler alongside the models
scaler_path = "./model/input_scaler.pkl"
with open(scaler_path, "wb") as scaler_file:
    pickle.dump(sc, scaler_file)