In [1]:
import mediapipe as mp
import cv2
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

# Drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

objc[44336]: Class CaptureDelegate is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x10b848860) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x163902480). One of the two will be used. Which one is undefined.
objc[44336]: Class CVWindow is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x1062fca68) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x1639024d0). One of the two will be used. Which one is undefined.
objc[44336]: Class CVView is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x1062fca90) and /Users/fuixlabsdev1/Programming/PP/graduation-th

## 1. Train Model

### 1.1. Describe data

In [2]:
def rescale_frame(frame, percent=50):
    '''
    Rescale a frame to a certain percentage compare to its original frame
    '''
    width = int(frame.shape[1] * percent/ 100)
    height = int(frame.shape[0] * percent/ 100)
    dim = (width, height)
    return cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)


def describe_dataset(dataset_path: str):
    '''
    Describe dataset
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")
    
    duplicate = data[data.duplicated()]
    print(f"Duplicate Rows : {len(duplicate.sum(axis=1))}")

    return data

In [3]:
df = describe_dataset("./dataset.csv")
df.head(5)

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']
Number of rows: 17040 
Number of columns: 53

Labels: 
M    6171
D    5735
I    5134
Name: label, dtype: int64

Missi

Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
0,M,0.496085,0.286904,-0.219098,0.999996,0.500287,0.360987,0.019479,0.999978,0.436462,...,-0.268695,0.996758,0.370391,0.893386,0.505172,0.931761,0.566927,1.005949,-0.382462,0.998906
1,M,0.496126,0.286918,-0.217849,0.999996,0.500281,0.360954,0.019995,0.999977,0.436466,...,-0.271191,0.996724,0.370344,0.89329,0.505325,0.931969,0.56704,1.005795,-0.384848,0.998902
2,M,0.496144,0.286921,-0.217039,0.999996,0.500279,0.360923,0.020068,0.999977,0.436469,...,-0.271365,0.996699,0.370316,0.893275,0.504931,0.931633,0.56704,1.005774,-0.384872,0.998894
3,M,0.496147,0.286919,-0.217032,0.999996,0.500283,0.360919,0.020086,0.999977,0.436476,...,-0.271355,0.996695,0.370313,0.89325,0.505156,0.931661,0.566979,1.005758,-0.384867,0.998894
4,M,0.496164,0.286909,-0.216764,0.999996,0.500284,0.360904,0.020158,0.999977,0.436488,...,-0.271353,0.996687,0.370301,0.89321,0.505274,0.931574,0.566932,1.00573,-0.384814,0.998891


### 1.2. Train and evaluate model with train set

In [4]:
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score

In [5]:
# Extract features and class
X = df.drop("label", axis=1) # features
y = df["label"]

# Standard Scaler
sc = StandardScaler()
X = pd.DataFrame(sc.fit_transform(X))

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)

y_train.head(5)

12650    I
7805     D
2267     D
12725    D
13889    M
Name: label, dtype: object

In [8]:
algorithms =[("LR", LogisticRegression()),
         ("SVC", SVC()),
         ('KNN',KNeighborsClassifier()),
         ("DTC", DecisionTreeClassifier()),
         ("SGDC", SGDClassifier()),
         ("Ridge", RidgeClassifier()),
         ('RF', RandomForestClassifier()),]

models = {}
final_results = []

for name, model in algorithms:
    trained_model = model.fit(X_train, y_train)
    models[name] = trained_model

    # Evaluate model
    model_results = model.predict(X_test)

    p_score = precision_score(y_test, model_results, average="macro")
    a_score = accuracy_score(y_test, model_results)
    r_score = recall_score(y_test, model_results, average="macro")
    f1_score_result = f1_score(y_test, model_results, average=None, labels=["I", "M", "D"])
    final_results.append(( name, p_score, a_score, r_score, f1_score_result ))


final_results.sort(key=lambda k: k[4][0] + k[4][1], reverse=True)

In [9]:
pd.DataFrame(final_results, columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score"])

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score
0,KNN,0.997093,0.997066,0.997144,"[0.9986000933271115, 0.9958402662229617, 0.996..."
1,SVC,0.9968,0.996772,0.996867,"[0.9986000933271115, 0.9954223886808157, 0.996..."
2,RF,0.997355,0.997359,0.997422,"[0.9976689976689976, 0.9962577962577962, 0.998..."
3,LR,0.99675,0.996772,0.996901,"[0.9981360671015843, 0.9954185755935028, 0.996..."
4,DTC,0.995593,0.995599,0.995726,"[0.9972067039106145, 0.9941763727121463, 0.995..."
5,SGDC,0.995575,0.995599,0.995742,"[0.9972041006523765, 0.9941666666666666, 0.995..."
6,Ridge,0.982086,0.980634,0.979938,"[0.9766109785202864, 0.9730831973898858, 0.992..."


### 1.4. Evaluate models with test set

In [16]:
test_df = describe_dataset("./test.csv")
test_df = test_df.sample(frac=1).reset_index(drop=True)

test_x = test_df.drop("label", axis=1)
test_y = test_df["label"]

test_x = pd.DataFrame(sc.transform(test_x))

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']
Number of rows: 829 
Number of columns: 53

Labels: 
D    299
I    267
M    263
Name: label, dtype: int64

Missing va

In [17]:
testset_final_results = []

for name, model in models.items():
    # Evaluate model
    model_results = model.predict(test_x)

    p_score = precision_score(test_y, model_results, average="macro")
    a_score = accuracy_score(test_y, model_results)
    r_score = recall_score(test_y, model_results, average="macro")
    f1_score_result = f1_score(test_y, model_results, average=None, labels=["C", "L"])
    testset_final_results.append(( name, p_score, a_score, r_score, f1_score_result ))


testset_final_results.sort(key=lambda k: k[4][0] + k[4][1], reverse=True)
pd.DataFrame(testset_final_results, columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score"])

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score
0,LR,1.0,1.0,1.0,"[0.0, 0.0]"
1,SVC,1.0,1.0,1.0,"[0.0, 0.0]"
2,KNN,0.844961,0.686369,0.670469,"[0.0, 0.0]"
3,DTC,0.584401,0.662244,0.65153,"[0.0, 0.0]"
4,SGDC,0.933473,0.915561,0.91128,"[0.0, 0.0]"
5,Ridge,0.959866,0.956574,0.959866,"[0.0, 0.0]"
6,RF,0.56541,0.64415,0.632518,"[0.0, 0.0]"


### 1.3. Dumped Model

In [18]:
with open("./model/LR_model.pkl", "wb") as f:
    pickle.dump(models["LR"], f)

In [26]:
with open("./model/KNN_model.pkl", "wb") as f:
    pickle.dump(models["KNN"], f)

In [27]:
with open("./model/input_scaler.pkl", "wb") as f:
    pickle.dump(sc, f)