In [7]:
import os
import pickle
import warnings

import mediapipe as mp
import cv2
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, RidgeClassifier, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score
# Silence every warning raised for the rest of the notebook session.
# NOTE(review): this is a blanket filter, so warnings emitted by the libraries
# used below are hidden as well — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Drawing helpers
# Short aliases for MediaPipe's drawing utilities and pose solution modules
# (neither alias is referenced by the cells visible in this part of the notebook).
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

### 1. Train model

#### 1.1. Describe data and split dataset

In [3]:
def rescale_frame(frame, percent=50):
    '''
    Resize a frame to a given percentage of its original width and height.

    Parameters
    ----------
    frame : object exposing ``shape`` as (height, width, ...) — e.g. an image array
    percent : target size as a percentage of the original size (default 50)

    Returns
    -------
    The result of ``cv2.resize`` called with the scaled (width, height) and
    ``cv2.INTER_AREA`` interpolation.
    '''
    def _scaled(length):
        # Multiply before dividing, then truncate towards zero
        return int(length * percent / 100)

    # cv2.resize expects the target size as (width, height)
    target_size = (_scaled(frame.shape[1]), _scaled(frame.shape[0]))
    return cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)


def describe_dataset(dataset_path: str):
    '''
    Load a CSV dataset and print a short summary of it.

    The summary lists the column headers, the number of rows and columns,
    the number of rows per value of the ``label`` column, whether any value
    is missing, and how many rows are duplicates of an earlier row.

    Parameters
    ----------
    dataset_path : path of the CSV file to read; it must contain a ``label`` column

    Returns
    -------
    The loaded ``pandas.DataFrame``.

    Raises
    ------
    KeyError if the file has no ``label`` column.
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")

    # Count duplicates straight from the boolean mask. Summing every duplicated
    # row across columns just to take its length is wasteful, and the row-wise
    # sum mixes the string label with the numeric features, which can raise
    # TypeError as soon as a duplicate exists (pandas >= 2.0 no longer drops
    # non-numeric columns silently).
    duplicate_count = int(data.duplicated().sum())
    print(f"Duplicate Rows : {duplicate_count}")

    return data

In [4]:
# Read the dataset (path is relative to the notebook's working directory),
# print its summary and keep the loaded frame for the following cells
df = describe_dataset("./dataset.csv")

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v']
Number of rows: 5151 
Number of columns: 21

Labels: 
C    2685
L    2466
Name: label, dtype: int64

Missing values: False

Duplicate Rows : 0


In [5]:
# Separate the target column from the feature matrix

y = df["label"]  # class label of each row
X = df.drop(columns=["label"])  # every remaining column is used as a feature

In [6]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1234,
    test_size=0.3,
)
y_test.head()

580     C
4636    L
3761    C
3225    L
3792    C
Name: label, dtype: object

#### 1.2. Train model using Scikit-learn

In [10]:
# Seed for every estimator with a stochastic training procedure, so that
# re-running this cell reproduces the same scores and ranking.
# Same value as the seed used for the train/test split.
RANDOM_STATE = 1234

# Candidate classifiers, all with scikit-learn defaults apart from the seed
algorithms = [
    ("LR", LogisticRegression()),
    ("SVC", SVC()),
    ("KNN", KNeighborsClassifier()),
    ("DTC", DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ("SGDC", SGDClassifier(random_state=RANDOM_STATE)),
    ("Ridge", RidgeClassifier()),
    ("RF", RandomForestClassifier(random_state=RANDOM_STATE)),
]

models = {}  # short name -> fitted estimator
final_results = []  # (name, macro precision, accuracy, per-class F1 array)

for name, model in algorithms:
    trained_model = model.fit(X_train, y_train)
    models[name] = trained_model

    # Evaluate the fitted model on the held-out split
    model_results = trained_model.predict(X_test)

    p_score = precision_score(y_test, model_results, average="macro")
    a_score = accuracy_score(y_test, model_results)
    # average=None keeps one F1 value per class, in the order given by `labels`
    f1_score_result = f1_score(y_test, model_results, average=None, labels=["C", "L"])
    final_results.append((name, p_score, a_score, f1_score_result))


# Rank the models by the sum of their two per-class F1 scores, best first
final_results.sort(key=lambda k: k[3][0] + k[3][1], reverse=True)

In [11]:
# Show the evaluation results as a table, in the order final_results was sorted
# (best combined F1 first). The "F1 score" column holds the per-class array
# for the labels ["C", "L"].
pd.DataFrame(final_results, columns=["Model", "Precision Score", "Accuracy score" , "F1 score"])

Unnamed: 0,Model,Precision Score,Accuracy score,F1 score
0,RF,0.997368,0.997413,"[0.9974968710888611, 0.9973226238286479]"
1,KNN,0.996114,0.996119,"[0.99625, 0.9959785522788204]"
2,DTC,0.99549,0.995472,"[0.995627732667083, 0.9953051643192489]"
3,SVC,0.988031,0.98771,"[0.9882060831781502, 0.9871708305199189]"
4,Ridge,0.986211,0.98577,"[0.9863692688971498, 0.9851150202976996]"
5,SGDC,0.982129,0.981242,"[0.9821318545902649, 0.9802586793737236]"
6,LR,0.963468,0.963131,"[0.9646621202727836, 0.9614604462474645]"


#### 1.3. Dump models to pickle files

In [13]:
# Persist the fitted Random Forest so it can be reloaded without retraining.
# open() does not create missing parent directories, so make sure the output
# directory exists first (otherwise a fresh checkout fails with FileNotFoundError).
os.makedirs("./model", exist_ok=True)
with open("./model/RF_model.pkl", "wb") as f:
    pickle.dump(models["RF"], f)

In [14]:
# Persist the fitted k-nearest-neighbours model so it can be reloaded without retraining.
# open() does not create missing parent directories, so make sure the output
# directory exists first (otherwise a fresh checkout fails with FileNotFoundError).
os.makedirs("./model", exist_ok=True)
with open("./model/KNN_model.pkl", "wb") as f:
    pickle.dump(models["KNN"], f)