In [1]:
import mediapipe as mp
import cv2
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV

import warnings
warnings.filterwarnings('ignore')

# Drawing helpers
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

In [2]:
def rescale_frame(frame, percent=50):
    '''
    Rescale a frame to a certain percentage compare to its original frame
    '''
    width = int(frame.shape[1] * percent/ 100)
    height = int(frame.shape[0] * percent/ 100)
    dim = (width, height)
    return cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)


def describe_dataset(dataset_path: str):
    '''
    Describe dataset
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")
    
    duplicate = data[data.duplicated()]
    print(f"Duplicate Rows : {len(duplicate.sum(axis=1))}")

    return data


def round_up_metric_results(results) -> list:
    '''Round up metrics results such as precision score, recall score, ...'''
    return list(map(lambda el: round(el, 3), results))

In [3]:
# load dataset
df = describe_dataset("./lumbar_train.csv")

# Categorizing label
df.loc[df["label"] == "lumbar_correct", "label"] = 0
df.loc[df["label"] == "lumbar_error", "label"] = 1

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v']
Number of rows: 5891 
Number of columns: 53

Labels: 
lumbar_correct    5001
lumbar_error       890
Name: label, dtype: int64

Missing values: False


In [4]:
# load dataset
df02 = describe_dataset("./torso_train.csv")

# Categorizing label
df02.loc[df02["label"] == "torso_correct", "label"] = 2
df02.loc[df02["label"] == "torso_error", "label"] = 3

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v']
Number of rows: 6938 
Number of columns: 53

Labels: 
torso_correct    6255
torso_error       683
Name: label, dtype: int64

Missing values: False

D

In [5]:
# Extract features and class
X = df.drop("label", axis=1)
y = df["label"].astype("int")

In [6]:
# Extract features and class
a = df02.drop("label", axis=1)
b = df02["label"].astype("int")

In [7]:
sc = StandardScaler()
X = pd.DataFrame(sc.fit_transform(X))

In [8]:
sc02 = StandardScaler()
a = pd.DataFrame(sc02.fit_transform(a))

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)
y_train

785     1
3810    0
2123    0
3859    0
5756    0
       ..
664     1
3276    0
1318    0
723     1
2863    0
Name: label, Length: 4712, dtype: int32

In [10]:
a_train, a_test, b_train, b_test = train_test_split(a, b, test_size=0.2, random_state=1234)
b_train

4113    2
6770    2
2532    2
2322    2
6680    2
       ..
664     3
3276    2
1318    2
723     2
2863    2
Name: label, Length: 5550, dtype: int32

In [11]:
algorithms =[("LR", LogisticRegression()),
         ("SVC", SVC(probability=True)),
         ('KNN',KNeighborsClassifier()),
         ("DTC", DecisionTreeClassifier()),
         ("SGDC", CalibratedClassifierCV(SGDClassifier())),
         ("NB", GaussianNB()),
         ('RF', RandomForestClassifier()),]

models = {}
final_results = []

for name, model in algorithms:
    trained_model = model.fit(X_train, y_train)
    models[name] = trained_model

    # Evaluate model
    model_results = model.predict(X_test)

    p_score = precision_score(y_test, model_results, average=None, labels=[0, 1])
    a_score = accuracy_score(y_test, model_results)
    r_score = recall_score(y_test, model_results, average=None, labels=[0, 1])
    f1_score_result = f1_score(y_test, model_results, average=None, labels=[0, 1])
    cm = confusion_matrix(y_test, model_results, labels=[0, 1])
    final_results.append(( name,  round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm))

# Sort results by F1 score
final_results.sort(key=lambda k: sum(k[4]), reverse=True)
pd.DataFrame(final_results, columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score", "Confusion Matrix"])

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score,Confusion Matrix
0,RF,"[0.934, 0.893]",0.929601,"[0.987, 0.609]","[0.96, 0.724]","[[987, 13], [70, 109]]"
1,KNN,"[0.934, 0.859]",0.926209,"[0.982, 0.615]","[0.958, 0.717]","[[982, 18], [69, 110]]"
2,SVC,"[0.926, 0.847]",0.917727,"[0.982, 0.559]","[0.953, 0.673]","[[982, 18], [79, 100]]"
3,LR,"[0.92, 0.701]",0.894826,"[0.959, 0.536]","[0.939, 0.608]","[[959, 41], [83, 96]]"
4,DTC,"[0.919, 0.588]",0.872774,"[0.932, 0.542]","[0.926, 0.564]","[[932, 68], [82, 97]]"
5,SGDC,"[0.902, 0.8]",0.893978,"[0.982, 0.402]","[0.94, 0.535]","[[982, 18], [107, 72]]"
6,NB,"[0.934, 0.481]",0.840543,"[0.874, 0.654]","[0.903, 0.555]","[[874, 126], [62, 117]]"


In [12]:
algorithms02 =[("LR", LogisticRegression()),
         ("SVC", SVC(probability=True)),
         ('KNN',KNeighborsClassifier()),
         ("DTC", DecisionTreeClassifier()),
         ("SGDC", CalibratedClassifierCV(SGDClassifier())),
         ("NB", GaussianNB()),
         ('RF', RandomForestClassifier()),]

models02 = {}
final_results02 = []

for name, model in algorithms02:
    trained_model02 = model.fit(a_train, b_train)
    models02[name] = trained_model02

    # Evaluate model
    model_results02 = model.predict(a_test)

    p_score = precision_score(b_test, model_results02, average=None, labels=[2, 3])
    a_score = accuracy_score(b_test, model_results02)
    r_score = recall_score(b_test, model_results02, average=None, labels=[2, 3])
    f1_score_result = f1_score(b_test, model_results02, average=None, labels=[2, 3])
    cm = confusion_matrix(b_test, model_results02, labels=[2, 3])
    final_results02.append(( name,  round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm))

# Sort results by F1 score
final_results02.sort(key=lambda k: sum(k[4]), reverse=True)
pd.DataFrame(final_results, columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score", "Confusion Matrix"])

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score,Confusion Matrix
0,RF,"[0.934, 0.893]",0.929601,"[0.987, 0.609]","[0.96, 0.724]","[[987, 13], [70, 109]]"
1,KNN,"[0.934, 0.859]",0.926209,"[0.982, 0.615]","[0.958, 0.717]","[[982, 18], [69, 110]]"
2,SVC,"[0.926, 0.847]",0.917727,"[0.982, 0.559]","[0.953, 0.673]","[[982, 18], [79, 100]]"
3,LR,"[0.92, 0.701]",0.894826,"[0.959, 0.536]","[0.939, 0.608]","[[959, 41], [83, 96]]"
4,DTC,"[0.919, 0.588]",0.872774,"[0.932, 0.542]","[0.926, 0.564]","[[932, 68], [82, 97]]"
5,SGDC,"[0.902, 0.8]",0.893978,"[0.982, 0.402]","[0.94, 0.535]","[[982, 18], [107, 72]]"
6,NB,"[0.934, 0.481]",0.840543,"[0.874, 0.654]","[0.903, 0.555]","[[874, 126], [62, 117]]"


In [13]:
with open("./model/all_sklearn_lumbar.pkl", "wb") as f:
    pickle.dump(models, f)

In [14]:
with open("./model/all_sklearn_torso.pkl", "wb") as f:
    pickle.dump(models02, f)

In [15]:
with open("./model/RF_lumbar_model.pkl", "wb") as f:
    pickle.dump(models["RF"], f)

In [16]:
with open("./model/RF_torso_model.pkl", "wb") as f:
    pickle.dump(models02["RF"], f)

In [17]:
with open("./model/KNN_lumbar_model.pkl", "wb") as f:
    pickle.dump(models["KNN"], f)

In [18]:
with open("./model/KNN_torso_model.pkl", "wb") as f:
    pickle.dump(models02["KNN"], f)

In [19]:
with open("./model/SVC_lumbar_model.pkl", "wb") as f:
    pickle.dump(models["SVC"], f)

In [20]:
with open("./model/SVC_torso_model.pkl", "wb") as f:
    pickle.dump(models02["SVC"], f)

In [21]:
with open("./model/input_scaler_lumbar.pkl", "wb") as f:
    pickle.dump(sc, f)

In [22]:
with open("./model/input_scaler_torso.pkl", "wb") as f:
    pickle.dump(sc02, f)