In [3]:
pip install mediapipe


Collecting mediapipe
  Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting protobuf<5,>=4.25.3 (from mediapipe)
  Downloading protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Collecting sounddevice>=0.4.4 (from mediapipe)
  Downloading sounddevice-0.5.0-py3-none-any.whl.metadata (1.4 kB)
Downloading mediapipe-0.10.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.9/35.9 MB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.6/294.6 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading sounddevice-0.5.0-py3-none-any.whl (32 kB)
Installing collected packages: protobuf, sounddevice, mediapipe
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.

In [4]:
import mediapipe as mp
import cv2
import pandas as pd
import pickle

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides any warnings raised by the libraries imported
# above — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Drawing helpers
# Short aliases for mp.solutions.drawing_utils and mp.solutions.pose.
# NOTE(review): neither alias is referenced in the cells visible here.
mp_drawing = mp.solutions.drawing_utils
mp_pose = mp.solutions.pose

In [5]:
def rescale_frame(frame, percent=50):
    '''
    Resize a frame to a given percentage of its original size.

    Width and height are each multiplied by ``percent / 100`` and truncated to
    whole pixels; the resize is done by ``cv2.resize`` with ``cv2.INTER_AREA``
    interpolation.

    Args:
        frame: image whose ``shape`` begins with (height, width).
        percent: target size as a percentage of the original (default 50).

    Returns:
        Whatever ``cv2.resize`` returns for the computed (width, height).
    '''
    src_height, src_width = frame.shape[0], frame.shape[1]
    # cv2.resize expects the target size as (width, height)
    target_size = (
        int(src_width * percent / 100),
        int(src_height * percent / 100),
    )
    return cv2.resize(frame, target_size, interpolation=cv2.INTER_AREA)


def describe_dataset(dataset_path: str):
    '''
    Load a CSV dataset and print a short summary of it.

    The summary lists the column headers, the row and column counts, the
    frequency of each value in the ``label`` column, whether any cell is
    missing, and how many rows duplicate an earlier row.

    Args:
        dataset_path: path of the CSV file to read.

    Returns:
        The DataFrame read from ``dataset_path``.
    '''
    data = pd.read_csv(dataset_path)

    n_rows, n_cols = data.shape
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {n_rows} \nNumber of columns: {n_cols}\n')

    label_counts = data['label'].value_counts()
    print(f"Labels: \n{label_counts}\n")

    has_missing = data.isnull().values.any()
    print(f"Missing values: {has_missing}\n")

    # duplicated() flags every repeat of an earlier row, so summing the mask
    # gives the number of duplicate rows.
    n_duplicates = int(data.duplicated().sum())
    print(f"Duplicate Rows : {n_duplicates}")

    return data


def round_up_metric_results(results) -> list:
    '''Round each metric value (precision, recall, ...) to 3 decimal places.

    Despite the name, values are rounded with the built-in ``round`` rather
    than always upward.
    '''
    return [round(score, 3) for score in results]

In [8]:
# Load the raw training CSV and print its summary
df = describe_dataset("/content/train.csv")

# Encode the class labels as integers: "C" -> 0, "L" -> 1
for label_name, label_code in (("C", 0), ("L", 1)):
    df.loc[df["label"] == label_name, "label"] = label_code

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_eye_inner_x', 'left_eye_inner_y', 'left_eye_inner_z', 'left_eye_inner_v', 'left_eye_x', 'left_eye_y', 'left_eye_z', 'left_eye_v', 'left_eye_outer_x', 'left_eye_outer_y', 'left_eye_outer_z', 'left_eye_outer_v', 'right_eye_inner_x', 'right_eye_inner_y', 'right_eye_inner_z', 'right_eye_inner_v', 'right_eye_x', 'right_eye_y', 'right_eye_z', 'right_eye_v', 'right_eye_outer_x', 'right_eye_outer_y', 'right_eye_outer_z', 'right_eye_outer_v', 'left_ear_x', 'left_ear_y', 'left_ear_z', 'left_ear_v', 'right_ear_x', 'right_ear_y', 'right_ear_z', 'right_ear_v', 'mouth_left_x', 'mouth_left_y', 'mouth_left_z', 'mouth_left_v', 'mouth_right_x', 'mouth_right_y', 'mouth_right_z', 'mouth_right_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow

In [6]:
def remove_duplicate_rows(dataset_path: str, output_path: str = "cleaned_train.csv"):
    '''
    Remove duplicated rows from a CSV dataset and save the result to another file.

    Only the first occurrence of each duplicated row is kept.

    Args:
        dataset_path: path of the CSV file to read.
        output_path: path of the CSV file to write. Defaults to
            "cleaned_train.csv", resolved against the working directory.

    Returns:
        None. The de-duplicated rows are written to ``output_path`` as
        comma-separated UTF-8 without the index column.
    '''
    # Build a new frame instead of mutating in place, so the function can be
    # re-run without depending on earlier state.
    deduplicated = pd.read_csv(dataset_path).drop_duplicates(keep="first")
    deduplicated.to_csv(output_path, sep=',', encoding='utf-8', index=False)

In [10]:
# Write a de-duplicated copy of the raw training CSV to "cleaned_train.csv".
# NOTE(review): the later cells shown here read "/content/train_cleaned.csv"
# instead, so this output does not appear to be used — confirm.
remove_duplicate_rows("/content/train.csv")

In [30]:
import pandas as pd

# Source CSV and the destination of its copy without missing values
csv_file_path = '/content/train.csv'
cleaned_csv_file_path = '/content/train_cleaned.csv'

# Load the raw data, then keep only the rows in which no value is missing
df = pd.read_csv(csv_file_path)
df_cleaned = df.dropna()

# Persist the remaining rows (without the index column) and report the location
df_cleaned.to_csv(cleaned_csv_file_path, index=False)
print(f"Cleaned CSV file has been saved to {cleaned_csv_file_path}.")


Cleaned CSV file has been saved to /content/train_cleaned.csv.


In [31]:
# Load the cleaned training CSV and print its summary
df = describe_dataset("/content/train_cleaned.csv")

# Encode the class labels as integers: "C" -> 0, "L" -> 1
for label_name, label_code in (("C", 0), ("L", 1)):
    df.loc[df["label"] == label_name, "label"] = label_code

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_eye_inner_x', 'left_eye_inner_y', 'left_eye_inner_z', 'left_eye_inner_v', 'left_eye_x', 'left_eye_y', 'left_eye_z', 'left_eye_v', 'left_eye_outer_x', 'left_eye_outer_y', 'left_eye_outer_z', 'left_eye_outer_v', 'right_eye_inner_x', 'right_eye_inner_y', 'right_eye_inner_z', 'right_eye_inner_v', 'right_eye_x', 'right_eye_y', 'right_eye_z', 'right_eye_v', 'right_eye_outer_x', 'right_eye_outer_y', 'right_eye_outer_z', 'right_eye_outer_v', 'left_ear_x', 'left_ear_y', 'left_ear_z', 'left_ear_v', 'right_ear_x', 'right_ear_y', 'right_ear_z', 'right_ear_v', 'mouth_left_x', 'mouth_left_y', 'mouth_left_z', 'mouth_left_v', 'mouth_right_x', 'mouth_right_y', 'mouth_right_z', 'mouth_right_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow

In [14]:
# Per-column count of missing cells in the current frame
print(df.isna().sum())

label                 0
nose_x                2
nose_y                2
nose_z                2
nose_v                2
                     ..
left_foot_index_v     2
right_foot_index_x    2
right_foot_index_y    2
right_foot_index_z    2
right_foot_index_v    2
Length: 133, dtype: int64


In [16]:
# Preview df without the rows that contain a missing value. The result is only
# displayed, not assigned, so df itself is left unchanged.
df.dropna(axis=0, how="any")

Unnamed: 0,label,nose_x,nose_y,nose_z,nose_v,left_eye_inner_x,left_eye_inner_y,left_eye_inner_z,left_eye_inner_v,left_eye_x,...,right_heel_z,right_heel_v,left_foot_index_x,left_foot_index_y,left_foot_index_z,left_foot_index_v,right_foot_index_x,right_foot_index_y,right_foot_index_z,right_foot_index_v
0,0,0.901489,0.764261,0.017840,0.999693,0.910496,0.746260,0.024884,0.999643,0.910423,...,-0.079737,0.976753,0.496992,0.754295,0.240436,0.757697,0.477349,0.789760,-0.126244,0.980702
1,1,0.837779,0.435937,0.022410,0.999809,0.848639,0.421949,0.041858,0.999619,0.848461,...,-0.131854,0.911173,0.244333,0.740076,-0.012488,0.719308,0.229834,0.744720,-0.242011,0.914189
2,1,0.668188,0.316630,0.008715,0.999680,0.681724,0.307906,0.026603,0.999331,0.681297,...,-0.102283,0.963908,0.014651,0.619288,0.043482,0.562153,0.121821,0.641085,-0.168111,0.919631
3,0,0.730827,0.289875,-0.337457,0.999973,0.730209,0.248987,-0.315170,0.999964,0.729993,...,0.323690,0.961584,0.152406,0.808210,0.505851,0.919386,0.114678,0.794440,0.270291,0.982480
4,1,0.836489,0.415170,-0.117310,0.999438,0.855328,0.397306,-0.107520,0.998908,0.855385,...,0.115196,0.963895,0.235020,0.793390,0.461484,0.795803,0.076707,0.769268,0.044604,0.938395
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8746,0,0.781083,0.880714,0.061255,0.999888,0.796041,0.861802,0.073317,0.999873,0.796547,...,-0.105312,0.970729,0.096370,0.890990,0.075834,0.775117,0.080872,0.937499,-0.177222,0.969624
8747,1,0.679463,0.572687,-0.399624,0.999933,0.695532,0.564306,-0.389165,0.999886,0.696537,...,0.448237,0.866218,0.259675,0.772054,0.607290,0.683433,0.237173,0.781676,0.397578,0.912387
8748,0,0.740424,0.557311,-0.219162,0.999845,0.752736,0.537788,-0.212824,0.999763,0.754200,...,0.366342,0.826493,0.246257,0.800259,0.650546,0.453982,0.214458,0.840091,0.339854,0.728789
8749,0,0.736577,0.541649,-0.356226,0.999984,0.750581,0.524346,-0.342668,0.999970,0.751557,...,0.377954,0.982639,0.240504,0.813676,0.545598,0.923393,0.219085,0.842793,0.319248,0.988949


In [33]:
# Re-read the cleaned training CSV so df starts again from the file contents
df = describe_dataset("/content/train_cleaned.csv")

# Encode the class labels as integers: "C" -> 0, "L" -> 1
for label_name, label_code in (("C", 0), ("L", 1)):
    df.loc[df["label"] == label_name, "label"] = label_code

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_eye_inner_x', 'left_eye_inner_y', 'left_eye_inner_z', 'left_eye_inner_v', 'left_eye_x', 'left_eye_y', 'left_eye_z', 'left_eye_v', 'left_eye_outer_x', 'left_eye_outer_y', 'left_eye_outer_z', 'left_eye_outer_v', 'right_eye_inner_x', 'right_eye_inner_y', 'right_eye_inner_z', 'right_eye_inner_v', 'right_eye_x', 'right_eye_y', 'right_eye_z', 'right_eye_v', 'right_eye_outer_x', 'right_eye_outer_y', 'right_eye_outer_z', 'right_eye_outer_v', 'left_ear_x', 'left_ear_y', 'left_ear_z', 'left_ear_v', 'right_ear_x', 'right_ear_y', 'right_ear_z', 'right_ear_v', 'mouth_left_x', 'mouth_left_y', 'mouth_left_z', 'mouth_left_v', 'mouth_right_x', 'mouth_right_y', 'mouth_right_z', 'mouth_right_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow

In [34]:
# Start from a new, unfitted scaler. A later cell may rebind `sc` to a scaler
# loaded from /content/input_scaler.pkl.
sc = StandardScaler()

In [36]:
# Reuse the scaler saved by an earlier run when one exists. On a fresh
# environment the pickle has not been written yet (it is produced by the cell
# that dumps `sc` to this same path), so the missing file is reported instead
# of aborting the run, and `sc` keeps the value it already has.
# NOTE(security): pickle.load can execute arbitrary code — only load a file
# that this notebook itself produced.
try:
    with open("/content/input_scaler.pkl", "rb") as f:
        sc = pickle.load(f)
except FileNotFoundError:
    print("No saved scaler found at /content/input_scaler.pkl; keeping the current scaler.")

In [37]:
# Feature matrix: every column except the target
x = df.drop(columns="label")

# Make sure `sc` is a StandardScaler, replacing it with a new one otherwise
sc = sc if isinstance(sc, StandardScaler) else StandardScaler()

# Fit only a scaler that has no `n_features_in_` attribute yet (i.e. a newly
# created one); an already-fitted scaler is reused as it is
if not hasattr(sc, 'n_features_in_'):
    sc.fit(x)

# Standardise the features and wrap the resulting array in a DataFrame
x = pd.DataFrame(sc.transform(x))

# Target column cast to integers
y = df["label"].astype('int')

In [38]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1234,
)
y_train

Unnamed: 0,label
6731,1
2949,0
1049,0
6702,0
349,1
...,...
6137,0
664,0
7540,0
7221,1


In [39]:
# Seed shared by every estimator that uses randomness, so that re-running the
# cell trains the same models. Same value as the train/test split seed.
RANDOM_STATE = 1234

# Candidate classifiers, each paired with the short name used in the report
algorithms = [
    ("LR", LogisticRegression()),
    ("SVC", SVC(probability=True, random_state=RANDOM_STATE)),
    ("KNN", KNeighborsClassifier()),
    ("DTC", DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ("SGDC", CalibratedClassifierCV(SGDClassifier(random_state=RANDOM_STATE))),
    ("NB", GaussianNB()),
    ("RF", RandomForestClassifier(random_state=RANDOM_STATE)),
]

models = {}          # short name -> fitted estimator
final_results = []   # one tuple of metrics per model

for name, model in algorithms:
    trained_model = model.fit(X_train, y_train)
    models[name] = trained_model

    # Evaluate on the held-out split
    model_results = trained_model.predict(X_test)

    # average=None yields one score per class, ordered as labels=[0, 1]
    p_score = precision_score(y_test, model_results, average=None, labels=[0, 1])
    a_score = accuracy_score(y_test, model_results)
    r_score = recall_score(y_test, model_results, average=None, labels=[0, 1])
    f1_score_result = f1_score(y_test, model_results, average=None, labels=[0, 1])
    cm = confusion_matrix(y_test, model_results, labels=[0, 1])
    final_results.append((
        name,
        round_up_metric_results(p_score),
        a_score,
        round_up_metric_results(r_score),
        round_up_metric_results(f1_score_result),
        cm,
    ))

# Rank models by the sum of their (rounded) per-class F1 scores, best first
final_results.sort(key=lambda k: sum(k[4]), reverse=True)
pd.DataFrame(final_results, columns=["Model", "Precision Score", "Accuracy score", "Recall Score", "F1 score", "Confusion Matrix"])

Unnamed: 0,Model,Precision Score,Accuracy score,Recall Score,F1 score,Confusion Matrix
0,RF,"[1.0, 0.998]",0.998857,"[0.997, 1.0]","[0.999, 0.999]","[[687, 2], [0, 1061]]"
1,KNN,"[0.999, 0.998]",0.998286,"[0.997, 0.999]","[0.998, 0.999]","[[687, 2], [1, 1060]]"
2,DTC,"[0.997, 0.997]",0.997143,"[0.996, 0.998]","[0.996, 0.998]","[[686, 3], [2, 1059]]"
3,SVC,"[0.975, 0.975]",0.974857,"[0.961, 0.984]","[0.968, 0.979]","[[662, 27], [17, 1044]]"
4,SGDC,"[0.922, 0.951]",0.939429,"[0.925, 0.949]","[0.923, 0.95]","[[637, 52], [54, 1007]]"
5,LR,"[0.913, 0.945]",0.932571,"[0.916, 0.943]","[0.914, 0.944]","[[631, 58], [60, 1001]]"
6,NB,"[0.654, 0.851]",0.756,"[0.806, 0.724]","[0.722, 0.782]","[[555, 134], [293, 768]]"


In [41]:
# Serialise the `models` dict to disk with pickle
with open("/content/all_sklearn.pkl", "wb") as models_file:
    pickle.dump(models, models_file)

In [42]:
# Serialise the scaler `sc` to disk with pickle so it can be loaded again later
with open("/content/input_scaler.pkl", "wb") as scaler_file:
    pickle.dump(sc, scaler_file)