In [12]:
# Standard library
import os
import pickle
import warnings

# Data handling and visualization
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Keras
import keras.backend as K
from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Sequential
from keras.optimizers import SGD, Adam, Adadelta, RMSprop
from keras.utils.np_utils import to_categorical

# Classification report and confusion matrix
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
# Train-Test
from sklearn.model_selection import train_test_split
# Scaling data
from sklearn.preprocessing import StandardScaler

warnings.filterwarnings('ignore')

### 1. Describe Dataset

In [13]:
# Landmarks considered important for the plank exercise
IMPORTANT_LMS = [
    "NOSE",
    "LEFT_SHOULDER",
    "RIGHT_SHOULDER",
    "LEFT_ELBOW",
    "RIGHT_ELBOW",
    "LEFT_WRIST",
    "RIGHT_WRIST",
    "LEFT_HIP",
    "RIGHT_HIP",
    "LEFT_KNEE",
    "RIGHT_KNEE",
    "LEFT_ANKLE",
    "RIGHT_ANKLE",
    "LEFT_HEEL",
    "RIGHT_HEEL",
    "LEFT_FOOT_INDEX",
    "RIGHT_FOOT_INDEX",
]

# Column names of the data frame: the label column first, then the
# x / y / z / v columns of every landmark, in landmark order.
HEADERS = ["label"] + [
    f"{landmark.lower()}_{suffix}"
    for landmark in IMPORTANT_LMS
    for suffix in ("x", "y", "z", "v")
]

In [14]:
def describe_dataset(dataset_path: str):
    '''
    Load a CSV dataset and print a short summary of it.

    The summary lists the column headers, the row and column counts, the
    number of samples per value of the "label" column, whether any value is
    missing, and how many rows are duplicates of an earlier row.

    Args:
        dataset_path: path of the CSV file to read.

    Returns:
        The loaded pandas DataFrame.
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")

    # Count the duplicates straight from the boolean mask. Summing the
    # duplicated rows across columns would mix the string label with the
    # numeric features just to obtain a row count.
    duplicate_count = int(data.duplicated().sum())
    print(f"Duplicate Rows : {duplicate_count}")

    return data


# Remove duplicate rows (optional)
def remove_duplicate_rows(dataset_path: str, output_path: str = "cleaned_dataset.csv"):
    '''
    Drop duplicated rows from a CSV dataset and save the result to another file.

    Args:
        dataset_path: path of the CSV file to read.
        output_path: path of the CSV file to write. The default keeps the
            previous behaviour of writing "cleaned_dataset.csv" to the
            current working directory.

    Returns:
        None. The de-duplicated data is only written to disk.
    '''

    # Keep the first occurrence of every duplicated row.
    deduplicated = pd.read_csv(dataset_path).drop_duplicates(keep="first")
    deduplicated.to_csv(output_path, sep=',', encoding='utf-8', index=False)


# Load the dataset and print its summary
df = describe_dataset(dataset_path="./dataset.csv")

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'lef

### 2. Preprocess Dataset

In [15]:
# load dataset
df = pd.read_csv("./dataset.csv")

# Integer class id of every string label
LABEL_TO_ID = {"C": 0, "H": 1, "L": 2}

# Fail early on a label that has no class id instead of letting it reach
# the one-hot encoding step.
unknown_labels = set(df["label"].unique()) - set(LABEL_TO_ID)
if unknown_labels:
    raise ValueError(f"Unexpected labels in dataset: {sorted(map(str, unknown_labels))}")

# Map in a single pass so the column ends up with an integer dtype.
df["label"] = df["label"].map(LABEL_TO_ID).astype("int64")

print(f'Number of rows: {df.shape[0]} \nNumber of columns: {df.shape[1]}\n')
print(f"Labels: \n{df['label'].value_counts()}\n")

Number of rows: 28623 
Number of columns: 69

Labels: 
2    10011
0     9630
1     8982
Name: label, dtype: int64



In [22]:
# Standard Scaling of features
sc = StandardScaler()

features = df.drop("label", axis = 1)

# NOTE(review): the scaler is fitted on the whole dataset, before the
# train/test split in section 3, so test-set statistics leak into the
# scaling. Consider splitting first and fitting on the training rows only.
# The feature names and row index are kept on the scaled frame.
x = pd.DataFrame(sc.fit_transform(features), columns=features.columns, index=features.index)

y = df["label"]

# One-hot encode the class ids
y_cat = to_categorical(y)

### 3. Split train/test set, construct model

In [None]:
# Fix the seed so the split, and every metric computed on it, is reproducible;
# stratify on the class id so train and test keep the same class proportions.
x_train, x_test, y_train, y_test = train_test_split(
    x.values,
    y_cat,
    test_size=0.2,
    random_state=42,
    stratify=np.argmax(y_cat, axis=1),
)

In [None]:
# Take the input width and the number of classes from the training arrays
# so the network follows the data if the feature set or label set changes.
n_features = x_train.shape[1]
n_classes = y_train.shape[1]

model = Sequential()
model.add(Dense(68, input_dim = n_features, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(68, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(14, activation = "relu"))
model.add(Dense(n_classes, activation = "softmax"))
# `lr` is a deprecated alias of `learning_rate`
model.compile(Adam(learning_rate = 0.01), "categorical_crossentropy", metrics = ["accuracy"])
model.summary()

In [None]:
# Train for 100 epochs with batches of 10 samples; the test split is passed
# as the validation data.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=10,
    epochs=100,
    validation_data=(x_test, y_test),
)

### 4. Evaluate model

#### 4.1. Confusion matrix

In [None]:
# Predict once and reuse the probabilities; a second model.predict call on
# the same data only repeats the forward pass.
predict_x = model.predict(x_test)
y_pred = predict_x
y_pred_class = np.argmax(predict_x, axis=1)

# Recover the integer class ids from the one-hot encoded ground truth
y_test_class = np.argmax(y_test, axis=1)

# Rows are the true classes, columns the predicted classes
confusion_matrix(y_test_class, y_pred_class)

#### 4.2. Classification Report

In [None]:
# Report the per-class metrics under the original label names
# (C -> 0, H -> 1, L -> 2, as encoded in section 2).
print(classification_report(y_test_class, y_pred_class, labels=[0, 1, 2], target_names=["C", "H", "L"]))

#### 4.3. Dump model

In [None]:
# Dump the trained model to a pickle file.
# NOTE(review): this is the model as it stands after the last epoch; nothing
# in this notebook selects a "best" one. Whether a Keras model can be pickled
# depends on the Keras version; `model.save(...)` is the documented way to
# persist it. The pickle path is kept so existing loaders still work.
os.makedirs("./model", exist_ok=True)  # open() does not create missing directories
with open("./model/plank_model_deep_learning.pkl", "wb") as f:
    pickle.dump(model, f)

In [None]:
# Show the one-hot encoded ground-truth label of the test sample at index 22
y_test[22]

In [None]:
# Sanity check: run the model on one already-scaled test sample
sample_index = 22

# Single-row frame labelled with the feature column names
X = pd.DataFrame(x_test[sample_index].reshape(1, -1), columns=HEADERS[1:])

prediction = model.predict(X)
# Class id with the highest predicted probability
predicted_class = prediction.argmax(axis=1)[0]

predicted_class

In [None]:
# Show the shape of the feature vector of the test sample at index 22
x_test[22].shape

#### 4.4. Dump the input scaler

In [23]:
# Dump the fitted input scaler so the same scaling can be applied to new inputs
os.makedirs("./model", exist_ok=True)  # open() does not create missing directories
with open("./model/input_scaler.pkl", "wb") as f:
    pickle.dump(sc, f)