In [1]:
# Data visualization
import numpy as np
import pandas as pd 

# Keras
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.optimizers import Adam
from keras.utils.np_utils import to_categorical
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model
import keras_tuner as kt
import tensorflow as tf
import pydot as pyd
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

keras.utils.vis_utils.pydot = pyd

# Train-Test
from sklearn.model_selection import train_test_split
# Classification Report
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

import pickle

import warnings
warnings.filterwarnings('ignore')

In [2]:
IMPORTANT_LMS = [
"NOSE",
"LEFT_SHOULDER",
"RIGHT_SHOULDER",
"LEFT_HIP",
"RIGHT_HIP",
"LEFT_KNEE",
"RIGHT_KNEE",
"LEFT_ANKLE",
"RIGHT_ANKLE",
]

# Generate all columns of the data frame

HEADERS = ["label"] # Label column

for lm in IMPORTANT_LMS:
    HEADERS += [f"{lm.lower()}_x", f"{lm.lower()}_y", f"{lm.lower()}_z", f"{lm.lower()}_v"]

In [3]:
def describe_dataset(dataset_path: str):
    '''
    Describe dataset
    '''

    data = pd.read_csv(dataset_path)
    print(f"Headers: {list(data.columns.values)}")
    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')
    print(f"Labels: \n{data['label'].value_counts()}\n")
    print(f"Missing values: {data.isnull().values.any()}\n")
    
    duplicate = data[data.duplicated()]
    print(f"Duplicate Rows : {len(duplicate.sum(axis=1))}")

    return data


# Remove duplicate rows (optional)
def remove_duplicate_rows(dataset_path: str):
    '''
    Remove duplicated data from the dataset then save it to another files
    '''
    
    df = pd.read_csv(dataset_path)
    df.drop_duplicates(keep="first", inplace=True)
    df.to_csv(f"cleaned_train.csv", sep=',', encoding='utf-8', index=False)


def round_up_metric_results(results) -> list:
    '''Round up metrics results such as precision score, recall score, ...'''
    return list(map(lambda el: round(el, 3), results))


df = describe_dataset("./train.csv")

Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v']
Number of rows: 5235 
Number of columns: 37

Labels: 
knees_forward_error    3931
no_error               1304
Name: label, dtype: int64

Missing values: False

Duplicate Rows : 0


In [4]:
# load dataset
df = pd.read_csv("./train.csv")

# Categorizing label
df.loc[df["label"] == "no_error", "label"] = 0
df.loc[df["label"] == "knees_forward_error", "label"] = 1

print(f'Number of rows: {df.shape[0]} \nNumber of columns: {df.shape[1]}\n')
print(f"Labels: \n{df['label'].value_counts()}\n")

Number of rows: 5235 
Number of columns: 37

Labels: 
1    3931
0    1304
Name: label, dtype: int64



In [5]:
# Standard Scaling of features
# Dump input scaler
with open("./model/kf_input_scaler.pkl", "rb") as f2:
    sc = pickle.load(f2)

x = df.drop("label", axis = 1)
x = pd.DataFrame(sc.transform(x))

y = df["label"]

# # Converting prediction to categorical
y_cat = to_categorical(y)

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x.values, y_cat, test_size=0.2, random_state=1234)

In [7]:
stop_early = EarlyStopping(monitor='loss', patience=3)

# Final Results
final_models = {}

In [8]:
def describe_model(model):
    '''
    Describe Model architecture
    '''
    print(f"Describe models architecture")
    for i, layer in enumerate(model.layers):
        number_of_units = layer.units if hasattr(layer, 'units') else 0

        if hasattr(layer, "activation"):
            print(f"Layer-{i + 1}: {number_of_units} units, func: ", layer.activation)
        else:
            print(f"Layer-{i + 1}: {number_of_units} units, func: None")
            

def get_best_model(tuner):
    '''
    Describe and return the best model found from keras tuner
    '''
    best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
    best_model = tuner.hypermodel.build(best_hps)

    describe_model(best_model)

    print("\nOther params:")
    ignore_params = ["tuner", "activation", "layer"]
    for param, value in best_hps.values.items():
        if not any(word in param for word in ignore_params):
            print(f"{param}: {value}")

    return best_model

In [9]:
def model_3l_builder(hp):
    model = Sequential()
    model.add(Dense(36, input_dim = 36, activation = "relu"))

    hp_activation = hp.Choice('activation', values=['relu', 'tanh'])
    hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=512, step=32)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.add(Dense(units=hp_layer_1, activation=hp_activation))
    model.add(Dense(2, activation = "softmax"))

    model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss="categorical_crossentropy", metrics = ["accuracy"])
    
    return model

In [10]:
tuner_3l = kt.Hyperband(
    model_3l_builder,
    objective='accuracy',
    max_epochs=10,
    directory='keras_tuner_dir',
    project_name='keras_tuner_demo'
)
tuner_3l.search(x_train, y_train, epochs=10, callbacks=[stop_early])

INFO:tensorflow:Reloading Tuner from keras_tuner_dir\keras_tuner_demo\tuner0.json
INFO:tensorflow:Oracle triggered exit


In [11]:
model_3l = get_best_model(tuner_3l)
model_3l.fit(x_train, y_train, epochs=100, batch_size=10, validation_data=(x_test, y_test), callbacks=[stop_early])

Describe models architecture
Layer-1: 36 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-2: 352 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-3: 2 units, func:  <function softmax at 0x0000020FBEE49430>

Other params:
learning_rate: 0.001
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100


<keras.callbacks.History at 0x20fc16a29a0>

In [12]:
final_models["3_layers"] = model_3l

In [13]:
def model_5l_builder(hp):
    model = Sequential()
    model.add(Dense(36, input_dim = 36, activation = "relu"))

    hp_activation = hp.Choice('activation', values=['relu', 'tanh'])
    hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=512, step=32)
    hp_layer_2 = hp.Int('layer_2', min_value=32, max_value=512, step=32)
    hp_layer_3 = hp.Int('layer_3', min_value=32, max_value=512, step=32)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.add(Dense(units=hp_layer_1, activation=hp_activation))
    model.add(Dense(units=hp_layer_2, activation=hp_activation))
    model.add(Dense(units=hp_layer_3, activation=hp_activation))
    model.add(Dense(2, activation = "softmax"))

    model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss="categorical_crossentropy", metrics = ["accuracy"])
    
    return model

In [14]:
import keras
tuner_5l = kt.Hyperband(
    model_5l_builder,
    objective='accuracy',
    max_epochs=10,
    directory='keras_tuner_dir',
    project_name='keras_tuner_demo_1'
)

tuner_5l.search(
    x_train, 
    y_train, 
    epochs=10, 
    callbacks=[keras.callbacks.EarlyStopping(patience=5)]
)

INFO:tensorflow:Reloading Tuner from keras_tuner_dir\keras_tuner_demo_1\tuner0.json
INFO:tensorflow:Oracle triggered exit


In [15]:
model_5l = get_best_model(tuner_5l)
model_5l.fit(x_train, y_train, epochs=100, batch_size=10, validation_data=(x_test, y_test), callbacks=[stop_early])

Describe models architecture
Layer-1: 36 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-2: 320 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-3: 384 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-4: 224 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-5: 2 units, func:  <function softmax at 0x0000020FBEE49430>

Other params:
learning_rate: 0.001
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100


<keras.callbacks.History at 0x20fc3969250>

In [16]:
final_models["5_layers"] = model_5l

In [17]:
def model_7lD_builder(hp):
    model = Sequential()
    model.add(Dense(36, input_dim = 36, activation = "relu"))

    hp_activation = hp.Choice('activation', values=['relu', 'tanh'])
    hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=512, step=32)
    hp_layer_2 = hp.Int('layer_2', min_value=32, max_value=512, step=32)
    hp_layer_3 = hp.Int('layer_3', min_value=32, max_value=512, step=32)
    hp_dropout_1 = hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)
    hp_dropout_2 = hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.add(Dense(units=hp_layer_1, activation=hp_activation))
    model.add(Dropout(rate=hp_dropout_1))
    model.add(Dense(units=hp_layer_2, activation=hp_activation))
    model.add(Dropout(rate=hp_dropout_2))
    model.add(Dense(units=hp_layer_3, activation=hp_activation))
    model.add(Dense(2, activation = "softmax"))

    model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss="categorical_crossentropy", metrics = ["accuracy"])
    
    return model

In [18]:
def get_early_stopping_callback():
    return keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=3
    )

tuner_7lD = kt.Hyperband(
    model_7lD_builder,
    objective='accuracy',
    max_epochs=10,
    directory='keras_tuner_dir',
    project_name='keras_tuner_demo_4'
)
tuner_7lD.search(x_train, y_train, epochs=10, callbacks=[get_early_stopping_callback()])

INFO:tensorflow:Reloading Tuner from keras_tuner_dir\keras_tuner_demo_4\tuner0.json
INFO:tensorflow:Oracle triggered exit


In [19]:
model_7lD = get_best_model(tuner_7lD)
model_7lD.fit(x_train, y_train, epochs=100, batch_size=10, validation_data=(x_test, y_test), callbacks=[stop_early])

Describe models architecture
Layer-1: 36 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-2: 64 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-3: 0 units, func: None
Layer-4: 448 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-5: 0 units, func: None
Layer-6: 160 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-7: 2 units, func:  <function softmax at 0x0000020FBEE49430>

Other params:
dropout_1: 0.1
dropout_2: 0.2
learning_rate: 0.001
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100

<keras.callbacks.History at 0x20fc4bfbbe0>

In [20]:
final_models["7_layers_with_dropout"] = model_7lD

In [9]:
def model_7l_builder(hp):
    model = Sequential()
    model.add(Dense(36, input_dim = 36, activation = "relu"))

    hp_activation = hp.Choice('activation', values=['relu', 'tanh'])
    hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=512, step=32)
    hp_layer_2 = hp.Int('layer_2', min_value=32, max_value=512, step=32)
    hp_layer_3 = hp.Int('layer_3', min_value=32, max_value=512, step=32)
    hp_layer_4 = hp.Int('layer_4', min_value=32, max_value=512, step=32)
    hp_layer_5 = hp.Int('layer_5', min_value=32, max_value=512, step=32)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    model.add(Dense(units=hp_layer_1, activation=hp_activation))
    model.add(Dense(units=hp_layer_2, activation=hp_activation))
    model.add(Dense(units=hp_layer_3, activation=hp_activation))
    model.add(Dense(units=hp_layer_4, activation=hp_activation))
    model.add(Dense(units=hp_layer_5, activation=hp_activation))
    model.add(Dense(2, activation = "softmax"))

    model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss="categorical_crossentropy", metrics = ["accuracy"])
    return model

In [10]:
tuner_7l = kt.Hyperband(
    model_7l_builder,
    objective='accuracy',
    max_epochs=10,
    directory='keras_tuner_dir',
    project_name='keras_tuner_demo_5'
)
tuner_7l.search(x_train, y_train, epochs=10, callbacks=[keras.callbacks.EarlyStopping(patience=5)])

INFO:tensorflow:Reloading Tuner from keras_tuner_dir\keras_tuner_demo_5\tuner0.json
INFO:tensorflow:Oracle triggered exit


In [None]:
model_7l = get_best_model(tuner_7l)
model_7l.fit(x_train, y_train, epochs=100, batch_size=10, validation_data=(x_test, y_test), callbacks=[stop_early])

Describe models architecture
Layer-1: 36 units, func:  <function relu at 0x000001F290D48280>
Layer-2: 416 units, func:  <function relu at 0x000001F290D48280>
Layer-3: 192 units, func:  <function relu at 0x000001F290D48280>
Layer-4: 64 units, func:  <function relu at 0x000001F290D48280>
Layer-5: 384 units, func:  <function relu at 0x000001F290D48280>
Layer-6: 256 units, func:  <function relu at 0x000001F290D48280>
Layer-7: 2 units, func:  <function softmax at 0x000001F290D3F820>

Other params:
learning_rate: 0.001
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100

In [None]:
def visualize_model(model):
      return SVG(model_to_dot(model).create(prog='dot', format='svg'))

In [None]:
final_models["7_layers"] = model_7l

In [26]:
visualize_model(final_models["7_layers"])

AttributeError: module 'os' has no attribute 'errno'

In [27]:
for name, model in final_models.items():
    print(f"{name}: ", end="")
    describe_model(model)
    print()

3_layers: Describe models architecture
Layer-1: 36 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-2: 352 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-3: 2 units, func:  <function softmax at 0x0000020FBEE49430>

5_layers: Describe models architecture
Layer-1: 36 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-2: 320 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-3: 384 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-4: 224 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-5: 2 units, func:  <function softmax at 0x0000020FBEE49430>

7_layers_with_dropout: Describe models architecture
Layer-1: 36 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-2: 64 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-3: 0 units, func: None
Layer-4: 448 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-5: 0 units, func: None
Layer-6: 160 units, func:  <function relu at 0x0000020FBEE49E50>
Layer-7: 2 units, func:  <functi

In [28]:
train_set_results = []

for name, model in final_models.items():
    # Evaluate model
    predict_x = model.predict(x_test, verbose=False) 
    y_pred_class = np.argmax(predict_x, axis=1)
    y_test_class = np.argmax(y_test, axis=1)

    cm = confusion_matrix(y_test_class, y_pred_class, labels=[0, 1, 2])
    (p_score, r_score, f_score, _) = precision_recall_fscore_support(y_test_class, y_pred_class, labels=[0, 1, 2])
    
    train_set_results.append(( name, round_up_metric_results(p_score), round_up_metric_results(r_score), round_up_metric_results(f_score), cm ))

train_set_results.sort(key=lambda k: sum(k[3]), reverse=True)
pd.DataFrame(train_set_results, columns=["Model", "Precision Score", "Recall Score", "F1 score", "Confusion Matrix"])

Unnamed: 0,Model,Precision Score,Recall Score,F1 score,Confusion Matrix
0,5_layers,"[0.822, 0.943, 0.0]","[0.834, 0.939, 0.0]","[0.828, 0.941, 0.0]","[[221, 44, 0], [48, 734, 0], [0, 0, 0]]"
1,7_layers,"[0.827, 0.936, 0.0]","[0.811, 0.942, 0.0]","[0.819, 0.939, 0.0]","[[215, 50, 0], [45, 737, 0], [0, 0, 0]]"
2,3_layers,"[0.808, 0.926, 0.0]","[0.777, 0.937, 0.0]","[0.792, 0.931, 0.0]","[[206, 59, 0], [49, 733, 0], [0, 0, 0]]"
3,7_layers_with_dropout,"[0.801, 0.915, 0.0]","[0.743, 0.937, 0.0]","[0.771, 0.926, 0.0]","[[197, 68, 0], [49, 733, 0], [0, 0, 0]]"


In [29]:
# load dataset
test_df = pd.read_csv("./train.csv")

# Categorizing label
test_df.loc[test_df["label"] == "no_error", "label"] = 0
test_df.loc[test_df["label"] == "knees_forward_error", "label"] = 1

print(f'Number of rows: {test_df.shape[0]} \nNumber of columns: {test_df.shape[1]}\n')
print(f"Labels: \n{test_df['label'].value_counts()}\n")

Number of rows: 5235 
Number of columns: 37

Labels: 
1    3931
0    1304
Name: label, dtype: int64



In [30]:
# Standard Scaling of features
test_x = test_df.drop("label", axis = 1)
test_x = pd.DataFrame(sc.transform(test_x))

test_y = test_df["label"]

# # Converting prediction to categorical
test_y_cat = to_categorical(test_y)

In [31]:
test_set_results = []

for name, model in final_models.items():
    # Evaluate model
    predict_x = model.predict(test_x, verbose=False) 
    y_pred_class = np.argmax(predict_x, axis=1)
    y_test_class = np.argmax(test_y_cat, axis=1)

    cm = confusion_matrix(y_test_class, y_pred_class, labels=[0, 1, 2])
    (p_score, r_score, f_score, _) = precision_recall_fscore_support(y_test_class, y_pred_class, labels=[0, 1, 2])
    
    test_set_results.append(( name, round_up_metric_results(p_score), round_up_metric_results(r_score), round_up_metric_results(f_score), cm ))

test_set_results.sort(key=lambda k: sum(k[3]), reverse=True)
pd.DataFrame(test_set_results, columns=["Model", "Precision Score", "Recall Score", "F1 score", "Confusion Matrix"])

Unnamed: 0,Model,Precision Score,Recall Score,F1 score,Confusion Matrix
0,5_layers,"[0.919, 0.984, 0.0]","[0.952, 0.972, 0.0]","[0.936, 0.978, 0.0]","[[1242, 62, 0], [109, 3822, 0], [0, 0, 0]]"
1,7_layers,"[0.931, 0.976, 0.0]","[0.928, 0.977, 0.0]","[0.929, 0.977, 0.0]","[[1210, 94, 0], [90, 3841, 0], [0, 0, 0]]"
2,3_layers,"[0.925, 0.977, 0.0]","[0.932, 0.975, 0.0]","[0.929, 0.976, 0.0]","[[1215, 89, 0], [98, 3833, 0], [0, 0, 0]]"
3,7_layers_with_dropout,"[0.927, 0.971, 0.0]","[0.911, 0.976, 0.0]","[0.919, 0.973, 0.0]","[[1188, 116, 0], [94, 3837, 0], [0, 0, 0]]"


In [35]:
final_models["7_layers"].save("./model/kf_dp.h5")

In [36]:
for model_name, model in final_models.items():
    model.save(f"./model/{model_name}.h5")