In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Dropout, InputLayer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, LearningRateScheduler

from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler, normalize
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

from my_model import create_model_1, create_model_1_1, create_model_2, create_model_2_1
# Display the TensorFlow version this notebook runs against.
tf.__version__

'2.6.0'

In [2]:
# Read the pre-split training and test tables from the local data/ folder
# and report how many rows each one holds.
train_path = os.path.join('data', 'unionTrain.csv')
test_path = os.path.join('data', 'unionTest.csv')
train_df = pd.read_csv(train_path)
test_df = pd.read_csv(test_path)
print(f"Length Train: {len(train_df)}")
print(f"Length Test: {len(test_df)}")

Length Train: 107
Length Test: 19


In [3]:
# Separate the target column from the features; 'sequence_id' is dropped from
# the features as well.
non_feature_columns = ['Severity', 'sequence_id']
X_train = train_df.drop(columns=non_feature_columns)
y_train = train_df['Severity']
X_test = test_df.drop(columns=non_feature_columns)
y_test = test_df['Severity']

In [4]:
# Spot-check one training row: print its label and the '<i>_0X' columns for
# i in 0..49.
# NOTE(review): column names look like '<frame>_<point><axis>' — confirm.
idx = 8
tm = X_train.iloc[idx][[f'{i}_0X' for i in range(50)]] # max = 854
# Use positional indexing for the label too, so features and label always come
# from the same row even if the index is not the default 0..n-1 range.
print(f'Index:{idx}, Severity:{y_train.iloc[idx]}, {tm.to_list()}')

Index:8, Severity:2, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [5]:
def feature_engineering(df):
    """Placeholder feature-engineering step.

    Currently a no-op: the input object is returned unchanged (not copied).

    Parameters
    ----------
    df : object
        The feature table to transform.

    Returns
    -------
    object
        The very same object that was passed in.
    """
    return df
# Run the (currently pass-through) feature-engineering step and preview the
# first five rows of the result.
tmp_X_train = feature_engineering(X_train)
tmp_X_train.head(n=5)

Unnamed: 0,0_0X,0_0Y,0_1X,0_1Y,0_2X,0_2Y,0_3X,0_3Y,0_4X,0_4Y,...,853_20X,853_20Y,853_21X,853_21Y,853_22X,853_22Y,853_23X,853_23Y,853_24X,853_24Y
0,203.81839,137.074432,213.271698,120.394463,218.456604,128.786789,214.281204,138.17157,209.073471,139.232819,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,2.57817,21.342966,29.684607,82.876892,53.714298,149.624344,0.0,0.0,...,236.18898,71.378502,236.18898,71.378502,236.18898,71.378502,236.18898,71.378502,236.18898,71.378502
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,222.626373,125.631256,222.626373,125.631256,222.626373,125.631256,222.626373,125.631256,222.626373,125.631256
4,126.701424,108.963158,120.373978,85.994064,122.49572,96.498444,129.821487,105.855408,114.185593,106.893127,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
def get_scaler(scaler_name):
    """Create a fresh, unfitted scikit-learn scaler selected by name.

    Parameters
    ----------
    scaler_name : str
        Either 'RobustScaler' or 'MinMaxScaler'.

    Returns
    -------
    RobustScaler or MinMaxScaler
        A new scaler instance built with default arguments.

    Raises
    ------
    ValueError
        If ``scaler_name`` is not one of the supported names. Previously this
        case surfaced as an UnboundLocalError on the return statement.
    """
    if scaler_name == 'RobustScaler':
        return RobustScaler()
    if scaler_name == 'MinMaxScaler':
        return MinMaxScaler()
    raise ValueError(
        f"Unknown scaler_name {scaler_name!r}; "
        "expected 'RobustScaler' or 'MinMaxScaler'"
    )
# Choose the scaler for this run; switch the name to 'MinMaxScaler' to try the
# alternative supported by get_scaler.
scaler_name = 'RobustScaler'
scaler = get_scaler(scaler_name)

In [7]:
# Learn the scaling statistics from the training features only, then apply the
# same fitted transform to both splits.
# NOTE(review): X_train / X_test are rebound to the scaled arrays here, so
# re-running this cell would re-fit the scaler on already-scaled data.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
scaled_max, scaled_min = X_train.max(), X_train.min()
print(f"Max: {scaled_max}, Min: {scaled_min}")
def modify_X(x, n_frames=854, n_features=50):
    """Reshape flat feature rows into a 3-D array.

    Parameters
    ----------
    x : numpy.ndarray
        Array whose total size is a multiple of ``n_frames * n_features``.
    n_frames : int, optional
        Length of the second axis of the result. Defaults to 854, the value
        previously hard-coded here.
    n_features : int, optional
        Length of the last axis of the result. Defaults to 50, the value
        previously hard-coded here.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(-1, n_frames, n_features)`` backed by a copy of
        ``x``, so writing to the result never modifies the input.

    Raises
    ------
    ValueError
        Raised by NumPy when the size of ``x`` is incompatible with the
        requested shape.
    """
    # Copy first so the reshaped array does not share memory with the caller's.
    return x.copy().reshape(-1, n_frames, n_features)
# Reshape both splits the same way and display the resulting shapes.
X_train_modified, X_test_modified = modify_X(X_train), modify_X(X_test)
(X_train_modified.shape, X_test_modified.shape)

Max: 488.5626831054688, Min: -5.134825106291681


((107, 854, 50), (19, 854, 50))

In [8]:
# Optional: persist the fitted scaler under the name of the scaler actually in use.
# joblib.dump(scaler, os.path.join('saved_scaler', f'{scaler_name}.pkl'))

In [9]:
def modify_y(df, classes=None):
    """One-hot encode a label column into a NumPy array.

    Parameters
    ----------
    df : pandas.Series or array-like
        The labels to encode.
    classes : sequence, optional
        Full, ordered list of labels to create columns for. When given, the
        result always has ``len(classes)`` columns in that order, even if some
        labels do not occur in ``df``; labels not listed in ``classes`` encode
        as an all-zero row. When omitted (the default), one column is created
        per label present in ``df``, exactly as before — which means two
        splits encoded separately only line up if they contain the same labels.

    Returns
    -------
    numpy.ndarray
        2-D indicator array with one row per label.
    """
    if classes is not None:
        # A categorical with explicit categories makes get_dummies emit a
        # column for every category, observed or not.
        df = pd.Series(pd.Categorical(df, categories=list(classes)))
    return pd.get_dummies(df).to_numpy()
# One-hot encode the labels of each split and display the resulting shapes.
# NOTE(review): the splits are encoded independently, so the columns only line
# up if train and test contain the same set of Severity values.
y_train_modified, y_test_modified = modify_y(y_train), modify_y(y_test)
(y_train_modified.shape, y_test_modified.shape)

((107, 2), (19, 2))

In [10]:
# Seed NumPy and TensorFlow before building the model so that weight
# initialisation (and therefore training) is repeatable when the notebook is
# re-run from a fresh kernel.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# The model takes the two trailing axes of the reshaped features as its input
# shape and emits one output unit per one-hot label column.
input_shape = (X_train_modified.shape[1], X_train_modified.shape[2])
n_output = y_train_modified.shape[1]
# Alternative architecture from my_model: create_model_1(input_shape, n_output)
model = create_model_1_1(input_shape, n_output)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional (Bidirectional (None, 854, 128)          58880     
_________________________________________________________________
bidirectional_1 (Bidirection (None, 854, 64)           41216     
_________________________________________________________________
bidirectional_2 (Bidirection (None, 64)                24832     
_________________________________________________________________
dense (Dense)                (None, 25)                1625      
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 52        
Total params: 126,605
Trainable params: 126,605
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Adam at a 1e-3 starting learning rate, categorical cross-entropy on the
# one-hot targets, and accuracy as the tracked metric.
optimizer = Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=CategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [12]:
# Halve the learning rate when val_loss has not improved for 4 epochs, with
# 1e-4 as the lower bound.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    min_lr=1e-4,
)
# Stop when the validation loss has not improved for 6 epochs. Training loss
# was monitored before, which keeps improving while the model overfits and is
# inconsistent with the learning-rate callback that watches val_loss.
# restore_best_weights rolls the model back to its best validation epoch
# instead of keeping the weights from the last (worse) epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=6,
    restore_best_weights=True,
)

In [13]:
# Train for up to 200 epochs, holding out 20% of the training data for
# validation. Per the Keras docs, validation_split takes the *last* samples
# before shuffling, so the hold-out depends on the row order of the CSV.
training_callbacks = [reduce_lr, early_stopping]
history = model.fit(
    X_train_modified,
    y_train_modified,
    batch_size=32,
    epochs=200,
    shuffle=True,
    validation_split=0.2,
    callbacks=training_callbacks,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200

In [None]:
# from datetime import datetime
# current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# model.save(os.path.join("saved_models", f"trained_at_{current_time}_using_{scaler_name}.h5"))

In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validate'], loc='upper left')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
# The second curve is val_accuracy from the validation split, not the held-out
# test set, so label it the same way as the loss plot.
plt.legend(['train', 'validate'], loc='best')
plt.show()

In [None]:
# Model outputs (one score per class) for every training sequence.
train_predict = model.predict(X_train_modified)
# pd.get_dummies orders its columns by sorted label value, so output column k
# corresponds to the k-th smallest Severity seen in training. Looking the label
# up replaces the previous `argmax + 1`, which is only right when the labels
# are exactly 1..K.
class_labels = np.sort(y_train.unique())
train_real_predict = class_labels[np.argmax(train_predict, axis=1)]
# Iterate positionally so a non-default index on y_train cannot misalign rows.
for i, (predicted, actual) in enumerate(zip(train_real_predict, y_train)):
    print(f"Index:{i}, Predict:{predicted}, Real:{actual}")

In [None]:
# Summary metrics and confusion matrix on the training set.
accuracy_train = accuracy_score(y_train, train_real_predict)
f1_train = f1_score(y_train, train_real_predict)
# print(f"f1: {f1_train}\naccuracy: {accuracy_train}")
train_report = classification_report(y_train, train_real_predict)
print(train_report)
print("---------------------------------------------------------")
train_confusion = confusion_matrix(y_train, train_real_predict)
sns.heatmap(train_confusion, annot=True, fmt='2.0f')

In [None]:
# Model outputs (one score per class) for every test sequence.
test_predict = model.predict(X_test_modified)
# The output columns follow the one-hot layout of the *training* labels
# (pd.get_dummies sorts by label value), so decode with the sorted training
# labels rather than `argmax + 1`, which is only right for labels 1..K.
class_labels = np.sort(y_train.unique())
test_real_predict = class_labels[np.argmax(test_predict, axis=1)]
# Iterate positionally so a non-default index on y_test cannot misalign rows.
for i, (predicted, actual) in enumerate(zip(test_real_predict, y_test)):
    print(f"Index:{i}, Predict:{predicted}, Real:{actual}")

In [None]:
# Summary metrics and confusion matrix on the held-out test set.
accuracy_test = accuracy_score(y_test, test_real_predict)
f1_test = f1_score(y_test, test_real_predict)
print(f"f1: {f1_test}\naccuracy: {accuracy_test}")
test_report = classification_report(y_test, test_real_predict)
print(test_report)
print("---------------------------------------------------------")
test_confusion = confusion_matrix(y_test, test_real_predict)
sns.heatmap(test_confusion, annot=True, fmt='2.0f')
# Trailing print keeps the Axes repr out of the cell output.
print()