In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Bidirectional, Dropout, InputLayer
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, LearningRateScheduler

from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler, normalize
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

from my_model import create_model_1, create_model_1_1, create_model_1_2, create_model_2, create_model_2_1
tf.__version__


In [None]:
# Read the pre-split training and test tables from the local `data` directory.
train_df, test_df = (
    pd.read_csv(os.path.join('data', csv_name))
    for csv_name in ('unionTrain.csv', 'unionTest.csv')
)
# Report how many rows each split contains.
print(f"Length Train: {len(train_df)}", f"Length Test: {len(test_df)}", sep="\n")

In [None]:
# Features are every column except the target (`Severity`) and the
# `sequence_id` column; the target is kept as a separate Series.
X_train = train_df.drop(columns=['Severity', 'sequence_id'])
y_train = train_df['Severity']
X_test = test_df.drop(columns=['Severity', 'sequence_id'])
y_test = test_df['Severity']

In [None]:
# Spot-check a single training row: print its label together with the
# values of the fifty columns named `<i>_0X` for i in 0..49.
idx = 8
tm = X_train.iloc[idx].loc[[f'{col_no}_0X' for col_no in range(50)]]  # original note: max = 854
print(f'Index:{idx}, Severity:{y_train[idx]}, {tm.to_list()}')

In [None]:
def feature_engineering(df):
    """Return an independent copy of ``df``.

    This is a placeholder hook: no features are derived yet, so the copy
    holds exactly the same data as the input. Mutating the returned frame
    does not affect ``df``.
    """
    # TODO: derive extra features here (an earlier sketch looped over range(50)).
    return df.copy()
# Run both splits through the feature step and preview the training result.
# NOTE(review): the cells visible below keep working on X_train / X_test and
# never read tmp_X_train / tmp_X_test.
tmp_X_train, tmp_X_test = (feature_engineering(split) for split in (X_train, X_test))
tmp_X_train.head()

In [None]:
def get_scaler(scaler_name):
    """Create an unfitted scikit-learn scaler from its class name.

    Parameters
    ----------
    scaler_name : str
        ``'RobustScaler'`` or ``'MinMaxScaler'``.

    Returns
    -------
    A new, unfitted instance of the requested scaler.

    Raises
    ------
    ValueError
        If ``scaler_name`` is not one of the supported names. (Previously an
        unknown name fell through both branches and failed with an
        ``UnboundLocalError`` on ``return scaler``.)
    """
    if scaler_name == 'RobustScaler':
        return RobustScaler()
    if scaler_name == 'MinMaxScaler':
        return MinMaxScaler()
    raise ValueError(
        f"Unknown scaler_name {scaler_name!r}; expected 'RobustScaler' or 'MinMaxScaler'"
    )
# Scaler used for this run; `scaler_name` is also referenced by the
# (commented-out) model-saving cell further down.
scaler_name = 'RobustScaler'  # alternative: 'MinMaxScaler'
scaler = get_scaler(scaler_name=scaler_name)

In [None]:
# Learn the scaling statistics from the training features only, then apply
# the same fitted transform to the test features.
# NOTE(review): X_train / X_test are overwritten with the scaled values, so
# re-running this cell refits the scaler on already-scaled data.
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)
# Sanity check of the overall value range after scaling.
print(f"Max: {X_train.max()}, Min: {X_train.min()}")
def modify_X(x, n_rows=854, n_cols=50):
    """Reshape flat per-sample feature vectors into ``(samples, n_rows, n_cols)``.

    Parameters
    ----------
    x : array-like
        Data whose total number of elements is a multiple of
        ``n_rows * n_cols`` (for example a 2-D array with one flattened
        sample per row). DataFrames are accepted and converted to an array.
    n_rows, n_cols : int
        Shape of each sample after reshaping. The defaults reproduce the
        previously hard-coded ``(854, 50)``.

    Returns
    -------
    numpy.ndarray
        A new array of shape ``(-1, n_rows, n_cols)``; the input is copied
        first, so writing to the result never modifies ``x``.

    Raises
    ------
    ValueError
        If a dimension is not positive or the data cannot be split evenly
        into ``n_rows * n_cols`` blocks.
    """
    if n_rows <= 0 or n_cols <= 0:
        raise ValueError(f"n_rows and n_cols must be positive, got {n_rows} and {n_cols}")
    values = np.asarray(x)
    per_sample = n_rows * n_cols
    if values.size % per_sample != 0:
        raise ValueError(
            f"cannot reshape {values.size} values into samples of shape "
            f"({n_rows}, {n_cols}): size is not a multiple of {per_sample}"
        )
    # Row-major reshape: each consecutive run of n_cols flat values becomes one row.
    # NOTE(review): confirm this matches the column order of the source CSV.
    return values.copy().reshape(-1, n_rows, n_cols)
# Reshape both scaled splits into 3-D arrays and show the resulting shapes.
X_train_modified, X_test_modified = map(modify_X, (X_train, X_test))
(X_train_modified.shape, X_test_modified.shape)

In [None]:
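# Uncomment to persist the fitted scaler; the file name follows `scaler_name`
# so it always matches the scaler that was actually fitted.
# joblib.dump(scaler, os.path.join('saved_scaler', f'{scaler_name}.pkl'))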

In [None]:
def modify_y(df, classes=None):
    """One-hot encode a label column into a 2-D NumPy array.

    Parameters
    ----------
    df : pandas.Series or array-like
        Label values, one per sample.
    classes : sequence, optional
        Explicit, ordered list of class labels. When given, the result has
        exactly one column per entry of ``classes`` in that order, even for
        labels that do not occur in ``df``. Pass the same list for every
        split so their column layouts agree. When omitted, the columns are
        whatever ``pd.get_dummies`` derives from ``df`` alone (the original
        behaviour).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(df), n_classes)``.

    Raises
    ------
    ValueError
        If ``classes`` is given and ``df`` contains a value (including a
        missing value) that is not listed in it.
    """
    if classes is None:
        return pd.get_dummies(df).to_numpy()

    labels = pd.Series(df)
    unknown = labels[~labels.isin(classes)]
    if not unknown.empty:
        raise ValueError(
            f"labels not present in classes: {unknown.unique().tolist()}"
        )
    # A Categorical with fixed categories yields one indicator column per
    # category, including categories that never occur in this split.
    fixed = pd.Categorical(labels, categories=list(classes))
    return pd.get_dummies(fixed).to_numpy()
# One-hot encode the labels of both splits and show the resulting shapes.
# NOTE(review): each split is encoded on its own, so the two column layouts
# only agree when both splits contain the same set of Severity values.
y_train_modified, y_test_modified = (modify_y(labels) for labels in (y_train, y_test))
(y_train_modified.shape, y_test_modified.shape)

In [None]:
# Per-sample input shape (everything after the sample axis) and the number
# of one-hot output columns.
input_shape = X_train_modified.shape[1:]
n_output = y_train_modified.shape[1]

# Other architectures available from my_model: create_model_1, create_model_1_1.
model = create_model_1_2(input_shape, n_output)
model.summary()

In [None]:
# Adam with a fixed initial learning rate; categorical cross-entropy matches
# the one-hot encoded targets, and accuracy is tracked during training.
optimizer = Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss=CategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Halve the learning rate after 4 epochs without improvement in validation
# loss, never going below 1e-4.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    min_lr=1e-4,
)
# Stop on the same validation signal the LR schedule uses (training loss keeps
# falling while a model overfits, so it is a poor stopping criterion), and
# roll the model back to the weights of its best validation epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=6,
    restore_best_weights=True,
)

In [None]:
# Train for up to 30 epochs, holding out 20% of the training data for
# validation; the callbacks adjust the learning rate and may stop early.
# NOTE(review): Keras takes the validation split from the end of the arrays
# before shuffling — confirm the CSV rows are not ordered by class.
history = model.fit(
    x=X_train_modified,
    y=y_train_modified,
    batch_size=32,
    epochs=30,
    shuffle=True,
    validation_split=0.2,
    callbacks=[reduce_lr, early_stopping],
)

In [None]:
# from datetime import datetime
# current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# model.save(os.path.join("saved_models", f"trained_at_{current_time}_using_{scaler_name}.h5"))

In [None]:
# Training vs. validation loss per epoch.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
ax.legend(['train', 'validate'], loc='upper left')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'])
ax.plot(history.history['val_accuracy'])
ax.set_title('model accuracy')
ax.set_ylabel('accuracy')
ax.set_xlabel('epoch')
# 'val_accuracy' comes from the validation split of the training data, not
# from the test set, so label it the same way as the loss plot.
ax.legend(['train', 'validate'], loc='best')
plt.show()

In [None]:
# Class probabilities for every training sample.
train_predict = model.predict(X_train_modified)
# Output column k corresponds to the k-th smallest training label (the
# one-hot columns come from pd.get_dummies on y_train, which orders them by
# sorted label value). Map argmax through the real labels rather than
# assuming they are exactly 1..K.
train_class_labels = np.sort(y_train.dropna().unique())
train_real_predict = train_class_labels[np.argmax(train_predict, axis=1)]
# Side-by-side listing of predicted and true label for every sample.
for i, (predicted, actual) in enumerate(zip(train_real_predict, y_train)):
    print(f"Index:{i}, Predict:{predicted}, Real:{actual}")

In [None]:
# f1_score defaults to average='binary', which raises ValueError as soon as
# more than two classes are present; fall back to the macro average then.
train_labels = np.union1d(y_train, train_real_predict)
train_f1_average = 'binary' if train_labels.size <= 2 else 'macro'
f1_train = f1_score(y_train, train_real_predict, average=train_f1_average)
accuracy_train = accuracy_score(y_train, train_real_predict)
print(f"f1: {f1_train}\naccuracy: {accuracy_train}")
print(classification_report(y_train, train_real_predict))
print("---------------------------------------------------------")
# Rows are true labels, columns are predictions; tick with the real label
# values instead of positional indices 0..K-1.
ax = sns.heatmap(
    confusion_matrix(y_train, train_real_predict, labels=train_labels),
    annot=True,
    fmt='2.0f',
    xticklabels=train_labels,
    yticklabels=train_labels,
)
ax.set(xlabel='predicted', ylabel='actual', title='train confusion matrix')
plt.show()

In [None]:
# Class probabilities for every test sample.
test_predict = model.predict(X_test_modified)
# The model's output columns follow the one-hot layout of the TRAINING labels
# (pd.get_dummies on y_train, ordered by sorted label value), so decode with
# those labels rather than assuming they are exactly 1..K.
test_class_labels = np.sort(y_train.dropna().unique())
test_real_predict = test_class_labels[np.argmax(test_predict, axis=1)]
# Side-by-side listing of predicted and true label for every sample.
for i, (predicted, actual) in enumerate(zip(test_real_predict, y_test)):
    print(f"Index:{i}, Predict:{predicted}, Real:{actual}")

In [None]:
# f1_score defaults to average='binary', which raises ValueError as soon as
# more than two classes are present; fall back to the macro average then.
test_labels = np.union1d(y_test, test_real_predict)
test_f1_average = 'binary' if test_labels.size <= 2 else 'macro'
f1_test = f1_score(y_test, test_real_predict, average=test_f1_average)
accuracy_test = accuracy_score(y_test, test_real_predict)
print(f"f1: {f1_test}\naccuracy: {accuracy_test}")
print(classification_report(y_test, test_real_predict))
print("---------------------------------------------------------")
# Rows are true labels, columns are predictions; tick with the real label
# values instead of positional indices 0..K-1.
ax = sns.heatmap(
    confusion_matrix(y_test, test_real_predict, labels=test_labels),
    annot=True,
    fmt='2.0f',
    xticklabels=test_labels,
    yticklabels=test_labels,
)
ax.set(xlabel='predicted', ylabel='actual', title='test confusion matrix')
plt.show()