In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from tqdm import tqdm


import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model

from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler, normalize
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

from warnings import simplefilter
# Suppress pandas' PerformanceWarning for everything executed after this point.
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

# Project-local helpers imported from the `utils` module (not defined in this notebook).
from utils import mod_df, drop_features, inverse_mod_X, inverse_mod_y, apply_savgol_filter, apply_median_filter, apply_maximum_filter, apply_is_zero

# Last expression of the cell: displays the installed TensorFlow version as the cell output.
tf.__version__

In [None]:
# Read the train and test splits from the local `data` directory.
train_df, test_df = (
    pd.read_csv(os.path.join('data', csv_name))
    for csv_name in ('unionTrain.csv', 'unionTest.csv')
)

In [None]:
# Run each raw frame through the project's `mod_df` helper, which returns an
# (X, y) pair per split.
(X_train, y_train), (X_test, y_test) = mod_df(train_df), mod_df(test_df)

In [None]:
def feature_engineering(df):
    """Build the engineered feature table from a raw feature frame.

    The ``id`` and ``timestep`` columns are dropped. For every remaining
    column, four derived columns are appended after the original columns,
    grouped per source column and in this order:

    * ``{col}_savgol``  - ``apply_savgol_filter(x, window_size=11, polynomial=2)``
    * ``{col}_median``  - ``apply_median_filter(x)`` (helper's default window)
    * ``{col}_sav_med`` - ``apply_savgol_filter(x, window_size=21)`` followed by
      ``apply_median_filter(..., window_size=5)``
    * ``{col}_is_zero`` - ``apply_is_zero(x)``

    Column names and order are part of the contract: the result is fed to a
    previously fitted scaler, so they must not change.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain the ``id`` and ``timestep`` columns.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original (non-dropped) columns followed by the
        derived ones. ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If ``id`` or ``timestep`` is missing from ``df``.
    """
    # `drop` already returns a new frame, so no explicit copy is needed.
    base = df.drop(columns=['id', 'timestep'])

    # Collect the derived columns first and attach them in a single concat.
    # Inserting them one by one fragments the frame, which is the pattern
    # pandas' PerformanceWarning complains about.
    derived = {}
    for col in tqdm(base.columns.to_list()):
        feature = base[col].to_numpy()
        derived[f'{col}_savgol'] = apply_savgol_filter(feature, window_size=11, polynomial=2)
        derived[f'{col}_median'] = apply_median_filter(feature)
        # NOTE(review): a `{col}_max` feature (apply_maximum_filter) is
        # deliberately not produced here - presumably to match a scaler fitted
        # without it; confirm before re-enabling.
        derived[f'{col}_sav_med'] = apply_median_filter(
            apply_savgol_filter(feature, window_size=21), window_size=5
        )
        derived[f'{col}_is_zero'] = apply_is_zero(feature)

    if not derived:
        # No feature columns left after the drop: nothing to append.
        return base

    return pd.concat([base, pd.DataFrame(derived, index=base.index)], axis=1)

# Engineer features for the train split first, then the test split.
X_train_1, X_test_1 = feature_engineering(X_train), feature_engineering(X_test)

In [None]:
# File name of the persisted scaler to load from `saved_scaler/`.
# Other file names that are currently disabled:
#   'RobustScaler.pkl'
#   'MinMaxScaler.pkl'
scaler_name = 'MinMaxScaler_wo_max.pkl'
scaler = joblib.load(
    os.path.join('saved_scaler', scaler_name)
)

In [None]:
# Transform both engineered splits with the loaded scaler (transform only, no fit).
X_train_scaled, X_test_scaled = scaler.transform(X_train_1), scaler.transform(X_test_1)

In [None]:
# Convert scaled features and targets into the model's input/target form using
# the project's `inverse_mod_*` helpers.
X_train_modified = inverse_mod_X(X_train_scaled)
y_train_modified = inverse_mod_y(y_train)

X_test_modified = inverse_mod_X(X_test_scaled)
y_test_modified = inverse_mod_y(y_test)

# Quick shape check of the training inputs and targets.
print(X_train_modified.shape, y_train_modified.shape)

In [None]:
# Saved Keras model (HDF5 file under `saved_models/`) to evaluate.
model_name = '2022-11-11_18-57-17_MinMaxScaler_15.h5'
model = load_model(
    os.path.join('saved_models', model_name)
)

# Print the layer-by-layer summary of the loaded model.
model.summary()

In [None]:
# Predicted and true labels for the training split. argmax gives a 0-based
# column index, so +1 shifts both to 1-based label ids.
train_predict = np.argmax(model.predict(X_train_modified), axis=1) + 1
train_real = np.argmax(y_train_modified, axis=1) + 1

# Debug aid (disabled): per-sample comparison of prediction and ground truth.
# for i, (pred, real) in enumerate(zip(train_predict, train_real)):
#     print(f"Index:{i}, Predict:{pred}, Real:{real}")

In [None]:
# Summary metrics on the training split.
# NOTE(review): f1_score is called with its default average='binary'
# (positive label 1), which only accepts two-class targets - confirm the
# label set is binary, otherwise an explicit `average=` is required.
f1_train = f1_score(train_real, train_predict)
accuracy_train = accuracy_score(train_real, train_predict)
print(classification_report(train_real, train_predict, digits=4))
print("---------------------------------------------------------")

# Pin the label order explicitly and reuse it for the tick labels, so the
# heatmap axes show the actual 1-based label ids instead of seaborn's default
# 0-based positions.
train_labels = np.unique(np.concatenate([train_real, train_predict]))
train_cm = confusion_matrix(train_real, train_predict, labels=train_labels)
ax_train = sns.heatmap(
    train_cm,
    annot=True,
    fmt='d',  # counts are integers
    xticklabels=train_labels,
    yticklabels=train_labels,
)
ax_train.set(xlabel='Predicted label', ylabel='True label', title='Confusion matrix (train)')
plt.show()  # render the figure without echoing the Axes repr

In [None]:
# Predicted and true labels for the test split. argmax gives a 0-based
# column index, so +1 shifts both to 1-based label ids.
test_predict = np.argmax(model.predict(X_test_modified), axis=1) + 1
test_real = np.argmax(y_test_modified, axis=1) + 1

# Debug aid (disabled): per-sample comparison of prediction and ground truth.
# for i, (pred, real) in enumerate(zip(test_predict, test_real)):
#     print(f"Index:{i}, Predict:{pred}, Real:{real}")

In [None]:
# Summary metrics on the test split.
# NOTE(review): f1_score is called with its default average='binary'
# (positive label 1), which only accepts two-class targets - confirm the
# label set is binary, otherwise an explicit `average=` is required.
f1_test = f1_score(test_real, test_predict)
accuracy_test = accuracy_score(test_real, test_predict)
print(classification_report(test_real, test_predict, digits=4))
print("---------------------------------------------------------")

# Pin the label order explicitly and reuse it for the tick labels, so the
# heatmap axes show the actual 1-based label ids instead of seaborn's default
# 0-based positions.
test_labels = np.unique(np.concatenate([test_real, test_predict]))
test_cm = confusion_matrix(test_real, test_predict, labels=test_labels)
ax_test = sns.heatmap(
    test_cm,
    annot=True,
    fmt='d',  # counts are integers
    xticklabels=test_labels,
    yticklabels=test_labels,
)
ax_test.set(xlabel='Predicted label', ylabel='True label', title='Confusion matrix (test)')
plt.show()  # render the figure without echoing the Axes repr