In [None]:
import pandas as pd
import keras

# Labelled tables; only their 'label' column is used in this section.
train_df = pd.read_csv(r'./train.csv')
# Fix: this previously re-read './train.csv', so y_test held the training
# labels instead of the labels of the held-out set the saved models are
# scored against.
# NOTE(review): './test.csv' is the assumed name of the held-out label file,
# chosen to mirror './train.csv' -- confirm the actual file name.
test_df = pd.read_csv(r'./test.csv')

y_train = train_df['label']
y_test = test_df['label']

additional_input_train = pd.read_csv('./sequence_train_dense_output.csv')
additional_input_test = pd.read_csv('./sequence_test_dense_output.csv')

# Discard the first column of each feature table (presumably a saved index
# column -- TODO confirm). Rebinding instead of inplace=True keeps the step
# explicit and chain-friendly.
additional_input_train = additional_input_train.drop(
    columns=additional_input_train.columns[0])
additional_input_test = additional_input_test.drop(
    columns=additional_input_test.columns[0])

# NOTE(review): these are the same CSV files that were just loaded above, so
# the concatenation below places each feature next to a copy of itself (plus
# the first column that was dropped from the other copy). This looks like it
# may have been meant to load a different feature file -- confirm. It is left
# unchanged because the resulting column count feeds the model's input size.
train_dense = pd.read_csv('./sequence_train_dense_output.csv')
test_dense = pd.read_csv('./sequence_test_dense_output.csv')

# Column-wise concatenation: one row per sample.
train = pd.concat([additional_input_train, train_dense], axis=1)
test = pd.concat([additional_input_test, test_dense], axis=1)

# Features and labels are matched purely by row position further down, so a
# length mismatch would silently (or confusingly) corrupt every metric.
assert len(train) == len(y_train), 'train features and labels differ in length'
assert len(test) == len(y_test), 'test features and labels differ in length'

In [None]:
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten,Conv1D
from keras.layers import Dropout, MaxPooling1D
from keras.models import Model
from keras.callbacks import EarlyStopping
def HexaCSort(X_train, y_train, X_test, y_test, input_length=328,
              epochs=500, batch_size=512, patience=30):
    """Build, compile and fit a small 1-D convolutional two-class classifier.

    Architecture: Conv1D(32 filters, kernel 3, 'same' padding) ->
    BatchNormalization -> ReLU -> MaxPooling1D(2) -> Dropout(0.2) ->
    Flatten -> Dense(32, ReLU) -> Dropout(0.2) -> Dense(2, softmax).

    Parameters
    ----------
    X_train, y_train
        Training features and labels. The network input is declared as
        ``(input_length, 1)``. Labels are consumed by
        ``sparse_categorical_crossentropy`` against a 2-unit softmax, i.e.
        they must be the integer class ids 0 / 1.
    X_test, y_test
        Data passed to ``fit`` as ``validation_data``; early stopping
        monitors the loss on this set.
    input_length : int, default 328
        Number of features per sample (previously hard-coded).
    epochs : int, default 500
        Upper bound on training epochs (previously hard-coded).
    batch_size : int, default 512
        Mini-batch size used by ``fit`` (previously hard-coded).
    patience : int, default 30
        Epochs without ``val_loss`` improvement before training stops
        (previously hard-coded).

    Returns
    -------
    tuple
        ``(model, model_history)``: the fitted Keras ``Model`` and the
        object returned by ``model.fit``.
    """
    # Named `inputs` rather than `input` so the builtin is not shadowed.
    inputs = Input((input_length, 1))

    # Convolutional feature extractor.
    x = Conv1D(32, 3, strides=1, padding='same')(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling1D(2, padding="same")(x)
    x = Dropout(0.2)(x)

    # Dense classification head.
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.2)(x)
    outputs = Dense(2, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

    # restore_best_weights=True rolls the model back to the epoch with the
    # lowest validation loss once training stops.
    early_stop = EarlyStopping(monitor='val_loss', patience=patience,
                               restore_best_weights=True)
    callbacks_list = [early_stop]
    model_history = model.fit(X_train, y_train,
                              validation_data=(X_test, y_test),
                              epochs=epochs, callbacks=callbacks_list,
                              batch_size=batch_size, verbose=1)
    return model, model_history

In [None]:
import numpy as np
import math
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
# 5-fold cross-validation over the training set. Each fold trains a fresh
# model, saves it under ./model/model_cv_<fold>, and records the fold's
# validation metrics in the *_collecton lists.
k = 5
kf = KFold(n_splits=k, shuffle = True, random_state=0)

ACC_collecton = []
BACC_collecton = []
Sn_collecton = []
Sp_collecton = []
MCC_collecton = []
precison_collecton = []
F1_collecton = []
cv = 1
for train_index , test_index in kf.split(y_train):
    X_train_CV , X_valid_CV = train.iloc[train_index,:], train.iloc[test_index,:]
    y_train_CV , y_valid_CV = y_train.iloc[train_index] , y_train.iloc[test_index]

    print(X_train_CV.shape)
    print(y_train_CV.shape)
    print(X_valid_CV.shape)
    print(y_valid_CV.shape)

    model, model_history = HexaCSort(X_train_CV, y_train_CV, X_valid_CV, y_valid_CV)
    model.save(f'./model/model_cv_{cv}', save_format = 'tf')
    cv += 1

    predicted_protability = model.predict(X_valid_CV, batch_size=1)
    # Predicted class = index of the largest softmax output (first index on
    # ties, which is what the previous np.where(...)[0][0] loop selected).
    predicted_class = np.argmax(predicted_protability, axis=1)
    y_true = y_valid_CV

    # Fix: for binary labels scikit-learn's confusion_matrix(...).ravel()
    # yields (tn, fp, fn, tp). The previous unpacking (TP, FP, FN, TN)
    # swapped TP and TN, so Sn, Sp, BACC, precision and F1 were computed from
    # the wrong cells. labels=[0, 1] guarantees a 2x2 matrix even when a fold
    # happens to contain or predict only one class.
    TN, FP, FN, TP = confusion_matrix(y_true, predicted_class, labels=[0, 1]).ravel()
    print('TP, FP, FN, TN:', TP, FP, FN, TN)

    ACC = (TP+TN)/(TP+TN+FP+FN)
    ACC_collecton.append(ACC)
    Sn_collecton.append(TP/(TP+FN))
    Sp_collecton.append(TN/(TN+FP))

    # The denominator is formed in floating point: the product of the four
    # marginals can exceed the int64 range for large folds. When a marginal
    # is zero the coefficient is undefined; 0.0 is used by convention.
    mcc_denominator = math.sqrt(
        float(TP+FP) * float(TP+FN) * float(TN+FP) * float(TN+FN))
    MCC = (TP*TN-FP*FN)/mcc_denominator if mcc_denominator else 0.0
    MCC_collecton.append(MCC)

    BACC_collecton.append(0.5*TP/(TP+FN)+0.5*TN/(TN+FP))
    precison_collecton.append(TP/(TP+FP))
    F1 = (2*TP)/(2*TP+FN+FP)
    F1_collecton.append(F1)

In [None]:
from statistics import mean, stdev
# Cross-validation summary: one "<name> <mean> ± <sample stdev>" line per
# metric, in the same order as before.
cv_metric_table = (
    ('acc', ACC_collecton),
    ('bacc', BACC_collecton),
    ('sn', Sn_collecton),
    ('sp', Sp_collecton),
    ('mcc', MCC_collecton),
    ('precison', precison_collecton),
    ('f1', F1_collecton),
)
for metric_label, fold_scores in cv_metric_table:
    print(metric_label, mean(fold_scores), '±', stdev(fold_scores))

In [None]:
# Saved-model locations for folds 1 through 5, matching the
# './model/model_cv_<fold>' naming used when the fold models are saved.
model_paths = [f'./model/model_cv_{fold_number}' for fold_number in range(1, 6)]

In [None]:
# Score every saved fold model on the held-out set. The *_collecton lists are
# reset here, so from this point on they hold held-out metrics (one entry per
# model) rather than the cross-validation metrics.
ACC_collecton = []
BACC_collecton = []
Sn_collecton = []
Sp_collecton = []
MCC_collecton = []
precison_collecton = []
F1_collecton = []

for model_path in model_paths:
    model = keras.models.load_model(model_path)
    predictions = np.asarray(model.predict(test, batch_size=1))
    # One row of class probabilities per sample. The row count is taken from
    # the data instead of the previously hard-coded 284, so the cell keeps
    # working when the held-out set changes size.
    pred = predictions.reshape(-1, predictions.shape[-1])
    # Only the shape is shown; dumping every probability row floods the output.
    print(pred.shape)

    # Predicted class = index of the largest probability (first index on ties).
    predicted_class = np.argmax(pred, axis=1)
    y_true = y_test
    print(y_true.shape)
    print(predicted_class)

    # Fix: for binary labels scikit-learn's confusion_matrix(...).ravel()
    # yields (tn, fp, fn, tp). The previous unpacking (TP, FP, FN, TN)
    # swapped TP and TN, so Sn, Sp, BACC, precision and F1 were computed from
    # the wrong cells. labels=[0, 1] guarantees a 2x2 matrix even when only
    # one class is present or predicted.
    TN, FP, FN, TP = confusion_matrix(y_true, predicted_class, labels=[0, 1]).ravel()
    print('TP, FP, FN, TN:', TP, FP, FN, TN)

    ACC = (TP+TN)/(TP+TN+FP+FN)
    ACC_collecton.append(ACC)
    Sn_collecton.append(TP/(TP+FN))
    Sp_collecton.append(TN/(TN+FP))

    # The denominator is formed in floating point: the product of the four
    # marginals can exceed the int64 range for large sets. When a marginal is
    # zero the coefficient is undefined; 0.0 is used by convention.
    mcc_denominator = math.sqrt(
        float(TP+FP) * float(TP+FN) * float(TN+FP) * float(TN+FN))
    MCC = (TP*TN-FP*FN)/mcc_denominator if mcc_denominator else 0.0
    MCC_collecton.append(MCC)

    BACC_collecton.append(0.5*TP/(TP+FN)+0.5*TN/(TN+FP))
    precison_collecton.append(TP/(TP+FP))
    F1 = (2*TP)/(2*TP+FN+FP)
    F1_collecton.append(F1)

In [None]:
from statistics import stdev
import scipy.stats as stats
# Per-model report followed by mean ± sample standard deviation for each
# metric, then a 95% Student-t confidence interval for the mean accuracy.
# The number of evaluated models is taken from the collected results rather
# than the previously hard-coded 5. statistics.stdev needs at least two
# entries per list.
n_models = len(ACC_collecton)

metric_table = [
    ('ACC: ', ACC_collecton),
    ('BACC: ', BACC_collecton),
    ('Sn: ', Sn_collecton),
    ('Sp: ', Sp_collecton),
    ('MCC: ', MCC_collecton),
    ('Precision: ', precison_collecton),
    ('F1: ', F1_collecton),
]

for i in range(n_models):
    print(f"{i} -------------------")
    for metric_label, metric_values in metric_table:
        print(metric_label, metric_values[i])

# Running totals, kept under their original names.
t_acc = sum(ACC_collecton)
t_bacc = sum(BACC_collecton)
t_sn = sum(Sn_collecton)
t_sp = sum(Sp_collecton)
t_mcc = sum(MCC_collecton)
t_pre = sum(precison_collecton)
t_f1 = sum(F1_collecton)

print("total ---------------------")
print('ACC: ', t_acc/n_models, '±', stdev(ACC_collecton))
print('BACC: ', t_bacc/n_models, '±', stdev(BACC_collecton))
print('Sn: ', t_sn/n_models, '±', stdev(Sn_collecton))
print('Sp: ', t_sp/n_models, '±', stdev(Sp_collecton))
print('MCC: ', t_mcc/n_models, '±', stdev(MCC_collecton))
print('Precision: ', t_pre/n_models, '±', stdev(precison_collecton))
print('F1: ', t_f1/n_models, '±', stdev(F1_collecton))

mean_accuracy = np.mean(ACC_collecton)

# Two-sided t interval around the mean accuracy: n - 1 degrees of freedom,
# scaled by the standard error of the mean.
confidence_level = 0.95
degrees_freedom = len(ACC_collecton) - 1
confidence_interval = stats.t.interval(confidence_level, degrees_freedom, mean_accuracy, stats.sem(ACC_collecton))

print(mean_accuracy)
print(confidence_interval)