In [15]:
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, cohen_kappa_score
from sklearn.neural_network import MLPClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Reshape, Input, LSTM, Conv1D, MaxPooling1D, Flatten


In [16]:
Train = pd.read_csv('dataTrain.csv')
Test = pd.read_csv('dataTest.csv')

X_train = Train.drop('output', axis=1)
y_train = Train['output']

X_test = Test.drop('output', axis=1)
y_test = Test['output']

maxs = X_train.max().tolist()

In [17]:
def train_dnn(X, y, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)
    
    accuracies = []
    recalls = []
    precisions = []
    f1_scores = []
    kappas = []

    model = Sequential([
        Input(shape=(X.shape[1],)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    for train_index, test_index in kf.split(X):
        X_train_fold, X_test_fold = X.iloc[train_index], X.iloc[test_index]
        y_train_fold, y_test_fold = y.iloc[train_index], y.iloc[test_index]
        
        model.set_weights([np.random.permutation(w.flatten()).reshape(w.shape) for w in model.get_weights()])

        model.fit(X_train_fold, y_train_fold, epochs=20, batch_size=32, verbose=0)
        
        y_pred = (model.predict(X_test_fold) > 0.5).astype("int32")

        accuracies.append(accuracy_score(y_test_fold, y_pred))
        recalls.append(recall_score(y_test_fold, y_pred))
        precisions.append(precision_score(y_test_fold, y_pred))
        f1_scores.append(f1_score(y_test_fold, y_pred))
        kappas.append(cohen_kappa_score(y_test_fold, y_pred))

    print("DNN Results:")
    print(f"Average Accuracy: {np.mean(accuracies):.3f} ± {np.std(accuracies):.3f}")
    print(f"Average Recall: {np.mean(recalls):.3f} ± {np.std(recalls):.3f}")
    print(f"Average Precision: {np.mean(precisions):.3f} ± {np.std(recalls):.3f}")
    print(f"Average F1-Score: {np.mean(f1_scores):.3f} ± {np.std(f1_scores):.3f}")
    print(f"Average Kappa: {np.mean(kappas):.3f} ± {np.std(kappas):.3f}\n")

In [18]:
def train_lstm(X, y, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)
    
    accuracies = []
    recalls = []
    precisions = []
    f1_scores = []
    kappas = []
    
    for train_index, test_index in kf.split(X):
        X_train2, X_test2 = X.iloc[train_index], X.iloc[test_index]
        y_train2, y_test2 = y.iloc[train_index], y.iloc[test_index]

        model = Sequential([
            Input(shape=(X_train2.shape[1],)),
            Reshape((X_train2.shape[1], 1)),
            LSTM(64, return_sequences=True),
            Dropout(0.3),
            LSTM(32),
            Dense(1, activation='sigmoid')
        ])

        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        model.fit(X_train2, y_train2, epochs=20, batch_size=32, verbose=0)

        y_pred = (model.predict(X_test2) > 0.5).astype("int32")

        accuracies.append(accuracy_score(y_test2, y_pred))
        recalls.append(recall_score(y_test2, y_pred))
        precisions.append(precision_score(y_test2, y_pred))
        f1_scores.append(f1_score(y_test2, y_pred))
        kappas.append(cohen_kappa_score(y_test2, y_pred))

    print("LSTM Results:")
    print(f"Average Accuracy: {np.mean(accuracies):.3f} ± {np.std(accuracies):.3f}")
    print(f"Average Recall: {np.mean(recalls):.3f} ± {np.std(recalls):.3f}")
    print(f"Average Precision: {np.mean(precisions):.3f} ± {np.std(precisions):.3f}")
    print(f"Average F1-Score: {np.mean(f1_scores):.3f} ± {np.std(f1_scores):.3f}")
    print(f"Average Kappa: {np.mean(kappas):.3f} ± {np.std(kappas):.3f}\n")


In [19]:
def train_cnn(X, y, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)
    
    accuracies = []
    recalls = []
    precisions = []
    f1_scores = []
    kappas = []
    
    for train_index, test_index in kf.split(X):
        X_train2, X_test2 = X.iloc[train_index], X.iloc[test_index]
        y_train2, y_test2 = y.iloc[train_index], y.iloc[test_index]

        model = Sequential([
            Input(shape=(X_train2.shape[1],)),
            Reshape((X_train2.shape[1], 1)),
            Conv1D(32, 3, activation='relu'),
            MaxPooling1D(2),
            Conv1D(64, 3, activation='relu'),
            MaxPooling1D(2),
            Flatten(),
            Dense(64, activation='relu'),
            Dense(1, activation='sigmoid')
        ])

        model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
        model.fit(X_train2, y_train2, epochs=20, batch_size=32, verbose=0)

        y_pred = (model.predict(X_test2) > 0.5).astype("int32")

        accuracies.append(accuracy_score(y_test2, y_pred))
        recalls.append(recall_score(y_test2, y_pred))
        precisions.append(precision_score(y_test2, y_pred))
        f1_scores.append(f1_score(y_test2, y_pred))
        kappas.append(cohen_kappa_score(y_test2, y_pred))

    print("CNN Results:")
    print(f"Average Accuracy: {np.mean(accuracies):.3f} ± {np.std(accuracies):.3f}")
    print(f"Average Recall: {np.mean(recalls):.3f} ± {np.std(recalls):.3f}")
    print(f"Average Precision: {np.mean(precisions):.3f} ± {np.std(precisions):.3f}")
    print(f"Average F1-Score: {np.mean(f1_scores):.3f} ± {np.std(f1_scores):.3f}")
    print(f"Average Kappa: {np.mean(kappas):.3f} ± {np.std(kappas):.3f}\n")


In [20]:
def train_mlp(hidden_layer_sizes, X, y, n_splits=5):
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=1)
    
    accuracies = []
    recalls = []
    precisions = []
    f1_scores = []
    kappas = []
    
    for train_index, test_index in kf.split(X):
        X_train2, X_test2 = X.iloc[train_index], X.iloc[test_index]
        y_train2, y_test2 = y.iloc[train_index], y.iloc[test_index]

        model = MLPClassifier(hidden_layer_sizes=hidden_layer_sizes, random_state=1, max_iter=5000)
        model.fit(X_train2, y_train2)

        y_pred = model.predict(X_test2)

        accuracies.append(accuracy_score(y_test2, y_pred))
        recalls.append(recall_score(y_test2, y_pred, average='weighted'))
        precisions.append(precision_score(y_test2, y_pred, average='weighted'))
        f1_scores.append(f1_score(y_test2, y_pred, average='weighted'))
        kappas.append(cohen_kappa_score(y_test2, y_pred))

    print(f'MLP Architecture: {hidden_layer_sizes}')
    print(f"Average Accuracy: {np.mean(accuracies):.3f} ± {np.std(accuracies):.3f}")
    print(f"Average Recall: {np.mean(recalls):.3f} ± {np.std(recalls):.3f}")
    print(f"Average Precision: {np.mean(precisions):.3f} ± {np.std(precisions):.3f}")
    print(f"Average F1-Score: {np.mean(f1_scores):.3f} ± {np.std(f1_scores):.3f}")
    print(f"Average Kappa: {np.mean(kappas):.3f} ± {np.std(kappas):.3f}\n")


In [21]:
train_dnn(X_train, y_train)
train_lstm(X_train, y_train)
train_cnn(X_train, y_train)

Architectures = [[5], [10], [5, 5], [5, 10], [10, 10]]
for arch in Architectures:
    train_mlp(arch, X_train, y_train)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
DNN Results:
Average Accuracy: 0.822 ± 0.024
Average Recall: 0.876 ± 0.060
Average Precision: 0.809 ± 0.060
Average F1-Score: 0.839 ± 0.041
Average Kappa: 0.631 ± 0.052

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 414ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 356ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 415ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 341ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 345ms/step
LSTM Results:
Average Accuracy: 0.706 ± 0.087
Average Recall: 0.797 ± 0.161
Average Precision: 0.699 ± 0.