# Importing libraries

In [2]:
import numpy as np
import pandas as pd
from numpy import mean, std
from scipy.optimize import minimize
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import cohen_kappa_score, make_scorer
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import KFold, cross_val_score, GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklvq import GLVQ
# Scorer object wrapping Cohen's kappa so it can be passed as `scoring=` to the
# cross-validation calls in the evaluation cells.
kappa_scorer = make_scorer(cohen_kappa_score)

# Importing data from Excel and creating initial dataset

In [3]:
# Read the `dataset` sheet of the input workbook (first column becomes the index),
# take the first 67 columns as the feature matrix and the `label` column as the
# target, then z-score the features.
# NOTE(review): the scaler is fitted on every row before any cross-validation
# split is made, so held-out folds contribute to the scaling statistics.
DATASET_URL = r'https://github.com/DanialShirazi/delay-prediction/raw/main/Input%20Dataset.xlsx'
excel_data = pd.read_excel(
    DATASET_URL,
    sheet_name='dataset',
    index_col=0,
    engine='openpyxl',
)
raw_data = excel_data.values[:, 0:67]
y = excel_data['label']
scaler = StandardScaler()
std_data = scaler.fit_transform(raw_data)

# Determining the optimal number of principal components for PCA

In [3]:
# Sweep the number of principal components (1-20). For each setting, reduce the
# first 65 standardised columns with PCA, append columns 65 and 66 unchanged as
# `concrete` and `time`, grid-search an SVC and record [n_components, 1 - best
# cross-validated accuracy].
# NOTE(review): PCA is fitted on all rows before the CV split, so the reported
# error is computed with held-out rows already seen by the projection.

# The search space and the CV splitter do not depend on the loop variable,
# so they are built once.
kernel = ['poly', 'rbf', 'sigmoid']
C = [1.0, 2.0, 3.0, 5.0, 10.0, 0.1, 0.01]
grid = dict(kernel=kernel, C=C)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)

pca_k_scores = []
for i in range(1, 21):
    pca = PCA(n_components=i)
    pca_data = pca.fit_transform(std_data[:, 0:65])
    # add_prefix turns the integer component labels (0, 1, ...) into strings.
    # A frame mixing integer and string column labels is rejected by recent
    # scikit-learn at fit time, and error_score=0 would hide that as score 0.
    data = (
        pd.DataFrame(pca_data, index=excel_data.index)
        .add_prefix('pc_')
        .assign(concrete=std_data[:, [65]], time=std_data[:, [66]])
    )
    model = SVC()
    grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                               cv=cv, scoring='accuracy', error_score=0)
    grid_result = grid_search.fit(data, y)
    pca_k_scores.append([i, (1 - grid_result.best_score_)])
pca_k_scores

[[1, 0.3018181818181819],
 [2, 0.1903030303030303],
 [3, 0.1333333333333333],
 [4, 0.1515151515151516],
 [5, 0.10909090909090913],
 [6, 0.11454545454545462],
 [7, 0.1260606060606061],
 [8, 0.1266666666666667],
 [9, 0.13212121212121208],
 [10, 0.13212121212121208],
 [11, 0.13818181818181818],
 [12, 0.1321212121212122],
 [13, 0.13818181818181818],
 [14, 0.13818181818181818],
 [15, 0.13818181818181818],
 [16, 0.13818181818181818],
 [17, 0.13818181818181818],
 [18, 0.14484848484848478],
 [19, 0.13818181818181818],
 [20, 0.13818181818181818]]

In [5]:
# Sweep the number of principal components (1-20). For each setting, reduce the
# first 65 standardised columns with PCA, append columns 65 and 66 unchanged as
# `concrete` and `time`, grid-search a random forest and record
# 1 - best cross-validated accuracy.
# NOTE(review): PCA is fitted on all rows before the CV split, so the reported
# error is computed with held-out rows already seen by the projection.

# The search space and the CV splitter do not depend on the loop variable,
# so they are built once.
n_estimators = [50, 100, 120, 200]
max_features = ['sqrt', 'log2']
max_depth = [2, 6, 8, 10]
grid = dict(n_estimators=n_estimators,
            max_features=max_features, max_depth=max_depth)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)

pca_k_scores = []
for i in range(1, 21):
    pca = PCA(n_components=i)
    pca_data = pca.fit_transform(std_data[:, 0:65])
    # add_prefix turns the integer component labels (0, 1, ...) into strings.
    # A frame mixing integer and string column labels is rejected by recent
    # scikit-learn at fit time, and error_score=0 would hide that as score 0.
    data = (
        pd.DataFrame(pca_data, index=excel_data.index)
        .add_prefix('pc_')
        .assign(concrete=std_data[:, [65]], time=std_data[:, [66]])
    )
    model = RandomForestClassifier(random_state=1)
    grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                               cv=cv, scoring='accuracy', error_score=0)
    grid_result = grid_search.fit(data, y)
    pca_k_scores.append(1 - grid_result.best_score_)
pca_k_scores

[0.3284848484848484,
 0.1884848484848486,
 0.18060606060606055,
 0.18727272727272726,
 0.16909090909090907,
 0.18848484848484837,
 0.21272727272727276,
 0.19454545454545447,
 0.19999999999999984,
 0.20060606060606057,
 0.19333333333333336,
 0.20666666666666655,
 0.22484848484848485,
 0.21393939393939398,
 0.21939393939393925,
 0.23272727272727267,
 0.22545454545454535,
 0.20666666666666667,
 0.20181818181818179,
 0.22606060606060607]

In [6]:
# Sweep the number of principal components (1-20). For each setting, reduce the
# first 65 standardised columns with PCA, append columns 65 and 66 unchanged as
# `concrete` and `time`, grid-search Gaussian naive Bayes and record
# 1 - best cross-validated macro-F1 (this sweep scores with f1_macro).
# NOTE(review): PCA is fitted on all rows before the CV split, so the reported
# error is computed with held-out rows already seen by the projection.

# The search space and the CV splitter do not depend on the loop variable,
# so they are built once.
var_smoothing = np.logspace(0, -9, num=5)
grid = dict(var_smoothing=var_smoothing)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)

pca_k_scores = []
for i in range(1, 21):
    pca = PCA(n_components=i)
    pca_data = pca.fit_transform(std_data[:, 0:65])
    # add_prefix turns the integer component labels (0, 1, ...) into strings.
    # A frame mixing integer and string column labels is rejected by recent
    # scikit-learn at fit time, and error_score=0 would hide that as score 0.
    data = (
        pd.DataFrame(pca_data, index=excel_data.index)
        .add_prefix('pc_')
        .assign(concrete=std_data[:, [65]], time=std_data[:, [66]])
    )
    model = GaussianNB()
    grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                               cv=cv, scoring='f1_macro', error_score=0)
    grid_result = grid_search.fit(data, y)
    pca_k_scores.append(1 - grid_result.best_score_)
pca_k_scores

[0.38943562610229276,
 0.17684784351451033,
 0.14100529100529102,
 0.16134038800705475,
 0.13107583774250442,
 0.12469135802469133,
 0.12469135802469133,
 0.12358024691358027,
 0.14007054673721342,
 0.1852380952380952,
 0.17786596119929454,
 0.17827160493827165,
 0.17206349206349203,
 0.18564373897707231,
 0.18564373897707231,
 0.18564373897707231,
 0.18564373897707231,
 0.1925573192239859,
 0.1925573192239859,
 0.1925573192239859]

# Feature extraction with PCA and creating the input dataset

In [4]:
# Build the modelling dataset: 5 principal components of the first 65
# standardised columns, plus standardised columns 65 and 66 carried over
# unchanged as `concrete` and `time`.
pca = PCA(n_components=5)
pca_data = pca.fit_transform(std_data[:, 0:65])
# Bare expression kept from the original cell; it is not the last line,
# so the notebook does not display it.
pca.explained_variance_ratio_
# add_prefix turns the integer component labels (0..4) into strings. A frame
# mixing integer and string column labels is rejected by recent scikit-learn
# at fit time, and every later cell fits on this frame.
data = (
    pd.DataFrame(pca_data, index=excel_data.index)
    .add_prefix('pc_')
    .assign(concrete=std_data[:, [65]], time=std_data[:, [66]])
)

# Tuning hyperparameters of conventional machine learning classifiers using grid search algorithm

In [5]:
# Silence every FutureWarning for the rest of the kernel session.
# NOTE(review): this presumably hides library deprecation notices emitted by the
# fits below — confirm none of them announces behaviour that later became an error.
from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

In [8]:
# Grid-search the SVC kernel and regularisation strength C on `data`,
# ranking candidates by macro-F1 under 3x-repeated stratified 5-fold CV.
# Failed fits are scored 0 (error_score=0) rather than raising.
from sklearn.svm import SVC

kernel = ['poly', 'rbf', 'sigmoid']
C = [1.0, 2.0, 3.0, 5.0, 10.0, 0.1, 0.01]
grid = {'kernel': kernel, 'C': C}
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
model = SVC()
grid_search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='f1_macro',
    cv=cv,
    n_jobs=-1,
    error_score=0,
)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.884374 using {'C': 1.0, 'kernel': 'sigmoid'}


In [11]:
# Grid-search random-forest size, feature-subsampling rule and tree depth on
# `data`, ranking candidates by macro-F1 under 3x-repeated stratified 5-fold CV.
# Failed fits are scored 0 (error_score=0) rather than raising.
from sklearn.ensemble import RandomForestClassifier

n_estimators = [50, 100, 120, 200]
max_features = ['sqrt', 'log2']
max_depth = [2, 6, 8, 10]
grid = {
    'n_estimators': n_estimators,
    'max_features': max_features,
    'max_depth': max_depth,
}
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
model = RandomForestClassifier(random_state=1)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='f1_macro',
    cv=cv,
    n_jobs=-1,
    error_score=0,
)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.802257 using {'max_depth': 8, 'max_features': 'sqrt', 'n_estimators': 200}


In [6]:
# Grid-search GaussianNB's var_smoothing over 5 log-spaced values from 1 down to
# 1e-9, ranking candidates by macro-F1 under 3x-repeated stratified 5-fold CV.
# Failed fits are scored 0 (error_score=0) rather than raising.
var_smoothing = np.logspace(0, -9, num=5)
grid = {'var_smoothing': var_smoothing}
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
model = GaussianNB()
grid_search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='f1_macro',
    cv=cv,
    n_jobs=-1,
    error_score=0,
)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.868924 using {'var_smoothing': 0.005623413251903491}


# Tuning hyperparameters of shallow ANNs using grid search algorithm

In [6]:
# Grid-search GLVQ distance, activation (type and beta) and solver on `data`,
# ranking candidates by macro-F1 under 3x-repeated stratified 5-fold CV.
# NOTE(review): the recorded output of this cell reports 720 of 1800 fits
# failing; because error_score=0 those combinations are scored 0 instead of
# raising. Re-run with error_score='raise' to see which combinations are invalid.
distance_type = ['squared-euclidean', 'euclidean']
activation_type = ['identity', 'sigmoid', 'soft+', 'swish']
activation_params = [{'beta': 1}, {'beta': 2}, {'beta': 3}]
solver_type = ['sgd', 'wgd', 'adam', 'lbfgs', 'bfgs']
grid = {
    'distance_type': distance_type,
    'activation_type': activation_type,
    'activation_params': activation_params,
    'solver_type': solver_type,
}
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
model = GLVQ()
grid_search = GridSearchCV(
    estimator=model,
    param_grid=grid,
    scoring='f1_macro',
    cv=cv,
    n_jobs=-1,
    error_score=0,
)
grid_result = grid_search.fit(data, y)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.920882 using {'activation_params': {'beta': 1}, 'activation_type': 'swish', 'distance_type': 'squared-euclidean', 'solver_type': 'adam'}


720 fits failed out of a total of 1800.
The score on these train-test partitions for these parameters will be set to 0.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
450 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\dania\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\dania\anaconda3\lib\site-packages\sklvq\models\_base.py", line 568, in fit
    self._before_fit(X, y_index)
  File "C:\Users\dania\anaconda3\lib\site-packages\sklvq\models\_base.py", line 527, in _before_fit
    self._init_objective()
  File "C:\Users\dania\anaconda3\lib\site-packages\sklvq\models\_glvq.py", line 315, in _init_objective
    self.discriminant_params,
  File "C:\Users\dania\anaco

In [7]:
# Grid-search a single-hidden-layer MLP (7-10 units) over activation, solver,
# L2 penalty (alpha) and learning-rate schedule, ranking candidates by macro-F1
# under 3x-repeated stratified 5-fold CV. Failed fits are scored 0.
model = MLPClassifier(max_iter=1000, random_state=1)
# Plain ints: the original `(7)` is just the integer 7, not a one-element tuple.
hidden_layer_sizes = [7, 8, 9, 10]
activation = ['tanh', 'relu', 'identity', 'logistic']
solver = ['sgd', 'adam', 'lbfgs']
alpha = [0.0001, 0.1, 0.5, 1, 0.7]
learning_rate = ['constant', 'adaptive', 'invscaling']
grid = dict(
    hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# `iid` is no longer passed: GridSearchCV dropped that parameter and rejects it
# with a TypeError on current scikit-learn.
grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro', error_score=0)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.905291 using {'activation': 'tanh', 'alpha': 1, 'hidden_layer_sizes': 7, 'learning_rate': 'constant', 'solver': 'adam'}


# Finding the best architecture for proposed Deep-MLP-NN and Tuning its hyperparameters using grid search algorithm

In [5]:
# Every 2-hidden-layer architecture with 7, 8 or 9 units per layer (9 tuples),
# ordered with the last layer varying fastest.
layer_2 = [
    (first, second)
    for first in range(7, 10)
    for second in range(7, 10)
]

In [6]:
#2layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_2
activation= ['tanh', 'relu']
solver= ['sgd', 'adam','lbfgs']
alpha= [0.1, 1, 0.7]
learning_rate= ['constant','adaptive','invscaling']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(iid=False, estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.926984 using {'activation': 'tanh', 'alpha': 1, 'hidden_layer_sizes': (8, 9), 'learning_rate': 'constant', 'solver': 'adam'}


In [7]:
# Every 3-hidden-layer architecture with 7, 8 or 9 units per layer (27 tuples),
# ordered with the last layer varying fastest.
layer_3 = [
    (first, second, third)
    for first in range(7, 10)
    for second in range(7, 10)
    for third in range(7, 10)
]

In [8]:
#3layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_3
activation= ['tanh', 'relu']
solver= ['adam','lbfgs']
alpha= [1, 0.7]
learning_rate= ['constant','adaptive']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(iid=False, estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.915309 using {'activation': 'tanh', 'alpha': 0.7, 'hidden_layer_sizes': (7, 7, 8), 'learning_rate': 'constant', 'solver': 'adam'}


In [9]:
# Every 4-hidden-layer architecture with 7, 8 or 9 units per layer (81 tuples),
# ordered with the last layer varying fastest.
layer_4 = [
    (first, second, third, fourth)
    for first in range(7, 10)
    for second in range(7, 10)
    for third in range(7, 10)
    for fourth in range(7, 10)
]

In [10]:
#4layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_4
activation= ['tanh', 'relu']
solver= ['adam','lbfgs']
alpha= [1, 0.7]
learning_rate= ['constant','adaptive']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(iid=False, estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.938095 using {'activation': 'tanh', 'alpha': 0.7, 'hidden_layer_sizes': (8, 8, 8, 7), 'learning_rate': 'constant', 'solver': 'adam'}


In [11]:
# Every 5-hidden-layer architecture with 7, 8 or 9 units per layer (243 tuples),
# ordered with the last layer varying fastest.
layer_5 = [
    (first, second, third, fourth, fifth)
    for first in range(7, 10)
    for second in range(7, 10)
    for third in range(7, 10)
    for fourth in range(7, 10)
    for fifth in range(7, 10)
]

In [12]:
#5layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_5
activation= ['tanh']
solver= ['adam','lbfgs']
alpha= [ 0.7]
learning_rate= ['constant']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(iid=False, estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.935767 using {'activation': 'tanh', 'alpha': 0.7, 'hidden_layer_sizes': (8, 7, 9, 7, 7), 'learning_rate': 'constant', 'solver': 'adam'}


In [13]:
# Every 6-hidden-layer architecture with 7, 8 or 9 units per layer (729 tuples),
# ordered with the last layer varying fastest.
layer_6 = [
    (first, second, third, fourth, fifth, sixth)
    for first in range(7, 10)
    for second in range(7, 10)
    for third in range(7, 10)
    for fourth in range(7, 10)
    for fifth in range(7, 10)
    for sixth in range(7, 10)
]

In [14]:
#6layer
model = MLPClassifier(max_iter=2000,  random_state=1)
hidden_layer_sizes= layer_6
activation= ['tanh']
solver= ['adam']
alpha= [ 0.7]
learning_rate= ['constant']
grid= dict(
hidden_layer_sizes=hidden_layer_sizes,
    activation=activation,
    solver=solver,
    alpha=alpha,
    learning_rate=learning_rate)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
grid_search = GridSearchCV(iid=False, estimator=model, param_grid=grid, n_jobs=-1,
                           cv=cv, scoring='f1_macro',error_score=0)
grid_result = grid_search.fit(data, y)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.937954 using {'activation': 'tanh', 'alpha': 0.7, 'hidden_layer_sizes': (8, 8, 8, 8, 8, 7), 'learning_rate': 'constant', 'solver': 'adam'}


# Training ML/DL algorithms

In [7]:
# Instantiate every classifier that the evaluation cells compare.
# `xgb` is used below but is not imported in the notebook's import cell, so the
# import lives here to let this cell run on a fresh kernel.
import xgboost as xgb

model_svm = SVC(C=1.0, kernel='sigmoid')
model_rf = RandomForestClassifier(random_state=1, max_depth=8,
                                  max_features='sqrt', n_estimators=200)
model_nb = GaussianNB(var_smoothing=0.0056)
# NOTE(review): no tuning cell for XGBoost is visible in this notebook —
# confirm where these hyperparameters come from.
model_xgb = xgb.XGBClassifier(objective='multi:softmax', colsample_bytree=1, learning_rate=0.1,
                              max_depth=2, min_child_weight=0.5, n_estimators=2000, subsample=0.75)
# MLPs with 1 to 6 hidden layers; all use tanh, adam and a constant learning rate.
# `(7)` is the integer 7, i.e. a single hidden layer of 7 units.
model_mlp_1layer = MLPClassifier(max_iter=1000, random_state=1, activation='tanh', alpha=1,
                                 hidden_layer_sizes=(7), learning_rate='constant', solver='adam')
model_mlp_2layer = MLPClassifier(max_iter=2000, random_state=1, activation='tanh', alpha=1,
                                 hidden_layer_sizes=(8, 9), learning_rate='constant', solver='adam')
model_mlp_3layer = MLPClassifier(max_iter=2000, random_state=1, activation='tanh', alpha=0.7,
                                 hidden_layer_sizes=(7, 7, 8), learning_rate='constant', solver='adam')
model_mlp_4layer = MLPClassifier(max_iter=2000, random_state=1, activation='tanh', alpha=0.7,
                                 hidden_layer_sizes=(8, 8, 8, 7), learning_rate='constant', solver='adam')
model_mlp_5layer = MLPClassifier(max_iter=2000, random_state=1, activation='tanh', alpha=0.7,
                                 hidden_layer_sizes=(8, 7, 9, 7, 7), learning_rate='constant', solver='adam')
model_mlp_6layer = MLPClassifier(max_iter=2000, random_state=1, activation='tanh', alpha=0.7,
                                 hidden_layer_sizes=(8, 8, 8, 8, 8, 7), learning_rate='constant', solver='adam')
model_glvq = GLVQ(activation_params={'beta': 1}, activation_type='swish',
                  distance_type='squared-euclidean', solver_type='adam')

# Evaluation of the proposed DL model and 5 shallow classifiers using 3-times repeated stratified 5-fold CV

# SVM

In [8]:
# Evaluate the tuned SVM with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_svm
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.8909 (0.0629)
f1_macro: 0.8844 (0.0694)
precision_macro: 0.9056 (0.0648)
recall_macro: 0.8907 (0.0669)
kappa_scorer: 0.8337 (0.0973)


# NAIVE BAYES

In [9]:
# Evaluate the tuned Gaussian naive Bayes with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_nb
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.8794 (0.0897)
f1_macro: 0.8689 (0.0967)
precision_macro: 0.9089 (0.0647)
recall_macro: 0.8778 (0.0900)
kappa_scorer: 0.8190 (0.1337)


# RANDOM FOREST

In [10]:
# Evaluate the tuned random forest with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_rf
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.8309 (0.0912)
f1_macro: 0.8023 (0.1171)
precision_macro: 0.8322 (0.1275)
recall_macro: 0.8148 (0.1038)
kappa_scorer: 0.7413 (0.1408)


# XGBOOST

In [48]:
# Evaluate the XGBoost classifier with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_xgb
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.8248 (0.1119)
f1_macro: 0.8054 (0.1187)
precision_macro: 0.8570 (0.1021)
recall_macro: 0.8074 (0.1159)
kappa_scorer: 0.7335 (0.1683)


# GLVQ

In [11]:
# Evaluate the tuned GLVQ with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_glvq
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.9303 (0.0649)
f1_macro: 0.9209 (0.0770)
precision_macro: 0.9393 (0.0543)
recall_macro: 0.9315 (0.0664)
kappa_scorer: 0.8951 (0.0978)


# Shallow MLP

In [12]:
# Evaluate the tuned single-hidden-layer MLP with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_mlp_1layer
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.9188 (0.0676)
f1_macro: 0.9053 (0.0850)
precision_macro: 0.9311 (0.0616)
recall_macro: 0.9111 (0.0825)
kappa_scorer: 0.8758 (0.1048)


# 2 Hidden layer deep MLP

In [13]:
# Evaluate the tuned 2-hidden-layer MLP with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_mlp_2layer
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.9364 (0.0921)
f1_macro: 0.9270 (0.1064)
precision_macro: 0.9478 (0.0800)
recall_macro: 0.9315 (0.0998)
kappa_scorer: 0.9021 (0.1425)


# 3 Hidden layer deep MLP

In [14]:
# Evaluate the tuned 3-hidden-layer MLP with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_mlp_3layer
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.9242 (0.0642)
f1_macro: 0.9153 (0.0743)
precision_macro: 0.9337 (0.0649)
recall_macro: 0.9185 (0.0699)
kappa_scorer: 0.8844 (0.0985)


# Proposed 4 hidden layer MLP

In [15]:
# Evaluate the proposed 4-hidden-layer MLP with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_mlp_4layer
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.9436 (0.0676)
f1_macro: 0.9381 (0.0773)
precision_macro: 0.9585 (0.0519)
recall_macro: 0.9407 (0.0709)
kappa_scorer: 0.9136 (0.1035)


# 5 Hidden layer MLP

In [16]:
# Evaluate the tuned 5-hidden-layer MLP with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_mlp_5layer
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.9442 (0.0564)
f1_macro: 0.9358 (0.0679)
precision_macro: 0.9533 (0.0459)
recall_macro: 0.9407 (0.0640)
kappa_scorer: 0.9150 (0.0865)


# 6 Hidden layer deep MLP

In [18]:
# Evaluate the tuned 6-hidden-layer MLP with 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_mlp_6layer
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.9436 (0.0676)
f1_macro: 0.9380 (0.0773)
precision_macro: 0.9563 (0.0532)
recall_macro: 0.9426 (0.0699)
kappa_scorer: 0.9139 (0.1034)


# Proposed model without parameter tuning

In [19]:
# Evaluate an MLPClassifier left at the library's default hyperparameters
# (not the tuned 4-layer architecture) with 3x-repeated stratified 5-fold CV.
# random_state is fixed, matching every other model in this notebook, so the
# reported numbers are reproducible across runs.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = MLPClassifier(random_state=1)
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.8727 (0.0713)
f1_macro: 0.8446 (0.0738)
precision_macro: 0.8893 (0.0608)
recall_macro: 0.8593 (0.0551)
kappa_scorer: 0.7694 (0.1189)


# Proposed model without PCA

In [20]:
# Evaluate the 4-hidden-layer MLP on the standardised features without PCA
# (`std_data`) using 3x-repeated stratified 5-fold CV.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_mlp_4layer
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, std_data, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.8048 (0.0819)
f1_macro: 0.7793 (0.0898)
precision_macro: 0.8385 (0.0894)
recall_macro: 0.7833 (0.0862)
kappa_scorer: 0.7004 (0.1256)


# Proposed model without project specifications

In [29]:
# Evaluate the 4-hidden-layer MLP on `data_risk` using 3x-repeated stratified
# 5-fold CV.
# NOTE(review): `data_risk` is not defined anywhere in the visible notebook, so
# this cell fails on a fresh kernel. Presumably it is the PCA components without
# the `concrete` and `time` columns — confirm and add its definition.
# All five metrics come from one cross_validate pass, so each fold is fitted
# once instead of once per metric and every metric describes the same fits.
model = model_mlp_4layer
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=1)
# Keys double as the printed labels; cross_validate returns each metric's
# per-fold scores under 'test_<key>'.
scoring = {
    'Accuracy': 'accuracy',
    'f1_macro': 'f1_macro',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'kappa_scorer': kappa_scorer,
}
cv_results = cross_validate(model, data_risk, y, scoring=scoring, cv=cv, n_jobs=-1)
for label in scoring:
    scores = cv_results['test_' + label]
    print('%s: %.4f (%.4f)' % (label, mean(scores), std(scores)))

Accuracy: 0.8370 (0.0805)
f1_macro: 0.8144 (0.0918)
precision_macro: 0.8678 (0.0728)
recall_macro: 0.8222 (0.0877)
kappa_scorer: 0.7517 (0.1226)
