In [1]:
%matplotlib qt 

import numpy as np
import pandas as pd
import random
import time
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, KFold, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from imblearn.metrics import specificity_score

from scipy.stats import iqr
from scipy.stats import t

In [2]:
# import data
data = pd.read_csv('bm_combat_spectral_changed.csv', index_col=[0])

# harmonized 
data_comb = data.drop(data.iloc[:, 209:], axis=1)
data_comb

In [9]:
np.random.seed(0)
random.seed(0)

X = data_comb
y = data_comb[['center','label']]

X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    train_size=0.7,
                                                    random_state=1,
                                                    stratify=X[['center','group']])

In [12]:
center_names = ['all','california','finland','iowa','medellin']

X_train_sets = {} 
y_train_sets = {}
X_test_sets = {}
y_test_sets = {}


for name in center_names:
    
    if name == 'all':
        X_train_sets.update({name: X_train.drop(['center', 'group', 'age', 'gender', 'batch', 'label'], axis=1)})
        X_test_sets.update({name: X_test.drop(['center', 'group', 'age', 'gender', 'batch', 'label'], axis=1)})
        y_train_sets.update({name: y_train.drop(['center'], axis = 1)})
        y_test_sets.update({name: y_test.drop(['center'], axis = 1)})
    
    else: 
        X_train_set = X_train.loc[X_train['center'] == name]
        X_train_set = X_train_set.drop(['center', 'group', 'age', 'gender', 'batch', 'label'], axis=1)
        X_train_sets.update({name: X_train_set})

        y_train_set = y_train.loc[X_train['center'] == name]
        y_train_set = y_train_set.drop(['center'], axis=1)
        y_train_sets.update({name: y_train_set})

        X_test_set = X_test.loc[X_test['center'] == name]
        X_test_set = X_test_set.drop(['center', 'group', 'age', 'gender', 'batch', 'label'], axis=1)
        X_test_sets.update({name: X_test_set})

        y_test_set = y_test.loc[X_test['center'] == name]
        y_test_set = y_test_set.drop(['center'], axis=1)
        y_test_sets.update({name: y_test_set})

In [17]:
# Initializing Classifiers

clf1 = LogisticRegression(multi_class='multinomial',
                          solver='newton-cg',
                          random_state=1)

clf2 = KNeighborsClassifier(algorithm='ball_tree',
                            leaf_size=50)

clf3 = DecisionTreeClassifier(random_state=1)

clf4 = SVC(random_state=1)

# Building the pipelines

pipe1 = Pipeline([('clf1', clf1)])

pipe2 = Pipeline([('clf2', clf2)])

pipe4 = Pipeline([('clf4', clf4)])


# Setting up the parameter grids
param_grid1 = [{'clf1__penalty': ['l2'],
                'clf1__C': np.power(10., np.arange(-4, 4))}]

param_grid2 = [{'clf2__n_neighbors': list(range(1, 10)),
                'clf2__p': [1, 2]}]

param_grid3 = [{'max_depth': list(range(1, 10)) + [None],
                'criterion': ['gini', 'entropy']}]


param_grid4 = [{'clf4__kernel': ['rbf'],
                'clf4__C': np.power(10., np.arange(-4, 4)),
                'clf4__gamma': np.power(10., np.arange(-5, 0))},
               {'clf4__kernel': ['linear'],
                'clf4__C': np.power(10., np.arange(-4, 4))}]

In [18]:
# Setting up multiple GridSearchCV objects, 1 for each algorithm

gridcvs = {}

for pgrid, est, name in zip((param_grid1, param_grid2,
                             param_grid3, param_grid4),
                            (pipe1, pipe2, clf3, pipe4),
                            ('Softmax', 'KNN', 'DTree', 'SVM')):
    
    gcv = GridSearchCV(estimator=est,
                       param_grid=pgrid,
                       scoring='accuracy',
                       n_jobs=1,
                       cv=5,
                       verbose=0,
                       refit=True)
    gridcvs[name] = gcv

# WITHOUT FEATURE SELECTION

In [20]:
metrics = ['accuracy','recall','specificity','precision','F1','auc']
cv_scores = {center_name: {name: {metric: [] for metric in metrics} for name, gs_est in gridcvs.items()} for center_name in center_names}
cv_scores_without_zeros = {center_name: {name: {metric: [] for metric in metrics} for name, gs_est in gridcvs.items()} for center_name in center_names}

skfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

for center_name in center_names:
    
    if center_name != center_names[0]: continue
        
    print('Cross-validation for ' + center_name.upper())

    # The outer loop for algorithm selection
    c = 1
    for outer_train_idx, outer_valid_idx in skfold.split(X_train_sets[center_name],y_train_sets[center_name]):

        for name, gs_est in sorted(gridcvs.items()):
            print('outer fold %d/5 | tuning %-8s' % (c, name), end='')

            # The inner loop for hyperparameter tuning
            gs_est.fit(X_train_sets[center_name].iloc[outer_train_idx.tolist()], y_train_sets[center_name].iloc[outer_train_idx.tolist()].values.ravel())
            y_pred = gs_est.predict(X_train_sets[center_name].iloc[outer_valid_idx.tolist()])
            
            for metric in metrics: 
                if metric == 'accuracy':
                    calc_metric = accuracy_score(y_true=y_train_sets[center_name].iloc[outer_valid_idx.tolist()], y_pred=y_pred)
                    print(' | inner ACC %.2f%% | outer ACC %.2f%%' %
                          (gs_est.best_score_ * 100, calc_metric * 100))
                    cv_scores[center_name][name][metric].append(calc_metric)
                    if calc_metric != 0:
                        cv_scores_without_zeros[center_name][name][metric].append(calc_metric)
                elif metric == 'recall':
                    calc_metric = recall_score(y_true=y_train_sets[center_name].iloc[outer_valid_idx.tolist()], y_pred=y_pred)
                    cv_scores[center_name][name][metric].append(calc_metric)
                    if calc_metric != 0:
                        cv_scores_without_zeros[center_name][name][metric].append(calc_metric)
                elif metric == 'specificity':
                    calc_metric = specificity_score(y_true=y_train_sets[center_name].iloc[outer_valid_idx.tolist()], y_pred=y_pred)
                    cv_scores[center_name][name][metric].append(calc_metric)
                    if calc_metric != 0:
                        cv_scores_without_zeros[center_name][name][metric].append(calc_metric)
                elif metric == 'precision':
                    calc_metric = precision_score(y_true=y_train_sets[center_name].iloc[outer_valid_idx.tolist()], y_pred=y_pred)
                    cv_scores[center_name][name][metric].append(calc_metric)
                    if calc_metric != 0:
                        cv_scores_without_zeros[center_name][name][metric].append(calc_metric)
                elif metric == 'F1':
                    calc_metric = f1_score(y_true=y_train_sets[center_name].iloc[outer_valid_idx.tolist()], y_pred=y_pred)
                    cv_scores[center_name][name][metric].append(calc_metric)
                    if calc_metric != 0:
                        cv_scores_without_zeros[center_name][name][metric].append(calc_metric)
                else:
                    calc_metric = roc_auc_score(y_true=y_train_sets[center_name].iloc[outer_valid_idx.tolist()], y_score=y_pred, average = 'macro')
                    cv_scores[center_name][name][metric].append(calc_metric)
                    if calc_metric != 0:
                        cv_scores_without_zeros[center_name][name][metric].append(calc_metric)

        c += 1

Cross-validation for ALL
outer fold 1/5 | tuning DTree    | inner ACC 67.13% | outer ACC 45.83%
outer fold 1/5 | tuning KNN      | inner ACC 72.46% | outer ACC 45.83%
outer fold 1/5 | tuning SVM      | inner ACC 78.77% | outer ACC 58.33%
outer fold 1/5 | tuning Softmax  | inner ACC 76.61% | outer ACC 66.67%
outer fold 2/5 | tuning DTree    | inner ACC 75.32% | outer ACC 50.00%
outer fold 2/5 | tuning KNN      | inner ACC 72.34% | outer ACC 50.00%
outer fold 2/5 | tuning SVM      | inner ACC 80.70% | outer ACC 66.67%
outer fold 2/5 | tuning Softmax  | inner ACC 78.60% | outer ACC 54.17%
outer fold 3/5 | tuning DTree    | inner ACC 63.74% | outer ACC 50.00%
outer fold 3/5 | tuning KNN      | inner ACC 64.74% | outer ACC 62.50%
outer fold 3/5 | tuning SVM      | inner ACC 71.11% | outer ACC 79.17%
outer fold 3/5 | tuning Softmax  | inner ACC 73.22% | outer ACC 83.33%
outer fold 4/5 | tuning DTree    | inner ACC 63.16% | outer ACC 60.87%
outer fold 4/5 | tuning KNN      | inner ACC 64.21% 

In [21]:
# Looking at the results

mean_std_cv_scores = {center_name: {name: {metric: {'mean': None, 'SD': None} for metric in metrics} for name, gs_est in gridcvs.items()} for center_name in center_names}
mean_std_cv_scores_without_zeros = {center_name: {name: {metric: {'mean': None, 'SD': None} for metric in metrics} for name, gs_est in gridcvs.items()} for center_name in center_names}
median_iqr_cv_scores = {center_name: {name: {metric: {'median': None, 'IQR': None} for metric in metrics} for name, gs_est in gridcvs.items()} for center_name in center_names}
median_iqr_cv_scores_without_zeros = {center_name: {name: {metric: {'median': None, 'IQR': None} for metric in metrics} for name, gs_est in gridcvs.items()} for center_name in center_names}

for center_name in center_names:
    for name, gs_est in sorted(gridcvs.items()):
        print('results for ' + center_name.upper() + ' ' + name.upper())
        for metric in metrics: 
            if metric != 'auc':
                print('%-8s | outer CV ' + metric + ' %.2f%% +\- %.3f' % (
                  100 * np.mean(cv_scores[center_name][name][metric]), 100 * np.std(cv_scores[center_name][name][metric])))
                mean_std_cv_scores[center_name][name][metric]['mean'] = np.mean(cv_scores[center_name][name][metric])
                mean_std_cv_scores_without_zeros[center_name][name][metric]['mean'] = np.mean(cv_scores_without_zeros[center_name][name][metric])
                mean_std_cv_scores[center_name][name][metric]['SD'] = np.std(cv_scores[center_name][name][metric])
                mean_std_cv_scores_without_zeros[center_name][name][metric]['SD'] = np.std(cv_scores_without_zeros[center_name][name][metric])
                
                median_iqr_cv_scores[center_name][name][metric]['median'] = 100*np.median(cv_scores[center_name][name][metric])
                median_iqr_cv_scores_without_zeros[center_name][name][metric]['median'] = 100*np.median(cv_scores_without_zeros[center_name][name][metric])
                median_iqr_cv_scores[center_name][name][metric]['IQR'] = 100*iqr(cv_scores[center_name][name][metric])
                median_iqr_cv_scores[center_name][name][metric]['IQR'] = 100*iqr(cv_scores_without_zeros[center_name][name][metric])
            else: 
                print('%-8s | outer CV ' + metric + ' %.2f +\- %.3f' % (
                  np.mean(cv_scores[center_name][name][metric]), np.std(cv_scores[center_name][name][metric])))
                mean_std_cv_scores[center_name][name][metric]['mean'] = np.mean(cv_scores[center_name][name][metric])
                mean_std_cv_scores_without_zeros[center_name][name][metric]['mean'] = np.mean(cv_scores_without_zeros[center_name][name][metric])
                mean_std_cv_scores[center_name][name][metric]['SD'] = np.std(cv_scores[center_name][name][metric])
                mean_std_cv_scores_without_zeros[center_name][name][metric]['SD'] = np.std(cv_scores_without_zeros[center_name][name][metric])
                
                median_iqr_cv_scores[center_name][name][metric]['median'] = 100*np.median(cv_scores[center_name][name][metric])
                median_iqr_cv_scores_without_zeros[center_name][name][metric]['median'] = 100*np.median(cv_scores_without_zeros[center_name][name][metric])
                median_iqr_cv_scores[center_name][name][metric]['IQR'] = 100*iqr(cv_scores[center_name][name][metric])
                median_iqr_cv_scores[center_name][name][metric]['IQR'] = 100*iqr(cv_scores_without_zeros[center_name][name][metric])
    print('*********************')
print('\nSoftmax Best parameters', gridcvs['Softmax'].best_params_)
print('\nKNN Best parameters', gridcvs['KNN'].best_params_)
print('\nDTree Best parameters', gridcvs['DTree'].best_params_)
print('\nSVM Best parameters', gridcvs['SVM'].best_params_)

results for ALL DTREE
%-8s | outer CV accuracy 51.78% +\- 4.989
%-8s | outer CV recall 46.21% +\- 25.659
%-8s | outer CV specificity 58.18% +\- 25.077
%-8s | outer CV precision 53.99% +\- 7.265
%-8s | outer CV F1 44.73% +\- 16.314
%-8s | outer CV auc 0.52 +\- 0.052
results for ALL KNN
%-8s | outer CV accuracy 56.88% +\- 8.509
%-8s | outer CV recall 44.24% +\- 16.629
%-8s | outer CV specificity 69.55% +\- 13.402
%-8s | outer CV precision 58.90% +\- 10.384
%-8s | outer CV F1 49.13% +\- 14.373
%-8s | outer CV auc 0.57 +\- 0.083
results for ALL SVM
%-8s | outer CV accuracy 73.01% +\- 9.888
%-8s | outer CV recall 69.70% +\- 10.957
%-8s | outer CV specificity 76.21% +\- 14.419
%-8s | outer CV precision 75.79% +\- 10.837
%-8s | outer CV F1 72.10% +\- 9.896
%-8s | outer CV auc 0.73 +\- 0.098
results for ALL SOFTMAX
%-8s | outer CV accuracy 73.01% +\- 11.150
%-8s | outer CV recall 69.70% +\- 19.237
%-8s | outer CV specificity 76.36% +\- 5.934
%-8s | outer CV precision 73.10% +\- 10.090
%-8s | o

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean, casting='unsafe',
  ret = ret.dtype.type(ret / rcount)


In [22]:
confedence_interval = {center_name: {name: {metric: () for metric in metrics} for name, gs_est in gridcvs.items()} for center_name in center_names}

dof = 5-1 
confidence = 0.95

for center_name in center_names:
    for name, gs_est in sorted(gridcvs.items()):
        for metric in metrics:
    

            m = mean_std_cv_scores[center_name][name][metric]['mean']
            s = mean_std_cv_scores[center_name][name][metric]['SD']

            t_crit = np.abs(t.ppf((1-confidence)/2,dof))
            confedence_interval[center_name][name][metric]= ((m-s*t_crit/np.sqrt(5))*100, (m+s*t_crit/np.sqrt(5))*100)

{'all': {'Softmax': {'accuracy': (59.162978421461645, 86.85151433216156),
   'recall': (45.8110112629886, 93.5829281309508),
   'specificity': (68.99524248162547, 83.73203024564727),
   'precision': (60.56648170747487, 85.62399448300133),
   'F1': (51.42183775028856, 90.05794867228964),
   'auc': (59.175512128990896, 86.88509393161517)},
  'KNN': {'accuracy': (46.31910020332198, 67.44901573870699),
   'recall': (23.59425349480415, 64.89059499004432),
   'specificity': (52.904640238887836, 86.18626885202127),
   'precision': (46.00534853144709, 71.79185426575572),
   'F1': (31.28132143104151, 66.97314081152143),
   'auc': (46.55786221361178, 67.230016574267)},
  'DTree': {'accuracy': (45.58009600078208, 57.97062863689908),
   'recall': (14.351831032915785, 78.07241139132663),
   'specificity': (27.04402145279039, 89.31961491084597),
   'precision': (44.97228017365232, 63.013067811695656),
   'F1': (24.47665504405717, 64.9900116226095),
   'auc': (45.70440876357162, 58.68953063036779)},


In [None]:
# BOOTSTRAPPING

from sklearn.utils import resample

# Fitting a model to the whole training set
# using the "best" algorithm
best_algo = gridcvs['Softmax']


bootstrap = {center_name: {metric: [] for metric in metrics} for center_name in center_names}

for center_name in center_names:
    
    print('Test results for ' + center_name.upper())

    n_iterations = 100  # No. of bootstrap samples to be repeated (created)
    n_size = int(len(X_test_sets[center_name]) * 1.0) # Size of sample, picking only 50% of the given data in every bootstrap sample
    
    for i in range(n_iterations):
    
        test_values = resample(X_test_sets[center_name], replace=False, n_samples = n_size)
        test_labels = y_test_sets[center_name].loc[test_values.index.to_list()


        if center_name == 'all':
            start = time.time()
            best_algo.fit(X_train_sets[center_name], y_train_sets[center_name].values.ravel())
            stop = time.time()

            for metric in metrics: 
                if metric == 'accuracy':
                    calc_metric = accuracy_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('Test Accuracy: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                elif metric == 'recall':
                    calc_metric = recall_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('Recall/sensitivity: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                elif metric == 'specificity':
                    calc_metric = specificity_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('Specificity: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                elif metric == 'precision':
                    calc_metric = precision_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('Precision: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                elif metric == 'F1':
                    calc_metric = f1_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('F1 score: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                else:
                    calc_metric = roc_auc_score(y_true=test_labels, y_score=best_algo.predict(test_values), average = 'macro')
                    print('ROC AUC: %.2f' % calc_metric)
                    bootstrap[center_name][metric].append(calc_metric)

        else:

            for metric in metrics: 
                if metric == 'accuracy':
                    calc_metric = accuracy_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('Test Accuracy: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                elif metric == 'recall':
                    calc_metric = recall_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('Recall/sensitivity: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                elif metric == 'specificity':
                    calc_metric = specificity_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('Specificity: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                elif metric == 'precision':
                    calc_metric = precision_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('Precision: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                elif metric == 'F1':
                    calc_metric = f1_score(y_true=test_labels, y_pred=best_algo.predict(test_values))
                    print('F1 score: %.2f%%' % (100 * calc_metric))
                    bootstrap[center_name][metric].append(calc_metric)
                else:
                    calc_metric = roc_auc_score(y_true=test_labels, y_score=best_algo.predict(test_values), average = 'macro')
                    print('ROC AUC: %.2f' % calc_metric)
                    bootstrap[center_name][metric].append(calc_metric)

In [None]:
for center_name in center_names:
    print('Results for ' + center_name.upper())
    for metric in metrics: 
        if metric != 'auc':
            print(metric + ' %.2f%% +\- %.3f' % (
              100 * np.mean(bootstrap[center_name][metric]), 100 * np.std(bootstrap[center_name][metric])))
        else: 
            print(metric + ' %.2f +\- %.3f' % (
              np.mean(bootstrap[center_name][metric]), np.std(bootstrap[center_name][metric])))