In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.impute import KNNImputer
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegressionCV
from sklearn.metrics import roc_auc_score, confusion_matrix
import seaborn as sns
from random import sample, seed 
from scipy import stats

import matplotlib.pyplot as plt
from gseapy.plot import barplot, dotplot
from mycolorpy import colorlist as mcp
from matplotlib.lines import Line2D



In [45]:
# Sweep over disease x omics layer x cohort type (incident/prevalent) and, for
# each combination, train logistic-regression classifiers on the top-N features
# (N = 1..30) ranked by an ExtraTrees model. Per-model predictions and metrics
# are collected into `boxplot_data_full` (N == 5 only) and `Shiny_data` (all N).

# Read omics data (first column is used as the row index)
omics_prot_path = '/data/sharedData/UK_BIOBANK_DATA/Download_Data/ProteomicsData/DATAforMARTIN/Olink_proteomics_data_transposed_decoded2UNIportID_stringvalue_corrected_Martin.txt'
omics_met_path = '/data/sharedData/UK_BIOBANK_DATA/Download_Data/Metabolomics_Data/SecondPhaseData/nmr_biomarker_data_RemovedTechVariation_secondPHASE_FilteredDuplicateV2.csv'

omics_prot = pd.read_csv(omics_prot_path, sep = ',', index_col = 0)
omics_met = pd.read_csv(omics_met_path, sep = ',', index_col = 0)

# Directory holding the per-disease case / matched-control clinical tables
clinical_dir = '/home/marsm05/Documents/FBI_project/UKbb/clinical_story/'

# Number of selected features whose test predictions feed the boxplot table
boxplot_n_features = 5

# Column layout of the two output tables
boxplot_columns = ['prob', 'reference', 'disease', 'data_type', 'Model']
shiny_columns = ['prob', 'reference', 'Disease', 'Data', 'N',
                 'AUC_train', 'AUC_test', 'Features', 'Coef', 'Model']

# Results are accumulated in plain lists and converted to DataFrames once after
# the loops: DataFrame.append was removed in pandas 2.0 and growing a frame row
# by row is quadratic. If the run is interrupted, the partial results are still
# available in these two lists.
boxplot_frames = []
shiny_rows = []


for disease in ['CD', 'UC', 'RA', 'PSO', 'SLE', 'COPD', 'obesity', 'T2D', 'Atherosclerosis']:
    for data_type in ['prot', 'met']:
        # The omics matrix is only read (the .loc below returns a new frame),
        # so no defensive copy of the full table is needed.
        omics = omics_prot if data_type == 'prot' else omics_met

        for model_type in ['incident', 'prevalent']:

            # Cases and their matched healthy controls for this cohort type
            if model_type == 'incident':
                control_suffix = '_HC_PairedToIncident.csv'
            else:
                control_suffix = '_HC_PairedToPrevalent.csv'
            path_prefix = clinical_dir + 'ukb_' + disease + '_' + data_type

            sick_path = path_prefix + '_' + model_type + '.csv'
            clinical_sick = pd.read_csv(sick_path, index_col = 0)
            HC_path = path_prefix + control_suffix
            clinical_healthy = pd.read_csv(HC_path, index_col = 0)


            # clinical -> clinical data from both healthy and sick
            # omics_clinical -> subset of metabolomics/proteomics including only patients from clinical data
            clinical = pd.concat([clinical_sick, clinical_healthy])

            omics_clinical = omics.loc[clinical.index.values]

            # Keep only proteins/metabolites with fewer than 10% missing values
            na_counts = omics_clinical.isna().sum()
            omics_clinical = omics_clinical[omics_clinical.columns[na_counts < np.shape(omics_clinical)[0]/10]]

            # Split samples into train (70%) and test (30%) sets
            X_train, X_test, y_train, y_test = train_test_split(omics_clinical, clinical['group'], test_size=0.3, random_state=42)

            # KNN imputer: fit on train data only, apply to both train and test
            imp = KNNImputer(n_neighbors=5, weights="uniform")
            imp.fit(X_train)
            X_train = pd.DataFrame(imp.transform(X_train), index = X_train.index, columns = X_train.columns)
            X_test = pd.DataFrame(imp.transform(X_test), index = X_test.index, columns = X_test.columns)


            # ExtraTreesClassifier (10000 trees) fitted once per cohort; its
            # feature importances rank the proteins/metabolites for every N below
            clf = ExtraTreesClassifier(n_estimators=10000, random_state=42, verbose = 1).fit(X_train, y_train)


            for N in range(1, 31):
                # Take only the top N proteins/metabolites. threshold=-np.inf
                # disables the default mean-importance cut-off so that selection
                # is driven by max_features alone (exactly N features).
                model = SelectFromModel(clf, prefit=True, max_features = N, threshold = -np.inf)
                selected_columns = X_train.columns[model.get_support()]
                X_train_subset = X_train[selected_columns]
                X_test_subset = X_test[selected_columns]


                # Train L2-penalised (ridge) logistic regression; C chosen by 10-fold CV
                clf_ridge = LogisticRegressionCV(cv=10, Cs = 10, random_state=42, max_iter = 10000,
                                           penalty='l2').fit(X_train_subset, y_train)


                # Feature importance = absolute value of the fitted coefficient
                coefficients = clf_ridge.coef_[0]
                feature_importance = pd.DataFrame({'Feature': X_train_subset.columns,
                                                   'Importance': np.abs(coefficients),
                                                   'Coef': coefficients})
                feature_importance = feature_importance.sort_values('Importance', ascending = True)


                # Compute metrics (test probabilities are computed once and reused)
                test_prob = clf_ridge.predict_proba(X_test_subset)[:, 1]
                auc_train = roc_auc_score(y_train, clf_ridge.predict_proba(X_train_subset)[:, 1])
                auc = roc_auc_score(y_test, test_prob)
                conf_matrix = confusion_matrix(y_test, clf_ridge.predict(X_test_subset), labels = np.unique(y_test))


                # Print summary
                print('Disease: ' + disease)
                print(conf_matrix)
                print('AUC train: ' + str(auc_train))
                print('AUC: ' + str(auc))
                print('\n')


                # Predicted probability and true label for every test sample of THIS model
                X_test_summary = pd.DataFrame(test_prob, columns = ['prob'])
                X_test_summary['reference'] = y_test.values


                # Data for boxplot: only the model built on `boxplot_n_features` features
                if N == boxplot_n_features:
                    # .copy() so the column assignments below act on an independent frame
                    boxplot_data = X_test_summary[['prob', 'reference']].copy()
                    boxplot_data['disease'] = disease
                    boxplot_data['data_type'] = data_type
                    boxplot_data['Model'] = model_type
                    boxplot_frames.append(boxplot_data)

                # Data for ShinyApp: one row per (disease, data type, cohort, N).
                # Predictions come from the current model's X_test_summary; reading
                # them from boxplot_data would reuse the last N == 5 model's values
                # (and fail on a fresh kernel before the first N == 5 iteration).
                new_row = {'prob': list(np.round(X_test_summary['prob'].astype(float), 3)),
                           'reference': list(X_test_summary['reference']),
                           'Disease': disease,
                           'Data': data_type,
                           'N': N,
                           'AUC_train': auc_train,
                           'AUC_test': auc,
                           'Features': list(feature_importance['Feature'].astype(str)),
                           'Coef': list(np.round(feature_importance['Coef'], 3)),
                           'Model': model_type
                    }

                shiny_rows.append(new_row)


# Assemble the output tables once, with a fixed column order
if boxplot_frames:
    boxplot_data_full = pd.concat(boxplot_frames, ignore_index=True)[boxplot_columns]
else:
    boxplot_data_full = pd.DataFrame(columns=boxplot_columns)

Shiny_data = pd.DataFrame(shiny_rows, columns=shiny_columns)



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   14.7s finished


Disease: CD
[[37 18]
 [28 29]]
AUC train: 0.6841529084561216
AUC: 0.625518341307815


Disease: CD
[[39 16]
 [28 29]]
AUC train: 0.7031777028226524
AUC: 0.6389154704944179


Disease: CD
[[36 19]
 [26 31]]
AUC train: 0.7456062488904668
AUC: 0.6277511961722488


Disease: CD
[[36 19]
 [26 31]]
AUC train: 0.7637730043197822
AUC: 0.6411483253588517


Disease: CD
[[35 20]
 [24 33]]
AUC train: 0.7725900940884075
AUC: 0.6615629984051037


Disease: CD
[[35 20]
 [26 31]]
AUC train: 0.7656074323924493
AUC: 0.6446570972886763


Disease: CD
[[35 20]
 [26 31]]
AUC train: 0.7654890821942126
AUC: 0.65103668261563


Disease: CD
[[40 15]
 [28 29]]
AUC train: 0.7632996035268358
AUC: 0.6465709728867624


Disease: CD
[[38 17]
 [27 30]]
AUC train: 0.7635363039233092
AUC: 0.6376395534290271


Disease: CD
[[40 15]
 [26 31]]
AUC train: 0.7667317592756968
AUC: 0.6401913875598086


Disease: CD
[[39 16]
 [23 34]]
AUC train: 0.7890407716432926
AUC: 0.7196172248803828


Disease: CD
[[40 15]
 [29 28]]
AUC train: 0.76

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   13.7s finished


Disease: CD
[[25 23]
 [19 33]]
AUC train: 0.7312295569432055
AUC: 0.5773237179487181


Disease: CD
[[30 18]
 [18 34]]
AUC train: 0.7667261373773416
AUC: 0.6638621794871795


Disease: CD
[[31 17]
 [16 36]]
AUC train: 0.8029289325007434
AUC: 0.6979166666666666


Disease: CD
[[33 15]
 [17 35]]
AUC train: 0.8174249182277729
AUC: 0.6919070512820512


Disease: CD
[[33 15]
 [16 36]]
AUC train: 0.8230002973535533
AUC: 0.703926282051282


Disease: CD
[[35 13]
 [17 35]]
AUC train: 0.8374962830805827
AUC: 0.7123397435897435


Disease: CD
[[33 15]
 [16 36]]
AUC train: 0.8539250669045495
AUC: 0.7467948717948718


Disease: CD
[[43  5]
 [19 33]]
AUC train: 0.8710972346119537
AUC: 0.8149038461538461


Disease: CD
[[43  5]
 [17 35]]
AUC train: 0.8745168004757656
AUC: 0.8165064102564102


Disease: CD
[[40  8]
 [18 34]]
AUC train: 0.8887154326494202
AUC: 0.8277243589743589


Disease: CD
[[40  8]
 [14 38]]
AUC train: 0.8898305084745763
AUC: 0.8377403846153846


Disease: CD
[[38 10]
 [14 38]]
AUC train: 0.

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   37.5s finished


Disease: CD
[[165 114]
 [108 167]]
AUC train: 0.6459217202977735
AUC: 0.6278136200716846


Disease: CD
[[190  89]
 [118 157]]
AUC train: 0.7049235495323554
AUC: 0.6905050505050505


Disease: CD
[[199  80]
 [104 171]]
AUC train: 0.7157306962869663
AUC: 0.7058585858585859


Disease: CD
[[193  86]
 [106 169]]
AUC train: 0.7176488686869172
AUC: 0.7072662104920169


Disease: CD
[[198  81]
 [115 160]]
AUC train: 0.7233649262897788
AUC: 0.7117106549364615


Disease: CD
[[200  79]
 [113 162]]
AUC train: 0.7268094639453296
AUC: 0.7100293255131964


Disease: CD
[[199  80]
 [112 163]]
AUC train: 0.7253191545619091
AUC: 0.710003258390355


Disease: CD
[[199  80]
 [112 163]]
AUC train: 0.7232759884717359
AUC: 0.7118670576735093


Disease: CD
[[200  79]
 [111 164]]
AUC train: 0.7269344576355521
AUC: 0.7089214727924406


Disease: CD
[[201  78]
 [111 164]]
AUC train: 0.725251850267174
AUC: 0.7103160638644509


Disease: CD
[[198  81]
 [113 162]]
AUC train: 0.7271243518956976
AUC: 0.7139654610622352


D

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   25.7s finished


Disease: CD
[[151  76]
 [ 93 125]]
AUC train: 0.6977925913801697
AUC: 0.6467283676191246


Disease: CD
[[164  63]
 [ 94 124]]
AUC train: 0.7471263512658934
AUC: 0.6819908661035444


Disease: CD
[[166  61]
 [ 81 137]]
AUC train: 0.7692599564023779
AUC: 0.7115547831709979


Disease: CD
[[165  62]
 [ 83 135]]
AUC train: 0.769397593946924
AUC: 0.7113324980802651


Disease: CD
[[165  62]
 [ 83 135]]
AUC train: 0.7694050338141967
AUC: 0.7113527058157862


Disease: CD
[[166  61]
 [ 83 135]]
AUC train: 0.7678947407578249
AUC: 0.7147880208543831


Disease: CD
[[166  61]
 [ 91 127]]
AUC train: 0.7884957332361191
AUC: 0.7206078486844765


Disease: CD
[[163  64]
 [ 88 130]]
AUC train: 0.7752081302869556
AUC: 0.7153538374489754


Disease: CD
[[168  59]
 [ 84 134]]
AUC train: 0.798948746754358
AUC: 0.7325910358485228


Disease: CD
[[169  58]
 [ 83 135]]
AUC train: 0.8012923049452798
AUC: 0.73568281938326


Disease: CD
[[169  58]
 [ 86 132]]
AUC train: 0.8026649604571054
AUC: 0.7417047245685648


Dis

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   20.3s finished


Disease: UC
[[60 33]
 [53 46]]
AUC train: 0.6617759452936445
AUC: 0.5723362658846529


Disease: UC
[[62 31]
 [43 56]]
AUC train: 0.6751206757843926
AUC: 0.613229064841968


Disease: UC
[[57 36]
 [49 50]]
AUC train: 0.6936444086886564
AUC: 0.6167046812208101


Disease: UC
[[65 28]
 [48 51]]
AUC train: 0.6930611423974257
AUC: 0.6348430541978929


Disease: UC
[[60 33]
 [42 57]]
AUC train: 0.7097948511665326
AUC: 0.6537417182578472


Disease: UC
[[68 25]
 [40 59]]
AUC train: 0.7053298471440064
AUC: 0.6773107418268709


Disease: UC
[[66 27]
 [37 62]]
AUC train: 0.7128519710378118
AUC: 0.6813294232649071


Disease: UC
[[61 32]
 [43 56]]
AUC train: 0.7229887369267899
AUC: 0.6524383621157815


Disease: UC
[[68 25]
 [41 58]]
AUC train: 0.7105993563958166
AUC: 0.6895840121646573


Disease: UC
[[65 28]
 [39 60]]
AUC train: 0.7253620273531778
AUC: 0.6719887042467687


Disease: UC
[[77 16]
 [54 45]]
AUC train: 0.714501206757844
AUC: 0.6884978820462692


Disease: UC
[[65 28]
 [38 61]]
AUC train: 0.7

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   17.8s finished


Disease: UC
[[64 15]
 [41 48]]
AUC train: 0.6763564377067542
AUC: 0.722301237377329


Disease: UC
[[57 22]
 [39 50]]
AUC train: 0.7327498632491991
AUC: 0.7105674868439766


Disease: UC
[[56 23]
 [36 53]]
AUC train: 0.766221249772082
AUC: 0.7580713980941545


Disease: UC
[[56 23]
 [34 55]]
AUC train: 0.7686436925321039
AUC: 0.7466932157587826


Disease: UC
[[53 26]
 [30 59]]
AUC train: 0.781068479591571
AUC: 0.7370217607737164


Disease: UC
[[55 24]
 [34 55]]
AUC train: 0.7922429736136073
AUC: 0.757644716256578


Disease: UC
[[55 24]
 [28 61]]
AUC train: 0.8097470761376365
AUC: 0.7826767173943963


Disease: UC
[[55 24]
 [27 62]]
AUC train: 0.8125341877002423
AUC: 0.7831033992319727


Disease: UC
[[55 24]
 [27 62]]
AUC train: 0.8138365762808992
AUC: 0.7828189446735885


Disease: UC
[[56 23]
 [27 62]]
AUC train: 0.814930582688651
AUC: 0.7794054899729769


Disease: UC
[[55 24]
 [31 58]]
AUC train: 0.8161808757260817
AUC: 0.7849523538614707


Disease: UC
[[57 22]
 [35 54]]
AUC train: 0.8106

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  1.1min finished


Disease: UC
[[367 136]
 [340 224]]
AUC train: 0.6107313367353411
AUC: 0.5869005823216729


Disease: UC
[[324 179]
 [265 299]]
AUC train: 0.6525390167481794
AUC: 0.6229502418115421


Disease: UC
[[325 178]
 [266 298]]
AUC train: 0.6540726379967929
AUC: 0.6237609802179829


Disease: UC
[[327 176]
 [266 298]]
AUC train: 0.6540228114836524
AUC: 0.6240465011350338


Disease: UC
[[323 180]
 [265 299]]
AUC train: 0.6565445507266259
AUC: 0.624529419229305


Disease: UC
[[325 178]
 [264 300]]
AUC train: 0.6569884596618786
AUC: 0.6260980217982883


Disease: UC
[[324 179]
 [263 301]]
AUC train: 0.6558676866650015
AUC: 0.6245329441788983


Disease: UC
[[322 181]
 [267 297]]
AUC train: 0.6566823825097291
AUC: 0.6254212314763898


Disease: UC
[[326 177]
 [261 303]]
AUC train: 0.6666975116509917
AUC: 0.6384564950721205


Disease: UC
[[325 178]
 [269 295]]
AUC train: 0.6654026694069091
AUC: 0.6351782919504251


Disease: UC
[[325 178]
 [264 300]]
AUC train: 0.665979233344679
AUC: 0.6360278048023913


D

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   49.5s finished


Disease: UC
[[228 182]
 [192 222]]
AUC train: 0.6090011241368235
AUC: 0.5668669730175563


Disease: UC
[[247 163]
 [179 235]]
AUC train: 0.6565642646018429
AUC: 0.6242665252739484


Disease: UC
[[246 164]
 [172 242]]
AUC train: 0.6604531703696631
AUC: 0.644927536231884


Disease: UC
[[247 163]
 [170 244]]
AUC train: 0.6602643674668727
AUC: 0.6451808648521269


Disease: UC
[[252 158]
 [167 247]]
AUC train: 0.6714373760302779
AUC: 0.6499646518204312


Disease: UC
[[253 157]
 [170 244]]
AUC train: 0.6713462298013446
AUC: 0.6488335100742312


Disease: UC
[[240 170]
 [161 253]]
AUC train: 0.6847458105286915
AUC: 0.6528219630022387


Disease: UC
[[241 169]
 [162 252]]
AUC train: 0.6849183373191724
AUC: 0.6521680216802167


Disease: UC
[[250 160]
 [163 251]]
AUC train: 0.688355852238942
AUC: 0.6509072699422647


Disease: UC
[[252 158]
 [163 251]]
AUC train: 0.6881420926306103
AUC: 0.6511664899257688


Disease: UC
[[248 162]
 [160 254]]
AUC train: 0.6872762034557443
AUC: 0.6505066572404854


D

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   53.9s finished


Disease: RA
[[174 104]
 [109 164]]
AUC train: 0.6512517131776165
AUC: 0.6254117585052836


Disease: RA
[[193  85]
 [106 167]]
AUC train: 0.7173378434860369
AUC: 0.7170527314412207


Disease: RA
[[207  71]
 [ 96 177]]
AUC train: 0.7311890667677563
AUC: 0.7659235249163305


Disease: RA
[[212  66]
 [ 88 185]]
AUC train: 0.7392932474071483
AUC: 0.780008959865075


Disease: RA
[[226  52]
 [ 90 183]]
AUC train: 0.743701338465576
AUC: 0.7906026826890137


Disease: RA
[[218  60]
 [ 92 181]]
AUC train: 0.7448823375032806
AUC: 0.796927293330171


Disease: RA
[[220  58]
 [ 91 182]]
AUC train: 0.7473172367538565
AUC: 0.7926186523308825


Disease: RA
[[218  60]
 [ 90 183]]
AUC train: 0.7477716540790638
AUC: 0.7929480591351095


Disease: RA
[[217  61]
 [ 90 183]]
AUC train: 0.7498298972579438
AUC: 0.7968614119693256


Disease: RA
[[219  59]
 [ 90 183]]
AUC train: 0.7497569960827768
AUC: 0.7968482356971565


Disease: RA
[[218  60]
 [ 92 181]]
AUC train: 0.7497521360044324
AUC: 0.7964529475320843


Di

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   17.8s finished


Disease: RA
[[69 16]
 [51 49]]
AUC train: 0.755995125560343
AUC: 0.6747058823529412


Disease: RA
[[68 17]
 [36 64]]
AUC train: 0.8273055664360013
AUC: 0.7828235294117646


Disease: RA
[[63 22]
 [35 65]]
AUC train: 0.8454976715846282
AUC: 0.7709411764705882


Disease: RA
[[64 21]
 [32 68]]
AUC train: 0.8664534099316707
AUC: 0.7803529411764706


Disease: RA
[[64 21]
 [29 71]]
AUC train: 0.8760064412238326
AUC: 0.8024705882352942


Disease: RA
[[65 20]
 [29 71]]
AUC train: 0.8777037907472691
AUC: 0.8038823529411765


Disease: RA
[[64 21]
 [28 72]]
AUC train: 0.8786395090742918
AUC: 0.806


Disease: RA
[[73 12]
 [35 65]]
AUC train: 0.8782260521390957
AUC: 0.7985882352941176


Disease: RA
[[71 14]
 [37 63]]
AUC train: 0.8752665709187448
AUC: 0.7943529411764706


Disease: RA
[[71 14]
 [37 63]]
AUC train: 0.8743090917003961
AUC: 0.7947058823529412


Disease: RA
[[60 25]
 [26 74]]
AUC train: 0.8919789354571963
AUC: 0.8123529411764706


Disease: RA
[[62 23]
 [25 75]]
AUC train: 0.8929581755668

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  2.7min finished


Disease: RA
[[837 478]
 [570 758]]
AUC train: 0.6255526702447325
AUC: 0.6325856658573457


Disease: RA
[[854 461]
 [546 782]]
AUC train: 0.6651227974964056
AUC: 0.6665897430024279


Disease: RA
[[853 462]
 [522 806]]
AUC train: 0.6789219104473845
AUC: 0.680121054560447


Disease: RA
[[851 464]
 [525 803]]
AUC train: 0.6784611371770722
AUC: 0.6789202437124926


Disease: RA
[[850 465]
 [523 805]]
AUC train: 0.6827103270638298
AUC: 0.6825369920747629


Disease: RA
[[856 459]
 [525 803]]
AUC train: 0.6822313584555209
AUC: 0.6824757203719821


Disease: RA
[[858 457]
 [514 814]]
AUC train: 0.6834437148780018
AUC: 0.6829842182417885


Disease: RA
[[855 460]
 [517 811]]
AUC train: 0.6821616271890938
AUC: 0.6848956663154518


Disease: RA
[[862 453]
 [515 813]]
AUC train: 0.6848978694626484
AUC: 0.691973979568464


Disease: RA
[[880 435]
 [516 812]]
AUC train: 0.6867659942805637
AUC: 0.6940016720875899


Disease: RA
[[874 441]
 [529 799]]
AUC train: 0.6915668289522535
AUC: 0.6942444683677677


D

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   49.7s finished


Disease: RA
[[294 137]
 [208 236]]
AUC train: 0.6645374267081546
AUC: 0.6566856880081938


Disease: RA
[[311 120]
 [185 259]]
AUC train: 0.7129384281148192
AUC: 0.7037008005685501


Disease: RA
[[308 123]
 [168 276]]
AUC train: 0.7479068407005228
AUC: 0.7354309065445956


Disease: RA
[[309 122]
 [155 289]]
AUC train: 0.769665883475828
AUC: 0.7429871867226856


Disease: RA
[[319 112]
 [154 290]]
AUC train: 0.7736799770623224
AUC: 0.7474551117242533


Disease: RA
[[322 109]
 [155 289]]
AUC train: 0.777670016876127
AUC: 0.7605348968458017


Disease: RA
[[326 105]
 [151 293]]
AUC train: 0.7852440688207301
AUC: 0.7693296544804665


Disease: RA
[[331 100]
 [149 295]]
AUC train: 0.7856991662000234
AUC: 0.7735467486047533


Disease: RA
[[330 101]
 [150 294]]
AUC train: 0.7865901179404582
AUC: 0.7742469848038294


Disease: RA
[[328 103]
 [155 289]]
AUC train: 0.783248567838374
AUC: 0.7726165841015029


Disease: RA
[[330 101]
 [150 294]]
AUC train: 0.7871385439577885
AUC: 0.7725956815283963


Di

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   30.1s finished


Disease: PSO
[[125  20]
 [ 81  73]]
AUC train: 0.7202302523222873
AUC: 0.7168831168831168


Disease: PSO
[[108  37]
 [ 56  98]]
AUC train: 0.7378944594505565
AUC: 0.7565606806986117


Disease: PSO
[[127  18]
 [ 78  76]]
AUC train: 0.732031095592595
AUC: 0.7433945364979848


Disease: PSO
[[115  30]
 [ 51 103]]
AUC train: 0.7644278279201528
AUC: 0.7894760412001791


Disease: PSO
[[112  33]
 [ 50 104]]
AUC train: 0.7666183543052902
AUC: 0.7936856247201075


Disease: PSO
[[116  29]
 [ 49 105]]
AUC train: 0.7717076223730153
AUC: 0.7990147783251231


Disease: PSO
[[116  29]
 [ 49 105]]
AUC train: 0.7715017458330587
AUC: 0.7982086878638602


Disease: PSO
[[115  30]
 [ 47 107]]
AUC train: 0.7743428420844587
AUC: 0.7988804299149127


Disease: PSO
[[113  32]
 [ 47 107]]
AUC train: 0.7765580736543909
AUC: 0.8059113300492611


Disease: PSO
[[114  31]
 [ 44 110]]
AUC train: 0.7849825416694116
AUC: 0.8125839677563815


Disease: PSO
[[113  32]
 [ 45 109]]
AUC train: 0.7857072270900586
AUC: 0.81473354

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   11.0s finished


Disease: PSO
[[29 11]
 [11 36]]
AUC train: 0.8091836734693878
AUC: 0.8313829787234043


Disease: PSO
[[38  2]
 [20 27]]
AUC train: 0.8241010689990282
AUC: 0.8462765957446808


Disease: PSO
[[33  7]
 [16 31]]
AUC train: 0.8245869776482021
AUC: 0.847872340425532


Disease: PSO
[[33  7]
 [16 31]]
AUC train: 0.8256559766763848
AUC: 0.848404255319149


Disease: PSO
[[34  6]
 [13 34]]
AUC train: 0.87910592808552
AUC: 0.8627659574468085


Disease: PSO
[[35  5]
 [13 34]]
AUC train: 0.8797862001943634
AUC: 0.8606382978723405


Disease: PSO
[[31  9]
 [13 34]]
AUC train: 0.8840621963070943
AUC: 0.8611702127659574


Disease: PSO
[[32  8]
 [13 34]]
AUC train: 0.9067055393586007
AUC: 0.8569148936170212


Disease: PSO
[[31  9]
 [12 35]]
AUC train: 0.9081632653061225
AUC: 0.8569148936170212


Disease: PSO
[[32  8]
 [12 35]]
AUC train: 0.9089407191448008
AUC: 0.8659574468085106


Disease: PSO
[[32  8]
 [12 35]]
AUC train: 0.9108843537414967
AUC: 0.851063829787234


Disease: PSO
[[32  8]
 [12 35]]
AUC t

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  1.7min finished


Disease: PSO
[[528 271]
 [436 400]]
AUC train: 0.6118416893563711
AUC: 0.593618817780599


Disease: PSO
[[497 302]
 [385 451]]
AUC train: 0.6460832155557645
AUC: 0.614742710685007


Disease: PSO
[[519 280]
 [386 450]]
AUC train: 0.6625011612840807
AUC: 0.636186980136654


Disease: PSO
[[519 280]
 [386 450]]
AUC train: 0.6623205781737962
AUC: 0.6365387955039493


Disease: PSO
[[513 286]
 [385 451]]
AUC train: 0.6627273711771764
AUC: 0.6376376571192459


Disease: PSO
[[513 286]
 [387 449]]
AUC train: 0.6627848169458971
AUC: 0.6377679036594786


Disease: PSO
[[510 289]
 [383 453]]
AUC train: 0.6645183598325991
AUC: 0.63949254750256


Disease: PSO
[[516 283]
 [382 454]]
AUC train: 0.6673747063806582
AUC: 0.6438700289237145


Disease: PSO
[[520 279]
 [386 450]]
AUC train: 0.6675415464840716
AUC: 0.6436394775766359


Disease: PSO
[[517 282]
 [384 452]]
AUC train: 0.6678735775300739
AUC: 0.6438355959303196


Disease: PSO
[[521 278]
 [384 452]]
AUC train: 0.6680885181575356
AUC: 0.643810145456

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   24.5s finished


Disease: PSO
[[128  67]
 [102 105]]
AUC train: 0.6394907354780038
AUC: 0.614715719063545


Disease: PSO
[[130  65]
 [ 75 132]]
AUC train: 0.6939956803455724
AUC: 0.6887154713241671


Disease: PSO
[[134  61]
 [ 72 135]]
AUC train: 0.7304308286915994
AUC: 0.7268921095008053


Disease: PSO
[[140  55]
 [ 80 127]]
AUC train: 0.7280027282027963
AUC: 0.719980180849746


Disease: PSO
[[138  57]
 [ 82 125]]
AUC train: 0.7421393656928499
AUC: 0.7138857921466617


Disease: PSO
[[137  58]
 [ 82 125]]
AUC train: 0.7436898942821416
AUC: 0.7143317230273752


Disease: PSO
[[139  56]
 [ 87 120]]
AUC train: 0.7441627827668523
AUC: 0.7128948346339652


Disease: PSO
[[137  58]
 [ 84 123]]
AUC train: 0.7463317039899967
AUC: 0.7082373343242909


Disease: PSO
[[137  58]
 [ 86 121]]
AUC train: 0.7462498579061043
AUC: 0.7083364300755606


Disease: PSO
[[138  57]
 [ 76 131]]
AUC train: 0.7511651699442992
AUC: 0.7234485321441844


Disease: PSO
[[137  58]
 [ 74 133]]
AUC train: 0.7574127543480731
AUC: 0.724489037

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   16.7s finished


Disease: SLE
[[61 24]
 [30 48]]
AUC train: 0.7602234107749735
AUC: 0.7399698340874812


Disease: SLE
[[65 20]
 [31 47]]
AUC train: 0.7796813192935539
AUC: 0.7603318250377074


Disease: SLE
[[65 20]
 [30 48]]
AUC train: 0.7990138726391443
AUC: 0.7565610859728508


Disease: SLE
[[69 16]
 [30 48]]
AUC train: 0.8134157891804557
AUC: 0.7411764705882353


Disease: SLE
[[67 18]
 [31 47]]
AUC train: 0.8172878711905954
AUC: 0.7441930618401207


Disease: SLE
[[63 22]
 [29 49]]
AUC train: 0.8228870689174884
AUC: 0.7386123680241328


Disease: SLE
[[65 20]
 [33 45]]
AUC train: 0.8516351885898936
AUC: 0.7493212669683258


Disease: SLE
[[65 20]
 [32 46]]
AUC train: 0.8422474789681876
AUC: 0.7431372549019607


Disease: SLE
[[66 19]
 [34 44]]
AUC train: 0.8573736698423311
AUC: 0.7429864253393665


Disease: SLE
[[65 20]
 [32 46]]
AUC train: 0.857735807008747
AUC: 0.7413273001508296


Disease: SLE
[[67 18]
 [34 44]]
AUC train: 0.8392668115215333
AUC: 0.7432880844645552


Disease: SLE
[[65 20]
 [30 48]]
A

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   16.2s finished


Disease: SLE
[[55 27]
 [21 56]]
AUC train: 0.7816968957098012
AUC: 0.7602153943617359


Disease: SLE
[[64 18]
 [25 52]]
AUC train: 0.790140681316126
AUC: 0.7934748178650618


Disease: SLE
[[64 18]
 [23 54]]
AUC train: 0.7931345192419487
AUC: 0.79490022172949


Disease: SLE
[[65 17]
 [22 55]]
AUC train: 0.802581095221486
AUC: 0.8173899271460248


Disease: SLE
[[68 14]
 [23 54]]
AUC train: 0.8204278572259041
AUC: 0.8262591067469117


Disease: SLE
[[68 14]
 [24 53]]
AUC train: 0.8238576909661667
AUC: 0.8226164079822618


Disease: SLE
[[69 13]
 [18 59]]
AUC train: 0.8626613184513428
AUC: 0.8664871713652202


Disease: SLE
[[70 12]
 [22 55]]
AUC train: 0.8542029996512034
AUC: 0.8631612290148877


Disease: SLE
[[70 12]
 [24 53]]
AUC train: 0.8426636437623531
AUC: 0.8552423186569528


Disease: SLE
[[72 10]
 [23 54]]
AUC train: 0.8409777932798512
AUC: 0.8611023123218245


Disease: SLE
[[71 11]
 [22 55]]
AUC train: 0.8707417742123009
AUC: 0.8788406715235982


Disease: SLE
[[71 11]
 [18 59]]
AUC 

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   12.8s finished


Disease: SLE
[[56 27]
 [46 46]]
AUC train: 0.648700231928875
AUC: 0.6386851754845468


Disease: SLE
[[58 25]
 [37 55]]
AUC train: 0.6995796289137999
AUC: 0.681246726034573


Disease: SLE
[[59 24]
 [38 54]]
AUC train: 0.7160320834943948
AUC: 0.707962283918282


Disease: SLE
[[62 21]
 [37 55]]
AUC train: 0.7158629686896019
AUC: 0.7061288632792038


Disease: SLE
[[60 23]
 [41 51]]
AUC train: 0.723569771936606
AUC: 0.6957831325301205


Disease: SLE
[[61 22]
 [40 52]]
AUC train: 0.7230141090065714
AUC: 0.6974855945521216


Disease: SLE
[[62 21]
 [40 52]]
AUC train: 0.723400657131813
AUC: 0.6980094290204295


Disease: SLE
[[62 21]
 [40 52]]
AUC train: 0.7233523386161576
AUC: 0.6977475117862756


Disease: SLE
[[61 22]
 [41 51]]
AUC train: 0.7344897564746812
AUC: 0.6910686223153484


Disease: SLE
[[62 21]
 [38 54]]
AUC train: 0.7473424816389641
AUC: 0.7020691461498165


Disease: SLE
[[60 23]
 [42 50]]
AUC train: 0.7343931194433706
AUC: 0.6876636982713462


Disease: SLE
[[64 19]
 [43 49]]
AUC t

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:    9.3s finished


Disease: SLE
[[37 13]
 [27 26]]
AUC train: 0.6903627959097913
AUC: 0.7083018867924528


Disease: SLE
[[40 10]
 [24 29]]
AUC train: 0.7108838772937386
AUC: 0.7524528301886793


Disease: SLE
[[39 11]
 [22 31]]
AUC train: 0.7059812298641266
AUC: 0.7581132075471698


Disease: SLE
[[41  9]
 [23 30]]
AUC train: 0.7145958817761592
AUC: 0.790566037735849


Disease: SLE
[[39 11]
 [17 36]]
AUC train: 0.7496848298080964
AUC: 0.7588679245283019


Disease: SLE
[[38 12]
 [21 32]]
AUC train: 0.7582994817201288
AUC: 0.7803773584905661


Disease: SLE
[[38 12]
 [19 34]]
AUC train: 0.7729373861885418
AUC: 0.7864150943396226


Disease: SLE
[[40 10]
 [20 33]]
AUC train: 0.756268384927861
AUC: 0.799622641509434


Disease: SLE
[[36 14]
 [18 35]]
AUC train: 0.7712564784983891
AUC: 0.7962264150943396


Disease: SLE
[[36 14]
 [18 35]]
AUC train: 0.7714665919596582
AUC: 0.7973584905660377


Disease: SLE
[[40 10]
 [20 33]]
AUC train: 0.7579492926180137
AUC: 0.7981132075471699


Disease: SLE
[[40 10]
 [18 35]]
AUC

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  2.0min finished


Disease: COPD
[[497 117]
 [249 434]]
AUC train: 0.7872647133174077
AUC: 0.7705729179086327


Disease: COPD
[[503 111]
 [261 422]]
AUC train: 0.8177068891468318
AUC: 0.7909729541541675


Disease: COPD
[[493 121]
 [235 448]]
AUC train: 0.8239422759839858
AUC: 0.7997720346621773


Disease: COPD
[[493 121]
 [235 448]]
AUC train: 0.8307142988349707
AUC: 0.7997672655128506


Disease: COPD
[[491 123]
 [235 448]]
AUC train: 0.8304007144650303
AUC: 0.7990733542857961


Disease: COPD
[[499 115]
 [226 457]]
AUC train: 0.8362617244253796
AUC: 0.8038067349926794


Disease: COPD
[[497 117]
 [237 446]]
AUC train: 0.8339467982467266
AUC: 0.7968819301701155


Disease: COPD
[[497 117]
 [229 454]]
AUC train: 0.8399656937824573
AUC: 0.8035539700783572


Disease: COPD
[[501 113]
 [227 456]]
AUC train: 0.8399919351523267
AUC: 0.8034371259198496


Disease: COPD
[[501 113]
 [223 460]]
AUC train: 0.8413096892759394
AUC: 0.8057000872754327


Disease: COPD
[[498 116]
 [223 460]]
AUC train: 0.8418957465363578
AUC

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   17.9s finished


Disease: COPD
[[71 15]
 [41 64]]
AUC train: 0.8591346932167719
AUC: 0.826799557032115


Disease: COPD
[[76 10]
 [36 69]]
AUC train: 0.8717217095677514
AUC: 0.8345514950166113


Disease: COPD
[[73 13]
 [36 69]]
AUC train: 0.8805447628298526
AUC: 0.8389811738648948


Disease: COPD
[[71 15]
 [32 73]]
AUC train: 0.8923627974745022
AUC: 0.8437430786267995


Disease: COPD
[[71 15]
 [31 74]]
AUC train: 0.8922211429496519
AUC: 0.8428571428571429


Disease: COPD
[[71 15]
 [32 73]]
AUC train: 0.8922211429496519
AUC: 0.8416389811738648


Disease: COPD
[[74 12]
 [25 80]]
AUC train: 0.9235065565808644
AUC: 0.8765227021040974


Disease: COPD
[[73 13]
 [24 81]]
AUC train: 0.9312975554476284
AUC: 0.8841638981173865


Disease: COPD
[[74 12]
 [25 80]]
AUC train: 0.9301238465274404
AUC: 0.8839424141749723


Disease: COPD
[[74 12]
 [25 80]]
AUC train: 0.930204791970212
AUC: 0.883499446290144


Disease: COPD
[[73 13]
 [25 80]]
AUC train: 0.9312773190869353
AUC: 0.8849390919158361


Disease: COPD
[[73 13]
 

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  6.6min finished


Disease: COPD
[[1982 1216]
 [1399 1810]]
AUC train: 0.6374019924064028
AUC: 0.629533767111768


Disease: COPD
[[2092 1106]
 [1225 1984]]
AUC train: 0.6900554913965246
AUC: 0.6800712544124745


Disease: COPD
[[2048 1150]
 [1188 2021]]
AUC train: 0.7004561098529896
AUC: 0.6845114516298458


Disease: COPD
[[2114 1084]
 [1225 1984]]
AUC train: 0.7042592269079396
AUC: 0.6887229495062647


Disease: COPD
[[2114 1084]
 [1223 1986]]
AUC train: 0.7045850477875397
AUC: 0.6890830998105508


Disease: COPD
[[2109 1089]
 [1172 2037]]
AUC train: 0.7154288528054997
AUC: 0.7011901330509818


Disease: COPD
[[2224  974]
 [1073 2136]]
AUC train: 0.7459331010243408
AUC: 0.7346718334983047


Disease: COPD
[[2214  984]
 [1063 2146]]
AUC train: 0.750193903070302
AUC: 0.739421023306285


Disease: COPD
[[2234  964]
 [1007 2202]]
AUC train: 0.7638377128136762
AUC: 0.7526878262765897


Disease: COPD
[[2277  921]
 [1008 2201]]
AUC train: 0.7669961151368616
AUC: 0.7561931528177377


Disease: COPD
[[2280  918]
 [1009

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   42.1s finished


Disease: COPD
[[246 130]
 [139 226]]
AUC train: 0.6870609252657585
AUC: 0.6839405421160013


Disease: COPD
[[274 102]
 [139 226]]
AUC train: 0.7450114135959935
AUC: 0.7315287088312445


Disease: COPD
[[270 106]
 [130 235]]
AUC train: 0.7531189562259758
AUC: 0.7346546196444186


Disease: COPD
[[289  87]
 [128 237]]
AUC train: 0.7793353558600971
AUC: 0.7647333139026524


Disease: COPD
[[288  88]
 [125 240]]
AUC train: 0.7795365355779625
AUC: 0.7656732730982221


Disease: COPD
[[285  91]
 [126 239]]
AUC train: 0.7875032524054388
AUC: 0.7732439522005247


Disease: COPD
[[285  91]
 [128 237]]
AUC train: 0.7874294865088882
AUC: 0.7741183328475663


Disease: COPD
[[284  92]
 [126 239]]
AUC train: 0.7886727771652974
AUC: 0.7662489070241911


Disease: COPD
[[288  88]
 [126 239]]
AUC train: 0.7884420910888115
AUC: 0.7655858350335181


Disease: COPD
[[286  90]
 [121 244]]
AUC train: 0.7985533837087346
AUC: 0.7725735937044593


Disease: COPD
[[297  79]
 [114 251]]
AUC train: 0.8091622608308454
AUC

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  3.8min finished


Disease: obesity
[[789 380]
 [367 832]]
AUC train: 0.7478499149037303
AUC: 0.7568693186723183


Disease: obesity
[[822 347]
 [352 847]]
AUC train: 0.7651413746413247
AUC: 0.7773080075997179


Disease: obesity
[[862 307]
 [351 848]]
AUC train: 0.7781305667661935
AUC: 0.7932016343816597


Disease: obesity
[[887 282]
 [324 875]]
AUC train: 0.792926162026496
AUC: 0.813311064038966


Disease: obesity
[[895 274]
 [322 877]]
AUC train: 0.7963108738521048
AUC: 0.8184486501796835


Disease: obesity
[[900 269]
 [328 871]]
AUC train: 0.807747843950365
AUC: 0.8309540813523673


Disease: obesity
[[893 276]
 [314 885]]
AUC train: 0.8200791015805502
AUC: 0.8388448885619681


Disease: obesity
[[896 273]
 [307 892]]
AUC train: 0.8209019457373801
AUC: 0.8408389940005607


Disease: obesity
[[911 258]
 [280 919]]
AUC train: 0.8301174856808469
AUC: 0.851592894278166


Disease: obesity
[[909 260]
 [275 924]]
AUC train: 0.8311144157131379
AUC: 0.8529891248124507


Disease: obesity
[[919 250]
 [290 909]]
AUC 

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   20.1s finished


Disease: obesity
[[91 17]
 [45 78]]
AUC train: 0.8623887342574476
AUC: 0.8404847937368263


Disease: obesity
[[89 19]
 [37 86]]
AUC train: 0.89052553255987
AUC: 0.8535832580548027


Disease: obesity
[[90 18]
 [32 91]]
AUC train: 0.894865929947364
AUC: 0.8680367359229149


Disease: obesity
[[96 12]
 [33 90]]
AUC train: 0.9148455369691625
AUC: 0.8914483589280338


Disease: obesity
[[98 10]
 [30 93]]
AUC train: 0.919709537851021
AUC: 0.9006323396567297


Disease: obesity
[[99  9]
 [33 90]]
AUC train: 0.9191997133959819
AUC: 0.8981481481481481


Disease: obesity
[[97 11]
 [32 91]]
AUC train: 0.9204260478959406
AUC: 0.9011592893706714


Disease: obesity
[[97 11]
 [31 92]]
AUC train: 0.9202055832667347
AUC: 0.8957392351701294


Disease: obesity
[[95 13]
 [32 91]]
AUC train: 0.9218866260644307
AUC: 0.892652815417043


Disease: obesity
[[97 11]
 [31 92]]
AUC train: 0.9182214016038801
AUC: 0.8883619391749472


Disease: obesity
[[97 11]
 [31 92]]
AUC train: 0.9182351806432055
AUC: 0.886555254441

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed: 13.9min finished


Disease: obesity
[[4007 2196]
 [2528 3771]]
AUC train: 0.6665251276583541
AUC: 0.6710703128581066


Disease: obesity
[[4095 2108]
 [2298 4001]]
AUC train: 0.7070763461782219
AUC: 0.7090678178678067


Disease: obesity
[[4268 1935]
 [2069 4230]]
AUC train: 0.7416634288802693
AUC: 0.741950626034338


Disease: obesity
[[4298 1905]
 [2059 4240]]
AUC train: 0.7443045966531738
AUC: 0.7460965389719578


Disease: obesity
[[4300 1903]
 [2070 4229]]
AUC train: 0.7442724934069527
AUC: 0.7456592003362348


Disease: obesity
[[4307 1896]
 [2080 4219]]
AUC train: 0.7444147508873886
AUC: 0.7457696866945223


Disease: obesity
[[4354 1849]
 [2096 4203]]
AUC train: 0.7490935040352896
AUC: 0.7512677714568821


Disease: obesity
[[4282 1921]
 [1995 4304]]
AUC train: 0.7505909143319022
AUC: 0.7539975036788475


Disease: obesity
[[4269 1934]
 [1962 4337]]
AUC train: 0.7526554036281351
AUC: 0.7562366375681719


Disease: obesity
[[4262 1941]
 [1957 4342]]
AUC train: 0.7527877231582838
AUC: 0.7566027755903311


D

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   57.8s finished


Disease: obesity
[[377 157]
 [231 350]]
AUC train: 0.6992455637775834
AUC: 0.7058120120933171


Disease: obesity
[[398 136]
 [207 374]]
AUC train: 0.7689426519475779
AUC: 0.7660497527832033


Disease: obesity
[[410 124]
 [169 412]]
AUC train: 0.8177570923876343
AUC: 0.8226001921006659


Disease: obesity
[[414 120]
 [181 400]]
AUC train: 0.8243412307451716
AUC: 0.8262874934730899


Disease: obesity
[[412 122]
 [177 404]]
AUC train: 0.8311415823731095
AUC: 0.828624288486208


Disease: obesity
[[412 122]
 [178 403]]
AUC train: 0.8311889715830602
AUC: 0.8285340398512188


Disease: obesity
[[421 113]
 [175 406]]
AUC train: 0.8324412314560097
AUC: 0.8311544734314465


Disease: obesity
[[420 114]
 [178 403]]
AUC train: 0.8320550093949108
AUC: 0.8302487639160172


Disease: obesity
[[417 117]
 [170 411]]
AUC train: 0.8325277167641698
AUC: 0.8308643885332664


Disease: obesity
[[414 120]
 [171 410]]
AUC train: 0.8323606697990935
AUC: 0.8324018384936215


Disease: obesity
[[417 117]
 [172 409]]
A

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  3.4min finished


Disease: T2D
[[902 202]
 [399 722]]
AUC train: 0.7910200289451252
AUC: 0.7953076316435894


Disease: T2D
[[928 176]
 [375 746]]
AUC train: 0.8096942697149668
AUC: 0.8228718212258724


Disease: T2D
[[902 202]
 [338 783]]
AUC train: 0.8224664324791521
AUC: 0.8314667933651373


Disease: T2D
[[892 212]
 [296 825]]
AUC train: 0.8476754616230484
AUC: 0.8507963903864304


Disease: T2D
[[907 197]
 [282 839]]
AUC train: 0.8634693029252047
AUC: 0.8639453968377097


Disease: T2D
[[907 197]
 [280 841]]
AUC train: 0.8674711700075973
AUC: 0.8658838511163687


Disease: T2D
[[907 197]
 [280 841]]
AUC train: 0.8681774168005921
AUC: 0.866501182950006


Disease: T2D
[[917 187]
 [281 840]]
AUC train: 0.8710702951766938
AUC: 0.8687127500032321


Disease: T2D
[[922 182]
 [276 845]]
AUC train: 0.8717377682185377
AUC: 0.8698229776726267


Disease: T2D
[[923 181]
 [279 842]]
AUC train: 0.8729546103475703
AUC: 0.8692743280456114


Disease: T2D
[[916 188]
 [261 860]]
AUC train: 0.8764121593295142
AUC: 0.87303972

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   55.5s finished


Disease: T2D
[[282  57]
 [ 73 274]]
AUC train: 0.8787009406549339
AUC: 0.8647063324067226


Disease: T2D
[[279  60]
 [ 63 284]]
AUC train: 0.9054113113559998
AUC: 0.8811642991337464


Disease: T2D
[[281  58]
 [ 61 286]]
AUC train: 0.9118369009296898
AUC: 0.8924451471950897


Disease: T2D
[[284  55]
 [ 63 284]]
AUC train: 0.9189877581710096
AUC: 0.8956500301786062


Disease: T2D
[[297  42]
 [ 55 292]]
AUC train: 0.9293200811422575
AUC: 0.9119634796358165


Disease: T2D
[[297  42]
 [ 57 290]]
AUC train: 0.9293655082747873
AUC: 0.9122780172230581


Disease: T2D
[[294  45]
 [ 57 290]]
AUC train: 0.9300281178285831
AUC: 0.912252514175444


Disease: T2D
[[296  43]
 [ 57 290]]
AUC train: 0.9308442397612726
AUC: 0.9146157965876922


Disease: T2D
[[295  44]
 [ 55 292]]
AUC train: 0.9316995230151085
AUC: 0.9147858169051202


Disease: T2D
[[296  43]
 [ 58 289]]
AUC train: 0.9327004863836087
AUC: 0.9142587539210936


Disease: T2D
[[299  40]
 [ 61 286]]
AUC train: 0.935599990601283
AUC: 0.917863184

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed: 12.1min finished


Disease: T2D
[[4538 1225]
 [2833 2912]]
AUC train: 0.6797291279329436
AUC: 0.6861457208714336


Disease: T2D
[[4286 1477]
 [2179 3566]]
AUC train: 0.7328119644359504
AUC: 0.7422851306623222


Disease: T2D
[[4455 1308]
 [1828 3917]]
AUC train: 0.7964457511707894
AUC: 0.8004464119188962


Disease: T2D
[[4467 1296]
 [1844 3901]]
AUC train: 0.7971550205163893
AUC: 0.8011842601439785


Disease: T2D
[[4346 1417]
 [1731 4014]]
AUC train: 0.7976690732287863
AUC: 0.8025285399324975


Disease: T2D
[[4366 1397]
 [1739 4006]]
AUC train: 0.7973575167148139
AUC: 0.801785949713419


Disease: T2D
[[4364 1399]
 [1739 4006]]
AUC train: 0.7972842383475245
AUC: 0.8016653460062368


Disease: T2D
[[4398 1365]
 [1741 4004]]
AUC train: 0.7990670715614109
AUC: 0.8035639860355828


Disease: T2D
[[4362 1401]
 [1703 4042]]
AUC train: 0.79924532103726
AUC: 0.8041745253135644


Disease: T2D
[[4341 1422]
 [1661 4084]]
AUC train: 0.8044119480658063
AUC: 0.8100800898622964


Disease: T2D
[[4459 1304]
 [1652 4093]]
AUC

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  2.6min finished


Disease: T2D
[[1362  192]
 [ 625  959]]
AUC train: 0.7804991648236799
AUC: 0.777049777049777


Disease: T2D
[[1301  253]
 [ 292 1292]]
AUC train: 0.9043501208439481
AUC: 0.8989342426842426


Disease: T2D
[[1298  256]
 [ 298 1286]]
AUC train: 0.9048529063202785
AUC: 0.8998783686283687


Disease: T2D
[[1314  240]
 [ 288 1296]]
AUC train: 0.9085326092028033
AUC: 0.9043190918190919


Disease: T2D
[[1314  240]
 [ 289 1295]]
AUC train: 0.9086163694558756
AUC: 0.9044746857246858


Disease: T2D
[[1309  245]
 [ 288 1296]]
AUC train: 0.9092008249115834
AUC: 0.9053314678314679


Disease: T2D
[[1306  248]
 [ 275 1309]]
AUC train: 0.9120625589055924
AUC: 0.906994656994657


Disease: T2D
[[1304  250]
 [ 278 1306]]
AUC train: 0.9122872633278278
AUC: 0.9073858761358762


Disease: T2D
[[1305  249]
 [ 275 1309]]
AUC train: 0.9124197717495777
AUC: 0.9074086261586262


Disease: T2D
[[1296  258]
 [ 270 1314]]
AUC train: 0.9129250207627633
AUC: 0.9081882206882207


Disease: T2D
[[1294  260]
 [ 278 1306]]
AU

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   21.9s finished


Disease: Atherosclerosis
[[96 19]
 [69 56]]
AUC train: 0.7785802405674925
AUC: 0.6862260869565218


Disease: Atherosclerosis
[[96 19]
 [59 66]]
AUC train: 0.8134445358281073
AUC: 0.7321739130434782


Disease: Atherosclerosis
[[89 26]
 [48 77]]
AUC train: 0.8166572427264316
AUC: 0.7474086956521738


Disease: Atherosclerosis
[[89 26]
 [46 79]]
AUC train: 0.8184178061067132
AUC: 0.7573565217391304


Disease: Atherosclerosis
[[90 25]
 [44 81]]
AUC train: 0.8241621260409171
AUC: 0.7593043478260869


Disease: Atherosclerosis
[[89 26]
 [46 79]]
AUC train: 0.822478667626195
AUC: 0.7633391304347827


Disease: Atherosclerosis
[[86 29]
 [45 80]]
AUC train: 0.8380281690140845
AUC: 0.7675826086956522


Disease: Atherosclerosis
[[83 32]
 [45 80]]
AUC train: 0.8423588979130255
AUC: 0.7678608695652174


Disease: Atherosclerosis
[[88 27]
 [44 81]]
AUC train: 0.8392361468078544
AUC: 0.7793391304347825


Disease: Atherosclerosis
[[86 29]
 [39 86]]
AUC train: 0.8458029197080292
AUC: 0.7832347826086956


D

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:    7.8s finished


Disease: Atherosclerosis
[[23  2]
 [13 13]]
AUC train: 0.831385154880187
AUC: 0.6907692307692307


Disease: Atherosclerosis
[[23  2]
 [12 14]]
AUC train: 0.847749853886616
AUC: 0.7846153846153846


Disease: Atherosclerosis
[[23  2]
 [ 7 19]]
AUC train: 0.880771478667446
AUC: 0.883076923076923


Disease: Atherosclerosis
[[24  1]
 [ 5 21]]
AUC train: 0.9032729398012858
AUC: 0.8923076923076924


Disease: Atherosclerosis
[[23  2]
 [ 5 21]]
AUC train: 0.9032729398012858
AUC: 0.8923076923076924


Disease: Atherosclerosis
[[24  1]
 [ 5 21]]
AUC train: 0.9102863822326125
AUC: 0.9030769230769231


Disease: Atherosclerosis
[[22  3]
 [ 3 23]]
AUC train: 0.923728813559322
AUC: 0.9323076923076923


Disease: Atherosclerosis
[[22  3]
 [ 3 23]]
AUC train: 0.9237288135593221
AUC: 0.9307692307692308


Disease: Atherosclerosis
[[22  3]
 [ 3 23]]
AUC train: 0.924021040327294
AUC: 0.9307692307692308


Disease: Atherosclerosis
[[22  3]
 [ 3 23]]
AUC train: 0.9304500292226767
AUC: 0.9246153846153845


Diseas

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:  1.1min finished


Disease: Atherosclerosis
[[392 188]
 [298 313]]
AUC train: 0.6461690975529286
AUC: 0.6391020937976184


Disease: Atherosclerosis
[[380 200]
 [249 362]]
AUC train: 0.687322974832079
AUC: 0.6652040182854564


Disease: Atherosclerosis
[[395 185]
 [228 383]]
AUC train: 0.7242962049954038
AUC: 0.710720130932897


Disease: Atherosclerosis
[[403 177]
 [232 379]]
AUC train: 0.7276354313863418
AUC: 0.710900728032056


Disease: Atherosclerosis
[[402 178]
 [226 385]]
AUC train: 0.7293182728361169
AUC: 0.7134601275467013


Disease: Atherosclerosis
[[403 177]
 [227 384]]
AUC train: 0.7293551044461459
AUC: 0.7130678932219652


Disease: Atherosclerosis
[[407 173]
 [226 385]]
AUC train: 0.7333033492902392
AUC: 0.7195637451323438


Disease: Atherosclerosis
[[410 170]
 [233 378]]
AUC train: 0.7355324398295993
AUC: 0.722552062757492


Disease: Atherosclerosis
[[410 170]
 [230 381]]
AUC train: 0.7355827590714699
AUC: 0.7228822168293922


Disease: Atherosclerosis
[[407 173]
 [222 389]]
AUC train: 0.7384711

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 10000 out of 10000 | elapsed:   12.8s finished


Disease: Atherosclerosis
[[72 19]
 [36 64]]
AUC train: 0.7393254761225071
AUC: 0.7678021978021978


Disease: Atherosclerosis
[[73 18]
 [38 62]]
AUC train: 0.7481750336446312
AUC: 0.7487912087912089


Disease: Atherosclerosis
[[75 16]
 [37 63]]
AUC train: 0.7608172586762367
AUC: 0.7895604395604396


Disease: Atherosclerosis
[[75 16]
 [38 62]]
AUC train: 0.763998205619673
AUC: 0.7993406593406593


Disease: Atherosclerosis
[[77 14]
 [40 60]]
AUC train: 0.7761714448839769
AUC: 0.8042857142857143


Disease: Atherosclerosis
[[77 14]
 [43 57]]
AUC train: 0.7903837527017658
AUC: 0.799120879120879


Disease: Atherosclerosis
[[76 15]
 [42 58]]
AUC train: 0.7965417397332899
AUC: 0.8067032967032967


Disease: Atherosclerosis
[[77 14]
 [40 60]]
AUC train: 0.8112230333183801
AUC: 0.8283516483516482


Disease: Atherosclerosis
[[77 14]
 [39 61]]
AUC train: 0.810183108356103
AUC: 0.8204395604395605


Disease: Atherosclerosis
[[77 14]
 [39 61]]
AUC train: 0.8101423269850332
AUC: 0.8206593406593407


Dis

In [None]:
# Reorder the result columns: identifiers first, then model size and AUCs,
# then the selected features with their coefficients, and finally the
# list-valued 'prob' / 'reference' columns.
shiny_column_order = [
    'Disease', 'Model', 'Data', 'N',
    'AUC_train', 'AUC_test',
    'Features', 'Coef',
    'prob', 'reference',
]
Shiny_data = Shiny_data[shiny_column_order]

In [46]:
# Show the collected results table; the rendered output below is abbreviated
# by pandas (first and last rows only).
Shiny_data

Unnamed: 0,prob,reference,Disease,Data,N,AUC_train,AUC_test,Features,Coef,Model
0,"[0.457, 0.488, 0.474, 0.431, 0.571, 0.554, 0.4...","[HC, Incident, Incident, HC, Incident, HC, Inc...",CD,prot,1.0,0.684153,0.625518,[CXCL9],[0.54],incident
1,"[0.469, 0.504, 0.49, 0.472, 0.519, 0.512, 0.49...","[HC, Incident, Incident, HC, Incident, HC, Inc...",CD,prot,2.0,0.703178,0.638915,"[CRHBP, CXCL9]","[0.079, 0.179]",incident
2,"[0.238, 0.659, 0.725, 0.575, 0.628, 0.66, 0.52...","[HC, Incident, Incident, HC, Incident, HC, Inc...",CD,prot,3.0,0.745606,0.627751,"[CXCL9, MMP10, CRHBP]","[0.527, 0.564, 1.045]",incident
3,"[0.327, 0.726, 0.662, 0.673, 0.67, 0.734, 0.60...","[HC, Incident, Incident, HC, Incident, HC, Inc...",CD,prot,4.0,0.763773,0.641148,"[CXCL9, CHI3L1, MMP10, CRHBP]","[0.414, 0.486, 0.605, 1.209]",incident
4,"[0.488, 0.697, 0.695, 0.656, 0.767, 0.85, 0.67...","[HC, Incident, Incident, HC, Incident, HC, Inc...",CD,prot,5.0,0.772590,0.661563,"[CXCL9, KRT19, CHI3L1, MMP10, CRHBP]","[0.359, 0.425, 0.462, 0.521, 0.743]",incident
...,...,...,...,...,...,...,...,...,...,...
1075,"[0.349, 0.665, 0.672, 0.324, 0.547, 0.144, 0.3...","[HC, Prevalent, Prevalent, HC, HC, HC, HC, Pre...",Atherosclerosis,met,26.0,0.804678,0.823187,"[L_LDL_PL, Total_C, L_LDL_L, XS_VLDL_FC_by_CE,...","[-0.006, 0.007, 0.007, -0.013, 0.013, 0.022, 0...",prevalent
1076,"[0.348, 0.665, 0.673, 0.323, 0.548, 0.144, 0.3...","[HC, Prevalent, Prevalent, HC, HC, HC, HC, Pre...",Atherosclerosis,met,27.0,0.804922,0.823297,"[L_LDL_PL, XS_VLDL_FC_by_CE, L_LDL_L, L_LDL_FC...","[-0.005, -0.013, 0.014, 0.015, 0.026, 0.026, 0...",prevalent
1077,"[0.354, 0.676, 0.662, 0.322, 0.541, 0.147, 0.3...","[HC, Prevalent, Prevalent, HC, HC, HC, HC, Pre...",Atherosclerosis,met,28.0,0.805983,0.823956,"[L_LDL_PL, XS_VLDL_FC_by_CE, L_LDL_FC, LDL_C, ...","[0.003, -0.013, 0.02, -0.024, 0.027, 0.034, 0....",prevalent
1078,"[0.354, 0.678, 0.664, 0.322, 0.542, 0.147, 0.3...","[HC, Prevalent, Prevalent, HC, HC, HC, HC, Pre...",Atherosclerosis,met,29.0,0.805942,0.823956,"[L_LDL_PL, XS_VLDL_FC_by_CE, L_LDL_FC, LDL_C, ...","[0.003, -0.013, 0.02, -0.025, 0.03, 0.034, 0.0...",prevalent


In [47]:
# Show the table used for the boxplots; judging from the rendered output it
# holds one predicted probability per row together with its reference label,
# disease, data type and model.
boxplot_data_full

Unnamed: 0,prob,reference,disease,data_type,Model
0,0.488438,HC,CD,prot,incident
1,0.696740,Incident,CD,prot,incident
2,0.694683,Incident,CD,prot,incident
3,0.655752,HC,CD,prot,incident
4,0.766940,Incident,CD,prot,incident
...,...,...,...,...,...
54816,0.462279,Prevalent,Atherosclerosis,met,prevalent
54817,0.610119,Prevalent,Atherosclerosis,met,prevalent
54818,0.254494,HC,Atherosclerosis,met,prevalent
54819,0.592955,Prevalent,Atherosclerosis,met,prevalent


In [49]:
# Supplementary table 2 (proteomics + metabolomics): write the results table
# to the current working directory, row index included (pandas default).
# NOTE(review): the bracketed cells (Features, Coef, prob, reference) look like
# Python lists and would be stored as their text representation - confirm that
# the consumer of this file parses them back.
Shiny_data.to_csv(path_or_buf='ShinyData_clinical_story.csv')

In [51]:
# Write the boxplot table to the current working directory, row index
# included (pandas default).
boxplot_data_full.to_csv(path_or_buf='boxplot_data_clinical_story.csv')