In [168]:
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix, precision_recall_fscore_support
import seaborn as sns
import pandas as pd

In [169]:
# Loading the dataset
glioma_df = pd.read_csv("./TCGA_InfoWithGrade.csv")

In [170]:
glioma_df.nunique()

Unnamed: 0           839
Gender                 2
Age_at_diagnosis     766
IDH1                   2
TP53                   2
ATRX                   2
PTEN                   2
EGFR                   2
CIC                    2
MUC16                  2
PIK3CA                 2
NF1                    2
PIK3R1                 2
FUBP1                  2
RB1                    2
NOTCH1                 2
BCOR                   2
CSMD3                  2
SMARCA4                2
GRIN2A                 2
IDH2                   2
FAT4                   2
PDGFRA                 2
Primary Diagnosis      6
Grade                  2
dtype: int64

In [171]:
glioma_df = glioma_df.drop("Unnamed: 0", axis=1)
glioma_df.head()

Unnamed: 0,Gender,Age_at_diagnosis,IDH1,TP53,ATRX,PTEN,EGFR,CIC,MUC16,PIK3CA,...,NOTCH1,BCOR,CSMD3,SMARCA4,GRIN2A,IDH2,FAT4,PDGFRA,Primary Diagnosis,Grade
0,0,51.3,1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,4,0
1,0,38.72,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,3,0
2,0,35.17,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,32.78,1,1,1,0,0,0,1,0,...,0,0,0,0,0,0,1,0,1,0
4,0,31.51,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [172]:
# Using k = 10 as recommended by dataset authors
kfold = KFold(n_splits=10, shuffle=True)

In [173]:
feature_df = glioma_df[glioma_df.columns[:-2]]
multi_class_df = glioma_df["Primary Diagnosis"]
binary_class_df = glioma_df["Grade"]

In [174]:
feature_df.head()

Unnamed: 0,Gender,Age_at_diagnosis,IDH1,TP53,ATRX,PTEN,EGFR,CIC,MUC16,PIK3CA,...,FUBP1,RB1,NOTCH1,BCOR,CSMD3,SMARCA4,GRIN2A,IDH2,FAT4,PDGFRA
0,0,51.3,1,0,0,0,0,0,0,1,...,1,0,0,0,0,0,0,0,0,0
1,0,38.72,1,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,35.17,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,32.78,1,1,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,1,0
4,0,31.51,1,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [175]:
multi_class_df.shape

(839,)

In [176]:
binary_class_df.shape

(839,)

In [177]:
kfold.get_n_splits(feature_df)

10

In [178]:
print(kfold)

KFold(n_splits=10, random_state=None, shuffle=True)


In [179]:
print([(i,j) for i,j in kfold.split(feature_df)])

[(array([  0,   1,   2,   3,   4,   5,   7,   8,   9,  11,  12,  13,  14,
        15,  16,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  29,
        30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  44,
        45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,
        58,  59,  60,  61,  62,  63,  64,  65,  67,  68,  69,  70,  71,
        72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
        85,  86,  87,  88,  89,  90,  91,  93,  94,  97,  98,  99, 100,
       101, 102, 103, 105, 106, 107, 108, 109, 110, 112, 113, 116, 117,
       118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, 131,
       132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144,
       145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157,
       158, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171,
       172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184,
       185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 

In [180]:
#initializing Multi Layer Perceptron model with parameters
glioma_classifier_MLP = MLPClassifier(hidden_layer_sizes=(44, 44, 44,44), activation="relu", solver="adam",learning_rate="invscaling", verbose=True, max_iter=500)
glioma_classifier_RF = RandomForestClassifier(n_estimators=2000, criterion="gini", verbose=True)
glioma_classifier_ada = AdaBoostClassifier(n_estimators=15)

In [181]:
MLP_scores = []
RF_scores = []
ADA_scores = []
Fold_No = 1
for train_indices, test_indices in kfold.split(feature_df):
    feature_train, feature_test = feature_df.iloc[train_indices], feature_df.iloc[test_indices]
    label_train, label_test = multi_class_df.iloc[train_indices], multi_class_df.iloc[test_indices]
    
    # Multi-Layered-Perceptron
    glioma_classifier_MLP.fit(feature_train, label_train)
    predictions = glioma_classifier_MLP.predict(feature_test) 
    accuracy_pct = accuracy_score(label_test, predictions)
    other_metrics = precision_recall_fscore_support(label_test, predictions, average="micro")
    MLP_scores.append((accuracy_pct, other_metrics))
    
    # Random Forest
    glioma_classifier_RF.fit(feature_train, label_train)
    predictions = glioma_classifier_RF.predict(feature_test) 
    accuracy_pct = accuracy_score(label_test, predictions)
    other_metrics = precision_recall_fscore_support(label_test, predictions, average="micro")
    RF_scores.append((accuracy_pct, other_metrics))

    # Ada Boost
    glioma_classifier_ada.fit(feature_train, label_train)
    predictions = glioma_classifier_ada.predict(feature_test) 
    accuracy_pct = accuracy_score(label_test, predictions)
    other_metrics = precision_recall_fscore_support(label_test, predictions, average="micro")
    ADA_scores.append((accuracy_pct, other_metrics))

Iteration 1, loss = 1.79494460
Iteration 2, loss = 1.57734546
Iteration 3, loss = 1.49291876
Iteration 4, loss = 1.46902625
Iteration 5, loss = 1.47356262
Iteration 6, loss = 1.45611875
Iteration 7, loss = 1.43373472
Iteration 8, loss = 1.42572888
Iteration 9, loss = 1.41897266
Iteration 10, loss = 1.40930819
Iteration 11, loss = 1.39828482
Iteration 12, loss = 1.39468361
Iteration 13, loss = 1.37888250
Iteration 14, loss = 1.37159245
Iteration 15, loss = 1.35292252
Iteration 16, loss = 1.35054983
Iteration 17, loss = 1.32626499
Iteration 18, loss = 1.31620262
Iteration 19, loss = 1.30154615
Iteration 20, loss = 1.30994947
Iteration 21, loss = 1.28133645
Iteration 22, loss = 1.27625624
Iteration 23, loss = 1.25892396
Iteration 24, loss = 1.25073187
Iteration 25, loss = 1.25036018
Iteration 26, loss = 1.26605101
Iteration 27, loss = 1.27177412
Iteration 28, loss = 1.24183726
Iteration 29, loss = 1.24731844
Iteration 30, loss = 1.24568591
Iteration 31, loss = 1.23975662
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.4s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 1.58706507
Iteration 2, loss = 1.53035176
Iteration 3, loss = 1.50549320
Iteration 4, loss = 1.49487747
Iteration 5, loss = 1.48666322
Iteration 6, loss = 1.47682064
Iteration 7, loss = 1.46743428
Iteration 8, loss = 1.45850771
Iteration 9, loss = 1.44989170
Iteration 10, loss = 1.44222560
Iteration 11, loss = 1.43241402
Iteration 12, loss = 1.42267002
Iteration 13, loss = 1.41242103
Iteration 14, loss = 1.40077731
Iteration 15, loss = 1.38949621
Iteration 16, loss = 1.38239148
Iteration 17, loss = 1.36764461
Iteration 18, loss = 1.35553273
Iteration 19, loss = 1.34706786
Iteration 20, loss = 1.33312246
Iteration 21, loss = 1.32285528
Iteration 22, loss = 1.31396809
Iteration 23, loss = 1.30361801
Iteration 24, loss = 1.29590958
Iteration 25, loss = 1.28579703
Iteration 26, loss = 1.27705365
Iteration 27, loss = 1.26923997
Iteration 28, loss = 1.26558972
Iteration 29, loss = 1.26013152
Iteration 30, loss = 1.24872994
Iteration 31, loss = 1.24141694
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.9s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 4.46379768
Iteration 2, loss = 2.93140159
Iteration 3, loss = 2.11976862
Iteration 4, loss = 1.77912812
Iteration 5, loss = 1.60599716
Iteration 6, loss = 1.53651926
Iteration 7, loss = 1.51870634
Iteration 8, loss = 1.51427663
Iteration 9, loss = 1.50725073
Iteration 10, loss = 1.50322500
Iteration 11, loss = 1.49924241
Iteration 12, loss = 1.49314145
Iteration 13, loss = 1.48821823
Iteration 14, loss = 1.48148657
Iteration 15, loss = 1.47786590
Iteration 16, loss = 1.47299161
Iteration 17, loss = 1.46857750
Iteration 18, loss = 1.46333373
Iteration 19, loss = 1.45786995
Iteration 20, loss = 1.45227432
Iteration 21, loss = 1.44661047
Iteration 22, loss = 1.44128571
Iteration 23, loss = 1.43272060
Iteration 24, loss = 1.42602547
Iteration 25, loss = 1.41969448
Iteration 26, loss = 1.40899980
Iteration 27, loss = 1.39868770
Iteration 28, loss = 1.39130257
Iteration 29, loss = 1.38156569
Iteration 30, loss = 1.37135240
Iteration 31, loss = 1.35839804
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 2.43699427
Iteration 2, loss = 1.82635770
Iteration 3, loss = 1.65474148
Iteration 4, loss = 1.56123137
Iteration 5, loss = 1.53167087
Iteration 6, loss = 1.51176517
Iteration 7, loss = 1.49766213
Iteration 8, loss = 1.49165827
Iteration 9, loss = 1.48966389
Iteration 10, loss = 1.48702694
Iteration 11, loss = 1.48030539
Iteration 12, loss = 1.47725915
Iteration 13, loss = 1.47265646
Iteration 14, loss = 1.46785756
Iteration 15, loss = 1.46510408
Iteration 16, loss = 1.46118762
Iteration 17, loss = 1.45831784
Iteration 18, loss = 1.45226637
Iteration 19, loss = 1.44842996
Iteration 20, loss = 1.44337421
Iteration 21, loss = 1.43588576
Iteration 22, loss = 1.43011759
Iteration 23, loss = 1.42120220
Iteration 24, loss = 1.41089563
Iteration 25, loss = 1.40053904
Iteration 26, loss = 1.39082400
Iteration 27, loss = 1.37879652
Iteration 28, loss = 1.36660110
Iteration 29, loss = 1.35606363
Iteration 30, loss = 1.34731181
Iteration 31, loss = 1.33488691
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.6s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 2.78468964
Iteration 2, loss = 1.68702311
Iteration 3, loss = 1.67939970
Iteration 4, loss = 1.65917090
Iteration 5, loss = 1.55890072
Iteration 6, loss = 1.53568557
Iteration 7, loss = 1.51482628
Iteration 8, loss = 1.49040509
Iteration 9, loss = 1.48694167
Iteration 10, loss = 1.47916950
Iteration 11, loss = 1.46875838
Iteration 12, loss = 1.46001644
Iteration 13, loss = 1.44946487
Iteration 14, loss = 1.43779719
Iteration 15, loss = 1.42887249
Iteration 16, loss = 1.41634440
Iteration 17, loss = 1.40435413
Iteration 18, loss = 1.39093019
Iteration 19, loss = 1.38063860
Iteration 20, loss = 1.36632712
Iteration 21, loss = 1.35292823
Iteration 22, loss = 1.33663294
Iteration 23, loss = 1.33244360
Iteration 24, loss = 1.31149494
Iteration 25, loss = 1.29569264
Iteration 26, loss = 1.28499727
Iteration 27, loss = 1.27840552
Iteration 28, loss = 1.26572550
Iteration 29, loss = 1.25058776
Iteration 30, loss = 1.24780126
Iteration 31, loss = 1.24086776
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.6s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 5.60426140
Iteration 2, loss = 2.55684307
Iteration 3, loss = 1.84276218
Iteration 4, loss = 1.71995990
Iteration 5, loss = 1.56468515
Iteration 6, loss = 1.60750519
Iteration 7, loss = 1.53200330
Iteration 8, loss = 1.53178775
Iteration 9, loss = 1.51860254
Iteration 10, loss = 1.51950396
Iteration 11, loss = 1.50158361
Iteration 12, loss = 1.49567628
Iteration 13, loss = 1.49288097
Iteration 14, loss = 1.48564631
Iteration 15, loss = 1.48224875
Iteration 16, loss = 1.47866614
Iteration 17, loss = 1.47314705
Iteration 18, loss = 1.47095138
Iteration 19, loss = 1.46778531
Iteration 20, loss = 1.46468939
Iteration 21, loss = 1.45902378
Iteration 22, loss = 1.45436926
Iteration 23, loss = 1.45147488
Iteration 24, loss = 1.44621878
Iteration 25, loss = 1.44384919
Iteration 26, loss = 1.43901182
Iteration 27, loss = 1.43044279
Iteration 28, loss = 1.42798549
Iteration 29, loss = 1.41920819
Iteration 30, loss = 1.41412209
Iteration 31, loss = 1.40499478
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.5s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 6.69423259
Iteration 2, loss = 3.95982236
Iteration 3, loss = 2.62403022
Iteration 4, loss = 1.73934790
Iteration 5, loss = 1.54405945
Iteration 6, loss = 1.57156767
Iteration 7, loss = 1.54240928
Iteration 8, loss = 1.50637129
Iteration 9, loss = 1.50385021
Iteration 10, loss = 1.50104331
Iteration 11, loss = 1.49375729
Iteration 12, loss = 1.48868067
Iteration 13, loss = 1.48750681
Iteration 14, loss = 1.48473806
Iteration 15, loss = 1.48021577
Iteration 16, loss = 1.47745065
Iteration 17, loss = 1.47476649
Iteration 18, loss = 1.47205924
Iteration 19, loss = 1.46973460
Iteration 20, loss = 1.46576363
Iteration 21, loss = 1.46401370
Iteration 22, loss = 1.45997349
Iteration 23, loss = 1.45574927
Iteration 24, loss = 1.45134820
Iteration 25, loss = 1.44622214
Iteration 26, loss = 1.44241299
Iteration 27, loss = 1.43867069
Iteration 28, loss = 1.43224588
Iteration 29, loss = 1.42667063
Iteration 30, loss = 1.42311939
Iteration 31, loss = 1.41547561
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.6s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 2.68220599
Iteration 2, loss = 1.85010372
Iteration 3, loss = 1.71717777
Iteration 4, loss = 1.52143771
Iteration 5, loss = 1.54673685
Iteration 6, loss = 1.53381937
Iteration 7, loss = 1.51676361
Iteration 8, loss = 1.49087333
Iteration 9, loss = 1.47749822
Iteration 10, loss = 1.47916082
Iteration 11, loss = 1.46808899
Iteration 12, loss = 1.46007430
Iteration 13, loss = 1.45247958
Iteration 14, loss = 1.44450794
Iteration 15, loss = 1.43784818
Iteration 16, loss = 1.43102439
Iteration 17, loss = 1.42119600
Iteration 18, loss = 1.41031272
Iteration 19, loss = 1.39681397
Iteration 20, loss = 1.38426516
Iteration 21, loss = 1.37857373
Iteration 22, loss = 1.36130693
Iteration 23, loss = 1.34521101
Iteration 24, loss = 1.33365042
Iteration 25, loss = 1.32040108
Iteration 26, loss = 1.30523203
Iteration 27, loss = 1.29009439
Iteration 28, loss = 1.28063205
Iteration 29, loss = 1.26875049
Iteration 30, loss = 1.25843058
Iteration 31, loss = 1.25049101
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.6s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 3.20099663
Iteration 2, loss = 1.79653868
Iteration 3, loss = 1.73643121
Iteration 4, loss = 1.61057077
Iteration 5, loss = 1.53935863
Iteration 6, loss = 1.53694258
Iteration 7, loss = 1.49768094
Iteration 8, loss = 1.49743975
Iteration 9, loss = 1.48235512
Iteration 10, loss = 1.48279004
Iteration 11, loss = 1.47934127
Iteration 12, loss = 1.47551374
Iteration 13, loss = 1.47425266
Iteration 14, loss = 1.46986838
Iteration 15, loss = 1.46724593
Iteration 16, loss = 1.46614403
Iteration 17, loss = 1.46341587
Iteration 18, loss = 1.46224951
Iteration 19, loss = 1.45886222
Iteration 20, loss = 1.45725158
Iteration 21, loss = 1.45475656
Iteration 22, loss = 1.45395578
Iteration 23, loss = 1.44947385
Iteration 24, loss = 1.44716751
Iteration 25, loss = 1.44396477
Iteration 26, loss = 1.44236428
Iteration 27, loss = 1.43794517
Iteration 28, loss = 1.43538211
Iteration 29, loss = 1.42825900
Iteration 30, loss = 1.42477725
Iteration 31, loss = 1.42534107
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.5s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


Iteration 1, loss = 2.14675491
Iteration 2, loss = 1.63114913
Iteration 3, loss = 1.53546996
Iteration 4, loss = 1.53876924
Iteration 5, loss = 1.52512696
Iteration 6, loss = 1.51766495
Iteration 7, loss = 1.50803400
Iteration 8, loss = 1.50311567
Iteration 9, loss = 1.49641589
Iteration 10, loss = 1.48878403
Iteration 11, loss = 1.48535694
Iteration 12, loss = 1.48038307
Iteration 13, loss = 1.47397060
Iteration 14, loss = 1.46925020
Iteration 15, loss = 1.46362882
Iteration 16, loss = 1.45872300
Iteration 17, loss = 1.45407852
Iteration 18, loss = 1.45071642
Iteration 19, loss = 1.44247302
Iteration 20, loss = 1.43512813
Iteration 21, loss = 1.43156937
Iteration 22, loss = 1.41612408
Iteration 23, loss = 1.41204814
Iteration 24, loss = 1.40153970
Iteration 25, loss = 1.39373023
Iteration 26, loss = 1.38279092
Iteration 27, loss = 1.37190994
Iteration 28, loss = 1.36249795
Iteration 29, loss = 1.35581368
Iteration 30, loss = 1.35172889
Iteration 31, loss = 1.33527295
Iteration 32, los

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.6s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    1.4s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    2.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    3.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    5.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    0.2s


In [183]:
print(f"Average Accuracy Of MLP: {(sum([i[0] for i in MLP_scores])/ len(MLP_scores))*100}%")
print(f"Average Accuracy Of Random Forest: {(sum(i[0] for i in RF_scores)/ len(RF_scores))*100}%")
print(f"Average Accuracy Of AdaBoost: {(sum([i[0] for i in ADA_scores])/ len(ADA_scores))*100}%")


Average Accuracy Of MLP: 56.50889271371199%
Average Accuracy Of Random Forest: 48.75645438898451%
Average Accuracy Of AdaBoost: 54.84366035570856%


## Using KNN and SVM

In [184]:
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier


In [185]:
glioma_classifier_SVM = SVC(C=0.70, kernel="poly",degree=3, verbose=True)
glioma_classifier_KNN = KNeighborsClassifier(n_neighbors=18, weights="distance")
glioma_classifier_KNN

In [186]:
SVM_scores, KNN_Scores = [],[]
for train_indices, test_indices in kfold.split(feature_df):
    feature_train, feature_test = feature_df.iloc[train_indices], feature_df.iloc[test_indices]
    label_train, label_test = multi_class_df.iloc[train_indices], multi_class_df.iloc[test_indices]
    # SVM
    glioma_classifier_SVM.fit(feature_train, label_train)
    predictions = glioma_classifier_SVM.predict(feature_test)
    accuracy_pct = accuracy_score(label_test, predictions)
    other_metrics = precision_recall_fscore_support(label_test, predictions, average="micro")
    SVM_scores.append((accuracy_pct, other_metrics))
    print("starting KNN")
    # KNN
    glioma_classifier_KNN.fit(feature_train, label_train)
    predictions = glioma_classifier_KNN.predict(feature_test)
    accuracy_pct = accuracy_score(label_test, predictions)
    other_metrics = precision_recall_fscore_support(label_test, predictions, average="micro")
    KNN_Scores.append((accuracy_pct, other_metrics))

[LibSVM]*..
*.
*.
*
optimization finished, #iter = 385
obj = -75.599429, rho = 0.999590
nSV = 116, nBSV = 96
*
optimization finished, #iter = 108
obj = -75.510374, rho = 0.975929
nSV = 110, nBSV = 107
*..
*
optimization finished, #iter = 351
obj = -75.599103, rho = 1.000188
nSV = 115, nBSV = 100
...
*.
*.
*
optimization finished, #iter = 690
obj = -75.579970, rho = 0.997996
nSV = 113, nBSV = 105
*
optimization finished, #iter = 60
obj = -60.718242, rho = -1.305446
nSV = 90, nBSV = 88
*.
*
optimization finished, #iter = 437
obj = -160.688218, rho = 0.979724
nSV = 235, nBSV = 226
*
optimization finished, #iter = 119
obj = -149.170066, rho = 1.060086
nSV = 216, nBSV = 214
.
*.....
*.
*
optimization finished, #iter = 1398
obj = -132.383425, rho = -0.997733
nSV = 193, nBSV = 186
*.
*.
*
optimization finished, #iter = 223
obj = -95.844219, rho = -1.010143
nSV = 139, nBSV = 135
*
optimization finished, #iter = 141
obj = -143.507233, rho = 1.148462
nSV = 210, nBSV = 208
*
optimization finished

In [187]:
print(f"Average Accuracy Of SVM: {(sum([i[0] for i in SVM_scores])/ len(SVM_scores))*100}%")
print(f"Average Accuracy Of KNN: {(sum(i[0] for i in KNN_Scores)/ len(KNN_Scores))*100}%")


Average Accuracy Of SVM: 47.307802639127935%
Average Accuracy Of KNN: 47.90877796901893%


# fine tuning MLP due to highest accuracy

In [188]:
from sklearn.model_selection import GridSearchCV, train_test_split

In [198]:
param_grid = {
    'hidden_layer_sizes': [(24,24,24), (30, 30, 30), (40, 40, 40), (40, 40, 40, 40),(43,44, 45, 46)],
    'activation': ['relu', 'tanh', 'logistic'],
    'solver': ['adam', 'sgd'],
    'alpha': [0.0001, 0.0005, 0.00095, 0.001, 0.0025, 0.005],
    'learning_rate_init': [0.001, 0.005,0.0075, 0.01, 0.025, 0.05],
    'batch_size': ['auto', 16, 32],
}
glioma_classifier_MLP_finetuned = MLPClassifier(max_iter=600, random_state=42)

In [199]:
gridsearch = GridSearchCV(estimator=glioma_classifier_MLP_finetuned, param_grid=param_grid, scoring="accuracy", cv=5, n_jobs=-1)

In [200]:
    feature_train, feature_test, label_train, label_test = train_test_split(feature_df, multi_class_df, shuffle=True, test_size=0.2)
    # Multi-Layered-Perceptron
    gridsearch.fit(feature_train, label_train)

""" 
for train_indices, test_indices in kfold.split(feature_df):
    feature_train, feature_test = feature_df.iloc[train_indices], feature_df.iloc[test_indices]
    label_train, label_test = multi_class_df.iloc[train_indices], multi_class_df.iloc[test_indices]
    glioma_classifier_MLP.fit()
    predictions = glioma_classifier_MLP.predict(feature_test) 
    accuracy_pct = accuracy_score(label_test, predictions)
    other_metrics = precision_recall_fscore_support(label_test, predictions, average="micro")
    MLP_scores.append((accuracy_pct, other_metrics))
"""



' \nfor train_indices, test_indices in kfold.split(feature_df):\nfeature_train, feature_test = feature_df.iloc[train_indices], feature_df.iloc[test_indices]\nlabel_train, label_test = multi_class_df.iloc[train_indices], multi_class_df.iloc[test_indices]\nglioma_classifier_MLP.fit()\npredictions = glioma_classifier_MLP.predict(feature_test) \naccuracy_pct = accuracy_score(label_test, predictions)\nother_metrics = precision_recall_fscore_support(label_test, predictions, average="micro")\nMLP_scores.append((accuracy_pct, other_metrics))\n'

In [201]:
best_glioma_mlp = gridsearch.best_estimator_

In [202]:
MLP_scores = []
for train_indices, test_indices in kfold.split(feature_df):
    feature_train, feature_test = feature_df.iloc[train_indices], feature_df.iloc[test_indices]
    label_train, label_test = multi_class_df.iloc[train_indices], multi_class_df.iloc[test_indices]
    best_glioma_mlp.fit(feature_train, label_train)
    predictions = best_glioma_mlp.predict(feature_test) 
    accuracy_pct = accuracy_score(label_test, predictions)
    other_metrics = precision_recall_fscore_support(label_test, predictions, average="micro")
    MLP_scores.append((accuracy_pct, other_metrics))



In [203]:
print(f"Average Accuracy: {(sum(i[0] for i in MLP_scores)/ len(MLP_scores))*100}%")

Average Accuracy: 55.05880665519219%


In [204]:
#Saving model

In [205]:
import pickle
with open('model.pkl','wb') as f:
    pickle.dump(best_glioma_mlp,f)