# **Simple Graph Creation and GNN Model Testing**


# Work Environment


#### File Prerequisites

Run the notebook preferably on google colab.


Upload the following csvs in the path: '/thesis/Data_Preprocessing/':
* ADNI_dataset_ROIs_and_SNPs.csv
* ROIs.csv


#### Setting Google Colab as Filesystem

In [1]:
from google.colab import drive
path = "/gdrive/My Drive/thesis/Data_Preprocessing/"
drive.mount('/gdrive')
import os

Mounted at /gdrive


#### Libraries and Installations

In [2]:
import copy
import random
import time

import numpy as np
import pandas as pd

import itertools

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import trange, tqdm

In [3]:
import scipy.sparse as sp

from sklearn import metrics
from sklearn import decomposition
from sklearn import manifold
from sklearn.model_selection import ParameterGrid
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

import networkx as nx

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms

In [4]:
def format_pytorch_version(version):
  return version.split('+')[0]

TORCH_version = torch.__version__
TORCH = format_pytorch_version(TORCH_version)

def format_cuda_version(version):
  return 'cu' + version.replace('.', '')

CUDA_version = torch.version.cuda
CUDA = format_cuda_version(CUDA_version)

!pip install torch-scatter     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-sparse      -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-cluster     -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html
!pip install torch-geometric

Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu121.html
Collecting torch-scatter
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_scatter-2.1.2%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m58.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2+pt21cu121
Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu121.html
Collecting torch-sparse
  Downloading https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_sparse-0.6.18%2Bpt21cu121-cp310-cp310-linux_x86_64.whl (5.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-sparse
Successfully installed torch-sparse-0.6.18+pt21cu121
Looking in links: https://pytorch-geometric.com/whl/torch-2.1.0+cu121.html
Collecting torch-cluster
 

#### Versions

Put the following in a txt file and run the following command to instantly download the version requirements for py projects:  
pip install -r /path/to/requirements.txt

In [5]:
!python --version # python v
print("pandas ", pd.__version__) # pandas v
print("numpy ", np.__version__) # numpy v
print("torch ", torch.__version__) # torch v
# print("torchvision ", torchvision.__version__) # torchvision v
# print("pytorch lightning ", pl.__version__) # pytorch lightning v
# print("torch geometric ", pyg.__version__) # torch geometric v

Python 3.10.12
pandas  1.5.3
numpy  1.25.2
torch  2.1.0+cu121


In [6]:
!pip list

Package                          Version
-------------------------------- ---------------------
absl-py                          1.4.0
aiohttp                          3.9.3
aiosignal                        1.3.1
alabaster                        0.7.16
albumentations                   1.3.1
altair                           4.2.2
annotated-types                  0.6.0
anyio                            3.7.1
appdirs                          1.4.4
argon2-cffi                      23.1.0
argon2-cffi-bindings             21.2.0
array-record                     0.5.0
arviz                            0.15.1
astropy                          5.3.4
astunparse                       1.6.3
async-timeout                    4.0.3
atpublic                         4.0
attrs                            23.2.0
audioread                        3.0.1
autograd                         1.6.2
Babel                            2.14.0
backcall                         0.2.0
beautifulsoup4                   4.12.3
bi

#### Setting Up Device

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#### Seeds

In [8]:
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Datasets

#### ADNI

In [9]:
data = pd.read_csv(path + 'ADNI_dataset_ROIs_and_SNPs.csv', encoding='ISO-8859-1')

# Data Preparation


#### Studying the Data




In [10]:
pd.set_option('display.max_columns', None)
data.head()

Unnamed: 0,PTID,Age,Date,Sex,DLICV_baseline,diagnosis,MUSE_Volume_4,MUSE_Volume_11,MUSE_Volume_23,MUSE_Volume_30,MUSE_Volume_31,MUSE_Volume_32,MUSE_Volume_35,MUSE_Volume_36,MUSE_Volume_37,MUSE_Volume_38,MUSE_Volume_39,MUSE_Volume_40,MUSE_Volume_41,MUSE_Volume_47,MUSE_Volume_48,MUSE_Volume_49,MUSE_Volume_50,MUSE_Volume_51,MUSE_Volume_52,MUSE_Volume_55,MUSE_Volume_56,MUSE_Volume_57,MUSE_Volume_58,MUSE_Volume_59,MUSE_Volume_60,MUSE_Volume_61,MUSE_Volume_62,MUSE_Volume_71,MUSE_Volume_72,MUSE_Volume_73,MUSE_Volume_75,MUSE_Volume_76,MUSE_Volume_81,MUSE_Volume_82,MUSE_Volume_83,MUSE_Volume_84,MUSE_Volume_85,MUSE_Volume_86,MUSE_Volume_87,MUSE_Volume_88,MUSE_Volume_89,MUSE_Volume_90,MUSE_Volume_91,MUSE_Volume_92,MUSE_Volume_93,MUSE_Volume_94,MUSE_Volume_95,MUSE_Volume_100,MUSE_Volume_101,MUSE_Volume_102,MUSE_Volume_103,MUSE_Volume_104,MUSE_Volume_105,MUSE_Volume_106,MUSE_Volume_107,MUSE_Volume_108,MUSE_Volume_109,MUSE_Volume_112,MUSE_Volume_113,MUSE_Volume_114,MUSE_Volume_115,MUSE_Volume_116,MUSE_Volume_117,MUSE_Volume_118,MUSE_Volume_119,MUSE_Volume_120,MUSE_Volume_121,MUSE_Volume_122,MUSE_Volume_123,MUSE_Volume_124,MUSE_Volume_125,MUSE_Volume_128,MUSE_Volume_129,MUSE_Volume_132,MUSE_Volume_133,MUSE_Volume_134,MUSE_Volume_135,MUSE_Volume_136,MUSE_Volume_137,MUSE_Volume_138,MUSE_Volume_139,MUSE_Volume_140,MUSE_Volume_141,MUSE_Volume_142,MUSE_Volume_143,MUSE_Volume_144,MUSE_Volume_145,MUSE_Volume_146,MUSE_Volume_147,MUSE_Volume_148,MUSE_Volume_149,MUSE_Volume_150,MUSE_Volume_151,MUSE_Volume_152,MUSE_Volume_153,MUSE_Volume_154,MUSE_Volume_155,MUSE_Volume_156,MUSE_Volume_157,MUSE_Volume_160,MUSE_Volume_161,MUSE_Volume_162,MUSE_Volume_163,MUSE_Volume_164,MUSE_Volume_165,MUSE_Volume_166,MUSE_Volume_167,MUSE_Volume_168,MUSE_Volume_169,MUSE_Volume_170,MUSE_Volume_171,MUSE_Volume_172,MUSE_Volume_173,MUSE_Volume_174,MUSE_Volume_175,MUSE_Volume_176,MUSE_Volume_177,MUSE_Volume_178,MUSE_Volume_179,MUSE_Volume_180,MUSE_Volume_181,MUSE_Volume_182,MUSE_Volume_183,MUSE_Volume_184,MUSE_Volume_185,MUSE_Volume_186,MUSE_Volume_187,MUSE_Volume_190,MUSE_Volume_191,MUSE_Volume_192,MUSE_Volume_193,MUSE_Volume_194,MUSE_Volume_195,MUSE_Volume_196,MUSE_Volume_197,MUSE_Volume_198,MUSE_Volume_199,MUSE_Volume_200,MUSE_Volume_201,MUSE_Volume_202,MUSE_Volume_203,MUSE_Volume_204,MUSE_Volume_205,MUSE_Volume_206,MUSE_Volume_207,rs4575098,rs6656401,rs2093760,rs4844610,rs4663105,rs6733839,rs10933431,rs35349669,rs6448453,rs190982,rs9271058,rs9473117,rs9381563,rs10948363,rs2718058,rs4723711,rs1859788,rs1476679,rs12539172,rs10808026,rs7810606,rs11771145,rs28834970,rs73223431,rs4236673,rs9331896,rs11257238,rs7920721,rs3740688,rs10838725,rs983392,rs7933202,rs2081545,rs867611,rs10792832,rs3851179,rs17125924,rs17125944,rs10498633,rs12881735,rs12590654,rs442495,rs59735493,rs113260531,rs28394864,rs111278892,rs3752246,rs4147929,rs41289512,rs3865444,rs6024870,rs6014724,rs7274581,rs429358
0,002_S_0295,84.742466,2006-04-18,M,1485405.375,CN,1873.124153,1586.249283,302.695176,352.265466,1062.069832,1159.101038,20657.100036,3254.764153,3118.709527,52564.546547,52086.773326,14018.899912,14294.173225,3600.701497,3368.670352,750.93716,587.460672,16514.289409,23626.044006,1544.061802,1339.452519,4182.888734,4105.896581,7365.93417,8007.18388,4747.146291,4789.333772,4638.513528,2017.616275,2812.850291,352.265466,380.742015,107813.271569,102646.359842,26682.526999,30497.329962,51500.367341,52266.07012,63530.127527,63595.518123,426.093557,543.163817,3778.943604,3226.287604,2316.092703,2299.21771,11240.854293,4464.490169,4446.56049,3624.959299,3474.139054,1558.82742,741.444977,9202.144277,10142.925102,3385.545344,3549.021833,2759.061253,2699.998779,5983.239482,4772.45878,2428.944214,2103.045924,954.491756,976.640183,2716.873772,2089.334993,8379.488399,7090.660857,1878.397588,1371.09313,6936.676551,6667.73136,11534.057285,11793.510293,9326.597346,7320.582628,2008.124092,1507.147756,3968.787268,4167.068429,1769.764825,1258.241619,14127.532675,13099.212828,4775.622841,5759.645833,3586.990566,3536.365589,942.890199,769.921527,2916.209619,1978.592855,4195.544978,4115.388764,11762.924369,11668.002537,3131.365772,4021.521619,3495.232795,5277.653864,2280.233344,2930.975237,1168.593222,1541.952428,3406.639085,3815.85765,11444.408888,11739.721255,3415.076581,3348.631299,2000.741283,2098.827176,1443.866535,1758.163268,8868.863178,8873.081926,2589.256642,2578.709772,1332.06971,1373.202504,10701.909224,10142.925102,1171.757283,1339.452519,1123.24168,1319.413466,9906.675209,12091.986721,3378.162535,3411.91252,6568.59078,7252.027971,2965.779909,2489.061375,9220.073956,7986.090139,4336.873039,4313.669925,8816.128827,7512.535666,2559.725405,2955.233039,994.569863,1110.585435,0,0,0,0,1,1,1,0,0,1,0,0,0,0,1,1,0,0,0,1,1,1,0,0,1,1,1,1,1,1,1,1,1,1,1,1,0,0,1,1,1,1,0,0,0,1,1,1,0,0,0,0,0,1
1,002_S_0413,76.283562,2006-05-02,F,1364116.0,CN,2131.516933,1505.034469,384.959763,340.663023,988.239172,1051.520229,18405.295394,3021.670466,3151.396633,45240.682248,43280.024169,12993.710346,13640.231811,3586.981241,3350.731962,475.662611,761.482051,16498.426213,13491.521327,1096.871653,1092.652915,2941.514461,3243.154165,6479.980225,6874.432146,4181.823176,4367.447609,3893.894367,1782.416435,2235.930676,503.084402,371.248867,103124.919662,101385.745282,29939.322695,27712.884177,48141.064022,54393.232442,54684.325304,53309.017001,504.139086,497.810981,3042.764152,2457.414376,2021.829767,1968.040869,11103.716114,2839.210085,3769.441622,3323.310171,3544.79387,1840.424071,1424.878464,9169.425142,8470.169464,2715.812024,3414.013019,2874.014667,2941.514461,5370.452361,6264.824631,2298.157049,2118.860721,1387.964514,967.145486,2358.274053,2307.649208,7267.829383,8130.561125,2144.173144,1907.923865,6878.650883,7117.009531,12912.499657,11882.073114,8530.286468,8347.826087,2611.398281,2317.141366,4837.836799,4199.752808,1187.574501,1671.674586,15983.740284,14967.024637,4672.251366,4851.547694,4239.830811,4672.251366,1018.825016,606.443462,3551.121975,3038.545415,6675.096817,4862.094537,12907.226235,11194.418962,3109.209261,3177.76374,3142.959158,4005.690901,2556.554698,2594.523332,1206.558818,1193.902606,3702.996512,3976.159741,10143.953418,11154.34096,3077.568733,3113.427999,1596.792002,1522.964102,1555.659315,1731.79159,9037.589607,8741.223324,2840.26477,2546.007855,1505.034469,1198.121344,12067.697547,11828.284215,902.809745,1252.964926,948.161169,971.364223,10644.928452,12009.689912,3580.653135,4822.016534,8888.879123,7140.212585,4647.993628,3295.888379,9925.633772,11152.231591,7421.813288,6059.161196,6676.151501,7780.405944,2469.015903,2239.094729,1097.926337,744.607103,1,0,0,0,0,0,0,1,0,0,0,0,0,0,2,2,1,1,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,1,1,0,1,0,0,0,0
2,002_S_0559,79.223288,2006-05-23,M,1570479.625,CN,2366.71768,3157.732947,512.577893,552.656,1172.81197,1141.171359,23368.700372,3041.717375,3198.865741,49984.782089,49158.96215,17229.36721,17536.281134,4728.161925,4206.091848,764.648092,640.195023,16674.601836,16115.617714,1304.647848,1335.233771,4119.607512,4195.544978,7287.88733,7510.426292,5413.70849,5620.427146,3802.146719,1700.155481,2524.920733,563.20287,591.67942,115548.346197,110692.567142,30553.228374,27892.253015,54122.319281,61104.347374,63260.127649,63511.143161,543.163817,640.195023,3323.31881,3096.5611,2498.553558,2377.26455,12633.041163,3755.740489,4386.443329,3620.740551,3068.08455,2433.162962,2104.100611,11164.916827,10359.135942,3451.990627,2741.131573,3804.256093,3905.506047,4038.396612,3980.388825,2408.905161,2620.897253,1344.725955,1242.421313,3342.303176,2947.85023,8735.972613,10286.362537,1908.983512,2000.741283,5694.255238,4487.693284,12286.049133,15746.477256,9619.800338,9030.230292,2045.038138,2290.780214,3946.638841,4054.216917,1891.053833,1156.991664,16048.117744,14235.110752,4362.185528,5427.419421,4695.466627,4353.748032,653.905954,1110.585435,1854.139787,2276.014596,6314.411208,5783.903635,13022.220675,12608.783362,3445.662505,2907.772123,5668.94275,4525.662016,2641.990993,2326.639573,1201.288519,1131.679176,3535.310902,3904.45136,10796.831056,11263.00272,3952.966963,4306.287116,1615.780519,1374.257191,1595.741466,1631.600825,8051.480735,12965.267576,3138.748581,3280.076642,1138.007298,1420.66342,11015.15127,12643.588034,1885.780397,1861.522596,932.343328,1025.155787,11981.244583,11908.471179,4343.201161,5510.739696,7549.449712,5636.247452,2599.803512,2494.33481,7430.270078,6746.832887,7983.980765,7081.168674,10475.151514,11083.705926,3069.139237,2872.967451,1051.522962,1274.061924,0,0,0,0,1,0,1,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1,0,0,2,0,0,0,0,1,0,0,0,0
3,002_S_0619,77.447945,2006-06-01,M,1859348.25,Dementia,5124.734093,2981.605944,280.547287,356.484899,876.4466,908.087272,21112.765397,2883.519862,2848.715123,56650.512928,55319.495347,13810.098417,14307.911649,3485.747309,3231.567248,1810.901098,1584.142953,47125.61612,60669.932896,1393.244235,1401.681747,3791.607134,3956.138626,7059.03381,7534.698571,5230.202998,5155.320075,4848.405562,2089.339007,3279.028256,557.930507,343.82863,105178.865483,105828.553938,30496.333873,30318.091423,52001.443596,58340.124785,64032.281587,63620.952857,546.328928,608.555582,3098.676428,2852.933879,2209.573559,2206.409492,12681.581134,2698.949278,4427.584631,3850.669721,3989.888675,2088.284318,2110.432788,10630.210932,9738.998685,2335.081556,3525.825493,3081.801403,3971.958961,4032.076237,5876.727385,1759.221335,1517.697543,1132.736039,1098.98599,2337.190934,1737.072865,9832.866011,8968.02099,2304.495574,2317.151842,8209.699563,8048.332138,12752.2453,13784.78588,9587.123462,11270.407185,3097.621739,2240.159542,4577.350476,4796.725798,1399.572369,1617.893002,17917.057577,16677.797943,6813.291261,7985.050795,5135.280983,4916.96035,831.094971,594.844624,2534.417786,2599.808508,5705.867759,4306.29539,11084.781913,13730.996738,5032.976145,4884.264989,4352.701708,5000.280785,2023.948286,2122.034368,1509.26003,1475.509981,5866.180495,5736.453741,11578.376388,11748.181325,3241.05945,2811.801007,1759.221335,2247.542365,2254.925188,2166.331308,9747.436198,10617.554664,2975.277809,3174.61404,1750.783823,1521.916299,12143.689718,12557.127826,1405.900503,1814.065166,1921.643449,1717.033773,12618.299791,13427.246292,3545.864585,3870.708812,7061.143188,8703.294038,3338.090842,2569.222525,10113.413298,11186.032061,7032.666584,6352.392146,7272.080998,7162.393337,2489.066157,2506.995871,1155.939198,1574.650751,0,1,1,1,0,0,0,2,1,1,0,0,0,0,1,1,1,1,1,0,1,1,1,1,0,0,0,0,1,1,0,0,0,0,0,0,1,1,0,0,0,1,0,0,1,0,0,0,1,1,0,0,0,2
4,002_S_0685,89.561644,2006-07-06,F,1372862.125,CN,2941.520445,1693.826402,278.437217,328.007479,910.194387,966.092767,19718.417441,3275.856043,3304.332576,48017.763654,46338.702862,15370.999989,15619.905986,3663.980648,3558.512005,850.07726,378.632427,19563.378537,20767.830436,1211.834705,1276.170577,3940.308492,3830.621103,6907.141411,7299.484762,4610.034373,4816.752913,4007.808423,1973.318305,3543.746395,275.273157,261.562234,100127.710645,97369.705638,28625.244318,26733.136868,50063.855322,54914.358201,58502.401426,55236.037561,503.085426,562.147866,3130.309316,2807.575269,2454.255316,2260.193013,10907.567029,2037.654177,3700.894673,3134.528061,3218.902976,1961.716754,1551.443734,6058.118837,6199.446819,3072.301562,3788.433646,2181.091531,2566.052077,4183.941056,4580.503153,1784.529435,1827.771578,930.233429,866.952243,2598.747356,1822.498146,5713.236376,5468.549125,1692.771716,1665.349868,6355.54041,6073.939134,10347.528536,10602.762652,7501.984556,7832.101408,2016.560449,1691.717029,3852.769518,4266.206598,996.678674,1531.404692,13870.181203,12898.815004,3552.183887,3546.910454,3434.059007,3966.675652,834.256964,632.811856,2536.520857,2079.841634,6290.149851,3814.800807,10999.324748,9911.943042,3577.496361,3192.535815,3371.832507,3824.292985,1453.357897,1714.92013,1076.834842,984.022436,3021.676614,3154.567104,7552.609505,7804.679561,3350.738779,3221.012348,1459.686015,1302.537737,855.350692,1380.584533,7132.844307,8861.475361,2633.552009,2252.810208,986.131809,1033.592699,9263.310889,9459.482565,714.022711,719.296143,1048.358309,1097.928571,10256.825504,9957.294558,3674.527512,3801.089883,7668.625012,5915.73617,2531.247425,2325.583572,7989.249685,8070.46054,4039.449016,3574.332301,6934.563258,6778.469667,2066.130711,2608.239534,434.530808,710.858652,0,0,0,0,0,0,1,1,0,0,0,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0,2,2,2,0,0,0,0,1,2,2,0,0,0,0,0,2,0,0,2,1,1,1,0,0,0,0,0,0


In [11]:
data.describe()

Unnamed: 0,Age,DLICV_baseline,MUSE_Volume_4,MUSE_Volume_11,MUSE_Volume_23,MUSE_Volume_30,MUSE_Volume_31,MUSE_Volume_32,MUSE_Volume_35,MUSE_Volume_36,MUSE_Volume_37,MUSE_Volume_38,MUSE_Volume_39,MUSE_Volume_40,MUSE_Volume_41,MUSE_Volume_47,MUSE_Volume_48,MUSE_Volume_49,MUSE_Volume_50,MUSE_Volume_51,MUSE_Volume_52,MUSE_Volume_55,MUSE_Volume_56,MUSE_Volume_57,MUSE_Volume_58,MUSE_Volume_59,MUSE_Volume_60,MUSE_Volume_61,MUSE_Volume_62,MUSE_Volume_71,MUSE_Volume_72,MUSE_Volume_73,MUSE_Volume_75,MUSE_Volume_76,MUSE_Volume_81,MUSE_Volume_82,MUSE_Volume_83,MUSE_Volume_84,MUSE_Volume_85,MUSE_Volume_86,MUSE_Volume_87,MUSE_Volume_88,MUSE_Volume_89,MUSE_Volume_90,MUSE_Volume_91,MUSE_Volume_92,MUSE_Volume_93,MUSE_Volume_94,MUSE_Volume_95,MUSE_Volume_100,MUSE_Volume_101,MUSE_Volume_102,MUSE_Volume_103,MUSE_Volume_104,MUSE_Volume_105,MUSE_Volume_106,MUSE_Volume_107,MUSE_Volume_108,MUSE_Volume_109,MUSE_Volume_112,MUSE_Volume_113,MUSE_Volume_114,MUSE_Volume_115,MUSE_Volume_116,MUSE_Volume_117,MUSE_Volume_118,MUSE_Volume_119,MUSE_Volume_120,MUSE_Volume_121,MUSE_Volume_122,MUSE_Volume_123,MUSE_Volume_124,MUSE_Volume_125,MUSE_Volume_128,MUSE_Volume_129,MUSE_Volume_132,MUSE_Volume_133,MUSE_Volume_134,MUSE_Volume_135,MUSE_Volume_136,MUSE_Volume_137,MUSE_Volume_138,MUSE_Volume_139,MUSE_Volume_140,MUSE_Volume_141,MUSE_Volume_142,MUSE_Volume_143,MUSE_Volume_144,MUSE_Volume_145,MUSE_Volume_146,MUSE_Volume_147,MUSE_Volume_148,MUSE_Volume_149,MUSE_Volume_150,MUSE_Volume_151,MUSE_Volume_152,MUSE_Volume_153,MUSE_Volume_154,MUSE_Volume_155,MUSE_Volume_156,MUSE_Volume_157,MUSE_Volume_160,MUSE_Volume_161,MUSE_Volume_162,MUSE_Volume_163,MUSE_Volume_164,MUSE_Volume_165,MUSE_Volume_166,MUSE_Volume_167,MUSE_Volume_168,MUSE_Volume_169,MUSE_Volume_170,MUSE_Volume_171,MUSE_Volume_172,MUSE_Volume_173,MUSE_Volume_174,MUSE_Volume_175,MUSE_Volume_176,MUSE_Volume_177,MUSE_Volume_178,MUSE_Volume_179,MUSE_Volume_180,MUSE_Volume_181,MUSE_Volume_182,MUSE_Volume_183,MUSE_Volume_184,MUSE_Volume_185,MUSE_Volume_186,MUSE_Volume_187,MUSE_Volume_190,MUSE_Volume_191,MUSE_Volume_192,MUSE_Volume_193,MUSE_Volume_194,MUSE_Volume_195,MUSE_Volume_196,MUSE_Volume_197,MUSE_Volume_198,MUSE_Volume_199,MUSE_Volume_200,MUSE_Volume_201,MUSE_Volume_202,MUSE_Volume_203,MUSE_Volume_204,MUSE_Volume_205,MUSE_Volume_206,MUSE_Volume_207,rs4575098,rs6656401,rs2093760,rs4844610,rs4663105,rs6733839,rs10933431,rs35349669,rs6448453,rs190982,rs9271058,rs9473117,rs9381563,rs10948363,rs2718058,rs4723711,rs1859788,rs1476679,rs12539172,rs10808026,rs7810606,rs11771145,rs28834970,rs73223431,rs4236673,rs9331896,rs11257238,rs7920721,rs3740688,rs10838725,rs983392,rs7933202,rs2081545,rs867611,rs10792832,rs3851179,rs17125924,rs17125944,rs10498633,rs12881735,rs12590654,rs442495,rs59735493,rs113260531,rs28394864,rs111278892,rs3752246,rs4147929,rs41289512,rs3865444,rs6024870,rs6014724,rs7274581,rs429358
count,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0,1567.0
mean,73.81393,1428763.0,1760.170636,1747.376593,354.51935,390.800399,934.421522,982.750502,20204.546261,3176.595664,3073.943628,48502.767251,47544.487474,13477.859096,13760.769017,3558.694047,3263.864868,789.089259,745.75441,20072.757205,22245.285413,1357.105899,1375.6513,3946.335931,3985.483514,6942.422294,7132.958883,4462.614985,4633.40334,4207.894972,2122.038855,2863.019536,342.218076,316.213274,93517.13819,91003.370739,23704.286179,23757.460942,45064.038634,47594.723132,54939.594856,54196.217571,468.51749,513.857501,3058.296978,2746.387878,2103.066347,2056.08248,11296.94778,3133.491107,4067.248729,3657.448684,3649.463509,1536.637447,1425.383938,7690.903778,7393.917601,2897.849903,2993.546567,3002.885283,3149.397027,4497.613369,4400.981967,2129.288967,2022.024455,1410.440053,1316.011305,2765.047791,2371.482681,7484.910847,7112.47125,1877.960776,1831.652737,6845.064304,6559.150427,11411.27218,11106.140979,8425.185999,7683.311672,2169.168064,1885.165281,4192.520679,3978.033455,1482.773103,1545.337928,15593.306724,15546.883475,4457.669816,4919.097513,3933.589762,4376.999607,807.345012,568.688612,2528.324933,2388.297442,5984.481527,4675.459143,12776.632068,12275.818579,3140.713031,3230.752465,3871.49267,3879.486379,2489.984487,2312.472013,1227.131184,1482.934727,3590.328637,3894.922829,9753.021226,9578.031103,3148.077039,3262.949261,1901.655197,1730.543287,1759.374385,1948.75505,8385.291181,9363.44434,2932.532682,2754.70679,1340.007647,1443.574321,11016.685765,11075.781712,1325.970363,1589.365488,1426.429219,1501.694075,11751.565902,12097.737733,4034.722059,4402.293778,7521.862398,7315.938942,3409.253657,2976.968274,8704.286336,8605.153094,5984.63252,6052.626847,8006.932423,7733.883457,2697.332343,2807.495753,1171.346482,1299.51021,0.454371,0.377154,0.394384,0.376516,0.848756,0.789407,0.441608,0.938098,0.587109,0.782387,0.53478,0.577537,0.765795,0.572431,0.751755,0.713465,0.632419,0.555839,0.575622,0.364391,0.959796,0.68411,0.733886,0.736439,0.768347,0.776643,0.797064,0.824505,0.88896,0.643906,0.790045,0.758137,0.775367,0.609445,0.703255,0.703255,0.1806,0.181238,0.444161,0.449266,0.664965,0.649011,0.57626,0.264837,0.950223,0.343331,0.355456,0.360562,0.167837,0.590938,0.176133,0.181876,0.178685,0.569241
std,7.149737,143870.6,777.161321,579.871305,65.326396,70.733256,162.643803,167.684822,2243.884699,489.53855,441.699929,5120.497417,5020.592049,1596.151189,1612.511285,558.676487,518.260944,659.717463,628.743527,10932.51682,12187.913957,167.722216,169.702897,513.177597,512.382842,698.46162,699.304528,468.937838,491.600299,575.009714,298.281983,412.232589,113.936015,117.329251,12341.786604,11942.339011,3414.91706,3624.056625,5779.393875,6056.089689,6784.429505,6889.202109,72.482932,75.253639,397.740369,361.328293,287.241275,270.482462,1723.699442,723.395345,817.928383,565.9253,574.682677,303.696144,289.250441,1482.041218,1552.176925,603.259404,633.180136,569.18226,576.179401,823.90223,823.070353,426.024133,425.138293,297.174011,301.909503,491.996451,452.719165,1228.9528,1132.737766,297.079053,310.391373,1155.142359,1084.372462,1730.931697,1668.070613,1254.502,1158.825576,405.91545,399.729942,647.64952,622.18836,371.07867,354.417481,2367.168831,2409.018703,857.899488,945.782745,554.783199,607.243284,212.361112,186.858918,560.284508,517.459049,1112.149648,792.038558,2083.562888,2082.830997,611.571923,714.194427,702.847957,677.818067,616.539627,592.915181,341.850572,379.27755,607.495133,626.082804,1700.707796,1655.518129,505.260849,524.021379,348.819524,333.493714,431.702909,454.178137,1244.329236,1343.44604,478.38745,442.05741,241.350931,270.463025,1610.663249,1595.176639,331.069016,396.432575,304.985202,318.9956,1652.725931,1711.69288,768.647817,783.578504,1193.36644,1234.212899,658.471423,649.234262,1319.72009,1371.76467,1007.989909,1100.527491,1359.762514,1366.511667,550.951148,589.229934,311.89336,380.021734,0.595045,0.546732,0.559528,0.545418,0.707177,0.699619,0.589612,0.722294,0.637213,0.668959,0.614768,0.636446,0.683245,0.637048,0.689472,0.677284,0.657151,0.639716,0.64564,0.550707,0.703243,0.679303,0.701226,0.704007,0.680372,0.679862,0.692722,0.695148,0.704247,0.64688,0.701178,0.692661,0.6989,0.646723,0.671921,0.671921,0.407379,0.407879,0.586602,0.588153,0.666239,0.674738,0.632575,0.486794,0.711212,0.533242,0.546094,0.550909,0.380612,0.642667,0.406985,0.411493,0.40744,0.66678
min,54.273973,1057343.0,279.599289,649.198349,188.787377,219.599442,428.399974,465.599972,14147.216048,1785.595459,1909.195145,33671.25,32319.375,8848.799473,9111.599457,1739.177918,1655.858626,34.799911,2.475464,2818.799832,3604.799785,913.199946,903.866268,2608.241008,2521.249264,4339.488159,4380.115207,3168.75,3248.4375,2570.267046,1265.625,1640.625,2.4,0.0,63106.796239,61430.396338,15850.759689,14431.253177,29985.714386,30093.598206,38335.102509,36892.797801,151.199991,224.648336,1925.625,1840.795319,1420.799915,1371.599918,7014.73687,1244.528155,1628.848322,1678.364502,1702.264855,639.140625,542.398621,3305.625,2831.999831,1336.875,1305.708143,1291.988975,1553.550824,2206.875,2169.375,933.75,836.39995,575.545349,521.999969,1183.35884,1076.835451,3627.599784,3786.326413,958.710504,820.799951,3543.599789,2583.988172,6137.999634,5839.18515,4623.738503,4325.262682,612.677307,616.390503,1891.875,2371.989775,0.0,1.199997,8025.0,7935.46875,2213.792315,2062.5,1734.960153,1858.358535,0.0,9.375,1005.0,924.960519,2716.873772,2355.116123,6550.657399,5364.132137,1314.137357,1425.939595,1412.225924,1730.625,750.0,870.116794,0.0,0.0,1953.75,2213.788062,4640.625,4296.149335,1775.625,1678.125,881.718351,741.443469,614.883716,681.328125,5220.731172,5583.385091,1638.983634,1535.999908,778.358583,747.590088,6710.973528,4726.052551,542.108272,556.875,586.405985,454.570107,5368.356948,5285.036673,1730.737884,1789.752791,3833.787329,3688.79978,1214.999451,1129.567504,4633.125,4793.573109,2890.799828,1906.875,3224.291687,2693.670657,1095.0,1081.875,416.399975,423.984375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,69.256164,1323742.0,1181.114188,1327.802492,310.077739,345.937011,832.798916,869.998868,18651.099886,2827.918127,2771.884766,44934.739444,44147.342547,12397.848759,12617.234438,3179.376055,2916.101102,344.69999,304.802856,12306.509566,13577.359251,1239.599926,1263.662114,3611.630461,3638.534576,6454.217291,6641.024802,4119.385503,4283.924872,3805.263448,1919.625004,2571.599847,263.999984,245.999985,84630.9375,82494.990204,21172.5,21082.753998,40779.107855,43278.068912,50009.02351,49106.90411,418.799975,461.999972,2770.661557,2484.609375,1901.024943,1860.937445,10027.642013,2641.875,3493.762396,3290.170815,3268.640585,1334.69996,1232.137463,6715.773102,6368.15354,2472.186522,2551.151324,2600.130998,2757.014426,3922.372172,3845.017759,1853.904695,1736.390272,1204.199928,1098.75,2417.665795,2071.307043,6642.778318,6368.88731,1681.53745,1633.984265,6028.269592,5844.199219,10249.312195,9961.868487,7510.836873,6909.224789,1885.421254,1607.579947,3758.913473,3551.138833,1229.477187,1299.609226,13970.080285,13939.192639,3887.253059,4273.610414,3566.399787,3959.133244,659.235279,438.599974,2130.599936,2016.350886,5236.821749,4137.862377,11371.86343,10906.204601,2722.799838,2726.243596,3410.626116,3401.853076,2067.599877,1867.676626,1015.955768,1231.3716,3167.684233,3449.0625,8527.465834,8446.789008,2804.411842,2906.48996,1657.949951,1499.882768,1450.3125,1632.128168,7520.399552,8459.999748,2606.12913,2449.34042,1166.481277,1250.625,9923.087024,9985.259549,1077.888858,1307.897461,1223.439539,1279.812108,10634.477204,10936.416527,3478.390076,3865.19977,6660.071564,6456.909543,2942.491137,2515.773302,7766.25,7652.283426,5342.899782,5347.799681,7160.454479,6845.999592,2324.401478,2417.867951,952.909455,1022.999939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,73.967123,1423724.0,1636.88129,1643.708008,352.265346,388.125,939.599944,984.375,20012.663645,3114.120348,3028.811855,48298.797121,47384.397176,13397.999201,13660.847351,3592.5,3281.25,588.515625,554.399967,17745.073062,19592.398832,1347.890016,1369.196518,3911.999767,3959.296875,6903.599589,7123.199575,4443.457642,4595.27136,4185.589355,2103.044736,2839.199831,325.199981,301.640625,92283.75,89994.303797,23521.198598,23578.125,44751.435791,47198.397187,54347.047472,53805.596793,466.875,515.741663,3028.799819,2724.25227,2070.350626,2029.207741,11217.564514,3086.009346,4004.399761,3635.509867,3631.199784,1520.859375,1414.799916,7666.511597,7368.043544,2853.59983,2941.199825,2966.843445,3127.148438,4466.599543,4348.125,2130.0,2047.148438,1395.0,1293.040883,2743.125,2330.625,7481.947205,7107.517372,1852.79989,1799.295451,6791.178916,6510.469971,11388.510476,11119.199337,8364.375,7625.390625,2149.940369,1867.5,4138.125,3930.0,1468.124336,1507.19991,15485.264221,15430.79908,4424.412062,4885.315261,3907.617188,4357.5,788.435242,539.999968,2487.599852,2336.25,5886.20495,4621.199725,12758.486704,12263.999269,3087.599816,3166.799811,3845.990219,3832.799772,2433.75,2260.80009,1217.999927,1466.399913,3519.505713,3856.790192,9723.119004,9524.878506,3133.199813,3251.600092,1897.199887,1713.75,1741.875,1911.092886,8335.191544,9253.199448,2923.125,2725.199838,1320.0,1420.084746,10848.75,10990.93819,1285.664062,1545.599908,1408.799916,1486.516052,11699.042236,12007.169464,3976.875,4348.125,7456.875,7265.738903,3368.412849,2945.742188,8654.739213,8546.399491,5963.984833,6040.190534,8018.027466,7719.375,2645.033142,2764.799835,1147.961323,1271.999924,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
75%,78.953425,1527860.0,2230.3125,2039.2882,394.799976,432.211664,1041.29025,1093.455436,21697.919751,3441.866151,3311.257966,51966.086067,50885.2862,14513.078394,14822.02137,3952.361832,3624.328481,1012.194883,991.902871,25057.945899,28160.675369,1466.889835,1479.54961,4235.159212,4254.375,7420.799558,7610.524435,4771.228581,4964.931701,4575.417051,2317.796983,3115.195946,408.30652,387.419498,101146.121986,98605.696952,25918.536728,26148.949825,48869.720837,51710.178185,59564.979757,58827.780823,514.38126,561.599967,3319.099874,2984.585846,2278.125,2227.207031,12391.783877,3580.79534,4579.366209,4014.142667,4008.431435,1719.597762,1595.810471,8641.84436,8372.779884,3253.457584,3378.037399,3373.263371,3512.634932,5001.449851,4905.169682,2403.086004,2318.271912,1597.596946,1521.5625,3064.259634,2624.76356,8302.039444,7852.60555,2045.970886,1997.9974,7614.941955,7235.803299,12555.575993,12203.805615,9193.703663,8375.859375,2421.553878,2134.359355,4631.71875,4360.973738,1719.898091,1775.520782,17051.432014,17059.97115,4975.219593,5489.141792,4279.922117,4772.474858,932.899191,661.244175,2870.996263,2705.036027,6636.656195,5177.130022,14087.321027,13635.595335,3527.92889,3685.738953,4301.516018,4318.623229,2861.63623,2693.996493,1445.398118,1707.599898,3967.995534,4307.070773,10880.420428,10700.345398,3482.399792,3602.144371,2122.344465,1952.997458,2028.97887,2236.195868,9180.429108,10228.144512,3216.997614,3024.077057,1488.577453,1601.810524,12070.905419,12104.699639,1540.886169,1842.267186,1616.442036,1707.603206,12812.999236,13225.724606,4518.854645,4896.268424,8302.498123,8114.635061,3845.830536,3380.995599,9571.799429,9463.5241,6612.589433,6742.799598,8845.946033,8618.514329,3033.680969,3169.199811,1358.399919,1554.902359,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0
max,91.306849,1905572.0,5464.820847,4804.855004,621.341431,661.875,1616.396692,1691.996537,31839.946542,5561.985855,5312.38649,67704.578765,66360.907497,23236.864494,23956.161044,5383.188984,5128.789504,5745.935552,4957.5,70383.340608,84133.438525,1959.599883,2030.625,7181.981735,6802.799595,9519.585703,9660.913477,6308.083086,6631.872002,6821.744966,3135.0,4216.637289,974.095032,756.254211,141230.625,136239.591879,37100.725414,36256.875,66583.453995,75606.288414,82662.315331,84714.571074,779.767814,787.19751,4855.779055,4260.935574,3480.473864,3112.387386,18686.039001,5829.599653,7468.784716,5692.143934,5672.104887,2945.802002,2601.599845,13671.921789,12558.75,5812.822174,5480.164302,5255.986633,5609.999666,7508.399552,7674.029274,3439.657043,3332.802329,2581.197795,2399.999857,4903.893921,5514.095764,11909.406196,10935.788185,3954.553528,6711.181083,10529.999372,11524.541655,18003.598927,16764.216126,13418.3992,11733.75,3612.304688,3490.540202,6809.999594,7790.384058,2794.802887,3046.058289,26441.25,26130.0,7886.39953,8400.0,6853.300212,7898.37825,1577.999906,1653.674365,5092.799696,4209.375,12530.374357,8068.351167,20258.387125,19915.61454,6283.199625,5856.702194,6285.599625,6643.194324,5121.599695,4709.570007,2489.999852,2900.00592,6249.599627,6295.187117,15816.976379,14477.75095,5035.093506,5474.388797,3553.199788,2864.099557,4300.799744,4163.904367,13021.875,14338.799145,5376.1226,4664.399722,2519.645874,2568.282878,19388.398844,17305.198969,2951.999824,3166.118286,2754.84157,2673.599841,18711.561708,18617.297456,7352.399562,6998.900712,12270.0,12236.399271,5843.985138,5578.434185,13815.599177,13048.125,9442.5,11927.972096,16396.234924,16002.636169,4601.887329,6470.399614,2335.600159,2987.922258,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0


In [12]:
categorical_features = [column_name for column_name in data.columns if data[column_name].dtype == 'O']
print("Number of Categorical Features: {}".format(len(categorical_features)))
print("Categorical Features: ",categorical_features)

Number of Categorical Features: 4
Categorical Features:  ['PTID', 'Date', 'Sex', 'diagnosis']


In [13]:
numerical_features = [column_name for column_name in data.columns if data[column_name].dtype != 'O']
print("Number of Numerical Features: {}".format(len(numerical_features)))
print("Numerical Features: ",numerical_features)

Number of Numerical Features: 201
Numerical Features:  ['Age', 'DLICV_baseline', 'MUSE_Volume_4', 'MUSE_Volume_11', 'MUSE_Volume_23', 'MUSE_Volume_30', 'MUSE_Volume_31', 'MUSE_Volume_32', 'MUSE_Volume_35', 'MUSE_Volume_36', 'MUSE_Volume_37', 'MUSE_Volume_38', 'MUSE_Volume_39', 'MUSE_Volume_40', 'MUSE_Volume_41', 'MUSE_Volume_47', 'MUSE_Volume_48', 'MUSE_Volume_49', 'MUSE_Volume_50', 'MUSE_Volume_51', 'MUSE_Volume_52', 'MUSE_Volume_55', 'MUSE_Volume_56', 'MUSE_Volume_57', 'MUSE_Volume_58', 'MUSE_Volume_59', 'MUSE_Volume_60', 'MUSE_Volume_61', 'MUSE_Volume_62', 'MUSE_Volume_71', 'MUSE_Volume_72', 'MUSE_Volume_73', 'MUSE_Volume_75', 'MUSE_Volume_76', 'MUSE_Volume_81', 'MUSE_Volume_82', 'MUSE_Volume_83', 'MUSE_Volume_84', 'MUSE_Volume_85', 'MUSE_Volume_86', 'MUSE_Volume_87', 'MUSE_Volume_88', 'MUSE_Volume_89', 'MUSE_Volume_90', 'MUSE_Volume_91', 'MUSE_Volume_92', 'MUSE_Volume_93', 'MUSE_Volume_94', 'MUSE_Volume_95', 'MUSE_Volume_100', 'MUSE_Volume_101', 'MUSE_Volume_102', 'MUSE_Volume_103'

Turning categorical data to numerical.

In [14]:
data['diagnosis'] = data['diagnosis'].map({'CN': 0, 'MCI': 1, 'Dementia': 2}) #multi-class or binary?

In [15]:
data['Sex'] = data['Sex'].map({'M': 0, 'F': 1})

In [16]:
data_new= data.drop(['Date'], axis=1)
data=data_new

#### Data Preparation

In [17]:
data_controls_age = data.loc[(data['diagnosis'] == 0)]
data_controls_age_60 = data_controls_age.loc[(data_controls_age['Age'] > 60)]
data_controls = data_controls_age_60.loc[(data_controls_age_60['Age'] < 86)]
print(data_controls.shape) #controls ages 60-86

data_age_60 = data.loc[(data['Age'] > 60)]
data_age_filtered = data_age_60.loc[(data_age_60['Age'] < 86)]
print(data_age_filtered.shape) #ages 60-86

(449, 204)
(1463, 204)


In [18]:
roi_features = numerical_features[2:147]
snip_features = numerical_features[147:]
first_features = ['PTID','Sex','Age','DLICV_baseline','diagnosis']
all_but_roi_features = ['PTID','Sex','Age','DLICV_baseline','diagnosis']
all_but_roi_features.extend(snip_features)

In [19]:
print(all_but_roi_features)

['PTID', 'Sex', 'Age', 'DLICV_baseline', 'diagnosis', 'rs4575098', 'rs6656401', 'rs2093760', 'rs4844610', 'rs4663105', 'rs6733839', 'rs10933431', 'rs35349669', 'rs6448453', 'rs190982', 'rs9271058', 'rs9473117', 'rs9381563', 'rs10948363', 'rs2718058', 'rs4723711', 'rs1859788', 'rs1476679', 'rs12539172', 'rs10808026', 'rs7810606', 'rs11771145', 'rs28834970', 'rs73223431', 'rs4236673', 'rs9331896', 'rs11257238', 'rs7920721', 'rs3740688', 'rs10838725', 'rs983392', 'rs7933202', 'rs2081545', 'rs867611', 'rs10792832', 'rs3851179', 'rs17125924', 'rs17125944', 'rs10498633', 'rs12881735', 'rs12590654', 'rs442495', 'rs59735493', 'rs113260531', 'rs28394864', 'rs111278892', 'rs3752246', 'rs4147929', 'rs41289512', 'rs3865444', 'rs6024870', 'rs6014724', 'rs7274581', 'rs429358']


In [20]:
roi_mapping = pd.read_csv(path + 'ROIs.csv')

In [21]:
roi = roi_mapping.values.tolist()
roi_names = []
for i in roi:
  roi_names.append(i[0])

In [22]:
roi_names.remove('42,Right Cerebral Exterior')
roi_names.remove('43,Left Cerebral Exterior')
roi_names.remove('44,Cerebral')
roi_names.remove('45,Cerebral')
roi_names.remove('46,CSF')
roi_names.remove('63,Right vessel')
roi_names.remove('64,Left vessel')
roi_names.remove('69,Optic Chiasm')

In [23]:
ch = ','
roi_name=[]
# Remove all characters before the character ',' from string
for roi in roi_names:
  listOfWords = roi.split(ch, 1)
  if len(listOfWords) > 0:
      strValue = listOfWords[1]
      roi_name.append(strValue)

In [24]:
import warnings
warnings.filterwarnings('ignore')

# Data preprocessing & Model

In [38]:
# Select columns that start with 'rs'
cols_to_drop = [col for col in data_age_filtered.columns if col.startswith('rs')]

# Drop these columns
data_age_filtered.drop(cols_to_drop, axis=1, inplace=True)

In [39]:
data_age_filtered

Unnamed: 0,PTID,Age,Sex,DLICV_baseline,diagnosis,MUSE_Volume_4,MUSE_Volume_11,MUSE_Volume_23,MUSE_Volume_30,MUSE_Volume_31,MUSE_Volume_32,MUSE_Volume_35,MUSE_Volume_36,MUSE_Volume_37,MUSE_Volume_38,MUSE_Volume_39,MUSE_Volume_40,MUSE_Volume_41,MUSE_Volume_47,MUSE_Volume_48,MUSE_Volume_49,MUSE_Volume_50,MUSE_Volume_51,MUSE_Volume_52,MUSE_Volume_55,MUSE_Volume_56,MUSE_Volume_57,MUSE_Volume_58,MUSE_Volume_59,MUSE_Volume_60,MUSE_Volume_61,MUSE_Volume_62,MUSE_Volume_71,MUSE_Volume_72,MUSE_Volume_73,MUSE_Volume_75,MUSE_Volume_76,MUSE_Volume_81,MUSE_Volume_82,MUSE_Volume_83,MUSE_Volume_84,MUSE_Volume_85,MUSE_Volume_86,MUSE_Volume_87,MUSE_Volume_88,MUSE_Volume_89,MUSE_Volume_90,MUSE_Volume_91,MUSE_Volume_92,MUSE_Volume_93,MUSE_Volume_94,MUSE_Volume_95,MUSE_Volume_100,MUSE_Volume_101,MUSE_Volume_102,MUSE_Volume_103,MUSE_Volume_104,MUSE_Volume_105,MUSE_Volume_106,MUSE_Volume_107,MUSE_Volume_108,MUSE_Volume_109,MUSE_Volume_112,MUSE_Volume_113,MUSE_Volume_114,MUSE_Volume_115,MUSE_Volume_116,MUSE_Volume_117,MUSE_Volume_118,MUSE_Volume_119,MUSE_Volume_120,MUSE_Volume_121,MUSE_Volume_122,MUSE_Volume_123,MUSE_Volume_124,MUSE_Volume_125,MUSE_Volume_128,MUSE_Volume_129,MUSE_Volume_132,MUSE_Volume_133,MUSE_Volume_134,MUSE_Volume_135,MUSE_Volume_136,MUSE_Volume_137,MUSE_Volume_138,MUSE_Volume_139,MUSE_Volume_140,MUSE_Volume_141,MUSE_Volume_142,MUSE_Volume_143,MUSE_Volume_144,MUSE_Volume_145,MUSE_Volume_146,MUSE_Volume_147,MUSE_Volume_148,MUSE_Volume_149,MUSE_Volume_150,MUSE_Volume_151,MUSE_Volume_152,MUSE_Volume_153,MUSE_Volume_154,MUSE_Volume_155,MUSE_Volume_156,MUSE_Volume_157,MUSE_Volume_160,MUSE_Volume_161,MUSE_Volume_162,MUSE_Volume_163,MUSE_Volume_164,MUSE_Volume_165,MUSE_Volume_166,MUSE_Volume_167,MUSE_Volume_168,MUSE_Volume_169,MUSE_Volume_170,MUSE_Volume_171,MUSE_Volume_172,MUSE_Volume_173,MUSE_Volume_174,MUSE_Volume_175,MUSE_Volume_176,MUSE_Volume_177,MUSE_Volume_178,MUSE_Volume_179,MUSE_Volume_180,MUSE_Volume_181,MUSE_Volume_182,MUSE_Volume_183,MUSE_Volume_184,MUSE_Volume_185,MUSE_Volume_186,MUSE_Volume_187,MUSE_Volume_190,MUSE_Volume_191,MUSE_Volume_192,MUSE_Volume_193,MUSE_Volume_194,MUSE_Volume_195,MUSE_Volume_196,MUSE_Volume_197,MUSE_Volume_198,MUSE_Volume_199,MUSE_Volume_200,MUSE_Volume_201,MUSE_Volume_202,MUSE_Volume_203,MUSE_Volume_204,MUSE_Volume_205,MUSE_Volume_206,MUSE_Volume_207
0,002_S_0295,84.742466,0,1485405.375,0,1873.124153,1586.249283,302.695176,352.265466,1062.069832,1159.101038,20657.100036,3254.764153,3118.709527,52564.546547,52086.773326,14018.899912,14294.173225,3600.701497,3368.670352,750.937160,587.460672,16514.289409,23626.044006,1544.061802,1339.452519,4182.888734,4105.896581,7365.934170,8007.183880,4747.146291,4789.333772,4638.513528,2017.616275,2812.850291,352.265466,380.742015,107813.271569,102646.359842,26682.526999,30497.329962,51500.367341,52266.070120,63530.127527,63595.518123,426.093557,543.163817,3778.943604,3226.287604,2316.092703,2299.217710,11240.854293,4464.490169,4446.560490,3624.959299,3474.139054,1558.827420,741.444977,9202.144277,10142.925102,3385.545344,3549.021833,2759.061253,2699.998779,5983.239482,4772.458780,2428.944214,2103.045924,954.491756,976.640183,2716.873772,2089.334993,8379.488399,7090.660857,1878.397588,1371.093130,6936.676551,6667.731360,11534.057285,11793.510293,9326.597346,7320.582628,2008.124092,1507.147756,3968.787268,4167.068429,1769.764825,1258.241619,14127.532675,13099.212828,4775.622841,5759.645833,3586.990566,3536.365589,942.890199,769.921527,2916.209619,1978.592855,4195.544978,4115.388764,11762.924369,11668.002537,3131.365772,4021.521619,3495.232795,5277.653864,2280.233344,2930.975237,1168.593222,1541.952428,3406.639085,3815.857650,11444.408888,11739.721255,3415.076581,3348.631299,2000.741283,2098.827176,1443.866535,1758.163268,8868.863178,8873.081926,2589.256642,2578.709772,1332.069710,1373.202504,10701.909224,10142.925102,1171.757283,1339.452519,1123.241680,1319.413466,9906.675209,12091.986721,3378.162535,3411.912520,6568.590780,7252.027971,2965.779909,2489.061375,9220.073956,7986.090139,4336.873039,4313.669925,8816.128827,7512.535666,2559.725405,2955.233039,994.569863,1110.585435
1,002_S_0413,76.283562,1,1364116.000,0,2131.516933,1505.034469,384.959763,340.663023,988.239172,1051.520229,18405.295394,3021.670466,3151.396633,45240.682248,43280.024169,12993.710346,13640.231811,3586.981241,3350.731962,475.662611,761.482051,16498.426213,13491.521327,1096.871653,1092.652915,2941.514461,3243.154165,6479.980225,6874.432146,4181.823176,4367.447609,3893.894367,1782.416435,2235.930676,503.084402,371.248867,103124.919662,101385.745282,29939.322695,27712.884177,48141.064022,54393.232442,54684.325304,53309.017001,504.139086,497.810981,3042.764152,2457.414376,2021.829767,1968.040869,11103.716114,2839.210085,3769.441622,3323.310171,3544.793870,1840.424071,1424.878464,9169.425142,8470.169464,2715.812024,3414.013019,2874.014667,2941.514461,5370.452361,6264.824631,2298.157049,2118.860721,1387.964514,967.145486,2358.274053,2307.649208,7267.829383,8130.561125,2144.173144,1907.923865,6878.650883,7117.009531,12912.499657,11882.073114,8530.286468,8347.826087,2611.398281,2317.141366,4837.836799,4199.752808,1187.574501,1671.674586,15983.740284,14967.024637,4672.251366,4851.547694,4239.830811,4672.251366,1018.825016,606.443462,3551.121975,3038.545415,6675.096817,4862.094537,12907.226235,11194.418962,3109.209261,3177.763740,3142.959158,4005.690901,2556.554698,2594.523332,1206.558818,1193.902606,3702.996512,3976.159741,10143.953418,11154.340960,3077.568733,3113.427999,1596.792002,1522.964102,1555.659315,1731.791590,9037.589607,8741.223324,2840.264770,2546.007855,1505.034469,1198.121344,12067.697547,11828.284215,902.809745,1252.964926,948.161169,971.364223,10644.928452,12009.689912,3580.653135,4822.016534,8888.879123,7140.212585,4647.993628,3295.888379,9925.633772,11152.231591,7421.813288,6059.161196,6676.151501,7780.405944,2469.015903,2239.094729,1097.926337,744.607103
2,002_S_0559,79.223288,0,1570479.625,0,2366.717680,3157.732947,512.577893,552.656000,1172.811970,1141.171359,23368.700372,3041.717375,3198.865741,49984.782089,49158.962150,17229.367210,17536.281134,4728.161925,4206.091848,764.648092,640.195023,16674.601836,16115.617714,1304.647848,1335.233771,4119.607512,4195.544978,7287.887330,7510.426292,5413.708490,5620.427146,3802.146719,1700.155481,2524.920733,563.202870,591.679420,115548.346197,110692.567142,30553.228374,27892.253015,54122.319281,61104.347374,63260.127649,63511.143161,543.163817,640.195023,3323.318810,3096.561100,2498.553558,2377.264550,12633.041163,3755.740489,4386.443329,3620.740551,3068.084550,2433.162962,2104.100611,11164.916827,10359.135942,3451.990627,2741.131573,3804.256093,3905.506047,4038.396612,3980.388825,2408.905161,2620.897253,1344.725955,1242.421313,3342.303176,2947.850230,8735.972613,10286.362537,1908.983512,2000.741283,5694.255238,4487.693284,12286.049133,15746.477256,9619.800338,9030.230292,2045.038138,2290.780214,3946.638841,4054.216917,1891.053833,1156.991664,16048.117744,14235.110752,4362.185528,5427.419421,4695.466627,4353.748032,653.905954,1110.585435,1854.139787,2276.014596,6314.411208,5783.903635,13022.220675,12608.783362,3445.662505,2907.772123,5668.942750,4525.662016,2641.990993,2326.639573,1201.288519,1131.679176,3535.310902,3904.451360,10796.831056,11263.002720,3952.966963,4306.287116,1615.780519,1374.257191,1595.741466,1631.600825,8051.480735,12965.267576,3138.748581,3280.076642,1138.007298,1420.663420,11015.151270,12643.588034,1885.780397,1861.522596,932.343328,1025.155787,11981.244583,11908.471179,4343.201161,5510.739696,7549.449712,5636.247452,2599.803512,2494.334810,7430.270078,6746.832887,7983.980765,7081.168674,10475.151514,11083.705926,3069.139237,2872.967451,1051.522962,1274.061924
3,002_S_0619,77.447945,0,1859348.250,2,5124.734093,2981.605944,280.547287,356.484899,876.446600,908.087272,21112.765397,2883.519862,2848.715123,56650.512928,55319.495347,13810.098417,14307.911649,3485.747309,3231.567248,1810.901098,1584.142953,47125.616120,60669.932896,1393.244235,1401.681747,3791.607134,3956.138626,7059.033810,7534.698571,5230.202998,5155.320075,4848.405562,2089.339007,3279.028256,557.930507,343.828630,105178.865483,105828.553938,30496.333873,30318.091423,52001.443596,58340.124785,64032.281587,63620.952857,546.328928,608.555582,3098.676428,2852.933879,2209.573559,2206.409492,12681.581134,2698.949278,4427.584631,3850.669721,3989.888675,2088.284318,2110.432788,10630.210932,9738.998685,2335.081556,3525.825493,3081.801403,3971.958961,4032.076237,5876.727385,1759.221335,1517.697543,1132.736039,1098.985990,2337.190934,1737.072865,9832.866011,8968.020990,2304.495574,2317.151842,8209.699563,8048.332138,12752.245300,13784.785880,9587.123462,11270.407185,3097.621739,2240.159542,4577.350476,4796.725798,1399.572369,1617.893002,17917.057577,16677.797943,6813.291261,7985.050795,5135.280983,4916.960350,831.094971,594.844624,2534.417786,2599.808508,5705.867759,4306.295390,11084.781913,13730.996738,5032.976145,4884.264989,4352.701708,5000.280785,2023.948286,2122.034368,1509.260030,1475.509981,5866.180495,5736.453741,11578.376388,11748.181325,3241.059450,2811.801007,1759.221335,2247.542365,2254.925188,2166.331308,9747.436198,10617.554664,2975.277809,3174.614040,1750.783823,1521.916299,12143.689718,12557.127826,1405.900503,1814.065166,1921.643449,1717.033773,12618.299791,13427.246292,3545.864585,3870.708812,7061.143188,8703.294038,3338.090842,2569.222525,10113.413298,11186.032061,7032.666584,6352.392146,7272.080998,7162.393337,2489.066157,2506.995871,1155.939198,1574.650751
5,002_S_0729,65.056164,1,1166961.750,1,966.095170,1921.643449,356.484899,415.547486,761.485494,853.243441,18195.495486,3485.747309,2921.488668,44029.049070,42879.438005,11626.892084,12026.619234,2887.738618,2592.425684,392.344326,283.711354,8403.762348,9388.841921,1287.775330,1255.079969,3543.755207,3659.771003,6422.001624,6685.673886,3798.989957,4033.130926,3400.317496,1551.447592,2534.417786,268.945708,158.203357,87173.214027,83566.177477,21149.679514,18514.011579,40216.348155,41546.311047,49603.080697,48494.602506,435.586578,433.477199,2722.152437,2497.503670,2012.346707,1917.424692,9808.608162,2805.472872,3514.223914,3042.777908,3393.989362,1351.056673,769.923006,9316.068376,7022.119693,2155.784418,2744.300907,2465.862998,2690.511766,3676.646027,3777.896176,1416.447394,1371.095765,1420.666150,1110.587569,1976.487279,1818.283922,7236.221570,7037.940029,1790.862006,1707.541571,5660.516130,4246.178114,10451.968483,10478.335709,6965.166484,7380.713970,1843.596459,1515.588164,3769.458664,3579.614635,1319.416001,1700.158748,15817.171679,13515.840172,4775.632017,4807.272689,3063.871689,3464.653528,1248.751835,534.727348,2865.590148,2599.808508,5004.499541,3889.693215,10419.273122,10018.491283,2405.745722,1425.939595,4143.873276,4313.678213,2103.049965,2000.745127,988.243640,1255.079969,2139.964082,3043.832597,8480.754649,6326.024920,2684.183632,2847.660434,1367.931697,1286.720641,1517.697543,1421.720839,6882.900738,9298.138662,2307.659641,2541.800610,1081.056276,1029.376513,8876.263042,9259.115167,980.860816,1336.291026,1059.962495,1116.915704,9093.528987,11257.750917,4128.052941,4293.639121,6690.947331,7654.933123,2551.292811,2318.206531,7825.792749,7452.432825,5885.164897,6838.603798,6198.407545,5983.250979,2492.230224,2634.613246,907.032583,965.040480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,941_S_4377,69.187671,1,1213846.750,1,881.999947,1006.799940,291.599983,368.399978,848.399949,842.399950,16942.798990,2813.999832,2732.399837,38367.597713,37732.797751,10575.599370,10729.199360,3331.199801,3163.199811,277.199983,122.399993,10655.999365,10243.199389,1113.599934,1213.199928,3841.199771,3997.199762,6476.399614,6640.799604,3844.799771,3826.799772,3287.999804,2023.199879,2435.999855,275.999984,219.599987,70166.395818,70043.995825,18130.798919,17141.998978,34400.397950,34846.797923,39651.597637,39454.797648,511.199970,561.599967,2305.199863,2029.199879,1618.799904,1685.999900,9392.399440,2234.399867,2710.799838,3969.599763,3922.799766,1229.999927,1067.999936,4531.199730,4151.999753,3071.999817,3301.199803,2642.399843,2969.999823,3874.799769,3181.199810,1715.999898,1789.199893,1654.799901,1523.999909,2671.199841,2146.799872,6149.999633,5678.399662,1540.799908,1766.399895,6223.199629,5893.199649,9580.799429,9399.599440,6956.399585,6032.399640,1623.599903,1963.199883,3746.399777,3551.999788,673.199960,1451.999913,14074.799161,13335.599205,3883.199769,3945.599765,3530.399790,3260.399806,664.799960,518.399969,2203.199869,2095.199875,6124.799635,4739.999717,11431.199319,11432.399319,2571.599847,2919.599826,3221.999808,2474.399853,3680.399781,2359.199859,1059.599937,1625.999903,3466.799793,3680.399781,7255.199568,7193.999571,3002.399821,2816.399832,1857.599889,1628.399903,2129.999873,1987.199882,7388.399560,9505.199433,2341.199860,2143.199872,1143.599932,1201.199928,9118.799456,10125.599396,1430.399915,2164.799871,1171.199930,1341.599920,11763.599299,14017.199165,3299.999803,4031.999760,6173.999632,6542.399610,3784.799774,3429.599796,7781.999536,6890.399589,5427.599676,5349.599681,6283.199625,6148.799634,2323.199862,2611.199844,1282.799924,1429.199915
1563,941_S_4420,81.383562,0,1536545.875,1,1079.999936,1298.399923,431.999974,445.199973,1088.399935,985.199941,19977.598809,3214.799808,3257.999806,47859.597147,48692.397098,12937.199229,13166.399215,4255.199746,3687.599780,751.199955,795.599953,19527.598836,24886.798517,1491.599911,1505.999910,4939.199706,5123.999695,7327.199563,7389.599560,4948.799705,4453.199735,3733.199777,1881.599888,2569.199847,214.799987,379.199977,104933.993745,101272.793964,24487.198540,25827.598461,48183.597128,51826.796911,62361.596283,61167.596354,466.799972,519.599969,3023.999820,2663.999841,2312.399862,2177.999870,11644.799306,4216.799749,3838.799771,4249.199747,4172.399751,1487.999911,1598.399905,8042.399521,9107.999457,2579.999846,2102.399875,3128.399814,3187.199810,5668.799662,5121.599695,2408.399856,2120.399874,1171.199930,1093.199935,3076.799817,2522.399850,8249.999508,6717.599600,1777.199894,1523.999909,7329.599563,7115.999576,13821.599176,13209.599213,8264.399507,7203.599571,2499.599851,1689.599899,4246.799747,4184.399751,1691.999899,1109.999934,17227.198973,17866.798935,4737.599718,5257.199687,3725.999778,3925.199766,743.999956,706.799958,2959.199824,2786.399834,4959.599704,5493.599673,11245.199330,13256.399210,3637.199783,4102.799755,3964.799764,2979.599822,2301.599863,1179.599930,1507.199910,1688.399899,3853.199770,3777.599775,11365.199323,10028.399402,4081.199757,3471.599793,1966.799883,1672.799900,2194.799869,2072.399876,6458.399615,8288.399506,3418.799796,3122.399814,1657.199901,1630.799903,12458.399257,13774.799179,1852.799890,2234.399867,1373.999918,1565.999907,11845.199294,13184.399214,3931.199766,4689.599720,6287.999625,5626.799665,3124.799814,2771.999835,8386.799500,8369.999501,6985.199584,7204.799571,9337.199443,7413.599558,3394.799798,2974.799823,1454.399913,1228.799927
1564,941_S_4764,82.672603,1,1438682.375,1,2911.199826,2806.799833,203.999988,257.999985,872.399948,879.599948,20135.998800,2771.999835,2692.799839,49766.397034,49005.597079,14546.399133,14725.199122,3502.799791,3417.599796,1269.599924,898.799946,34168.797963,24159.598560,1229.999927,1301.999922,3470.399793,3406.799797,6903.599589,7033.199581,4495.199732,4612.799725,3977.999763,2102.399875,2887.199828,340.799980,275.999984,77746.795366,74926.795534,21007.198748,18071.998923,36950.397798,39129.597668,46949.997202,45447.597291,604.799964,455.999973,2845.199830,2633.999843,1772.399894,1849.199890,8560.799490,2911.199826,2881.199828,3688.799780,3614.399785,1567.199907,973.199942,7833.599533,5200.799690,2761.199835,2303.999863,2530.799849,2887.199828,3716.399778,3257.999806,2343.599860,2126.399873,1429.199915,1045.199938,2788.799834,2362.799859,7852.799532,7195.199571,1484.399912,1898.399887,6519.599611,4475.999733,10491.599375,9629.999426,8755.199478,7741.199539,1605.599904,1519.199909,3568.799787,3154.799812,1018.799939,1431.599915,14619.599129,14492.399136,4258.799746,4612.799725,2867.999829,3763.199776,862.799949,463.199972,2491.199852,1895.999887,5236.799688,4847.999711,10013.999403,11163.599335,3194.399810,2228.399867,3721.199778,3015.599820,1940.399884,1718.399898,970.799942,1102.799934,2851.199830,3519.599790,8074.799519,7475.999554,3034.799819,3064.799817,1535.999908,1312.799922,1397.999917,1745.999896,6812.399594,9147.599455,2774.399835,2467.199853,1150.799931,1268.399924,10103.999398,10629.599366,1148.399932,1419.599915,1233.599926,1661.999901,11665.199305,10906.799350,3868.799769,4814.399713,5631.599664,6375.599620,3123.599814,3779.999775,8471.999495,6921.599587,5782.799655,6337.199622,7234.799569,7052.399580,1897.199887,1976.399882,826.799951,1083.599935
1565,941_S_5124,76.664384,1,1353772.750,0,1496.399911,1142.399932,226.799986,238.799986,814.799951,986.399941,17121.598979,2571.599847,2613.599844,40799.997568,37919.997740,11252.399329,11056.799341,2611.199844,2401.199857,1173.599930,903.599946,39554.397642,49455.597052,1089.599935,1198.799929,3371.999799,3441.599795,6752.399598,7288.799566,3669.599781,4016.399761,4837.199712,2242.799866,2045.999878,368.399978,356.399979,72938.395653,72790.795661,16736.399002,19359.598846,37213.197782,37405.197770,52543.196868,48931.197083,151.199991,237.599986,2701.199839,2535.599849,1742.399896,1747.199896,9619.199427,3761.999776,4827.599712,3044.399819,3215.999808,1461.599913,1097.999935,5925.599647,8337.599503,3215.999808,2239.199867,2836.799831,2810.399832,5439.599676,4383.599739,2066.399877,2001.599881,1113.599934,1605.599904,3344.399801,2876.399829,5933.999646,6645.599604,1389.599917,1534.799909,7534.799551,6320.399623,11071.199340,9958.799406,7286.399566,8525.999492,2161.199871,2290.799863,4663.199722,4142.399753,1066.799936,1808.399892,16085.999041,15931.199050,2968.799823,5188.799691,3347.999800,3628.799784,843.599950,481.199971,1402.799916,1694.399899,5456.399675,3436.799795,15909.599052,13811.999177,3209.999809,4238.399747,3220.799808,3424.799796,2506.799851,2048.399878,1117.199933,1532.399909,3375.599799,3566.399787,8815.199475,7204.799571,3658.799782,3085.199816,1631.999903,1789.199893,1604.399904,1658.399901,8210.399511,9099.599458,2953.199824,2977.199823,1442.399914,1610.399904,9107.999457,10970.399346,1537.199908,1352.399919,1473.599912,1432.799915,10263.599388,11079.599340,3147.599812,3183.599810,7949.999526,7853.999532,3065.999817,4151.999753,9046.799461,7985.999524,5089.199697,5537.999670,6945.599586,7079.999578,2033.999879,1708.799898,739.199956,791.999953


10-fold stratified cross validation

In [40]:
data_age_filtered

Unnamed: 0,PTID,Age,Sex,DLICV_baseline,diagnosis,MUSE_Volume_4,MUSE_Volume_11,MUSE_Volume_23,MUSE_Volume_30,MUSE_Volume_31,MUSE_Volume_32,MUSE_Volume_35,MUSE_Volume_36,MUSE_Volume_37,MUSE_Volume_38,MUSE_Volume_39,MUSE_Volume_40,MUSE_Volume_41,MUSE_Volume_47,MUSE_Volume_48,MUSE_Volume_49,MUSE_Volume_50,MUSE_Volume_51,MUSE_Volume_52,MUSE_Volume_55,MUSE_Volume_56,MUSE_Volume_57,MUSE_Volume_58,MUSE_Volume_59,MUSE_Volume_60,MUSE_Volume_61,MUSE_Volume_62,MUSE_Volume_71,MUSE_Volume_72,MUSE_Volume_73,MUSE_Volume_75,MUSE_Volume_76,MUSE_Volume_81,MUSE_Volume_82,MUSE_Volume_83,MUSE_Volume_84,MUSE_Volume_85,MUSE_Volume_86,MUSE_Volume_87,MUSE_Volume_88,MUSE_Volume_89,MUSE_Volume_90,MUSE_Volume_91,MUSE_Volume_92,MUSE_Volume_93,MUSE_Volume_94,MUSE_Volume_95,MUSE_Volume_100,MUSE_Volume_101,MUSE_Volume_102,MUSE_Volume_103,MUSE_Volume_104,MUSE_Volume_105,MUSE_Volume_106,MUSE_Volume_107,MUSE_Volume_108,MUSE_Volume_109,MUSE_Volume_112,MUSE_Volume_113,MUSE_Volume_114,MUSE_Volume_115,MUSE_Volume_116,MUSE_Volume_117,MUSE_Volume_118,MUSE_Volume_119,MUSE_Volume_120,MUSE_Volume_121,MUSE_Volume_122,MUSE_Volume_123,MUSE_Volume_124,MUSE_Volume_125,MUSE_Volume_128,MUSE_Volume_129,MUSE_Volume_132,MUSE_Volume_133,MUSE_Volume_134,MUSE_Volume_135,MUSE_Volume_136,MUSE_Volume_137,MUSE_Volume_138,MUSE_Volume_139,MUSE_Volume_140,MUSE_Volume_141,MUSE_Volume_142,MUSE_Volume_143,MUSE_Volume_144,MUSE_Volume_145,MUSE_Volume_146,MUSE_Volume_147,MUSE_Volume_148,MUSE_Volume_149,MUSE_Volume_150,MUSE_Volume_151,MUSE_Volume_152,MUSE_Volume_153,MUSE_Volume_154,MUSE_Volume_155,MUSE_Volume_156,MUSE_Volume_157,MUSE_Volume_160,MUSE_Volume_161,MUSE_Volume_162,MUSE_Volume_163,MUSE_Volume_164,MUSE_Volume_165,MUSE_Volume_166,MUSE_Volume_167,MUSE_Volume_168,MUSE_Volume_169,MUSE_Volume_170,MUSE_Volume_171,MUSE_Volume_172,MUSE_Volume_173,MUSE_Volume_174,MUSE_Volume_175,MUSE_Volume_176,MUSE_Volume_177,MUSE_Volume_178,MUSE_Volume_179,MUSE_Volume_180,MUSE_Volume_181,MUSE_Volume_182,MUSE_Volume_183,MUSE_Volume_184,MUSE_Volume_185,MUSE_Volume_186,MUSE_Volume_187,MUSE_Volume_190,MUSE_Volume_191,MUSE_Volume_192,MUSE_Volume_193,MUSE_Volume_194,MUSE_Volume_195,MUSE_Volume_196,MUSE_Volume_197,MUSE_Volume_198,MUSE_Volume_199,MUSE_Volume_200,MUSE_Volume_201,MUSE_Volume_202,MUSE_Volume_203,MUSE_Volume_204,MUSE_Volume_205,MUSE_Volume_206,MUSE_Volume_207
0,002_S_0295,84.742466,0,1485405.375,0,1873.124153,1586.249283,302.695176,352.265466,1062.069832,1159.101038,20657.100036,3254.764153,3118.709527,52564.546547,52086.773326,14018.899912,14294.173225,3600.701497,3368.670352,750.937160,587.460672,16514.289409,23626.044006,1544.061802,1339.452519,4182.888734,4105.896581,7365.934170,8007.183880,4747.146291,4789.333772,4638.513528,2017.616275,2812.850291,352.265466,380.742015,107813.271569,102646.359842,26682.526999,30497.329962,51500.367341,52266.070120,63530.127527,63595.518123,426.093557,543.163817,3778.943604,3226.287604,2316.092703,2299.217710,11240.854293,4464.490169,4446.560490,3624.959299,3474.139054,1558.827420,741.444977,9202.144277,10142.925102,3385.545344,3549.021833,2759.061253,2699.998779,5983.239482,4772.458780,2428.944214,2103.045924,954.491756,976.640183,2716.873772,2089.334993,8379.488399,7090.660857,1878.397588,1371.093130,6936.676551,6667.731360,11534.057285,11793.510293,9326.597346,7320.582628,2008.124092,1507.147756,3968.787268,4167.068429,1769.764825,1258.241619,14127.532675,13099.212828,4775.622841,5759.645833,3586.990566,3536.365589,942.890199,769.921527,2916.209619,1978.592855,4195.544978,4115.388764,11762.924369,11668.002537,3131.365772,4021.521619,3495.232795,5277.653864,2280.233344,2930.975237,1168.593222,1541.952428,3406.639085,3815.857650,11444.408888,11739.721255,3415.076581,3348.631299,2000.741283,2098.827176,1443.866535,1758.163268,8868.863178,8873.081926,2589.256642,2578.709772,1332.069710,1373.202504,10701.909224,10142.925102,1171.757283,1339.452519,1123.241680,1319.413466,9906.675209,12091.986721,3378.162535,3411.912520,6568.590780,7252.027971,2965.779909,2489.061375,9220.073956,7986.090139,4336.873039,4313.669925,8816.128827,7512.535666,2559.725405,2955.233039,994.569863,1110.585435
1,002_S_0413,76.283562,1,1364116.000,0,2131.516933,1505.034469,384.959763,340.663023,988.239172,1051.520229,18405.295394,3021.670466,3151.396633,45240.682248,43280.024169,12993.710346,13640.231811,3586.981241,3350.731962,475.662611,761.482051,16498.426213,13491.521327,1096.871653,1092.652915,2941.514461,3243.154165,6479.980225,6874.432146,4181.823176,4367.447609,3893.894367,1782.416435,2235.930676,503.084402,371.248867,103124.919662,101385.745282,29939.322695,27712.884177,48141.064022,54393.232442,54684.325304,53309.017001,504.139086,497.810981,3042.764152,2457.414376,2021.829767,1968.040869,11103.716114,2839.210085,3769.441622,3323.310171,3544.793870,1840.424071,1424.878464,9169.425142,8470.169464,2715.812024,3414.013019,2874.014667,2941.514461,5370.452361,6264.824631,2298.157049,2118.860721,1387.964514,967.145486,2358.274053,2307.649208,7267.829383,8130.561125,2144.173144,1907.923865,6878.650883,7117.009531,12912.499657,11882.073114,8530.286468,8347.826087,2611.398281,2317.141366,4837.836799,4199.752808,1187.574501,1671.674586,15983.740284,14967.024637,4672.251366,4851.547694,4239.830811,4672.251366,1018.825016,606.443462,3551.121975,3038.545415,6675.096817,4862.094537,12907.226235,11194.418962,3109.209261,3177.763740,3142.959158,4005.690901,2556.554698,2594.523332,1206.558818,1193.902606,3702.996512,3976.159741,10143.953418,11154.340960,3077.568733,3113.427999,1596.792002,1522.964102,1555.659315,1731.791590,9037.589607,8741.223324,2840.264770,2546.007855,1505.034469,1198.121344,12067.697547,11828.284215,902.809745,1252.964926,948.161169,971.364223,10644.928452,12009.689912,3580.653135,4822.016534,8888.879123,7140.212585,4647.993628,3295.888379,9925.633772,11152.231591,7421.813288,6059.161196,6676.151501,7780.405944,2469.015903,2239.094729,1097.926337,744.607103
2,002_S_0559,79.223288,0,1570479.625,0,2366.717680,3157.732947,512.577893,552.656000,1172.811970,1141.171359,23368.700372,3041.717375,3198.865741,49984.782089,49158.962150,17229.367210,17536.281134,4728.161925,4206.091848,764.648092,640.195023,16674.601836,16115.617714,1304.647848,1335.233771,4119.607512,4195.544978,7287.887330,7510.426292,5413.708490,5620.427146,3802.146719,1700.155481,2524.920733,563.202870,591.679420,115548.346197,110692.567142,30553.228374,27892.253015,54122.319281,61104.347374,63260.127649,63511.143161,543.163817,640.195023,3323.318810,3096.561100,2498.553558,2377.264550,12633.041163,3755.740489,4386.443329,3620.740551,3068.084550,2433.162962,2104.100611,11164.916827,10359.135942,3451.990627,2741.131573,3804.256093,3905.506047,4038.396612,3980.388825,2408.905161,2620.897253,1344.725955,1242.421313,3342.303176,2947.850230,8735.972613,10286.362537,1908.983512,2000.741283,5694.255238,4487.693284,12286.049133,15746.477256,9619.800338,9030.230292,2045.038138,2290.780214,3946.638841,4054.216917,1891.053833,1156.991664,16048.117744,14235.110752,4362.185528,5427.419421,4695.466627,4353.748032,653.905954,1110.585435,1854.139787,2276.014596,6314.411208,5783.903635,13022.220675,12608.783362,3445.662505,2907.772123,5668.942750,4525.662016,2641.990993,2326.639573,1201.288519,1131.679176,3535.310902,3904.451360,10796.831056,11263.002720,3952.966963,4306.287116,1615.780519,1374.257191,1595.741466,1631.600825,8051.480735,12965.267576,3138.748581,3280.076642,1138.007298,1420.663420,11015.151270,12643.588034,1885.780397,1861.522596,932.343328,1025.155787,11981.244583,11908.471179,4343.201161,5510.739696,7549.449712,5636.247452,2599.803512,2494.334810,7430.270078,6746.832887,7983.980765,7081.168674,10475.151514,11083.705926,3069.139237,2872.967451,1051.522962,1274.061924
3,002_S_0619,77.447945,0,1859348.250,2,5124.734093,2981.605944,280.547287,356.484899,876.446600,908.087272,21112.765397,2883.519862,2848.715123,56650.512928,55319.495347,13810.098417,14307.911649,3485.747309,3231.567248,1810.901098,1584.142953,47125.616120,60669.932896,1393.244235,1401.681747,3791.607134,3956.138626,7059.033810,7534.698571,5230.202998,5155.320075,4848.405562,2089.339007,3279.028256,557.930507,343.828630,105178.865483,105828.553938,30496.333873,30318.091423,52001.443596,58340.124785,64032.281587,63620.952857,546.328928,608.555582,3098.676428,2852.933879,2209.573559,2206.409492,12681.581134,2698.949278,4427.584631,3850.669721,3989.888675,2088.284318,2110.432788,10630.210932,9738.998685,2335.081556,3525.825493,3081.801403,3971.958961,4032.076237,5876.727385,1759.221335,1517.697543,1132.736039,1098.985990,2337.190934,1737.072865,9832.866011,8968.020990,2304.495574,2317.151842,8209.699563,8048.332138,12752.245300,13784.785880,9587.123462,11270.407185,3097.621739,2240.159542,4577.350476,4796.725798,1399.572369,1617.893002,17917.057577,16677.797943,6813.291261,7985.050795,5135.280983,4916.960350,831.094971,594.844624,2534.417786,2599.808508,5705.867759,4306.295390,11084.781913,13730.996738,5032.976145,4884.264989,4352.701708,5000.280785,2023.948286,2122.034368,1509.260030,1475.509981,5866.180495,5736.453741,11578.376388,11748.181325,3241.059450,2811.801007,1759.221335,2247.542365,2254.925188,2166.331308,9747.436198,10617.554664,2975.277809,3174.614040,1750.783823,1521.916299,12143.689718,12557.127826,1405.900503,1814.065166,1921.643449,1717.033773,12618.299791,13427.246292,3545.864585,3870.708812,7061.143188,8703.294038,3338.090842,2569.222525,10113.413298,11186.032061,7032.666584,6352.392146,7272.080998,7162.393337,2489.066157,2506.995871,1155.939198,1574.650751
5,002_S_0729,65.056164,1,1166961.750,1,966.095170,1921.643449,356.484899,415.547486,761.485494,853.243441,18195.495486,3485.747309,2921.488668,44029.049070,42879.438005,11626.892084,12026.619234,2887.738618,2592.425684,392.344326,283.711354,8403.762348,9388.841921,1287.775330,1255.079969,3543.755207,3659.771003,6422.001624,6685.673886,3798.989957,4033.130926,3400.317496,1551.447592,2534.417786,268.945708,158.203357,87173.214027,83566.177477,21149.679514,18514.011579,40216.348155,41546.311047,49603.080697,48494.602506,435.586578,433.477199,2722.152437,2497.503670,2012.346707,1917.424692,9808.608162,2805.472872,3514.223914,3042.777908,3393.989362,1351.056673,769.923006,9316.068376,7022.119693,2155.784418,2744.300907,2465.862998,2690.511766,3676.646027,3777.896176,1416.447394,1371.095765,1420.666150,1110.587569,1976.487279,1818.283922,7236.221570,7037.940029,1790.862006,1707.541571,5660.516130,4246.178114,10451.968483,10478.335709,6965.166484,7380.713970,1843.596459,1515.588164,3769.458664,3579.614635,1319.416001,1700.158748,15817.171679,13515.840172,4775.632017,4807.272689,3063.871689,3464.653528,1248.751835,534.727348,2865.590148,2599.808508,5004.499541,3889.693215,10419.273122,10018.491283,2405.745722,1425.939595,4143.873276,4313.678213,2103.049965,2000.745127,988.243640,1255.079969,2139.964082,3043.832597,8480.754649,6326.024920,2684.183632,2847.660434,1367.931697,1286.720641,1517.697543,1421.720839,6882.900738,9298.138662,2307.659641,2541.800610,1081.056276,1029.376513,8876.263042,9259.115167,980.860816,1336.291026,1059.962495,1116.915704,9093.528987,11257.750917,4128.052941,4293.639121,6690.947331,7654.933123,2551.292811,2318.206531,7825.792749,7452.432825,5885.164897,6838.603798,6198.407545,5983.250979,2492.230224,2634.613246,907.032583,965.040480
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1562,941_S_4377,69.187671,1,1213846.750,1,881.999947,1006.799940,291.599983,368.399978,848.399949,842.399950,16942.798990,2813.999832,2732.399837,38367.597713,37732.797751,10575.599370,10729.199360,3331.199801,3163.199811,277.199983,122.399993,10655.999365,10243.199389,1113.599934,1213.199928,3841.199771,3997.199762,6476.399614,6640.799604,3844.799771,3826.799772,3287.999804,2023.199879,2435.999855,275.999984,219.599987,70166.395818,70043.995825,18130.798919,17141.998978,34400.397950,34846.797923,39651.597637,39454.797648,511.199970,561.599967,2305.199863,2029.199879,1618.799904,1685.999900,9392.399440,2234.399867,2710.799838,3969.599763,3922.799766,1229.999927,1067.999936,4531.199730,4151.999753,3071.999817,3301.199803,2642.399843,2969.999823,3874.799769,3181.199810,1715.999898,1789.199893,1654.799901,1523.999909,2671.199841,2146.799872,6149.999633,5678.399662,1540.799908,1766.399895,6223.199629,5893.199649,9580.799429,9399.599440,6956.399585,6032.399640,1623.599903,1963.199883,3746.399777,3551.999788,673.199960,1451.999913,14074.799161,13335.599205,3883.199769,3945.599765,3530.399790,3260.399806,664.799960,518.399969,2203.199869,2095.199875,6124.799635,4739.999717,11431.199319,11432.399319,2571.599847,2919.599826,3221.999808,2474.399853,3680.399781,2359.199859,1059.599937,1625.999903,3466.799793,3680.399781,7255.199568,7193.999571,3002.399821,2816.399832,1857.599889,1628.399903,2129.999873,1987.199882,7388.399560,9505.199433,2341.199860,2143.199872,1143.599932,1201.199928,9118.799456,10125.599396,1430.399915,2164.799871,1171.199930,1341.599920,11763.599299,14017.199165,3299.999803,4031.999760,6173.999632,6542.399610,3784.799774,3429.599796,7781.999536,6890.399589,5427.599676,5349.599681,6283.199625,6148.799634,2323.199862,2611.199844,1282.799924,1429.199915
1563,941_S_4420,81.383562,0,1536545.875,1,1079.999936,1298.399923,431.999974,445.199973,1088.399935,985.199941,19977.598809,3214.799808,3257.999806,47859.597147,48692.397098,12937.199229,13166.399215,4255.199746,3687.599780,751.199955,795.599953,19527.598836,24886.798517,1491.599911,1505.999910,4939.199706,5123.999695,7327.199563,7389.599560,4948.799705,4453.199735,3733.199777,1881.599888,2569.199847,214.799987,379.199977,104933.993745,101272.793964,24487.198540,25827.598461,48183.597128,51826.796911,62361.596283,61167.596354,466.799972,519.599969,3023.999820,2663.999841,2312.399862,2177.999870,11644.799306,4216.799749,3838.799771,4249.199747,4172.399751,1487.999911,1598.399905,8042.399521,9107.999457,2579.999846,2102.399875,3128.399814,3187.199810,5668.799662,5121.599695,2408.399856,2120.399874,1171.199930,1093.199935,3076.799817,2522.399850,8249.999508,6717.599600,1777.199894,1523.999909,7329.599563,7115.999576,13821.599176,13209.599213,8264.399507,7203.599571,2499.599851,1689.599899,4246.799747,4184.399751,1691.999899,1109.999934,17227.198973,17866.798935,4737.599718,5257.199687,3725.999778,3925.199766,743.999956,706.799958,2959.199824,2786.399834,4959.599704,5493.599673,11245.199330,13256.399210,3637.199783,4102.799755,3964.799764,2979.599822,2301.599863,1179.599930,1507.199910,1688.399899,3853.199770,3777.599775,11365.199323,10028.399402,4081.199757,3471.599793,1966.799883,1672.799900,2194.799869,2072.399876,6458.399615,8288.399506,3418.799796,3122.399814,1657.199901,1630.799903,12458.399257,13774.799179,1852.799890,2234.399867,1373.999918,1565.999907,11845.199294,13184.399214,3931.199766,4689.599720,6287.999625,5626.799665,3124.799814,2771.999835,8386.799500,8369.999501,6985.199584,7204.799571,9337.199443,7413.599558,3394.799798,2974.799823,1454.399913,1228.799927
1564,941_S_4764,82.672603,1,1438682.375,1,2911.199826,2806.799833,203.999988,257.999985,872.399948,879.599948,20135.998800,2771.999835,2692.799839,49766.397034,49005.597079,14546.399133,14725.199122,3502.799791,3417.599796,1269.599924,898.799946,34168.797963,24159.598560,1229.999927,1301.999922,3470.399793,3406.799797,6903.599589,7033.199581,4495.199732,4612.799725,3977.999763,2102.399875,2887.199828,340.799980,275.999984,77746.795366,74926.795534,21007.198748,18071.998923,36950.397798,39129.597668,46949.997202,45447.597291,604.799964,455.999973,2845.199830,2633.999843,1772.399894,1849.199890,8560.799490,2911.199826,2881.199828,3688.799780,3614.399785,1567.199907,973.199942,7833.599533,5200.799690,2761.199835,2303.999863,2530.799849,2887.199828,3716.399778,3257.999806,2343.599860,2126.399873,1429.199915,1045.199938,2788.799834,2362.799859,7852.799532,7195.199571,1484.399912,1898.399887,6519.599611,4475.999733,10491.599375,9629.999426,8755.199478,7741.199539,1605.599904,1519.199909,3568.799787,3154.799812,1018.799939,1431.599915,14619.599129,14492.399136,4258.799746,4612.799725,2867.999829,3763.199776,862.799949,463.199972,2491.199852,1895.999887,5236.799688,4847.999711,10013.999403,11163.599335,3194.399810,2228.399867,3721.199778,3015.599820,1940.399884,1718.399898,970.799942,1102.799934,2851.199830,3519.599790,8074.799519,7475.999554,3034.799819,3064.799817,1535.999908,1312.799922,1397.999917,1745.999896,6812.399594,9147.599455,2774.399835,2467.199853,1150.799931,1268.399924,10103.999398,10629.599366,1148.399932,1419.599915,1233.599926,1661.999901,11665.199305,10906.799350,3868.799769,4814.399713,5631.599664,6375.599620,3123.599814,3779.999775,8471.999495,6921.599587,5782.799655,6337.199622,7234.799569,7052.399580,1897.199887,1976.399882,826.799951,1083.599935
1565,941_S_5124,76.664384,1,1353772.750,0,1496.399911,1142.399932,226.799986,238.799986,814.799951,986.399941,17121.598979,2571.599847,2613.599844,40799.997568,37919.997740,11252.399329,11056.799341,2611.199844,2401.199857,1173.599930,903.599946,39554.397642,49455.597052,1089.599935,1198.799929,3371.999799,3441.599795,6752.399598,7288.799566,3669.599781,4016.399761,4837.199712,2242.799866,2045.999878,368.399978,356.399979,72938.395653,72790.795661,16736.399002,19359.598846,37213.197782,37405.197770,52543.196868,48931.197083,151.199991,237.599986,2701.199839,2535.599849,1742.399896,1747.199896,9619.199427,3761.999776,4827.599712,3044.399819,3215.999808,1461.599913,1097.999935,5925.599647,8337.599503,3215.999808,2239.199867,2836.799831,2810.399832,5439.599676,4383.599739,2066.399877,2001.599881,1113.599934,1605.599904,3344.399801,2876.399829,5933.999646,6645.599604,1389.599917,1534.799909,7534.799551,6320.399623,11071.199340,9958.799406,7286.399566,8525.999492,2161.199871,2290.799863,4663.199722,4142.399753,1066.799936,1808.399892,16085.999041,15931.199050,2968.799823,5188.799691,3347.999800,3628.799784,843.599950,481.199971,1402.799916,1694.399899,5456.399675,3436.799795,15909.599052,13811.999177,3209.999809,4238.399747,3220.799808,3424.799796,2506.799851,2048.399878,1117.199933,1532.399909,3375.599799,3566.399787,8815.199475,7204.799571,3658.799782,3085.199816,1631.999903,1789.199893,1604.399904,1658.399901,8210.399511,9099.599458,2953.199824,2977.199823,1442.399914,1610.399904,9107.999457,10970.399346,1537.199908,1352.399919,1473.599912,1432.799915,10263.599388,11079.599340,3147.599812,3183.599810,7949.999526,7853.999532,3065.999817,4151.999753,9046.799461,7985.999524,5089.199697,5537.999670,6945.599586,7079.999578,2033.999879,1708.799898,739.199956,791.999953


In [41]:
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier


# Splitting dataset into train and test
X = data_age_filtered.drop(['diagnosis', 'PTID'], axis=1)
y = data_age_filtered['diagnosis']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define the cross-validator
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)


# Define the parameter grid for manual hyperparameter tuning
# param_grid = {
#     'n_neighbors': [3, 5, 7, 10, 15],  # Example: trying different numbers of neighbors
#     'weights': ['uniform', 'distance'],  # Example: uniform weights or distance-based
#     'metric': ['euclidean', 'manhattan']  # Example: different distance metrics
# }
# Define the KNN classifier
knn_model = KNeighborsClassifier(n_neighbors=30)

i=0

# Define a function for visualization
def visualize_data(X, y, title):
    # Separate the data based on diagnosis
    data_age_cn = X[y == 0]['Age']
    data_vol_cn = X[y == 0]['MUSE_Volume_48']

    data_age_mci = X[y == 1]['Age']
    data_vol_mci = X[y == 1]['MUSE_Volume_48']

    data_age_dem = X[y == 2]['Age']
    data_vol_dem = X[y == 2]['MUSE_Volume_48']

    # Scatter plot
    plt.scatter(data_age_cn, data_vol_cn, s=10, c='blue')
    plt.scatter(data_age_mci, data_vol_mci, s=10, c='green')
    plt.scatter(data_age_dem, data_vol_dem, s=10, c='red')

    plt.xlabel("Age (years)")
    plt.ylabel("Volume (mm^3)")
    plt.legend(["CN", "MCI", "DEM"])
    plt.title(title)
    plt.show()

# Iterate over each fold
for train_index, val_index in skf.split(X_train, y_train):
    # Split the data into the current fold's training and validation partitions
    fold_X_train, fold_X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    fold_y_train, fold_y_val = y_train.iloc[train_index], y_train.iloc[val_index]

    # Visualize initial data
    # visualize_data(fold_X_train, fold_y_train, "Initial Data - Fold")

    # Linear correction
    data_controls_train = fold_X_train[fold_y_train == 0]  # Control group for the fold
    for roi_feature in roi_features:
        if roi_feature in fold_X_train.columns:
            regr = LinearRegression()
            regr.fit(data_controls_train[['Sex', 'Age', 'DLICV_baseline']], data_controls_train[roi_feature])
            # Apply correction to the training set
            correction_train = regr.predict(fold_X_train[['Sex', 'Age', 'DLICV_baseline']])
            fold_X_train[roi_feature] -= correction_train

            # Apply the same correction to the validation set
            correction_val = regr.predict(fold_X_val[['Sex', 'Age', 'DLICV_baseline']])
            fold_X_val[roi_feature] -= correction_val
    # visualize_data(fold_X_train, fold_y_train, "Linearly Corrected Data - Fold")

    # Z-normalization using control group in fold_X_train
    scaler = StandardScaler().fit(data_controls_train[roi_features])
    fold_X_train[roi_features] = scaler.transform(fold_X_train[roi_features])
    fold_X_val[roi_features] = scaler.transform(fold_X_val[roi_features])
    # visualize_data(fold_X_train, fold_y_train, "Z-Normalized Data - Fold")

    fold_X_train = fold_X_train.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
    fold_X_val = fold_X_val.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

    # Train the KNN model
    knn_model.fit(fold_X_train, fold_y_train)

    # Make predictions on the validation set
    y_pred_val = knn_model.predict(fold_X_val)

    # Calculate metrics (weighted because of dataset's imbalance)
    accuracy = knn_model.score(fold_X_val, fold_y_val)
    precision = precision_score(fold_y_val, y_pred_val, average='weighted')
    recall = recall_score(fold_y_val, y_pred_val, average='weighted')
    f1 = f1_score(fold_y_val, y_pred_val, average='weighted')
    auc = roc_auc_score(fold_y_val, knn_model.predict_proba(fold_X_val), multi_class='ovr', average='weighted')

    # Print the validation metrics
    print(f"Validation Metrics, Fold {i}: Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}, F1 Score: {f1}, AUC: {auc}")
    i+=1

Validation Metrics, Fold 0: Accuracy: 0.49572649572649574, Precision: 0.5077061303476398, Recall: 0.49572649572649574, F1 Score: 0.4799553933040288, AUC: 0.6212442832508623
Validation Metrics, Fold 1: Accuracy: 0.5641025641025641, Precision: 0.5614484930274404, Recall: 0.5641025641025641, F1 Score: 0.5468554614482216, AUC: 0.6725786218725885
Validation Metrics, Fold 2: Accuracy: 0.5555555555555556, Precision: 0.5444476786066054, Recall: 0.5555555555555556, F1 Score: 0.5334940056853293, AUC: 0.7152318910603511
Validation Metrics, Fold 3: Accuracy: 0.5042735042735043, Precision: 0.5568586453427052, Recall: 0.5042735042735043, F1 Score: 0.4917032773558271, AUC: 0.6318230860578108
Validation Metrics, Fold 4: Accuracy: 0.46153846153846156, Precision: 0.5119248035914703, Recall: 0.46153846153846156, F1 Score: 0.45054475468506167, AUC: 0.6207331744234347
Validation Metrics, Fold 5: Accuracy: 0.5299145299145299, Precision: 0.5566051881841356, Recall: 0.5299145299145299, F1 Score: 0.50972169592

Training on the whole train dataset and fitting on both the train and the test data

In [42]:
from sklearn.metrics import balanced_accuracy_score

# Define the control group from the training set
data_controls_train = X_train[y_train == 0]

# Linear Correction
for roi_feature in roi_features:
    if roi_feature in X_train.columns:
        regr = LinearRegression()
        regr.fit(data_controls_train[['Sex', 'Age', 'DLICV_baseline']], data_controls_train[roi_feature])
        # Apply correction to the training set
        correction_train = regr.predict(X_train[['Sex', 'Age', 'DLICV_baseline']])
        X_train[roi_feature] -= correction_train

        # Apply the same correction to the test set
        correction_test = regr.predict(X_test[['Sex', 'Age', 'DLICV_baseline']])
        X_test[roi_feature] -= correction_test

X_train = X_train.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
X_test = X_test.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

# Z-Normalization
scaler = StandardScaler().fit(data_controls_train[roi_features])
X_train[roi_features] = scaler.transform(X_train[roi_features])
X_test[roi_features] = scaler.transform(X_test[roi_features])

# After cross-validation, train the model on the entire training set
knn_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_test = knn_model.predict(X_test)

# Calculate and print standard accuracy for the test set
test_accuracy = knn_model.score(X_test, y_test)
print(f"Test set accuracy: {test_accuracy}")

# Calculate and print balanced accuracy for the test set
test_balanced_accuracy = balanced_accuracy_score(y_test, y_pred_test)
print(f"Test set Balanced Accuracy: {test_balanced_accuracy}")

# Calculate and print metrics for the test set
test_precision = precision_score(y_test, y_pred_test, average='weighted')
test_recall = recall_score(y_test, y_pred_test, average='weighted')
test_f1 = f1_score(y_test, y_pred_test, average='weighted')
test_auc = roc_auc_score(y_test, knn_model.predict_proba(X_test), multi_class='ovr', average='weighted')

print(f"Test set Precision: {test_precision}, Recall: {test_recall}, F1 Score: {test_f1}, AUC: {test_auc}")


Test set accuracy: 0.5187713310580204
Test set Balanced Accuracy: 0.4825734825734826
Test set Precision: 0.5323172687083401, Recall: 0.5187713310580204, F1 Score: 0.4914512124158869, AUC: 0.6942144681810585


In [43]:
X_train

Unnamed: 0,MUSE_Volume_4,MUSE_Volume_11,MUSE_Volume_23,MUSE_Volume_30,MUSE_Volume_31,MUSE_Volume_32,MUSE_Volume_35,MUSE_Volume_36,MUSE_Volume_37,MUSE_Volume_38,MUSE_Volume_39,MUSE_Volume_40,MUSE_Volume_41,MUSE_Volume_47,MUSE_Volume_48,MUSE_Volume_49,MUSE_Volume_50,MUSE_Volume_51,MUSE_Volume_52,MUSE_Volume_55,MUSE_Volume_56,MUSE_Volume_57,MUSE_Volume_58,MUSE_Volume_59,MUSE_Volume_60,MUSE_Volume_61,MUSE_Volume_62,MUSE_Volume_71,MUSE_Volume_72,MUSE_Volume_73,MUSE_Volume_75,MUSE_Volume_76,MUSE_Volume_81,MUSE_Volume_82,MUSE_Volume_83,MUSE_Volume_84,MUSE_Volume_85,MUSE_Volume_86,MUSE_Volume_87,MUSE_Volume_88,MUSE_Volume_89,MUSE_Volume_90,MUSE_Volume_91,MUSE_Volume_92,MUSE_Volume_93,MUSE_Volume_94,MUSE_Volume_95,MUSE_Volume_100,MUSE_Volume_101,MUSE_Volume_102,MUSE_Volume_103,MUSE_Volume_104,MUSE_Volume_105,MUSE_Volume_106,MUSE_Volume_107,MUSE_Volume_108,MUSE_Volume_109,MUSE_Volume_112,MUSE_Volume_113,MUSE_Volume_114,MUSE_Volume_115,MUSE_Volume_116,MUSE_Volume_117,MUSE_Volume_118,MUSE_Volume_119,MUSE_Volume_120,MUSE_Volume_121,MUSE_Volume_122,MUSE_Volume_123,MUSE_Volume_124,MUSE_Volume_125,MUSE_Volume_128,MUSE_Volume_129,MUSE_Volume_132,MUSE_Volume_133,MUSE_Volume_134,MUSE_Volume_135,MUSE_Volume_136,MUSE_Volume_137,MUSE_Volume_138,MUSE_Volume_139,MUSE_Volume_140,MUSE_Volume_141,MUSE_Volume_142,MUSE_Volume_143,MUSE_Volume_144,MUSE_Volume_145,MUSE_Volume_146,MUSE_Volume_147,MUSE_Volume_148,MUSE_Volume_149,MUSE_Volume_150,MUSE_Volume_151,MUSE_Volume_152,MUSE_Volume_153,MUSE_Volume_154,MUSE_Volume_155,MUSE_Volume_156,MUSE_Volume_157,MUSE_Volume_160,MUSE_Volume_161,MUSE_Volume_162,MUSE_Volume_163,MUSE_Volume_164,MUSE_Volume_165,MUSE_Volume_166,MUSE_Volume_167,MUSE_Volume_168,MUSE_Volume_169,MUSE_Volume_170,MUSE_Volume_171,MUSE_Volume_172,MUSE_Volume_173,MUSE_Volume_174,MUSE_Volume_175,MUSE_Volume_176,MUSE_Volume_177,MUSE_Volume_178,MUSE_Volume_179,MUSE_Volume_180,MUSE_Volume_181,MUSE_Volume_182,MUSE_Volume_183,MUSE_Volume_184,MUSE_Volume_185,MUSE_Volume_186,MUSE_Volume_187,MUSE_Volume_190,MUSE_Volume_191,MUSE_Volume_192,MUSE_Volume_193,MUSE_Volume_194,MUSE_Volume_195,MUSE_Volume_196,MUSE_Volume_197,MUSE_Volume_198,MUSE_Volume_199,MUSE_Volume_200,MUSE_Volume_201,MUSE_Volume_202,MUSE_Volume_203,MUSE_Volume_204,MUSE_Volume_205,MUSE_Volume_206,MUSE_Volume_207
1230,-0.969280,-2.897773,-5.826924,-6.566835,-9.435339,-10.796513,-9.450198,-5.083422,-6.856292,-9.360272,-9.760974,-8.629836,-8.864749,-9.327416,-10.622252,1.567442,0.810521,-1.712461,-1.302717,-8.848113,-9.480777,-6.548312,-7.835744,-11.643848,-11.660340,-9.889767,-10.501174,-7.162980,-7.641651,-8.508720,-2.732457,-2.050215,-8.161615,-8.463451,-6.419611,-6.437326,-8.261183,-7.959536,-8.238770,-8.339332,-6.632654,-7.660679,-8.487412,-8.716692,-6.405287,-7.908042,-7.220253,-5.663365,-5.538962,-8.163488,-8.574432,-5.012114,-6.415330,-5.931771,-3.924859,-4.934002,-4.368135,-5.531522,-6.303038,-6.010871,-6.271780,-6.772449,-8.453138,-6.781615,-5.858275,-4.700090,-2.715416,-7.043744,-8.031861,-4.346246,-5.629856,-6.767629,-7.860569,-7.503964,-7.233227,-7.856676,-6.498561,-3.796431,-4.251992,-6.375621,-7.149372,-4.069807,-4.786916,-6.785174,-7.485986,-3.902526,-3.912809,-6.913142,-7.816538,-3.114498,-2.204363,-4.750466,-5.044285,-5.663508,-6.199700,-8.351281,-7.556202,-4.424629,-3.648512,-7.035693,-6.788613,-5.901114,-5.548799,-3.038108,-2.632759,-7.916607,-8.946271,-8.772100,-7.267878,-7.018663,-8.210615,-7.256860,-7.027779,-5.476323,-6.057825,-5.671300,-7.079513,-8.078688,-6.767054,-6.635224,-5.907282,-6.473232,-7.688196,-5.234535,-6.524393,-6.445678,-6.140252,-7.024686,-7.035499,-6.081744,-5.247474,-6.888732,-7.673690,-5.930361,-5.175756,-7.139301,-6.947489,-5.045482,-6.555628,-6.800062,-7.011173,-5.313479,-5.070172,-6.624213,-5.093842
707,-1.805957,-2.427703,-6.529258,-5.688826,-8.973259,-10.095661,-10.048139,-6.648570,-6.996683,-10.541036,-10.431576,-9.347441,-9.386701,-12.335770,-12.016754,1.558413,0.508998,-2.102389,-2.339932,-7.758867,-7.982511,-8.450282,-7.841651,-12.652828,-12.167022,-11.397402,-11.224613,-6.719831,-7.774499,-7.729640,-1.361767,-0.698660,-8.031385,-8.212808,-7.297783,-7.561230,-7.594817,-7.433118,-9.332229,-8.951302,-10.032565,-10.304629,-9.030332,-8.711216,-8.059205,-8.361232,-8.619758,-5.574974,-4.990768,-6.970107,-6.844387,-7.504916,-6.605407,-7.677169,-6.405608,-5.257721,-5.827656,-5.525637,-6.642721,-7.163280,-7.526112,-6.386354,-8.886581,-4.688594,-4.179243,-5.503409,-5.015376,-7.859466,-8.178861,-6.172612,-5.230292,-8.068037,-6.751876,-7.990125,-8.245281,-8.103833,-7.775490,-5.964041,-5.058253,-8.395198,-6.083962,-5.326212,-2.873658,-6.675477,-7.627149,-6.355855,-7.489663,-7.889272,-7.371439,-4.064331,-4.019868,-6.498603,-6.067757,-5.335452,-6.002330,-7.830550,-7.794313,-6.509876,-5.056056,-5.928622,-7.017176,-3.131059,-4.580402,-4.021246,-3.455624,-7.319614,-7.547786,-7.607137,-6.865577,-9.603669,-8.958761,-5.750562,-5.837442,-5.156826,-5.099791,-6.296465,-6.484228,-5.626799,-5.017202,-4.666678,-6.080970,-7.788667,-9.173018,-5.604747,-4.238075,-4.572663,-5.468355,-9.087857,-8.967838,-5.966804,-6.645480,-6.380033,-6.364684,-5.884160,-5.482599,-6.997831,-6.566305,-7.517710,-6.331051,-7.461466,-7.186693,-6.447263,-5.286763,-5.101909,-4.759780
776,-2.406020,-2.468510,-4.249822,-5.432369,-7.904314,-8.192540,-9.365725,-6.959046,-8.418271,-9.372938,-9.683796,-8.397922,-8.328525,-9.017253,-8.977651,-1.535289,-1.738836,-2.982837,-2.897131,-7.535935,-7.314406,-8.002411,-8.560884,-11.502192,-11.546820,-10.411076,-10.490573,-5.719737,-8.087635,-5.445204,-3.917251,-4.142747,-7.720201,-7.644172,-6.959746,-5.936298,-7.094192,-7.315286,-7.720980,-7.587664,-7.561175,-7.656185,-8.691939,-8.192666,-8.034007,-8.630101,-5.775980,-4.679952,-6.901068,-9.030938,-9.114924,-5.994861,-6.970266,-7.111836,-4.462231,-5.043906,-4.591543,-6.205802,-5.020404,-5.903744,-5.714471,-6.314441,-6.499641,-5.224008,-4.700787,-6.665479,-5.776604,-7.373952,-8.384882,-7.268215,-6.877807,-7.206159,-6.170926,-7.329136,-7.608752,-8.348395,-8.105632,-6.233841,-6.274500,-6.487672,-7.008555,-6.081378,-4.859061,-7.593295,-7.000103,-7.166487,-5.132451,-8.960424,-8.613184,-5.018293,-3.126386,-5.247899,-4.451306,-7.296224,-7.948039,-7.073627,-7.670736,-5.294431,-4.332611,-5.264740,-6.699661,-4.892058,-4.939011,-5.065318,-5.965306,-5.807946,-6.565519,-6.752182,-7.516679,-8.352626,-8.889466,-7.030748,-5.978739,-5.097003,-5.691400,-5.707541,-6.272026,-8.432188,-8.509004,-5.837143,-5.916683,-8.772425,-7.191073,-5.454795,-5.261599,-5.646385,-5.229532,-8.588435,-8.770416,-5.483587,-6.499944,-5.527137,-6.826900,-5.375450,-5.204438,-7.583381,-7.945395,-7.669694,-5.922192,-6.317157,-6.693319,-5.239117,-5.961794,-5.655121,-3.668793
556,-3.513504,-4.119056,-5.814328,-5.342828,-7.734675,-7.679345,-9.686122,-6.572420,-7.750538,-10.461084,-10.081228,-9.563857,-9.202939,-9.095741,-9.034000,-1.238298,-2.104966,-3.165281,-3.119899,-8.358221,-8.502169,-8.939516,-8.717114,-11.639311,-11.955415,-10.655458,-10.668495,-6.873243,-7.139303,-7.235202,-2.175498,-2.332024,-7.702064,-7.641129,-6.921787,-6.286925,-7.629136,-7.497357,-8.201729,-7.571860,-7.075967,-7.459982,-8.341962,-7.580927,-7.896202,-8.412337,-5.761849,-6.107103,-6.851703,-7.028044,-6.969041,-4.448078,-5.814731,-4.867752,-2.869057,-3.993654,-4.236259,-5.355150,-5.510423,-6.102606,-5.531151,-6.795008,-7.662218,-4.440286,-4.925425,-5.128946,-4.481438,-7.114618,-7.571555,-5.519086,-6.503357,-5.896234,-6.133908,-8.168115,-6.362510,-5.405097,-6.164541,-4.379661,-4.402287,-6.656720,-6.216231,-4.073790,-5.110991,-6.377177,-6.432477,-4.954760,-6.719574,-8.149655,-8.008217,-4.960516,-3.663226,-5.664058,-5.278326,-5.692084,-6.106870,-7.338082,-6.734302,-3.768902,-3.648138,-6.358800,-7.231729,-4.669032,-1.044049,-3.390222,-4.547350,-5.707268,-6.701147,-5.852780,-5.568714,-6.985416,-6.717271,-5.203819,-3.942755,-2.905116,-4.320083,-6.683903,-5.981720,-6.884401,-6.726735,-6.660946,-6.415638,-7.991443,-7.486410,-1.993938,-2.020557,-6.127888,-6.862881,-7.834965,-7.345231,-6.538881,-7.052622,-7.049073,-6.149329,-5.980133,-3.912381,-6.711376,-6.961517,-4.616820,-5.675040,-7.105841,-6.465140,-4.379183,-5.522850,-3.938404,-2.868293
910,-2.983050,-2.907952,-6.928117,-6.878507,-7.373132,-8.209575,-10.069761,-7.435292,-7.097642,-9.064132,-9.211535,-8.384458,-8.752097,-7.500121,-8.358380,-2.093876,-2.048850,-2.875612,-2.787841,-8.836439,-8.744546,-8.518969,-8.727428,-11.802984,-11.693634,-11.155500,-11.317163,-7.281042,-5.749608,-6.321687,-3.276546,-2.735242,-8.357032,-8.701562,-7.539610,-7.307223,-8.767972,-8.459264,-9.423669,-9.120734,-7.715515,-7.718738,-9.256186,-9.145082,-8.796583,-9.017965,-6.997467,-5.410572,-5.734723,-7.035957,-7.678113,-4.165039,-3.165533,-5.189389,-4.785859,-4.794456,-4.025250,-5.083833,-5.340652,-5.338419,-6.346555,-5.683185,-6.937150,-3.778633,-5.348107,-6.820494,-5.104871,-6.962459,-7.695977,-6.860835,-6.010295,-6.854099,-6.627116,-7.686927,-7.791263,-6.447135,-7.737121,-5.122148,-5.564456,-7.102140,-7.775081,-4.566860,-3.175441,-6.527847,-7.049139,-4.906794,-5.091513,-6.239935,-6.257950,-3.887502,-3.752526,-4.992544,-5.789197,-6.399935,-6.842710,-7.767034,-6.798935,-4.706069,-4.978591,-6.898116,-3.995658,-3.239948,-3.952604,-2.606959,-3.788333,-5.822256,-5.793162,-7.539262,-7.721852,-6.627048,-7.445877,-5.491348,-4.639577,-3.153523,-3.652652,-5.865089,-6.141461,-5.599062,-6.778027,-3.888831,-5.147269,-7.448682,-7.841601,-3.279066,-2.541169,-5.064923,-5.520003,-7.835296,-6.711835,-6.426833,-7.110943,-6.044372,-5.754573,-4.276293,-3.389100,-6.226473,-6.075694,-5.253604,-5.082034,-6.531980,-6.542285,-4.372367,-5.567967,-2.742226,-3.541588
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
305,-0.384200,-2.201999,-7.083682,-7.499398,-8.433117,-8.327417,-8.284521,-7.371661,-8.128688,-9.418195,-9.492172,-8.228948,-8.423016,-10.161010,-10.406081,1.906669,5.149928,-0.743546,0.553756,-8.392901,-8.182946,-9.287259,-8.769173,-12.355105,-12.540065,-9.985208,-9.495955,-8.310655,-7.913529,-6.737443,-3.597352,-4.054856,-7.832092,-7.906970,-6.662407,-5.850888,-7.520513,-7.711652,-7.967482,-7.681341,-7.094350,-7.092988,-8.348923,-7.828315,-8.314790,-9.128952,-7.431749,-4.826310,-7.239011,-8.944038,-9.442183,-6.306160,-5.295814,-7.216045,-6.102503,-5.449258,-5.739087,-4.763105,-6.751691,-5.710235,-5.273211,-8.268278,-7.597093,-6.634617,-6.828313,-6.080318,-6.017413,-7.244875,-8.727304,-7.039484,-7.520118,-7.426554,-8.092200,-8.886794,-10.943384,-8.115030,-5.977272,-6.413771,-5.596080,-8.008699,-6.334748,-5.270197,-5.157529,-7.736561,-8.450263,-6.071275,-6.381940,-8.991921,-9.019608,-3.926297,-2.785351,-3.913143,-4.213820,-7.569550,-7.131793,-10.621920,-10.858118,-5.814442,-5.750895,-5.467064,-6.439048,-6.014380,-6.617339,-4.695981,-4.456089,-7.208930,-8.360058,-7.551173,-8.436554,-8.808255,-9.132760,-7.846543,-8.008480,-4.033073,-4.274523,-5.524862,-7.112531,-6.578834,-6.824001,-6.066974,-8.615333,-7.979629,-8.368954,-4.729548,-4.451173,-4.748293,-4.763753,-8.278858,-8.624093,-5.461284,-6.860365,-7.994508,-7.585391,-4.953927,-3.114657,-7.493738,-7.805007,-5.714245,-7.850078,-8.137234,-8.779471,-5.663865,-5.935353,-2.707105,-4.346039
693,-2.806770,-3.729248,-5.684638,-5.970230,-7.609696,-8.137041,-10.112261,-7.987984,-8.576278,-10.377038,-10.556642,-8.665627,-8.788296,-8.637442,-8.667647,-1.728450,-1.482021,-2.010743,-2.179745,-9.171764,-9.133427,-8.895872,-9.178804,-11.495817,-11.382913,-10.877933,-11.137350,-7.983952,-7.063936,-8.072813,-3.386797,-3.034304,-7.688181,-7.833604,-6.230086,-5.662958,-6.961230,-7.355432,-7.292832,-7.231783,-7.390670,-7.105339,-7.935746,-8.658430,-8.140174,-8.815980,-6.781293,-6.046834,-6.656917,-7.508473,-7.389842,-6.201016,-4.553567,-7.205397,-5.611080,-4.827190,-4.768261,-5.181510,-6.093572,-5.780621,-6.722328,-6.990656,-7.062135,-5.850219,-4.564167,-5.878590,-5.665117,-5.851203,-6.591886,-6.591187,-7.004583,-7.123863,-6.503438,-7.214906,-7.509768,-6.758732,-7.261563,-5.711730,-5.930316,-7.509893,-7.765511,-5.495243,-4.529506,-8.575491,-7.778237,-6.095449,-5.632011,-8.148168,-8.631734,-4.490419,-3.974502,-5.625157,-5.855517,-6.701466,-7.246392,-7.702831,-7.459011,-6.497652,-3.459545,-5.384737,-5.904252,-5.610708,-3.807020,-3.478289,-5.293228,-6.957698,-6.199170,-6.382275,-7.074889,-7.623879,-8.371803,-6.257937,-5.592436,-3.594652,-4.263131,-5.818312,-7.358824,-6.512150,-6.597097,-5.784442,-5.372766,-8.715668,-9.262929,-4.018501,-3.408673,-4.941977,-5.055222,-8.317449,-9.249692,-6.353823,-6.987053,-6.963406,-6.419599,-4.184138,-5.493074,-6.467959,-6.449459,-5.164916,-5.340570,-6.828684,-7.056006,-5.278149,-4.148914,-3.024897,-2.619531
91,-1.702711,-4.139139,-6.604716,-7.054318,-9.910407,-9.701874,-10.087537,-7.809729,-8.901925,-9.613696,-11.323593,-8.580266,-9.584540,-10.162726,-11.011477,0.583084,0.551315,-1.877665,-0.784337,-8.536395,-9.013594,-7.877848,-7.754098,-12.687525,-12.469566,-11.370851,-11.068683,-6.753923,-6.349859,-7.661931,-4.930874,-2.822306,-7.547215,-7.613987,-7.099974,-6.962512,-7.677370,-7.524889,-8.048943,-7.974468,-6.791974,-6.195036,-8.452247,-7.929953,-7.466315,-7.461775,-6.671215,-5.985908,-5.516023,-8.143180,-8.775051,-6.070100,-4.328243,-6.854528,-5.844573,-5.481458,-5.955110,-5.859806,-4.903213,-5.531624,-5.244489,-7.997730,-10.380937,-6.430378,-6.078994,-6.701415,-5.343785,-7.960052,-8.508418,-7.055820,-7.527613,-6.958684,-7.322629,-7.743048,-7.860505,-6.525199,-7.949978,-6.423555,-4.787856,-6.741530,-6.038764,-5.578767,-5.779192,-8.453318,-8.297781,-6.913005,-5.209905,-7.609931,-8.740341,-4.274209,-3.216015,-4.928981,-4.272101,-5.641330,-6.426725,-9.382416,-8.407384,-5.605503,-4.303949,-4.803578,-5.353483,-4.748964,-3.135216,-4.222713,-3.925050,-6.683121,-7.691397,-7.005991,-6.674763,-8.285073,-9.852088,-6.743424,-6.400570,-3.778084,-4.040193,-6.504243,-6.032007,-7.395883,-5.675392,-6.676670,-7.404526,-7.830480,-6.935547,-5.050528,-4.377045,-5.045217,-4.886633,-7.222843,-8.018936,-6.285999,-7.903431,-5.941986,-7.089297,-5.294224,-4.972526,-7.753287,-7.702850,-6.818936,-8.562630,-7.624531,-8.555835,-2.722535,-4.207863,-3.736557,-2.669427
1055,-1.633417,-3.476802,-6.154217,-6.352032,-7.850208,-8.556278,-9.338333,-7.324828,-9.036368,-8.422373,-8.613170,-7.820384,-7.948823,-9.275302,-9.584107,-0.083082,-0.287304,-1.140154,-0.928568,-8.717687,-8.870170,-9.266736,-9.498271,-11.885028,-11.838413,-10.082125,-10.275763,-6.828135,-7.435881,-7.280714,-2.567306,-1.755411,-7.828413,-7.732048,-7.046911,-6.406705,-8.304279,-8.437166,-7.794617,-7.926404,-4.815135,-6.449246,-8.564036,-8.233082,-7.288767,-8.579793,-8.042280,-5.533173,-6.653417,-7.461461,-8.144710,-5.868556,-6.252123,-5.112438,-5.664223,-5.290720,-5.325491,-6.538047,-7.688462,-5.883416,-6.283701,-7.475616,-7.972127,-7.612066,-6.184677,-5.635132,-4.620723,-9.325645,-8.601571,-6.247673,-5.507377,-7.445252,-7.179810,-7.638823,-8.583374,-7.891487,-7.172479,-3.848818,-4.597902,-7.801546,-7.601154,-6.134001,-4.938510,-8.151099,-7.521488,-6.685152,-4.893301,-8.381504,-8.300967,-3.072673,-1.785256,-4.961705,-5.352827,-6.567683,-7.653775,-8.233158,-8.686829,-5.310489,-2.622575,-7.579049,-7.024024,-6.544020,-5.443558,-4.103272,-5.015488,-8.604577,-8.329589,-8.326901,-8.304589,-7.366757,-7.346679,-7.443349,-7.263040,-5.777738,-5.257748,-6.297060,-7.906163,-5.798188,-7.790427,-5.837002,-5.564100,-7.042886,-7.225689,-4.601443,-4.669713,-5.669239,-5.188073,-7.173225,-7.738378,-6.286509,-6.904201,-7.551902,-8.079525,-6.654647,-6.630546,-8.018376,-8.334852,-6.919923,-6.742200,-7.532045,-6.999810,-4.477951,-4.875003,-5.678173,-4.548348


In [44]:
X_test

Unnamed: 0,MUSE_Volume_4,MUSE_Volume_11,MUSE_Volume_23,MUSE_Volume_30,MUSE_Volume_31,MUSE_Volume_32,MUSE_Volume_35,MUSE_Volume_36,MUSE_Volume_37,MUSE_Volume_38,MUSE_Volume_39,MUSE_Volume_40,MUSE_Volume_41,MUSE_Volume_47,MUSE_Volume_48,MUSE_Volume_49,MUSE_Volume_50,MUSE_Volume_51,MUSE_Volume_52,MUSE_Volume_55,MUSE_Volume_56,MUSE_Volume_57,MUSE_Volume_58,MUSE_Volume_59,MUSE_Volume_60,MUSE_Volume_61,MUSE_Volume_62,MUSE_Volume_71,MUSE_Volume_72,MUSE_Volume_73,MUSE_Volume_75,MUSE_Volume_76,MUSE_Volume_81,MUSE_Volume_82,MUSE_Volume_83,MUSE_Volume_84,MUSE_Volume_85,MUSE_Volume_86,MUSE_Volume_87,MUSE_Volume_88,MUSE_Volume_89,MUSE_Volume_90,MUSE_Volume_91,MUSE_Volume_92,MUSE_Volume_93,MUSE_Volume_94,MUSE_Volume_95,MUSE_Volume_100,MUSE_Volume_101,MUSE_Volume_102,MUSE_Volume_103,MUSE_Volume_104,MUSE_Volume_105,MUSE_Volume_106,MUSE_Volume_107,MUSE_Volume_108,MUSE_Volume_109,MUSE_Volume_112,MUSE_Volume_113,MUSE_Volume_114,MUSE_Volume_115,MUSE_Volume_116,MUSE_Volume_117,MUSE_Volume_118,MUSE_Volume_119,MUSE_Volume_120,MUSE_Volume_121,MUSE_Volume_122,MUSE_Volume_123,MUSE_Volume_124,MUSE_Volume_125,MUSE_Volume_128,MUSE_Volume_129,MUSE_Volume_132,MUSE_Volume_133,MUSE_Volume_134,MUSE_Volume_135,MUSE_Volume_136,MUSE_Volume_137,MUSE_Volume_138,MUSE_Volume_139,MUSE_Volume_140,MUSE_Volume_141,MUSE_Volume_142,MUSE_Volume_143,MUSE_Volume_144,MUSE_Volume_145,MUSE_Volume_146,MUSE_Volume_147,MUSE_Volume_148,MUSE_Volume_149,MUSE_Volume_150,MUSE_Volume_151,MUSE_Volume_152,MUSE_Volume_153,MUSE_Volume_154,MUSE_Volume_155,MUSE_Volume_156,MUSE_Volume_157,MUSE_Volume_160,MUSE_Volume_161,MUSE_Volume_162,MUSE_Volume_163,MUSE_Volume_164,MUSE_Volume_165,MUSE_Volume_166,MUSE_Volume_167,MUSE_Volume_168,MUSE_Volume_169,MUSE_Volume_170,MUSE_Volume_171,MUSE_Volume_172,MUSE_Volume_173,MUSE_Volume_174,MUSE_Volume_175,MUSE_Volume_176,MUSE_Volume_177,MUSE_Volume_178,MUSE_Volume_179,MUSE_Volume_180,MUSE_Volume_181,MUSE_Volume_182,MUSE_Volume_183,MUSE_Volume_184,MUSE_Volume_185,MUSE_Volume_186,MUSE_Volume_187,MUSE_Volume_190,MUSE_Volume_191,MUSE_Volume_192,MUSE_Volume_193,MUSE_Volume_194,MUSE_Volume_195,MUSE_Volume_196,MUSE_Volume_197,MUSE_Volume_198,MUSE_Volume_199,MUSE_Volume_200,MUSE_Volume_201,MUSE_Volume_202,MUSE_Volume_203,MUSE_Volume_204,MUSE_Volume_205,MUSE_Volume_206,MUSE_Volume_207
1153,-3.000263,-2.953754,-6.201226,-6.069332,-7.421024,-8.588489,-9.130180,-7.084358,-7.923820,-10.958310,-10.825443,-8.440875,-8.299786,-7.771747,-7.634176,-0.898566,-1.451911,-2.496971,-2.445404,-8.283910,-8.973458,-7.976803,-8.009063,-11.060999,-10.692876,-9.764255,-10.189356,-6.683376,-6.837509,-7.825393,-1.923669,-1.085972,-6.846017,-6.968551,-5.754560,-4.616201,-6.681138,-6.351644,-7.131425,-6.630725,-7.885762,-7.686246,-6.888571,-6.540429,-7.090908,-7.569809,-6.591904,-5.571191,-5.468066,-7.116770,-7.636427,-5.857644,-5.515722,-6.350440,-5.427466,-5.098370,-4.546565,-5.247707,-4.952514,-4.865845,-5.599354,-6.633446,-4.534965,-4.499297,-4.036576,-5.270481,-4.587739,-7.922898,-8.444324,-5.672735,-5.515293,-7.105355,-5.749189,-8.339766,-8.481632,-7.633507,-6.953625,-5.276940,-5.390747,-6.862399,-7.007496,-4.237248,-4.923761,-7.347705,-7.644963,-7.237547,-6.694455,-7.579621,-7.617918,-3.453844,-3.767050,-5.013042,-4.607656,-5.297870,-6.423931,-7.898857,-6.763535,-3.934815,-4.917840,-6.134842,-6.167571,-2.652187,-3.880714,-3.665272,-4.810512,-6.261549,-7.282699,-7.593798,-7.069771,-8.311904,-8.126085,-5.465438,-5.434031,-2.544275,-4.805246,-6.163576,-6.305969,-6.797916,-7.086410,-3.600253,-4.760631,-7.741807,-8.051257,-4.477015,-5.241320,-3.548414,-4.613635,-7.831700,-7.184237,-5.142111,-5.094691,-5.988164,-7.041820,-5.308360,-4.474120,-8.070899,-6.890960,-5.896595,-6.268917,-6.590528,-6.614963,-3.905221,-4.563176,-2.515658,-2.311212
1061,-2.364362,-2.994382,-5.144035,-5.819230,-8.858589,-9.256135,-10.516981,-7.519321,-7.621486,-11.053074,-10.523329,-9.550420,-9.556000,-11.059713,-11.069305,-0.006243,-0.286195,-0.989257,-1.476774,-8.034075,-7.915739,-8.415067,-7.750396,-12.887359,-12.166514,-11.558425,-11.461455,-8.202784,-8.155033,-8.001180,-3.814108,-4.096430,-7.437306,-7.319290,-7.035769,-5.900674,-7.136624,-6.981693,-8.202404,-7.821876,-7.130084,-8.318208,-8.856417,-8.766015,-8.171098,-8.469729,-7.113815,-4.099392,-5.765690,-8.774986,-8.088324,-4.814620,-5.495421,-6.862498,-6.072448,-6.902055,-4.867736,-5.078516,-5.386932,-8.485921,-7.402753,-9.022478,-8.853413,-5.549636,-5.392552,-7.522338,-6.167380,-8.897159,-9.831639,-7.116496,-7.005976,-9.528421,-8.648302,-9.181928,-8.068501,-10.234731,-9.560279,-6.966672,-7.015263,-8.674808,-8.628188,-5.810552,-5.382856,-8.480846,-7.704174,-7.546566,-7.171838,-9.011921,-8.229162,-4.534884,-4.446492,-5.961095,-6.341725,-8.008857,-6.501610,-9.941124,-8.196738,-7.290265,-5.443581,-9.294129,-6.593020,-4.448294,-4.235695,-3.822568,-4.500738,-6.698615,-7.806455,-9.072993,-9.031410,-9.911139,-10.111758,-6.751145,-5.734035,-5.692109,-5.139198,-6.886044,-6.628477,-4.198784,-4.176735,-6.486538,-6.049534,-8.675776,-9.017010,-6.454886,-4.475970,-5.270519,-4.364163,-8.688544,-9.074333,-6.927815,-6.822513,-8.009168,-5.697845,-8.692445,-7.445832,-9.372631,-9.041310,-7.111852,-6.407044,-7.611724,-7.000584,-5.520230,-6.596776,-2.474750,-2.809757
1014,-2.841367,-2.782929,-7.247184,-6.814433,-7.807940,-8.308348,-10.168109,-7.912647,-8.621369,-8.656242,-9.055669,-8.037969,-8.450737,-9.523729,-9.140719,-1.603995,-1.655897,-2.403028,-2.203251,-7.490393,-8.083272,-8.191500,-8.410401,-11.070679,-11.220515,-11.139869,-11.004692,-7.483379,-6.043936,-6.452068,-2.639844,-3.016453,-7.761566,-7.884852,-6.846487,-6.268942,-7.283025,-7.033222,-7.823717,-8.054479,-7.483525,-7.742194,-7.109531,-7.440972,-7.745416,-8.142760,-8.205746,-3.446898,-5.229521,-8.683400,-8.566570,-5.615698,-5.452171,-6.287314,-4.526340,-5.615723,-6.585942,-4.892961,-4.360629,-6.793930,-7.171994,-6.708784,-6.707732,-5.961899,-4.574487,-5.423906,-5.470872,-5.508155,-7.213702,-7.076365,-7.229635,-7.309892,-6.919999,-6.823747,-8.481608,-6.974732,-5.925666,-6.418812,-4.617729,-5.558795,-7.476055,-3.469464,-3.796843,-6.397426,-7.750053,-6.159918,-6.415961,-8.356527,-8.370468,-3.415280,-3.909779,-4.389461,-3.420477,-6.858646,-6.408420,-7.957674,-7.410593,-6.492961,-4.932905,-6.786809,-6.726634,-5.282995,-3.608004,-3.074380,-4.520610,-6.197926,-6.964110,-6.257374,-6.471033,-8.159839,-8.608585,-5.860911,-5.843030,-4.864204,-5.217315,-5.678775,-5.343926,-4.613189,-5.937954,-5.821630,-5.827164,-7.183352,-7.682278,-3.415547,-4.268801,-5.469951,-4.759061,-8.174801,-6.354561,-5.360007,-4.180763,-6.747525,-6.467488,-5.966909,-4.610210,-6.717645,-6.562539,-6.264206,-6.335981,-6.953757,-6.873304,-5.768242,-4.047005,-5.079450,-3.601313
456,-2.253738,-3.189911,-5.923755,-5.516132,-7.629186,-8.384617,-10.360425,-7.182146,-7.095432,-9.402608,-9.270303,-9.311078,-9.076243,-9.352605,-8.792766,-2.193478,-2.110491,-3.076430,-3.124993,-8.218079,-8.413255,-9.200554,-8.894586,-10.788422,-10.688697,-10.893603,-10.794445,-7.384670,-6.571818,-7.843621,-3.515279,-3.101366,-8.629648,-8.429307,-6.749525,-5.797163,-7.830105,-8.210252,-9.004759,-8.778619,-7.736831,-8.320365,-8.567415,-8.454661,-7.950783,-8.603238,-7.457782,-4.593258,-5.597136,-7.045657,-7.183727,-5.569691,-4.990571,-6.466122,-6.131620,-2.891036,-2.659508,-6.210065,-6.565745,-4.083867,-3.797964,-5.343770,-6.363570,-3.441531,-5.968988,-5.227368,-4.557418,-8.612908,-7.911459,-6.544177,-7.354585,-5.845551,-5.922158,-8.011150,-7.827065,-6.678622,-5.466613,-6.029202,-4.789605,-7.834655,-8.082159,-5.198137,-4.998100,-7.686119,-7.387168,-4.750557,-4.334849,-7.338016,-7.833288,-2.538947,-2.892152,-6.284110,-5.840712,-5.506507,-6.405170,-8.101392,-7.757226,-5.540900,-3.944024,-5.986030,-6.248691,-3.523414,-3.909007,-4.172787,-3.021057,-5.804294,-5.942403,-6.640872,-6.600905,-6.026178,-6.182563,-6.661571,-6.966659,-4.810439,-3.999923,-5.514717,-6.465477,-6.734389,-6.686840,-6.085390,-6.471016,-8.790607,-8.470268,-5.650746,-3.444629,-5.353243,-5.868632,-7.383958,-6.856710,-6.262125,-7.220159,-6.211558,-6.640543,-4.852045,-4.190530,-8.043243,-7.837478,-6.510245,-7.320783,-5.904585,-6.397365,-3.903759,-4.844467,-4.883829,-3.135838
1047,-2.310973,-3.626112,-5.486226,-4.716565,-9.229840,-9.794481,-8.525446,-7.518378,-8.471241,-8.849896,-8.424762,-6.890548,-6.902370,-9.537662,-10.042126,-0.032804,0.739271,-1.860420,-1.012122,-7.686281,-8.427493,-9.069514,-8.636716,-11.941314,-11.901916,-9.314541,-8.996060,-5.559170,-6.600798,-7.068615,-2.590668,-2.394037,-8.822588,-8.647379,-6.609221,-5.649181,-8.065276,-7.265133,-7.863773,-7.954335,-6.990622,-6.105172,-7.241963,-6.094607,-6.538499,-6.193051,-7.097921,-4.043639,-5.632983,-7.839549,-8.117417,-6.754723,-5.261063,-6.018489,-4.083444,-4.382456,-4.932525,-4.041998,-4.721706,-5.674560,-6.283862,-7.848166,-7.808003,-5.998957,-5.303501,-3.661010,0.055207,-6.154836,-7.208853,-5.646779,-6.108355,-7.895461,-7.124227,-7.245574,-8.570582,-6.115192,-5.387146,-6.249717,-4.630508,-6.538990,-7.235484,-4.983123,-5.637590,-6.928126,-6.972001,-5.591538,-6.288267,-5.595339,-7.642961,-4.481426,-1.164929,-4.494117,-5.731093,-7.857239,-7.115972,-7.244838,-7.232837,-7.363321,-4.108005,-4.893893,-5.754196,-3.808977,-2.890997,-3.523287,-4.855911,-6.363852,-7.065909,-6.573820,-6.013982,-6.611659,-7.563938,-7.066758,-7.012879,-5.131701,-4.513722,-5.309648,-5.622506,-6.426492,-6.467514,-6.405475,-5.860454,-7.132899,-7.371170,-3.528942,-4.036038,-6.636811,-6.957081,-6.873911,-6.493383,-7.444294,-6.642225,-6.431961,-7.500405,-5.985980,-5.668302,-7.898442,-7.508970,-6.526842,-5.143150,-5.172300,-5.864943,-3.533841,-5.857813,-5.246894,-4.231967
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,-2.367456,-3.564133,-6.841700,-6.211096,-8.147833,-8.682724,-9.152897,-8.079309,-9.199355,-10.703585,-10.480511,-8.530005,-8.402717,-9.311727,-9.692236,-1.458775,-1.330879,-1.769940,-1.944133,-9.629794,-9.631302,-9.597336,-9.936161,-11.594821,-11.262343,-10.603990,-10.482347,-6.471745,-7.416969,-6.666056,-2.365770,-2.342033,-7.584927,-7.668523,-7.126432,-7.087852,-7.353453,-7.462114,-7.901035,-7.816021,-6.063257,-7.467428,-8.058360,-8.010246,-8.114367,-8.488498,-7.158379,-4.351178,-5.974558,-7.847295,-8.590313,-5.160511,-5.141546,-5.973656,-5.348569,-6.299997,-6.082498,-5.061246,-5.833210,-6.061053,-5.857780,-6.693900,-7.125362,-5.242868,-5.380140,-6.766036,-5.244218,-7.066136,-7.317409,-6.412329,-6.440692,-6.806774,-8.349400,-7.852997,-8.398349,-8.123682,-7.346345,-5.992991,-5.271428,-7.756092,-7.106872,-4.616983,-3.465112,-7.318317,-7.084984,-6.955649,-7.288191,-8.078196,-8.135691,-4.786696,-1.886364,-6.264545,-5.822881,-7.393195,-6.260910,-8.395583,-8.635268,-5.837040,-5.582758,-6.351181,-7.787486,-4.875709,-3.983018,-3.899748,-4.058201,-7.447475,-7.824080,-7.050267,-7.247067,-8.636972,-6.861079,-5.485603,-5.868919,-4.449209,-4.505913,-6.970421,-8.237042,-6.790793,-5.355190,-5.715142,-7.070436,-8.517867,-8.859234,-4.044936,-3.812537,-4.827727,-4.769803,-8.173556,-8.648259,-6.296902,-6.914431,-8.187516,-6.140108,-7.069672,-4.724449,-8.000808,-7.666823,-6.546386,-7.272584,-8.106504,-6.761975,-4.528216,-5.234376,-3.804298,-2.623751
246,-0.800177,-1.350086,-7.236226,-6.164892,-5.602742,-5.767436,-10.436242,-8.012120,-8.778667,-11.079137,-11.046450,-9.201551,-8.650900,-8.418449,-8.558592,-1.536068,-1.703324,-1.644133,-1.826296,-8.668315,-8.910863,-9.236059,-9.234722,-12.375969,-11.990589,-10.890348,-10.434064,-8.481716,-8.017795,-7.793894,-3.917155,-3.617140,-7.242327,-7.470442,-5.273199,-6.079784,-7.782080,-7.678684,-7.488526,-7.432669,-6.121279,-7.183589,-7.854104,-7.361427,-8.350446,-8.329628,-7.241074,-5.150390,-6.756027,-6.350276,-6.134368,-6.332141,-5.843770,-6.612951,-5.748837,-3.307529,-4.626926,-7.348262,-5.785524,-6.806808,-7.613429,-5.758119,-5.663768,-5.663378,-5.809651,-6.937344,-6.148282,-7.467085,-9.254745,-6.600663,-5.683231,-6.750972,-6.483635,-7.828542,-7.812957,-7.995759,-7.850391,-6.215713,-4.053773,-8.032806,-7.671563,-5.085389,-4.330759,-7.820730,-7.374647,-5.996049,-6.346675,-7.964954,-8.410762,-5.869496,-3.534495,-6.141379,-5.593527,-7.427419,-7.420118,-8.178762,-6.884596,-5.905354,-5.492381,-6.778785,-5.690025,-5.548642,-4.693875,-5.958651,-5.737789,-7.148399,-8.054843,-7.438552,-7.012250,-8.730492,-8.375350,-4.743682,-5.317398,-5.049855,-5.348215,-8.965630,-8.218627,-6.726378,-5.743026,-6.884783,-6.257354,-8.705984,-8.418384,-4.424451,-4.371363,-3.513382,-3.883163,-7.228060,-7.962325,-7.090063,-7.988560,-8.192299,-6.787656,-5.894971,-5.419208,-7.952696,-6.888157,-6.017934,-6.456189,-6.129361,-6.422494,-5.042424,-4.579484,-6.074818,-3.543810
506,-1.839968,-3.402632,-4.954696,-4.946269,-7.514318,-8.415307,-9.570424,-6.706455,-7.560370,-10.342220,-10.180904,-9.859672,-9.668809,-8.240423,-8.613254,-1.639544,-1.678528,-2.684052,-2.568177,-8.039048,-8.062452,-8.896932,-8.550006,-10.232757,-10.123068,-9.257745,-9.599574,-7.462922,-8.888720,-6.878531,-1.286859,-1.723185,-8.724360,-8.420953,-7.733005,-7.203849,-7.877877,-7.824040,-8.245709,-8.236523,-7.782761,-7.100255,-7.466136,-7.327787,-6.652719,-7.375292,-6.649660,-3.733923,-6.204222,-7.080110,-7.050060,-5.106647,-6.081411,-5.707818,-4.050241,-5.480487,-5.319022,-5.178356,-4.275962,-5.543497,-5.604580,-6.257606,-6.416177,-5.473467,-5.472017,-6.126248,-4.290154,-6.143318,-7.106412,-5.109005,-6.396468,-5.654198,-7.658926,-6.751279,-8.269779,-5.834393,-7.988525,-5.592066,-4.836680,-8.753460,-9.109098,-4.273853,-6.476520,-7.752610,-7.241990,-5.809793,-6.618995,-7.406641,-6.431254,-4.506270,-3.584535,-4.985626,-6.341027,-7.112675,-6.294661,-7.087195,-7.122254,-7.017285,-4.326293,-7.180483,-5.594173,-4.519312,-3.319824,-2.415933,-3.885518,-7.125060,-6.965504,-6.181565,-6.331139,-5.669196,-6.108408,-6.475778,-6.042324,-4.231154,-4.167367,-5.861381,-6.756234,-6.153288,-6.476813,-4.973514,-5.715364,-7.212798,-7.325922,-4.372936,-3.501748,-5.535724,-5.544333,-8.105554,-7.320493,-6.504233,-6.542124,-5.784176,-6.515313,-4.071063,-4.424973,-7.852668,-6.863352,-5.274010,-6.127804,-6.875932,-6.903757,-5.501870,-6.348232,-2.615703,-3.920074
669,-3.396072,-2.940261,-6.382129,-5.906699,-8.090111,-8.403256,-9.404943,-7.212923,-7.976973,-11.235810,-11.235377,-8.199100,-8.568927,-8.234506,-8.893013,-2.224072,-1.924318,-2.605357,-2.531841,-9.238857,-8.793491,-8.486659,-8.281048,-11.620948,-11.757775,-10.599383,-10.645044,-7.287277,-7.734524,-6.298949,-3.724363,-3.318824,-8.230988,-8.281579,-7.332251,-7.120655,-7.368349,-7.501895,-8.628270,-8.331964,-7.495465,-7.661683,-8.551774,-8.053373,-8.285871,-8.657007,-7.990878,-4.736428,-7.305081,-8.081331,-7.873014,-5.451706,-4.062687,-6.715662,-5.667964,-4.015902,-6.252961,-5.817182,-6.212244,-5.786026,-6.439217,-6.781210,-7.571152,-4.638805,-5.090065,-5.122137,-4.772635,-8.395780,-7.554924,-6.738014,-6.133166,-6.483874,-7.732565,-8.334610,-8.753020,-7.607357,-8.001166,-5.213170,-4.098367,-7.499088,-7.685580,-5.910589,-4.719251,-7.008469,-7.454569,-5.555179,-6.081430,-8.285082,-8.452779,-3.751919,-2.349727,-5.094616,-6.588978,-7.416656,-5.340119,-7.368527,-6.374359,-6.451838,-5.761647,-6.169925,-8.235579,-3.477975,-5.723454,-4.182472,-3.826919,-6.064004,-6.483255,-6.497572,-6.862896,-7.935833,-8.611964,-6.774104,-7.134639,-3.936870,-4.982721,-6.646805,-7.148037,-7.661362,-7.489007,-6.831867,-6.225269,-8.170481,-7.249470,-4.434771,-4.446728,-5.686709,-5.328369,-8.741959,-8.340003,-6.452881,-7.131631,-6.337410,-6.173705,-5.621836,-3.023375,-5.594786,-5.451856,-7.310753,-6.455779,-6.897412,-7.047087,-5.422853,-5.436070,-3.714733,-2.882746


A few considerations should be kept in mind to further ensure that our approach is unbiased:

* Representation of Control Group: Make sure that the control group (CN) in the training data is representative of the normal population.

* Generalizability of Corrections: Applying corrections based solely on the control group assumes that the relationship between predictors (like age, sex) and the outcome (ROI features) is the same in control and diseased groups. If this assumption does not hold, the model may not capture disease-specific patterns effectively.

* Statistical Assumptions: Linear regression makes certain assumptions (like linearity, normality, homoscedasticity, and independence of errors). Ensure these assumptions hold for your data; otherwise, the corrections might be inappropriate.

Nested CV with GridSearch

In [45]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, balanced_accuracy_score
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

X = data_age_filtered.drop(['diagnosis', 'PTID'], axis=1)
y = data_age_filtered['diagnosis']

# Assuming X, y, roi_features are defined
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

param_grid = {
    'n_neighbors': [3, 5, 7, 10, 15, 30],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean']
}

outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

outer_fold_results = []

for train_index, test_index in outer_cv.split(X_train, y_train):
    X_train_outer, X_val_outer = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train_outer, y_val_outer = y_train.iloc[train_index], y_train.iloc[test_index]

    inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
    grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=inner_cv, scoring='accuracy', n_jobs=-1)

    for inner_train_index, inner_val_index in inner_cv.split(X_train_outer, y_train_outer):
        X_train_inner, X_val_inner = X_train_outer.iloc[inner_train_index], X_train_outer.iloc[inner_val_index]
        y_train_inner, y_val_inner = y_train_outer.iloc[inner_train_index], y_train_outer.iloc[inner_val_index]

        # Preprocess for the inner fold
        data_controls_train_inner = X_train_inner[y_train_inner == 0]
        for roi_feature in roi_features:
            if roi_feature in X_train_inner.columns:
                regr = LinearRegression()
                regr.fit(data_controls_train_inner[['Sex', 'Age', 'DLICV_baseline']], data_controls_train_inner[roi_feature])
                # Apply correction to the training set
                correction_train = regr.predict(X_train_inner[['Sex', 'Age', 'DLICV_baseline']])
                X_train_inner[roi_feature] -= correction_train

                # Apply the same correction to the validation set
                correction_val = regr.predict(X_val_inner[['Sex', 'Age', 'DLICV_baseline']])
                X_val_inner[roi_feature] -= correction_val

        # Z-normalization for training and validation sets
        scaler = StandardScaler().fit(data_controls_train_inner[roi_features])
        X_train_inner[roi_features] = scaler.transform(X_train_inner[roi_features])
        X_val_inner[roi_features] = scaler.transform(X_val_inner[roi_features])

        X_train_inner = X_train_inner.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
        X_val_inner = X_val_inner.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

        grid_search.fit(X_train_inner, y_train_inner)

    # Evaluate the best model on the outer validation set
    best_model = grid_search.best_estimator_

    # We apply the linear correction and the z normalisation of the outer
    # loop after we are doe with the inner loop, in order to avoid applying
    # these filter twice.
    data_controls_train_outer = X_train_outer[y_train_outer == 0]
    for roi_feature in roi_features:
        if roi_feature in X_train_outer.columns:
            regr = LinearRegression()
            regr.fit(data_controls_train_outer[['Sex', 'Age', 'DLICV_baseline']], data_controls_train_outer[roi_feature])
            # Apply correction to the training set
            correction_train = regr.predict(X_train_outer[['Sex', 'Age', 'DLICV_baseline']])
            X_train_outer[roi_feature] -= correction_train

            # Apply the same correction to the validation set
            correction_val = regr.predict(X_val_outer[['Sex', 'Age', 'DLICV_baseline']])
            X_val_outer[roi_feature] -= correction_val

    # Z-normalization for training and validation sets
    scaler = StandardScaler().fit(data_controls_train_outer[roi_features])
    X_train_outer[roi_features] = scaler.transform(X_train_outer[roi_features])
    X_val_outer[roi_features] = scaler.transform(X_val_outer[roi_features])

    X_train_outer = X_train_outer.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
    X_val_outer = X_val_outer.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)


    y_pred_val = best_model.predict(X_val_outer)

    # # Calculate and print metrics for each outer fold
    # accuracy = best_model.score(X_val_outer, y_val_outer)
    # print(f"Outer Fold Metrics: Accuracy: {accuracy}, Balanced Accuracy: {balanced_accuracy_score(y_val_outer, y_pred_val)}, ...")

    # Append metrics to dict
    fold_metrics = {
        "Accuracy": best_model.score(X_val_outer, y_val_outer),
        "Balanced Accuracy": balanced_accuracy_score(y_val_outer, y_pred_val),
        "Precision": precision_score(y_val_outer, y_pred_val, average='weighted'),
        "Recall": recall_score(y_val_outer, y_pred_val, average='weighted'),
        "F1 Score": f1_score(y_val_outer, y_pred_val, average='weighted'),
        "AUC": roc_auc_score(y_val_outer, best_model.predict_proba(X_val_outer), multi_class='ovr', average='weighted')
    }
    outer_fold_results.append(fold_metrics)
    print(f"Outer Fold Metrics: {fold_metrics}")

# Average metrics across all outer folds
avg_metrics = {metric: np.mean([fold[metric] for fold in outer_fold_results]) for metric in outer_fold_results[0]}
print("\nAverage Metrics Across All Outer Folds:")
print(avg_metrics)



Outer Fold Metrics: {'Accuracy': 0.5085470085470085, 'Balanced Accuracy': 0.4578103851359665, 'Precision': 0.5118861685048164, 'Recall': 0.5085470085470085, 'F1 Score': 0.4979714120517578, 'AUC': 0.6202970934146793}
Outer Fold Metrics: {'Accuracy': 0.5170940170940171, 'Balanced Accuracy': 0.5071346080045335, 'Precision': 0.522302313607111, 'Recall': 0.5170940170940171, 'F1 Score': 0.5086697807798726, 'AUC': 0.6514838474845354}
Outer Fold Metrics: {'Accuracy': 0.5042735042735043, 'Balanced Accuracy': 0.4656665525309593, 'Precision': 0.5519653100369403, 'Recall': 0.5042735042735043, 'F1 Score': 0.4932571197966852, 'AUC': 0.6779416787457724}
Outer Fold Metrics: {'Accuracy': 0.5299145299145299, 'Balanced Accuracy': 0.5049363693431491, 'Precision': 0.5685660277516854, 'Recall': 0.5299145299145299, 'F1 Score': 0.5037374019306462, 'AUC': 0.6783342137723062}
Outer Fold Metrics: {'Accuracy': 0.5427350427350427, 'Balanced Accuracy': 0.500777549506363, 'Precision': 0.582248216452974, 'Recall': 0.

In [46]:
# Retrieve the best hyperparameters from the grid search
best_params = grid_search.best_params_

# Retrain the model on the entire training dataset (X_train, y_train) with these parameters
final_knn_model = KNeighborsClassifier(**best_params)

# Apply preprocessing (linear correction and Z-normalization) to the entire training dataset X_train
data_controls = X_train[y_train == 0]  # Control group for the training dataset
for roi_feature in roi_features:
    if roi_feature in X_train.columns:
        regr = LinearRegression()
        regr.fit(data_controls[['Sex', 'Age', 'DLICV_baseline']], data_controls[roi_feature])
        # Apply correction to the training set
        correction_train = regr.predict(X_train[['Sex', 'Age', 'DLICV_baseline']])
        X_train[roi_feature] -= correction_train

        # Apply the same correction to the test set
        correction_test = regr.predict(X_test[['Sex', 'Age', 'DLICV_baseline']])
        X_test[roi_feature] -= correction_test


# Z-normalization
scaler = StandardScaler().fit(data_controls[roi_features])
X_train[roi_features] = scaler.transform(X_train[roi_features])
X_test[roi_features] = scaler.transform(X_test[roi_features])


X_train = X_train.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
X_test = X_test.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

# Train the final model on the entire training dataset
final_knn_model.fit(X_train, y_train)

# # Apply linear correction and Z-normalization to X_test
# for roi_feature in roi_features:
#     if roi_feature in X_test.columns:
#         correction_test = regr.predict(X_test[['Sex', 'Age', 'DLICV_baseline']])
#         X_test[roi_feature] -= correction_test
# X_test[roi_features] = scaler.transform(X_test[roi_features])
# X_test = X_test.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

# Evaluate the final model on the test set (X_test, y_test)
y_pred_test = final_knn_model.predict(X_test)
test_accuracy = final_knn_model.score(X_test, y_test)
test_balanced_accuracy = balanced_accuracy_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test, average='weighted')
test_recall = recall_score(y_test, y_pred_test, average='weighted')
test_f1 = f1_score(y_test, y_pred_test, average='weighted')
test_auc = roc_auc_score(y_test, final_knn_model.predict_proba(X_test), multi_class='ovr', average='weighted')

print(f"Test set Metrics: Accuracy: {test_accuracy}, Balanced Accuracy: {test_balanced_accuracy}, Precision: {test_precision}, Recall: {test_recall}, F1 Score: {test_f1}, AUC: {test_auc}")

Test set Metrics: Accuracy: 0.5221843003412969, Balanced Accuracy: 0.4842797342797343, Precision: 0.5329516568103109, Recall: 0.5221843003412969, F1 Score: 0.4992342495948671, AUC: 0.6927815353515775


In [47]:
best_params

{'metric': 'euclidean', 'n_neighbors': 30, 'weights': 'distance'}

#### Nested CV with Randomized Search

In [48]:
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold, train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, balanced_accuracy_score
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
import scipy.stats as stats

X = data_age_filtered.drop(['diagnosis', 'PTID'], axis=1)
y = data_age_filtered['diagnosis']

# Assuming X, y, roi_features are defined
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Define parameter distribution (instead of a fixed grid)
param_dist = {
    'n_neighbors': stats.randint(3, 1000),  # Uniformly distributed integers from 3 to 30
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

outer_fold_results = []

for train_index, test_index in outer_cv.split(X_train, y_train):
    X_train_outer, X_val_outer = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train_outer, y_val_outer = y_train.iloc[train_index], y_train.iloc[test_index]

    inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)

    # Use RandomizedSearchCV
    n_iter_search = 20  # Number of parameter settings sampled
    random_search = RandomizedSearchCV(KNeighborsClassifier(), param_distributions=param_dist, n_iter=n_iter_search, cv=inner_cv, scoring='accuracy', n_jobs=-1, random_state=42)

    for inner_train_index, inner_val_index in inner_cv.split(X_train_outer, y_train_outer):
        X_train_inner, X_val_inner = X_train_outer.iloc[inner_train_index], X_train_outer.iloc[inner_val_index]
        y_train_inner, y_val_inner = y_train_outer.iloc[inner_train_index], y_train_outer.iloc[inner_val_index]

        # Preprocess for the inner fold
        data_controls_train_inner = X_train_inner[y_train_inner == 0]
        for roi_feature in roi_features:
            if roi_feature in X_train_inner.columns:
                regr = LinearRegression()
                regr.fit(data_controls_train_inner[['Sex', 'Age', 'DLICV_baseline']], data_controls_train_inner[roi_feature])
                # Apply correction to the training set
                correction_train = regr.predict(X_train_inner[['Sex', 'Age', 'DLICV_baseline']])
                X_train_inner[roi_feature] -= correction_train

                # Apply the same correction to the validation set
                correction_val = regr.predict(X_val_inner[['Sex', 'Age', 'DLICV_baseline']])
                X_val_inner[roi_feature] -= correction_val

        # Z-normalization for training and validation sets
        scaler = StandardScaler().fit(data_controls_train_inner[roi_features])
        X_train_inner[roi_features] = scaler.transform(X_train_inner[roi_features])
        X_val_inner[roi_features] = scaler.transform(X_val_inner[roi_features])

        X_train_inner = X_train_inner.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
        X_val_inner = X_val_inner.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

        random_search.fit(X_train_inner, y_train_inner)

    # Evaluate the best model on the outer validation set
    best_model = grid_search.best_estimator_

    # We apply the linear correction and the z normalisation of the outer
    # loop after we are doe with the inner loop, in order to avoid applying
    # these filter twice.
    data_controls_train_outer = X_train_outer[y_train_outer == 0]
    for roi_feature in roi_features:
        if roi_feature in X_train_outer.columns:
            regr = LinearRegression()
            regr.fit(data_controls_train_outer[['Sex', 'Age', 'DLICV_baseline']], data_controls_train_outer[roi_feature])
            # Apply correction to the training set
            correction_train = regr.predict(X_train_outer[['Sex', 'Age', 'DLICV_baseline']])
            X_train_outer[roi_feature] -= correction_train

            # Apply the same correction to the validation set
            correction_val = regr.predict(X_val_outer[['Sex', 'Age', 'DLICV_baseline']])
            X_val_outer[roi_feature] -= correction_val

    # Z-normalization for training and validation sets
    scaler = StandardScaler().fit(data_controls_train_outer[roi_features])
    X_train_outer[roi_features] = scaler.transform(X_train_outer[roi_features])
    X_val_outer[roi_features] = scaler.transform(X_val_outer[roi_features])

    X_train_outer = X_train_outer.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
    X_val_outer = X_val_outer.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)


    y_pred_val = best_model.predict(X_val_outer)
    # Calculate and print metrics for each outer fold
    accuracy = best_model.score(X_val_outer, y_val_outer)
    print(f"Outer Fold Metrics: Accuracy: {accuracy}, Balanced Accuracy: {balanced_accuracy_score(y_val_outer, y_pred_val)}, ...")

    # Append metrics to dict
    fold_metrics = {
        "Accuracy": best_model.score(X_val_outer, y_val_outer),
        "Balanced Accuracy": balanced_accuracy_score(y_val_outer, y_pred_val),
        "Precision": precision_score(y_val_outer, y_pred_val, average='weighted'),
        "Recall": recall_score(y_val_outer, y_pred_val, average='weighted'),
        "F1 Score": f1_score(y_val_outer, y_pred_val, average='weighted'),
        "AUC": roc_auc_score(y_val_outer, best_model.predict_proba(X_val_outer), multi_class='ovr', average='weighted')
    }
    outer_fold_results.append(fold_metrics)
    print(f"Outer Fold Metrics: {fold_metrics}")

# Average metrics across all outer folds
avg_metrics = {metric: np.mean([fold[metric] for fold in outer_fold_results]) for metric in outer_fold_results[0]}
print("\nAverage Metrics Across All Outer Folds:")
print(avg_metrics)



Outer Fold Metrics: Accuracy: 0.6025641025641025, Balanced Accuracy: 0.5497606019151847, ...
Outer Fold Metrics: {'Accuracy': 0.6025641025641025, 'Balanced Accuracy': 0.5497606019151847, 'Precision': 0.6221614101731061, 'Recall': 0.6025641025641025, 'F1 Score': 0.5755357048805961, 'AUC': 0.7819868719747315}
Outer Fold Metrics: Accuracy: 0.6965811965811965, Balanced Accuracy: 0.6437995172460789, ...
Outer Fold Metrics: {'Accuracy': 0.6965811965811965, 'Balanced Accuracy': 0.6437995172460789, 'Precision': 0.7012702076421629, 'Recall': 0.6965811965811965, 'F1 Score': 0.6866896137546932, 'AUC': 0.8199571887636071}
Outer Fold Metrics: Accuracy: 0.6324786324786325, Balanced Accuracy: 0.5958240598071106, ...
Outer Fold Metrics: {'Accuracy': 0.6324786324786325, 'Balanced Accuracy': 0.5958240598071106, 'Precision': 0.6644320778936164, 'Recall': 0.6324786324786325, 'F1 Score': 0.6217670283588936, 'AUC': 0.7927849821527984}
Outer Fold Metrics: Accuracy: 0.6452991452991453, Balanced Accuracy: 0.56

In [49]:
# Retrieve the best hyperparameters from the grid search
best_params = grid_search.best_params_

# Retrain the model on the entire training dataset (X_train, y_train) with these parameters
final_knn_model = KNeighborsClassifier(**best_params)

# Apply preprocessing (linear correction and Z-normalization) to the entire training dataset X_train
data_controls = X_train[y_train == 0]  # Control group for the training dataset
for roi_feature in roi_features:
    if roi_feature in X_train.columns:
        regr = LinearRegression()
        regr.fit(data_controls[['Sex', 'Age', 'DLICV_baseline']], data_controls[roi_feature])
        # Apply correction to the training set
        correction_train = regr.predict(X_train[['Sex', 'Age', 'DLICV_baseline']])
        X_train[roi_feature] -= correction_train

        # Apply the same correction to the test set
        correction_test = regr.predict(X_test[['Sex', 'Age', 'DLICV_baseline']])
        X_test[roi_feature] -= correction_test


# Z-normalization
scaler = StandardScaler().fit(data_controls[roi_features])
X_train[roi_features] = scaler.transform(X_train[roi_features])
X_test[roi_features] = scaler.transform(X_test[roi_features])


X_train = X_train.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
X_test = X_test.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

# Train the final model on the entire training dataset
final_knn_model.fit(X_train, y_train)

# # Apply linear correction and Z-normalization to X_test
# for roi_feature in roi_features:
#     if roi_feature in X_test.columns:
#         correction_test = regr.predict(X_test[['Sex', 'Age', 'DLICV_baseline']])
#         X_test[roi_feature] -= correction_test
# X_test[roi_features] = scaler.transform(X_test[roi_features])
# X_test = X_test.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

# Evaluate the final model on the test set (X_test, y_test)
y_pred_test = final_knn_model.predict(X_test)
test_accuracy = final_knn_model.score(X_test, y_test)
test_balanced_accuracy = balanced_accuracy_score(y_test, y_pred_test)
test_precision = precision_score(y_test, y_pred_test, average='weighted')
test_recall = recall_score(y_test, y_pred_test, average='weighted')
test_f1 = f1_score(y_test, y_pred_test, average='weighted')
test_auc = roc_auc_score(y_test, final_knn_model.predict_proba(X_test), multi_class='ovr', average='weighted')

print(f"Test set Metrics: Accuracy: {test_accuracy}, Balanced Accuracy: {test_balanced_accuracy}, Precision: {test_precision}, Recall: {test_recall}, F1 Score: {test_f1}, AUC: {test_auc}")

Test set Metrics: Accuracy: 0.5221843003412969, Balanced Accuracy: 0.4842797342797343, Precision: 0.5329516568103109, Recall: 0.5221843003412969, F1 Score: 0.4992342495948671, AUC: 0.6927815353515775


In [50]:
best_params

{'metric': 'euclidean', 'n_neighbors': 30, 'weights': 'distance'}

##### back ups


In [51]:
# only corrected and standardized outer folds
# (which include the inner folds, but it is the same problem of data
# leakage even though the whole data are train data. Even though all of these
# data are train data, we should treat them as validation/test data for validating
# and evaluating the model.)

# from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
# from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, balanced_accuracy_score
# from sklearn.linear_model import LinearRegression
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.preprocessing import StandardScaler

# # Splitting dataset into train and test
# X = data_age_filtered.drop(['diagnosis', 'PTID'], axis=1)
# y = data_age_filtered['diagnosis']

# # Assuming X, y, roi_features are defined
# # Split the dataset into training and test sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# # Define parameter grid for KNN
# param_grid = {
#     'n_neighbors': [3, 5, 7, 10, 15, 30],
#     'weights': ['uniform', 'distance'],
#     'metric': ['euclidean', 'manhattan']
# }

# # Define outer cross-validation
# outer_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# # Lists to store metrics for each outer fold
# outer_fold_accuracy = []
# outer_fold_balanced_accuracy = []
# outer_fold_precision = []
# outer_fold_recall = []
# outer_fold_f1 = []
# outer_fold_auc = []

# # Outer loop for model evaluation
# for train_index, test_index in outer_cv.split(X_train, y_train):
#     # Split training data into training and validation for the current outer fold
#     X_train_outer, X_val_outer = X_train.iloc[train_index], X_train.iloc[test_index]
#     y_train_outer, y_val_outer = y_train.iloc[train_index], y_train.iloc[test_index]

#     # Apply preprocessing (linear correction and Z-normalization) to X_train_outer and X_val_outer
#     # Linear correction
#     data_controls_train_outer = X_train_outer[y_train_outer == 0]  # Control group for the fold
#     for roi_feature in roi_features:
#         if roi_feature in X_train_outer.columns:
#             regr = LinearRegression()
#             regr.fit(data_controls_train_outer[['Sex', 'Age', 'DLICV_baseline']], data_controls_train_outer[roi_feature])
#             # Apply correction to the training set
#             correction_train = regr.predict(X_train_outer[['Sex', 'Age', 'DLICV_baseline']])
#             X_train_outer[roi_feature] -= correction_train

#             # Apply the same correction to the validation set
#             correction_val = regr.predict(X_val_outer[['Sex', 'Age', 'DLICV_baseline']])
#             X_val_outer[roi_feature] -= correction_val

#     # Z-normalization for training and validation sets
#     scaler = StandardScaler().fit(data_controls_train_outer[roi_features])
#     X_train_outer[roi_features] = scaler.transform(X_train_outer[roi_features])
#     X_val_outer[roi_features] = scaler.transform(X_val_outer[roi_features])

#     X_train_outer = X_train_outer.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)
#     X_val_outer = X_val_outer.drop(['Sex', 'Age', 'DLICV_baseline'], axis=1)

#     # Inner loop for hyperparameter tuning
#     inner_cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
#     grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=inner_cv, scoring='accuracy', n_jobs=-1)
#     grid_search.fit(X_train_outer, y_train_outer)

#     # Train model with best parameters on the entire training set of the outer fold
#     best_model = grid_search.best_estimator_
#     best_model.fit(X_train_outer, y_train_outer)

#     # Evaluate model on the validation set of the outer fold
#     y_pred_val = best_model.predict(X_val_outer)
#     outer_fold_accuracy.append(best_model.score(X_val_outer, y_val_outer))
#     outer_fold_balanced_accuracy.append(balanced_accuracy_score(y_val_outer, y_pred_val))
#     outer_fold_precision.append(precision_score(y_val_outer, y_pred_val, average='weighted'))
#     outer_fold_recall.append(recall_score(y_val_outer, y_pred_val, average='weighted'))
#     outer_fold_f1.append(f1_score(y_val_outer, y_pred_val, average='weighted'))
#     outer_fold_auc.append(roc_auc_score(y_val_outer, best_model.predict_proba(X_val_outer), multi_class='ovr', average='weighted'))

#     # Evaluate model on the validation set of the outer fold
#     y_pred_val = best_model.predict(X_val_outer)
#     accuracy = best_model.score(X_val_outer, y_val_outer)
#     balanced_accuracy = balanced_accuracy_score(y_val_outer, y_pred_val)
#     precision = precision_score(y_val_outer, y_pred_val, average='weighted')
#     recall = recall_score(y_val_outer, y_pred_val, average='weighted')
#     f1 = f1_score(y_val_outer, y_pred_val, average='weighted')
#     auc = roc_auc_score(y_val_outer, best_model.predict_proba(X_val_outer), multi_class='ovr', average='weighted')

#     # Append metrics to lists
#     outer_fold_accuracy.append(accuracy)
#     outer_fold_balanced_accuracy.append(balanced_accuracy)
#     outer_fold_precision.append(precision)
#     outer_fold_recall.append(recall)
#     outer_fold_f1.append(f1)
#     outer_fold_auc.append(auc)


# # Calculate average metrics across all outer folds
# print("Average Metrics Across All Outer Folds:")
# print(f"Accuracy: {np.mean(outer_fold_accuracy)}")
# print(f"Balanced Accuracy: {np.mean(outer_fold_balanced_accuracy)}")
# print(f"Precision: {np.mean(outer_fold_precision)}")
# print(f"Recall: {np.mean(outer_fold_recall)}")
# print(f"F1 Score: {np.mean(outer_fold_f1)}")
# print(f"AUC: {np.mean(outer_fold_auc)}")
