In [100]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from sklearn.svm import SVC
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt

In [171]:
# Number of cross-validation folds used when partitioning subjects into
# train/test id lists further down the notebook.
folds = 5

## Read AV45 Features

In [106]:
# Build one AV45 feature vector per worksheet: every sheet is parsed as a
# header-less matrix and its strict upper triangle (diagonal excluded) is
# flattened in row-major order.
AV45 = pd.ExcelFile('Dataset/AV45_network.xlsx')
AV45_features = []
for sheet_name in AV45.sheet_names:
    AV45_corr = AV45.parse(sheet_name, header=None)
    corr_values = np.array(AV45_corr)
    # (row, col) positions strictly above the main diagonal.
    indices = np.triu_indices_from(corr_values, k=1)
    AV45_features.append(list(corr_values[indices]))

# Each worksheet is counted as one subject.
print('subject number:%s'%len(AV45_features))
print('length of AV45 Features:%s'%len(AV45_features[0]))


subject number:46
length of AV45 Features:4005


## Read FDG Features

In [107]:
# Build one FDG feature vector per worksheet: every sheet is parsed as a
# header-less matrix and its strict upper triangle (diagonal excluded) is
# flattened in row-major order.
FDG = pd.ExcelFile('Dataset/FDG_network.xlsx')
FDG_features = []
for sheet_name in FDG.sheet_names:
    FDG_corr = FDG.parse(sheet_name, header=None)
    corr_values = np.array(FDG_corr)
    # (row, col) positions strictly above the main diagonal.
    indices = np.triu_indices_from(corr_values, k=1)
    FDG_features.append(list(corr_values[indices]))

# Each worksheet is counted as one subject.
print('subject number:%s'%len(FDG_features))
print('length of FDG Features:%s'%len(FDG_features[0]))

subject number:46
length of FDG Features:4005


## Read Subject Clinical Data

In [94]:
# Load the header-less clinical score sheet and, for every column, print how
# many subjects fall into each distinct value (values listed in sorted order).
Subject_data = pd.read_excel('Dataset/ClinicalScores.xlsx', sheet_name='Scores2', header=None)
for column in Subject_data.columns:
    column_values = Subject_data[column]
    print('Clinical Feature %s'%column)
    for level in np.unique(column_values):
        n_subjects = (column_values == level).sum()
        print('%s:%d'%(level, n_subjects))
    print('-------------------')

Clinical Feature 0
AD:21
MCI:25
-------------------
Clinical Feature 1
1:46
-------------------
Clinical Feature 2
1:39
2:7
-------------------
Clinical Feature 3
1:41
2:5
-------------------
Clinical Feature 4
1:18
2:26
3:2
-------------------
Clinical Feature 5
0:17
1:20
2:7
3:2
-------------------


In [124]:
# Binary labels from column 0 of the clinical sheet: 1 for 'AD', 0 for
# anything else (the only other value printed above is 'MCI').
y = (Subject_data[0] == 'AD').astype(int).tolist()

## SVM-RFE (AV45)

In [143]:
# Recursive feature elimination with a linear SVM, dropping one feature per
# iteration until a single feature remains, so ranking_ orders every feature.
# NOTE(review): this cell fits on all subjects; confirm it is exploratory only.
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(AV45_features, y)

# Column positions of the features ranked 1..50, in rank order.
ranking = rfe.ranking_.tolist()
idx = [ranking.index(rank) for rank in range(1, 51)]
AV45_features_low_dim = pd.DataFrame(AV45_features).loc[:, idx]

## SVM-RFE (FDG)

In [146]:
# Recursive feature elimination with a linear SVM, dropping one feature per
# iteration until a single feature remains, so ranking_ orders every feature.
# NOTE(review): this cell fits on all subjects; confirm it is exploratory only.
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(FDG_features, y)

# Column positions of the features ranked 1..50, in rank order.
ranking = rfe.ranking_.tolist()
idx = [ranking.index(rank) for rank in range(1, 51)]
FDG_features_low_dim = pd.DataFrame(FDG_features).loc[:, idx]

## Similarity (AV45)

In [158]:
# Subject-by-subject correlation distance on the reduced AV45 features,
# expanded from condensed form to a full square matrix.
corr_dist = distance.squareform(
    distance.pdist(np.array(AV45_features_low_dim), metric='correlation')
)
# Gaussian kernel whose width is the mean of the whole distance matrix
# (the zero diagonal is included in that mean).
sigma = np.mean(corr_dist)
AV45_simirality = np.exp(-corr_dist**2 / (2 * sigma**2))

## Similarity (FDG)

In [159]:
# Subject-by-subject correlation distance on the reduced FDG features,
# expanded from condensed form to a full square matrix.
corr_dist = distance.squareform(
    distance.pdist(np.array(FDG_features_low_dim), metric='correlation')
)
# Gaussian kernel whose width is the mean of the whole distance matrix
# (the zero diagonal is included in that mean).
sigma = np.mean(corr_dist)
FDG_simirality = np.exp(-corr_dist**2 / (2 * sigma**2))

In [162]:
# Fixed seed so the subject permutation drawn from this generator (and hence
# the train/test folds) is identical after every Restart & Run All.
# Change RANDOM_SEED to draw a different, but still reproducible, split.
RANDOM_SEED = 0
rnd_state = np.random.RandomState(RANDOM_SEED)

In [174]:
# Shuffle the subject ids and partition them into `folds` disjoint test sets.
n = len(Subject_data)
ids_all = Subject_data.index
ids = ids_all[rnd_state.permutation(n)]

# Split by position with np.array_split so there are always exactly `folds`
# chunks whose sizes differ by at most one. Fixed-stride slicing can produce
# fewer than `folds` chunks (e.g. n=11, folds=5 gives only 4), which made the
# train loop fail on test_ids[fold], and could leave a much smaller last fold.
# Indexing `ids` with each position chunk keeps every test_ids[k] a pandas Index.
fold_positions = np.array_split(np.arange(n), folds)
test_ids = [ids[positions] for positions in fold_positions]

# Training ids for a fold are all shuffled ids not in that fold's test set,
# kept in shuffled order and stored as a plain NumPy array.
train_ids = [np.array(ids[~ids.isin(fold_test)]) for fold_test in test_ids]


In [179]:
# Inspect the subject ids held out for testing in the first fold.
test_ids[0]

Int64Index([40, 10, 20, 32, 43, 21, 36, 45, 18, 27], dtype='int64')

In [180]:
# Inspect the subject ids used for training in the first fold.
train_ids[0]

array([ 3,  7,  4, 39, 31, 24,  0, 17, 28, 42, 34, 13, 23,  8, 33, 30, 26,
       11,  6, 15, 25, 38, 37,  9, 19,  5, 14, 29, 16,  2, 22, 44, 41,  1,
       12, 35])

In [191]:
# Feature selection for the first fold only: the RFE ranking is fitted on the
# training subjects, then the chosen columns are taken for ALL subjects.
# Labels: 1 for 'AD', 0 otherwise, restricted to the first fold's training ids.
y = (Subject_data[0][train_ids[0]] == 'AD').astype(int).tolist()

#SVM-RFE (AV45)
AV45_frame = pd.DataFrame(AV45_features)
AV45_features_train = np.array(AV45_frame.loc[train_ids[0]])
# Linear-SVM RFE removing one feature per step down to a single feature.
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(AV45_features_train, y)
# Column positions of the features ranked 1..50, in rank order.
ranking = rfe.ranking_.tolist()
idx = [ranking.index(rank) for rank in range(1, 51)]
AV45_features_low_dim = AV45_frame[idx]

#SVM-RFE (FDG)
FDG_frame = pd.DataFrame(FDG_features)
FDG_features_train = np.array(FDG_frame.loc[train_ids[0]])
# Linear-SVM RFE removing one feature per step down to a single feature.
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(FDG_features_train, y)
# Column positions of the features ranked 1..50, in rank order.
ranking = rfe.ranking_.tolist()
idx = [ranking.index(rank) for rank in range(1, 51)]
FDG_features_low_dim = FDG_frame[idx]


## Edge Connection

In [205]:
# Graph edge connection for the first fold.
# Note: the "NC" group is selected by the 'MCI' label and "Pt" by 'AD'.
Subject_data_train = Subject_data.loc[train_ids[0]]
is_MCI = Subject_data_train[0] == 'MCI'
is_AD = Subject_data_train[0] == 'AD'
idx_train_NC = Subject_data_train[is_MCI].index
idx_train_Pt = Subject_data_train[is_AD].index
idx_test = test_ids[0]

# Adjacency matrix (A), with rows/columns ordered NC --> Pt --> test:
#   * NC rows connect to NC columns,
#   * Pt rows connect to Pt columns,
#   * test rows connect to every column.
# Train rows keep zeros in the test columns, so A is not symmetric.
n = len(Subject_data)
n_NC = len(idx_train_NC)
n_train = n_NC + len(idx_train_Pt)
A = np.zeros([n, n])
A[:n_NC, :n_NC] = 1
A[n_NC:n_train, n_NC:n_train] = 1
A[n_train:, :] = 1

## Edge Weight Initialization

In [255]:
# Subject order shared with the adjacency matrix: NC-train, Pt-train, test.
idx_order = idx_train_NC.append([idx_train_Pt, idx_test])

# Similarity (AV45): Gaussian kernel over pairwise correlation distance of the
# selected features, with rows re-ordered to idx_order first.
AV45_features_low_dim = AV45_features_low_dim.loc[idx_order]
corr_dist = distance.squareform(
    distance.pdist(np.array(AV45_features_low_dim), metric='correlation')
)
sigma = np.mean(corr_dist)
AV45_simirality = np.exp(-corr_dist**2 / (2 * sigma**2))

# Similarity (FDG): same construction on the FDG features.
FDG_features_low_dim = FDG_features_low_dim.loc[idx_order]
corr_dist = distance.squareform(
    distance.pdist(np.array(FDG_features_low_dim), metric='correlation')
)
sigma = np.mean(corr_dist)
FDG_simirality = np.exp(-corr_dist**2 / (2 * sigma**2))

# Phenotypic information: R[j, k] counts the clinical columns (all except
# column 0, the diagnosis label) on which subjects j and k have equal values.
n = len(Subject_data)
R = np.zeros([n, n])
ordered_subjects = Subject_data.loc[idx_order]
for column in Subject_data.columns[1:]:
    values = ordered_subjects[column].to_numpy()
    # Broadcast to an all-pairs equality table and accumulate it.
    R += (values[:, None] == values[None, :])

## Initialized A

In [257]:
# Initial edge weights per modality: element-wise product of the connection
# mask A, the Gaussian similarity matrix and the phenotypic match count R.
# Entries where A is 0 stay 0, so the result inherits A's asymmetry.
A_AV45 = A*AV45_simirality*R
A_FDG = A*FDG_simirality*R

## Feature Matrix (X)

In [263]:
# Node feature matrices: one row per subject, built from the full
# (un-reduced) feature lists.
# NOTE(review): rows here follow the original worksheet order, while
# A_AV45 / A_FDG follow idx_order -- confirm they are aligned before use.
X_AV45 = np.array(AV45_features)
X_FDG = np.array(FDG_features)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3995,3996,3997,3998,3999,4000,4001,4002,4003,4004
0,0.698179,0.004092,0.032366,0.001576,0.660451,0.003209,0.18286,0.313169,0.205149,0.11583,...,0.000134,0.128447,0.009915665,0.632313,0.019738,0.03139821,0.0001230173,0.09873758,0.096541,0.007706545
1,0.141083,0.123279,0.688792,0.561533,0.493717,0.141014,0.22789,0.299163,0.578348,0.010987,...,0.201029,0.019334,0.006356688,0.43465,0.313496,0.4335045,0.3623367,0.1200505,0.009551,0.08192285
2,0.314626,0.00044,0.321874,0.027753,0.369826,0.011013,0.105564,0.058834,0.026653,0.357452,...,0.00455,0.034382,8.805128e-05,0.676205,0.270539,0.2732738,0.006427022,0.2546546,0.037554,0.0001392259
3,0.516158,0.001757,0.214056,0.044399,0.588148,0.006071,0.465173,0.265622,0.430444,0.607999,...,0.000377,0.058917,0.07265706,0.56839,0.128953,0.01056805,0.0007967026,0.1741619,0.074804,0.1157251
4,0.352521,0.067856,0.487453,0.475278,0.098429,0.01339,0.118713,0.080898,0.13708,0.465864,...,0.069029,0.056455,0.000242548,0.321746,0.373274,0.05981908,0.5708944,0.02959503,0.005102,0.005510901
5,0.05401,0.07397,0.117963,0.433794,0.553476,0.198176,0.15666,0.434753,0.00456,0.002081,...,0.397772,0.071282,0.05450945,0.004911,0.04205,0.7045766,0.001380252,0.04418882,0.088628,0.0002079853
6,0.308662,0.321145,0.235555,0.607414,0.092225,0.202773,0.13442,0.078482,0.388013,0.11236,...,0.006186,0.072828,2.422805e-07,0.153135,0.41523,0.0005379958,0.2873937,0.001176306,0.02742,1.663654e-05
7,0.200474,0.833209,0.165786,0.144918,0.616987,0.024771,0.336065,0.127431,0.652854,0.000364,...,0.000377,0.047392,0.003806887,0.081392,0.162324,0.2087144,0.04896974,0.06017499,0.026106,0.282069
8,0.11542,0.040024,0.674525,0.555007,0.308122,0.025668,0.186692,0.265591,0.421204,0.138594,...,0.004767,0.096905,3.940721e-05,0.527104,0.120454,0.125324,0.002802054,0.01343663,0.120525,2.785416e-05
9,0.4039,0.045811,0.585603,0.065124,0.447793,0.032636,0.328589,0.138508,0.026195,0.012107,...,0.001283,0.042482,0.0002651003,0.0874,0.015405,0.5541731,0.1364753,0.02302624,0.019248,0.02531609


array([[5.        , 1.05820161, 4.18537083, ..., 0.        , 0.        ,
        0.        ],
       [1.05820161, 5.        , 1.16208049, ..., 0.        , 0.        ,
        0.        ],
       [4.18537083, 1.16208049, 5.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [2.15065412, 1.656196  , 2.15277321, ..., 5.        , 1.57147322,
        2.12633579],
       [0.7398643 , 0.76836459, 0.699741  , ..., 1.57147322, 5.        ,
        0.71036424],
       [2.83610323, 1.21008531, 2.68455332, ..., 2.12633579, 0.71036424,
        5.        ]])