In [100]:
import pandas as pd
import numpy as np
from scipy.spatial import distance
from sklearn.svm import SVC
from sklearn.feature_selection import RFE
import matplotlib.pyplot as plt

In [171]:
folds = 5

## Read AV45 Features

In [106]:
AV45 = pd.ExcelFile('Dataset/AV45_network.xlsx')
AV45_features= []
for i in AV45.sheet_names:
    AV45_corr = AV45.parse(i,header=None)
    indices = np.where(np.triu(np.ones(AV45_corr.shape), k=1).astype(bool))
    AV45_features.append(list(np.array(AV45_corr)[indices]))
print('subject number:%s'%len(AV45_features))
print('length of AV45 Features:%s'%len(AV45_features[0]))


subject number:46
length of AV45 Features:4005


## Read FDG Features

In [107]:
FDG = pd.ExcelFile('Dataset/FDG_network.xlsx')
FDG_features= []
for i in FDG.sheet_names:
    FDG_corr = FDG.parse(i,header=None)
    indices = np.where(np.triu(np.ones(FDG_corr.shape), k=1).astype(bool))
    FDG_features.append(list(np.array(FDG_corr)[indices]))
    
print('subject number:%s'%len(FDG_features))
print('length of FDG Features:%s'%len(FDG_features[0]))

subject number:46
length of FDG Features:4005


## Read Subject Clinical Data

In [94]:
Subject_data = pd.read_excel('Dataset/ClinicalScores.xlsx',sheet_name='Scores2',header=None) 
for i in Subject_data.columns:
    print('Clinical Feature %s'%i)
    for j in np.unique(Subject_data[i]):
        print('%s:%d'%(j,sum(Subject_data[i]==j)))
    print('-------------------')

Clinical Feature 0
AD:21
MCI:25
-------------------
Clinical Feature 1
1:46
-------------------
Clinical Feature 2
1:39
2:7
-------------------
Clinical Feature 3
1:41
2:5
-------------------
Clinical Feature 4
1:18
2:26
3:2
-------------------
Clinical Feature 5
0:17
1:20
2:7
3:2
-------------------


In [124]:
y = [1 if i=='AD' else 0 for i in Subject_data[0]]

## SVM-RFE (AV45)

In [143]:
#RFE
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(AV45_features, y)

#Find Top50 Features
idx = []
for i in range(1,51):
    idx.append(list(rfe.ranking_).index(i))
AV45_features_low_dim = pd.DataFrame(AV45_features)[idx]

## SVM-RFE (FDG)

In [146]:
#RFE
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(FDG_features, y)

#Find Top50 Features
idx = []
for i in range(1,51):
    idx.append(list(rfe.ranking_).index(i))
FDG_features_low_dim = pd.DataFrame(FDG_features)[idx]

## Simlarity (AV45)

In [158]:
corr_dist = distance.pdist(np.array(AV45_features_low_dim),metric='correlation')
corr_dist = distance.squareform(corr_dist)
sigma = np.mean(corr_dist)
AV45_simirality = np.exp(-corr_dist**2/(2*sigma**2))

## Simlarity (FDG)

In [159]:
corr_dist = distance.pdist(np.array(FDG_features_low_dim),metric='correlation')
corr_dist = distance.squareform(corr_dist)
sigma = np.mean(corr_dist)
FDG_simirality = np.exp(-corr_dist**2/(2*sigma**2))

In [162]:
rnd_state = np.random.RandomState()

In [174]:
n = len(Subject_data)
ids_all = Subject_data.index
ids = ids_all[rnd_state.permutation(n)]
stride = int(np.ceil(n / float(folds)))
test_ids = [ids[i: i + stride] for i in range(0, n, stride)]

train_ids = []
for fold in range(folds):
    train_ids.append(np.array([e for e in ids if e not in test_ids[fold]]))


In [179]:
test_ids[0]

Int64Index([40, 10, 20, 32, 43, 21, 36, 45, 18, 27], dtype='int64')

In [180]:
train_ids[0]

array([ 3,  7,  4, 39, 31, 24,  0, 17, 28, 42, 34, 13, 23,  8, 33, 30, 26,
       11,  6, 15, 25, 38, 37,  9, 19,  5, 14, 29, 16,  2, 22, 44, 41,  1,
       12, 35])

In [191]:
y = [1 if i=='AD' else 0 for i in Subject_data[0][train_ids[0]]]

#SVM-RFE (AV45)
AV45_features_train = np.array(pd.DataFrame(AV45_features).loc[train_ids[0]])
#RFE
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(AV45_features_train, y)
#Find Top50 Features
idx = []
for i in range(1,51):
    idx.append(list(rfe.ranking_).index(i))
AV45_features_low_dim = pd.DataFrame(AV45_features)[idx]

#SVM-RFE (FDG)
FDG_features_train = np.array(pd.DataFrame(FDG_features).loc[train_ids[0]])
#RFE
svc = SVC(kernel="linear", C=1)
rfe = RFE(estimator=svc, n_features_to_select=1, step=1)
rfe.fit(FDG_features_train, y)

#Find Top50 Features
idx = []
for i in range(1,51):
    idx.append(list(rfe.ranking_).index(i))
FDG_features_low_dim = pd.DataFrame(FDG_features)[idx]


## Edge Connection

In [205]:
#Graph Edge Connection
Subject_data_train = Subject_data.loc[train_ids[0]]
idx_train_NC = Subject_data_train[Subject_data_train[0]=='MCI'].index
idx_train_Pt = Subject_data_train[Subject_data_train[0]=='AD'].index
idx_test = test_ids[0]

#Adjacency Matrix (A): NC-->Pt-->test
n = len(Subject_data)
A = np.zeros([n,n])
for i in range(len(idx_train_NC)):
    for j in range(len(idx_train_NC)):
        A[i,j] = 1
for i in range(len(idx_train_NC),len(idx_train_NC)+len(idx_train_Pt)):
     for j in range(len(idx_train_NC),len(idx_train_NC)+len(idx_train_Pt)):
        A[i,j] = 1   
for i in range(len(idx_train_NC)+len(idx_train_Pt),n):
     for j in range(n):
        A[i,j] = 1  

## Edge Weight Initialization

In [255]:
idx_order = idx_train_NC.append(idx_train_Pt).append(idx_test)
#Similarity (SV45)
AV45_features_low_dim = AV45_features_low_dim.loc[idx_order]
corr_dist = distance.pdist(np.array(AV45_features_low_dim),metric='correlation')
corr_dist = distance.squareform(corr_dist)
sigma = np.mean(corr_dist)
AV45_simirality = np.exp(-corr_dist**2/(2*sigma**2))
#Similarity (FDG)
FDG_features_low_dim = FDG_features_low_dim.loc[idx_order]
corr_dist = distance.pdist(np.array(FDG_features_low_dim),metric='correlation')
corr_dist = distance.squareform(corr_dist)
sigma = np.mean(corr_dist)
FDG_simirality = np.exp(-corr_dist**2/(2*sigma**2))

#Phenotypic Information
n = len(Subject_data)
R = np.zeros([n,n])
for i in Subject_data.columns[1:]:
    a = list(Subject_data.loc[idx_order][i])
    b = list(Subject_data.loc[idx_order][i])
    for j in range(len(a)):
        for k in range(len(b)):
            if a[j]==b[k]:
                R[j,k]+=1    

## Initialized A

In [257]:
A_AV45 = A*AV45_simirality*R
A_FDG = A*FDG_simirality*R

## Feature Matrix (X)

In [264]:
X_AV45 = np.array(AV45_features)
X_FDG = np.array(FDG_features)

In [271]:
np.matmul(A_AV45, X_AV45)

array([[16.50479921,  8.40940433, 13.580844  , ...,  3.85043156,
         1.77050309,  2.41058854],
       [ 8.38686307,  5.0250034 , 11.95114217, ...,  2.53080841,
         1.18902409,  2.02636602],
       [16.34828462,  9.16836212, 13.76480228, ...,  4.10487776,
         1.77847519,  2.62425547],
       ...,
       [33.97273333, 15.52226728, 33.53516707, ..., 10.8799571 ,
         4.40611889,  6.89919383],
       [16.8934571 ,  7.95830439, 17.34101817, ...,  5.62315631,
         2.47612422,  5.57319312],
       [32.94102469, 18.06109329, 32.9426309 , ..., 10.24409641,
         4.17766103,  5.48173612]])

array([[6.98178938e-01, 4.09230268e-03, 3.23655450e-02, ...,
        9.87375816e-02, 9.65407523e-02, 7.70654505e-03],
       [1.41082894e-01, 1.23279340e-01, 6.88792188e-01, ...,
        1.20050475e-01, 9.55138747e-03, 8.19228522e-02],
       [3.14625623e-01, 4.40464280e-04, 3.21873969e-01, ...,
        2.54654569e-01, 3.75536842e-02, 1.39225869e-04],
       ...,
       [4.19546954e-01, 3.21754174e-02, 4.13260644e-01, ...,
        1.08249447e-01, 8.82582978e-04, 3.97776211e-07],
       [8.97744548e-02, 7.37884898e-04, 2.99755430e-02, ...,
        7.04972171e-03, 2.85944201e-03, 6.12476143e-01],
       [1.25274169e-01, 6.60014129e-02, 4.38887020e-01, ...,
        5.06798360e-02, 4.38969197e-02, 5.30843792e-03]])