# The files we need
- adni_connectome_aparc_count.mat
- ADNI-179_subjects-label-mor.csv
- DTI - sMRI.csv

# 1 libraries

In [1]:
import scipy.io
import numpy as np
import pandas as pd
import os
import re
import networkx as nx
import pickle
from numpy import genfromtxt

# 2 load data

In [2]:
raw_mor = pd.read_csv('./data/ADNI-179_subjects-label-mor.csv')

## 2-1 subject list

In [22]:
sub_179 = pd.DataFrame(raw_mor.iloc[:, 1])
sub_179 = sub_179.applymap(str)

## 2-2 connectivity matrix (count)

In [7]:
raw_con = scipy.io.loadmat('./data/adni_connectome_aparc_count.mat')
con = raw_con['connectome_aparc0x2Baseg_count'].T

In [8]:
con.shape

(179, 84, 84)

In [69]:
con[0] # 일단 arbitrary하게 threshold로 20을 잡을 예정.

array([[   0,    0,    0, ...,    0,    0,    0],
       [   0,    0,    0, ...,    0,    0,    0],
       [1486,  821,    0, ...,    0,    0,    0],
       ...,
       [   3,    0,    4, ...,    0,    0,    0],
       [  19,  135,  384, ..., 1294,    0,    0],
       [  13,   42,  371, ...,   10,   93,    0]], dtype=int32)

## 2-3 morphometry

In [43]:
mor = raw_mor.iloc[:, 4:] # extract morphometry data
mor = pd.concat([sub_179, mor], axis=1) # merge with subject name

In [45]:
mor

Unnamed: 0,RID,lh_bankssts_area,lh_caudalanteriorcingulate_area,lh_caudalmiddlefrontal_area,lh_cuneus_area,lh_entorhinal_area,lh_fusiform_area,lh_inferiorparietal_area,lh_inferiortemporal_area,lh_isthmuscingulate_area,...,wm.rh.superiortemporal,wm.rh.supramarginal,wm.rh.frontalpole,wm.rh.temporalpole,wm.rh.transversetemporal,wm.rh.insula,Left.UnsegmentedWhiteMatter,Right.UnsegmentedWhiteMatter,MaskVol.y,EstimatedTotalIntraCranialVol.y
0,4081,737,436,1593,1068,299,2261,3590,2587,817,...,4017.6,5017.4,229.0,475.9,452.4,6920.7,18493.1,19334.3,1241578,1175216.313
1,4119,869,593,2737,1418,564,3426,4655,3356,894,...,7079.7,9652.6,363.0,787.7,695.8,9738.5,27927.7,26879.5,1659472,1480541.795
2,4136,1126,609,2564,1692,447,3030,4285,3170,1166,...,8856.8,10692.2,337.1,693.5,638.5,13231.4,35276.1,34642.9,1794085,1850467.809
3,4142,785,405,1695,1073,285,2261,3739,2420,535,...,4477.5,5653.1,248.7,466.8,547.6,7963.1,18987.1,19344.4,1188334,1144613.578
4,4152,568,546,1804,1419,334,2602,3430,2759,818,...,6303.2,8422.6,275.5,523.7,559.9,9059.8,24940.1,25319.1,1501575,1445688.975
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,4287,805,464,1623,1453,332,2644,3457,2527,802,...,5966.1,7422.5,242.3,540.4,462.9,8050.5,29416.1,30754.2,1323664,1262824.903
175,4369,768,489,2488,1463,430,2695,4123,3383,979,...,6262.4,8333.7,344.1,500.4,479.0,9529.5,23860.8,24704.3,1388539,1332303.832
176,4371,904,565,1893,1253,608,3099,3917,3663,827,...,6481.5,9059.6,272.9,686.6,665.9,8262.5,24712.7,23681.2,1445892,1337718.333
177,4396,872,477,1737,1042,475,2656,3485,2651,808,...,5022.5,6323.6,235.5,468.7,549.7,7877.1,24802.8,23896.7,1287012,1218440.724


## 2-4 label

In [11]:
label = raw_mor[['DXCHANGE']]

In [12]:
label

Unnamed: 0,DXCHANGE
0,1
1,1
2,3
3,3
4,3
...,...
174,2
175,1
176,1
177,1


# 3 node_feat & num_nodes

In [13]:
num_nodes=84

In [14]:
node_skeleton = pd.read_csv('./data/DTI - sMRI.csv')

In [15]:
node_skeleton

Unnamed: 0,feat,label_freesurfer(Full name),sMRI thickness,sMRI volume
0,L.SFG,ctx-lh-superiorfrontal,lh_superiorfrontal_thickness._.5,lh_superiorfrontal_volume._.7
1,R.SPG,ctx-rh-superiorparietal,rh_superiorparietal_thickness._.6,rh_superiorparietal_volume._.8
2,R.CA,Right-Caudate,,Right.Caudate._.9
3,L.ITG,ctx-lh-inferiortemporal,lh_inferiortemporal_thickness._.5,lh_inferiortemporal_volume._.7
4,R.IN,ctx-rh-insula,rh_insula_thickness._.6,rh_insula_volume._.8
...,...,...,...,...
79,L.PCG,ctx-lh-posteriorcingulate,lh_posteriorcingulate_thickness._.5,lh_posteriorcingulate_volume._.7
80,L.MOFG,ctx-lh-medialorbitofrontal,lh_medialorbitofrontal_thickness._.5,lh_medialorbitofrontal_volume._.7
81,R.ICG,ctx-rh-isthmuscingulate,rh_isthmuscingulate_thickness._.6,rh_isthmuscingulate_volume._.8
82,R.CER,Right-Cerebellum-Cortex,,Right.Cerebellum.Cortex._.9


In [16]:
node_skeleton_array  = np.array(node_skeleton.iloc[:, 2:])

In [32]:
node_skeleton_array

array([['lh_superiorfrontal_thickness._.5',
        'lh_superiorfrontal_volume._.7'],
       ['rh_superiorparietal_thickness._.6',
        'rh_superiorparietal_volume._.8'],
       [nan, 'Right.Caudate._.9'],
       ['lh_inferiortemporal_thickness._.5',
        'lh_inferiortemporal_volume._.7'],
       ['rh_insula_thickness._.6', 'rh_insula_volume._.8'],
       ['rh_rostralanteriorcingulate_thickness._.6',
        'rh_rostralanteriorcingulate_volume._.8'],
       ['rh_supramarginal_thickness._.6', 'rh_supramarginal_volume._.8'],
       ['rh_entorhinal_thickness._.6', 'rh_entorhinal_volume._.8'],
       ['lh_fusiform_thickness._.5', 'lh_fusiform_volume._.7'],
       ['rh_superiortemporal_thickness._.6',
        'rh_superiortemporal_volume._.8'],
       ['lh_middletemporal_thickness._.5',
        'lh_middletemporal_volume._.7'],
       ['rh_bankssts_thickness._.6', 'rh_bankssts_volume._.8'],
       ['rh_inferiorparietal_thickness._.6',
        'rh_inferiorparietal_volume._.8'],
       ['

In [36]:
node_skeleton_array.shape

(84, 2)

In [67]:
def node_feat_func(subject_id):
    node_feat = list()
    data = mor[mor['RID'] == subject_id]
    for i in range (84):
        for j in range(2):
            if pd.isna(node_skeleton_array[i][j]) == False:
                col_name = node_skeleton_array[i][j][:-4]
            else:
                col_name = node_skeleton_array[i][j]
            
            # you'll get column name
                
            if pd.isna(col_name) == True:
                node_feat.append('nan')
            else:
                node_feat.append(data[col_name].values[0])
                
    return (np.array(node_feat)).reshape(84,2)

In [68]:
node_feat_func('4081')

array([['2.548', '16362'],
       ['2.154', '11230'],
       ['nan', '3251.7'],
       ['2.864', '9626'],
       ['2.928', '5414'],
       ['2.935', '1734'],
       ['2.488', '7781'],
       ['3.98', '1883'],
       ['2.727', '7434'],
       ['2.477', '8118'],
       ['2.644', '9488'],
       ['2.58', '1774'],
       ['2.433', '11992'],
       ['2.916', '9079'],
       ['2.463', '1623'],
       ['2.909', '9729'],
       ['nan', '39863.2'],
       ['2.412', '4451'],
       ['2.246', '1605'],
       ['nan', '503.0'],
       ['nan', '1363.2'],
       ['nan', '3112.7'],
       ['2.719', '9161'],
       ['3.093', '2094'],
       ['2.772', '9998'],
       ['nan', '1285.6'],
       ['2.493', '4240'],
       ['1.727', '1913'],
       ['2.762', '1782'],
       ['nan', '2902.9'],
       ['1.9', '6300'],
       ['2.471', '3270'],
       ['2.023', '5537'],
       ['2.291', '10373'],
       ['2.326', '2139'],
       ['2.239', '2511'],
       ['2.22', '11705'],
       ['1.968', '6532'],
       ['2.1

# 6 edge_feat & num_edges

In [70]:
def edge_list_func(adjacency):
    edge_list=[]
    for i in range (84):
        for j in range (84):
            edge_list.append((i, j, adjacency[i][j]))
                
    edge_new_list=[]
    for i in range (len(edge_list)):
        if edge_list[i][2] >= 20:
            edge_new_list.append(edge_list[i])
            
    return edge_new_list

In [80]:
edge_list_func(con[0])

[(2, 0, 1486),
 (2, 1, 821),
 (3, 0, 30),
 (4, 3, 32),
 (5, 0, 49),
 (5, 2, 386),
 (5, 3, 924),
 (5, 4, 1439),
 (6, 0, 11102),
 (6, 2, 3814),
 (6, 3, 1105),
 (6, 4, 78),
 (6, 5, 10084),
 (7, 0, 1022),
 (7, 1, 28),
 (7, 2, 2623),
 (7, 3, 640),
 (7, 4, 1365),
 (7, 5, 12803),
 (7, 6, 20770),
 (8, 0, 582),
 (8, 1, 3872),
 (8, 2, 120),
 (8, 3, 5335),
 (8, 4, 502),
 (8, 5, 1328),
 (8, 6, 2742),
 (8, 7, 1529),
 (9, 0, 277),
 (9, 2, 101),
 (9, 3, 7730),
 (9, 4, 254),
 (9, 5, 23058),
 (9, 6, 30977),
 (9, 7, 14978),
 (9, 8, 1266),
 (10, 0, 50),
 (10, 1, 306),
 (10, 2, 102),
 (10, 3, 27),
 (10, 4, 31),
 (10, 5, 205),
 (10, 6, 102),
 (10, 7, 1659),
 (10, 8, 80),
 (10, 9, 418),
 (11, 0, 44),
 (11, 1, 21),
 (11, 3, 10627),
 (11, 4, 1034),
 (11, 5, 9977),
 (11, 6, 1516),
 (11, 7, 5038),
 (11, 8, 2557),
 (11, 9, 58385),
 (11, 10, 545),
 (12, 0, 36),
 (12, 1, 2900),
 (12, 2, 33),
 (12, 3, 88),
 (12, 4, 157),
 (12, 5, 498),
 (12, 6, 235),
 (12, 7, 2436),
 (12, 8, 690),
 (12, 9, 967),
 (12, 10, 15070),
 

In [71]:
def num_edges_func(edge_list):
    num_edges=len(edge_list)
    return num_edges

In [72]:
def edge_feat_func(num_edges, edge_list):
    # initialization
    edgefeat_dim = 1
    edge_feat = np.zeros((int(num_edges), edgefeat_dim))
    edge_index = np.zeros((2, int(num_edges)))
    # calculation
    for i in range (num_edges):
        source = edge_list[i][0]
        target = edge_list[i][1]
        edge = edge_list[i][2]
        edge_index[0][i] = source
        edge_index[1][i] = target
        edge_feat[i] = edge
    return edge_feat, edge_index

#  generate brain graph

In [82]:
#!rm -rf intelligence_structural_graph_fa
!mkdir ADNI_structural_graph_count_threshold_20
for i in range(len(sub_179)):
    # subject ID
    subject_id = sub_179.values[i][0]
    
    # load adjacency matrix
    adjacency = con[i]

    # label 
    diagnosis = label.values[i][0]
    
    # edge_index, edge_feat (raw)
    raw_edge_list = edge_list_func(adjacency)
    raw_num_edges = num_edges_func(raw_edge_list)
    raw_edge_feat, raw_edge_index = edge_feat_func(raw_num_edges, raw_edge_list)
    
    # edge_index, edge_feat (processed)
    zero_edge_index=[]
    for i, k in enumerate(raw_edge_feat):
        if k[0] == 0:
            zero_edge_index.append(i)
            
    edge_feat = np.delete(raw_edge_feat, zero_edge_index)
    node_from = np.delete(raw_edge_index[0], zero_edge_index)
    node_to = np.delete(raw_edge_index[1], zero_edge_index)
    edge_index = np.concatenate((node_from, node_to)).reshape(2, -1)
    num_edges = len(node_from)    
            
    # node_feat
    node_feat = node_feat_func(subject_id)
    
    # draw graph
    graph = dict({'labels': diagnosis, 'num_nodes': num_nodes, 'num_edges': num_edges,
                                  'edge_index': edge_index, 'edge_feat': edge_feat,
                                  'node_feat': node_feat})
    
    #print(graph)
    
    with open('./ADNI_structural_graph_count_threshold_20/ADNI_structural_graph_count_threshold_20_%s.pickle'%subject_id, 'wb') as file:
        pickle.dump(graph, file)