In [25]:
## Using traditional network analysis as a comparison for our method.

import os
import glob
import pandas as pd
import numpy as np
import re
import networkx as nx
import networkx.algorithms.community as nx_comm
import matplotlib.pyplot as plt

# Task condition being analysed. To run the 0-back condition instead, use:
#   input_dir  = 'hcp_out02_corr_matrix_scan2_0back'
#   output_dir = 'hcp_out03_network_analysis_0back'
input_dir, output_dir = (
    'hcp_out02_corr_matrix_scan2_2back',
    'hcp_out03_network_analysis_2back',
)

# All CSV files in the input directory, in lexicographic path order
# (plain string sort, so '..._101.csv' comes after '..._1009.csv').
input_files = sorted(glob.glob(input_dir + '/*.csv'))

# input_files

In [26]:
# Display the sorted list of CSV paths collected by the glob above.
input_files

['hcp_out02_corr_matrix_scan2_2back/corr_matrix_1000.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1001.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1002.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1003.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1004.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1005.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1006.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1007.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1008.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1009.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_101.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1010.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1011.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1012.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1013.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1014.csv',
 'hcp_out02_corr_matrix_scan2_2back/corr_matrix_1015.csv'

In [27]:
# input_files[:10]
# Extract the subject id from every file name, in the same order as
# `input_files`, so that position i in both lists refers to the same subject.
#
# The leading greedy `.*` makes the pattern lock onto the LAST occurrence of
# 'corr_matrix_' in the path; this matters because the directory name itself
# contains 'corr_matrix_'. The dot before 'csv' is escaped and the pattern is
# anchored at the end so only a real '.csv' suffix is accepted.
subject_id_pattern = re.compile(r'.*corr_matrix_(.*)\.csv$')

subject_id = []

for f in input_files:
    match = subject_id_pattern.search(f)
    if match is None:
        # Fail with the offending path instead of an AttributeError on None.
        raise ValueError('cannot extract subject id from file name: ' + f)
    subject_id.append(match.group(1))

subject_id

['1000',
 '1001',
 '1002',
 '1003',
 '1004',
 '1005',
 '1006',
 '1007',
 '1008',
 '1009',
 '101',
 '1010',
 '1011',
 '1012',
 '1013',
 '1014',
 '1015',
 '1016',
 '1018',
 '1019',
 '102',
 '1020',
 '1021',
 '1022',
 '1023',
 '1024',
 '1025',
 '1026',
 '1027',
 '1028',
 '1029',
 '103',
 '1030',
 '1031',
 '1032',
 '1033',
 '1034',
 '1035',
 '1036',
 '1037',
 '1038',
 '1039',
 '104',
 '1040',
 '1041',
 '1042',
 '1043',
 '1044',
 '1045',
 '1046',
 '1047',
 '1048',
 '1049',
 '105',
 '1050',
 '1051',
 '1052',
 '1053',
 '1054',
 '1055',
 '1056',
 '1057',
 '1058',
 '1059',
 '106',
 '1060',
 '1061',
 '1062',
 '1063',
 '1064',
 '1065',
 '1066',
 '1067',
 '1068',
 '1069',
 '107',
 '1070',
 '1071',
 '1072',
 '1073',
 '108',
 '109',
 '110',
 '111',
 '112',
 '113',
 '114',
 '115',
 '116',
 '117',
 '118',
 '119',
 '120',
 '121',
 '122',
 '123',
 '124',
 '125',
 '126',
 '127',
 '128',
 '129',
 '130',
 '131',
 '132',
 '133',
 '134',
 '135',
 '136',
 '137',
 '138',
 '139',
 '140',
 '141',
 '142',
 '143',

In [28]:

# Edges are kept only for correlations strictly above this value
# (same threshold as in the harmonic analysis).
CORR_THRESHOLD = 0.07

# One row per subject; the metric tables are concatenated to this when saving.
output_file = pd.DataFrame({'subject_id': subject_id})

# Per-subject results, appended in the same order as `input_files`.
all_clustering  = []
all_betweenness = []
all_modularity  = []
all_degree      = []
all_eigenvector = []
all_closeness   = []


def node_values(metric, nodes):
    """Return the values of a node -> value mapping, ordered by `nodes`."""
    return [metric[node] for node in nodes]


for f in input_files:
    print(f)
    df_corr = pd.read_csv(f)

    # Label rows and columns with integer ROI ids 1..N so nodes sort
    # numerically rather than as strings. Assigning N column labels requires
    # the matrix to be square (N rows, N columns); pandas raises otherwise.
    roi_ids = list(range(1, len(df_corr) + 1))
    df_corr.index = roi_ids
    df_corr.columns = roi_ids

    # Long format: one row per (roi1, roi2, corr) entry of the matrix.
    links = df_corr.stack().reset_index()
    links.columns = ['roi1', 'roi2', 'corr']

    # Keep supra-threshold correlations and drop the diagonal (self-loops).
    above_threshold = links['corr'] > CORR_THRESHOLD
    off_diagonal = links['roi1'] != links['roi2']
    links_filtered = links.loc[above_threshold & off_diagonal]

    # Build the undirected, unweighted graph.
    G = nx.from_pandas_edgelist(links_filtered, 'roi1', 'roi2')

    # The edge list only introduces nodes that have at least one surviving
    # edge. Add every ROI explicitly so isolated ROIs stay in the graph:
    # otherwise the per-node lists below get shorter and their positions no
    # longer correspond to ROI ids, misaligning columns across subjects.
    # NOTE: isolated ROIs now count towards the graph size used by the
    # normalised metrics (e.g. degree centrality).
    G.add_nodes_from(roi_ids)

    ## clustering coefficient:
    clustering = nx.clustering(G)
    all_clustering.append(node_values(clustering, roi_ids))

    ## betweenness:
    betweenness = nx.betweenness_centrality(G)
    all_betweenness.append(node_values(betweenness, roi_ids))

    ## degree centrality:
    degree = nx.degree_centrality(G)
    all_degree.append(node_values(degree, roi_ids))

    ## modularity (one scalar per subject, communities from label propagation):
    modularity = nx_comm.modularity(G, nx_comm.label_propagation_communities(G))
    all_modularity.append(modularity)

    ## eigenvector:
    eigenvector = nx.eigenvector_centrality(G)
    all_eigenvector.append(node_values(eigenvector, roi_ids))

    ## closeness:
    closeness = nx.closeness_centrality(G)
    all_closeness.append(node_values(closeness, roi_ids))
    


hcp_out02_corr_matrix_scan2_2back/corr_matrix_1000.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1001.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1002.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1003.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1004.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1005.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1006.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1007.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1008.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1009.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_101.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1010.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1011.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1012.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1013.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1014.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1015.csv
hcp_out02_corr_matrix_scan2_2back/corr_matrix_1016.csv
hcp_out02_c

In [29]:
# Inspect the correlation matrix left over from the loop above. `df_corr` is
# reassigned on every iteration, so this shows only the last file processed.
df_corr

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,259,260,261,262,263,264,265,266,267,268
1,1.000000,0.477384,0.205398,0.320890,0.412636,0.290823,0.268464,0.322462,-0.061475,0.170081,...,-0.090114,-0.264086,-0.152163,-0.261937,-0.109283,-0.134917,0.125159,0.038340,0.062454,0.072116
2,0.477384,1.000000,0.558706,0.260939,0.160566,0.091706,0.539910,0.581236,-0.055811,-0.054774,...,-0.173109,-0.069944,-0.205468,-0.009982,0.045039,0.052858,-0.140754,-0.009793,-0.152716,-0.128588
3,0.205398,0.558706,1.000000,0.320476,0.173666,0.114267,0.351988,0.521612,0.076709,0.246555,...,-0.120335,0.115207,-0.223354,0.206983,-0.077617,0.140852,0.070657,-0.067380,-0.096274,-0.204107
4,0.320890,0.260939,0.320476,1.000000,0.558831,0.339055,0.001639,0.486699,-0.184121,0.385441,...,0.168258,-0.146396,0.070864,-0.189435,-0.063831,-0.320618,0.191931,-0.185842,-0.063809,0.170547
5,0.412636,0.160566,0.173666,0.558831,1.000000,0.756472,-0.325727,0.224979,0.048278,0.784208,...,0.259180,-0.244141,0.187515,-0.305785,-0.032187,-0.388841,0.266461,-0.290749,-0.081886,0.167241
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264,-0.134917,0.052858,0.140852,-0.320618,-0.388841,-0.301574,0.198321,-0.078106,0.177975,-0.258544,...,0.469023,0.787629,0.357528,0.850380,0.738721,1.000000,0.504391,0.790859,0.689722,0.343654
265,0.125159,-0.140754,0.070657,0.191931,0.266461,0.247872,-0.169012,0.102510,0.233788,0.303692,...,0.841830,0.598785,0.631854,0.521093,0.611326,0.504391,1.000000,0.518992,0.640047,0.721250
266,0.038340,-0.009793,-0.067380,-0.185842,-0.290749,-0.241521,0.076525,-0.209489,-0.196860,-0.257990,...,0.455566,0.612200,0.527596,0.744257,0.702459,0.790859,0.518992,1.000000,0.640945,0.464664
267,0.062454,-0.152716,-0.096274,-0.063809,-0.081886,-0.147834,-0.159995,-0.216786,0.353566,0.028377,...,0.524542,0.634213,0.529250,0.689533,0.689320,0.689722,0.640047,0.640945,1.000000,0.389509


In [30]:

# Create the output directory if needed. `exist_ok=True` replaces the
# check-then-create pattern and also creates missing parent directories.
os.makedirs(output_dir, exist_ok=True)

# Write one CSV per metric: the subject_id column followed by that metric's
# values (one row per subject, in the order the subjects were processed).
for res, fname in [[all_clustering, 'clustering'], [all_modularity, 'modularity'],
                   [all_betweenness, 'betweenness'], [all_eigenvector, 'eigenvector'],
                   [all_degree, 'degree'], [all_closeness, 'closeness']]:

    metric_values = pd.DataFrame(res)
    metric_table = pd.concat([output_file, metric_values], axis=1)
    metric_table.to_csv(os.path.join(output_dir, fname + '.csv'))
    