Compute mean connectivity from the Maastricht DWI dataset

In [1]:
import os

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.colors as colors

from glob import glob

In [72]:
# first, count streamlines per subject/IC
# The number of lines in each filtered-streamline text file is used as the
# streamline count. Rows of ic_slcounts follow the sorted file order for each
# IC; columns follow the order of the ['L_IC', 'R_IC'] list below.
data_dir = os.path.join('/Users/KRS228/data',
                        'maastricht_dwi',
                        'preprocessed/dsi_studio',
                        'filtered_streamlines')

ic_slcounts = np.zeros((10,2))

for icx, ic in enumerate(['L_IC', 'R_IC']):

    for sx, subfile in enumerate(sorted(glob(data_dir+'/*%s*.txt'%ic))):
        # use a context manager so every file handle is closed right after
        # counting instead of being left open for the garbage collector
        with open(subfile) as streamline_file:
            num_lines = sum(1 for _ in streamline_file)
        ic_slcounts[sx, icx] = num_lines

print(ic_slcounts)

[[108.  85.]
 [ 82.  97.]
 [ 51.  76.]
 [142.  81.]
 [ 42.  90.]
 [ 79.  99.]
 [111.  98.]
 [162. 159.]
 [ 70.  94.]
 [107. 105.]]


In [75]:
# average streamline count for each IC (column-wise mean over the rows)
ic_slcounts.mean(axis=0)

array([95.4, 98.4])

In [2]:
# locate the connectivity matrix files (names ending in 'cg.txt')
path_parts = ['/Users/KRS228/data',
              'maastricht_dwi',
              'preprocessed/dsi_studio',
              'connectivity']
data_dir = os.path.join(*path_parts)

# sort the matches so the file order is deterministic
conn_files = glob(data_dir+'/*cg.txt')
conn_files.sort()

In [29]:
# loop through each subject's connectivity matrix and clean up the formatting
#
# Produces:
#   connmats : (subject, row, column) stack of the per-subject matrices
#   slcounts : (row, subject) array taken from the first column of each file
# `df` and `orig_df` keep the values from the last file after the loop; later
# cells reuse them for their labels.
n_subjects = len(conn_files)  # size by the files actually found rather than a fixed 10,
                              # so missing subjects are not averaged in as all-zero matrices
n_regions = 9

connmats = np.zeros((n_subjects, n_regions, n_regions))
slcounts = np.zeros((n_regions, n_subjects))
for sx, matrix_fpath in enumerate(conn_files):

    # header=1: the second line of the file supplies the column names
    orig_df = pd.read_csv(matrix_fpath, header=1, delimiter='\t')
    # drop the last column, then the two leading columns, leaving the square matrix
    df = orig_df.drop(columns=orig_df.columns[-1])
    df = df.iloc[:,2:]

    # strip the first four characters of every column label (e.g. 'S11_')
    df.columns = df.columns.str[4:]

    connmats[sx,:,:] = df.to_numpy()

    # the first column of the raw frame is stored as this subject's streamline counts
    slcounts[:,sx] = orig_df.iloc[:,0]

In [30]:
# inspect the raw frame from the last file read in the loop (before any columns were dropped)
orig_df

Unnamed: 0,data,data.1,S11_LL_IC,S11_brachium_IC,S11_commissure_IC,S11_L_ICc,S11_L_ICd,S11_L_ICx,S11_R_ICc,S11_R_ICd,S11_R_ICx,Unnamed: 11
0,788,S11_LL_IC,0,120,12,20,38,72,32,16,84,
1,584,S11_brachium_IC,120,0,22,10,32,24,12,20,52,
2,136,S11_commissure_IC,12,22,0,10,18,6,0,0,0,
3,160,S11_L_ICc,20,10,10,0,20,20,0,0,0,
4,220,S11_L_ICd,38,32,18,20,0,2,0,0,0,
5,248,S11_L_ICx,72,24,6,20,2,0,0,0,0,
6,160,S11_R_ICc,32,12,0,0,0,0,0,12,24,
7,96,S11_R_ICd,16,20,0,0,0,0,12,0,0,
8,320,S11_R_ICx,84,52,0,0,0,0,24,0,0,


In [34]:
# check the streamline count matrix (subdivision x subject):
# one row per subdivision, one column per connectivity file
slcounts

array([[580., 324., 340., 732., 328., 640., 636., 752., 348., 788.],
       [448.,  80., 304., 788., 436., 532., 180., 752., 204., 584.],
       [ 20.,  68.,  76.,  16.,  32.,  52.,  40.,  60.,  84., 136.],
       [ 64.,  44.,  52., 152.,  48.,  36., 176., 108., 100., 160.],
       [  1.,  56.,  52.,  20.,  48.,  48.,  20.,  72.,  56., 220.],
       [204., 116.,  60., 492., 104., 268., 368., 292., 240., 248.],
       [128., 112.,  44., 172.,  20., 144., 152., 328., 120., 160.],
       [ 28.,  56.,  24.,  32.,  44.,  48.,  24.,  52.,  72.,  96.],
       [328., 232., 208., 324., 284., 240., 268., 504., 200., 320.]])

In [33]:
# average each row of slcounts across its columns,
# giving one mean streamline count per IC subdivision
mean_slcount = np.mean(slcounts, axis=1)
print(mean_slcount, mean_slcount.shape, sep='\n')

[546.8 430.8  58.4  94.   59.3 239.2 138.   47.6 290.8]
(9,)


In [4]:
# element-wise average of the stacked matrices (collapses the leading axis)
mean_connmat = np.mean(connmats, axis=0)

mean_connmat

In [6]:
# column labels of the last cleaned matrix (first four characters already stripped)
df.columns

Index(['LL_IC', 'brachium_IC', 'commissure_IC', 'L_ICc', 'L_ICd', 'L_ICx',
       'R_ICc', 'R_ICd', 'R_ICx'],
      dtype='object')

In [61]:
# build the group-average dataframe in the same layout as a single subject's:
# a 'data' column, a label column, then the matrix columns
subdivision_names = {'L_ICc':'L_IC_central', 'L_ICd':'L_IC_dorsal', 'L_ICx':'L_IC_external',
                     'R_ICc':'R_IC_central', 'R_ICd':'R_IC_dorsal', 'R_ICx':'R_IC_external'}
mean_df = pd.DataFrame(mean_connmat, columns=df.columns).rename(columns=subdivision_names)

# row labels come from the last subject's raw frame with the first four characters removed
# NOTE(review): only the column labels are renamed, so row labels stay e.g. 'L_ICc' - confirm this is intended
label_colname = orig_df.columns[1]
row_labels = orig_df.iloc[:,1].str[4:]

# final column order: 'data', the label column, then the matrix columns
mean_df.insert(0, 'data', mean_slcount)
mean_df.insert(1, label_colname, row_labels)

In [62]:
# display the assembled group-average dataframe
mean_df

Unnamed: 0,data,data.1,LL_IC,brachium_IC,commissure_IC,L_IC_central,L_IC_dorsal,L_IC_external,R_IC_central,R_IC_dorsal,R_IC_external
0,546.8,LL_IC,0.0,94.6,2.4,15.8,4.6,60.4,23.0,2.2,70.4
1,430.8,brachium_IC,94.6,0.0,4.8,6.8,5.0,41.4,12.2,3.6,47.0
2,58.4,commissure_IC,2.4,4.8,0.0,2.2,8.6,1.4,1.2,7.6,1.0
3,94.0,L_ICc,15.8,6.8,2.2,0.0,7.2,14.8,0.0,0.0,0.2
4,59.3,L_ICd,4.6,5.0,8.6,7.2,0.0,1.6,0.0,2.4,0.2
5,239.2,L_ICx,60.4,41.4,1.4,14.8,1.6,0.0,0.0,0.0,0.0
6,138.0,R_ICc,23.0,12.2,1.2,0.0,0.0,0.0,0.0,7.0,25.6
7,47.6,R_ICd,2.2,3.6,7.6,0.0,2.4,0.0,7.0,0.0,1.0
8,290.8,R_ICx,70.4,47.0,1.0,0.2,0.2,0.0,25.6,1.0,0.0


In [64]:
# write the group-average table as a tab-separated file, without the index column
# NOTE(review): data_dir is assigned in two earlier cells; this writes into whichever ran last
out_fpath = os.path.join(data_dir, 'mean_connectivity.txt')
mean_df.to_csv(path_or_buf=out_fpath,
               sep='\t',
               index=False)