In [None]:
import os
import csv
import datetime
import pandas as pd
import numpy as np
import zipfile
from pathlib import Path
import glob
import gc

MISMATCH BETWEEN FOLDER NAME IN UKBB SHOWCASE AND FILES IN STRUCTURAL DTI MATRICES:

31024_Schaefer7n1000p_Tian_S4
- expected: 1000 + 54 = 1054 structures
- in fact: 216 structures = 200 + 16 (T1)

31025_Schaefer7n200p_Tian_S1:
- expected: 200 + 16 = 216 structures
- in fact: 554 structures = 500 + 54 (T4)

31026_Schaefer7n500p_Tian_S4
- expected: 500 + 54 = 554 structures
- in fact: 1054 structures = 1000 + 54 (T4)

#####################################################

Check how many columns are in the file

In [None]:
# Read only the first row of one connectome CSV and print how many columns it has.
# newline="" is what the csv module asks for when it is given a file object.
with open("/brain/dti/dti_struct/31025_connectome_streamline_count_10M.csv", "r", newline="") as f:
  reader = csv.reader(f, delimiter=",")
  # An empty file has no first row: report 0 columns instead of raising StopIteration.
  first_row = next(reader, [])
  num_columns = len(first_row)
  print(num_columns)

# 31020_aparc_a2009s_Tian_S1

### Connectome_mean_FA_10M

In [None]:
# Folders scanned (via os.listdir) by the 31020_aparc_a2009s_Tian_S1 cells below;
# each one is expected to contain one sub-folder per subject.
folder_paths = ['/Melbourne_BULK/DTI_connectome/31020_aparc_a2009s_Tian_S1/10000/unzipped/',
'/Melbourne_BULK/DTI_connectome/31020_aparc_a2009s_Tian_S1/20000/unzipped/',
'/Melbourne_BULK/DTI_connectome/31020_aparc_a2009s_Tian_S1/30000/unzipped/',
# This entry used to be the relative path 'media/hcs-sci-psy-narun/Melbourne_BULK/...',
# which resolves against the current working directory; it now matches its siblings.
# NOTE(review): confirm this is the real mount point of the 40000 batch.
'/Melbourne_BULK/DTI_connectome/31020_aparc_a2009s_Tian_S1/40000/unzipped/',
'/Melbourne_BULK/DTI_connectome/31020_aparc_a2009s_Tian_S1/42840/unzipped/']

In [None]:
# Connectome_mean_FA_10M for 31020_aparc_a2009s_Tian_S1.
# For every subject folder: read the matrix CSV, keep its upper triangle
# (diagonal included) as one flat vector, then save (1) all subjects and
# (2) only the folders whose name marks instance 2.
csv_file = "connectome_mean_FA_10M.csv"

fa_connectome_mean_list, index_list = [], []
skipped_folders = []
missing_file_count = 0

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        fa_file = os.path.join(subfolder_path, csv_file)

        # Folders without the matrix are counted and listed after the scan.
        if not os.path.exists(fa_file):
            skipped_folders.append(subfolder_path)
            missing_file_count += 1
            continue

        try:
            fa_matrix = np.array(pd.read_csv(fa_file, header=None))
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {fa_file}")
            continue

        # Row/column positions of the upper triangle, main diagonal included (k=0).
        upper_rows, upper_cols = np.triu_indices(len(fa_matrix), k=0)
        fa_connectome_mean_list.append(fa_matrix[upper_rows, upper_cols])
        index_list.append(subject_folder)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving fa_connectome_mean_full_df and indices')

# One row per subject folder, indexed by the folder name.
fa_connectome_mean_full_df = pd.DataFrame(fa_connectome_mean_list, index=index_list)
fa_connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_mean_FA_10M/31020_aparc_a2009s_Tian_S1_fa_connectome_mean_full_df.csv')

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_mean_FA_10M/31020_aparc_a2009s_Tian_S1_full_id_full.csv', index=False)

print('Getting IDs and matrices for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
instance_2_pairs = [
    (name, vector)
    for name, vector in zip(index_list, fa_connectome_mean_list)
    if name.split("_")[-2] == '2'
]
index_instance_2 = [name for name, _ in instance_2_pairs]
fa_connectome_instance_2 = [vector for _, vector in instance_2_pairs]
del instance_2_pairs

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31020_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_mean_FA_10M/31020_aparc_a2009s_Tian_S1_full_id_instance_2.csv', index=False)

del index_list, fa_connectome_mean_list

print('Getting connectome data frame for instance 2')
fa_connectome_instance_2_df = pd.DataFrame(fa_connectome_instance_2, index=index_instance_2_df['ID'].to_list())
del index_instance_2

print('Saving connectome data frame instance 2')
fa_connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_mean_FA_10M/31020_aparc_a2009s_Tian_S1_fa_connectome_instance_2_df.csv', index_label='ID')

del fa_connectome_instance_2_df, index_instance_2_df

gc.collect()


**When loading one of the saved files, specify which column of the CSV file should be used as the index of the dataframe**

In [None]:
# When loading a saved CSV, tell pandas which column should become the dataframe index.
# NOTE(review): 'my_data.csv' looks like a placeholder file name — substitute a real path before running.
# Option 1: select the index column by position (first column).
df = pd.read_csv('my_data.csv', index_col=0)
# Option 2: select the index column by name.
df = pd.read_csv('my_data.csv', index_col='ID')

### Connectome_mean_length_10M

In [None]:
# Connectome_mean_length_10M for 31020_aparc_a2009s_Tian_S1.
# For every subject folder: read the matrix CSV, keep its upper triangle
# (diagonal included) as one flat vector, then save (1) all subjects and
# (2) only the folders whose name marks instance 2.
csv_file = "connectome_mean_length_10M.csv"

missing_file_count = 0

connectome_mean_list = []
index_list = []
skipped_folders = []

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        file = os.path.join(subfolder_path, csv_file)

        # Folders without the matrix are counted and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            # Convert to an array once instead of once per use.
            connectome = np.array(pd.read_csv(file, header=None))
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Upper triangle, main diagonal included (k=0), flattened to 1-D.
        connectome_triu_indices = np.triu_indices(len(connectome), k=0)
        connectome_mean_list.append(connectome[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

# One row per subject folder, indexed by the folder name.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index = index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_mean_length_10M/31020_aparc_a2009s_Tian_S1_connectome_mean_length_10M_full_df.csv')

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_mean_length_10M/31020_aparc_a2009s_Tian_S1_connectome_mean_length_10M_full_id.csv', index=False)

print('Getting IDs for instance 2')

connectome_instance_2 = []
index_instance_2 = []

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
for folder_name, c in zip(index_list, connectome_mean_list):
    if folder_name.split("_")[-2] == '2':
        connectome_instance_2.append(c)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31020_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_mean_length_10M/31020_aparc_a2009s_Tian_S1_connectome_mean_length_10M_id_instance_2.csv', index=False)

del index_list
del connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
# This target used to be the relative path 'IBu/UK_BB/ML_DATASETS/...', which
# resolves against the working directory; it now sits next to the three other
# files this cell writes.
# NOTE(review): confirm the absolute output root.
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_mean_length_10M/31020_aparc_a2009s_Tian_S1_connectome_mean_length_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df
del index_instance_2_df

gc.collect()

### Connectome_sift2_fbc_10M

In [None]:
# Connectome_sift2_fbc_10M for 31020_aparc_a2009s_Tian_S1.
# Same pipeline as the other connectome cells: flatten the upper triangle
# (diagonal included) of each subject's matrix, save all subjects, then save
# the instance-2 subset.
csv_file = "connectome_sift2_fbc_10M.csv"

connectome_mean_list, index_list = [], []
skipped_folders = []
missing_file_count = 0

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        matrix_path = os.path.join(subfolder_path, csv_file)

        # Folders without the matrix are counted and listed after the scan.
        if not os.path.exists(matrix_path):
            skipped_folders.append(subfolder_path)
            missing_file_count += 1
            continue

        try:
            matrix = np.array(pd.read_csv(matrix_path, header=None))
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {matrix_path}")
            continue

        # Row/column positions of the upper triangle, main diagonal included (k=0).
        upper_rows, upper_cols = np.triu_indices(len(matrix), k=0)
        connectome_mean_list.append(matrix[upper_rows, upper_cols])
        index_list.append(subject_folder)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

# One row per subject folder, indexed by the folder name.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_sift2_fbc_10M/31020_aparc_a2009s_Tian_S1_connectome_sift2_fbc_10M_full_df.csv')

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_sift2_fbc_10M/31020_aparc_a2009s_Tian_S1_connectome_sift2_fbc_10M_full_id.csv', index=False)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
instance_2_pairs = [
    (name, vector)
    for name, vector in zip(index_list, connectome_mean_list)
    if name.split("_")[-2] == '2'
]
index_instance_2 = [name for name, _ in instance_2_pairs]
connectome_instance_2 = [vector for _, vector in instance_2_pairs]
del instance_2_pairs

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31020_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_sift2_fbc_10M/31020_aparc_a2009s_Tian_S1_connectome_sift2_fbc_10M_id_instance_2.csv', index=False)

del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index=index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_sift2_fbc_10M/31020_aparc_a2009s_Tian_S1_connectome_sift2_fbc_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_streamline_count_10M

In [None]:
# Connectome_streamline_count_10M for 31020_aparc_a2009s_Tian_S1.
# Same pipeline as the other connectome cells: flatten the upper triangle
# (diagonal included) of each subject's matrix, save all subjects, then save
# the instance-2 subset.
csv_file = "connectome_streamline_count_10M.csv"

connectome_mean_list, index_list = [], []
skipped_folders = []
missing_file_count = 0

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        matrix_path = os.path.join(subfolder_path, csv_file)

        # Folders without the matrix are counted and listed after the scan.
        if not os.path.exists(matrix_path):
            skipped_folders.append(subfolder_path)
            missing_file_count += 1
            continue

        try:
            matrix = np.array(pd.read_csv(matrix_path, header=None))
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {matrix_path}")
            continue

        # Row/column positions of the upper triangle, main diagonal included (k=0).
        upper_rows, upper_cols = np.triu_indices(len(matrix), k=0)
        connectome_mean_list.append(matrix[upper_rows, upper_cols])
        index_list.append(subject_folder)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

# One row per subject folder, indexed by the folder name.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_streamline_count_10M/31020_aparc_a2009s_Tian_S1_connectome_streamline_count_10M_full_df.csv')

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_streamline_count_10M/31020_aparc_a2009s_Tian_S1_connectome_streamline_count_10M_full_id.csv', index=False)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
instance_2_pairs = [
    (name, vector)
    for name, vector in zip(index_list, connectome_mean_list)
    if name.split("_")[-2] == '2'
]
index_instance_2 = [name for name, _ in instance_2_pairs]
connectome_instance_2 = [vector for _, vector in instance_2_pairs]
del instance_2_pairs

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31020_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_streamline_count_10M/31020_aparc_a2009s_Tian_S1_connectome_streamline_count_10M_id_instance_2.csv', index=False)

del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index=index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31020_aparc_a2009s_Tian_S1/Connectome_streamline_count_10M/31020_aparc_a2009s_Tian_S1_connectome_streamline_count_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df, index_instance_2_df

gc.collect()

In [None]:
# Extract names of the folders
# Base names of every entry matching "*_31020_*" under each root folder,
# root by root, sorted within each root.
folder_names = [
    os.path.basename(folder)
    for path in folder_paths
    for folder in sorted(glob.glob(path + "*_31020_*"))
]

print(len(folder_names))

# Keep the names whose second-to-last "_"-separated token is '2'.
index_instance_2 = [name for name in folder_names if name.split("_")[-2] == '2']

print(len(index_instance_2))

# 31027_Tractography_endpoints_coordinates - skip

These are real coordinates for streamlines

In [None]:
# 31027_Tractography_endpoints_coordinates: collect one flattened upper-triangle
# vector per subject folder, save all of them, then save the instance-2 subset.
# NOTE: this cell rebinds the notebook-wide `folder_paths`; cells run afterwards
# will scan these 31027 folders.
folder_paths = ["/Melbourne_BULK/DTI_connectome/31027_Tractography_endpoints_coordinates/10000/unzipped/",
"/Melbourne_BULK/DTI_connectome/31027_Tractography_endpoints_coordinates/20000/unzipped/",
"/Melbourne_BULK/DTI_connectome/31027_Tractography_endpoints_coordinates/30000/unzipped/",
"/Melbourne_BULK/DTI_connectome/31027_Tractography_endpoints_coordinates/40000/unzipped/",
"/Melbourne_BULK/DTI_connectome/31027_Tractography_endpoints_coordinates/42959/unzipped/"]

missing_file_count = 0

endpoints_coordinates_list = []
index_list = []
skipped_folders = []

# NOTE(review): this is the same file name the mean-FA connectome cells read.
# Confirm that the 31027 folders really contain a file with this name and that
# taking its upper triangle is meaningful for endpoint coordinates.
csv_file = "connectome_mean_FA_10M.csv"

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        file = os.path.join(subfolder_path, csv_file)

        # Folders without the file are counted and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            # Convert to an array once instead of once per use.
            endpoints_coordinates = np.array(pd.read_csv(file, header=None))
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Upper triangle, main diagonal included (k=0), flattened to 1-D.
        endpoints_coordinates_triu_indices = np.triu_indices(len(endpoints_coordinates), k=0)
        endpoints_coordinates_list.append(endpoints_coordinates[endpoints_coordinates_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

# The message used to name 'fa_connectome_mean_full_df', a frame this cell does not build.
print('Saving endpoints_coordinates_full_df and indices')

endpoints_coordinates_full_df = pd.DataFrame(endpoints_coordinates_list, index = index_list)
endpoints_coordinates_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31027_Tractography_endpoints_coordinates/endpoints_coordinates_full_df.csv')

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31027_Tractography_endpoints_coordinates/endpoints_coordinates_id_full.csv', index=False)

print('Getting IDs and matrices for instance 2')

endpoints_coordinates_instance_2 = []
index_instance_2 = []

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
for folder_name, cn in zip(index_list, endpoints_coordinates_list):
    if folder_name.split("_")[-2] == '2':
        endpoints_coordinates_instance_2.append(cn)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31027_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31027_Tractography_endpoints_coordinates/endpoints_coordinates_id_instance_2.csv', index=False)

del index_list
# Free the list built by this cell. The original deleted `connectome_mean_list`,
# a name this cell never defines, which raises NameError unless an earlier
# cell happened to leave it behind.
del endpoints_coordinates_list

print('Getting connectome data frame for instance 2')
endpoints_coordinates_instance_2_df = pd.DataFrame(endpoints_coordinates_instance_2, index = index_instance_2_df['ID'].to_list())

print('Saving connectome data frame instance 2')
endpoints_coordinates_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31027_Tractography_endpoints_coordinates/endpoints_coordinates_instance_2_df.csv', index_label='ID')

# 31028_Tractography_quality_metrics - skip

In [None]:
# 31028_Tractography_quality_metrics: load sift_weights.npy from every subject
# folder, save all of them (one column per folder), then save the instance-2 subset.
# NOTE: this cell rebinds the notebook-wide `folder_paths`; cells run afterwards
# will scan these 31028 folders.
folder_paths = ["/Melbourne_BULK/DTI_connectome/31028_Tractography_quality_metrics/10000/unzipped/",
"/Melbourne_BULK/DTI_connectome/31028_Tractography_quality_metrics/20000/unzipped/",
"/Melbourne_BULK/DTI_connectome/31028_Tractography_quality_metrics/30000/unzipped/",
"/Melbourne_BULK/DTI_connectome/31028_Tractography_quality_metrics/40000/unzipped/",
"/Melbourne_BULK/DTI_connectome/31028_Tractography_quality_metrics/42959/unzipped/"]

missing_file_count = 0

sift_weights_list = []
index_list = []
skipped_folders = []

npy_file = "sift_weights.npy"

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        file = os.path.join(subfolder_path, npy_file)

        # Folders without the file are counted and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            sift_weights = np.load(file)
        except (EOFError, ValueError) as load_error:
            # np.load never raises pandas' EmptyDataError. It raises EOFError on
            # an empty file and ValueError on content it cannot read, so those
            # are the errors to skip on. ValueError is also the base class of
            # pd.errors.EmptyDataError, so the original clause stays covered.
            print(f"Skipping empty file: {file} ({load_error})")
            continue

        sift_weights_list.append(sift_weights)
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full indices')
index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31028_Tractography_quality_metrics/sift_weights_id_full.csv', index=False)

print('Saving full_df')
# Transposed: one column per subject folder, one row per position in the weights array.
sift_weights_list_full_df = pd.DataFrame(sift_weights_list, index = index_list).T
sift_weights_list_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31028_Tractography_quality_metrics/sift_weights_full_df.csv')
del sift_weights_list_full_df
gc.collect()

print('Getting IDs and matrices for instance 2')

sift_weights_instance_2_list = []
index_instance_2 = []

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
for folder_name, m in zip(index_list, sift_weights_list):
    if folder_name.split("_")[-2] == '2':
        sift_weights_instance_2_list.append(m)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31028_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31028_Tractography_quality_metrics/sift_weights_id_instance_2.csv', index=False)

del index_list
del sift_weights_list
gc.collect()

print('Getting connectome data frame for instance 2')
sift_weights_instance_2_df = pd.DataFrame(sift_weights_instance_2_list, index = index_instance_2_df['ID'].to_list()).T

del index_instance_2
del sift_weights_instance_2_list

print('Saving connectome data frame instance 2')
# NOTE(review): after the transpose the IDs are the column headers, so
# index_label='ID' labels the row-position column rather than an ID column.
# Left unchanged to keep the output file layout; confirm this is intended.
sift_weights_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31028_Tractography_quality_metrics/sift_weights_instance_2_df.csv', index_label='ID')

gc.collect()

# 31021_aparc_Tian_S1

In [None]:
# Folders scanned (via os.listdir) by the 31021_aparc_Tian_S1 cells below.
folder_paths = [
    "/Melbourne_BULK/DTI_connectome/31021_aparc_Tian_S1/10000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31021_aparc_Tian_S1/20000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31021_aparc_Tian_S1/30000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31021_aparc_Tian_S1/40000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31021_aparc_Tian_S1/42892/unzipped/",
]

### Connectome_mean_FA_10M

In [None]:
# Connectome_mean_FA_10M for 31021_aparc_Tian_S1.
# For every subject folder: read the matrix CSV, keep its upper triangle
# (diagonal included) as one flat vector, then save (1) all subjects and
# (2) only the folders whose name marks instance 2.
csv_file = "connectome_mean_FA_10M.csv"

fa_connectome_mean_list, index_list = [], []
skipped_folders = []
missing_file_count = 0

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        fa_file = os.path.join(subfolder_path, csv_file)

        # Folders without the matrix are counted and listed after the scan.
        if not os.path.exists(fa_file):
            skipped_folders.append(subfolder_path)
            missing_file_count += 1
            continue

        try:
            fa_matrix = np.array(pd.read_csv(fa_file, header=None))
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {fa_file}")
            continue

        # Row/column positions of the upper triangle, main diagonal included (k=0).
        upper_rows, upper_cols = np.triu_indices(len(fa_matrix), k=0)
        fa_connectome_mean_list.append(fa_matrix[upper_rows, upper_cols])
        index_list.append(subject_folder)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving fa_connectome_mean_full_df and indices')

# One row per subject folder, indexed by the folder name.
fa_connectome_mean_full_df = pd.DataFrame(fa_connectome_mean_list, index=index_list)
fa_connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_mean_FA_10M/31021_aparc_Tian_S1_fa_connectome_mean_full_df.csv')

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_mean_FA_10M/31021_aparc_Tian_S1_full_id_full.csv', index=False)

print('Getting IDs and matrices for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
instance_2_pairs = [
    (name, vector)
    for name, vector in zip(index_list, fa_connectome_mean_list)
    if name.split("_")[-2] == '2'
]
index_instance_2 = [name for name, _ in instance_2_pairs]
fa_connectome_instance_2 = [vector for _, vector in instance_2_pairs]
del instance_2_pairs

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31021_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_mean_FA_10M/31021_aparc_Tian_S1_full_id_instance_2.csv', index=False)

del index_list, fa_connectome_mean_list

print('Getting connectome data frame for instance 2')
fa_connectome_instance_2_df = pd.DataFrame(fa_connectome_instance_2, index=index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving connectome data frame instance 2')
fa_connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_mean_FA_10M/31021_aparc_Tian_S1_fa_connectome_instance_2_df.csv', index_label='ID')

del fa_connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_mean_length_10M

In [None]:
# Connectome_mean_length_10M for 31021_aparc_Tian_S1.
# Same pipeline as the other connectome cells: flatten the upper triangle
# (diagonal included) of each subject's matrix, save all subjects, then save
# the instance-2 subset.
csv_file = "connectome_mean_length_10M.csv"

connectome_mean_list, index_list = [], []
skipped_folders = []
missing_file_count = 0

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        matrix_path = os.path.join(subfolder_path, csv_file)

        # Folders without the matrix are counted and listed after the scan.
        if not os.path.exists(matrix_path):
            skipped_folders.append(subfolder_path)
            missing_file_count += 1
            continue

        try:
            matrix = np.array(pd.read_csv(matrix_path, header=None))
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {matrix_path}")
            continue

        # Row/column positions of the upper triangle, main diagonal included (k=0).
        upper_rows, upper_cols = np.triu_indices(len(matrix), k=0)
        connectome_mean_list.append(matrix[upper_rows, upper_cols])
        index_list.append(subject_folder)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

# One row per subject folder, indexed by the folder name.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_mean_length_10M/31021_aparc_Tian_S1_connectome_mean_length_10M_full_df.csv')

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_mean_length_10M/31021_aparc_Tian_S1_connectome_mean_length_10M_full_id.csv', index=False)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
instance_2_pairs = [
    (name, vector)
    for name, vector in zip(index_list, connectome_mean_list)
    if name.split("_")[-2] == '2'
]
index_instance_2 = [name for name, _ in instance_2_pairs]
connectome_instance_2 = [vector for _, vector in instance_2_pairs]
del instance_2_pairs

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31021_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_mean_length_10M/31021_aparc_Tian_S1_connectome_mean_length_10M_id_instance_2.csv', index=False)

del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index=index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_mean_length_10M/31021_aparc_Tian_S1_connectome_mean_length_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df, index_instance_2_df

gc.collect()


### Connectome_sift2_fbc_10M

In [None]:
# Connectome_sift2_fbc_10M for 31021_aparc_Tian_S1.
# Same pipeline as the other connectome cells: flatten the upper triangle
# (diagonal included) of each subject's matrix, save all subjects, then save
# the instance-2 subset.
csv_file = "connectome_sift2_fbc_10M.csv"

connectome_mean_list, index_list = [], []
skipped_folders = []
missing_file_count = 0

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        matrix_path = os.path.join(subfolder_path, csv_file)

        # Folders without the matrix are counted and listed after the scan.
        if not os.path.exists(matrix_path):
            skipped_folders.append(subfolder_path)
            missing_file_count += 1
            continue

        try:
            matrix = np.array(pd.read_csv(matrix_path, header=None))
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {matrix_path}")
            continue

        # Row/column positions of the upper triangle, main diagonal included (k=0).
        upper_rows, upper_cols = np.triu_indices(len(matrix), k=0)
        connectome_mean_list.append(matrix[upper_rows, upper_cols])
        index_list.append(subject_folder)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

# One row per subject folder, indexed by the folder name.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_sift2_fbc_10M/31021_aparc_Tian_S1_connectome_sift2_fbc_10M_full_df.csv')

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_sift2_fbc_10M/31021_aparc_Tian_S1_connectome_sift2_fbc_10M_full_id.csv', index=False)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated
# token of its name is '2'.
instance_2_pairs = [
    (name, vector)
    for name, vector in zip(index_list, connectome_mean_list)
    if name.split("_")[-2] == '2'
]
index_instance_2 = [name for name, _ in instance_2_pairs]
connectome_instance_2 = [vector for _, vector in instance_2_pairs]
del instance_2_pairs

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID'])
# Strip the field/instance suffix from each folder name.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31021_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_sift2_fbc_10M/31021_aparc_Tian_S1_connectome_sift2_fbc_10M_id_instance_2.csv', index=False)

del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index=index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_sift2_fbc_10M/31021_aparc_Tian_S1_connectome_sift2_fbc_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_streamline_count_10M

In [None]:
# Connectome_streamline_count_10M
#
# For every subject folder listed under `folder_paths` (defined in an earlier
# cell), read the streamline-count connectome, keep its upper triangle
# (diagonal included) as one flat vector, then save
#   1. every loaded subject (full_df) plus the matching folder names, and
#   2. the subset whose folder name has '2' as its second-to-last
#      '_'-separated field (instance 2), indexed by the folder name with the
#      '_31021_2_0' suffix removed.
csv_file = "connectome_streamline_count_10M.csv"

missing_file_count = 0

connectome_mean_list = []  # one upper-triangle vector per loaded subject
index_list = []            # subject folder names, aligned with connectome_mean_list
skipped_folders = []       # subject folders that do not contain csv_file

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        file = os.path.join(subfolder_path, csv_file)

        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            # A zero-byte CSV carries no matrix; report it and move on.
            print(f"Skipping empty file: {file}")
            continue

        # Convert to an array once and reuse it for both the size and the indexing.
        connectome_values = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_values), k=0)

        connectome_mean_list.append(connectome_values[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index = index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_streamline_count_10M/31021_aparc_Tian_S1_connectome_streamline_count_10M_full_df.csv')
# The full frame is a second copy of connectome_mean_list; release it as soon
# as it is on disk so it does not stay resident for the rest of the cell.
del connectome_mean_full_df

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_streamline_count_10M/31021_aparc_Tian_S1_connectome_streamline_count_10M_full_id.csv', index=False)
del index_list_df

gc.collect()

print('Getting IDs for instance 2')

connectome_instance_2 = []
index_instance_2 = []

for folder_name, c in zip(index_list, connectome_mean_list):
    # Second-to-last '_' field of the folder name is compared against '2'.
    mid_part = folder_name.split("_")[-2]
    if mid_part == '2':
        connectome_instance_2.append(c)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31021_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_streamline_count_10M/31021_aparc_Tian_S1_connectome_streamline_count_10M_id_instance_2.csv', index=False)

# The instance-2 lists keep their own references, so the full lists can go.
del index_list
del connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31021_aparc_Tian_S1/Connectome_streamline_count_10M/31021_aparc_Tian_S1_connectome_streamline_count_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df
del index_instance_2_df

gc.collect()

# 31022_Glasser_Tian_S1

In [None]:
# Directories scanned by the 31022_Glasser_Tian_S1 cells below; each one is
# listed with os.listdir and every entry is treated as a subject folder.
folder_paths = [
    "/Melbourne_BULK/DTI_connectome/31022_Glasser_Tian_S1/10000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31022_Glasser_Tian_S1/20000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31022_Glasser_Tian_S1/30000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31022_Glasser_Tian_S1/40000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31022_Glasser_Tian_S1/42892/unzipped/",
]

### Connectome_mean_FA_10M

In [None]:
# Connectome_mean_FA_10M
#
# For every subject folder listed under `folder_paths`, read the mean-FA
# connectome, keep its upper triangle (diagonal included) as one flat vector,
# then save
#   1. every loaded subject (full_df) plus the matching folder names, and
#   2. the subset whose folder name has '2' as its second-to-last
#      '_'-separated field (instance 2), indexed by the folder name with the
#      '_31022_2_0' suffix removed.
csv_file = "connectome_mean_FA_10M.csv"

missing_file_count = 0

fa_connectome_mean_list = []  # one upper-triangle vector per loaded subject
index_list = []               # subject folder names, aligned with fa_connectome_mean_list
skipped_folders = []          # subject folders that do not contain csv_file

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        fa_file = os.path.join(subfolder_path, csv_file)

        if not os.path.exists(fa_file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            fa_connectome = pd.read_csv(fa_file, header=None)
        except pd.errors.EmptyDataError:
            # A zero-byte CSV carries no matrix; report it and move on.
            print(f"Skipping empty file: {fa_file}")
            continue

        # Convert to an array once and reuse it for both the size and the indexing.
        fa_connectome_values = np.array(fa_connectome)
        fa_connectome_triu_indices = np.triu_indices(len(fa_connectome_values), k=0)

        fa_connectome_mean_list.append(fa_connectome_values[fa_connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving fa_connectome_mean_full_df and indices')

fa_connectome_mean_full_df = pd.DataFrame(fa_connectome_mean_list, index = index_list)
fa_connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_mean_FA_10M/31022_Glasser_Tian_S1_fa_connectome_mean_full_df.csv')
# The full frame is a second copy of fa_connectome_mean_list; release it as
# soon as it is on disk so it does not stay resident for the rest of the cell.
del fa_connectome_mean_full_df

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_mean_FA_10M/31022_Glasser_Tian_S1_full_id_full.csv', index=False)
del index_list_df

gc.collect()

print('Getting IDs and matrices for instance 2')

fa_connectome_instance_2 = []
index_instance_2 = []

for folder_name, fa in zip(index_list, fa_connectome_mean_list):
    # Second-to-last '_' field of the folder name is compared against '2'.
    mid_part = folder_name.split("_")[-2]
    if mid_part == '2':
        fa_connectome_instance_2.append(fa)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31022_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_mean_FA_10M/31022_Glasser_Tian_S1_full_id_instance_2.csv', index=False)

# The instance-2 lists keep their own references, so the full lists can go.
del index_list
del fa_connectome_mean_list

print('Getting connectome data frame for instance 2')
fa_connectome_instance_2_df = pd.DataFrame(fa_connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving connectome data frame instance 2')
fa_connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_mean_FA_10M/31022_Glasser_Tian_S1_fa_connectome_instance_2_df.csv', index_label='ID')

del fa_connectome_instance_2_df
del index_instance_2_df

gc.collect()



### Connectome_mean_length_10M

In [None]:
# Connectome_mean_length_10M
#
# Collects the mean-length connectome of every subject folder under
# `folder_paths` as a flat upper-triangle vector (diagonal included), writes
# the complete table and its folder names, then writes the instance-2 subset
# (folder names whose second-to-last '_' field is '2').
csv_file = "connectome_mean_length_10M.csv"

index_list = []            # folder name of each subject that was loaded
connectome_mean_list = []  # matching upper-triangle vectors
skipped_folders = []       # folders where csv_file was not found

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        file = os.path.join(subfolder_path, csv_file)

        if os.path.exists(file):
            try:
                matrix = np.array(pd.read_csv(file, header=None))
            except pd.errors.EmptyDataError:
                print(f"Skipping empty file: {file}")
            else:
                # Entries come out in the order np.triu_indices returns them.
                connectome_mean_list.append(matrix[np.triu_indices(len(matrix), k=0)])
                index_list.append(os.path.basename(subfolder_path))
        else:
            skipped_folders.append(subfolder_path)

missing_file_count = len(skipped_folders)
print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

# Neither frame is bound to a name, so each is released right after it is written.
pd.DataFrame(connectome_mean_list, index = index_list).to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_mean_length_10M/31022_Glasser_Tian_S1_connectome_mean_length_10M_full_df.csv')
pd.DataFrame(index_list, columns = ['ID']).to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_mean_length_10M/31022_Glasser_Tian_S1_connectome_mean_length_10M_full_id.csv', index=False)

print('Getting IDs for instance 2')

is_instance_2 = [name.split("_")[-2] == '2' for name in index_list]
connectome_instance_2 = [vec for vec, keep in zip(connectome_mean_list, is_instance_2) if keep]
index_instance_2 = [name for name, keep in zip(index_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31022_2_0', '')
)
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_mean_length_10M/31022_Glasser_Tian_S1_connectome_mean_length_10M_id_instance_2.csv', index=False)

del index_list, connectome_mean_list
gc.collect()

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2
gc.collect()

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_mean_length_10M/31022_Glasser_Tian_S1_connectome_mean_length_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_sift2_fbc_10M

In [None]:
# Connectome_sift2_fbc_10M
#
# Collects the SIFT2 connectome of every subject folder under `folder_paths`
# as a flat upper-triangle vector (diagonal included), writes the complete
# table and its folder names, then writes the instance-2 subset (folder names
# whose second-to-last '_' field is '2').
csv_file = "connectome_sift2_fbc_10M.csv"

index_list = []            # folder name of each subject that was loaded
connectome_mean_list = []  # matching upper-triangle vectors
skipped_folders = []       # folders where csv_file was not found

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        file = os.path.join(subfolder_path, csv_file)

        if os.path.exists(file):
            try:
                matrix = np.array(pd.read_csv(file, header=None))
            except pd.errors.EmptyDataError:
                print(f"Skipping empty file: {file}")
            else:
                # Entries come out in the order np.triu_indices returns them.
                connectome_mean_list.append(matrix[np.triu_indices(len(matrix), k=0)])
                index_list.append(os.path.basename(subfolder_path))
        else:
            skipped_folders.append(subfolder_path)

missing_file_count = len(skipped_folders)
print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df')

# Not bound to a name, so the full frame is released right after it is written.
pd.DataFrame(connectome_mean_list, index = index_list).to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_sift2_fbc_10M/31022_Glasser_Tian_S1_connectome_sift2_fbc_10M_full_df.csv')

print('Saving full indices')

pd.DataFrame(index_list, columns = ['ID']).to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_sift2_fbc_10M/31022_Glasser_Tian_S1_connectome_sift2_fbc_10M_full_id.csv', index=False)

gc.collect()

print('Getting IDs for instance 2')

is_instance_2 = [name.split("_")[-2] == '2' for name in index_list]
connectome_instance_2 = [vec for vec, keep in zip(connectome_mean_list, is_instance_2) if keep]
index_instance_2 = [name for name, keep in zip(index_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31022_2_0', '')
)
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_sift2_fbc_10M/31022_Glasser_Tian_S1_connectome_sift2_fbc_10M_id_instance_2.csv', index=False)

del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_sift2_fbc_10M/31022_Glasser_Tian_S1_connectome_sift2_fbc_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_streamline_count_10M

In [None]:
# Connectome_streamline_count_10M
#
# For every subject folder listed under `folder_paths`, read the
# streamline-count connectome, keep its upper triangle (diagonal included) as
# one flat vector, then save
#   1. every loaded subject (full_df) plus the matching folder names, and
#   2. the subset whose folder name has '2' as its second-to-last
#      '_'-separated field (instance 2), indexed by the folder name with the
#      '_31022_2_0' suffix removed.
csv_file = "connectome_streamline_count_10M.csv"

missing_file_count = 0

connectome_mean_list = []  # one upper-triangle vector per loaded subject
index_list = []            # subject folder names, aligned with connectome_mean_list
skipped_folders = []       # subject folders that do not contain csv_file

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        file = os.path.join(subfolder_path, csv_file)

        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            # A zero-byte CSV carries no matrix; report it and move on.
            print(f"Skipping empty file: {file}")
            continue

        # Convert to an array once and reuse it for both the size and the indexing.
        connectome_values = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_values), k=0)

        connectome_mean_list.append(connectome_values[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print("Length of the full set:", len(connectome_mean_list))

print('Saving full_df and indices')

connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index = index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_streamline_count_10M/31022_Glasser_Tian_S1_connectome_streamline_count_10M_full_df.csv')
del connectome_mean_full_df

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_streamline_count_10M/31022_Glasser_Tian_S1_connectome_streamline_count_10M_full_id.csv', index=False)
del index_list_df

gc.collect()

print('Getting IDs for instance 2')

connectome_instance_2 = []
index_instance_2 = []

for folder_name, c in zip(index_list, connectome_mean_list):
    # Second-to-last '_' field of the folder name is compared against '2'.
    mid_part = folder_name.split("_")[-2]
    if mid_part == '2':
        connectome_instance_2.append(c)
        index_instance_2.append(folder_name)

print("Length of the instance 2 set:", len(connectome_instance_2))

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31022_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_streamline_count_10M/31022_Glasser_Tian_S1_connectome_streamline_count_10M_id_instance_2.csv', index=False)

# Release the full lists here, as the sibling cells do. Otherwise they survive
# the cell and stay in kernel memory while the next data set is loaded.
del index_list
del connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31022_Glasser_Tian_S1/Connectome_streamline_count_10M/31022_Glasser_Tian_S1_connectome_streamline_count_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df
del index_instance_2_df

gc.collect()

# 31023_Glasser_Tian_S4

In [None]:
# Directories scanned by the 31023_Glasser_Tian_S4 cells below; each one is
# listed with os.listdir and every entry is treated as a subject folder.
folder_paths = [
    "/Melbourne_BULK/DTI_connectome/31023_Glasser_Tian_S4/10000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31023_Glasser_Tian_S4/20000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31023_Glasser_Tian_S4/30000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31023_Glasser_Tian_S4/40000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31023_Glasser_Tian_S4/42887/unzipped/",
]

### Connectome_mean_FA_10M

In [None]:
# Connectome_mean_FA_10M
#
# Collects the mean-FA connectome of every subject folder under `folder_paths`
# as a flat upper-triangle vector (diagonal included), writes the folder names
# and the complete table, then writes the instance-2 subset (folder names
# whose second-to-last '_' field is '2').
csv_file = "connectome_mean_FA_10M.csv"

index_list = []               # folder name of each subject that was loaded
fa_connectome_mean_list = []  # matching upper-triangle vectors
skipped_folders = []          # folders where csv_file was not found

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        fa_file = os.path.join(subfolder_path, csv_file)

        if os.path.exists(fa_file):
            try:
                fa_matrix = np.array(pd.read_csv(fa_file, header=None))
            except pd.errors.EmptyDataError:
                print(f"Skipping empty file: {fa_file}")
            else:
                # Entries come out in the order np.triu_indices returns them.
                fa_connectome_mean_list.append(fa_matrix[np.triu_indices(len(fa_matrix), k=0)])
                index_list.append(os.path.basename(subfolder_path))
        else:
            skipped_folders.append(subfolder_path)

missing_file_count = len(skipped_folders)
print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print("Length of the full set:", len(fa_connectome_mean_list))

print('Saving full indices')

# Neither frame is bound to a name, so each is released right after it is written.
pd.DataFrame(index_list, columns = ['ID']).to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_mean_FA_10M/31023_Glasser_Tian_S4_full_id_full.csv', index=False)

print('Saving fa_connectome_mean_full_df')

pd.DataFrame(fa_connectome_mean_list, index = index_list).to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_mean_FA_10M/31023_Glasser_Tian_S4_fa_connectome_mean_full_df.csv')

gc.collect()

print('Getting IDs and matrices for instance 2')

is_instance_2 = [name.split("_")[-2] == '2' for name in index_list]
fa_connectome_instance_2 = [vec for vec, keep in zip(fa_connectome_mean_list, is_instance_2) if keep]
index_instance_2 = [name for name, keep in zip(index_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31023_2_0', '')
)
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_mean_FA_10M/31023_Glasser_Tian_S4_full_id_instance_2.csv', index=False)

del index_list

print('Getting connectome data frame for instance 2')
fa_connectome_instance_2_df = pd.DataFrame(fa_connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del fa_connectome_mean_list

print('Saving connectome data frame instance 2')
fa_connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_mean_FA_10M/31023_Glasser_Tian_S4_fa_connectome_instance_2_df.csv', index_label='ID')

del index_instance_2, fa_connectome_instance_2_df, index_instance_2_df

gc.collect()


### Connectome_mean_length_10M

In [None]:
# Connectome_mean_length_10M
#
# For every subject folder listed under `folder_paths`, read the mean-length
# connectome, keep its upper triangle (diagonal included) as one flat vector,
# then save
#   1. the folder names of every loaded subject plus the full table, and
#   2. the subset whose folder name has '2' as its second-to-last
#      '_'-separated field (instance 2), indexed by the folder name with the
#      '_31023_2_0' suffix removed.
csv_file = "connectome_mean_length_10M.csv"

missing_file_count = 0

connectome_mean_list = []  # one upper-triangle vector per loaded subject
index_list = []            # subject folder names, aligned with connectome_mean_list
skipped_folders = []       # subject folders that do not contain csv_file

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        file = os.path.join(subfolder_path, csv_file)

        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            # A zero-byte CSV carries no matrix; report it and move on.
            print(f"Skipping empty file: {file}")
            continue

        # Convert to an array once and reuse it for both the size and the indexing.
        connectome_values = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_values), k=0)

        connectome_mean_list.append(connectome_values[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_mean_length_10M/31023_Glasser_Tian_S4_connectome_mean_length_10M_full_id.csv', index=False)
del index_list_df

connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index = index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_mean_length_10M/31023_Glasser_Tian_S4_connectome_mean_length_10M_full_df.csv')

del connectome_mean_full_df
gc.collect()

print('Getting IDs for instance 2')

connectome_instance_2 = []
index_instance_2 = []

for folder_name, c in zip(index_list, connectome_mean_list):
    # Second-to-last '_' field of the folder name is compared against '2'.
    mid_part = folder_name.split("_")[-2]
    if mid_part == '2':
        connectome_instance_2.append(c)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31023_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_mean_length_10M/31023_Glasser_Tian_S4_connectome_mean_length_10M_id_instance_2.csv', index=False)

del index_list
del index_instance_2

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del connectome_mean_list

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_mean_length_10M/31023_Glasser_Tian_S4_connectome_mean_length_10M_instance_2_df.csv', index_label='ID')

# Release the instance-2 frames once saved, as the sibling cells do, so they
# do not stay in kernel memory while the next cell loads its data.
del connectome_instance_2_df
del index_instance_2_df

gc.collect()


### Connectome_sift2_fbc_10M

In [None]:
# Connectome_sift2_fbc_10M
#
# For every subject folder listed under `folder_paths`, read the SIFT2
# connectome, keep its upper triangle (diagonal included) as one flat vector,
# then save
#   1. the folder names of every loaded subject plus the full table, and
#   2. the subset whose folder name has '2' as its second-to-last
#      '_'-separated field (instance 2), indexed by the folder name with the
#      '_31023_2_0' suffix removed.
missing_file_count = 0

connectome_mean_list = []  # one upper-triangle vector per loaded subject
index_list = []            # subject folder names, aligned with connectome_mean_list
skipped_folders = []       # subject folders that do not contain csv_file

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        csv_file = "connectome_sift2_fbc_10M.csv"

        file = os.path.join(subfolder_path, csv_file)

        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        # The read / upper-triangle steps below were missing from this cell
        # (it did not parse); they follow the same pattern as the other cells.
        try:
            connectome = pd.read_csv(file, header=None)
            connectome_triu_indices = np.triu_indices(len(np.array(connectome)), k=0)
            connectome_triu_v = np.array(connectome)[connectome_triu_indices]

            connectome_mean_list.append(connectome_triu_v)
            index_list.append(subject_folder_name)

        except pd.errors.EmptyDataError:
            # A zero-byte CSV carries no matrix; report it and move on.
            print(f"Skipping empty file: {file}")
            continue

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_sift2_fbc_10M/31023_Glasser_Tian_S4_connectome_sift2_fbc_10M_full_id.csv', index=False)
del index_list_df

connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index = index_list)
connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_sift2_fbc_10M/31023_Glasser_Tian_S4_connectome_sift2_fbc_10M_full_df.csv')
del connectome_mean_full_df
gc.collect()

print('Getting IDs for instance 2')
connectome_instance_2 = []
index_instance_2 = []

for folder_name, c in zip(index_list, connectome_mean_list):
    # Second-to-last '_' field of the folder name is compared against '2'.
    mid_part = folder_name.split("_")[-2]
    if mid_part == '2':
        connectome_instance_2.append(c)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31023_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_sift2_fbc_10M/31023_Glasser_Tian_S4_connectome_sift2_fbc_10M_id_instance_2.csv', index=False)

# The instance-2 lists keep their own references, so the full lists can go.
del index_list
del connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_sift2_fbc_10M/31023_Glasser_Tian_S4_connectome_sift2_fbc_10M_instance_2_df.csv', index_label='ID')

# Release the instance-2 frames once saved, as the sibling cells do.
del connectome_instance_2_df
del index_instance_2_df

gc.collect()


### Connectome_streamline_count_10M

In [None]:
# Connectome_streamline_count_10M
#
# Collects the streamline-count connectome of every subject folder under
# `folder_paths` as a flat upper-triangle vector (diagonal included), writes
# the folder names and the complete table, then writes the instance-2 subset
# (folder names whose second-to-last '_' field is '2').
csv_file = "connectome_streamline_count_10M.csv"

index_list = []            # folder name of each subject that was loaded
connectome_mean_list = []  # matching upper-triangle vectors
skipped_folders = []       # folders where csv_file was not found

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        file = os.path.join(subfolder_path, csv_file)

        if os.path.exists(file):
            try:
                matrix = np.array(pd.read_csv(file, header=None))
            except pd.errors.EmptyDataError:
                print(f"Skipping empty file: {file}")
            else:
                # Entries come out in the order np.triu_indices returns them.
                connectome_mean_list.append(matrix[np.triu_indices(len(matrix), k=0)])
                index_list.append(os.path.basename(subfolder_path))
        else:
            skipped_folders.append(subfolder_path)

missing_file_count = len(skipped_folders)
print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full indices')

# Neither frame is bound to a name, so each is released right after it is written.
pd.DataFrame(index_list, columns = ['ID']).to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_streamline_count_10M/31023_Glasser_Tian_S4_connectome_streamline_count_10M_full_id.csv', index=False)

print('Saving full df')
pd.DataFrame(connectome_mean_list, index = index_list).to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_streamline_count_10M/31023_Glasser_Tian_S4_connectome_streamline_count_10M_full_df.csv')
gc.collect()

print('Getting IDs for instance 2')

is_instance_2 = [name.split("_")[-2] == '2' for name in index_list]
connectome_instance_2 = [vec for vec, keep in zip(connectome_mean_list, is_instance_2) if keep]
index_instance_2 = [name for name, keep in zip(index_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31023_2_0', '')
)
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_streamline_count_10M/31023_Glasser_Tian_S4_connectome_streamline_count_10M_id_instance_2.csv', index=False)

del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31023_Glasser_Tian_S4/Connectome_streamline_count_10M/31023_Glasser_Tian_S4_connectome_streamline_count_10M_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df, index_instance_2_df

gc.collect()

# 31024_Schaefer7n1000p_Tian_S4

In [None]:
# Directories scanned by the 31024_Schaefer7n1000p_Tian_S4 cells below; each
# one is listed with os.listdir and every entry is treated as a subject folder.
folder_paths = [
    "/Melbourne_BULK/DTI_connectome/31024_Schaefer7n1000p_Tian_S4/10000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31024_Schaefer7n1000p_Tian_S4/20000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31024_Schaefer7n1000p_Tian_S4/30000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31024_Schaefer7n1000p_Tian_S4/40000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31024_Schaefer7n1000p_Tian_S4/42891/unzipped/",
]

### Connectome_mean_FA_10M

In [None]:
# Connectome_mean_FA_10M
#
# For every subject folder listed under `folder_paths`, read the mean-FA
# connectome, keep its upper triangle (diagonal included) as one flat vector,
# then save
#   1. every loaded subject (full_df) plus the matching folder names, and
#   2. the subset whose folder name has '2' as its second-to-last
#      '_'-separated field (instance 2), indexed by the folder name with the
#      '_31024_2_0' suffix removed.
csv_file = "connectome_mean_FA_10M.csv"

missing_file_count = 0

fa_connectome_mean_list = []  # one upper-triangle vector per loaded subject
index_list = []               # subject folder names, aligned with fa_connectome_mean_list
skipped_folders = []          # subject folders that do not contain csv_file

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        fa_file = os.path.join(subfolder_path, csv_file)

        if not os.path.exists(fa_file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            fa_connectome = pd.read_csv(fa_file, header=None)
        except pd.errors.EmptyDataError:
            # A zero-byte CSV carries no matrix; report it and move on.
            print(f"Skipping empty file: {fa_file}")
            continue

        # Convert to an array once and reuse it for both the size and the indexing.
        fa_connectome_values = np.array(fa_connectome)
        fa_connectome_triu_indices = np.triu_indices(len(fa_connectome_values), k=0)

        fa_connectome_mean_list.append(fa_connectome_values[fa_connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving fa_connectome_mean_full_df and indices')

fa_connectome_mean_full_df = pd.DataFrame(fa_connectome_mean_list, index = index_list)
fa_connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_mean_FA_10M/31024_Schaefer7n1000p_Tian_S4_fa_connectome_mean_full_df.csv')
# The full frame is a second copy of fa_connectome_mean_list; release it as
# soon as it is on disk so it does not stay resident for the rest of the cell.
del fa_connectome_mean_full_df

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_mean_FA_10M/31024_Schaefer7n1000p_Tian_S4_full_id_full.csv', index=False)
del index_list_df

gc.collect()

print('Getting IDs and matrices for instance 2')

fa_connectome_instance_2 = []
index_instance_2 = []

for folder_name, fa in zip(index_list, fa_connectome_mean_list):
    # Second-to-last '_' field of the folder name is compared against '2'.
    mid_part = folder_name.split("_")[-2]
    if mid_part == '2':
        fa_connectome_instance_2.append(fa)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31024_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_mean_FA_10M/31024_Schaefer7n1000p_Tian_S4_full_id_instance_2.csv', index=False)

# The instance-2 lists keep their own references, so the full lists can go.
del index_list
del fa_connectome_mean_list


print('Getting connectome data frame for instance 2')
fa_connectome_instance_2_df = pd.DataFrame(fa_connectome_instance_2, index = index_instance_2_df['ID'].to_list())

del index_instance_2

print('Saving connectome data frame instance 2')
fa_connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_mean_FA_10M/31024_Schaefer7n1000p_Tian_S4_fa_connectome_instance_2_df.csv', index_label='ID')

del fa_connectome_instance_2_df
del index_instance_2_df

gc.collect()



### Connectome_mean_length_10M

In [None]:
# Connectome_mean_length_10M
# Gather the upper-triangle vector of every subject's connectome_mean_length_10M.csv,
# then export the full table, its ID list and the instance-2 subset.
csv_file = "connectome_mean_length_10M.csv"

missing_file_count = 0
skipped_folders = []
connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        connectome_matrix = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_matrix), k=0)
        connectome_mean_list.append(connectome_matrix[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving full_df and indices')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_mean_length_10M/31024_Schaefer7n1000p_Tian_S4_connectome_mean_length_10M_full_df.csv'
)

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_mean_length_10M/31024_Schaefer7n1000p_Tian_S4_connectome_mean_length_10M_full_id.csv',
    index=False,
)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
connectome_instance_2 = [c for c, keep in zip(connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31024_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31024_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_mean_length_10M/31024_Schaefer7n1000p_Tian_S4_connectome_mean_length_10M_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(
    connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_mean_length_10M/31024_Schaefer7n1000p_Tian_S4_connectome_mean_length_10M_instance_2_df.csv',
    index_label='ID',
)

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_sift2_fbc_10M

In [None]:
# Connectome_sift2_fbc_10M
# Gather the upper-triangle vector of every subject's connectome_sift2_fbc_10M.csv,
# then export the full table, its ID list and the instance-2 subset.
csv_file = "connectome_sift2_fbc_10M.csv"

missing_file_count = 0
skipped_folders = []
connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        connectome_matrix = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_matrix), k=0)
        connectome_mean_list.append(connectome_matrix[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving full_df and indices')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_sift2_fbc_10M/31024_Schaefer7n1000p_Tian_S4_connectome_sift2_fbc_10M_full_df.csv'
)

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_sift2_fbc_10M/31024_Schaefer7n1000p_Tian_S4_connectome_sift2_fbc_10M_full_id.csv',
    index=False,
)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
connectome_instance_2 = [c for c, keep in zip(connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31024_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31024_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_sift2_fbc_10M/31024_Schaefer7n1000p_Tian_S4_connectome_sift2_fbc_10M_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(
    connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_sift2_fbc_10M/31024_Schaefer7n1000p_Tian_S4_connectome_sift2_fbc_10M_instance_2_df.csv',
    index_label='ID',
)

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_streamline_count_10M

In [None]:
# Connectome_streamline_count_10M
# Gather the upper-triangle vector of every subject's connectome_streamline_count_10M.csv,
# then export the full table, its ID list and the instance-2 subset.
csv_file = "connectome_streamline_count_10M.csv"

missing_file_count = 0
skipped_folders = []
connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        connectome_matrix = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_matrix), k=0)
        connectome_mean_list.append(connectome_matrix[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving full_df and indices')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_streamline_count_10M/31024_Schaefer7n1000p_Tian_S4_connectome_streamline_count_10M_full_df.csv'
)

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_streamline_count_10M/31024_Schaefer7n1000p_Tian_S4_connectome_streamline_count_10M_full_id.csv',
    index=False,
)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
connectome_instance_2 = [c for c, keep in zip(connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31024_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31024_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_streamline_count_10M/31024_Schaefer7n1000p_Tian_S4_connectome_streamline_count_10M_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(
    connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31024_Schaefer7n1000p_Tian_S4/Connectome_streamline_count_10M/31024_Schaefer7n1000p_Tian_S4_connectome_streamline_count_10M_instance_2_df.csv',
    index_label='ID',
)

del connectome_instance_2_df, index_instance_2_df

gc.collect()

# 31025_Schaefer7n200p_Tian_S1

In [None]:
# Directories whose per-subject sub-folders are scanned by the
# 31025_Schaefer7n200p_Tian_S1 cells that follow.
folder_paths = [
    "/Melbourne_BULK/DTI_connectome/31025_Schaefer7n200p_Tian_S1/10000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31025_Schaefer7n200p_Tian_S1/20000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31025_Schaefer7n200p_Tian_S1/30000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31025_Schaefer7n200p_Tian_S1/40000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31025_Schaefer7n200p_Tian_S1/42886/unzipped/",
]

### Connectome_mean_FA_10M

In [None]:
# Connectome_mean_FA_10M (31025_Schaefer7n200p_Tian_S1)
# Gather the upper-triangle vector of every subject's connectome_mean_FA_10M.csv,
# then export the ID list, the full table and the instance-2 subset.
csv_file = "connectome_mean_FA_10M.csv"

missing_file_count = 0

fa_connectome_mean_list = []
index_list = []
skipped_folders = []

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        fa_file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(fa_file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            fa_connectome = pd.read_csv(fa_file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {fa_file}")
            continue

        # Convert once, then flatten the upper triangle (k=0 keeps the diagonal).
        fa_connectome_matrix = np.array(fa_connectome)
        fa_connectome_triu_indices = np.triu_indices(len(fa_connectome_matrix), k=0)
        fa_connectome_triu_v = fa_connectome_matrix[fa_connectome_triu_indices]

        fa_connectome_mean_list.append(fa_connectome_triu_v)
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full indices')

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_mean_FA_10M/31025_Schaefer7n200p_Tian_S1_full_id_full.csv', index=False)
del index_list_df
gc.collect()

print('Saving fa_connectome_mean_full_df')

# One row per subject folder (index = folder name), one column per upper-triangle element.
fa_connectome_mean_full_df = pd.DataFrame(fa_connectome_mean_list, index = index_list)

# Write the table with DataFrame.to_csv, as the other FA cells in this notebook do.
# The previous csv.DictWriter + to_dict(orient='list') route wrote a single data row
# whose cells were stringified lists of whole columns, dropped the subject index, and
# appended another copy on every re-run because the file was opened in 'a' mode.
fa_connectome_mean_full_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_mean_FA_10M/31025_Schaefer7n200p_Tian_S1_fa_connectome_mean_full_df.csv')

del fa_connectome_mean_full_df
gc.collect()

print('Getting IDs and matrices for instance 2')

fa_connectome_instance_2 = []
index_instance_2 = []

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
for folder_name, fa in zip(index_list, fa_connectome_mean_list):
    mid_part = folder_name.split("_")[-2]
    if mid_part == '2':
        fa_connectome_instance_2.append(fa)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
# Strip the '_31025_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31025_2_0', '')
index_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_mean_FA_10M/31025_Schaefer7n200p_Tian_S1_full_id_instance_2.csv', index=False)

print('Getting connectome data frame for instance 2')
fa_connectome_instance_2_df = pd.DataFrame(fa_connectome_instance_2, index = index_instance_2_df['ID'].to_list())

# The full lists are no longer needed once the instance-2 frame exists.
del index_instance_2
del index_list
del fa_connectome_mean_list

print('Saving connectome data frame instance 2')
fa_connectome_instance_2_df.to_csv('/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_mean_FA_10M/31025_Schaefer7n200p_Tian_S1_fa_connectome_instance_2_df.csv.gz', compression='gzip', index_label='ID')

gc.collect()



### Connectome_mean_length_10M

In [None]:
# Connectome_mean_length_10M
# Gather the upper-triangle vector of every subject's connectome_mean_length_10M.csv,
# then export the ID list, the gzip-compressed full table and the instance-2 subset.
csv_file = "connectome_mean_length_10M.csv"

missing_file_count = 0
skipped_folders = []
connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        connectome_matrix = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_matrix), k=0)
        connectome_mean_list.append(connectome_matrix[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving indices')

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_mean_length_10M/31025_Schaefer7n200p_Tian_S1_connectome_mean_length_10M_full_id.csv',
    index=False,
)
del index_list_df
gc.collect()

print('Saving full_df')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_mean_length_10M/31025_Schaefer7n200p_Tian_S1_connectome_mean_length_10M_full_df.csv.gz',
    compression='gzip',
)
del connectome_mean_full_df
gc.collect()

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
connectome_instance_2 = [c for c, keep in zip(connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31025_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31025_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_mean_length_10M/31025_Schaefer7n200p_Tian_S1_connectome_mean_length_10M_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(
    connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_mean_length_10M/31025_Schaefer7n200p_Tian_S1_connectome_mean_length_10M_instance_2_df.csv.gz',
    compression='gzip',
    index_label='ID',
)

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_sift2_fbc_10M

In [None]:
# Connectome_sift2_fbc_10M
# Gather the upper-triangle vector of every subject's connectome_sift2_fbc_10M.csv,
# then export the full table, its ID list and the instance-2 subset.
csv_file = "connectome_sift2_fbc_10M.csv"

missing_file_count = 0
skipped_folders = []
connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        connectome_matrix = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_matrix), k=0)
        connectome_mean_list.append(connectome_matrix[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving full_df and indices')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_sift2_fbc_10M/31025_Schaefer7n200p_Tian_S1_connectome_sift2_fbc_10M_full_df.csv'
)

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_sift2_fbc_10M/31025_Schaefer7n200p_Tian_S1_connectome_sift2_fbc_10M_full_id.csv',
    index=False,
)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
connectome_instance_2 = [c for c, keep in zip(connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31025_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31025_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_sift2_fbc_10M/31025_Schaefer7n200p_Tian_S1_connectome_sift2_fbc_10M_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(
    connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_sift2_fbc_10M/31025_Schaefer7n200p_Tian_S1_connectome_sift2_fbc_10M_instance_2_df.csv',
    index_label='ID',
)

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_streamline_count_10M

In [None]:
# Connectome_streamline_count_10M
# Gather the upper-triangle vector of every subject's connectome_streamline_count_10M.csv,
# then export the full table, its ID list and the instance-2 subset.
csv_file = "connectome_streamline_count_10M.csv"

missing_file_count = 0
skipped_folders = []
connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        connectome_matrix = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_matrix), k=0)
        connectome_mean_list.append(connectome_matrix[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving full_df and indices')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_streamline_count_10M/31025_Schaefer7n200p_Tian_S1_connectome_streamline_count_10M_full_df.csv'
)

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_streamline_count_10M/31025_Schaefer7n200p_Tian_S1_connectome_streamline_count_10M_full_id.csv',
    index=False,
)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
connectome_instance_2 = [c for c, keep in zip(connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31025_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31025_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_streamline_count_10M/31025_Schaefer7n200p_Tian_S1_connectome_streamline_count_10M_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(
    connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31025_Schaefer7n200p_Tian_S1/Connectome_streamline_count_10M/31025_Schaefer7n200p_Tian_S1_connectome_streamline_count_10M_instance_2_df.csv',
    index_label='ID',
)

del connectome_instance_2_df, index_instance_2_df

gc.collect()


# 31026_Schaefer7n500p_Tian_S4

In [None]:
# Directories whose per-subject sub-folders are scanned by the
# 31026_Schaefer7n500p_Tian_S4 cells that follow.
folder_paths = [
    "/Melbourne_BULK/DTI_connectome/31026_Schaefer7n500p_Tian_S4/10000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31026_Schaefer7n500p_Tian_S4/20000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31026_Schaefer7n500p_Tian_S4/30000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31026_Schaefer7n500p_Tian_S4/40000/unzipped/",
    "/Melbourne_BULK/DTI_connectome/31026_Schaefer7n500p_Tian_S4/42886/unzipped/",
]

### Connectome_mean_FA_10M

In [None]:
# Connectome_mean_FA_10M
# Gather the upper-triangle vector of every subject's connectome_mean_FA_10M.csv,
# then export the full table, its ID list and the instance-2 subset.
csv_file = "connectome_mean_FA_10M.csv"

missing_file_count = 0
skipped_folders = []
fa_connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        fa_file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(fa_file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            fa_connectome = pd.read_csv(fa_file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {fa_file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        fa_connectome_matrix = np.array(fa_connectome)
        fa_connectome_triu_indices = np.triu_indices(len(fa_connectome_matrix), k=0)
        fa_connectome_mean_list.append(fa_connectome_matrix[fa_connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving fa_connectome_mean_full_df and indices')

# One row per subject folder, one column per upper-triangle element.
fa_connectome_mean_full_df = pd.DataFrame(fa_connectome_mean_list, index=index_list)
fa_connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_mean_FA_10M/31026_Schaefer7n500p_Tian_S4_fa_connectome_mean_full_df.csv'
)

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_mean_FA_10M/31026_Schaefer7n500p_Tian_S4_full_id_full.csv',
    index=False,
)

print('Getting IDs and matrices for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
fa_connectome_instance_2 = [fa for fa, keep in zip(fa_connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31026_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31026_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_mean_FA_10M/31026_Schaefer7n500p_Tian_S4_full_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, fa_connectome_mean_list

print('Getting connectome data frame for instance 2')
fa_connectome_instance_2_df = pd.DataFrame(
    fa_connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving connectome data frame instance 2')
fa_connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_mean_FA_10M/31026_Schaefer7n500p_Tian_S4_fa_connectome_instance_2_df.csv',
    index_label='ID',
)

del fa_connectome_instance_2_df, index_instance_2_df

gc.collect()



### Connectome_mean_length_10M

In [None]:
# Connectome_mean_length_10M
# Gather the upper-triangle vector of every subject's connectome_mean_length_10M.csv,
# then export the full table, its ID list and the instance-2 subset.
csv_file = "connectome_mean_length_10M.csv"

missing_file_count = 0
skipped_folders = []
connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        connectome_matrix = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_matrix), k=0)
        connectome_mean_list.append(connectome_matrix[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving full_df and indices')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_mean_length_10M/31026_Schaefer7n500p_Tian_S4_connectome_mean_length_10M_full_df.csv'
)

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_mean_length_10M/31026_Schaefer7n500p_Tian_S4_connectome_mean_length_10M_full_id.csv',
    index=False,
)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
connectome_instance_2 = [c for c, keep in zip(connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31026_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31026_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_mean_length_10M/31026_Schaefer7n500p_Tian_S4_connectome_mean_length_10M_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(
    connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_mean_length_10M/31026_Schaefer7n500p_Tian_S4_connectome_mean_length_10M_instance_2_df.csv',
    index_label='ID',
)

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_sift2_fbc_10M

In [None]:
# Connectome_sift2_fbc_10M
# Gather the upper-triangle vector of every subject's connectome_sift2_fbc_10M.csv,
# then export the full table, its ID list and the instance-2 subset.
csv_file = "connectome_sift2_fbc_10M.csv"

missing_file_count = 0
skipped_folders = []
connectome_mean_list = []
index_list = []

for folder_path in folder_paths:
    print(f'Started {folder_path}')

    for subject_folder_name in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder_name)
        file = os.path.join(subfolder_path, csv_file)

        # Folders lacking the CSV are tallied and listed after the scan.
        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Flatten the upper triangle (k=0 keeps the diagonal) into one vector.
        connectome_matrix = np.array(connectome)
        connectome_triu_indices = np.triu_indices(len(connectome_matrix), k=0)
        connectome_mean_list.append(connectome_matrix[connectome_triu_indices])
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('\n'.join(['Skipped folders:', *skipped_folders]))

print('Saving full_df and indices')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index=index_list)
connectome_mean_full_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_sift2_fbc_10M/31026_Schaefer7n500p_Tian_S4_connectome_sift2_fbc_10M_full_df.csv'
)

index_list_df = pd.DataFrame(index_list, columns=['ID'])
index_list_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_sift2_fbc_10M/31026_Schaefer7n500p_Tian_S4_connectome_sift2_fbc_10M_full_id.csv',
    index=False,
)

print('Getting IDs for instance 2')

# A folder belongs to instance 2 when the second-to-last "_"-separated piece of its name is '2'.
is_instance_2 = [folder_name.split("_")[-2] == '2' for folder_name in index_list]
index_instance_2 = [folder_name for folder_name, keep in zip(index_list, is_instance_2) if keep]
connectome_instance_2 = [c for c, keep in zip(connectome_mean_list, is_instance_2) if keep]

print('Converting instance 2 indices to the data frame')
# Strip the '_31026_2_0' suffix from the folder names before saving them as IDs.
index_instance_2_df = pd.DataFrame(index_instance_2, columns=['ID']).assign(
    ID=lambda ids: ids['ID'].str.replace('_31026_2_0', '')
)
index_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_sift2_fbc_10M/31026_Schaefer7n500p_Tian_S4_connectome_sift2_fbc_10M_id_instance_2.csv',
    index=False,
)

# The full lists are no longer needed once the instance-2 subset is extracted.
del index_list, connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(
    connectome_instance_2,
    index=index_instance_2_df['ID'].to_list(),
)

del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(
    '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_sift2_fbc_10M/31026_Schaefer7n500p_Tian_S4_connectome_sift2_fbc_10M_instance_2_df.csv',
    index_label='ID',
)

del connectome_instance_2_df, index_instance_2_df

gc.collect()

### Connectome_streamline_count_10M

In [None]:
# Connectome_streamline_count_10M
#
# For every subject folder under `folder_paths`, read
# connectome_streamline_count_10M.csv (no header row), keep the upper triangle
# including the diagonal (k=0) as a flat vector, and save:
#   *_full_df.csv         one row per subject folder
#   *_full_id.csv         the subject folder names
#   *_id_instance_2.csv   instance-2 IDs with the '_31026_2_0' suffix removed
#   *_instance_2_df.csv   instance-2 rows indexed by those IDs
csv_file = "connectome_streamline_count_10M.csv"

# All four outputs of this cell share one directory and one file-name prefix.
output_dir = '/ML_DATASETS/Brain/DTI/Structural_matrices/31026_Schaefer7n500p_Tian_S4/Connectome_streamline_count_10M'
output_prefix = f'{output_dir}/31026_Schaefer7n500p_Tian_S4_connectome_streamline_count_10M'

# to_csv raises OSError when the target directory does not exist; create it
# before the long collection loop so that failure cannot happen at the end.
os.makedirs(output_dir, exist_ok=True)

missing_file_count = 0

connectome_mean_list = []
index_list = []
skipped_folders = []

# Upper-triangle indices depend only on the matrix size, so compute them once
# per size instead of once per subject.
triu_indices_by_size = {}

for folder_path in folder_paths:

    print(f'Started {folder_path}')

    for subject_folder in sorted(os.listdir(folder_path)):
        subfolder_path = os.path.join(folder_path, subject_folder)
        subject_folder_name = os.path.basename(subfolder_path)

        file = os.path.join(subfolder_path, csv_file)

        if not os.path.exists(file):
            missing_file_count += 1
            skipped_folders.append(subfolder_path)
            continue

        # Only read_csv can raise EmptyDataError, so keep the try block narrow.
        try:
            connectome = pd.read_csv(file, header=None)
        except pd.errors.EmptyDataError:
            print(f"Skipping empty file: {file}")
            continue

        # Convert to an array once (the original converted it twice per file).
        connectome_values = connectome.to_numpy()
        n_rows = len(connectome_values)
        if n_rows not in triu_indices_by_size:
            triu_indices_by_size[n_rows] = np.triu_indices(n_rows, k=0)
        connectome_triu_indices = triu_indices_by_size[n_rows]
        connectome_triu_v = connectome_values[connectome_triu_indices]

        connectome_mean_list.append(connectome_triu_v)
        index_list.append(subject_folder_name)

print('Number of folders without the file:', missing_file_count)
print('Skipped folders:', *skipped_folders, sep='\n')

print('Saving full_df and indices')

# One row per subject folder, one column per upper-triangle element.
connectome_mean_full_df = pd.DataFrame(connectome_mean_list, index = index_list)
connectome_mean_full_df.to_csv(f'{output_prefix}_full_df.csv')

# The full frame is not used again in this cell and duplicates the data held
# in connectome_mean_list, so release it before building the instance-2 frame.
del connectome_mean_full_df

index_list_df = pd.DataFrame(index_list, columns = ['ID'])
index_list_df.to_csv(f'{output_prefix}_full_id.csv', index=False)

print('Getting IDs for instance 2')

connectome_instance_2 = []
index_instance_2 = []

# Keep the folders whose second-to-last "_"-separated token is '2'.
for folder_name, c in zip(index_list, connectome_mean_list):
    mid_part = folder_name.split("_")[-2]
    if mid_part == '2':
        connectome_instance_2.append(c)
        index_instance_2.append(folder_name)

print('Converting instance 2 indices to the data frame')
index_instance_2_df = pd.DataFrame(index_instance_2, columns = ['ID'])
# regex=False: the suffix is a literal string, and the default of `regex`
# differs between pandas versions.
index_instance_2_df['ID'] = index_instance_2_df['ID'].str.replace('_31026_2_0', '', regex=False)
index_instance_2_df.to_csv(f'{output_prefix}_id_instance_2.csv', index=False)

del index_list
del connectome_mean_list

print('Getting connectome data frame for instance 2')
connectome_instance_2_df = pd.DataFrame(connectome_instance_2, index = index_instance_2_df['ID'].to_list())

# The list still references every instance-2 vector; drop it now that the
# frame has been built.
del connectome_instance_2
del index_instance_2

print('Saving instance 2')
connectome_instance_2_df.to_csv(f'{output_prefix}_instance_2_df.csv', index_label='ID')

del connectome_instance_2_df
del index_instance_2_df

gc.collect()