In [None]:
#!/bin/bash
# Bootstrap for an interactive session: enter the project directory, load the
# Python module, activate the virtual environment that matches this node's CPU
# architecture, then start IPython.

# Stop immediately if the project directory is unavailable, so that nothing
# below runs from whatever directory the shell happened to be in.
cd /well/win/users/hsv459/agemapper || {
  echo "Fatal error: could not change directory to /well/win/users/hsv459/agemapper" >&2
  exit 1
}

module purge
module load Python/3.7.4-GCCcore-8.3.0
# module load CUDA/10.1.243-GCC-8.3.0
# module load cudnn/7.6.5.32-10.1
# module add fsl/5.0.11

# NOTE(review): the CUDA runtime reads CUDA_VISIBLE_DEVICES, not
# VISIBLE_CUDA_DEVICES - confirm the name before re-enabling this line.
# VISIBLE_CUDA_DEVICES=0,1

# Determine Ivybridge or Skylake compatibility on this node.
# The helper's exit status is tested directly on the assignment, so the check
# cannot be invalidated by a statement inserted between the call and the test.
if ! CPU_ARCHITECTURE=$(/apps/misc/utils/bin/get-cpu-software-architecture.py); then
  echo "Fatal error: Please send the following information to the BMRC team: Could not determine CPU software architecture on $(hostname)" >&2
  exit 1
fi
# CPU_ARCHITECTURE=general

# Activate the ivybridge or skylake version of your python virtual environment
# (quoted so the expanded value is never word-split or globbed).
source "/well/win/users/hsv459/python/functionmapper-${CPU_ARCHITECTURE}/bin/activate"

# continue to use your python venv as normal

ipython


In [None]:
import os
import pickle
import numpy as np
import configparser
import pandas as pd
from fsl.data.image import Image
from fsl.utils.image.resample import resampleToPixdims
from fsl.utils.image.roi import roi
from sklearn.model_selection import train_test_split
import fsl.wrappers.wrapperutils as wutils

In [None]:
# Root directory whose immediate entries are scanned as candidate subject
# directories. Absolute path; the trailing slash matters wherever the path is
# built by plain string concatenation.
data_directory = "/well/win-biobank/projects/imaging/data/data3/subjectsAll/"
# Text file, one directory name per line, listing entries to exclude from the
# scan. Relative path, so it is resolved against the current working directory.
problematic_data_file = "datasets/problematic_data.txt"

# Files, given relative to each subject directory, that must ALL exist for a
# subject to be kept. The commented-out entries are alternatives that are not
# currently required; uncommenting one makes it an additional requirement.
interesting_data_paths = [
    "T1/T1_brain_to_MNI.nii.gz",
    "fMRI/rfMRI_25.dr/dr_stage2.nii.gz",
    "fMRI/tfMRI.feat/reg_standard/stats/zstat1.nii.gz",
    # "fMRI/tfMRI.feat/reg_standard/stats/zstat2.nii.gz",
    # "fMRI/tfMRI.feat/reg_standard/stats/zstat5.nii.gz",
    # "fMRI/tfMRI.feat/reg_standard/stats/cope1.nii.gz",
    # "fMRI/tfMRI.feat/reg_standard/stats/cope2.nii.gz",
    # "fMRI/tfMRI.feat/reg_standard/stats/cope5.nii.gz",
    "dMRI/TBSS/stats/all_FA_skeletonised.nii.gz",
    # "dMRI/TBSS/stats/all_ICVF_skeletonised.nii.gz",
    # "dMRI/TBSS/stats/all_ISOVF_skeletonised.nii.gz",
    # "dMRI/TBSS/stats/all_L1_skeletonised.nii.gz",
    # "dMRI/TBSS/stats/all_L2_skeletonised.nii.gz",
    # "dMRI/TBSS/stats/all_L3_skeletonised.nii.gz",
    # "dMRI/TBSS/stats/all_MD_skeletonised.nii.gz",
    # "dMRI/TBSS/stats/all_MO_skeletonised.nii.gz",
    # "dMRI/TBSS/stats/all_OD_skeletonised.nii.gz",
    "dMRI/autoptx_preproc/tractsNormSummed.nii.gz",
    "T1/T1_vbm/T1_GM_to_template_GM_mod.nii.gz",
    "T1/transforms/T1_to_MNI_warp_jac.nii.gz",
    "SWI/T2star_to_MNI.nii.gz",
    "T2_FLAIR/lesions/final_mask_to_MNI.nii.gz",
]

In [None]:
# Scan data_directory and keep every subject that has all required files.
#
# A directory entry is kept when it is not named in the exclusion file, is a
# directory, contains every path in interesting_data_paths, and is readable.
#
# Notebook globals produced:
#   directory_list     - every entry found directly under data_directory
#   subject_number     - number of entries in directory_list
#   problematic_data   - raw lines of the exclusion file
#   subDirectoryList   - names of the usable subject directories, in listing order
#   number_of_subjects - count of usable subjects (equals len(subDirectoryList))
# The usable list is also pickled to datasets/subDirectoryList_update2023.pkl.

subDirectoryList = []
number_of_subjects = 0

# List the directory once and reuse the result. The earlier second listing via
# os.path.join(os.path.expanduser("~"), data_directory) resolved to the same
# location, because joining onto an absolute path discards the home prefix.
directory_list = os.listdir(data_directory)
subject_number = len(directory_list)

# Read the exclusion list through a context manager so the handle is closed.
with open(problematic_data_file, 'r') as problematic_file:
    problematic_data = problematic_file.read().split('\n')

# Set for constant-time lookups. Entries are stripped so that trailing spaces
# or '\r' from Windows line endings cannot stop a name from matching; blank
# lines are dropped.
problematic_subjects = {entry.strip() for entry in problematic_data if entry.strip()}

index = 0
for index, directory in enumerate(directory_list, start=1):
    subject_path = os.path.join(data_directory, directory)
    if (
        directory not in problematic_subjects
        and os.path.isdir(subject_path)
        # all() stops at the first missing file instead of probing the rest.
        and all(
            os.path.exists(os.path.join(subject_path, modality_path))
            for modality_path in interesting_data_paths
        )
        and os.access(subject_path, os.R_OK)
    ):
        subDirectoryList.append(directory)
        number_of_subjects += 1
    # index is already 1-based here, so it is reported as-is (no extra +1).
    print("\r Processed {:.3f}%: {}/{} directories".format(index / subject_number * 100.0, index, subject_number), end='')

# Terminate the carriage-return progress line so the summary starts on its own line.
print()

print("Number of usable subjects: {}/{}".format(number_of_subjects, len(directory_list)))
print("Length of subDirectoryList: ", len(subDirectoryList))

# with open('datasets/subDirectoryList.pkl', 'wb') as dump_file:
#     pickle.dump(subDirectoryList, dump_file)

with open('datasets/subDirectoryList_update2023.pkl', 'wb') as dump_file:
    pickle.dump(subDirectoryList, dump_file)

In [None]:
# Report the scan totals again and write a second snapshot of the usable list.
# NOTE(review): the preceding cell already pickles this same list to
# 'datasets/subDirectoryList_update2023.pkl'; confirm that refreshing the
# 2022-named file with the current scan on every run is intended.
total_directories = len(directory_list)
usable_entries = len(subDirectoryList)
print("Number of usable subjects: {}/{}".format(number_of_subjects, total_directories))
print("Length of subDirectoryList: ", usable_entries)

# with open('datasets/subDirectoryList.pkl', 'wb') as dump_file:
#     pickle.dump(subDirectoryList, dump_file)

snapshot_path = 'datasets/subDirectoryList_update2022.pkl'
with open(snapshot_path, 'wb') as snapshot_file:
    pickle.dump(subDirectoryList, snapshot_file)