Library installation

In [21]:
%%bash

sudo apt install curl

In [None]:
!pip install dipy

Library import

In [1]:
import matplotlib.pylab as plt
import os
import numpy as np
from scipy.ndimage import binary_dilation

In [2]:
from dipy.core.gradients import gradient_table
from dipy.data import get_fnames
from dipy.io.gradients import read_bvals_bvecs
from dipy.io.image import load_nifti_data, load_nifti, save_nifti
from dipy.direction import peaks
from dipy.reconst import shm
from dipy.tracking import utils
from dipy.tracking.local_tracking import LocalTracking
from dipy.tracking.stopping_criterion import BinaryStoppingCriterion
from dipy.tracking.streamline import Streamlines

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
from dipy.io.stateful_tractogram import Space, StatefulTractogram
from dipy.io.streamline import save_trk

In [4]:
def take_import_text(number = 10159, short=False):
    # choose lines containing given number of directory
    f = open("./Data/ds000030-1.0.0.sh", "r")
    lines_array = f.readlines()
    lines_import_text = ""
    nii_gz_filenames = []
    for line in lines_array:
        if f"sub-{number}" in line[-60:] and (not short or "dwi" in line): 
            lines_import_text += line
            if "nii.gz" in line[-10:]:
                words = line.split(" ")
                nii_gz_filenames.append(words[-1][:-1])
    return lines_import_text, nii_gz_filenames

In [5]:
def create_bash_script(number = 10159, do_import = True, short=False):
    # removing previous script
    try:
        os.system('rm ./Data/import_process_script.sh')
    except:
        print("No script file to remove!")

    # cut part of import script to download one directory
    bin_bash = f"#!/bin/bash\n"
    import_data, nii_gz_filenames = take_import_text(number, short)
    if not do_import: import_data = ""
    
    # extracting mask file from nii.gz files
    convert_data_fsl = "export FSLOUTPUTTYPE=NIFTI_GZ\nexport PATH=\"/usr/local/fsl/bin$PATH\"\nexport FSLDIR=\"/usr/local/fsl\"\n"
    for filename in nii_gz_filenames:
        convert_data_fsl += f"bet ./{filename} ./{filename[:-7]}mask{filename[-7:]}\n" #f"dir=\"./Data/sub-{number}\"\nnifti_dir=\"processed-data/sub-{number}/\"\nmkdir processed-data\nmkdir $nifti_dir\ndcm2niix -o $nifti_dir $dir"
        # convert_data_fsl += f"fast -n 4 ./{filename}" #-o ./{filename[:-7]}mask{filename[-7:]}\n
    
    # create script and write it to file
    script_text = bin_bash + import_data + convert_data_fsl
    # print(script_text)
    f = open("./Data/import_process_script.sh", "w")
    f.write(script_text)

In [6]:
# test
# print("export FSLOUTPUTTYPE=NIFTI_GZ\nexport PATH=\"/usr/local/fsl/bin$PATH\"\nexport FSLDIR=\"/usr/local/fsl\"")

In [7]:
# test of function creating singlw directory downloading script
# create_bash_script()

In [6]:
def run_script(number = 10159, do_import = True, short=False):
    # create script importing and converting single directory
    create_bash_script(number=number, do_import=do_import, short=short)

    # run script in bash
    os.system('bash ./Data/import_process_script.sh')

In [9]:
# test of running script for downloading files
# and creating brain masks necessary for further analyse
# run_script()

In [16]:
def process_to_streamlines(filename = None, number = 10159, do_draw = False):
    # load data
    if filename == None:
        data, affine, load_img = load_nifti(f"./sub-{number}/dwi/sub-{number}_dwi.nii.gz", return_img=True) #f"./sub-{number}/anat/sub-10159_T1w.nii.gz"
        bvals, bvecs = read_bvals_bvecs(f"./sub-{number}/dwi/sub-{number}_dwi.bval", f"./sub-{number}/dwi/sub-{number}_dwi.bvec")
        gtab = gradient_table(bvals, bvecs)
        labels = load_nifti_data(f"./sub-{number}/dwi/sub-{number}_dwimask.nii.gz")
        # labels = load_nifti_data(f"./sub-{number}/dwi/sub-{number}_dwi_seg.nii.gz")
    else:
        data, affine, load_img = load_nifti(filename, return_img=True) #f"./sub-{number}/anat/sub-10159_T1w.nii.gz"
        bvals, bvecs = read_bvals_bvecs(f"./sub-{number}/dwi/sub-{number}_dwi.bval", f"./sub-{number}/dwi/sub-{number}_dwi.bvec")
        gtab = gradient_table(bvals, bvecs)
        labels = load_nifti_data(filename[:-7] + "mask.nii.gz")
        # labels = load_nifti_data(f"./sub-{number}/dwi/sub-{number}_dwi_seg.nii.gz")

    # choosing part of the brain
    # white_matter = binary_dilation((labels == 1) | (labels == 2)) 
    # it probably doesn't work this way, white_matter mask may look different in this files
    try:
        # print(labels.shape, labels)
        white_matter = binary_dilation((labels == 1) | (labels == 2)) 
        csamodel = shm.CsaOdfModel(gtab, 6)
        # print(white_matter.shape, data.shape)
        csapeaks = peaks.peaks_from_model(model=csamodel,
                                        data=data,
                                        sphere=peaks.default_sphere,
                                        relative_peak_threshold=.8,
                                        min_separation_angle=45)

        affine = np.eye(4)
        seeds = utils.seeds_from_mask(white_matter, affine, density=1)
        stopping_criterion = BinaryStoppingCriterion(white_matter)

        # choosing tracks model
        streamline_generator = LocalTracking(csapeaks, stopping_criterion, seeds,
                                            affine=affine, step_size=0.5) # we can change stopping criterion here
        streamlines = Streamlines(streamline_generator)

        # choosing proper brain slice (verify if it is necessary)
        cc_slice = labels == 2
        cc_streamlines = utils.target(streamlines, affine, cc_slice)
        cc_streamlines = Streamlines(cc_streamlines)

        # save streamlines
        sft = StatefulTractogram(cc_streamlines, load_img, Space.VOX)
        save_trk(sft, "cc_streamlines.trk")

        # create connectivity matrix
        # print(cc_streamlines)
        M, _ = utils.connectivity_matrix(cc_streamlines, affine,
                                            labels.astype(np.uint8),
                                            return_mapping=True,
                                            mapping_as_streamlines=True)
        M[:3, :] = 0
        M[:, :3] = 0

        # plot cennectivity matrix
        if do_draw:
            plt.imshow(np.log1p(M), interpolation='nearest')
            plt.savefig(f"connectivity-{number}.png")
    except:
        print(f"File = {filename}, number = {number}: Error while processing data")
        M = None

    return M   

In [39]:
# test of processing to streamline
# print(process_to_streamlines(number = 10159, do_draw = True))

In [17]:
def write_conectivity_matrix_to_file(M, number = 10159):
    # save connectivity matrix as a .txt file
    # filename consists a directory number
    f = open(f"./cc_matrices/connectivity-matrix-{number}.txt", "w")
    for row in M:
        for cell in row:
            f.write(str(cell) + ",")
        f.write("\n")

In [18]:
def remove_sub_directory(number):
    # remove downloaded directory with data
    try:
        os.system(f"rm -r sub-{number}")
    except:
        print("No directory to remove!")

In [19]:
def preprocess_single_record(number = 10159, do_draw = False, short = False):
    # main function for data preprocessing
    run_script(number, short=short)
    M = process_to_streamlines(number = number, do_draw = do_draw)
    if M is not None:
        return False
    else:
        write_conectivity_matrix_to_file(M, number=number)
        remove_sub_directory(number)
        return True

In [40]:
# test of processing single directory
# preprocess_single_record(number = 10159, do_draw = True, short = True)

In [20]:
def choose_directory_numbers(index = 0):
    index = index % 4
    f = open("./Data/ds000030-1.0.0.sh", "r")
    lines_array = f.readlines()
    directory_numbers_set = set()
    for line in lines_array:
        if f"_dwi.nii.gz" in line[-60:]: 
            directory_numbers_set.add(int(line[-17:-12]))
    directory_numbers_list = list(directory_numbers_set)
    directory_numbers_list = sorted(directory_numbers_list)
    n = len(directory_numbers_list)
    return directory_numbers_list[index * n // 4:(index + 1) * n // 4]

In [21]:
print(choose_directory_numbers(0), len(choose_directory_numbers(0)))
print(choose_directory_numbers(1), len(choose_directory_numbers(1)))
print(choose_directory_numbers(2), len(choose_directory_numbers(2)))
print(choose_directory_numbers(3), len(choose_directory_numbers(3)))

[10159, 10171, 10189, 10193, 10206, 10217, 10225, 10227, 10228, 10235, 10249, 10269, 10271, 10273, 10274, 10280, 10290, 10292, 10304, 10316, 10321, 10325, 10329, 10339, 10340, 10345, 10347, 10356, 10361, 10365, 10376, 10377, 10388, 10429, 10438, 10440, 10448, 10455, 10460, 10471, 10478, 10487, 10492, 10506, 10517, 10523, 10524, 10525, 10527, 10530, 10557, 10565, 10570, 10575, 10624, 10629, 10631, 10638, 10674, 10678, 10680, 10686, 10692, 10696, 10697] 65
[10704, 10707, 10708, 10719, 10724, 10746, 10762, 10779, 10785, 10788, 10844, 10855, 10871, 10877, 10882, 10891, 10893, 10912, 10934, 10940, 10948, 10949, 10958, 10963, 10968, 10975, 10977, 10987, 10998, 11019, 11030, 11044, 11050, 11052, 11059, 11061, 11062, 11066, 11067, 11068, 11077, 11082, 11088, 11090, 11097, 11098, 11104, 11105, 11106, 11108, 11112, 11122, 11128, 11131, 11142, 11143, 11149, 11156, 50004, 50005, 50006, 50007, 50008, 50010, 50013, 50014] 66
[50015, 50016, 50020, 50021, 50022, 50023, 50025, 50027, 50029, 50032, 5003

In [22]:
def print_process_dict(process_dict):
    print("Dir. num.: | Processed? ")
    for key, value in process_dict.items():
        print(f"{str(key).ljust(10)}| {str(value).ljust(10)}")

In [23]:
def proprocess_record_batch(index = 0, short = False):
    # download, create brain masks, process data
    # from given list of directories (represeted with numbers)
    # argument index id personal id for every team member 
    # running this code in order to preprocess a batch of data
    index_list = choose_directory_numbers(index)
    process_dict = {}
    index_list = [10159, 10171, 50015, 60048, 70001]
    for number in index_list:
        print(f"Directory: sub-{number}")
        process_dict[number] = preprocess_single_record(number = number, do_draw = False, short=short)
    print_process_dict(process_dict)

In [24]:
# put your index
proprocess_record_batch(index = 0, short = True) 

Directory: sub-10159


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   322  100   322    0     0    245      0  0:00:01  0:00:01 --:--:--   245
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1814  100  1814    0     0    739      0  0:00:02  0:00:02 --:--:--   739
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2007  100  2007    0     0   1554      0  0:00:01  0:00:01 --:--:--  1554
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 30.5M  100 30.5M    0     0   292k      0  0:01:46  0:01:46 --:--:--  786k


Directory: sub-10171


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   322  100   322    0     0     81      0  0:00:03  0:00:03 --:--:--    81
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1814  100  1814    0     0    592      0  0:00:03  0:00:03 --:--:--   592
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1999  100  1999    0     0   1622      0  0:00:01  0:00:01 --:--:--  1623
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 32.2M  100 32.2M    0     0   295k      0  0:01:51  0:01:51 --:--:--  830k


Directory: sub-50015


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   322  100   322    0     0    278      0  0:00:01  0:00:01 --:--:--   278
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1814  100  1814    0     0   1708      0  0:00:01  0:00:01 --:--:--  1709
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2168  100  2168    0     0   2214      0 --:--:-- --:--:-- --:--:--  2214
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 38.6M  100 38.6M    0     0   494k      0  0:01:20  0:01:20 --:--:--  561k
