In [None]:
# import library 

import os
import shutil
import ants
import subprocess

import pandas as pd
import nibabel as nib
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
import subprocess

In [None]:
# Input locations for the whole pipeline.
#
# The raw DICOM data is expected to be laid out like this:
# data_path/                            # data root
# ├── 3000/                             # subject number
# │   └── Reconstructed_DaTSCAN/        # imaging type
# │       └── 2011-01-20_16_28_47.0/    # examination date
# │           └── I323662/              # imaging ID
# │               └── ...dicom          # dicom files
# ├── 3001/
# ...
data_path = "./data/DaT_raw"

# The image metadata table must contain at least the following columns:
# Image Data ID : [Ixxxxxxx, Dxxxxxxx], Image identifier (a leading "D" is
#                 replaced by "I" before the ID is matched against file paths)
# Subject       : xxxxxx, Subject ID
# Group         : [PD, Control, SWEDD], Diagnostic group
# Sex           : [F, M], Sex
# Age           : xx, Age
# Visit         : [SC, V02, V04, V06, V08, V10, ...], Visit timepoint
# Acq Date      : MM/DD/YYYY, Acquisition date (parsed with '%m/%d/%Y' in the
#                 multiple inspection filtering cell)
table_path = "./data/DaT_raw.csv"

# The Striatal Binding Ratio (SBR) analysis table must contain at least the
# following columns:
# PATNO                 : xxxxxx, Subject ID (joined against `Subject`)
# EVENT_ID              : [SC, V02, V04, V06, V08, V10, ...], Visit timepoint (joined against `Visit`)
# DATSCAN_CAUDATE_R     : xxx, SBR value
# DATSCAN_CAUDATE_L     : xxx, SBR value
# DATSCAN_PUTAMEN_R     : xxx, SBR value
# DATSCAN_PUTAMEN_L     : xxx, SBR value
# DATSCAN_PUTAMEN_R_ANT : xxx, SBR value
# DATSCAN_PUTAMEN_L_ANT : xxx, SBR value
# NOTE: this variable is not redefined later; the SBR table merge cell reads it
# directly, so this cell has to be run before that one.
sbr_table_path = "./data/DaT_SBR_Analysis.csv"

# All of the above data is available in the same format from PPMI.
# https://www.ppmi-info.org/


In [None]:
save_path = "./data/DaT_nii"

####################

# Module-level tallies updated by convert_dicom_to_nifti() and reported once
# the whole conversion loop has finished.
success_count, failure_count = 0, 0
failure_list = []

def convert_dicom_to_nifti(dicom_directory, output_directory):
    """Convert one DICOM folder to NIfTI by invoking the `dcm2niix` executable.

    Args:
        dicom_directory: folder passed to dcm2niix as the input.
        output_directory: folder passed to dcm2niix through `-o`.

    Returns:
        None. The outcome is recorded in the module-level `success_count`,
        `failure_count` and `failure_list` (the latter collects the input
        folders for which dcm2niix exited with a non-zero status).

    Any error other than a non-zero exit status (for example the executable
    not being found) is not caught here and propagates to the caller.
    """
    global success_count, failure_count
    command = ['dcm2niix', '-o', output_directory, dicom_directory]
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        failure_count += 1
        failure_list.append(dicom_directory)
    else:
        success_count += 1

def _list_subdirs(parent):
    """Return the sorted names of the directories located directly in `parent`.

    Plain files (e.g. a `.DS_Store` or a stray CSV) are skipped: the traversal
    below lists the content of every entry it visits, which would raise
    NotADirectoryError on a file. Sorting makes the processing order (and thus
    the order of `failure_list`) independent of the filesystem.
    """
    return sorted(
        name for name in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, name))
    )


# Walk data_path/<subject>/<imaging type>/<exam date>/<image ID>/ and convert
# every image-ID folder into the mirrored folder below save_path.
os.makedirs(save_path, exist_ok=True)

for sub in tqdm(_list_subdirs(data_path), desc="Convert"):
    sub_dir = os.path.join(data_path, sub)
    for file in _list_subdirs(sub_dir):
        file_dir = os.path.join(sub_dir, file)
        for date in _list_subdirs(file_dir):
            date_dir = os.path.join(file_dir, date)
            for iid in _list_subdirs(date_dir):
                dcm_path = os.path.join(date_dir, iid)
                nii_path = os.path.join(save_path, sub, file, date, iid)
                # makedirs creates every missing intermediate level, so one
                # call per leaf folder is enough.
                os.makedirs(nii_path, exist_ok=True)
                convert_dicom_to_nifti(dcm_path, nii_path)


print("converting done.")
print(f"success: {success_count}, failure: {failure_count}")
if failure_count > 0:
    print(f"failure list: {failure_list}")
print(f"result files are saved in {save_path}")

In [None]:
# multiple inspection filtering
# For every subject of the metadata table, the code below selects the row with
# the earliest `Acq Date` and copies the matching image to
# <save_path>/<subject>/DaT.nii.

# data_path: searched recursively for files whose name ends with `extension`.
# table_path: image metadata CSV (columns used: Subject, Acq Date,
#             Image Data ID, Group, Age, Sex, Visit).
data_path       = "./data/DaT_nii"
table_path      = "./data/DaT_raw.csv"

# save_path: output root, one sub-folder per subject.
# table_save_path: CSV with one row per selected image.
# extension: file-name suffix used to collect the candidate images.
save_path       = "./data/DaT_nii_multi_inspection_filtering"
table_save_path = "./data/DaT_nii_multi_inspection_filtering.csv"
extension = '.nii'

############################################

def find_substring_in_list(s, lst):
    """Return the position of the first element of `lst` that contains `s`.

    Containment is tested with the `in` operator on each element, in order.

    Returns:
        The zero-based index of the first matching element, or -1 when no
        element contains `s` (including when `lst` is empty). Callers must
        check for -1 before using the result as an index.
    """
    return next((pos for pos, item in enumerate(lst) if s in item), -1)

def find_files_with_extension(path, extension):
    """Recursively collect the files below `path` whose name ends with `extension`.

    Args:
        path: root directory of the search. A missing directory yields an
            empty result, because os.walk ignores listing errors by default.
        extension: suffix the file name must end with (e.g. '.nii').

    Returns:
        A list of paths (each joined onto the directory it was found in),
        sorted lexicographically. os.walk yields entries in an arbitrary,
        filesystem-dependent order; sorting makes any "first match" lookup on
        the result reproducible across machines and runs.
    """
    matches = [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(path)
        for name in names
        if name.endswith(extension)
    ]
    matches.sort()
    return matches


# Keep one image per subject: the one acquired first.
files = find_files_with_extension(data_path, extension)
table = pd.read_csv(table_path)
table['Acq Date'] = pd.to_datetime(table['Acq Date'], format='%m/%d/%Y')

os.makedirs(save_path, exist_ok=True)

records = []       # one dict per subject whose image was copied
missing_subs = []  # subjects whose selected image has no converted file

for sub in tqdm(table['Subject'].unique(), desc="refactoring", leave=True):
    sub_rows = table[table['Subject'] == sub]
    if len(sub_rows) > 1:
        # Several examinations: keep the earliest acquisition.
        sub_row = table.loc[sub_rows['Acq Date'].idxmin()]
    else:
        sub_row = sub_rows.iloc[0]

    # IDs starting with "D" are looked up with an "I" prefix instead, which is
    # the form used by the image-ID folders on disk.
    image_id = sub_row['Image Data ID']
    iid = 'I' + image_id[1:] if image_id[0] == 'D' else image_id

    # NOTE(review): this is a substring match on the whole path, so an ID that
    # is a prefix of a longer ID could match the wrong file - confirm that the
    # IDs in the table cannot collide this way.
    list_idx = find_substring_in_list(iid, files)
    if list_idx < 0:
        # -1 means "not found". Indexing files[-1] would silently copy the
        # last file of the list, i.e. another subject's image, so the subject
        # is skipped and reported instead.
        missing_subs.append(str(sub))
        continue

    save_dir = os.path.join(save_path, str(sub))
    # exist_ok=True keeps the cell re-runnable after a previous (partial) run.
    os.makedirs(save_dir, exist_ok=True)
    shutil.copy(files[list_idx], os.path.join(save_dir, "DaT.nii"))

    records.append({
        'subject_number': sub,
        'cohort': sub_row['Group'],
        'age': sub_row['Age'],
        'sex': sub_row['Sex'],
        'spect_id': sub_row['Image Data ID'],
        'spect_visit': sub_row['Visit'],
        'spect_date': sub_row['Acq Date'],
    })

# Explicit columns keep the header stable even when no subject was copied.
sub_table = pd.DataFrame(records, columns=['subject_number',
                                           'cohort',
                                           'age',
                                           'sex',
                                           'spect_id',
                                           'spect_visit',
                                           'spect_date'])
sub_table.to_csv(table_save_path)

print("refactoring done.")
if missing_subs:
    print(f"{len(missing_subs)} subjects skipped (no {extension} file found): {missing_subs}")
print(f"result files are saved in {save_path}")
print(f"result table is saved in {table_save_path}")

In [None]:
# data class filtering
# Keeps only the subjects whose `cohort` is listed in `labels`; the image
# folders and the table rows of every other subject are dropped.

data_path       = "./data/DaT_nii_multi_inspection_filtering"
table_path      = "./data/DaT_nii_multi_inspection_filtering.csv"

save_path       = "./data/DaT_nii_multi_inspection_filtering_class_filtering"
table_save_path = "./data/DaT_nii_multi_inspection_filtering_class_filtering.csv"

labels = ['Control', 'PD']
############################################

os.makedirs(save_path, exist_ok=True)
table = pd.read_csv(table_path, index_col=0)

keep_mask = table['cohort'].isin(labels)
filtered_table = table[keep_mask]
filtered_table.to_csv(table_save_path)

# A plain list (not a Series) so that the slicing / str() formatting in the
# summary below yields "[a, b, ..., y, z]" instead of a mangled Series repr.
filtered_sub = table.loc[~keep_mask, 'subject_number'].tolist()

# Copy everything first, then delete the folders of the rejected subjects.
shutil.copytree(data_path, save_path, dirs_exist_ok=True)

for sub in tqdm(filtered_sub, desc="removing"):
    shutil.rmtree(os.path.join(save_path, str(sub)))

print("class filtering done.")
print(f"{len(filtered_sub)} data are filtered")
if len(filtered_sub) > 0:
    if len(filtered_sub) > 10:
        # First five and last five IDs, joined into a single bracketed list.
        print(f"filtered list: {str(filtered_sub[:5])[:-1] + ', ..., '+ str(filtered_sub[-5:])[1:]}")
    else:
        print(f"filtered list: {filtered_sub}")

print(f"result files are saved in {save_path}")
print(f"result table is saved in {table_save_path}")

In [None]:
# data shape filtering
# Copies only the images whose array shape equals `shape`; the other subjects
# are removed from the table and listed in the summary.

data_path       = "./data/DaT_nii_multi_inspection_filtering_class_filtering"
table_path      = "./data/DaT_nii_multi_inspection_filtering_class_filtering.csv"

save_path       = "./data/DaT_nii_multi_inspection_filtering_class_filtering_shape_filtering"
table_save_path = "./data/DaT_nii_multi_inspection_filtering_class_filtering_shape_filtering.csv"

shape = (91, 109, 91)
############################################

os.makedirs(save_path, exist_ok=True)
subs = os.listdir(data_path)

filtered_sub = []
for sub in tqdm(subs, desc='shape filtering'):
    dat_path = os.path.join(data_path, sub, "DaT.nii")
    dat_nib = nib.load(dat_path)
    if dat_nib.shape != shape:
        # Unexpected shape: remember the subject and do not copy it.
        filtered_sub.append(sub)
        continue
    dat_save_dir = os.path.join(save_path, sub)
    os.makedirs(dat_save_dir, exist_ok=True)
    dat_save_path = os.path.join(dat_save_dir, "DaT.nii")
    shutil.copy(dat_path, dat_save_path)

sub_table = pd.read_csv(table_path, index_col=0)
# Folder names are strings; they are converted to int before being compared
# with the `subject_number` column.
rejected_ids = [int(sub) for sub in filtered_sub]
sub_table = sub_table[~sub_table['subject_number'].isin(rejected_ids)]

sub_table.to_csv(table_save_path)

print("shape filtering done.")
print(f"{len(filtered_sub)} data filtered: {filtered_sub}")
print(f"result files are saved in {save_path}")
print(f"result table is saved in {table_save_path}")

In [None]:
# data affine matrix reorientation
# Re-saves every image with the affine matrix of the reference image. The
# voxel array itself is passed through unchanged (no resampling is done here).

ref_img_path = "./data/atlas/PD25_T1_2mm.nii"

data_path    = "./data/DaT_nii_multi_inspection_filtering_class_filtering_shape_filtering"
save_path    = "./data/DaT_nii_multi_inspection_filtering_class_filtering_shape_filtering_reorient"

############################################

os.makedirs(save_path, exist_ok=True)

# Only the affine of the reference image is used.
ref_nib = nib.load(ref_img_path)
ref_affine = ref_nib.affine

subs = os.listdir(data_path)
for sub in tqdm(subs, desc='reorientation'):
    source_file = os.path.join(data_path, sub, "DaT.nii")
    target_dir = os.path.join(save_path, sub)

    dat_nib = nib.load(source_file)
    # Same data and header as the source image, reference affine instead of
    # the image's own one.
    re_orient_dat_nib = nib.Nifti1Image(
        dat_nib.get_fdata(),
        affine=ref_affine,
        header=dat_nib.header,
    )

    os.makedirs(target_dir, exist_ok=True)
    nib.save(re_orient_dat_nib, os.path.join(target_dir, "DaT.nii"))

print(f"reorientation done. ")
print(f"result files are saved in {save_path}")

In [None]:
# SBR table merge
# Joins the subject table with the SBR analysis table on (subject, visit) and
# keeps only the subjects that have a complete set of SBR values, both in the
# output table and in the output image folder.
# NOTE: `sbr_table_path` is not defined in this cell; it comes from the
# configuration cell at the top of the notebook.

table_path      = "./data/DaT_nii_multi_inspection_filtering_class_filtering_shape_filtering.csv"
data_path       = "./data/DaT_nii_multi_inspection_filtering_class_filtering_shape_filtering_reorient"

save_path       = "./data/DaT_preprocessed"
table_save_path = "./data/DaT_preprocessed.csv"

############################################
SBR_COLUMNS = [
    "DATSCAN_CAUDATE_R",
    "DATSCAN_CAUDATE_L",
    "DATSCAN_PUTAMEN_R",
    "DATSCAN_PUTAMEN_L",
    "DATSCAN_PUTAMEN_R_ANT",
    "DATSCAN_PUTAMEN_L_ANT",
]

table = pd.read_csv(table_path, index_col=0)
sbr_table = pd.read_csv(sbr_table_path, index_col=None)

# Inner join: subjects without an SBR row for their visit disappear here.
merged_table = pd.merge(table,
                        sbr_table[['PATNO', 'EVENT_ID'] + SBR_COLUMNS],
                        left_on=['subject_number', 'spect_visit'],
                        right_on=['PATNO', 'EVENT_ID'],
                        how='inner'
                        )
merged_table = merged_table.drop(columns=['PATNO', 'EVENT_ID'])

# Rows with any missing SBR value are dropped as well.
merged_table = merged_table.dropna(subset=SBR_COLUMNS)
merged_table.to_csv(table_save_path)

# Every subject that did not make it into the final table - unmatched in the
# join OR dropped for missing values. Computing the set only after dropna keeps
# the image folder and the table consistent (removing folders before the NaN
# rows are known would leave those subjects' images behind).
no_sbr_list = set(table['subject_number'].to_list()) - set(merged_table['subject_number'].to_list())

# dirs_exist_ok=True keeps the cell re-runnable when save_path already exists.
shutil.copytree(data_path, save_path, dirs_exist_ok=True)
for sub in tqdm(no_sbr_list, desc="removing"):
    shutil.rmtree(os.path.join(save_path, str(sub)))

print("SBR merge done.")
print(f"{len(no_sbr_list)} data filtered: {no_sbr_list}")
print(f"result files are saved in {save_path}")
print(f"result table is saved in {table_save_path}")