In [1]:
import pydicom
import json
import re
import os
import sys
import shutil
import glob
import random
import subprocess
import pathlib
import yaml
import nibabel as nib
import gzip
import pandas as pd
import numpy as np
import platform
import multiprocessing

In [27]:
# cfg = "C:/Users/smart/Desktop/GitProjects/convsauce/ConvertSource/cfg.test.yml"
cfg = "/Users/brac4g/Desktop/convsauce/ConvertSource/cfg.test.yml"

In [28]:
def read_config(config_file, verbose = False):
    '''
    Reads configuration file and creates a dictionary of search terms for 
    certain modalities provided that BIDS modalities are used as keys. If
    exclusions are provided (via the key 'exclude') then an exclusion list is 
    created. Otherwise, 'exclusion_list' is returned as an empty list. If 
    additional settings are specified, they should be done so via the key
    'metadata' to enable writing of additional metadata.
    
    Arguments:
        config_file (string): file path to yaml configuration file.
        verbose (boolean): Prints additional information to screen.
    
    Returns: 
        data_map (dict): Nested dictionary of search terms for BIDS modalities
        exclusion_list (list): List of exclusion terms
        settings_dict (dict): Nested dictionary of metadata terms to write to JSON file(s)
    '''
    
    with open(config_file) as file:
        data_map = yaml.safe_load(file)
        if verbose:
            print("Initialized parameters from configuration file")
        
    if any("exclude" in data_map for element in data_map):
        if verbose:
            print("exclusion option implemented")
        exclusion_list = data_map["exclude"]
        del data_map["exclude"]
    else:
        if verbose:
            print("exclusion option not implemented")
        exclusion_list = list()
        
    if any("metadata" in data_map for element in data_map):
        if verbose:
            print("implementing additional settings for metadata")
        settings_dict = data_map["metadata"]
        del data_map["metadata"]
    else:
        if verbose:
            print("no metadata settings")
        settings_dict = dict()
        
    return data_map,exclusion_list,settings_dict

In [29]:
search_dict, exclusion_list, param_dict = read_config(cfg,True)

Initialized parameters from configuration file
exclusion option implemented
implementing additional settings for metadata


In [30]:
search_dict

{'anat': {'T1w': ['T1', 'T1w', 'TFE'], 'T2w': ['T2', 'T2w', 'TSE']},
 'func': {'bold': {'rest': ['rsfMR', 'rest', 'FFE', 'FEEPI'],
   'visualstrobe': ['vis', 'visual']}},
 'fmap': {'fmap': ['map']},
 'swi': {'swi': ['swi']},
 'dwi': {'dwi': ['diffusion', 'DTI', 'DWI', '6_DIR']}}

In [31]:
exclusion_list

['SURVEY',
 'Reg',
 'SHORT',
 'LONG',
 'MRS',
 'PRESS',
 'DEFAULT',
 'ScreenCapture',
 'PD',
 'ALL',
 'SPECTRO']

In [32]:
param_dict

{'common': {'Manufacturer': 'Philips',
  'ManufacturersModelName': 'Ingenia',
  'MagneticFieldStrength': 3,
  'InstitutionName': "Cincinnati Children's Hospital Medical Center"},
 'func': {'rest': {'ParallelAcquisitionTechnique': 'SENSE',
   'PhaseEncodingDirection': 'j',
   'MultibandAccelerationFactor': 6,
   'TaskName': 'Rest'},
  'visualstrobe': {'PhaseEncodingDirection': 'j',
   'TaskName': 'Visual (Strobe) Task'}},
 'dwi': {'PhaseEncodingDirection': 'j'}}

In [18]:
# data_dir_par = "C:/Users/smart/Desktop/GitProjects/convsauce/287H_C10/PAR REC"
data_dir_par = "/Users/brac4g/Desktop/convsauce/287H_C10/PAR REC"

In [19]:
# data_dir_dcm = "C:/Users/smart/Desktop/GitProjects/convsauce/IRC287H-8/20171003"
data_dir_dcm = "/Users/brac4g/Desktop/convsauce/IRC287H-8/20171003"

In [21]:
data_dir_nii = "/Users/brac4g/Desktop/convsauce/287H_C10/NIFTI"

In [24]:
def get_dcm_files(dcm_dir):
    '''
    Creates a file list consisting of the first DICOM file in a parent DICOM directory. 
    A file list is then returned.
    
    Arguments:
        dcm_dir (string): Absolute path to parent DICOM data directory

    Returns: 
        dcm_files (list): List of DICOM filenames, complete with their absolute paths.
    '''
    
    # Create directory list
    dcm_dir = os.path.abspath(dcm_dir)
    parent_dcm_dir = os.path.join(dcm_dir,'*')
    dcm_dir_list = glob.glob(parent_dcm_dir, recursive=True)

    # Initilized dcm_file list
    dcm_files = list()
    
    # Iterate through files in the dicom directory list
    for dir_ in dcm_dir_list:
        # print(dir_)
        for root, dirs, files in os.walk(dir_):
            # print(files[0])
            tmp_dcm_file = files[0] # only need the first dicom file
            tmp_dcm_dir = root
            tmp_file = os.path.join(tmp_dcm_dir, tmp_dcm_file)

            dcm_files.append(tmp_file)
            break

    return dcm_files

In [27]:
def create_file_list(data_dir, file_ext="", order="size"):
    '''
    Creates a file list by globbing a directory for a specific file
    extension and sorting by some determined order. A file list is 
    then returned
    
    Arguments:
        data_dir (string): Absolute path to data directory (must be a directory dump of image data)
        file_ext (string): File extension to glob. Built-in options include:
            - 'par' or 'PAR': Searches for PAR headers
            - 'dcm' or 'DICOM': Searches for DICOM directories, then searches for one file from each DICOM directory
            - 'nii', or 'Nifti': Searches for nifti files (including gzipped nifti files)
        order (string): Order to sort the list. Valid options are: 'size' and 'time':
            - 'size': sorts by file size in ascending order (default)
            - 'time': sorts by file modification time in ascending order
            - 'none': no sorting is applied and the list is generated as the system finds the files
    
    Returns: 
        file_list (list): List of filenames, complete with their absolute paths.
    '''
    
    # Check file extension
    if file_ext != "":
        if file_ext.upper() == "PAR" or file_ext.upper() == "REC":
            file_ext = "PAR"
            file_ext = f".{file_ext.upper()}"
        elif file_ext.lower() == "dcm" or file_ext.upper() == "DICOM":
            file_ext = "dcm"
            file_ext = f".{file_ext.lower()}"
        elif file_ext.lower() == "nii" or file_ext.lower() == "nifti":
            file_ext = "nii"
            file_ext = f".{file_ext.lower()}*" # Add wildcard for globbling gzipped files
        else:
            file_ext = f".{file_ext}"
    
    # Check sort order
    if order.lower() == "size":
        order_key = os.path.getsize
    elif order.lower() == "time":
        order_key = os.path.getmtime
    elif order.lower() == "none":
        order_key=None
    else:
        order_key = os.path.getsize
        print("Unrecognized keyword option. Using default.")
    
    # Create file list
    if file_ext == ".dcm":
        file_list = sorted(get_dcm_files(data_dir), key=order_key, reverse=False)
    elif file_ext != ".dcm":
        file_names = os.path.join(data_dir, f"*{file_ext}")
        file_list = sorted(glob.glob(file_names, recursive=True), key=order_key, reverse=False)
    
    return file_list

In [28]:
par_file_list = create_file_list(data_dir=data_dir_par,file_ext="par")
dcm_file_list = create_file_list(data_dir=data_dir_dcm,file_ext="dcm")
nii_file_list = create_file_list(data_dir=data_dir_nii,file_ext="nii")

In [84]:
def file_exclude(file_list, data_dir, exclusion_list = [], verbose = False):
    '''
    Excludes files from the conversion process by removing filenames
    that contain words that match those found in the 'exclusion_list'
    from the 'read_config' function - should any files need/want to be 
    excluded.
    
    If 'exclusion_list' is empty, then the original 'file_list' is returned.
    
    Arguments:
        file_list (list): List of filenames
        data_dir (string): Absolute path to parent directory that contains the image data
        exclusion_list (list): List of words to be matched. Filenames that contain these words will be excluded.
        verbose (bool): Boolean - True or False.
    
    Returns: 
        currated_list (list): Currated list of filenames, with unwanted filenames removed.
    '''
            
    # Check file extension in file list
    if 'dcm' in file_list[0]:
        file_ext = "dcm"
        file_ext = f".{file_ext.lower()}"
    elif 'PAR' in file_list[0]:
        file_ext = "PAR"
        file_ext = f".{file_ext.upper()}"
    elif 'nii' in file_list[0]:
        file_ext = "nii"
        file_ext = f".{file_ext.lower()}*" # Add wildcard for globbling gzipped files
    else:
        file_ext = ""
        file_ext = f".{file_ext.lower()}"
    
    # create set of lists
    file_set = set(file_list)
    
    # create empty sets
    currated_set = set()
    exclusion_set = set()
    
    if len(exclusion_list) == 0:
        currated_set = file_set
    else:
        for file in exclusion_list:
            if file_ext == '.dcm':
                dir_ = os.path.join(data_dir, f"*{file}*",f"*{file_ext}")
            else:
                dir_ = os.path.join(data_dir, f"*{file}*{file_ext}")
            f_names = glob.glob(dir_, recursive=True)        
            f_names_set = set(f_names)
            exclusion_set.update(f_names_set)
            
        currated_set = file_set.difference(exclusion_set)

    currated_list = list(currated_set)
    
    return currated_list

In [85]:
par_file_list_currated = file_exclude(par_file_list,data_dir_par,exclusion_list)
dcm_file_list_currated = file_exclude(dcm_file_list,data_dir_dcm,exclusion_list)
nii_file_list_currated = file_exclude(nii_file_list,data_dir_nii,exclusion_list)

In [93]:
def str_in_substr(sub_str_,str_):
    '''
    DEPRECATED: Should only be used if config_file uses comma separated
        lists to denote search terms.
    
    Searches a (longer) string using a comma separated string 
    consisting of substrings. Returns 'True' or 'False' if any part
    of the substring is found within the larger string.
    
    Example:
        str_in_substr('T1,TFE','sub_T1_image_file') would return True.
        str_in_substr('T2,TSE','sub_T1_image_file') would return False.
    
    Arguments:
        sub_str_ (string): Substring used for matching.
        str_ (string): Larger string to be searched for matches within substring.
    
    Returns: 
        bool_var (bool): Boolean - True or False
    '''
    
    bool_var = False
    
    for word in sub_str_.split(","):
        if any(word in str_ for element in str_):
            bool_var = True
            
    return bool_var

In [140]:
def list_in_substr(list_,str_):
    '''
    Searches a string using a list that contains substrings. 
    Returns 'True' or 'False' if any elements of the list are 
    found within the string.
    
    Example:
        list_in_substr('['T1','TFE']','sub_T1_image_file') would return True.
        list_in_substr('['T2','TSE']','sub_T1_image_file') would return False.
    
    Arguments:
        list_ (string): list containing strings used for matching.
        str_ (string): Larger string to be searched for matches within substring.
    
    Returns: 
        bool_var (bool): Boolean - True or False
    '''
    
    bool_var = False
    
    for word in list_:
        if any(word.lower() in str_.lower() for element in str_.lower()):
            bool_var = True
            
    return bool_var

In [174]:
def is_valid_mr(dcm_file, verbose=False):
    '''
    Checks for a valid DICOM file by inspecting the conversion type label in the DICOM file header.
    This field should be blank. If this label is populated, then it is likely a secondary capture image 
    and thus is not likely to contain meaningful image information.
    
    Arguments:
        dcm_file (string): DICOM filename with absolute filepath
        verbose (boolean): Enable verbosity
    
    Returns: 
        is_valid (boolean): True if DICOM file is not a secondary capture (or does not have text in the conversion type label field)
    '''
    
    # Read DICOM file header
    ds = pydicom.dcmread(dcm_file)
    
    # Invalid files include secondary image captures, and are not suitable for 
    # nifti conversion as they are often not converted and cause problems.
    # This string should be empty. If it is populated, then its likely a secondary capture.
    conv_type = ds.ConversionType
    
    if conv_type in '':
        is_valid = True
    else:
        is_valid = False
        if verbose:
            print(f"Please check Conversion Type (0008, 0064) in dicom header. The presented DICOM file is not a valid file: {dcm_file}.")
    
    return is_valid

In [187]:
def get_scan_tech(dictionary, file, json_file=""):
    '''
    Searches DICOM or PAR file header for scan technique/MR modality used in accordance with the search terms provided
    by the nested dictionary.
    
    Note: This function is still undergoing active development.
    
    Arguments:
        dictionary (dict): Nested dictionary from the 'read_config' function
        dcm_file (string): Source image filename with absolute filepath
    
    Returns: 
        None
    '''
    
    # Check file extension in file list
    if 'dcm' in file:
        file_ext = "dcm"
        file_ext = f".{file_ext.lower()}"
    elif 'PAR' in file:
        file_ext = "PAR"
        file_ext = f".{file_ext.upper()}"
    elif 'nii' in file:
        file_ext = "nii"
        file_ext = f".{file_ext.lower()}"
    else:
        file_ext = ""
        file_ext = f".{file_ext.lower()}"
    
    # Perform Scanning Techniqe Search
    if file_ext == '.dcm':
        get_dcm_scan_tech(dictionary,file)
    elif file_ext == '.PAR':
        get_par_scan_tech(dictionary,file)
    else:
        print("unknown modality")
        
    return None

In [268]:
def get_dcm_scan_tech(dictionary, dcm_file):
    '''
    Searches DICOM file header for scan technique/MR modality used in accordance with the search terms provided by the
    nested dictionary. The DICOM header field searched is a Philips DICOM private tag (2001,1020) [Scanning Technique 
    Description MR]. In the case that field does not match, is empty, or does not exist, then more common DICOM tags
    are searched - and they include: Series Description, Protocol Name, and Image Type.
    
    Note: This function is still undergoing active development.
    
    Arguments:
        dictionary (dict): Nested dictionary from the 'read_config' function
        dcm_file (string): DICOM filename with absolute filepath
    
    Returns: 
        None
    '''
    
    mod_found = False
    
    # Load DICOM data and read header
    ds = pydicom.dcmread(dcm_file)
    
    # Search DICOM header for Scan Technique used
    dcm_scan_tech_str = str(ds[0x2001,0x1020])
    
    for key,item in dictionary.items():
        for dict_key,dict_item in dictionary[key].items():
            if isinstance(dict_item,list):
                if list_in_substr(dict_item,dcm_scan_tech_str):
                    mod_found = True
                    print(f"{key} - {dict_key}: {dict_item}")
                    if mod_found:
                        break
            elif isinstance(dict_item,dict):
                tmp_dict = dictionary[key]
                for d_key,d_item in tmp_dict[dict_key].items():
                    if list_in_substr(d_item,dcm_scan_tech_str):
                        mod_found = True
                        print(f"{key} - {dict_key} - {d_key}: {d_item}")
                        if mod_found:
                            break
                            
        if mod_found:
            break
    
    # Secondary look in the case Private Field (2001, 1020) [Scanning Technique Description MR] is empty
    if not mod_found:
        # Define list of DICOM header fields
        dcm_fields = ['SeriesDescription', 'ImageType', 'ProtocolName']
        
        for dcm_field in dcm_fields:
            dcm_scan_tech_str = str(eval(f"ds.{dcm_field}")) # This makes me dangerously uncomfortable
            
            for key,item in dictionary.items():
                for dict_key,dict_item in dictionary[key].items():
                    if isinstance(dict_item,list):
                        if list_in_substr(dict_item,dcm_scan_tech_str):
                            mod_found = True
                            print(f"{key} - {dict_key}: {dict_item}")
                            if mod_found:
                                break
                    elif isinstance(dict_item,dict):
                        tmp_dict = dictionary[key]
                        for d_key,d_item in tmp_dict[dict_key].items():
                            if list_in_substr(d_item,dcm_scan_tech_str):
                                mod_found = True
                                print(f"{key} - {dict_key} - {d_key}: {d_item}")
                                if mod_found:
                                    break

            if mod_found:
                break
                
    if not mod_found:
        print("unknown modality")
        
    return None

In [189]:
def get_par_scan_tech(dictionary, par_file):
    '''
    Searches PAR file header for scan technique/MR modality used in accordance with the search terms provided by the
    nested dictionary. A regular expression (regEx) search string is defined and searched for conventional PAR headers.
    
    Note: This function is still undergoing active development.
    
    Arguments:
        dictionary (dict): Nested dictionary from the 'read_config' function
        par_file (string): PAR filename with absolute filepath
    
    Returns: 
        None
    '''
    
    mod_found = False
    
    # Define regEx search string
    regexp = re.compile(r'.    Technique                          :  .*', re.M | re.I)
    
    # Open and search PAR header file
    with open(par_file) as f:
        for line in f:
            match_ = regexp.match(line)
            if match_:
                par_scan_tech_str = match_.group()
    
    # Search Scan Technique with search terms
    for key,item in dictionary.items():
        for dict_key,dict_item in dictionary[key].items():
            if isinstance(dict_item,list):
                if list_in_substr(dict_item,par_scan_tech_str):
                    mod_found = True
                    print(f"{key} - {dict_key}: {dict_item}")
                    if mod_found:
                        break
            elif isinstance(dict_item,dict):
                tmp_dict = dictionary[key]
                for d_key,d_item in tmp_dict[dict_key].items():
                    if list_in_substr(d_item,par_scan_tech_str):
                        mod_found = True
                        print(f"{key} - {dict_key} - {d_key}: {d_item}")
                        if mod_found:
                            break
                            
        if mod_found:
            break
            
    if not mod_found:
        print("unknown modality")
    
    return None

In [186]:
def convert_modality(dictionary, file, verbose=False):
    '''
    Converts an image file and extracts information from the filename
    (such as the modality). 
    
    Note: This function is still undergoing active development.
    
    Arguments:
        dictionary (dict): Nested dictionary from the 'read_config' function
        file (string): Filename with absolute filepath
        verbose (boolean): Enable verbosity
    
    Returns: 
        None
    '''
    
    mod_found = False
    
    # Check file type
    if 'nii' in file:
        file_ext = "nii"
        file_ext = f".{file_ext.lower()}"
    elif 'dcm' in file:
        file_ext = "dcm"
        file_ext = f".{file_ext.lower()}"
        if not is_valid_mr(file,verbose):
            sys.exit(f"Invalid DICOM file. Please check {file}")
    
    for key,item in dictionary.items():
        for dict_key,dict_item in dictionary[key].items():
            if isinstance(dict_item,list):
                if list_in_substr(dict_item,file):
                    mod_found = True
                    print(f"{key} - {dict_key}: {dict_item}")
                    if mod_found:
                        break
            elif isinstance(dict_item,dict):
                tmp_dict = dictionary[key]
                for d_key,d_item in tmp_dict[dict_key].items():
                    if list_in_substr(d_item,file):
                        mod_found = True
                        print(f"{key} - {dict_key} - {d_key}: {d_item}")
                        if mod_found:
                            break
                        
    if not mod_found:
        get_scan_tech(dictionary,file)
    
    return None

In [190]:
def batch_convert(file_list, dictionary, verbose=False):
    '''
    Batch conversion function for image files. 
    
    Note: This function is still undergoing active development.
    
    Arguments:
        file_list (list): List of filenames with absolute filepaths
        dictionary (dict): Nested dictionary from the 'read_config' function
        verbose (boolean): Enable verbosity
    
    Returns: 
        None
    '''
    
    for file in file_list:
        try:
            convert_modality(dictionary,file,verbose)
        except SystemExit:
            pass
    
    return None

#### `TaskName` JSON file appending funtion

In [4]:
# task_name = ""
task_name = "visualstrobe"

In [5]:
if task_name == "":
    print("task_name is empty")
else:
    print(f"task_name is: {task_name}")

task_name is: visualstrobe


In [1]:
nii_file = "c:/Users/smart/Desktop/GitProjects/convsauce/BIDS/rawdata/sub-C10/ses-001/func/sub-C10_ses-001_task-rest_acq-PA_run-01_bold.nii.gz"

In [2]:
import os
import nibabel as nib
import numpy as np

In [3]:
os.path.exists(nii_file)

True

In [4]:
img = nib.load(nii_file)

In [5]:
img

<nibabel.nifti1.Nifti1Image at 0x22eb4b28470>

In [6]:
img.header.get_data_shape()

(64, 64, 45, 400)

In [7]:
type(img.header.get_data_shape())

tuple

In [9]:
frames = img.header.get_data_shape()

In [10]:
frames

(64, 64, 45, 400)

In [13]:
type(frames[3])

int

# `NifTi` File Conversion Functions

In [None]:
def data_to_bids_anat(bids_out_dir, file, sub, scan, ses=1, scan_type='anat'):
    '''
    Renames converted nifti files to conform with BIDS format
    (in the case of anatomical files).
    NB: out_dir refers to the parent or RawData directory.
    '''

    # Create Output Directory Variables
    # Zeropad subject ID if possible
    try:
        ses = '{:03}'.format(int(ses))
    except ValueError:
        pass
    # Zeropad session ID if possible
    try:
        ses = '{:03}'.format(int(ses))
    except ValueError:
        pass
    
    bids_out_dir = os.path.abspath(bids_out_dir)
    outdir = os.path.join(out_dir, f"sub-{sub}", f"ses-{ses}", f"{scan_type}")

    # Make output directory
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Create temporary output names/directories
    tmp_out_dir = os.path.join(out_dir, f"sub-{sub}", 'tmp_dir' + str(random.randint(0, n)))
    tmp_basename = 'tmp_basename' + str(random.randint(0, n))

    # Convert image file
    # Check file extension in file list
    if 'dcm' in file:
        [nii_file, json_file] = convert_dcm_file(file, tmp_out_dir, tmp_basename)
    elif 'PAR' in file:
        [nii_file, json_file] = convert_par_file(file, tmp_out_dir, tmp_basename)
    elif 'nii.gz' in file:
        file_ext = "nii.gz"
        file_ext = f".{file_ext.lower()}"
    elif 'nii' in file:
        file_ext = "nii.gz"
        file_ext = f".{file_ext.lower()}"
    else:
        file_ext = ""
        file_ext = f".{file_ext.lower()}"
        
    [nii_file, json_file] = convert_par_file(par_file, tmp_out_dir, tmp_basename)

    nii_file = os.path.abspath(nii_file)
    json_file = os.path.abspath(json_file)

    # Append w to T1/T2 if not already done
    if scan in 'T1' or scan in 'T2':
        scan = scan + 'w'
    else:
        scan = scan

    # Get Run number
    run = get_num_runs(outdir, scan=scan)
    run = '{:02}'.format(run)

    # Additional sequence/modality parameters
#     epi_factor = get_epi_factor(par_file)
#     wfs = get_wfs(par_file)
#     bval = get_bval(par_file)
#     acc = get_acc(par_file)
#     mb = get_mb(par_file)
#     sct = get_scan_time(par_file)

#     # update JSON file with additional parameters
#     json_file = update_json(json_file, bval, wfs, epi_factor, acc, mb, sct)

    # Create output filenames
    out_name = f"sub-{sub}_ses-{ses}_run-{run}_{scan}"
    out_nii = os.path.join(outdir, out_name + '.nii.gz')
    out_json = os.path.join(outdir, out_name + '.json')

    os.rename(nii_file, out_nii)
    os.rename(json_file, out_json)

    # remove temporary directory and leftover files
    shutil.rmtree(tmp_out_dir)

In [227]:
def get_nii_tr(nii_file):
    '''
    Reads the NifTi file header and returns the repetition time (TR, sec) as a value if it is not zero, otherwise this 
    function returns the string 'unknown'.
    
    Arguments:
        nii_file (string): NifTi image filename with absolute filepath
        
    Returns: 
        tr (float or string): Repetition time (TR, sec), if not zero, otherwise 'unknown' is returned.
    '''
    
    # Load nifti file
    img = nib.load(nii_file)
    
    # Store nifti image TR
    tr = float(img.header['pixdim'][4])
    
    # Check if TR is likely
    if tr != 0:
        pass
    else:
        tr = "unknown"
    
    return tr

In [242]:
def file_parts(file):
    '''
    Divides file with file path into: path, filename, extension.
    
    Arguments:
        file (string): File with absolute filepath
        
    Returns: 
        path (string): Path of input file
        filename (string): Filename of input file, without the extension
        ext (string): Extension of input file
    '''
    
    [path, file_with_ext] = os.path.split(file)
    [filename,ext] = os.path.splitext(file_with_ext)
    
    path = str(path)
    filename = str(filename)
    ext = str(ext)
    
    return path,filename,ext

In [247]:
def gzip_file(file,rm_orig=True):
    '''
    Gzips file.
    
    Arguments:
        file (string): Input file
        rm_orig (boolean): If true (default), removes original file
        
    Returns: 
        out_file (string): Gzipped file
    '''
    
    # Define tempory file for I/O buffer stream
    tmp_file = file
    path,f_name_,ext_ = file_parts(tmp_file)
    f_name = f_name_ + ext_ + ".gz"
    out_file = os.path.join(path,f_name)
    
    # Gzip file
    with open(file,"rb") as in_file:
        data = in_file.read(); in_file.close()
        with gzip.GzipFile(out_file,"wb") as tmp_out:
            tmp_out.write(data)
            tmp_out.close()
            
    if rm_orig:
        os.remove(file)
            
    return out_file

In [252]:
def gunzip_file(file,rm_orig=True):
    '''
    Gunzips file.
    
    Arguments:
        file (string): Input file
        rm_orig (boolean): If true (default), removes original file
        
    Returns: 
        out_file (string): Gunzipped file
    '''
    
    # Define tempory file for I/O buffer stream
    tmp_file = file
    path,f_name_,ext_ = file_parts(tmp_file)
    f_name = f_name_ # + ext_[:-3]
    out_file = os.path.join(path,f_name)
    
    with gzip.GzipFile(file,"rb") as in_file:
        data = in_file.read(); in_file.close()
        with open(out_file,"wb") as tmp_out:
            tmp_out.write(data)
            tmp_out.close()
            
    if rm_orig:
        os.remove(file)
    
    return out_file

In [270]:
def update_json(json_file,dictionary):
    '''
    Updates JavaScript Object Notation (JSON) file. If the file does not exist, it is created once
    this function is invoked.
    
    Arguments:
        json_file (string): Input file
        dictionary (dict): Dictionary of key mapped items to write to JSON file
        
    Returns: 
        json_file (string): Updated JSON file
    '''
    
    # Check if JSON file exists, if not, then create JSON file
    if not os.path.exists(json_file):
        with open(json_file,"w"): pass
        
    # Read JSON file
    # Try-Except statement has empty exception as JSONDecodeError is not a valid exception to pass, 
    # thus throwing a name error
    try:
        with open(json_file) as file:
            data_orig = json.load(file)
    except:
        pass
        data_orig = dict()
        
    # Update original data from JSON file
    data_orig.update(dictionary)
    
    # Write updated JSON file
    with open(json_file,"w") as file:
        json.dump(data_orig,file,indent=4)
        
    return json_file

In [290]:
def dict_multi_update(dictionary,**kwargs):
    '''
    Updates a dictionary multiple times depending on the number key word mapped pairs that are provided and 
    returns a separate updated dictionary. The dictionary passed as an argument must exist prior to this 
    function being invoked.
    
    Example usage:
    
        new_dict = dict_multi_update(old_dict,
                                    Manufacturer="Philips",
                                    ManufacturersModelName="Ingenia",
                                    MagneticFieldStrength=3,
                                    InstitutionName="CCHMC")
    
    Arguments:
        dictionary (dict): Dictionary of key mapped items to write to JSON file
        **kwargs (string, key,value pairs): key=value pairs
        
    Returns: 
        new_dict (dict): New updated dictionary
    '''
    
    # Create new dictionary
    new_dict = dictionary.copy()
    
    for key,item in kwargs.items():
        tmp_dict = {key:item}
        new_dict.update(tmp_dict)
        
    return new_dict

In [34]:
def get_metadata(dictionary,scan_type="",task=""):
    '''
    Reads the metadata dictionary and looks for keywords to indicate what metadata should be written to which
    dictionary. For example, the keyword 'common' is used to indicate the common information for the imaging
    protocol and may contain information such as: field strength, phase encoding direction, institution name, etc.
    Additional keywords that are BIDS sub-directories names (e.g. anat, func, dwi) will return an additional
    dictionary which contains metadata specific for those modalities. Func also has additional keywords based on
    the task specified.
    
    Arguments:
        dictionary (dict): Nest dictionary of key mapped items from the 'read_config' function
        scan_type (string): BIDS scan type (e.g. anat, func, dwi, etc., default="")
        task (string): Task name to search in the key mapped dictionary
        
    Returns: 
        com_param_dict (dict): Common parameters dictionary
        scan_param_dict (dict): Scan/modality type parameters dictionary
    '''
    
    # Create empty dictionaries
    com_param_dict = dict()
    scan_param_dict = dict()
    scan_task_dict = dict()
    
    # Iterate through, looking for key words (e.g. common and scan_type)
    for key,item in dictionary.items():
        if key.lower() in 'common':
            com_param_dict = dictionary[key]

        if key.lower() in scan_type:
            scan_param_dict = dictionary[key]
            if task.lower() in scan_param_dict:
                for dict_key,dict_item in scan_param_dict.items():
                    if task.lower() in dict_key:
                        scan_task_dict = scan_param_dict[dict_key]
                        
        if len(scan_task_dict) != 0:
            scan_param_dict = scan_task_dict
    
    return com_param_dict, scan_param_dict 

In [40]:
get_metadata(param_dict)

({'Manufacturer': 'Philips',
  'ManufacturersModelName': 'Ingenia',
  'MagneticFieldStrength': 3,
  'InstitutionName': "Cincinnati Children's Hospital Medical Center"},
 {})

## Construct Filenames

In [32]:
# Anat

# Required
sub = '001'
ses = '001'
scan_type = 'anat'
mod = 'T2w'
run = '01'

# optional
# acq = 'NeonateAnat'
# ce = ''
# rec = 'NeonateRecon'

acq = ''
ce = ''
rec = ''

In [23]:
bids_out_dir = os.getcwd()

In [24]:
outdir = os.path.join(bids_out_dir, f"sub-{sub}", f"ses-{ses}", f"{scan_type}")

In [25]:
out_name = f"sub-{sub}" + f"_ses-{sub}"

In [26]:
out_name

'sub-001_ses-001'

In [33]:
bids_out_dir = os.getcwd()
outdir = os.path.join(bids_out_dir, f"sub-{sub}", f"ses-{ses}", f"{scan_type}")

out_name = f"sub-{sub}" + f"_ses-{sub}"

if acq:
    out_name = out_name + f"_acq-{acq}"
    
if ce:
    out_name = out_name + f"_ce-{ce}"
    
if rec:
    out_name = out_name + f"_rec-{rec}"
    
if run:
    out_name = out_name + f"_run-{run}"
    
out_name = out_name + f"_{mod}"

print(outdir)
print(out_name)

C:\Users\smart\Desktop\GitProjects\convsauce\ConvertSource\sub-001\ses-001\anat
sub-001_ses-001_run-01_T2w


In [42]:
# Functional

# if num_frames is 1, mod = sbref ; if greater than 1, mod = bold

# Required
sub = '001'
ses = '001'
scan_type = 'func'
mod = 'bold'
task = 'rest'
run = '01'

# optional
# acq = 'multiband'
# ce = ''
# rec = 'NeonateRecon'
# direction = 'PA'
# echo = '01'

acq = ''
ce = ''
rec = ''
direction = 'PA'
echo = ''

In [43]:
bids_out_dir = os.getcwd()
outdir = os.path.join(bids_out_dir, f"sub-{sub}", f"ses-{ses}", f"{scan_type}")

out_name = f"sub-{sub}" + f"_ses-{ses}_task-{task}"

if acq:
    out_name = out_name + f"_acq-{acq}"
    
if ce:
    out_name = out_name + f"_ce-{ce}"
    
if direction:
    out_name = out_name + f"_dir-{direction}"
    
if rec:
    out_name = out_name + f"_rec-{rec}"
    
if run:
    out_name = out_name + f"_run-{run}"
    
if echo:
    out_name = out_name + f"_echo-{echo}"
    
out_name = out_name + f"_{mod}"

print(outdir)
print(out_name)

C:\Users\smart\Desktop\GitProjects\convsauce\ConvertSource\sub-001\ses-001\func
sub-001_ses-001_task-rest_dir-PA_run-01_bold


In [10]:
# Diffusion

# if num_frames is 1, mod = sbref ; if greater than 1, mod = dwi

# Required
sub = '001'
ses = '001'
scan_type = 'dwi'
mod = 'dwi'
run = '01'

# optional
# acq = 'b2000'
acq = "b0"
direction = 'PA'
te = 88

In [11]:
bids_out_dir = os.getcwd()
outdir = os.path.join(bids_out_dir, f"sub-{sub}", f"ses-{ses}", f"{scan_type}")

out_name = f"sub-{sub}" + f"_ses-{ses}"

if acq and te:
    out_name = out_name + f"_acq-{acq}TE{te}"
elif acq:
    out_name = out_name + f"_acq-{acq}"
elif te:
    out_name = out_name + f"_acq-TE{te}"
    
if direction:
    out_name = out_name + f"_dir-{direction}"
    
if run:
    out_name = out_name + f"_run-{run}"
    
out_name = out_name + f"_{mod}"

print(outdir)
print(out_name)

C:\Users\smart\Desktop\GitProjects\convsauce\ConvertSource\sub-001\ses-001\dwi
sub-001_ses-001_acq-b0TE88_dir-PA_run-01_dwi


In [50]:
# Fieldmap

# Only handles case 3 for real fieldmap images: 
# https://github.com/bids-standard/bids-specification/blob/master/src/04-modality-specific-files/01-magnetic-resonance-imaging-data.md
# Units for fieldmap should be Hz (I think)

# mod not sufficient as two files are expected. 
# magnitude image should have different variable

# Required
sub = '001'
ses = '001'
scan_type = 'fmap'
mod = 'fieldmap'
run = '01'

# optional
# acq = 'multiband'

acq = ''

In [51]:
bids_out_dir = os.getcwd()
outdir = os.path.join(bids_out_dir, f"sub-{sub}", f"ses-{ses}", f"{scan_type}")

out_name = f"sub-{sub}" + f"_ses-{ses}"

if acq:
    out_name = out_name + f"_acq-{acq}"
    
if run:
    out_name = out_name + f"_run-{run}"
    
out_name = out_name + f"_{mod}"

print(outdir)
print(out_name)

C:\Users\smart\Desktop\GitProjects\convsauce\ConvertSource\sub-001\ses-001\fmap
sub-001_ses-001_run-01_fieldmap


## Read `bvals`

In [2]:
file_0800 = "C:/Users/smart/Desktop/GitProjects/convsauce/BIDS/rawdata/sub-C10/ses-001/dwi/sub-C10_ses-001_acq-PA_dirs-036_bval-b800_run-01_dwi.bval"
file_2000 = "C:/Users/smart/Desktop/GitProjects/convsauce/BIDS/rawdata/sub-C10/ses-001/dwi/sub-C10_ses-001_acq-PA_dirs-068_bval-b2000_run-01_dwi.bval"

In [6]:
pd.read_csv(file_0800,header='infer',sep=" ")

Unnamed: 0,0,0.1,0.2,0.3,800,800.1,800.2,800.3,800.4,800.5,...,800.22,800.23,800.24,800.25,800.26,800.27,800.28,800.29,800.30,800.31


In [20]:
with open(file_0800,"r") as file:
    # vals = list(file.read().splitlines())
    print(np.loadtxt(file_0800))
    file.close()
    # print(vals)
    

[  0.   0.   0.   0. 800. 800. 800. 800. 800. 800. 800. 800. 800. 800.
 800. 800. 800. 800. 800. 800. 800. 800. 800. 800. 800. 800. 800. 800.
 800. 800. 800. 800. 800. 800. 800. 800.]


In [22]:
vals = np.loadtxt(file_0800)

In [23]:
len(vals)

36

In [26]:
vals.nonzero()

(array([ 4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35],
       dtype=int64),)

In [30]:
vals_nonzero = vals[vals.astype(bool)]
vals_nonzero

array([800., 800., 800., 800., 800., 800., 800., 800., 800., 800., 800.,
       800., 800., 800., 800., 800., 800., 800., 800., 800., 800., 800.,
       800., 800., 800., 800., 800., 800., 800., 800., 800., 800.])

In [34]:
list(np.unique(vals_nonzero))

[800.0]

In [39]:
for v in np.unique(vals):
    print(v)

0.0
800.0


In [36]:
def get_bvals(bval_file):
    '''
    Reads the bvals from the (FSL-style) bvalue file and returns a list of unique non-zero bvalues
    
    Arguments:
        bval_file (string): Absolute filepath to bval (.bval) file
        
    Returns: 
        bvals_list (list): List of unique, non-zero bvalues.
    '''
    
    vals = np.loadtxt(bval_file)
    vals_nonzero = vals[vals.astype(bool)]
    bvals_list = list(np.unique(vals_nonzero))
    
    return bvals_list

In [37]:
get_bvals(file_0800)

[800.0]

In [38]:
get_bvals(file_2000)

[2000.0]

## Calculate `EffectiveEchoSpacing` and `TotalReadoutTime`
-----

See this source for details:https://github.com/bids-standard/bids-specification/blob/master/src/04-modality-specific-files/01-magnetic-resonance-imaging-data.md

-----
Siemens:        

`BWPPPE` = `BandwidthPerPixelPhaseEncode `           

`EffectiveEchoSpacing` = 1 / [`BWPPPE` * `ReconMatrixPE`]          

`TotalReadoutTime` = `EffectiveEchoSpacing * (ReconMatrixPE - 1)`

Philips:

`EffectiveEchoSpacing` = (((1000*`WFS`)/(434.215*(`EchoTrainLength`+1)))/`acceleration`)           

`TotalReadoutTime` = 0.001 * `EffectiveEchoSpacing` * `EchoTrainLength`

See these links for Philips specific details:                 

https://www.jiscmail.ac.uk/cgi-bin/webadmin?A2=fsl;162ab1a3.1308           

https://support.brainvoyager.com/brainvoyager/functional-analysis-preparation/29-pre-processing/78-epi-distortion-correction-echo-spacing-and-bandwidth           

https://neurostars.org/t/consolidating-epi-echo-spacing-and-readout-time-for-philips-scanner/4406            







In [15]:
# f = "C:/Users/smart/Desktop/GitProjects/convsauce/IRC287H-8/20171003/1101_rsfMRI_MB6_SENSE_1_fat_shift_P_017100310465322437/MR1101000016.dcm"
f = "/Users/brac4g/Desktop/convsauce/IRC287H-8/20171003/1101_rsfMRI_MB6_SENSE_1_fat_shift_P_017100310465322437/MR1101000016.dcm"
# f = "/Users/brac4g/Downloads/MR.1.3.12.2.1107.5.2.19.45307.2017051015422162853047250.dcm"

In [16]:
ds = pydicom.dcmread(f)

In [17]:
ds

(0008, 0000) Group Length                        UL: 628
(0008, 0005) Specific Character Set              CS: 'ISO_IR 100'
(0008, 0008) Image Type                          CS: ['ORIGINAL', 'PRIMARY', 'M_FFE', 'M', 'FFE']
(0008, 0012) Instance Creation Date              DA: '20171003'
(0008, 0013) Instance Creation Time              TM: '113056.330'
(0008, 0014) Instance Creator UID                UI: 1.3.46.670589.11.89.5
(0008, 0016) SOP Class UID                       UI: MR Image Storage
(0008, 0018) SOP Instance UID                    UI: 1.3.46.670589.11.71329.5.0.7904.2017100310544373441
(0008, 0020) Study Date                          DA: '20171003'
(0008, 0021) Series Date                         DA: '20171003'
(0008, 0022) Acquisition Date                    DA: '20171003'
(0008, 0023) Content Date                        DA: '20171003'
(0008, 0030) Study Time                          TM: '095546'
(0008, 0031) Series Time                         TM: '104653.20000'
(0008, 0032) 

In [14]:
# Siemens
1/2440

0.0004098360655737705

In [18]:
# Philips
1/1539

0.000649772579597141

In [21]:
# Effective Echo Spacing Philips dicom
1/(1539*71)

0.0004575863236599585

In [12]:
# Total Readout time philips dicom
((1/(1539*71))*(71-1))

0.0006406208531239419

In [23]:
# Total Readout time philips dicom (acceptable?)
(0.0006406208531239419)*(71-1)

0.04484345971867593

In [11]:
# Effective Echo Spacing Siemens
1/(29.481*64)

0.0005300023744106374

In [10]:
# Total Readout time Siemens
(1/(29.481*64))*(64-1)

0.033390149587870156

In [None]:
def calc_read_time():
    '''
    working doc-string: continue later
    '''

# `dcm2niix` Wrapper Function

In [46]:
def convert_image_data(file,basename,out_dir,cprss_lvl=6,bids=True,
                       anon_bids=True,gzip=True,comment=True,
                       adjacent=False,dir_search=5,nrrd=False,
                       ignore_2D=True,merge_2D=True,text=False,
                       progress=False,verbose=False,
                       write_conflicts="suffix",crop_3D=False,
                       lossless=False,big_endian="optimal",xml=False):
    '''
    Converts raw image data (DICOM, PAR REC, or Bruker) to NifTi (or NRRD) using dcm2niix.
    This is a wrapper function for dcm2niix (v1.0.20190902+). This wrapper functions has no returns, 
    however output files are generated in a specified directory that must exist prior to the 
    invokation of this function.
    
    Note: Most of the defaults for dcm2niix have been preserved aside from those starred (*) in the
    (optional) arguments section, in order to be BIDS compliant.

    Arguments (Required):
        file (string): Absolute path to raw image data file
        basename (string): Output file(s) basename
        out_dir (string): Absolute path to output directory (must exist at runtime)

    Arguments (Optional):
        cprss_lvl (int): Compression level [1 - 9] - 1 is fastest, 9 is smallest (default: 6)
        bids (bool): BIDS (JSON) sidecar (default: True) * 
        anon_bids (bool): Anonymize BIDS (default: True) * 
        gzip (bool): Gzip compress images (default: True) *
        comment (bool): Image comment(s) stored in NifTi header (default: True) *
        adjacent (bool): Assumes adjacent DICOMs/Image data (images from same series always in same folder) for faster conversion (default: False)
        dir_search (int): Directory search depth (default: 5)
        nrrd (bool): Export as NRRD instead of NifTi, not recommended (default: False)
        ignore_2D (bool): Ignore derived, localizer and 2D images (default: True)
        merge_2D (bool): Merge 2D slices from same series regardless of echo, exposure, etc. (default: True)
        text (bool): Text notes includes private patient details in separate text file (default: False)
        progress (bool): Report progress, slicer format progress information (default: True)
        verbose (bool): Enable verbosity (default: False)
        write_conflicts (string): Write behavior for name conflicts:
            - 'suffix' = Add suffix to name conflict (default)
            - 'overwrite' = Overwrite name conflict
            - 'skip' = Skip name conflict
        crop_3D (bool): crop 3D acquisitions (default: False)
        lossless (bool): Losslessly scale 16-bit integers to use dynamic range (default: True)
        big_endian (string): Byte order:
            - 'optimal' or 'native' = optimal/native byte order (default)
            - 'little-end' = little endian
            - 'big-end' = big endian
        xml (bool): Slicer format features (default: False)
        
        Returns:
            None
    '''

    # Empty list
    conv_cmd = list()

    # Get OS platform
    if platform.system().lower() == 'windows':
        conv_cmd.append("dcm2niix.exe")
    else:
        conv_cmd.append("dcm2niix")

    # Boolean True/False options arrays
    bool_opts = [bids, anon_bids, gzip, comment, adjacent, nrrd, ignore_2D, merge_2D, text, verbose, lossless, progress, xml]
    bool_vars = ["-b", "-ba", "-z", "-c", "-a", "-e", "-i", "-m", "-t", "-v", "-l", "--progress", "--xml"]

    # Initial option(s)
    if cprss_lvl:
        conv_cmd.append(f"-{cprss_lvl}")

    # Required option(s)
    if basename:
        conv_cmd.append("-f")
        conv_cmd.append(f"{basename}")

    if basename:
        conv_cmd.append("-f")
        conv_cmd.append(f"{basename}")

    if out_dir:
        conv_cmd.append("-o")
        conv_cmd.append(f"{out_dir}")

    # Keyword option(s)
    if write_conflicts.lower() == "suffix":
        conv_cmd.append("-w")
        conv_cmd.append("2")
    elif write_conflicts.lower() == "overwrite":
        conv_cmd.append("-w")
        conv_cmd.append("1")
    elif write_conflicts.lower() == "skip":
        conv_cmd.append("-w")
        conv_cmd.append("0")

    if big_endian.lower() == "optimal" or big_endian.lower() == "native":
        conv_cmd.append("--big_endian")
        conv_cmd.append("o")
    elif big_endian.lower() == "little-end":
        conv_cmd.append("--big_endian")
        conv_cmd.append("n")
    elif big_endian.lower() == "big-end":
        conv_cmd.append("--big_endian")
        conv_cmd.append("y")


    for idx,var in enumerate(bool_opts):
        if var:
            conv_cmd.append(bool_vars[idx])
            conv_cmd.append("y")

    # Required arguments
    # Filename
    conv_cmd.append("-f")
    conv_cmd.append(f"{basename}")

    # Output directory
    conv_cmd.append("-o")
    conv_cmd.append(f"{out_dir}")

    # Image file   
    conv_cmd.append(f"{file}")

    # System Call to dcm2niix (assumes dcm2niix is added to system path variable)
    subprocess.call(conv_cmd)

In [33]:
f = "C:/Users/smart/Desktop/GitProjects/convsauce/IRC287H-8/20171003/303_CORONAL_2017100310262626000/MR0303000001.dcm"

In [34]:
out=os.getcwd()

In [35]:
base="test_tmp"

In [42]:
convert_image_data(f,base,out)

In [26]:
import utils

In [None]:
utils.