In [1]:
import os
import sys
import platform
import glob
from typing import (
    List, 
    Dict, 
    Optional, 
    Union, 
    Tuple
)

In [29]:
os.getcwd()

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests'

In [8]:
test_dir = "img.test.dir/"
test_dir = os.path.abspath(test_dir)

In [9]:
mod_path = "../bin"

In [10]:
# os.listdir(mod_path)
sys.path.append(mod_path)

In [11]:
import convert_source as cs

In [6]:
c_file = "../config.default.yml"

In [7]:
# Unpack the three values returned by read_config for the default
# configuration file.
search_dict, exclude_list, meta_dict = cs.read_config(
    config_file=c_file,
    verbose=True,
)

Initialized parameters from configuration file
Categorizing search terms
Exclusion option implemented
Including additional settings for metadata


In [25]:
import utils.img_dir as id

In [9]:
# os.listdir(test_dir)
[dir_list, id_list] = id.img_dir_list(directory=test_dir,verbose=False)

In [10]:
dir_list[0]

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701'

In [11]:
# Path separator of the running interpreter's OS. os.sep is the standard
# library's source for this value, so there is no need to guess it from
# the platform description string.
path_sep = os.sep

In [12]:
# path_sep = "/"

In [13]:
# Isolate subject and session ID
os.path.dirname(os.path.dirname(dir_list[0].replace(test_dir + path_sep,""))).split(sep="-")

['001', '001']

In [14]:
# Isolate subject and session ID
# os.path.dirname(os.path.basename(dir_list[0].replace(test_dir + path_sep,"")))
os.path.dirname(os.path.dirname(dir_list[0]))

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001'

In [16]:
[sub, ses] = os.path.dirname(os.path.dirname(dir_list[0].replace(test_dir + path_sep,""))).split(sep="-")

In [17]:
sub

'001'

In [18]:
os.path.dirname(
    os.path.dirname(
        os.path.dirname(
            dir_list[9].replace(test_dir + path_sep,"")))).split(sep="-")

['900XXT5']

In [19]:
os.path.dirname(dir_list[9]).replace(test_dir + path_sep,"").split(sep="-")

['900XXT5\\DICOM\\20211701']

In [28]:
def glob_dcm(dcm_dir: str) -> List[str]:
    '''Globs subject DICOM data directories for the top-most DICOM file
    in each respective directory.

    Each entry directly beneath ``dcm_dir`` is walked top-down and the
    first file found is recorded (the alphabetically first file of the
    first directory that contains any file). Entries that contain no files
    at all, and entries that are plain files rather than directories,
    contribute nothing.

    NOTE: No check of file name, extension or content is performed; the
    file is only assumed to be a DICOM file.

    Example usage:
        >>> dcm_files = glob_dcm(dcm_dir)

    Arguments:
        dcm_dir: Subject DICOM data directory.

    Returns:
        List of strings of image files (absolute paths), one per
        sub-directory that contains at least one file, ordered by
        sub-directory name.
    '''
    dcm_dir = os.path.abspath(dcm_dir)

    # Sorted so the result does not depend on file system listing order.
    dcm_dir_list: List[str] = sorted(glob.glob(os.path.join(dcm_dir, "*")))

    dcm_files: List[str] = []

    for dir_ in dcm_dir_list:
        for root, dirs, files in os.walk(dir_):
            # In-place sort makes the top-down descent order deterministic.
            dirs.sort()

            # A directory level without files (empty, or holding only
            # further sub-directories) must not raise; keep descending.
            if not files:
                continue

            # Only need the first DICOM file
            dcm_files.append(os.path.join(root, sorted(files)[0]))
            break

    return dcm_files

In [29]:
def glob_img(img_dir: str) -> List[str]:
    '''Globs image data files given a subject image data directory.
    The image file types that are searched for are:
        * DICOMs
        * PAR RECs
        * NIFTIs

    Files directly inside ``img_dir`` are matched by extension pattern
    (``*.dcm*``, ``*.PAR*``, ``*.nii*``). In addition, ``glob_dcm`` is
    called once to pick up one file from each sub-directory of
    ``img_dir``.

    Example usage:
        >>> img_list = glob_img(img_dir)

    Arguments:
        img_dir: Path to image directory.

    Returns:
        List of strings of file paths to images, without duplicates, in
        the order: DICOMs, PAR RECs, NIFTIs.
    '''

    # Listed in most desirable order
    img_types: List[str] = ["dcm", "PAR", "nii"]

    img_list: List[str] = []

    for img_type in img_types:
        dir_search: str = os.path.join(img_dir, f"*.{img_type}*")
        img_list.extend(glob.glob(dir_search))

        if img_type == "dcm":
            # Sub-directory DICOM lookup does not depend on the extension
            # being iterated, so it is done exactly once (grouped with the
            # other DICOM results) instead of once per image type, which
            # would return every such file several times.
            img_list.extend(glob_dcm(dcm_dir=img_dir))

    # Drop any remaining duplicates (e.g. a file name matching more than
    # one pattern) while keeping first-seen order.
    return list(dict.fromkeys(img_list))

In [None]:
class SubInfoError(Exception):
    '''Exception raised when required subject information is not specified.'''

In [2]:
class SubDataInfo():
    '''Class instance that creates a data object that organizes a subject's 
    identification (ID) number, session ID number, and the 
    path to the image data directory. This information is then stored for 
    each separate class instance, and can be accessed as shown in the example
    usage.
    
    Usage example:
        >>> sub_info = SubDataInfo(sub="002",
        ...                        data="<path/to/img/data>",
        ...                        ses="001")
        >>> sub_info.sub
        "002"
        >>> 
        >>> sub_info.ses
        "001"
    '''

    def __init__(self,
                 sub: Union[str,int],
                 data: str,
                 ses: Optional[Union[str,int]] = None):
        '''Init doc-string for the 'SubDataInfo' class. 
        
        Arguments:
            sub: Subject ID. Stored as a string; integer IDs (including 0)
                are accepted.
            data: Path to image data directory.
            ses: Session ID. Stored as a string; integer IDs (including 0)
                are accepted. Defaults to an empty string when omitted.

        Raises:
            SubInfoError: If the subject ID is None or an empty string, or
                if no data path is given.
        '''
        # Test for "missing" explicitly rather than by truthiness, so that
        # the integer ID 0 is not mistaken for an unspecified value.
        if sub is None or str(sub) == "":
            raise SubInfoError("Subject ID was not specified")
        self.sub: str = str(sub)

        if not data:
            raise SubInfoError("Subject data was not specified.")
        self.data: str = data

        # Session ID is optional; an omitted session is stored as "".
        self.ses: str = "" if ses is None else str(ses)
    
    def __repr__(self):
        '''NOTE: Returns string represented as dictionary.'''
        return (str({"sub": self.sub,
                     "ses": self.ses,
                     "data": self.data}))

In [31]:
def img_exclude(img_list: List[str],
               exclusion_list: Optional[List[str]] = None
               ) -> List[str]:
    '''Constructs a new list with files that DO NOT contain words in the exclusion list.
    Should this list be empty, then the original input list is returned.
    
    NOTE: Exclusion list keywords are case sensitive.
    NOTE: Keywords are matched as substrings of the whole file path.
    NOTE: Empty keywords are ignored, since an empty string is a substring
        of every path and would otherwise exclude every file.
    
    Usage example:
        >>> new_img_list = img_exclude(img_list, ["SWI", "PD","ProtonDensity"])
        
    Arguments:
        img_list: Input list of paths to image files.
        exclusion_list: Exclusion list that consists of keywords used to exclude files. 
        
    Returns:
        List of image files that do not contain words in the exclusion list.
        When filtering is applied, duplicates are removed and the input
        order of the remaining files is preserved.
    '''
    if not exclusion_list:
        return img_list

    keywords: List[str] = [word for word in exclusion_list if word]
    if not keywords:
        return img_list

    kept: List[str] = [img for img in img_list
                       if not any(word in img for word in keywords)]

    # De-duplicate while keeping first-seen order, so the result is
    # deterministic instead of depending on set iteration order.
    return list(dict.fromkeys(kept))

In [32]:
def collect_info(parent_dir: str,
                exclusion_list: Optional[List[str]] = None
                ) -> List[SubDataInfo]:
    '''Collects image data information for each subject for a study, 
    provided there exists some parent directory. Certain image files 
    can be excluded provided a list of exclusion keywords/terms.

    The subject and session IDs are taken from the name of the first
    directory level beneath ``parent_dir``: a name of the form
    ``<sub>-<ses>`` (exactly one hyphen) gives both IDs, any other name is
    used whole as the subject ID with an empty session ID.
    
    NOTE: Exclusion list keywords/terms are case sensitive.

    Usage example:
        >>> data = collect_info("<parent/directory>",
        ...                     ["SWI", "PD", "ProtonDensity"])
        >>>
        >>> data[0].sub
        "<subject_ID>"
        >>> 
        >>> data[0].data
        "<path/to/data>"
        >>> 
        >>> data[0].ses
        "<session_ID>"
    
    Arguments:
        parent_dir: Parent directory that contains each subject.
        exclusion_list: Keywords/terms used to exclude image files.
        
    Returns:
        List/Array of SubDataInfo objects that corresponds to a subject ID, 
            session ID, and path to an image file (one object per file).
    '''
    
    parent_dir = os.path.abspath(parent_dir)
    data: List[SubDataInfo] = []

    # Get image directory information (the second returned value is not
    # needed here).
    dir_list, _ = id.img_dir_list(directory=parent_dir,
                                  verbose=False)

    # Iterate through each subject image directory
    for img_dir in dir_list:
        # First path component below the parent directory. relpath is used
        # instead of string replacement so that differences in separator
        # style or drive-letter case cannot leave the prefix in place.
        top_dir: str = os.path.relpath(img_dir, parent_dir).split(os.sep)[0]
        if top_dir in (os.curdir, os.pardir):
            # Directory is not inside a subject directory: no ID available.
            top_dir = ""

        # Get subject and session ID from the directory name
        id_parts: List[str] = top_dir.split("-")
        if len(id_parts) == 2:
            sub, ses = id_parts
        else:
            sub, ses = top_dir, ""

        # Glob and grab individual files, then exclude files
        img_list: List[str] = img_exclude(img_list=glob_img(img_dir=img_dir),
                                          exclusion_list=exclusion_list)

        # Collect and organize each subjects' session and data
        data.extend(SubDataInfo(sub=sub, data=img, ses=ses)
                    for img in img_list)

    return data

In [18]:
# class SubDataInfo():
#     '''Class instance that creates a data object that organizes a subject's 
#     identification (ID) number, session ID number, and the 
#     path to the image data directory. This information is then stored for 
#     each separate class instance, and can be accessed as shown in the example
#     usage.
    
#     Usage example:
#         >>> sub_info = SubDataInfo(sub="002",
#         ...                        data="<path/to/img/data>",
#         ...                        ses="001")
#         >>> sub_info.sub
#         "002"
#         >>> 
#         >>> sub_info.ses
#         "001"
#     '''

#     def __init__(self,
#                  sub: str,
#                  data: str,
#                  ses: Optional[str] = None):
#         '''Init doc-string for the 'SubDataInfo' class. 
        
#         Arguments:
#             sub: Subject ID.
#             data: Path to image data directory.
#             ses: Session ID.
#         '''
#         self.sub: str = sub
#         self.data: str = data
#         if ses:
#             self.ses: str = ses
#         else:
#             self.ses: str = ""
    
#     def __repr__(self):
#         '''NOTE: Returns string represented as dictionary.'''
#         return (str({"sub": self.sub,
#                      "ses": self.ses,
#                      "data": self.data}))

In [33]:
exclusion_list = [ "DWI", "FLAIR", "SWI"]

In [36]:
data = collect_info(parent_dir=test_dir, exclusion_list=exclusion_list)

In [37]:
len(data)

252

In [43]:
data[0]

{'sub': '001', 'ses': '001', 'data': 'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\task-func_sbref\\00000001.dcm'}

In [58]:
t = {}

In [57]:
t

{'sub': '001'}

In [20]:
s = SubDataInfo("001",dir_list[0],"001")

In [24]:
s.data

'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701'

In [25]:
t = []; t.append(s)

In [26]:
s = SubDataInfo("002",dir_list[1],"001")

In [28]:
s.data

'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI'

In [29]:
t.append(s)

In [32]:
t[1].sub

'002'

In [20]:
data = collect_info(parent_dir=test_dir)

In [21]:
len(data)

36

In [23]:
data

[{'sub': '001', 'ses': '001', 'data': 'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701'},
 {'sub': '001', 'ses': '001', 'data': 'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI'},
 {'sub': '001', 'ses': '001', 'data': 'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC'},
 {'sub': '002', 'ses': '001', 'data': 'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701'},
 {'sub': '002', 'ses': '001', 'data': 'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\NIFTI'},
 {'sub': '002', 'ses': '001', 'data': 'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\PAR REC'},
 {'sub': '003', 'ses': '001', 'data': 'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\

In [15]:
data[1].data

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI'

In [25]:
data[1].ses

'001'

In [26]:
data[1].sub

'001'

In [27]:
# Print a "sub-<ID>_ses-<ID>" label for every collected entry; entries
# lacking either attribute are skipped.
for sub_info in data:
    try:
        print(f"sub-{sub_info.sub}_ses-{sub_info.ses}")
        # print(f"sub-{sub_info.sub}")
    except AttributeError:
        pass

sub-001_ses-001
sub-001_ses-001
sub-001_ses-001
sub-002_ses-001
sub-002_ses-001
sub-002_ses-001
sub-003_ses-001
sub-003_ses-001
sub-003_ses-001
sub-900XXT5_ses-
sub-900XXT5_ses-
sub-900XXT5_ses-
sub-901XXP5_ses-
sub-901XXP5_ses-
sub-901XXP5_ses-
sub-902XXY8_ses-
sub-902XXY8_ses-
sub-902XXY8_ses-
sub-C01_ses-001
sub-C01_ses-001
sub-C01_ses-001
sub-C02_ses-001
sub-C02_ses-001
sub-C02_ses-001
sub-C03_ses-001
sub-C03_ses-001
sub-C03_ses-001
sub-P01_ses-001
sub-P01_ses-001
sub-P01_ses-001
sub-P02_ses-001
sub-P02_ses-001
sub-P02_ses-001
sub-P03_ses-001
sub-P03_ses-001
sub-P03_ses-001


In [11]:
dir_list[0]

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701'

In [12]:
dir_list[1]

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI'

In [13]:
dir_list[2]

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC'

In [14]:
dir_list[9]

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\900XXT5\\DICOM\\20211701\\NAME^ 20201701'

In [15]:
from pathlib import Path

In [24]:
# Edge case 1 - DICOM
os.path.basename(Path(dir_list[0]).parents[1])

'001-001'

In [26]:
# Base case [likely NIFTI and PAR REC]
os.path.basename(Path(dir_list[1]).parents[0])

'001-001'

In [29]:
# Edge case 2 - DICOMs in child directory
os.path.basename(Path(dir_list[9]).parents[2])

'900XXT5'

In [35]:
os.path.basename(Path(dir_list[9]).parents[2])

'900XXT5'

In [38]:
os.path.basename(dir_list[1])

'NIFTI'

In [47]:
dir_list[1]

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI'

In [42]:
os.path.basename(Path(os.path.dirname(dir_list[0])).parents[0])

'001-001'

In [48]:
Path(dir_list[1]).parents[0]

WindowsPath('c:/Users/smart/Desktop/projects/cs_med_data/convert_source/tests/img.test.dir/001-001')

In [62]:
Path(os.path.dirname(dir_list[1])).parents[0]

WindowsPath('c:/Users/smart/Desktop/projects/cs_med_data/convert_source/tests/img.test.dir')

In [66]:
str(Path(dir_list[1])).split("\\")

['c:',
 'Users',
 'smart',
 'Desktop',
 'projects',
 'cs_med_data',
 'convert_source',
 'tests',
 'img.test.dir',
 '001-001',
 'NIFTI']

In [74]:
str(Path(dir_list[0])).split("\\")[-3]

'001-001'

In [73]:
str(Path(dir_list[9])).split("\\")[-4]

'900XXT5'

In [81]:
os.path.dirname(os.path.dirname(os.path.dirname(dir_list[0])))

'c:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir'

In [91]:
str(Path(dir_list[0]).parent).split("\\")

['c:',
 'Users',
 'smart',
 'Desktop',
 'projects',
 'cs_med_data',
 'convert_source',
 'tests',
 'img.test.dir',
 '001-001',
 'DICOM']

In [100]:
Path(dir_list[0].replace(test_dir + "\\","")).parent

WindowsPath('001-001/DICOM')

In [101]:
dir_list[0].replace(test_dir + "\\","")

'001-001\\DICOM\\20211701'

In [102]:
dir_list[1].replace(test_dir + "\\","")

'001-001\\NIFTI'

In [104]:
path_sep = "\\"

In [109]:
dir_list[9].replace(test_dir + "\\","").split(path_sep)[0].split("-")

['900XXT5']

In [108]:
dir_list[0].replace(test_dir + "\\","").split(path_sep)[0].split("-")

['001', '001']

In [110]:
dir_list[1].replace(test_dir + "\\","").split(path_sep)[0].split("-")

['001', '001']