In [26]:
import os
import sys
import platform
import glob
from typing import List, Dict, Optional, Union, Tuple

In [12]:
os.getcwd()

'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\bin'

In [19]:
test_dir = os.getcwd() + "/../tests/img.test.dir"

In [4]:
import utils.img_dir as id

In [6]:
class SubDataInfo():
    '''Class instance that creates a data object that organizes a subject's 
    identification (ID) number, session ID number, and the 
    path to the image data directory. This information is then stored for 
    each separate class instance, and can be accessed as shown in the example
    usage.
    
    Usage example:
        >>> sub_info = SubDataInfo(sub="002",
        ...                        data="<path/to/img/data>",
        ...                        ses="001")
        >>> sub_info.sub
        "002"
        >>> 
        >>> sub_info.ses
        "001"
    '''

    def __init__(self,
                 sub: str,
                 data: str,
                 ses: Optional[str] = None):
        '''Init doc-string for the 'SubDataInfo' class. 
        
        Arguments:
            sub: Subject ID.
            data: Path to image data directory.
            ses: Session ID.
        '''
        self.sub: str = sub
        self.data: str = data
        if ses:
            self.ses: str = ses
        else:
            self.ses: str = ""
    
    def __repr__(self):
        '''NOTE: Returns string represented as dictionary.'''
        return (str({"sub": self.sub,
                     "ses": self.ses,
                     "data": self.data}))

In [94]:
# Deconstruct this function

def collect_info(parent_dir: str) -> List[SubDataInfo]:
    '''Collects image data information for each subject for a study, 
    provided there exists some parent directory.

    Usage example:
        >>> data = collect_info("<parent/directory>")
        >>>
        >>> data[0].sub
        "<subject_ID>"
        >>> 
        >>> data[0].data
        "<path/to/data>"
        >>> 
        >>> data[0].ses
        "<session_ID>"
    
    Arguments:
        parent_dir: Parent directory that contains each subject.
        
    Returns:
        List/Array of SubDataInfo objects that corresponds to a subject ID, 
            session ID, and path to medical image directory.
    '''
    
    parent_dir: str = os.path.abspath(parent_dir)
    data: List[SubDataInfo] = []

    if 'windows' in platform.platform().lower():
        path_sep = "\\"
    else:
        path_sep = "/"
    
    # Get image directory information
    [dir_list, id_list] = id.img_dir_list(directory=parent_dir,
                                          verbose=False)

    # Iterate through each subject image directory
    for img_dir in dir_list:
        # Set empty variables
        sub: str = ""
        ses: str = ""
        img_list: List[str] = []
        tmp_list: List[str] = []
        
        # Get subject and session ID from file path
        try:
            [sub, ses] = img_dir.replace(parent_dir + path_sep,"").split(sep=path_sep)[0].split(sep="-")
        except ValueError:
            ses = ""
            sub = img_dir.replace(parent_dir + path_sep,"").split(sep=path_sep)[0]
                
    
        # Need function here to glob and grab individual files
        tmp_list: List[str] = glob_img(img_dir)
        img_list.extend(tmp_list)
        
        # Need function here to exclude files
        
        for img in img_list:
            # Collect and organize each subjects' session and data
            sub_info: SubDataInfo = SubDataInfo(sub=sub,data=img,ses=ses)
            data.append(sub_info)
        
        # Clear variables
        del sub, ses
        
    return data

In [20]:
parent_dir = test_dir

In [21]:
data: List = []

if 'windows' in platform.platform().lower():
    path_sep = "\\"
else:
    path_sep = "/"

# Get image directory information
[dir_list, id_list] = id.img_dir_list(directory=parent_dir,
                                      verbose=False)

In [25]:
dir_list[1]

'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI'

In [27]:
dir_search = os.path.join(dir_list[1],"*.nii*")
glob.glob(dir_search)

['C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_32dir.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_68dir.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_sbref.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\FLAIR.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\rs-func.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\rs-func_sbref.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\SWI.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\T1.nii.gz',
 'C:\\Users\\smart\\Des

In [29]:
dir_search = os.path.join(dir_list[2],"*.PAR*")
glob.glob(dir_search)

['C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC\\DWI_32dir.PAR',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC\\DWI_68dir.PAR',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC\\DWI_sbref.PAR',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC\\FLAIR.PAR',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC\\rs-func.PAR',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC\\rs-func_sbref.PAR',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC\\SWI.PAR',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\PAR REC\\T1.PAR',
 'C:\\Users\\smart\\Desktop\\pr

In [30]:
dir_list[0]

'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701'

In [33]:
dir_search = os.path.join(dir_list[0],"*")
dcm_dir_list = glob.glob(dir_search)
dcm_dir_list

['C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\DWI_32dir',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\DWI_68dir',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\DWI_sbref',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\FLAIR',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\rs-func',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\rs-func_sbref',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\SWI',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\T1',

In [34]:
dcm_files = []

In [35]:
for dir_ in dcm_dir_list:
# print(dir_)
    for root, dirs, files in os.walk(dir_):
        tmp_dcm_file = files[0] # only need the first dicom file
        tmp_dcm_dir = root
        tmp_file = os.path.join(tmp_dcm_dir, tmp_dcm_file)

        dcm_files.append(tmp_file)
        break
dcm_files

['C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\DWI_32dir\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\DWI_68dir\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\DWI_sbref\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\FLAIR\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\rs-func\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\rs-func_sbref\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\DICOM\\20211701\\SWI\\00000001.dcm',
 'C:\\Users\\smart\\D

In [48]:
def glob_dcm(dcm_dir: str) -> List[str]:
    '''Globs subject DICOM data directories for the top-most DICOM file
    in each respective directory.
    
    Example usage:
        >>> dcm_files = glob_dcm(dcm_dir)
        
    Arguments:
        dcm_dir: Subject DICOM data directory.
        
    Returns:
        List of strings of image files.
    '''
    dcm_dir: str = os.path.abspath(dcm_dir)
    dir_search: str = os.path.join(dcm_dir,"*")
    dcm_dir_list: List[str] = glob.glob(dir_search)
    
    dcm_files: List[str] = []
    
    for dir_ in dcm_dir_list:
        for root, dirs, files in os.walk(dir_):
            # only need the first DICOM file
            tmp_dcm_file = files[0]
            tmp_dcm_dir = root
            tmp_file = os.path.join(tmp_dcm_dir, tmp_dcm_file)

            dcm_files.append(tmp_file)
            break
    return dcm_files

In [46]:
glob_dcm(dir_list[1])

[]

In [89]:
def glob_img(img_dir: str) -> List[str]:
    '''Globs image data files given a subject image data directory.
    The image file types that are search for are:
        * DICOMs
        * PAR RECs
        * NIFTIs
    
    Example usage:
        >>> img_list = glob_img(img_dir)
        
    Arguments:
        img_dir: Path to image directory.
        
    Returns:
        List of strings of file paths to images.
    '''
    
    # Listed in most desirable order
    img_types: List[str] = [ "dcm", "PAR", "nii" ]
        
    img_list: List[str] = []
        
    for img_type in img_types:
                
        dir_search: str = os.path.join(img_dir,f"*.{img_type}*")
        tmp_list: List[str] = glob.glob(dir_search)
        img_list.extend(tmp_list)
        
        tmp_list: List[str] = glob_dcm(dcm_dir=img_dir)
        img_list.extend(tmp_list)
    
    return img_list

In [53]:
glob_img(dir_list[3])

['C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701\\DWI_32dir\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701\\DWI_68dir\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701\\DWI_sbref\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701\\FLAIR\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701\\rs-func\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701\\rs-func_sbref\\00000001.dcm',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701\\SWI\\00000001.dcm',
 'C:\\Users\\smart\\D

In [91]:
data = collect_info(parent_dir=test_dir)

In [92]:
len(data)

720

In [93]:
data[70]

{'sub': '002', 'ses': '001', 'data': 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\002-001\\DICOM\\20211701\\task-2-func\\00000001.dcm'}