In [1]:
import os
import sys
import platform
import glob
from typing import( 
    List, 
    Dict, 
    Optional, 
    Union, 
    Tuple, 
    Set
)

In [2]:
os.getcwd()

'/Users/adebayobraimah/Desktop/projects/convert_source/work_dir'

In [3]:
test_dir = os.getcwd() + "/../tests/img.test.dir"

In [8]:
from ..utils.img_dir import img_dir

ImportError: attempted relative import with no known parent package

In [5]:
def glob_dcm(dcm_dir: str) -> List[str]:
    '''Globs subject DICOM data directories for the top-most DICOM file
    in each respective directory.
    
    Example usage:
        >>> dcm_files = glob_dcm(dcm_dir)
        
    Arguments:
        dcm_dir: Subject DICOM data directory.
        
    Returns:
        List of strings of image files.
    '''
    dcm_dir: str = os.path.abspath(dcm_dir)
    dir_search: str = os.path.join(dcm_dir,"*")
    dcm_dir_list: List[str] = glob.glob(dir_search)
    
    dcm_files: List[str] = []
    
    for dir_ in dcm_dir_list:
        for root, dirs, files in os.walk(dir_):
            # only need the first DICOM file
            tmp_dcm_file = files[0]
            tmp_dcm_dir = root
            tmp_file = os.path.join(tmp_dcm_dir, tmp_dcm_file)

            dcm_files.append(tmp_file)
            break
            
    return dcm_files

In [6]:
def glob_img(img_dir: str) -> List[str]:
    '''Globs image data files given a subject image data directory.
    The image file types that are search for are:
        * DICOMs
        * PAR RECs
        * NIFTIs
    
    Example usage:
        >>> img_list = glob_img(img_dir)
        
    Arguments:
        img_dir: Path to image directory.
        
    Returns:
        List of strings of file paths to images.
    '''
    
    # Listed in most desirable order
    img_types: List[str] = [ "dcm", "PAR", "nii" ]
        
    img_list: List[str] = []
        
    for img_type in img_types:
                
        dir_search: str = os.path.join(img_dir,f"*.{img_type}*")
        tmp_list: List[str] = glob.glob(dir_search)
        img_list.extend(tmp_list)
        
        tmp_list: List[str] = glob_dcm(dcm_dir=img_dir)
        img_list.extend(tmp_list)
    
    return img_list

In [7]:
class SubDataInfo():
    '''Class instance that creates a data object that organizes a subject's 
    identification (ID) number, session ID number, and the 
    path to the image data directory. This information is then stored for 
    each separate class instance, and can be accessed as shown in the example
    usage.
    
    Usage example:
        >>> sub_info = SubDataInfo(sub="002",
        ...                        data="<path/to/img/data>",
        ...                        ses="001")
        >>> sub_info.sub
        "002"
        >>> 
        >>> sub_info.ses
        "001"
    '''

    def __init__(self,
                 sub: str,
                 data: str,
                 ses: Optional[str] = None):
        '''Init doc-string for the 'SubDataInfo' class. 
        
        Arguments:
            sub: Subject ID.
            data: Path to image data directory.
            ses: Session ID.
        '''
        self.sub: str = sub
        self.data: str = data
        if ses:
            self.ses: str = ses
        else:
            self.ses: str = ""
    
    def __repr__(self):
        '''NOTE: Returns string represented as dictionary.'''
        return (str({"sub": self.sub,
                     "ses": self.ses,
                     "data": self.data}))

In [82]:
def img_exclude(img_list: List[str],
               exclusion_list: Optional[List[str]] = None
               ) -> List[str]:
    '''Constructs a new list with files that DO NOT contain words in the exclusion list.
    Should this list be empty, then the original input list is returned.
    
    Usage example:
        >>> new_img_list = img_exclude(img_list, ["SWI", "PD","ProtonDensity"])
        
    Arguments:
        img_list: Input list of paths to image files.
        exclusion_list: Exclusion list that consists of keywords used to exclude files. 
        
    Returns:
        List of image files that do not contain words in the exclusion list.
    '''
    if exclusion_list is None or len(exclusion_list) == 0:
        return img_list
    else:
        # Init set of images
        img_set: Set = set(img_list)

        # Init empty set
        exclusion_set: Set = set()
        
        # Temporary list
        tmp_list: List[str] = []
        
        # Iterate through exclusion and image lists and remove images in the exclusion list
        for file in exclusion_list:
            for img in img_list:
                if file.lower() in img.lower():
                    tmp_list.append(img)
            exclusion_set.update(set(tmp_list))
        return list(img_set.difference(exclusion_set))

In [83]:
def collect_info(parent_dir: str,
                exclusion_list: Optional[List[str]] = None
                ) -> List[SubDataInfo]:
    '''Collects image data information for each subject for a study, 
    provided there exists some parent directory. Certain image files 
    can be excluded provided a list of exclusion keywords/terms.

    Usage example:
        >>> data = collect_info("<parent/directory>",
        ...                     ["SWI", "PD", "ProtonDensity"])
        >>>
        >>> data[0].sub
        "<subject_ID>"
        >>> 
        >>> data[0].data
        "<path/to/data>"
        >>> 
        >>> data[0].ses
        "<session_ID>"
    
    Arguments:
        parent_dir: Parent directory that contains each subject.
        
    Returns:
        List/Array of SubDataInfo objects that corresponds to a subject ID, 
            session ID, and path to medical image data.
    '''
    
    parent_dir: str = os.path.abspath(parent_dir)
    data: List[SubDataInfo] = []

    if 'windows' in platform.platform().lower():
        path_sep = "\\"
    else:
        path_sep = "/"
    
    # Get image directory information
    [dir_list, id_list] = idir.img_dir_list(directory=parent_dir,
                                          verbose=False)

    # Iterate through each subject image directory
    for img_dir in dir_list:
        # Set empty variables
        sub: str = ""
        ses: str = ""
        img_list: List[str] = []
        tmp_list: List[str] = []
        
        # Get subject and session ID from file path
        try:
            [sub, ses] = img_dir.replace(parent_dir + path_sep,"").split(sep=path_sep)[0].split(sep="-")
        except ValueError:
            ses = ""
            sub = img_dir.replace(parent_dir + path_sep,"").split(sep=path_sep)[0]
                
    
        # Glob and grab individual files
        tmp_list: List[str] = glob_img(img_dir=img_dir)
        img_list.extend(tmp_list)
        
        # Exclude files
        img_list = img_exclude(img_list=img_list,
                              exclusion_list=exclusion_list)
        
        for img in img_list:
            # Collect and organize each subjects' session and data
            sub_info: SubDataInfo = SubDataInfo(sub=sub,data=img,ses=ses)
            data.append(sub_info)
        
        # Clear variables
        del sub, ses
        
    return data

In [84]:
exclusion_list = [ "DWI", "FLAIR", "SWI"]

In [85]:
data = collect_info(parent_dir=test_dir, exclusion_list=exclusion_list)

In [86]:
len(data)

252

In [87]:
data[70]

{'sub': '900XXT5', 'ses': '', 'data': 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\900XXT5\\NIFTI\\NAME^ 20201701\\task-2-func.nii.gz'}

In [21]:
parent_dir = test_dir

In [22]:
[dir_list, id_list] = id.img_dir_list(directory=parent_dir,verbose=False)

In [17]:
dir_list[1]

'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI'

In [18]:
img_list = []
img_list.extend(glob_img(dir_list[1]))
img_list

['C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_32dir.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_68dir.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_sbref.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\FLAIR.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\rs-func.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\rs-func_sbref.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\SWI.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\T1.nii.gz',
 'C:\\Users\\smart\\Des

In [19]:
exclusion_list = [ "DWI", "FLAIR"]

In [20]:
# image set
img_set = set(img_list)

# create empty sets
currated_set = set()
exclusion_set = set()

In [54]:
tmp_list = []
for file in exclusion_list:
    for img in img_list:
        if file in img:
            # do stuff
            # tmp_set = set(img)
            # print(img)
            tmp_list.append(img)
    exclusion_set.update(set(tmp_list))
# img_set.difference(exclusion_set)
list(img_set.difference(exclusion_set))

['C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\T2.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\task-func_sbref.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\task-2-func.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\rs-func.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\T1.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\SWI.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\rs-func_sbref.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\task-1-func.nii.gz']

In [49]:
# tmp_set = set(dir_list)

In [50]:
exclusion_set

{'-',
 '.',
 '0',
 '1',
 '2',
 '3',
 '6',
 '8',
 ':',
 'A',
 'C',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_32dir.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_68dir.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_sbref.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\FLAIR.nii.gz',
 'D',
 'F',
 'I',
 'L',
 'N',
 'R',
 'T',
 'U',
 'W',
 '\\',
 '_',
 'a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'i',
 'j',
 'k',
 'm',
 'n',
 'o',
 'p',
 'r',
 's',
 't',
 'u',
 'v',
 'z'}

In [24]:
if exclusion_list[0] in img_list[0]:
    print(True)

True


In [68]:
img_exclude(dir_list[0],["s"])

True

In [76]:
img_exclude(glob_img(dir_list[1]),["SWI", "DWI"])

['C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\T2.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\task-1-func.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\FLAIR.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\task-2-func.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\task-func_sbref.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\DWI_32dir.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\rs-func.nii.gz',
 'C:\\Users\\smart\\Desktop\\projects\\cs_med_data\\convert_source\\tests\\img.test.dir\\001-001\\NIFTI\\T1.nii.gz',
 'C:\\Users\\smart

In [78]:
len(img_exclude(glob_img(dir_list[1]),["SWI", "DWI"]))

8

In [3]:
from utils.command_utils import File

In [4]:
file: File = File("test.txt")

In [5]:
type(file.file)

str

In [6]:
file

test.txt

In [7]:
type(file)

utils.command_utils.File