In [918]:
import os
import pandas as pd
import fnmatch
import re

In [919]:
# BIGGO TO DO:
# trash bin for each filter

In [920]:
# NOTE: Possible names for each sequence

# "3D_T1": ['*SAG*T1*NII', '*SAG*FSPGR*3D*NII']
# "FLAIR": <ID>_{SAG | AX | COR}_[T1_ | T2_][3D_]FLAIR_<DOE>.nii
# "T2": <ID>_{SAG | AX | COR}_3D_T2_[<type>]<DOE>.nii
# "T2": <ID>_{SAG | AX | COR}_T2_[<type>]<DOE>_<tag>.nii
# "BOLD": <ID>_BOLD_<DOE>.nii
# "Diffusion kurtosis": <ID>_{AX}_DkI_<small#dirs>_<DOE>.nii
#                       <ID>_{AX}_DkI_<small#dirs>_<DOE>.bval
#                       <ID>_{AX}_DkI_<small#dirs>_<DOE>.bvec
# .                     <ID>_{AX}_DkI_<big#dirs>_<DOE>.nii
#                       <ID>_{AX}_DkI_<big#dirs>_<DOE>.bval
#                       <ID>_{AX}_DkI_<big#dirs>_<DOE>.bvec
# "Diffusion tensor": <ID>_{AX}_D{k|T}I_<#dir>_<DOE>.nii
#                     <ID>_{AX}_D{k|T}I_<#dir>_<DOE>.bval
#                     <ID>_{AX}_D{k|T}I_<#dir>_<DOE>.bvec
# "Diffusion weighted": <ID>_{AX}_DWI_<DOE>.nii
#                      <ID>_{AX}_DWI_<DOE>.bval
#                      <ID>_{AX}_DWI_<DOE>.bvec
# "ASL": <ID>_{SAG | AX | COR}_ASL_<DOE>.nii   
# "SWAN": <ID>_{SAG | AX | COR}_SWAN_<DOE>.nii
# "Carotid"
# "Aorta": <ID>_[CINE]_AORTA_VENC_<number>_<DOE>.nii

In [921]:
def get_sequence_name(sequence_info, notes=None):
    """
    Rebuild a file name from its list of tokens
    --------------------------------------------------------
    input:
        sequence_info: array-like
            tokens of the name; the last token is the file extension
        notes: string or None
            optional text appended directly after the joined tokens
            (no separator is added), just before the extension

    return:
        string : all tokens but the last joined with '_', then the notes
                 (if any), then '.' and the lower-cased last token
    """
    stem = '_'.join(sequence_info[:-1])
    extension = sequence_info[-1].lower()
    if notes:
        stem = stem + notes
    return stem + '.' + extension
    

In [922]:
def get_sequence_info(sequence_name):
    """
    Split a file name into its tokens
    --------------------------------------------------------
    input:
        sequence_name: string
            '-' and '.' are treated like '_' before splitting

    return:
        array-like or None: the non-empty tokens of the name, or None when
        the last token (checked before empty tokens are dropped) is neither
        'NII' nor 'JSON'
    """
    normalized = sequence_name.replace('-', '_').replace('.', '_')
    pieces = normalized.split('_')

    # The extension check runs on the raw split, so a trailing separator
    # (empty last piece) is rejected as well.
    if pieces[-1] not in ('NII', 'JSON'):
        return None

    return [piece for piece in pieces if piece]

# selects everything except the '.nii' part of the string

In [923]:
def extract_notes(sequence_name):
    """
    Remove one parenthesised note from a sequence name
    --------------------------------------------------------
    The note is delimited by the first ')' in the name and the closest '('
    that precedes it.

    input:
        sequence_name: string
            sequence name that may contain a note within parentheses

    return:
        string: the name with the parentheses and the note removed
                (unchanged when no matching pair of parentheses is found)
        string or None: the text that was inside the parentheses, or None
                when no matching pair of parentheses is found
    """
    ed = sequence_name.find(')')
    if ed == -1:
        return sequence_name, None

    # Only a '(' located before the ')' can open the note; an unmatched ')'
    # leaves the name untouched instead of corrupting it.
    op = sequence_name.rfind('(', 0, ed)
    if op == -1:
        return sequence_name, None

    # Read the note from the ORIGINAL string, before the span is cut out.
    notes = sequence_name[op + 1:ed]
    stripped = sequence_name[:op] + sequence_name[ed + 1:]

    return stripped, notes

In [924]:
def get_DOE(info):
    """
    Format the date of exam found at the end of a token list
    --------------------------------------------------------
    input:
        info: array-like
            tokens of a sequence name

    return:
        string: date of exam as YYYY/MM/DD, built from the first eight
                characters of the date token
    """
    # When the last token contains an 'I', the date is taken from the
    # token before it; otherwise the last token itself is the date.
    stamp = info[-2] if 'I' in info[-1] else info[-1]
    return '/'.join((stamp[0:4], stamp[4:6], stamp[6:8]))

In [925]:
def base_standard(o_name):
    """
    clean up the user input sequence name into a standardized naming convention for sorting later
        SAGITTAL --> SAG
           AXIAL --> AX
         CORONAL --> COR
         OBLIQUE --> OBL
        WHITTIER --> WH
             MEN --> MN
              RT --> RIGHT   (whole token only)
              LT --> LEFT    (whole token only)
    '-' and '.' are turned into '_', and an all-digit ID gets a 'WH' prefix.
    --------------------------------------------------------
    input:
        o_name: string

    return:
        string: standardized sequence file name
    """
    # Cohort and orientation words are replaced wherever they occur, so they
    # are also rewritten when glued to digits (e.g. MEN005 -> MN005).
    n_name = o_name.replace('MEN', 'MN').replace('WHITTIER', 'WH')
    n_name = (n_name.replace('SAGITTAL', 'SAG')
                    .replace('AXIAL', 'AX')
                    .replace('CORONAL', 'COR')
                    .replace('OBLIQUE', 'OBL'))

    info = n_name.replace('-', '_').replace('.', '_').split('_')

    # RT/LT are expanded only when they are a complete token. A plain
    # substring replace would corrupt unrelated words that contain the
    # letters, e.g. AORTA -> AORIGHTA.
    side_names = {'RT': 'RIGHT', 'LT': 'LEFT'}
    info = [side_names.get(token, token) for token in info]

    # ID does not follow designation [2-3 letter cohort designation][3-4 number designation]
    if info[0].isdigit():
        info[0] = 'WH' + info[0]

    return '_'.join(info)

In [926]:
# <ID>_{SAG | AX | COR}_[T1_ | T2_][3D_]FLAIR_[MIP_ | MIN_IP_]<DOE>[_<tag>].nii
def filt_T1(sequence_name, trash):
    """
    Decide whether a T1 / FSPGR file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    # TO-DO: how to handle
    # 1) NEUROMELANIN scans
    # 2) SE_MT
    # 3) BRAVO
    # 4) 2DTOF
    # 5) repeat?

    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    # Token just before the extension: without a '3D' token, a value starting
    # with 'I' (presumably an instance tag such as I00001) is lower-cased;
    # in every other case it has to be all digits.
    is_tagged_non_3d = '3D' not in tokens and tokens[-2][0] == 'I'
    if is_tagged_non_3d:
        tokens[-2] = tokens[-2].lower()
    elif not tokens[-2].isdigit():
        trash.append(sequence_name)
        return None

    rejected_words = ('AVG', 'MIN', 'POST_PROCESS', 'MT', '2DTOF', 'THIN', 'BRAVO', 'RE')
    plane_words = ('SAG', 'AX', 'COR', 'OBL')
    has_rejected_word = any(word in sequence_name for word in rejected_words)
    has_plane = any(word in sequence_name for word in plane_words)

    # Both checks are plain substring searches on the whole name.
    if has_rejected_word or not has_plane:
        trash.append(sequence_name)
        return None

    return get_sequence_name(tokens)

    
#TO-DO: repeated exam of the same date or at the different date
    

In [927]:
# Naming to be established
# <ID>_{SAG | AX | COR}_[T1_ | T2_][3D_]FLAIR_[MIP_ | MIN_IP_]<DOE>[_<tag>].nii
def filt_FLAIR(sequence_name, flair_3d, flair_2d, trash):
    """
    Decide whether a FLAIR file should be kept and record it as 3D or 2D
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        flair_3d : array-like
            List that receives kept names containing '3D'
        flair_2d : array-like
            List that receives kept names not containing '3D'
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    for word in ('AVG', 'MIN', 'MIP'):
        if word in sequence_name:
            trash.append(sequence_name)
            return None

    # Token just before the extension: lower-cased when it starts with 'I'
    # and there is no 'CUBE' token, otherwise it must be all digits.
    tail = tokens[-2]
    if 'CUBE' not in tokens and tail[0] == 'I':
        tokens[-2] = tail.lower()
    elif not tail.isdigit():
        trash.append(sequence_name)
        return None

    # A plane marker (substring match) is required to keep the file.
    if not any(plane in sequence_name for plane in ('SAG', 'AX', 'COR')):
        trash.append(sequence_name)
        return None

    bucket = flair_3d if '3D' in sequence_name else flair_2d
    bucket.append(sequence_name)

    return get_sequence_name(tokens)

#TO-DO: T1 has 3D, 2D

In [928]:
# Naming to be established:
# "T2": <ID>_{SAG | AX | COR}_3D_T2_[<type>]<DOE>.nii
# "T2": <ID>_{SAG | AX | COR}_T2_[<type>]<DOE>_<tag>.nii
def filt_T2(sequence_name, trash):
    """
    Decide whether a T2 file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    # Substring search on the whole name, not a per-token comparison.
    unwanted = ('AVG', 'MIN', 'MIP', 'TRACE', 'SE')
    if any(word in sequence_name for word in unwanted):
        trash.append(sequence_name)
        return None

    # Token just before the extension: lower-cased when it starts with 'I'
    # and there is no 'CUBE' token, otherwise it must be all digits.
    tail = tokens[-2]
    starts_with_i = 'CUBE' not in tokens and tail[0] == 'I'
    if starts_with_i:
        tokens[-2] = tail.lower()
    elif not tail.isdigit():
        trash.append(sequence_name)
        return None

    return get_sequence_name(tokens)

#TO-DO: T2 has difference pulse types :0 and some other things

In [929]:
def filt_DWI(sequence_name, trash):
    """
    Decide whether a DWI file should be kept
    --------------------------------------------------------
    Unlike the other filters, the name is split here directly, so the
    extension is not restricted to NII/JSON.

    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept,
            None when it is rejected
    """
    tokens = sequence_name.replace('-', '_').replace('.', '_').split('_')

    # Names without an 'AX' token get one right after the first token.
    if 'AX' not in tokens:
        tokens.insert(1, 'AX')

    # Reject names with more than six tokens, or whose token before the
    # extension is not all digits.
    if len(tokens) > 6 or not tokens[-2].isdigit():
        trash.append(sequence_name)
        return None

    # Join everything except the last token with '_' and append the
    # lower-cased last token as the extension.
    return get_sequence_name(tokens)

In [930]:
def filt_DTI(sequence_name, trash):
    """
    Standardize the name of a DTI file
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            Accepted for parity with the other filters; this filter
            currently rejects nothing, so it is never written to

    Returns:
        string or None
            The rebuilt (standardized) name, or None when the file is not
            a NII/JSON file
    """
    sequence_info = get_sequence_info(sequence_name)

    if sequence_info is None:
        return None

    # NOTE(review): an earlier version built a '_(30_DIRECTION)_' variant of
    # the name here and then discarded it; that dead assignment was removed.
    # Confirm whether the direction count was meant to be handled as a note.
    return get_sequence_name(sequence_info)

In [931]:
def filt_DKI(sequence_name, trash):
    """
    Decide whether a DKI file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    tokens = get_sequence_info(sequence_name)

    if tokens is not None:
        # Names containing 'EQ_1' are rejected.
        if 'EQ_1' not in sequence_name:
            return get_sequence_name(tokens)
        trash.append(sequence_name)

    return None

In [932]:
# BOLD naming convention: <ID>_BOLD_<DOE>.nii
def filt_bold(sequence_name, trash):
    """
    Decide whether a BOLD file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked; a parenthesised note, if present,
            is stripped before parsing and passed on to the rebuilt name
        trash : array-like
            List that receives the (note-stripped) names rejected by this
            filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    sequence_name, notes = extract_notes(sequence_name)

    sequence_info = get_sequence_info(sequence_name)

    if sequence_info is None:
        return None

    # Names too short to carry the expected tokens are treated as rejects
    # instead of raising IndexError and aborting the caller's loop.
    date_token = sequence_info[-2] if len(sequence_info) >= 2 else ''
    has_eq_marker = len(sequence_info) >= 3 and sequence_info[-3] == 'EQ'

    # If it contains EQ_1 or a letter attached to the end of the DOE,
    # then there is a previous one that already exists
    if has_eq_marker or not date_token.isdigit():
        trash.append(sequence_name)
        return None

    return get_sequence_name(sequence_info, notes)

In [933]:
def filt_asl(sequence_name, trash):
    """
    Decide whether an ASL file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked; a parenthesised note, if present,
            is stripped before parsing and passed on to the rebuilt name
        trash : array-like
            List that receives the (note-stripped) names rejected by this
            filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    sequence_name, notes = extract_notes(sequence_name)

    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    # Token just before the extension: lower-cased when it starts with 'I'
    # and there is no '3D' token; otherwise only its LAST character has to
    # be a digit.
    tail = tokens[-2]
    if '3D' not in tokens and tail[0] == 'I':
        tokens[-2] = tail.lower()
    elif not tail[-1].isdigit():
        trash.append(sequence_name)
        return None

    for word in ('DELAY', 'FL'):
        if word in sequence_name:
            trash.append(sequence_name)
            return None

    return get_sequence_name(tokens, notes)

In [934]:
def filt_swan(sequence_name, trash):
    """
    Decide whether a SWAN file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked; a parenthesised note, if present,
            is stripped before parsing and passed on to the rebuilt name
        trash : array-like
            List that receives the (note-stripped) names rejected by this
            filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    sequence_name, notes = extract_notes(sequence_name)

    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    # Token just before the extension: lower-cased when it starts with 'I'
    # and there is no '3D' token; otherwise its last character must be a
    # digit.
    wants_lowercase = '3D' not in tokens and tokens[-2][0] == 'I'
    if wants_lowercase:
        tokens[-2] = tokens[-2].lower()
    elif not tokens[-2][-1].isdigit():
        trash.append(sequence_name)
        return None

    if 'MIDBRAIN' in sequence_name:
        trash.append(sequence_name)
        return None

    return get_sequence_name(tokens, notes)

In [935]:
def filt_csf(sequence_name, trash):
    """
    Decide whether a CSF file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    sequence_info = get_sequence_info(sequence_name)

    if sequence_info is None:
        return None

    # NOTE(review): this function used to insert a 'PC' token in front of the
    # 'CSF' token and then re-parse the name, which threw the insertion away.
    # The lookup also raised ValueError for names where 'CSF' is only part of
    # a token. The dead insertion was removed, so returned names are
    # unchanged; confirm whether a 'PC' prefix is actually wanted.

    # NOTE(review): get_path passes names whose '.' were already turned into
    # '_' by base_standard, so this check can only match for other callers.
    if '1.5' in sequence_name:
        trash.append(sequence_name)
        return None

    # Only the last character of the token before the extension has to be
    # a digit.
    if not sequence_info[-2][-1].isdigit():
        trash.append(sequence_name)
        return None

    # NOTE: V vs VC vs VENC
    ignore = ['CF', 'FM']
    if any(word in sequence_name for word in ignore):
        trash.append(sequence_name)
        return None

    return get_sequence_name(sequence_info)

#TO-DO: CSF of different location and CINE ON/OFF

In [936]:
def filt_carotid(sequence_name, trash):
    """
    Decide whether a carotid file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    # Keep the file only when the token before the extension ends in a digit.
    if tokens[-2][-1].isdigit():
        return get_sequence_name(tokens)

    trash.append(sequence_name)
    return None

In [937]:
def filt_aorta(sequence_name, trash):
    """
    Decide whether an aorta file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    # Names mentioning 'ASCENDING' anywhere are rejected.
    for word in ('ASCENDING',):
        if word in sequence_name:
            trash.append(sequence_name)
            return None

    return get_sequence_name(tokens)

In [938]:
def filt_candy_cane(sequence_name, trash):
    """
    Decide whether a candy-cane file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    # The token before the extension has to be all digits.
    if tokens[-2].isdigit():
        return get_sequence_name(tokens)

    trash.append(sequence_name)
    return None

In [939]:
def filt_arch(sequence_name, trash):
    """
    Decide whether an arch file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives rejected names, and also the '_'-joined
            tokens of kept names that had no 'VENC' token

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    tokens = get_sequence_info(sequence_name)
    if tokens is None:
        return None

    # The token before the extension must end in a digit.
    last_char = tokens[-2][-1]
    if not last_char.isdigit():
        trash.append(sequence_name)
        return None

    # Names without a 'VENC' token get a placeholder at position 3 and are
    # logged in trash, but the file is still returned as kept.
    venc_missing = 'VENC' not in tokens
    if venc_missing:
        tokens.insert(3, 'VENC_???')
        flagged_name = '_'.join(tokens)
        trash.append(flagged_name)

    return get_sequence_name(tokens)

In [940]:
def filt_mpr(sequence_name, trash):
    """
    Print the tokens of an MPR file name (no filtering is implemented yet)
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            Accepted for parity with the other filters; never written to

    Returns:
        None
            Nothing is returned in any case; names that are not NII/JSON
            files are skipped without printing
    """
    tokens = get_sequence_info(sequence_name)

    if tokens is not None:
        print(tokens)

In [941]:
def filt_cbf(sequence_name, trash):
    """
    Decide whether a cerebral-blood-flow file should be kept
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            List that receives the names rejected by this filter

    Returns:
        string or None
            The rebuilt (standardized) name when the file is kept, None when
            it is rejected or is not a NII/JSON file
    """
    sequence_info = get_sequence_info(sequence_name)

    if sequence_info is None:
        return None

    # Names too short to carry the expected tokens are treated as rejects
    # instead of raising IndexError and aborting the caller's loop.
    date_token = sequence_info[-2] if len(sequence_info) >= 2 else ''
    has_eq_marker = len(sequence_info) >= 3 and sequence_info[-3] == 'EQ'

    # Reject when the third-from-last token is 'EQ' or the token before the
    # extension is not all digits.
    if has_eq_marker or not date_token.isdigit():
        trash.append(sequence_name)
        return None

    return get_sequence_name(sequence_info)

In [942]:
def filt_probe(sequence_name, trash):
    """
    Standardize the name of a PROBE file
    --------------------------------------------------------
    Parameters:
        sequence_name : string
            Name of file to be checked
        trash : array-like
            Accepted for parity with the other filters; never written to

    Returns:
        string or None
            The rebuilt (standardized) name, or None when the file is not
            a NII/JSON file
    """
    tokens = get_sequence_info(sequence_name)
    return None if tokens is None else get_sequence_name(tokens)

In [943]:
def get_path(root):
    """
    Filter through all the files below the given path
    --------------------------------------------------------
    Every file name is upper-cased, standardized with base_standard and
    handed to the filter of the first sequence rule it matches. Kept files
    and rejected names are collected per sequence label, a short report is
    printed, and both collections are returned.

    Parameters:
        root : string
            Path of the directory that contains all the files that are to
            be filtered through

    Returns:
        filtered : dict
            Sequence label -> list of (original file name, standardized
            name) pairs for the files that were kept
        trash : dict
            Sequence label -> list of names rejected by that label's filter
    """
    # Label order is the order in which the trash report is printed.
    # 'CAROTID' was collected before but missing from both summaries; it is
    # now included, so the report prints one extra 'CAROTID' line.
    labels = ('T1', 'FLAIR', 'T2', 'BOLD', 'DWI', 'DTI', 'DKI', 'SWAN', 'ASL',
              'CANDY CANE', 'ARCH', 'AORTA', 'CSF', 'CAROTID', 'CBF', 'PROBE')
    filtered = {label: [] for label in labels}
    trash = {label: [] for label in labels}

    # Filled by filt_FLAIR; not part of the report or the return value.
    flair_3d = []
    flair_2d = []

    def run_flair(name, rejected):
        # Adapts filt_FLAIR to the (name, trash) signature of other filters.
        return filt_FLAIR(name, flair_3d, flair_2d, rejected)

    # (patterns, label, filter, skip names containing 'LOC').
    # Rules are tried top to bottom and the first match wins, so the order
    # matters (e.g. a name containing both T1 and FLAIR is handled as T1).
    # A rule without a filter swallows the file without processing it.
    rules = (
        (('*T1*', '*FSPGR*'), 'T1', filt_T1, False),
        (('*FLAIR*',), 'FLAIR', run_flair, False),
        (('*T2*',), 'T2', filt_T2, False),
        (('*BOLD*',), 'BOLD', filt_bold, False),
        (('*DWI*',), 'DWI', filt_DWI, False),
        (('*DTI*',), 'DTI', filt_DTI, False),
        (('*DKI*',), 'DKI', filt_DKI, False),
        (('*ASL*',), 'ASL', filt_asl, False),
        # Swan files have very consistent sequence naming;
        # beware of ID naming though
        (('*SWAN*',), 'SWAN', filt_swan, False),
        (('*CSF*',), 'CSF', filt_csf, False),
        # TODO: naming convention for the different carotids is still open
        (('*CAROTID*',), 'CAROTID', filt_carotid, False),
        (('*AORTA*',), 'AORTA', filt_aorta, False),
        (('*CEREBR*BLOOD*FLOW*',), 'CBF', filt_cbf, False),
        # MPR files are recognised but deliberately left unprocessed
        (('*MPR*',), None, None, False),
        (('*CANDY_CANE*',), 'CANDY CANE', filt_candy_cane, True),
        # the inconsistency in ARCH names is a missing venc number
        (('*ARCH*',), 'ARCH', filt_arch, True),
        (('*PROBE*',), 'PROBE', filt_probe, True),
    )

    for path, subdirs, files in os.walk(root):
        for file in files:
            file_upper = base_standard(file.upper())

            for patterns, label, handler, skip_localizer in rules:
                if skip_localizer and 'LOC' in file_upper:
                    continue
                if not any(fnmatch.fnmatch(file_upper, pattern) for pattern in patterns):
                    continue

                if handler is not None:
                    n_file = handler(file_upper, trash[label])
                    if n_file is not None:
                        filtered[label].append((file, n_file))
                break

    # Report: all kept T1 pairs, then every trash label; only the T1
    # rejects are listed in full.
    t1 = filtered['T1']
    print(len(t1))
    for pair in t1:
        print(pair)

    for can in trash:
        print(can)
        if can == 'T1':
            for entry in trash[can]:
                print('\t', entry)

    return filtered, trash

In [944]:
def main(source_dir='/media/ke/8tb_part2/FSL_work/all_info'):
    """
    Entry point: hand the data directory over to get_path
    --------------------------------------------------------
    input:
        source_dir: string, optional
            directory passed unchanged to get_path. Defaults to the
            location that used to be hard-coded in this function, so
            calling main() with no argument behaves as before; pass a
            different path to run against another copy of the data.

    return:
        None
    """
    get_path(source_dir)
        

In [945]:
# Only kick off the run when this cell/script is executed directly,
# not when the code is imported from somewhere else.
if __name__ == "__main__":
    main()

688
('SH049_SAG_FSPGR_3D_20170829093206.nii', 'SH049_SAG_FSPGR_3D_20170829093206.nii')
('SH049_SAG_FSPGR_3D_20170829093206.json', 'SH049_SAG_FSPGR_3D_20170829093206.json')
('MEN005_SAG_FSPGR_3D_20170223153214.nii', 'MN005_SAG_FSPGR_3D_20170223153214.nii')
('MEN005_SAG_FSPGR_3D_20170223153214.json', 'MN005_SAG_FSPGR_3D_20170223153214.json')
('WH1331_Sagittal_T1_3D_20190514132222.json', 'WH1331_SAG_T1_3D_20190514132222.json')
('WH1331_Sagittal_T1_3D_20190514132222.nii', 'WH1331_SAG_T1_3D_20190514132222.nii')
('WH1331_CORONAL_T1_20190514132222.json', 'WH1331_COR_T1_20190514132222.json')
('WH1331_CORONAL_T1_20190514132222.nii', 'WH1331_COR_T1_20190514132222.nii')
('WH1331_SAGITTAL_T1_20190514132222.nii', 'WH1331_SAG_T1_20190514132222.nii')
('WH1331_SAGITTAL_T1_20190514132222.json', 'WH1331_SAG_T1_20190514132222.json')
('WH1331_AXIAL_T1_20190514132222.json', 'WH1331_AX_T1_20190514132222.json')
('WH1331_AXIAL_T1_20190514132222.nii', 'WH1331_AX_T1_20190514132222.nii')
('MEN021_SAG_FSPGR_3D_20

	 SH004_AX_FSPGR_MIN_20161220103018_NII
	 SH004_AX_FSPGR_MIN_20161220103018_JSON
	 SH004_COR_FSPGR_AVG_20161220103018_NII
	 SH004_COR_FSPGR_AVG_20161220103018_JSON
	 SH004_SAG_FSPGR_AVG_20161220103018_NII
	 SH004_SAG_FSPGR_AVG_20161220103018_JSON
	 WH1655_AX_FSPGR_3D_T1_20180626145230_I00001_JSON
	 WH1655_AX_FSPGR_3D_T1_20180626145230_I00002_JSON
	 WH1655_AX_FSPGR_3D_T1_20180626145230_I00002_NII
	 WH1655_AX_FSPGR_3D_T1_20180626145230_I00001_NII
	 WH1655_COR_FSPGR_3D_T1_20180626145230_I00002_JSON
	 WH1655_COR_FSPGR_3D_T1_20180626145230_I00001_NII
	 WH1655_COR_FSPGR_3D_T1_20180626145230_I00002_NII
	 WH1655_COR_FSPGR_3D_T1_20180626145230_I00001_JSON
	 WH1655_AX_T1_SE_MT_20180626145230_NII
	 WH1655_AX_T1_SE_MT_20180626145230_JSON
	 WH1623_AX_FSPGR_3D_T1_20170912141759_I00001_JSON
	 WH1623_AX_FSPGR_3D_T1_20170912141759_I00002_JSON
	 WH1623_AX_FSPGR_3D_T1_20170912141759_I00002_NII
	 WH1623_AX_FSPGR_3D_T1_20170912141759_I00001_NII
	 WH1623_COR_FSPGR_3D_T1_20170912141759_I00002_NII
	 WH1623_CO

	 WH1495_AX_FSPGR_MIN_20170203094107_NII
	 WH1495_AX_FSPGR_MIN_20170203094107_JSON
	 WH1495_COR_FSPGR_AVG_20170203094107_NII
	 WH1495_COR_FSPGR_AVG_20170203094107_JSON
	 WH1495_SAG_FSPGR_AVG_20170203094107_NII
	 WH1495_SAG_FSPGR_AVG_20170203094107_JSON
	 BR030_AX_FSPGR_3D_T1_20180412153641_I00002_JSON
	 BR030_AX_FSPGR_3D_T1_20180412153641_I00001_JSON
	 BR030_AX_FSPGR_3D_T1_20180412153641_I00002_NII
	 BR030_AX_FSPGR_3D_T1_20180412153641_I00001_NII
	 BR030_NEUROMELANIN_T1_FSE_XL_20180412153641_JSON
	 BR030_NEUROMELANIN_T1_FSE_XL_20180412153641_NII
	 SH008_AX_FSPGR_MIN_20170110103631_NII
	 SH008_AX_FSPGR_MIN_20170110103631_JSON
	 SH008_COR_FSPGR_AVG_20170110103631_JSON
	 SH008_COR_FSPGR_AVG_20170110103631_NII
	 SH008_SAG_FSPGR_AVG_20170110103631_JSON
	 SH008_SAG_FSPGR_AVG_20170110103631_NII
	 MN023_AX_FSPGR_MIN_20170314104715_JSON
	 MN023_AX_FSPGR_MIN_20170314104715_NII
	 MN023_COR_FSPGR_AVG_20170314104715_NII
	 MN023_COR_FSPGR_AVG_20170314104715_JSON
	 MN023_SAG_FSPGR_AVG_20170314104715_

In [946]:
# NOTE: Standardized Naming
# > study ID: 2 letters 

# "3D_T1": <AB####>
# "FLAIR": <ID>_{SAG | AX | COR}_[T1_ | T2_][3D_]FLAIR_<DOE>.nii
# "T2": <ID>_{SAG | AX | COR}_3D_T2_[<type>]<DOE>.nii
# "T2": <ID>_{SAG | AX | COR}_T2_[<type>]<DOE>_<tag>.nii
# "BOLD": <ID>_BOLD_<DOE>.nii
# "Diffusion kurtosis": <ID>_{AX}_DkI_<small#dirs>_<DOE>.nii
#                       <ID>_{AX}_DkI_<small#dirs>_<DOE>.bval
#                       <ID>_{AX}_DkI_<small#dirs>_<DOE>.bvec
#                       <ID>_{AX}_DkI_<big#dirs>_<DOE>.nii
#                       <ID>_{AX}_DkI_<big#dirs>_<DOE>.bval
#                       <ID>_{AX}_DkI_<big#dirs>_<DOE>.bvec
# "Diffusion tensor": <ID>_{AX}_D{k|T}I_<#dir>_<DOE>.nii
#                     <ID>_{AX}_D{k|T}I_<#dir>_<DOE>.bval
#                     <ID>_{AX}_D{k|T}I_<#dir>_<DOE>.bvec
# "Diffusion weighted": <ID>_{AX}_DWI_<DOE>.nii
#                      <ID>_{AX}_DWI_<DOE>.bval
#                      <ID>_{AX}_DWI_<DOE>.bvec
# "ASL": <ID>_{SAG | AX | COR}_ASL_<DOE>.nii   
# "SWAN": <ID>_{SAG | AX | COR}_SWAN_<DOE>.nii
# "Carotid"
# "Aorta": <ID>_[CINE]_AORTA_VENC_<number>_<DOE>.nii