# Project NaCl v. 2

In [14]:
import os
import fnmatch
import json
import pandas as pd

### File name parsing

In [15]:
def extract_notes(sequence_name):
    """
    extract notes inside the parentheses before filtering and parsing
    --------------------------------------------------------
    input:
        sequence_name : string
            sequence with notes within parentheses
        
    return:
        sequence_name : string
            sequence info without the parentheses and notes inside of them
        notes : string
            the notes that were in the parentheses
    """
    op = None
    ed = None
    notes = None
    
    for i in range(len(sequence_name)):
        if sequence_name[i] == '(':
            op = i
        if sequence_name[i] == ')':
            ed = i
            break
    
    if ed:
        sequence_name = sequence_name[:op] + sequence_name[ed+1:]
        notes = sequence_name[op:ed]
        notes.replace('_', '-')
    
    return sequence_name, notes

In [16]:
def get_info(sequence_name):
    """
    Convert the input from string to list 
    --------------------------------------------------------
    input:
        sequence_name: string
        
    return:
        str_list : list
            splited the string into items for parsing later
    """
    str_list = sequence_name.replace('-','_').replace('.','_').split('_')
#     if not (str_list[-1] == 'NII' or str_list[-1] == 'JSON'):
#         return None
    
    str_list = list(filter(None, str_list)) 
    return str_list

In [17]:
def get_name(sequence_info, notes=None):
    """
    Convert the input from list to string with notes added if needed be
    --------------------------------------------------------
    input:
        sequence_info: array-like
            splited the string 
        notes : string or None
            notes to be added into the file name
        
    return:
        name : string 
            sequence_info joined together with notes added if need be
    """
    if notes:
        name = ('_'.join(sequence_info[:-1]))+notes+'.'+sequence_info[-1].lower()
    else:
        name = ('_'.join(sequence_info[:-1]))+'.'+sequence_info[-1].lower()
    return name

### Basic Standardization

In [18]:
def base_standard(name):
    """
    clean up the user input sequence name into a standardized naming convetion for sorting later
             MEN --> MN
        WHITTIER --> WH
        SAGITTLE --> SAG
           AXIAL --> AX
         CORONAL --> COR
         OBLIQUE --> OBL
              RT --> RIGHT
              LT --> LEFT
    --------------------------------------------------------
    input:
        name: string
            name to be standardized
    
    return:
        n_name : string
            standardized file name
    """
    info = get_info(name)
    if not info[-2][-1].isdigit():
        trash.append(name)
        return None
    
    n_name = name.replace('MEN', 'MN').replace('WHITTIER', 'WH')
    n_name = n_name.replace('SAGITTAL', 'SAG').replace('AXIAL', 'AX').replace('CORONAL', 'COR').replace('OBLIQUE', 'OBL')
    n_name = n_name.replace('RT', 'RIGHT').replace('LT', 'LEFT')
    
    info=n_name.replace('-','_').replace('.','_').split('_')
    
    ID = info[0]
    # ID does not follow designation [2-3 letter cohort designation][3-4 number designation]
    if ID.isdigit():
        info[0] = 'WH'+info[0]
    
    return n_name

### Rename

In [19]:
def filt_FSPGR(name, trash):
    """
    Renames the file passed in
    ---------------------------------------------------------------
    input:
        name : str
            name of file to be renamed
        trash : list
            if file is filtered out, it's thrown into this list
    output:
        n_name : str
            name of file after being renamed
    """
    n_name, notes = extract_notes(name)
    
    info = get_info(n_name)
    
    if not info[-2][-1].isdigit():
        trash.append(name)
        return None
    
    if 'RE' in info:
        info.remove('RE')
        info.insert(-2, '(RE)')
    
    if any(word in info for word in ['T1', 'BRAVO']):
        trash.append(name)
        return None
    
    n_name = get_name(info, notes)
    
    return n_name

In [20]:
def filt_CUBE(name, trash):
    """
    Renames the file passed in
    ---------------------------------------------------------------
    input:
        name : str
            name of file to be renamed
        trash : list
            if file is filtered out, it's thrown into this list
    output:
        n_name : str
            name of file after being renamed
    """
    n_name, notes = extract_notes(name)
    
    info = get_info(n_name)
    
    if not info[-2][-1].isdigit():
        trash.append(name)
        return None
    
    if 'RE' in info:
        info.remove('RE')
        info.insert(-2, '(RE)')
    
    if any(word in info for word in ['3D', 'BRAVO']):
        trash.append(name)
        return None
    
    n_name = get_name(info, notes)
    
    return n_name

### Main

In [21]:
root = '/media/ke/8tb_part2/FSL_work/all_info'


accept = []
bad_json = []
reject = []

for path, subdirs, files in os.walk(root):
    for file in files:
        name, extension = os.path.splitext(file)
        if extension == '.json':
            try:
                with open(path + '/' + file) as json_file:
                    data = json.load(json_file)
                    series_num = data['SeriesNumber']
                    if series_num < 100 and series_num > 2:
                        if fnmatch.fnmatch(file, '*FLAIR*') or fnmatch.fnmatch(file, '*T2*') or fnmatch.fnmatch(file, '*FSGPR*'):
                            mr_aq_type = data['MRAcquisitionType']
                            if mr_aq_type == '2D':
                                reject.append(name)
                            else:
                                accept.append(name)
                        else:
                            accept.append(name)
                    else:
                        reject.append(name)
            except:
                bad_json.append(name)
#                 print(file)

fspgr=[]
cube = []
bold = []
dwi=[]
dti = []
dki = []
swan = []
asl=[]
candy_cane=[]
arch=[]
aorta=[]
csf = []
carotid = []
cbf=[]
probe = []

trash = []

change = 0
count = 0
    
for path, subdirs, files in os.walk(root):
    for file in files:
        name, extension = os.path.splitext(file)
        if name in accept:
            file_upper=base_standard(file.upper())
            if not file_upper:
                pass
            if fnmatch.fnmatch(file_upper, '*MPR*'):    
                pass
            if fnmatch.fnmatch(file_upper, '*FSPGR*'):    
                # 17 SAG_T1_3D
#                 print("T1:", file_upper)
                n_name = filt_FSPGR(file_upper, trash)
                if n_name is not None:
#                     print('FSPGR:', n_name)
                    fspgr.append((file, n_name))
                pass
            elif fnmatch.fnmatch(file_upper, '*CUBE*'): 
#                 print("FLAIR:", file_upper)
                n_name = filt_CUBE(file_upper, trash)
                if n_name is not None:
#                     print('CUBE:',n_name)
                    cube.append((file, n_name))
                pass
            elif fnmatch.fnmatch(file_upper, '*BOLD*'):
                print('BOLD:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*DWI*'):
#                 print('DWI:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*DTI*'):
#                 print('DTI:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*DKI*'):
#                 print('DKI:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*ASL*'):
#                 print('ASL:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*SWAN*'):
#                 print('SWAN:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*CSF*'):
#                 print('CSF:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*CAROTID*'):
#                 print('CAROTID:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*AORTA*'):
#                 print('AORTA:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*CANDY_CANE*'):
#                 print('CANDY_CANE:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*ARCH*'):
#                 print('ARCH:', file_upper)
                pass
            elif fnmatch.fnmatch(file_upper, '*PROBE*'):
#                 print('PROBE:', file_upper)
                pass
            else:
#                 print('\t', file_upper)
                pass

filtered = {'T1' : fspgr,
            'CUBE' : cube,
            'BOLD' : bold,
            'DWI' : dwi,
            'DTI' : dti,
            'DKI' : dki,
            'SWAN' : swan,
            'ASL' : asl,
            'CANDY CANE' : candy_cane,
            'ARCH' : arch,
            'AORTA' : aorta,
            'CSF': csf,
            'CBF': cbf,
            'PROBE': probe}

BOLD: SH049_BOLD_20170829093206.JSON
BOLD: SH049_BOLD_20170829093206.NII


TypeError: expected str, bytes or os.PathLike object, not NoneType