# Project NaCl v. 2

In [145]:
import os
import fnmatch
import json
import pandas as pd

### File name parsing

In [146]:
def extract_notes(name):
    """
    Pull the first parenthesised note out of a sequence name
    --------------------------------------------------------
    The note runs from the last '(' that precedes the first ')' up to and
    including that ')'.

    input:
        name : string
            sequence name that may contain a note within parentheses

    return:
        name : string
            the name with the note removed; returned unchanged when no
            complete "(...)" group is found
        notes : string or None
            the note including its parentheses, with '_' replaced by '-',
            or None when nothing was extracted
    """
    ed = name.find(')')
    # No ')' at all, or a ')' in the very first position (nothing can precede
    # it): there is no note to extract.
    if ed <= 0:
        return name, None

    op = name.rfind('(', 0, ed)
    # A ')' without an opening '(' before it is not a note; leave the name
    # alone instead of failing on the slice arithmetic below.
    if op == -1:
        return name, None

    # Underscores inside the note are swapped for dashes so the note stays in
    # one piece when the name is later split on '_'.
    notes = name[op:ed + 1].replace('_', '-')

    # The slices deliberately drop the single character on each side of the
    # note and put one '_' in their place. max() keeps a note at the very
    # start of the name from turning the prefix slice into name[:-1].
    name = name[:max(op - 1, 0)] + '_' + name[ed + 2:]

    return name, notes

In [147]:
def get_info(sequence_name):
    """
    Split a sequence/file name into its non-empty fields
    --------------------------------------------------------
    input:
        sequence_name : string
            name whose fields are separated by '-', '.' or '_'

    return:
        str_list : list
            the fields in their original order; empty fields produced by
            leading, trailing or repeated separators are dropped
    """
    # Map every accepted separator onto '_' in a single pass, then split once.
    unified = sequence_name.translate(str.maketrans('-.', '__'))
    return [field for field in unified.split('_') if field]

In [148]:
def get_name(info, notes=None):
    """
    Join the fields of a name back into a file name, optionally adding notes
    --------------------------------------------------------
    The last field is treated as the file extension: it is lower-cased and
    attached with a '.', while all other fields are joined with '_'.

    input:
        info : array-like
            the fields of the name, extension last; it is not modified
        notes : string or None
            notes to add to the file name; when given, they are placed
            immediately before the second-to-last field

    return:
        name : string
            the fields joined together, with notes added if given

    raises:
        IndexError
            if info is empty and no notes are given
    """
    # Work on a copy so the caller's list is left untouched.
    parts = list(info)
    if notes:
        parts.insert(-2, notes)
    return '_'.join(parts[:-1]) + '.' + parts[-1].lower()

### Basic Standardization

In [149]:
def base_standard(name, trash=None):
    """
    clean up the user input sequence name into a standardized naming convention for sorting later
             MEN --> MN
        WHITTIER --> WH
        SAGITTAL --> SAG
           AXIAL --> AX
         CORONAL --> COR
         OBLIQUE --> OBL
              RT --> RIGHT
              LT --> LEFT
    The replacements are plain substring replacements, so they also fire
    inside longer words (e.g. 'MULTI' becomes 'MULEFTI').
    --------------------------------------------------------
    input:
        name : string
            name to be standardized
        trash : list or None
            list that collects rejected names; when omitted, a module-level
            list called `trash` is used if one exists

    return:
        n_name : string or None
            standardized name with all fields joined by '_', or None when
            the name is rejected
    """
    n_name = name.replace('MEN', 'MN').replace('WHITTIER', 'WH')
    n_name = n_name.replace('SAGITTAL', 'SAG').replace('AXIAL', 'AX').replace('CORONAL', 'COR').replace('OBLIQUE', 'OBL')
    n_name = n_name.replace('RT', 'RIGHT').replace('LT', 'LEFT')

    info = get_info(n_name)

    # The second-to-last field must end in a digit. Names with fewer than two
    # fields cannot satisfy that and are rejected instead of raising
    # IndexError.
    if len(info) < 2 or not info[-2][-1].isdigit():
        if trash is None:
            # Backward compatibility: older callers pass no list and rely on
            # the module-level `trash` being filled in.
            trash = globals().get('trash')
        if trash is not None:
            trash.append(name)
        return None

    # A standalone 'RE' field is turned into a parenthesised note placed just
    # before the second-to-last field.
    if 'RE' in info:
        info.remove('RE')
        info.insert(-2, '(RE)')

    # ID does not follow designation [2-3 letter cohort designation][3-4 number designation]
    if info[0].isdigit():
        info[0] = 'WH' + info[0]

    return '_'.join(info)

### Rename

In [150]:
def filt_FSPGR(name, trash):
    """
    Build the cleaned-up name for an FSPGR file, or filter the file out
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt name with any parenthesised note re-inserted, or None
            when the name contains a 'T1' or 'BRAVO' field
    """
    remainder, notes = extract_notes(name)
    fields = get_info(remainder)

    if 'T1' in fields or 'BRAVO' in fields:
        trash.append(name)
        return None

    return get_name(fields, notes)

In [151]:
def filt_CUBE(name, trash):
    """
    Build the cleaned-up name for a CUBE-type file, or filter the file out
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt name with any parenthesised note re-inserted, or None
            when the name contains a '3D' or 'BRAVO' field
    """
    unwanted = {'3D', 'BRAVO'}

    remainder, notes = extract_notes(name)
    fields = get_info(remainder)

    if not unwanted.isdisjoint(fields):
        trash.append(name)
        return None

    return get_name(fields, notes)

In [152]:
def filt_minimal(name, trash):
    """
    Build the cleaned-up name for a file without filtering anything out
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            unused here; kept so the signature matches the other filters
    output:
        n_name : str
            rebuilt name with any parenthesised note re-inserted
    """
    remainder, notes = extract_notes(name)
    return get_name(get_info(remainder), notes)

In [153]:
def filt_diffusion(name, trash):
    """
    Build the cleaned-up name for a diffusion file, or filter the file out
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives the name (after the note markers below have been
            added) when the file is filtered out
    output:
        n_name : str or None
            rebuilt name with the extracted note re-inserted and an 'AX'
            field guaranteed, or None when the name contains a 'MAP',
            'MAPS' or 'MULEFTI' field
    """
    # Wrap the known free-text markers in parentheses so they are picked up
    # as notes by extract_notes.
    name = name.replace('30_DIRECTION', '(30_DIRECTIONS)')
    name = name.replace('ASSET', '(ASSET)')

    remainder, notes = extract_notes(name)

    # Any extracted note that does not already end in 'S' gets one appended
    # before the closing parenthesis.
    # NOTE(review): this also turns '(ASSET)' into '(ASSETS)' - confirm that
    # is intended.
    if notes and notes[-2] != 'S':
        notes = notes[:-1] + 'S)'

    fields = get_info(remainder)
    if fields.count('AX') == 0:
        fields.insert(1, 'AX')

    # 'MULEFTI' is what 'MULTI' looks like after an 'LT' -> 'LEFT' substring
    # replacement.
    if not {'MAP', 'MAPS', 'MULEFTI'}.isdisjoint(fields):
        trash.append(name)
        return None

    return get_name(fields, notes)

In [154]:
def filt_ASL(name, trash):
    """
    Build the cleaned-up name for an ASL file, or filter the file out
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt name with any parenthesised note re-inserted and an 'AX'
            field guaranteed, or None when the name contains a '3D', '2025'
            or '2525' field
    """
    remainder, notes = extract_notes(name)

    fields = get_info(remainder)
    if fields.count('AX') == 0:
        # No orientation field present: add 'AX' as the second field.
        fields.insert(1, 'AX')

    for unwanted in ('3D', '2025', '2525'):
        if unwanted in fields:
            trash.append(name)
            return None

    return get_name(fields, notes)

In [155]:
def filt_SWAN(name, trash):
    """
    Build the cleaned-up name for a SWAN file, or filter the file out
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt name with any parenthesised note re-inserted, or None
            when the name contains a '3D' or 'MIDBRAIN' field
    """
    remainder, notes = extract_notes(name)
    fields = get_info(remainder)

    for unwanted in ('3D', 'MIDBRAIN'):
        if unwanted in fields:
            trash.append(name)
            return None

    return get_name(fields, notes)

In [156]:
def filt_CSF(name, trash, c2, aq):
    """
    Build the cleaned-up name for a CSF flow file and sort it by location
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
        c2 : list
            receives the new name when the file has a 'C2' field
        aq : list
            receives the new name when the file has an 'AQ' field
    output:
        n_name : str or None
            the new name when the file is neither filtered out nor sorted
            into c2/aq; None in every other case
    """
    fields = get_info(name)

    # Filtered out: any of the excluded fields present, or no VENC field.
    if not {'5T', 'DEEP', 'CF', 'V'}.isdisjoint(fields) or 'VENC' not in fields:
        trash.append(name)
        return None

    # Filtered out: the field after VENC is not a plain number.
    venc_at = fields.index('VENC')
    if not fields[venc_at + 1].isdigit():
        trash.append(name)
        return None

    # Wrap "VENC <number>" in parentheses so the pair can be extracted and
    # re-inserted as a single note.
    fields[venc_at] = '(' + fields[venc_at]
    fields[venc_at + 1] += ')'

    remainder, notes = extract_notes(get_name(fields))
    n_name = get_name(get_info(remainder), notes)
    n_name = n_name.replace('CSF_FLOW', 'PC').replace('CSF', 'PC')

    # C2 is checked before AQ; a file sorted into either list is not returned.
    for tag, bucket in (('C2', c2), ('AQ', aq)):
        if tag in fields:
            bucket.append(n_name)
            return None

    return n_name

In [157]:
def filt_blood(name, trash):
    """
    Build the cleaned-up name for a blood flow file, or filter the file out
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt name with "VENC <number>" turned into a parenthesised
            note, or None when the name has an 'MRA' field, has no 'VENC'
            field, or the field after 'VENC' is not a plain number
    """
    fields = get_info(name)

    if 'MRA' in fields or 'VENC' not in fields:
        trash.append(name)
        return None

    venc_at = fields.index('VENC')
    if not fields[venc_at + 1].isdigit():
        trash.append(name)
        return None

    # Wrap "VENC <number>" in parentheses so the pair can be extracted and
    # re-inserted as a single note.
    fields[venc_at] = '(' + fields[venc_at]
    fields[venc_at + 1] += ')'

    remainder, notes = extract_notes(get_name(fields))
    return get_name(get_info(remainder), notes)

### Main

In [160]:
root = '/media/ke/8tb_part2/FSL_work/all_info'


# Base names (file name without extension) of the JSON files, sorted by the
# outcome of the checks below.
accept = []
bad_json = []
reject = []

for path, subdirs, files in os.walk(root):
    for file in files:
        name, extension = os.path.splitext(file)
        if extension != '.json':
            continue
        try:
            with open(os.path.join(path, file)) as json_file:
                data = json.load(json_file)

            # Only series numbers strictly between 2 and 100 are kept.
            series_num = data['SeriesNumber']
            if not 2 < series_num < 100:
                reject.append(name)
                continue

            # FLAIR / T2 / FSPGR files are additionally rejected when the
            # acquisition type is 2D. The check runs on the upper-cased name,
            # like the classification step, so lower-case names are covered.
            upper_name = file.upper()
            checks_acquisition = any(
                tag in upper_name for tag in ('FLAIR', 'T2', 'FSPGR')
            )
            if checks_acquisition and data['MRAcquisitionType'] == '2D':
                reject.append(name)
            else:
                accept.append(name)
        except (OSError, ValueError, KeyError, TypeError):
            # Unreadable file, invalid JSON, missing key or a value of an
            # unexpected type: record the file and keep going.
            bad_json.append(name)

# One list per sequence family. Unless noted otherwise, entries are
# (original file name, new file name) tuples.
fspgr = []
flair = []
t2 = []
bold = []
dwi = []
dti = []
dki = []
swan = []
asl = []
candy_cane = []
arch = []
c2 = []          # new names only, filled by filt_CSF
aq = []          # new names only, filled by filt_CSF
csf_other = []   # new names only
carotid = []
probe = []       # original file names only, no renaming is applied

trash = []

c2_len = 0
aq_len = 0

# Set copy of the accepted base names for constant-time membership tests.
accepted_names = set(accept)

# Ordered routing table: the first row whose keyword occurs in the
# standardized name decides where the file goes. Rows with skip_loc=True are
# ignored for names that also contain 'LOC'. A filter of None marks the two
# families that are handled separately in the loop (CSF and PROBE).
routes = (
    # keyword,     filter,         destination, skip_loc
    ('FSPGR',      filt_FSPGR,     fspgr,       False),
    ('FLAIR',      filt_CUBE,      flair,       False),
    ('T2',         filt_CUBE,      t2,          False),
    ('BOLD',       filt_minimal,   bold,        False),
    ('DWI',        filt_diffusion, dwi,         False),
    ('DTI',        filt_diffusion, dti,         False),
    ('DKI',        filt_diffusion, dki,         False),
    ('ASL',        filt_ASL,       asl,         False),
    ('SWAN',       filt_SWAN,      swan,        False),
    ('CSF',        None,           csf_other,   False),
    ('CAROTID',    filt_blood,     carotid,     False),
    ('CANDY_CANE', filt_minimal,   candy_cane,  True),
    ('ARCH',       filt_blood,     arch,        True),
    ('PROBE',      None,           probe,       False),
)

for path, subdirs, files in os.walk(root):
    for file in files:
        name, extension = os.path.splitext(file)
        if name not in accepted_names:
            continue

        file_upper = base_standard(file.upper())
        if not file_upper:
            trash.append(file)
            continue

        for keyword, renamer, destination, skip_loc in routes:
            if keyword not in file_upper:
                continue
            if skip_loc and 'LOC' in file_upper:
                continue

            if keyword == 'CSF':
                # filt_CSF sorts C2 and AQ files into their own lists itself;
                # only the remaining CSF files come back as a name.
                n_name = filt_CSF(file_upper, trash, c2, aq)
                if n_name is not None:
                    destination.append(n_name)
            elif keyword == 'PROBE':
                destination.append(file)
            else:
                n_name = renamer(file_upper, trash)
                if n_name is not None:
                    destination.append((file, n_name))
            break

# Lookup from sequence-family label to the list collected for that family.
filtered = {}
filtered['T1'] = fspgr
filtered['FLAIR'] = flair
filtered['T2'] = t2
filtered['BOLD'] = bold
filtered['DWI'] = dwi
filtered['DTI'] = dti
filtered['DKI'] = dki
filtered['SWAN'] = swan
filtered['ASL'] = asl
filtered['CANDY CANE'] = candy_cane
filtered['ARCH'] = arch
filtered['C2'] = c2
filtered['AQ'] = aq
filtered['CSF_OTHER'] = csf_other
filtered['CAROTID'] = carotid
filtered['PROBE'] = probe