# Project NaCl v. 2

In [62]:
import os
import fnmatch
import json
import pandas as pd

### File name parsing

In [63]:
def extract_notes(name):
    """
    extract notes inside the parentheses before filtering and parsing
    --------------------------------------------------------
    input:
        name : string
            sequence name that may contain a parenthesised note,
            e.g. 'MN005_DTI_(30_DIRECTION)_2017.NII'

    return:
        name : string
            sequence name with the parenthesised note removed; returned
            unchanged when no complete '(...)' pair is found
        notes : string or None
            the note including its parentheses, with '_' replaced by '-'
            (so it survives the later split on '_'), or None if there is none
    """
    # The note ends at the first ')' and starts at the last '(' before it.
    ed = name.find(')')
    if ed == -1:
        return name, None
    op = name.rfind('(', 0, ed)
    if op == -1:
        # A ')' without any preceding '(' is not a note; leave the name alone.
        return name, None

    notes = name[op:ed + 1].replace('_', '-')
    # One character on each side of the note is dropped (presumably the
    # separators around it) and replaced by a single '_'.  max() keeps the
    # left slice from wrapping to a negative index when '(' is at position 0.
    name = name[:max(op - 1, 0)] + '_' + name[ed + 2:]

    return name, notes

In [64]:
def get_info(sequence_name):
    """
    Break a sequence name into its non-empty tokens
    --------------------------------------------------------
    input:
        sequence_name: string
            name whose parts are separated by '_', '-' or '.'

    return:
        str_list : list
            the parts of the name in order, with empty parts dropped
    """
    # Map every '-' and '.' onto '_' so a single split handles all separators.
    unified = sequence_name.translate(str.maketrans('-.', '__'))
    return [token for token in unified.split('_') if token]

In [65]:
def get_name(info, notes=None):
    """
    Convert the input from list to string with notes added if need be
    --------------------------------------------------------
    input:
        info : list of strings
            the parts of the name; the last item is used as the extension
        notes : string or None
            notes to be added into the file name

    return:
        name : string
            all parts but the last joined with '_', followed by '.' and the
            lower-cased last part; when notes is given it is placed before
            the second-to-last part

    raises:
        IndexError if info is empty
    """
    # Work on a copy so the caller's list is not modified by the insert.
    parts = list(info)
    if notes:
        parts.insert(-2, notes)
    return '_'.join(parts[:-1]) + '.' + parts[-1].lower()

### Basic Standardization

In [66]:
def base_standard(name):
    """
    clean up the user input sequence name into a standardized naming convention for sorting later
             MEN --> MN
        WHITTIER --> WH
        SAGITTAL --> SAG
           AXIAL --> AX
         CORONAL --> COR
         OBLIQUE --> OBL
              RT --> RIGHT
              LT --> LEFT
    --------------------------------------------------------
    input:
        name: string
            name to be standardized

    return:
        n_name : string or None
            standardized name with all parts (including the extension)
            joined by '_'; None when the name is rejected, in which case
            name is also appended to the module-level list `trash`

    NOTE: this function appends to a global `trash` list, which must exist
    before the first call.
    """
    # These are plain substring replacements, so they also fire inside longer
    # words (e.g. 'MULTI' becomes 'MULEFTI'); filt_diffusion checks for
    # 'MULEFTI', so the behaviour is kept as is.
    n_name = name.replace('MEN', 'MN').replace('WHITTIER', 'WH')
    n_name = n_name.replace('SAGITTAL', 'SAG').replace('AXIAL', 'AX').replace('CORONAL', 'COR').replace('OBLIQUE', 'OBL')
    n_name = n_name.replace('RT', 'RIGHT').replace('LT', 'LEFT')

    info = get_info(n_name)

    # The part just before the extension must end in a digit.  Names with
    # fewer than two parts cannot satisfy that and are rejected as well
    # instead of raising IndexError.
    if len(info) < 2 or not info[-2][-1].isdigit():
        trash.append(name)
        return None

    # Turn a bare 'RE' part into a '(RE)' note placed before the
    # second-to-last part.
    if 'RE' in info:
        info.remove('RE')
        info.insert(-2, '(RE)')

    # ID does not follow designation [2-3 letter cohort designation][3-4 number designation]
    if info[0].isdigit():
        info[0] = 'WH' + info[0]

    return '_'.join(info)

### Rename

In [67]:
def filt_FSPGR(name, trash):
    """
    Filter and rebuild the name of an FSPGR file
    ---------------------------------------------------------------
    input:
        name : str
            name of file to be processed
        trash : list
            name is appended here when the file is filtered out
    output:
        n_name : str or None
            rebuilt name (any parenthesised note re-inserted), or None if
            one of the parts is 'T1' or 'BRAVO'
    """
    stripped, notes = extract_notes(name)
    tokens = get_info(stripped)

    if 'T1' in tokens or 'BRAVO' in tokens:
        trash.append(name)
        return None

    return get_name(tokens, notes)

In [68]:
def filt_CUBE(name, trash):
    """
    Filter and rebuild the name of a CUBE (FLAIR / T2) file
    ---------------------------------------------------------------
    input:
        name : str
            name of file to be processed
        trash : list
            name is appended here when the file is filtered out
    output:
        n_name : str or None
            rebuilt name (any parenthesised note re-inserted), or None if
            one of the parts is '3D' or 'BRAVO'
    """
    stripped, notes = extract_notes(name)
    tokens = get_info(stripped)

    if '3D' in tokens or 'BRAVO' in tokens:
        trash.append(name)
        return None

    return get_name(tokens, notes)

In [69]:
def filt_BOLD(name, trash):
    """
    Rebuild the name of a BOLD file; nothing is filtered out
    ---------------------------------------------------------------
    input:
        name : str
            name of file to be processed
        trash : list
            not used here; accepted so the signature matches the other
            filt_* functions
    output:
        n_name : str
            rebuilt name with any parenthesised note re-inserted
    """
    stripped, notes = extract_notes(name)
    return get_name(get_info(stripped), notes)

In [70]:
def filt_diffusion(name, trash):
    """
    Filter and rebuild the name of a diffusion (DWI / DTI / DKI) file
    ---------------------------------------------------------------
    input:
        name : str
            name of file to be processed
        trash : list
            the name (after the parenthesis rewrite below) is appended here
            when the file is filtered out
    output:
        n_name : str or None
            rebuilt name, or None if one of the parts is 'MAP', 'MAPS' or
            'MULEFTI'
    """
    # Wrap the known annotations in parentheses so extract_notes picks the
    # first one up as a note.
    name = name.replace('30_DIRECTION', '(30_DIRECTIONS)')
    name = name.replace('ASSET', '(ASSET)')

    stripped, notes = extract_notes(name)
    # Make sure the note ends in 'S)'.
    if notes and notes[-2] != 'S':
        notes = notes[:-1] + 'S)'

    tokens = get_info(stripped)
    # Insert 'AX' as the second part when the name does not contain it.
    if 'AX' not in tokens:
        tokens.insert(1, 'AX')

    for unwanted in ('MAP', 'MAPS', 'MULEFTI'):
        if unwanted in tokens:
            trash.append(name)
            return None

    return get_name(tokens, notes)

### Main

In [71]:
root = '/media/ke/8tb_part2/FSL_work/all_info'


accept = []
bad_json = []
reject = []

for path, subdirs, files in os.walk(root):
    for file in files:
        name, extension = os.path.splitext(file)
        if extension == '.json':
            try:
                with open(path + '/' + file) as json_file:
                    data = json.load(json_file)
                    series_num = data['SeriesNumber']
                    if series_num < 100 and series_num > 2:
                        if fnmatch.fnmatch(file, '*FLAIR*') or fnmatch.fnmatch(file, '*T2*') or fnmatch.fnmatch(file, '*FSGPR*'):
                            mr_aq_type = data['MRAcquisitionType']
                            if mr_aq_type == '2D':
                                reject.append(name)
                            else:
                                accept.append(name)
                        else:
                            accept.append(name)
                    else:
                        reject.append(name)
            except:
                bad_json.append(name)
#                 print(file)

# Per-category lists of (original file name, standardized name) pairs.
fspgr = []
flair = []
t2 = []
bold = []
dwi = []
dti = []
dki = []
# The categories below are declared but nothing is collected into them yet.
swan = []
asl = []
candy_cane = []
arch = []
aorta = []
csf = []
carotid = []
cbf = []
probe = []

# Names that were filtered out (also appended to by base_standard and the
# filt_* functions).
trash = []

change = 0
count = 0

# (pattern, filter function, destination list), checked in this order; the
# first matching pattern wins.  Files matching none of these (ASL, SWAN, CSF,
# CAROTID, AORTA, CANDY_CANE, ARCH, PROBE, MPR, anything else) are skipped.
_CATEGORY_RULES = [
    ('*FSPGR*', filt_FSPGR, fspgr),
    ('*FLAIR*', filt_CUBE, flair),
    ('*T2*', filt_CUBE, t2),
    ('*BOLD*', filt_BOLD, bold),
    ('*DWI*', filt_diffusion, dwi),
    ('*DTI*', filt_diffusion, dti),
    ('*DKI*', filt_diffusion, dki),
]

# Set lookup instead of scanning the accept list once per file.
_accepted_names = set(accept)

for path, subdirs, files in os.walk(root):
    for file in files:
        name, extension = os.path.splitext(file)
        if name not in _accepted_names:
            continue

        file_upper = base_standard(file.upper())
        if not file_upper:
            # NOTE(review): base_standard already appended the upper-cased
            # name to trash; this adds the original file name as well.
            trash.append(file)
            continue

        for pattern, rename, bucket in _CATEGORY_RULES:
            if fnmatch.fnmatch(file_upper, pattern):
                n_name = rename(file_upper, trash)
                if n_name is not None:
                    bucket.append((file, n_name))
                break

# Category label -> list of (original file name, standardized name) pairs.
# `cube` was never defined; the CUBE files are collected in `flair` and `t2`,
# so the 'CUBE' entry is the FLAIR pairs followed by the T2 pairs.
filtered = {'T1' : fspgr,
            'CUBE' : flair + t2,
            'BOLD' : bold,
            'DWI' : dwi,
            'DTI' : dti,
            'DKI' : dki,
            'SWAN' : swan,
            'ASL' : asl,
            'CANDY CANE' : candy_cane,
            'ARCH' : arch,
            'AORTA' : aorta,
            'CSF': csf,
            'CBF': cbf,
            'PROBE': probe}

FLAIR: SH049_SAG_CUBE_FLAIR_20170829093206.json
FLAIR: SH049_SAG_CUBE_FLAIR_20170829093206.nii
FLAIR: MN005_SAG_CUBE_FLAIR_20170223153214.nii
FLAIR: MN005_SAG_CUBE_FLAIR_20170223153214.json
T2: MN005_SAG_CUBE_T2_20170223153214.json
T2: MN005_SAG_CUBE_T2_20170223153214.nii
FLAIR: MN021_SAG_CUBE_FLAIR_20170302095957.nii
FLAIR: MN021_SAG_CUBE_FLAIR_20170302095957.json
T2: MN021_SAG_CUBE_T2_20170302095957.json
T2: MN021_SAG_CUBE_T2_20170302095957.nii
FLAIR: BR022_SAG_CUBE_FLAIR_20180130110501.nii
FLAIR: BR022_SAG_CUBE_FLAIR_20180130110501.json
FLAIR: WH1619_SAG_CUBE_FLAIR_20170829140829.json
FLAIR: WH1619_SAG_CUBE_FLAIR_20170829140829.nii
FLAIR: SH057_SAG_CUBE_FLAIR_20180523095653.json
FLAIR: SH057_SAG_CUBE_FLAIR_20180523095653.nii
FLAIR: BR013_SAG_CUBE_FLAIR_20190315131838.nii
FLAIR: BR013_SAG_CUBE_FLAIR_20190315131838.json
T2: BR013_SAG_CUBE_T2_20190315131838.json
T2: BR013_SAG_CUBE_T2_20190315131838.nii
FLAIR: WH1295_SAG_CUBE_FLAIR_20180221143829.nii
FLAIR: WH1295_SAG_CUBE_FLAIR_2018022

FLAIR: BR037_SAG_CUBE_FLAIR_20181012092433.json
FLAIR: BR037_SAG_CUBE_FLAIR_20181012092433.nii
FLAIR: MN041_SAG_CUBE_FLAIR_20170815143559.json
FLAIR: MN041_SAG_CUBE_FLAIR_20170815143559.nii
FLAIR: MN032_SAG_CUBE_FLAIR_20170411100927.nii
FLAIR: MN032_SAG_CUBE_FLAIR_20170411100927.json
FLAIR: WH1499_SAG_CUBE_FLAIR_20170111101313.json
FLAIR: WH1499_SAG_CUBE_FLAIR_20170111101313.nii
T2: WH1499_SAG_CUBE_T2_20170111101313.json
T2: WH1499_SAG_CUBE_T2_20170111101313.nii
FLAIR: SH032_SAG_CUBE_FLAIR_20170404095639.json
FLAIR: SH032_SAG_CUBE_FLAIR_20170404095639.nii
FLAIR: WH1612_SAG_CUBE_FLAIR_20170706075309.json
FLAIR: WH1612_SAG_CUBE_FLAIR_20170706075309.nii
FLAIR: MN044_SAG_CUBE_FLAIR_20180515111421.json
FLAIR: MN044_SAG_CUBE_FLAIR_20180515111421.nii
FLAIR: BR034_SAG_CUBE_FLAIR_20180814154401.nii
FLAIR: BR034_SAG_CUBE_FLAIR_20180814154401.json
FLAIR: WH1616_SAG_CUBE_FLAIR_20170628123050.nii
FLAIR: WH1616_SAG_CUBE_FLAIR_20170628123050.json
FLAIR: BR027_SAG_CUBE_FLAIR_20180328115320.json
FLAIR:

FLAIR: WH1621_SAG_CUBE_FLAIR_20180416155525.nii
FLAIR: WH1621_SAG_CUBE_FLAIR_20180416155525.json
FLAIR: WH1609_SAG_CUBE_FLAIR_20170623104119.nii
FLAIR: WH1609_SAG_CUBE_FLAIR_20170623104119.json
FLAIR: WH1647_SAG_CUBE_FLAIR_20171204135728.nii
FLAIR: WH1647_SAG_CUBE_FLAIR_20171204135728.json
FLAIR: BR025_SAG_CUBE_FLAIR_20180314150410.nii
FLAIR: BR025_SAG_CUBE_FLAIR_20180314150410.json
FLAIR: SH033_SAG_CUBE_FLAIR_20170504154607.nii
FLAIR: SH033_SAG_CUBE_FLAIR_20170504154607.json
FLAIR: MN008_SAG_CUBE_FLAIR_20170406135804.json
FLAIR: MN008_SAG_CUBE_FLAIR_20170406135804.nii
FLAIR: MN042_SAG_CUBE_FLAIR_20170919135612.nii
FLAIR: MN042_SAG_CUBE_FLAIR_20170919135612.json
FLAIR: SH022_SAG_CUBE_FLAIR_20170307144621.nii
FLAIR: SH022_SAG_CUBE_FLAIR_20170307144621.json
T2: SH022_SAG_CUBE_T2_20170307144621.nii
T2: SH022_SAG_CUBE_T2_20170307144621.json
FLAIR: WH1653_SAG_CUBE_FLAIR_20180213094821.nii
FLAIR: WH1653_SAG_CUBE_FLAIR_20180213094821.json
FLAIR: BR017_SAG_CUBE_FLAIR_20171214150846.json
FLAIR: