# Project NaCl v. 2

In [1]:
import os
import fnmatch
import json
import pandas as pd

### File name parsing

In [2]:
def extract_notes(name):
    """
    Pull the first parenthesised note out of a name before it is parsed
    --------------------------------------------------------
    input:
        name : string
            sequence name that may contain a note within parentheses

    return:
        name : string
            the name with the note removed; the single character on each
            side of the parentheses (normally a separator) is dropped and
            replaced by one '_'
        notes : string or None
            the note including its parentheses, with '_' turned into '-';
            None when the name holds no usable '(...)' pair
    """
    ed = name.find(')')
    # -1: no closing parenthesis at all; 0: nothing can precede it.
    if ed <= 0:
        return name, None

    # The opener is the last '(' seen before the first ')'.
    op = name.rfind('(', 0, ed)
    if op == -1:
        # A stray ')' without an opener is not a note; leave the name alone.
        return name, None

    notes = name[op:ed + 1].replace('_', '-')
    # max(..., 0) keeps a note at the very start of the name from turning
    # the slice into name[:-1], which would leave the note in place.
    name = name[:max(op - 1, 0)] + '_' + name[ed + 2:]

    return name, notes

In [3]:
def get_info(sequence_name):
    """
    Break a sequence name into its non-empty tokens
    --------------------------------------------------------
    input:
        sequence_name: string
            name whose parts are separated by '_', '-' or '.'

    return:
        str_list : list
            the parts of the name in order, with empty parts discarded
    """
    # Fold the two alternative separators into '_' in a single pass.
    unified = sequence_name.translate(str.maketrans('-.', '__'))
    return [token for token in unified.split('_') if token]

In [4]:
def get_name(info, notes=None):
    """
    Join the parsed tokens back into a file name, adding notes if given
    --------------------------------------------------------
    input:
        info : array-like
            tokens of the name; the last token is used as the extension
        notes : string or None
            notes to be added into the file name; they are placed before
            the second-to-last token

    return:
        name : string
            all tokens but the last joined with '_', followed by '.' and
            the lower-cased last token

    raises:
        IndexError when info is empty
    """
    # Work on a copy so the caller's token list is left untouched.
    parts = list(info)
    if notes:
        parts.insert(-2, notes)
    return '_'.join(parts[:-1]) + '.' + parts[-1].lower()

### Basic Standardization

In [5]:
def base_standard(name, trash=None):
    """
    clean up the user input sequence name into a standardized naming convention for sorting later
             MEN --> MN
        WHITTIER --> WH
        SAGITTAL --> SAG
           AXIAL --> AX
         CORONAL --> COR
         OBLIQUE --> OBL
              RT --> RIGHT
              LT --> LEFT
    --------------------------------------------------------
    input:
        name: string
            name to be standardized
        trash : list or None
            list that receives rejected names; when omitted, a module-level
            list called `trash` is used if one exists

    return:
        n_name : string or None
            standardized name with all tokens joined by '_' (the extension
            becomes the last token); None when the name is rejected
    """
    if trash is None:
        # Fall back to the notebook's module-level list so calls that pass
        # only `name` keep recording rejects; None if no such list exists.
        trash = globals().get('trash')

    # NOTE: these are plain substring replacements, so they also rewrite text
    # inside longer words (e.g. 'MULTI' becomes 'MULEFTI', which
    # filt_diffusion matches on). Left unchanged on purpose.
    n_name = name.replace('MEN', 'MN').replace('WHITTIER', 'WH')
    n_name = n_name.replace('SAGITTAL', 'SAG').replace('AXIAL', 'AX').replace('CORONAL', 'COR').replace('OBLIQUE', 'OBL')
    n_name = n_name.replace('RT', 'RIGHT').replace('LT', 'LEFT')

    info = get_info(n_name)

    # The token before the extension must end in a digit; names with fewer
    # than two tokens cannot satisfy that and are rejected instead of
    # raising IndexError.
    if len(info) < 2 or not info[-2][-1].isdigit():
        if trash is not None:
            trash.append(name)
        return None

    # Turn a bare 'RE' token into a '(RE)' note placed before the
    # second-to-last token.
    if 'RE' in info:
        info.remove('RE')
        info.insert(-2, '(RE)')

    # ID does not follow designation [2-3 letter cohort designation][3-4 number designation]
    if info[0].isdigit():
        info[0] = 'WH' + info[0]

    return '_'.join(info)

### Rename

In [6]:
def filt_FSPGR(name, trash):
    """
    Filter and rebuild the name of an FSPGR file
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt file name, or None when the name contains a 'T1' or
            'BRAVO' token
    """
    stripped, notes = extract_notes(name)
    tokens = get_info(stripped)

    if 'T1' in tokens or 'BRAVO' in tokens:
        trash.append(name)
        return None

    return get_name(tokens, notes)

In [7]:
def filt_CUBE(name, trash):
    """
    Filter and rebuild the name of a CUBE-type file
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt file name, or None when the name contains a '3D' or
            'BRAVO' token
    """
    stripped, notes = extract_notes(name)
    tokens = get_info(stripped)

    if '3D' in tokens or 'BRAVO' in tokens:
        trash.append(name)
        return None

    return get_name(tokens, notes)

In [8]:
def filt_minimal(name, trash):
    """
    Rebuild a file name without filtering anything out
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            accepted for a uniform signature with the other filters;
            never modified here
    output:
        n_name : str
            rebuilt file name, with any parenthesised note moved in front
            of the second-to-last token
    """
    stripped, notes = extract_notes(name)
    return get_name(get_info(stripped), notes)

In [9]:
def filt_diffusion(name, trash):
    """
    Filter and rebuild the name of a diffusion file
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives the name when the file is filtered out; the entry is
            the name after the parenthesis rewrite below, not the raw input
    output:
        n_name : str or None
            rebuilt file name with an 'AX' token guaranteed, or None when
            the name contains a 'MAP', 'MAPS' or 'MULEFTI' token
    """
    # Wrap the direction count and ASSET markers in parentheses so they are
    # picked up as notes.
    name = name.replace('30_DIRECTION', '(30_DIRECTIONS)')
    name = name.replace('ASSET', '(ASSET)')

    stripped, notes = extract_notes(name)
    # Any note whose last character before ')' is not 'S' gets an 'S' added.
    if notes and notes[-2] != 'S':
        notes = notes[:-1] + 'S)'

    tokens = get_info(stripped)
    if 'AX' not in tokens:
        tokens.insert(1, 'AX')

    if not {'MAP', 'MAPS', 'MULEFTI'}.isdisjoint(tokens):
        trash.append(name)
        return None

    return get_name(tokens, notes)

In [10]:
def filt_ASL(name, trash):
    """
    Filter and rebuild the name of an ASL file
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt file name with an 'AX' token guaranteed, or None when
            the name contains a '3D', '2025' or '2525' token
    """
    stripped, notes = extract_notes(name)

    tokens = get_info(stripped)
    if 'AX' not in tokens:
        tokens.insert(1, 'AX')

    if not {'3D', '2025', '2525'}.isdisjoint(tokens):
        trash.append(name)
        return None

    return get_name(tokens, notes)

In [11]:
def filt_SWAN(name, trash):
    """
    Filter and rebuild the name of a SWAN file
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt file name, or None when the name contains a '3D' or
            'MIDBRAIN' token
    """
    stripped, notes = extract_notes(name)
    tokens = get_info(stripped)

    if '3D' in tokens or 'MIDBRAIN' in tokens:
        trash.append(name)
        return None

    return get_name(tokens, notes)

In [12]:
def filt_CSF(name, trash, c2, aq):
    """
    Filter, rebuild and route the name of a CSF flow file
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
        c2 : list
            receives the rebuilt name when the file has a 'C2' token
        aq: list
            receives the rebuilt name when the file has an 'AQ' token
    output:
        n_name : str or None
            rebuilt name when the file is neither filtered out nor routed
            to c2 / aq; None in every other case
    """
    info = get_info(name)

    if any(word in info for word in ['5T', 'DEEP', 'CF', 'V']):
        trash.append(name)
        return None

    if 'VENC' not in info:
        trash.append(name)
        return None

    index = info.index('VENC')
    # 'VENC' must be followed by a numeric token; a trailing 'VENC' is
    # rejected here instead of raising IndexError on the look-ahead.
    if index + 1 >= len(info) or not info[index + 1].isdigit():
        trash.append(name)
        return None

    # Wrap "VENC <number>" in parentheses so that extract_notes treats the
    # pair as a note and get_name re-inserts it near the end of the name.
    info[index] = '(' + info[index]
    info[index + 1] = info[index + 1] + ')'

    n_name = get_name(info)
    n_name, notes = extract_notes(n_name)
    n_name = get_name(get_info(n_name), notes).replace('CSF_FLOW', 'PC').replace('CSF', 'PC')

    if 'C2' in info:
        c2.append(n_name)
        return None

    if 'AQ' in info:
        aq.append(n_name)
        return None

    return n_name

In [13]:
def filt_blood(name, trash):
    """
    Filter and rebuild the name of a blood-flow file
    ---------------------------------------------------------------
    input:
        name : str
            standardized name of the file
        trash : list
            receives `name` when the file is filtered out
    output:
        n_name : str or None
            rebuilt name with the "(VENC-<number>)" note moved near the end,
            or None when the name has an 'MRA' token, lacks 'VENC', or has
            no numeric token right after 'VENC'
    """
    info = get_info(name)

    if 'MRA' in info:
        trash.append(name)
        return None

    if 'VENC' not in info:
        trash.append(name)
        return None

    index = info.index('VENC')
    # 'VENC' must be followed by a numeric token; a trailing 'VENC' is
    # rejected here instead of raising IndexError on the look-ahead.
    if index + 1 >= len(info) or not info[index + 1].isdigit():
        trash.append(name)
        return None

    # Wrap "VENC <number>" in parentheses so that extract_notes treats the
    # pair as a note and get_name re-inserts it near the end of the name.
    info[index] = '(' + info[index]
    info[index + 1] = info[index + 1] + ')'

    n_name = get_name(info)
    n_name, notes = extract_notes(n_name)
    n_name = get_name(get_info(n_name), notes)

    return n_name

### Main

In [14]:
root = '/media/ke/8tb_part2/FSL_work/all_info'


accept = []    # stems of JSON sidecars that pass the checks below
bad_json = []  # stems of JSON sidecars that could not be read or lack a field
reject = []    # stems of JSON sidecars that fail the checks below

# Files matching these patterns are rejected when their sidecar reports a
# '2D' acquisition. The last pattern used to be spelled '*FSGPR*', which
# never matched the FSPGR files handled by the rest of the notebook.
volumetric_patterns = ('*FLAIR*', '*T2*', '*FSPGR*')

# Classify every JSON sidecar under `root` by series number and, for the
# patterns above, by acquisition type.
for path, subdirs, files in os.walk(root):
    for file in files:
        name, extension = os.path.splitext(file)
        if extension != '.json':
            continue
        try:
            with open(os.path.join(path, file)) as json_file:
                data = json.load(json_file)
            series_num = data['SeriesNumber']
            # Only series numbers strictly between 2 and 100 are kept.
            if not 2 < series_num < 100:
                reject.append(name)
                continue
            if any(fnmatch.fnmatch(file, pattern) for pattern in volumetric_patterns):
                mr_aq_type = data['MRAcquisitionType']
                if mr_aq_type == '2D':
                    reject.append(name)
                    continue
            accept.append(name)
        except (OSError, ValueError, KeyError, TypeError):
            # Unreadable file, invalid JSON (JSONDecodeError is a ValueError),
            # missing field, or a field of an unexpected type.
            bad_json.append(name)

# One list per category; entries are (original file name, new name) pairs
# unless noted otherwise.
fspgr = []
flair = []
t2 = []
bold = []
dwi = []
dti = []
dki = []
swan = []
asl = []
candy_cane = []
arch = []
c2 = []         # filled by filt_CSF with new names only
aq = []         # filled by filt_CSF with new names only
csf_other = []  # new names only, matching c2 / aq
carotid = []
probe = []  # MRS; original file names only, no renaming is applied

trash = []

# Only referenced by debugging code that has since been removed; kept so the
# module-level names still exist.
c2_len = 0
aq_len = 0

# Set copy for constant-time membership tests; `accept` itself stays a list.
accepted_names = set(accept)

# Route every accepted file to the list of its category. The first matching
# pattern wins, so the order of the branches matters.
for path, subdirs, files in os.walk(root):
    for file in files:
        name, extension = os.path.splitext(file)
        if name not in accepted_names:
            continue

        file_upper = base_standard(file.upper())
        if not file_upper:
            trash.append(file)
            continue

        if fnmatch.fnmatch(file_upper, '*FSPGR*'):
            n_name = filt_FSPGR(file_upper, trash)
            if n_name is not None:
                fspgr.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*FLAIR*'):
            n_name = filt_CUBE(file_upper, trash)
            if n_name is not None:
                flair.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*T2*'):
            n_name = filt_CUBE(file_upper, trash)
            if n_name is not None:
                t2.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*BOLD*'):
            n_name = filt_minimal(file_upper, trash)
            if n_name is not None:
                bold.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*DWI*'):
            n_name = filt_diffusion(file_upper, trash)
            if n_name is not None:
                dwi.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*DTI*'):
            n_name = filt_diffusion(file_upper, trash)
            if n_name is not None:
                dti.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*DKI*'):
            n_name = filt_diffusion(file_upper, trash)
            if n_name is not None:
                dki.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*ASL*'):
            n_name = filt_ASL(file_upper, trash)
            if n_name is not None:
                asl.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*SWAN*'):
            n_name = filt_SWAN(file_upper, trash)
            if n_name is not None:
                # Previously appended to `asl`, leaving `swan` empty.
                swan.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*CSF*'):
            # filt_CSF itself files C2 / aqueduct scans into c2 / aq.
            n_name = filt_CSF(file_upper, trash, c2, aq)
            if n_name is not None:
                csf_other.append(n_name)
        elif fnmatch.fnmatch(file_upper, '*CAROTID*'):
            n_name = filt_blood(file_upper, trash)
            if n_name is not None:
                # Previously appended to `asl`, leaving `carotid` empty.
                carotid.append((file, n_name))
        elif 'LOC' not in file_upper and fnmatch.fnmatch(file_upper, '*CANDY_CANE*'):
            n_name = filt_minimal(file_upper, trash)
            if n_name is not None:
                # Previously appended to `bold`, leaving `candy_cane` empty.
                candy_cane.append((file, n_name))
        elif 'LOC' not in file_upper and fnmatch.fnmatch(file_upper, '*ARCH*'):
            n_name = filt_blood(file_upper, trash)
            if n_name is not None:
                # Previously appended to `asl`, leaving `arch` empty.
                arch.append((file, n_name))
        elif fnmatch.fnmatch(file_upper, '*PROBE*'):
            # Previously appended to `asl`, leaving `probe` empty.
            probe.append(file)

# Label -> collected list for every category, in reporting order.
filtered = dict([
    ('T1', fspgr),
    ('FLAIR', flair),
    ('T2', t2),
    ('BOLD', bold),
    ('DWI', dwi),
    ('DTI', dti),
    ('DKI', dki),
    ('SWAN', swan),
    ('ASL', asl),
    ('CANDY CANE', candy_cane),
    ('ARCH', arch),
    ('C2', c2),
    ('AQ', aq),
    ('CSF_OTHER', csf_other),
    ('CAROTID', carotid),
    ('PROBE', probe),
])

## Renaming the CVR Gas Files

In [15]:
import glob

In [16]:
# Folder that is globbed for the per-patient scan directories.
nifti_dir = '/media/ke/8tb_part2/FSL_work/all_info/'
# Folder that is listed for the '*EDITS.txt' gas files.
path = '/home/ke/Desktop/all_gas/'

In [17]:
# Gas files are the entries of `path` whose name ends in 'EDITS.TXT',
# compared case-insensitively.
txt_files = [file for file in os.listdir(path) if file.upper().endswith('EDITS.TXT')]
# txt_files.sort()
# for file in txt_files:
#     print(file)

# Column accumulators for the table built below; one entry per gas file.
p_dic = {'ID' : [],
         'Date' : [],
         'EndTidal_Path' : []}

for file in txt_files:
    # Separate any parenthesised note, then split the rest into '_' tokens.
    file, notes = extract_notes(file)
    file = file.split('_')
    ## NOTE: THIS NEEDS TO BE ALTERED WHEN CONVERTING TO AUTOMATED SCRIPT
    # Anything other than exactly four tokens has its extra middle tokens
    # folded into the note.
    # NOTE(review): the slice is file[3:-2] when a note already exists but
    # file[3:-1] when it does not -- confirm the difference is intended.
    if len(file) != 4:
#         print(file)
        if notes:
            notes = notes[:-1]+'_'+'_'.join(file[3:-2])+')'
        else:
            notes = '('+'_'.join(file[3:-1])+')'
    # Rebuild the token list as: token 0, token 1, [note], token 2, last token.
    name = file[:2]
    if notes:
        name.append(notes)
    name.append(file[2])
    name.append(file[-1])
    file = name
    # ID is the upper-cased first token followed directly by the second one.
    p_id = file[0].upper() + file[1]
    p_dic['ID'].append(p_id)
    # After the rebuild, the second-to-last token is the original third token.
    p_dic['Date'].append(file[-2])
    # NOTE(review): this path is assembled from the rebuilt tokens, so it can
    # differ from the file name on disk whenever a note was moved or created
    # -- verify against how the path is used.
    p_dic['EndTidal_Path'].append(path+'_'.join(file))

p_df = pd.DataFrame(p_dic)

# patient_BOLDS_header = [p_df.ID[i]+'_BOLD_'+p_df.Date[i] for i in range(len(p_df))]
# pd.set_option('display.max_rows', p_df.shape[0]+1)
# The sorted copy is not assigned, so p_df itself keeps its original order.
p_df.sort_values('ID').reset_index(drop=True)

Unnamed: 0,ID,Date,EndTidal_Path
0,BR001,20170511,/home/ke/Desktop/all_gas/BR_001_20170511_EDITS...
1,BR006,20171107,/home/ke/Desktop/all_gas/BR_006_20171107_EDITS...
2,BR011,20190226,/home/ke/Desktop/all_gas/BR_011_20190226_edits...
3,BR011,20171101,/home/ke/Desktop/all_gas/BR_011_20171101_EDITS...
4,BR012,20171108,/home/ke/Desktop/all_gas/BR_012_20171108_EDITS...
...,...,...,...
139,WH1707,20190401,/home/ke/Desktop/all_gas/WH_1707_20190401_edit...
140,WH1716,20190213,/home/ke/Desktop/all_gas/WH_1716_20190213_edit...
141,WH1721,20190813,/home/ke/Desktop/all_gas/WH_1721_20190813_EDIT...
142,WH1726,20190702,/home/ke/Desktop/all_gas/WH_1726_20190702_EDIT...


In [18]:
# For every gas file, look for the matching scan directory that has a BOLD
# sub-folder and print the proposed gas file name next to the recorded path.
for i in range(len(p_df)):

    # Skip the row when no directory with a BOLD sub-folder can be found.
    patient_dir = glob.glob(nifti_dir + p_df.loc[i, 'ID'] + '*' + p_df.loc[i, 'Date'])
    if not patient_dir or not os.path.exists(patient_dir[0] + '/BOLD/'):
        # Retry with the last four characters of the date moved to the front.
        date = p_df.loc[i, 'Date'][-4:] + p_df.loc[i, 'Date'][:-4]
        patient_dir = glob.glob(nifti_dir + p_df.loc[i, 'ID'] + '*' + date)
        if not patient_dir or not os.path.exists(patient_dir[0] + '/BOLD/'):
            continue
        # Write through .loc: the chained form `p_df.Date[i] = date` may
        # modify a temporary copy and leave p_df unchanged.
        p_df.loc[i, 'Date'] = date

    patient_dir = patient_dir[0]  # only the first match is used
    n_name = patient_dir.split('/')[-1].replace('WHITTIER', 'WH')
    n_name, notes = extract_notes(n_name)
    if notes:
        # Put the note back in front of the second-to-last '_' token.
        n_name = n_name.split('_')
        n_name.insert(-2, notes)
        n_name = '_'.join(n_name)
    print(path + n_name + '_EDITS.txt')
    print(p_df.loc[i, 'EndTidal_Path'])

/home/ke/Desktop/all_gas/SH004_206_20161220_EDITS.txt
/home/ke/Desktop/all_gas/SH_004_20161220_edits.txt
/home/ke/Desktop/all_gas/BR042_786_20181204_EDITS.txt
/home/ke/Desktop/all_gas/BR_042_20181204_EDITS.txt
/home/ke/Desktop/all_gas/SH003_205_20161216_EDITS.txt
/home/ke/Desktop/all_gas/SH_003_20161216_edits.txt
/home/ke/Desktop/all_gas/SH037_321_20170517_EDITS.txt
/home/ke/Desktop/all_gas/SH_037_20170517_edits.txt
/home/ke/Desktop/all_gas/WH1604_267_20170310_EDITS.txt
/home/ke/Desktop/all_gas/WH_1604_20170310_EDITS.txt
/home/ke/Desktop/all_gas/WH1664_807_20190104_EDITS.txt
/home/ke/Desktop/all_gas/WH_1664_20190104_EDITS.txt
/home/ke/Desktop/all_gas/WH1651_550_20180501_EDITS.txt
/home/ke/Desktop/all_gas/WH_1651_20180501_EDITS.txt
/home/ke/Desktop/all_gas/SH062_1126_20190813_EDITS.txt
/home/ke/Desktop/all_gas/SH_062_20190813_EDITS.txt
/home/ke/Desktop/all_gas/SH011_222_20170131_EDITS.txt
/home/ke/Desktop/all_gas/SH_011_20170131_edits.txt
/home/ke/Desktop/all_gas/WH1406_316_20170511_EDI