In [14]:
import pandas as pd
import os
import numpy as np

In [15]:
# Key into all_binnings_dict selecting the binning scheme; the same string is
# reused as the suffix of the new label column and of the output file name.
binningName = 'alternative_binning'
# Tab-separated label file that gets re-binned.
df_path = 'parelsnoer/parelsnoer_Labels_extensive3_pruned.txt'

In [16]:
# Original label name -> integer class index. The names are listed in index
# order, so each name's position in the list is its class index (0..15).
original_labelmap = {
    label_name: class_index
    for class_index, label_name in enumerate([
        "T1",
        "T1_c",
        "T2",
        "T2-FLAIR",
        "PD",
        "SWI",
        "GRE",
        "T2*",
        "DWI",
        "ADC",
        "BOLD",
        "angio",
        "PWI",
        "ASL",
        "DTI",
        "Other",
    ])
}

In [17]:
# Available binning schemes: scheme name -> {bin name -> list of original
# label names that are merged into that bin}.
all_binnings_dict = dict(
    DDS_binning=dict(
        non_DDS=['SWI', 'GRE', 'T2*', 'ADC', 'BOLD', 'angio', 'ASL', 'DTI'],
    ),
    alternative_binning=dict(
        diffusion=['DWI', 'ADC', 'DTI'],
        perfusion=['BOLD', 'ASL', 'angio', 'PWI'],
        suscept=['SWI', 'GRE', 'T2*'],
    ),
)

In [18]:
def createBinning(original_labelmap, binningDict):
    """Merge groups of original labels into bins and renumber all classes.

    Every bin in ``binningDict`` receives one new class index, assigned in the
    dict's iteration order starting at 0 (a bin without members still consumes
    an index). Every original label that is not a member of any bin keeps a
    class of its own; these are numbered after the bins, in the order they
    appear in ``original_labelmap``.

    Parameters
    ----------
    original_labelmap : dict
        Label name -> original integer class index. The indices are used as
        positions in a lookup array of length ``len(original_labelmap)``.
        The dict itself is not modified.
    binningDict : dict
        Bin name -> iterable of original label names merged into that bin.

    Returns
    -------
    new_labelmap : dict
        Bin name or unbinned label name -> new class index.
    transfer_array : numpy.ndarray
        Integer lookup table; ``transfer_array[old_index]`` is the new class
        index of the original class ``old_index``.
    class_counter : int
        Total number of new classes.
    unchangedLabels : dict
        The unbinned labels, mapped to their *original* class indices.

    Raises
    ------
    KeyError
        If a bin lists a label that is not in ``original_labelmap``, or a
        label is listed more than once across the bins.
    """
    remaining_labels = original_labelmap.copy()
    new_labelmap = {}
    # Integer dtype: the table holds class indices, so callers can use the
    # looked-up values directly without casting from float.
    transfer_array = np.zeros(len(original_labelmap), dtype=int)
    class_counter = 0
    for bin_name, bin_items in binningDict.items():
        new_labelmap[bin_name] = class_counter
        for item in list(bin_items):
            if item not in original_labelmap:
                raise KeyError(
                    f"label {item!r} in bin {bin_name!r} is not in original_labelmap"
                )
            if item not in remaining_labels:
                raise KeyError(
                    f"label {item!r} in bin {bin_name!r} is already assigned to a bin"
                )
            # Popping marks the label as binned and yields its original index.
            transfer_array[remaining_labels.pop(item)] = class_counter
        class_counter += 1
    # Whatever was never popped is unbinned and gets a class of its own.
    for name, original_index in remaining_labels.items():
        new_labelmap[name] = class_counter
        transfer_array[original_index] = class_counter
        class_counter += 1
    unchangedLabels = remaining_labels
    return new_labelmap, transfer_array, class_counter, unchangedLabels
    

In [19]:
# Build the new label map and the old-index -> new-index lookup table for the
# binning scheme selected by binningName.
(
    new_labelmap,
    transfer_array,
    class_counter,
    unchangedLabels,
) = createBinning(
    original_labelmap,
    all_binnings_dict[binningName],
)

In [20]:
def apply_binning_to_df(original_df, transfer_array, suffix=""):
    """Return a copy of ``original_df`` with a binned-label column added.

    The new column is named ``'label_' + suffix`` (so the default empty
    suffix yields ``'label_'``) and holds, for every row,
    ``transfer_array[row['label']]`` converted to an integer.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Must contain an integer-typed ``'label'`` column. Not modified.
    transfer_array : array-like
        Lookup table; position ``i`` holds the new class of original label
        ``i``.
    suffix : str, optional
        Appended to ``'label_'`` to form the name of the new column.

    Returns
    -------
    pandas.DataFrame
        Copy of ``original_df`` including the new column.

    Raises
    ------
    IndexError
        If a label lies beyond the bounds of ``transfer_array`` or the
        ``'label'`` column is not integer-typed.
    """
    df = original_df.copy()
    lookup = np.asarray(transfer_array)
    # One fancy-indexing lookup for the whole column instead of a Python call
    # per row; the cast truncates toward zero exactly like int() would.
    df['label_' + suffix] = lookup[df['label'].to_numpy()].astype(np.int64)
    return df

In [21]:
# Load the tab-separated label file. Column names are supplied explicitly via
# `names`, and each column is read with a fixed dtype.
origin_df = pd.read_csv(
    df_path,
    sep='\t',
    names=['ID', 'label', 'extra'],
    dtype=dict(ID=str, label=int, extra=int),
)

In [22]:
# Add the re-binned label column (named 'label_' + binningName) to a copy of
# the loaded table.
binned_df = apply_binning_to_df(
    original_df=origin_df,
    transfer_array=transfer_array,
    suffix=binningName,
)

In [23]:
# Display the binned table; the rendered output below is abbreviated with "..." rows.
binned_df

Unnamed: 0,ID,label,extra,label_alternative_binning
0,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0
1,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0
2,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0
3,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0
4,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0
...,...,...,...,...
11140,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0
11141,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0
11142,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0
11143,/trinity/home/r098375/DDS/data/parelsnoer/NIFT...,14,0,0


In [24]:
# Sanity check of the binning: row counts per (new binned label, original label) pair.
binned_df.groupby(f'label_{binningName}')['label'].value_counts()

label_alternative_binning  label
0                          14       7305
1                          13        990
2                          5          30
3                          0        2520
4                          1          30
5                          2          75
8                          15        195
Name: count, dtype: int64

In [25]:
# Show the name -> new class index mapping returned by createBinning.
new_labelmap

{'diffusion': 0,
 'perfusion': 1,
 'suscept': 2,
 'T1': 3,
 'T1_c': 4,
 'T2': 5,
 'T2-FLAIR': 6,
 'PD': 7,
 'Other': 8}

In [26]:
# Write the re-binned labels next to the input file, inserting the binning
# name before the extension. The output has the same three-column,
# tab-separated, header-less layout, with the binned label column in place of
# the original label.
basename, extension = os.path.splitext(df_path)
outname = f'{basename}_{binningName}{extension}'
binned_df.to_csv(
    outname,
    sep='\t',
    header=False,
    index=False,
    columns=['ID', f'label_{binningName}', 'extra'],
)