In [131]:
import pandas as pd
import os
import numpy as np

In [132]:
# Label file to re-bin; it is read as a tab-separated table with pd.read_csv
# in a later cell, and the output file name is derived from this path.
df_path = 'parelsnoer/Labels_parelsnoer_annotated_extensive3_pruned.txt'
# Key into all_binnings_dict selecting the binning scheme to apply; it is also
# used as the suffix of the new label column and of the output file name.
binningName = 'DDS_binning'

In [133]:
# Original label scheme: label name -> integer class index.
# A name's position in the list below is its index (starting at 0).
original_labelmap = {
    name: index
    for index, name in enumerate([
        "T1",
        "T1_c",
        "T2",
        "T2-FLAIR",
        "PD",
        "SWI",
        "GRE",
        "T2*",
        "DWI",
        "ADC",
        "BOLD",
        "angio",
        "PWI",
        "ASL",
        "DTI",
        "Other",
    ])
}

In [134]:
# Available binning schemes: scheme name -> {bin name -> original label names
# that are merged into that bin}. Labels not listed in any bin of a scheme are
# kept as separate classes by createBinning.
all_binnings_dict = {}

all_binnings_dict['DDS_binning'] = {
    'non_DDS': ['SWI', 'GRE', 'T2*', 'ADC', 'BOLD', 'angio', 'ASL', 'DTI'],
}

all_binnings_dict['alternative_binning'] = {
    'diffusion': ['DWI', 'ADC', 'DTI'],
    'perfusion': ['BOLD', 'ASL', 'angio', 'PWI'],
    'suscept': ['SWI', 'GRE', 'T2*'],
}

In [135]:
def createBinning(original_labelmap, binningDict):
    """Merge groups of original classes into bins and renumber all classes.

    Every key of ``binningDict`` becomes one new class that absorbs all the
    original labels listed under it. Bins are numbered first, in the
    iteration order of ``binningDict``. Each original label that was not put
    in any bin keeps a class of its own and is numbered afterwards, in the
    iteration order of ``original_labelmap``.

    Parameters
    ----------
    original_labelmap : dict
        Maps label name -> non-negative integer class index. It is not
        modified.
    binningDict : dict
        Maps bin name -> iterable of original label names to merge.

    Returns
    -------
    new_labelmap : dict
        Maps new class name (bin name or unchanged label name) -> new index.
    transfer_array : numpy.ndarray
        Integer lookup table with ``transfer_array[old_index] == new_index``.
        Its length is the largest original index + 1, so label maps with
        non-contiguous indices are supported; slots that correspond to no
        original label hold -1.
    class_counter : int
        Number of classes in the new scheme.
    unchangedLabels : dict
        The entries of ``original_labelmap`` (name -> original index) that
        were not merged into any bin.

    Raises
    ------
    KeyError
        If a bin lists a name that is not in ``original_labelmap``, or if the
        same label is listed in more than one bin (or twice in one bin).
    """
    remaining_labels = original_labelmap.copy()
    new_labelmap = {}
    # Size the table by the largest index rather than the number of labels so
    # that a label map with gaps does not index out of bounds. Unfilled slots
    # are -1 instead of 0 so they cannot be mistaken for the first class.
    table_size = max(original_labelmap.values(), default=-1) + 1
    transfer_array = np.full(table_size, -1, dtype=int)
    class_counter = 0

    for bin_name, bin_items in binningDict.items():
        new_labelmap[bin_name] = class_counter
        for item in list(bin_items):
            if item not in original_labelmap:
                raise KeyError(
                    f"label {item!r} in bin {bin_name!r} is not in the original label map"
                )
            if item not in remaining_labels:
                raise KeyError(
                    f"label {item!r} in bin {bin_name!r} was already assigned to a bin"
                )
            transfer_array[remaining_labels.pop(item)] = class_counter
        class_counter += 1

    # Whatever is left was not binned: each label gets its own new class.
    for name, old_index in remaining_labels.items():
        new_labelmap[name] = class_counter
        transfer_array[old_index] = class_counter
        class_counter += 1

    return new_labelmap, transfer_array, class_counter, remaining_labels
    

In [136]:
# Build the merged label scheme for the binning selected by `binningName`.
selected_binning = all_binnings_dict[binningName]
(
    new_labelmap,
    transfer_array,
    class_counter,
    unchangedLabels,
) = createBinning(original_labelmap, selected_binning)

In [137]:
def apply_binning_to_df(original_df, transfer_array, suffix=""):
    """Return a copy of ``original_df`` with an added re-binned label column.

    The new column is named ``label_<suffix>`` (``label_`` when ``suffix`` is
    empty) and holds ``int(transfer_array[label])`` for every value in the
    ``label`` column. ``original_df`` itself is not modified.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Must contain an integer ``label`` column.
    transfer_array : array-like
        One-dimensional lookup table indexed by original label.
    suffix : str, optional
        Appended to ``label_`` to form the new column name.

    Returns
    -------
    pandas.DataFrame
        Copy of ``original_df`` with the extra column.

    Raises
    ------
    IndexError
        If any label is negative or not smaller than ``len(transfer_array)``.
        Negative labels are rejected explicitly because plain indexing would
        silently wrap around to the end of the table.
    """
    lookup = np.asarray(transfer_array)
    labels = original_df['label'].to_numpy()

    if labels.size == 0:
        # An empty frame may carry a non-integer dtype; make it indexable.
        labels = labels.astype(np.intp)
    elif labels.min() < 0 or labels.max() >= len(lookup):
        raise IndexError(
            f"labels must lie in [0, {len(lookup) - 1}]; "
            f"found values from {labels.min()} to {labels.max()}"
        )

    df = original_df.copy()
    # One vectorized table lookup instead of a Python call per row.
    df[f'label_{suffix}'] = lookup[labels].astype(np.int64)
    return df

In [138]:
# Load the tab-separated label file. Column names and dtypes are supplied
# explicitly, so the first line of the file is read as data, not as a header.
column_dtypes = {'ID': str, 'label': int, 'extra': int}
origin_df = pd.read_csv(
    df_path,
    sep='\t',
    names=list(column_dtypes),
    dtype=column_dtypes,
)

In [139]:
# Add the re-binned label column (named label_<binningName>) to a copy of the
# loaded frame; origin_df itself is left unchanged.
binned_df = apply_binning_to_df(origin_df, transfer_array, suffix=binningName)

In [140]:
# Display the frame to eyeball the original label next to the re-binned label.
binned_df

Unnamed: 0,ID,label,extra,label_DDS_binning
0,/data/scratch/r098375/data/parelsnoer_new/NIFT...,7,0,0
1,/data/scratch/r098375/data/parelsnoer_new/NIFT...,7,0,0
2,/data/scratch/r098375/data/parelsnoer_new/NIFT...,7,0,0
3,/data/scratch/r098375/data/parelsnoer_new/NIFT...,7,0,0
4,/data/scratch/r098375/data/parelsnoer_new/NIFT...,7,0,0
...,...,...,...,...
35455,/data/scratch/r098375/data/parelsnoer_new/NIFT...,15,0,8
35456,/data/scratch/r098375/data/parelsnoer_new/NIFT...,15,0,8
35457,/data/scratch/r098375/data/parelsnoer_new/NIFT...,15,0,8
35458,/data/scratch/r098375/data/parelsnoer_new/NIFT...,15,0,8


In [141]:
# Sanity check: for every new (binned) class, count how many rows came from
# each original label.
binned_df.groupby(f'label_{binningName}')['label'].value_counts()

label_DDS_binning  label
0                  14       9600
                   7        7350
                   13        990
                   5          75
1                  0        9195
2                  1          45
4                  3        2475
7                  12        495
8                  15       5235
Name: count, dtype: int64

In [142]:
# Show the new class name -> new class index mapping produced by createBinning.
new_labelmap

{'non_DDS': 0,
 'T1': 1,
 'T1_c': 2,
 'T2': 3,
 'T2-FLAIR': 4,
 'PD': 5,
 'DWI': 6,
 'PWI': 7,
 'Other': 8}

In [143]:
# Write the re-binned labels to "<input path without extension>_<binningName><extension>"
# in the same tab-separated, headerless, index-free layout as the input, with
# the binned label column taking the place of the original label column.
basename, extension = os.path.splitext(df_path)
outname = f'{basename}_{binningName}{extension}'
output_columns = ['ID', f'label_{binningName}', 'extra']
binned_df.to_csv(
    outname,
    sep='\t',
    columns=output_columns,
    header=False,
    index=False,
)