In [40]:
import pandas as pd
import os
import numpy as np

In [41]:
# Name of the binning scheme to apply; used as a key into `all_binnings_dict`
# and as the suffix of the generated label column / output file.
binningName = "DDS_binning"

# Tab-separated training frame that will be re-labelled.
df_path = "combinedDfs/CombinedTrainingFrame_A2_A3_O3_egd_rss_hb_pruned.txt"

In [42]:
# Original class names, listed in class-index order: the position of a name
# in this tuple is its integer label (0 .. 15).
original_labelmap = {
    label_name: label_index
    for label_index, label_name in enumerate((
        "T1",
        "T1_c",
        "T2",
        "T2-FLAIR",
        "PD",
        "SWI",
        "GRE",
        "T2*",
        "DWI",
        "ADC",
        "BOLD",
        "angio",
        "PWI",
        "ASL",
        "DTI",
        "Other",
    ))
}

In [43]:
# Available binning schemes. Each scheme maps a new bin name to the list of
# original label names that are merged into that bin; labels not listed in a
# scheme are left as their own class by `createBinning`.
all_binnings_dict = dict(
    DDS_binning=dict(
        non_DDS=['SWI', 'GRE', 'T2*', 'ADC', 'BOLD', 'angio', 'ASL', 'DTI'],
    ),
    alternative_binning=dict(
        diffusion=['DWI', 'ADC', 'DTI'],
        perfusion=['BOLD', 'ASL', 'angio', 'PWI'],
        suscept=['SWI', 'GRE', 'T2*'],
    ),
)

In [44]:
def createBinning(original_labelmap, binningDict):
    """Merge groups of original classes into bins and renumber every class.

    Each key of ``binningDict`` becomes one new class, numbered from 0 in
    iteration order, that absorbs all original labels listed under it. Every
    original label that is not listed in any bin keeps a class of its own;
    those are numbered after the bins, in ``original_labelmap`` order.

    Parameters
    ----------
    original_labelmap : dict
        Maps label name -> non-negative integer class index. Not modified.
    binningDict : dict
        Maps new bin name -> iterable of original label names to merge.

    Returns
    -------
    new_labelmap : dict
        Maps bin name / unchanged label name -> new class index.
    transfer_array : numpy.ndarray
        Integer lookup table: ``transfer_array[old_index]`` is the new class
        index. Slots that correspond to no entry of ``original_labelmap``
        hold -1 instead of silently pointing at class 0.
    class_counter : int
        Total number of classes after binning.
    unchangedLabels : dict
        The entries of ``original_labelmap`` that were not put in any bin
        (still carrying their original indices).

    Raises
    ------
    KeyError
        If a bin lists a name that is missing from ``original_labelmap`` or
        that has already been assigned to a bin.
    """
    remaining_labels = original_labelmap.copy()
    new_labelmap = {}

    # Size the table by the largest index rather than by the number of names,
    # so a label map with gaps in its indices does not overflow the table.
    n_slots = max(len(original_labelmap),
                  max(original_labelmap.values(), default=-1) + 1)
    # Class indices are integers; -1 marks "no original label maps here".
    transfer_array = np.full(n_slots, -1, dtype=np.int64)

    class_counter = 0
    for bin_name, bin_items in binningDict.items():
        new_labelmap[bin_name] = class_counter
        for item in list(bin_items):
            if item not in original_labelmap:
                raise KeyError(
                    f"bin '{bin_name}' refers to unknown label '{item}'")
            if item not in remaining_labels:
                raise KeyError(
                    f"label '{item}' is assigned to more than one bin")
            transfer_array[remaining_labels.pop(item)] = class_counter
        class_counter += 1

    # Whatever was not binned keeps its own class, numbered after the bins.
    for name, old_index in remaining_labels.items():
        new_labelmap[name] = class_counter
        transfer_array[old_index] = class_counter
        class_counter += 1

    unchangedLabels = remaining_labels
    return new_labelmap, transfer_array, class_counter, unchangedLabels
    

In [45]:
# Build the new label map and the old-index -> new-index lookup table for the
# binning scheme selected by `binningName`.
(
    new_labelmap,
    transfer_array,
    class_counter,
    unchangedLabels,
) = createBinning(
    original_labelmap=original_labelmap,
    binningDict=all_binnings_dict[binningName],
)

In [46]:
def apply_binning_to_df(original_df, transfer_array, suffix=""):
    """Return a copy of ``original_df`` with an added, re-binned label column.

    Parameters
    ----------
    original_df : pandas.DataFrame
        Must contain an integer-valued ``'label'`` column. Not modified.
    transfer_array : array-like
        Lookup table where ``transfer_array[old_label]`` is the new label.
    suffix : str, optional
        The new column is named ``'label_' + suffix``. With the default
        empty suffix the column is therefore named ``'label_'``.

    Returns
    -------
    pandas.DataFrame
        A copy of the input with the extra integer column.

    Raises
    ------
    IndexError
        If a label falls outside ``transfer_array``.
    """
    df = original_df.copy()
    column_name = f'label_{suffix}'

    lookup = np.asarray(transfer_array)
    labels = df['label'].to_numpy()
    if labels.dtype == object:
        # Python ints held in an object column cannot be used as an index
        # array directly; convert them to a native integer dtype first.
        labels = labels.astype(np.intp)

    # One vectorised table lookup instead of a Python-level call per row.
    df[column_name] = lookup[labels].astype(np.int64)
    return df

In [47]:
# Load the tab-separated training frame; column names and dtypes are supplied
# here explicitly.
origin_df = pd.read_csv(
    df_path,
    sep='\t',
    names=['ID', 'label', 'extra'],
    dtype={'ID': str, 'label': int, 'extra': int},
)

In [48]:
# Add the re-binned label column (named after the binning scheme) to a copy
# of the loaded frame.
binned_df = apply_binning_to_df(
    original_df=origin_df,
    transfer_array=transfer_array,
    suffix=binningName,
)

In [49]:
# Display the binned frame to compare the new label column with the original
# `label` column.
binned_df

Unnamed: 0,ID,label,extra,label_DDS_binning
0,/trinity/home/r098375/DDS/data/ADNI/ADNI3/trai...,3,0,4
1,/trinity/home/r098375/DDS/data/ADNI/ADNI3/trai...,7,0,0
2,/trinity/home/r098375/DDS/data/ADNI/ADNI3/trai...,0,0,1
3,/trinity/home/r098375/DDS/data/ADNI/ADNI3/trai...,3,0,4
4,/trinity/home/r098375/DDS/data/ADNI/ADNI3/trai...,15,0,8
...,...,...,...,...
541180,/trinity/home/r098375/DDS/data/HeartBrain/NIFT...,15,0,8
541181,/trinity/home/r098375/DDS/data/HeartBrain/NIFT...,15,0,8
541182,/trinity/home/r098375/DDS/data/HeartBrain/NIFT...,15,0,8
541183,/trinity/home/r098375/DDS/data/HeartBrain/NIFT...,15,0,8


In [50]:
# Sanity check: for every new class, count how many rows came from each
# original label.
(
    binned_df
    .groupby(f'label_{binningName}')['label']
    .value_counts()
)

label_DDS_binning  label
0                  7         73110
                   10        40515
                   13        20670
                   5         14280
                   11         7305
                   6          5835
                   14         2175
                   9          1395
1                  0         70110
2                  1          5175
3                  2         39540
4                  3         37095
5                  4          3390
6                  8         32925
7                  12        18405
8                  15       169260
Name: count, dtype: int64

In [51]:
# Show the name -> new class index mapping returned by `createBinning`.
new_labelmap

{'non_DDS': 0,
 'T1': 1,
 'T1_c': 2,
 'T2': 3,
 'T2-FLAIR': 4,
 'PD': 5,
 'DWI': 6,
 'PWI': 7,
 'Other': 8}

In [52]:
# Write the re-labelled frame next to the input file, inserting the binning
# name before the file extension. The binned label column is written in place
# of the original `label` column, with no header row and no index column.
basename, extension = os.path.splitext(df_path)
outname = f'{basename}_{binningName}{extension}'
binned_df.to_csv(
    outname,
    sep='\t',
    columns=['ID', f'label_{binningName}', 'extra'],
    header=False,
    index=False,
)