# **Maps patient ID to survival time (days), event (death yes/no) and discrete time group.**

# Imports

In [1]:
import sys
sys.path.insert(0, 'modules/discrete')

In [2]:
import pandas as pd
import pickle
import json
import transform

# Paths

In [3]:
# Input CSV locations, relative to the notebook's working directory.
scan_dataset_path = './data/all_scans.csv'  # read into scan_df
subjects_dataset_path = './data/all_subjects_cleaned.csv'  # read into subjects_df
brats_survival_path = './data/brats_training_2020/survival_info.csv'  # BraTS survival table

# Data

## Brats

In [4]:
# Load the BraTS survival table through the path constant from the Paths
# section, so the file location is defined in exactly one place.
brats_surv = pd.read_csv(brats_survival_path)

In [5]:
# Default every BraTS patient to "death observed"; rows whose survival entry
# contains 'ALIVE' are switched to 0 in a later cell.
brats_surv['DeathObserved'] = 1

In [6]:
# Use the same column name as subjects_df ('surv') so get_target() can be
# applied to both tables.
brats_surv = brats_surv.rename(columns={"Survival_days": "surv"})

In [7]:
# Rows whose survival entry contains 'ALIVE' are censored: no death observed.
# The mask is computed once, on a string view of the column, so the cell can be
# re-run after 'surv' has already been converted to numbers.
alive_mask = brats_surv['surv'].astype(str).str.contains('ALIVE')
brats_surv.loc[alive_mask, 'DeathObserved'] = 0
# NOTE(review): 361 is presumably the follow-up time in days stated in the
# 'ALIVE' entries — confirm against the CSV.
brats_surv.loc[alive_mask, 'surv'] = 361
# Make the whole column numeric. Without this it stays an object column mixing
# digit strings with the integer written above, and that mix would end up in
# the exported JSON. Raises ValueError if any other non-numeric entry remains.
brats_surv['surv'] = pd.to_numeric(brats_surv['surv'])

In [8]:
# Index the table by patient ID and keep only the survival columns.
brats_surv = (
    brats_surv
    .set_index("Brats20ID")
    .drop(columns=['Age', 'Extent_of_Resection'])
)

In [9]:
# Sanity check: index is Brats20ID, remaining columns are surv and DeathObserved.
brats_surv.head(1)

Unnamed: 0_level_0,surv,DeathObserved
Brats20ID,Unnamed: 1_level_1,Unnamed: 2_level_1
BraTS20_Training_001,289,1


## Picture

In [10]:
# Scan table, without its 'Unnamed: 0' column
# (presumably a saved row index — TODO confirm).
scan_df = pd.read_csv(scan_dataset_path).drop(columns='Unnamed: 0')
# Per-subject clinical table; holds the survival time and event columns.
subjects_df = pd.read_csv(subjects_dataset_path)

  interactivity=interactivity, compiler=compiler, result=result)


In [13]:
# List the available columns; only ID, surv and DeathObserved are kept below.
subjects_df.columns

Index(['age', 'ENTvolML', 'ENTside', 'GenderV2', 'KPSpre', 'Chemo',
       'SurgeryExtend', 'surv', 'DeathObserved', 'ID'],
      dtype='object')

In [14]:
# Subject IDs removed from the mapping in the next cell.
# NOTE(review): the reason for excluding these two subjects is not recorded here.
filter_ids = ['BMIAXNAT_S10281', 'BMIAXNAT_S06575']

In [15]:
# Keep only the identifier and the two survival columns, and drop the
# excluded subjects, in a single selection.
keep_columns = ['ID', 'surv', 'DeathObserved']
is_excluded = subjects_df['ID'].isin(filter_ids)
subjects_df = subjects_df.loc[~is_excluded, keep_columns]

In [16]:
# Load the predefined data split. The result is indexed below with the keys
# 'train', 'test' and 'val'; each entry holds the subject IDs of that phase, and
# their order is used to order the rows of the per-phase frames.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
with open('./data/pickles_jsons/ids_per_phase.pkl', 'rb') as file:
    phase_id_dict = pickle.load(file)

In [None]:
def select_phase(subjects, phase_ids):
    """Return the rows of ``subjects`` belonging to one phase, in phase order.

    Parameters
    ----------
    subjects : pandas.DataFrame
        Frame with an 'ID' column.
    phase_ids : sequence
        Subject IDs of the phase. The position of an ID in this sequence gives
        the position of its row in the result; if an ID occurs more than once,
        its last position is used.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows (original index labels kept), sorted to
        follow ``phase_ids``. Working on a copy avoids pandas'
        SettingWithCopyWarning when the temporary rank column is added.
    """
    position = {subject_id: rank for rank, subject_id in enumerate(phase_ids)}
    selected = subjects.loc[subjects['ID'].isin(phase_ids)].copy()
    selected['rank'] = selected['ID'].map(position)
    # `columns=` keyword instead of a positional axis argument, which newer
    # pandas versions no longer accept.
    return selected.sort_values('rank').drop(columns='rank')


df_train = select_phase(subjects_df, phase_id_dict['train'])
df_test = select_phase(subjects_df, phase_id_dict['test'])
df_val = select_phase(subjects_df, phase_id_dict['val'])

In [19]:
# Move the subject ID from a column into the index of each phase frame.
df_train = df_train.set_index('ID')
df_test = df_test.set_index('ID')
df_val = df_val.set_index('ID')

## Cuts Picture

In [21]:
scheme = 'quantiles'
num_cuts = 10


def get_target(df):
    """Return the (survival time, event indicator) arrays of ``df``."""
    return df['surv'].values, df['DeathObserved'].values


labtrans = transform.LabTransDiscreteTime(num_cuts, scheme)

# The cut points are fitted on the training phase only ...
df_train['group'] = labtrans.fit_transform(*get_target(df_train))[0]

# ... and then applied unchanged to the other phases and to BraTS.
for frame in (df_test, df_val, brats_surv):
    frame['group'] = labtrans.transform(*get_target(frame))[0]

In [22]:
# Cut points fitted on the training phase, on the scale of the 'surv' column.
labtrans.cuts

array([   0.,   66.,  141.,  208.,  292.,  363.,  449.,  592.,  829.,
       1785.])

## Without BRATS

In [23]:
# Stack the three phases into one table indexed by subject ID
# (columns: surv, DeathObserved, group).
subjects_df_final = pd.concat([df_train, df_test, df_val])

In [25]:
# With no path argument, to_json() returns the JSON text as a Python str.
subjects_surv_mapping = subjects_df_final.to_json()

In [41]:
# subjects_surv_mapping is already JSON text, so serialising it again stores a
# JSON-encoded *string*; readers must decode twice, as the Example section does.
with open('./data/pickles_jsons/id_surv_mapping_10_groups.json', 'w') as out_file:
    out_file.write(json.dumps(subjects_surv_mapping))

## With BRATS

In [None]:
# Build, serialise and write the BRATS variant in one cell and under its own
# names. Previously this section reused `subjects_df_final` and
# `subjects_surv_mapping` across three cells, so running the write cell without
# the two before it silently saved the non-BRATS mapping to the BRATS file.
subjects_df_final_brats = pd.concat([df_train, df_test, df_val, brats_surv])

# With no path argument, to_json() returns the JSON text as a Python str.
subjects_surv_mapping_brats = subjects_df_final_brats.to_json()

# The text is serialised a second time on purpose, to keep the same on-disk
# format as the non-BRATS file; readers decode twice (see the Example section).
with open('./data/pickles_jsons/id_surv_mapping_10_groups_brats.json', 'w') as f:
    json.dump(subjects_surv_mapping_brats, f)

# Example

In [64]:
# First decode: the file holds a JSON-encoded string, so this yields a str.
# NOTE(review): this path differs from the files written above — confirm it is
# the intended input.
with open('./data/pickles_jsons/id_surv_mapping.json') as f:
    data = json.load(f)

# Second decode: turn that string into the actual mapping.
data_dict = json.loads(data)

In [56]:
# The first json.load yields a str, which is why json.loads is applied to it.
type(data)

str