### Notebook
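- Prepares validation patient data for figure generation: combines the per-project PHBR-I and PHBR-II score tables, adds a categorical age column, and saves the combined tables as TSV files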

In [1]:
import pandas as pd
import os

### 0. Load data

In [2]:
# Directory that is scanned for the per-project PHBR score files
val_dir = '../data/validation_data'

# Unique project names: every file whose name contains 'PHBR' contributes its
# name with the last two dot-separated parts removed
project_list = {
    '.'.join(fname.split('.')[:-2])
    for fname in os.listdir(val_dir)
    if 'PHBR' in fname
}

In [6]:
# join all driver mutations
def _load_phbr_tables(suffix):
    """Read `<project>.<suffix>.csv` for every project and stack the rows.

    Parameters
    ----------
    suffix : str
        File-name part between the project name and `.csv`
        (`'PHBR_I'` or `'PHBR_II'`).

    Returns
    -------
    pandas.DataFrame
        All per-project tables concatenated row-wise, with the union of their
        columns in sorted order. Empty if `project_list` is empty.
    """
    frames = [
        pd.read_csv(os.path.join(val_dir, '{}.{}.csv'.format(project, suffix)), index_col=0)
        # sorted(): iterating the set directly gives a row order that can
        # change between kernel sessions, making the saved files unstable
        for project in sorted(project_list)
    ]
    if not frames:
        # pd.concat raises on an empty list; keep the old "empty frame" result
        return pd.DataFrame()
    # One concat instead of DataFrame.append in a loop: append was removed in
    # pandas 2.0 and re-copied the accumulated rows on every iteration.
    # sort_index(axis=1) keeps the columns sorted even when all inputs already
    # share one (unsorted) column order, matching what append(sort=True) gave.
    return pd.concat(frames, sort=True).sort_index(axis=1)


phbr_i_df = _load_phbr_tables('PHBR_I')
phbr_ii_df = _load_phbr_tables('PHBR_II')

phs000980.v1.p1.c1_NSCLC
phs001565.v1.p1.c1_GenCorrRespResICB_CancerMDS
ICGC
phs001451.v1.p1.c1_PhaseIMelanomaVac
phs001493.v1.p1.c1_RCC
phs001425.v1.p1.c1
phs000452.v2.p1.c1_MelanomaGenomeSeq
phs001041.v1.p1.c1_MelanomaCTLA4
phs001493.v1.p1.c2_RCC
EGA_WGS
phs001469.v1.p1.c1_desmoMelanoma
ICGC.CLLE-ES
ICGC.THCA-SA
phs001519.v1.p1.c1_GBMPhaseIVaccine


In [5]:
# Preview the first five rows of the combined MHC-I table
phbr_i_df.head(n=5)

Unnamed: 0,PHBR_score,age,age_categorical_3070,mutation_id,project_code,sex,tumor_type
AL4602,0.293223,59.0,,,,male,
AL4602,0.348454,59.0,,,,male,
AU5884,1.551376,64.0,,,,male,
BL3403,2.59567,73.0,,,,female,
CA9903,0.863114,57.0,,,,male,


### 1. Add age categorical column

In [4]:
# Age cut-offs used as bin edges for the categorical age column.
# NOTE(review): the column is named `age_categorical_3070` but the cut-offs
# are 52 and 68 - presumably these are the 30th/70th percentile ages; confirm.
young_thresh, old_thresh = 52, 68

In [5]:
# Bin edges and labels shared by both tables; pd.cut's default right-closed
# intervals give (0, young_thresh], (young_thresh, old_thresh], (old_thresh, 200]
age_bins = [0, young_thresh, old_thresh, 200]
age_labels = ['younger', 'middle', 'older']

# MHC-I first, then MHC-II: write the age category column into each table
for phbr_df in (phbr_i_df, phbr_ii_df):
    phbr_df['age_categorical_3070'] = pd.cut(phbr_df['age'], age_bins, labels=age_labels)

In [6]:
# save: one tab-separated file per MHC class
savepath_i = '../generated_data/validation_patients_I.age_sex_disease.tsv'
savepath_ii = '../generated_data/validation_patients_II.age_sex_disease.tsv'

for phbr_df, savepath in ((phbr_i_df, savepath_i), (phbr_ii_df, savepath_ii)):
    phbr_df.to_csv(savepath, sep='\t')