In [23]:
from utils import *
from pandas import read_csv, DataFrame

In [37]:
# --- Input locations -------------------------------------------------------
metadata_file = 'Breast_collection_models.csv'
hci_bcm_provider_path = join(home, 'HCI-BCM')
patient_sample_path = join(hci_bcm_provider_path, 'HCI-BCM_metadata-patient_sample.tsv')
molecular_sample_path = join(hci_bcm_provider_path, 'HCI-BCM_molecular_metadata-sample.tsv')
biomarker_sheet_path = join(hci_bcm_provider_path, 'biomarker/HCI-BCM_biomarker-Sheet1.tsv')
treatment_sheet_path = join(hci_bcm_provider_path, 'treatment/HCI-BCM_patienttreatment-Sheet1.tsv')

# --- Biomarker constants ---------------------------------------------------
biomarker_platform = 'cytogenetics_immunohistochemistry'
# Receptor-status column of the collection CSV -> marker symbol written to the biomarker sheet.
columns_to_marker = {
    'Patient ER Status': 'ESR1',
    'Patient PR Status': 'PGR',
    'Patient HER2 Status': 'ERBB2',
    'PDX ER Status': 'ESR1',
    'PDX PR Status': 'PGR',
    'PDX HER2 Status': 'ERBB2',
}
# Same six columns, in the mapping's insertion order.
columns_to_extract = list(columns_to_marker)

# --- Load the sheets -------------------------------------------------------
metadata = read_csv(metadata_file)
patient_sample = read_metadata_without_fields(patient_sample_path)
ps = patient_sample[['patient_id', 'sample_id', 'model_id']]
mms = read_metadata_without_fields(molecular_sample_path)
# The same molecular-metadata file read "with fields"; only its first four rows are kept.
mms_template = read_metadata_with_fields(molecular_sample_path).iloc[:4]
biomarker_sheet = read_metadata_with_fields(biomarker_sheet_path)
patient_treatment = read_metadata_with_fields(treatment_sheet_path)

# Keep only the provider samples whose model also appears in the collection CSV.
models_in_metadata = ps.merge(metadata, how='inner', left_on='model_id', right_on='Model ID')

def check_model_in_sheets(sheet, model, so):
    """Report whether ``sheet`` lacks a row for ``model`` with sample origin ``so``.

    Args:
        sheet: DataFrame providing ``model_id`` and ``sample_origin`` columns.
        model: Model identifier looked up in ``sheet['model_id']``.
        so: Sample origin value looked up in ``sheet['sample_origin']``.

    Returns:
        bool: ``True`` when no row of ``sheet`` has both ``model_id == model``
        and ``sample_origin == so`` (including when the model is absent
        altogether); ``False`` when such a row exists.
    """
    # Inspect every row of the model rather than only the first one: a model
    # listed with several sample origins must not be reported as missing just
    # because the requested origin is not on its first row.
    has_matching_row = ((sheet['model_id'] == model) & (sheet['sample_origin'] == so)).any()
    return not has_matching_row

# Gather one record per (merged model row, receptor-status column) for which
# check_model_in_sheets reports the model/sample-origin pair as missing from `mms`.
biomarker_records = []
for _, m in models_in_metadata.iterrows():
    for column in columns_to_extract:
        # Columns containing "Patient" are treated as patient samples, all others as xenograft.
        sample_origin = 'patient' if 'Patient' in column else 'xenograft'
        if not check_model_in_sheets(mms, m['model_id'], sample_origin):
            continue
        biomarker_records.append({
            # Sample id gets the first letter of the origin as suffix (_p / _x).
            'sample_id': f"{m['sample_id']}_{sample_origin[0]}",
            'model_id': m['model_id'],
            'sample_origin': sample_origin,
            'platform_id': biomarker_platform,
            'biomarker': columns_to_marker[column],
            'biomarker_status': m[column].lower(),
        })

# Build the frame once (instead of concatenating inside the loop) with the
# biomarker sheet's columns first, so the column selections below also work
# when no record was collected.
record_fields = ['sample_id', 'model_id', 'sample_origin', 'platform_id', 'biomarker', 'biomarker_status']
biomarker_columns = list(biomarker_sheet.columns) + [
    field for field in record_fields if field not in biomarker_sheet.columns
]
biomarker_df = DataFrame(biomarker_records, columns=biomarker_columns)

# Append the new rows to the biomarker sheet and write it back over the file it was read from.
pd.concat([biomarker_sheet, biomarker_df[biomarker_sheet.columns]]).to_csv(join(hci_bcm_provider_path, 'biomarker/HCI-BCM_biomarker-Sheet1.tsv'), index=False, sep='\t')

# Blank out the sample-sheet columns that the collection CSV does not supply.
for blank_column in ['passage', 'host_strain_name', 'host_strain_nomenclature', 'engrafted_tumor_collection_site', 'raw_data_url']:
    biomarker_df[blank_column] = ''

# One molecular-metadata row per distinct new sample, stacked under the
# template rows and the existing sheet, then written back to the sample sheet.
final_mms = biomarker_df[mms.columns].drop_duplicates()
final_mms = pd.concat([mms_template, mms, final_mms])
final_mms.to_csv(join(hci_bcm_provider_path, 'HCI-BCM_molecular_metadata-sample.tsv'), sep='\t', index=False)

In [53]:
# Build patient-treatment rows for patients that are not yet in the treatment
# sheet: one row per comma-separated entry of the 'Patient Treatments' column.
known_patients = set(patient_treatment['patient_id'].unique())  # hoisted out of the loop
# Sheet columns first, followed by any of the three filled columns the sheet lacks.
treat_columns = list(patient_treatment.columns) + [
    name for name in ('patient_id', 'treatment_name', 'model_id')
    if name not in patient_treatment.columns
]

treatment_frames = []
for _, m in models_in_metadata.iterrows():
    treatments = m['Patient Treatments']
    # Skip already-listed patients and rows without treatments. Blank CSV
    # cells arrive as NaN rather than '', so both forms are checked.
    if m['Patient ID'] in known_patients or pd.isna(treatments) or treatments == '':
        continue
    # Strip each name so "A, B" does not yield " B" with a leading space.
    treatment_names = [name.strip() for name in treatments.split(',')]
    # Kept under the name `temp`: the scratch cell further down inspects the last frame built here.
    temp = DataFrame({
        'patient_id': m['Patient ID'],
        'treatment_name': treatment_names,
        'model_id': m['Model ID'],
    }).reindex(columns=treat_columns)
    treatment_frames.append(temp)

# Concatenate and fill once, after the loop; pd.concat rejects an empty list,
# so fall back to an empty frame with the expected columns.
if treatment_frames:
    treat = pd.concat(treatment_frames, ignore_index=True).fillna('Not provided')
else:
    treat = DataFrame(columns=treat_columns)
treat['Field'] = ''
patient_treatment = pd.concat([patient_treatment, treat]).reset_index(drop=True)

In [55]:
# Write the extended treatment sheet as a tab-separated file, without the index column.
treatment_out_path = join(hci_bcm_provider_path, 'treatment/HCI-BCM_patienttreatment-Sheet1.tsv')
patient_treatment.to_csv(treatment_out_path, index=False, sep='\t')

In [52]:
# Display the treatment rows built for the patients missing from the treatment sheet.
treat

Unnamed: 0,Field,patient_id,treatment_name,treatment_type,treatment_dose,treatment_starting_date,treatment_duration,treatment_event,elapsed_time,treatment_response,response_classification,model_id
0,Not provided,0RR2HE,Zometa,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,HCI-002
1,Not provided,1366,Capecitabine,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,BCM-7649
2,Not provided,1366,Cyclophosphamide,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,BCM-7649
3,Not provided,1366,Docetaxel,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,BCM-7649
4,Not provided,1366,Epirubicin,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,BCM-7649
...,...,...,...,...,...,...,...,...,...,...,...,...
255,Not provided,Y0T0JD,Capcitabine,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,HCI-011
256,Not provided,Y0T0JD,Cyclophosphamide,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,HCI-011
257,Not provided,Y0T0JD,Doxorubicin,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,HCI-011
258,Not provided,Y0T0JD,Pacitaxel,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,Not provided,HCI-011


In [46]:
# Scratch inspection: shows `temp`, the frame last assigned inside a loop of an earlier cell,
# with 'treatment_name' exploded into one row per entry. `temp` is not defined in this cell,
# so this only runs after a loop that created it.
temp.explode('treatment_name').reset_index(drop=True)

Unnamed: 0,Field,patient_id,treatment_name,treatment_type,treatment_dose,treatment_starting_date,treatment_duration,treatment_event,elapsed_time,treatment_response,response_classification,model_id
0,,0RR2HE,Zometa,,,,,,,,,HCI-002


In [ ]:
''