In [None]:
import pandas as pd
import numpy as np
import pickle

# Get all the data

In [None]:
# Load the (train, test) pair stored in the pickle file.
# NOTE: unpickling can execute arbitrary code - only load this file from a trusted source.
with open('./data/data_for_model.pickle', 'rb') as data_file:
    # The context manager closes the file handle even if unpickling fails.
    train_data, test_data = pickle.load(data_file)

In [None]:
# Stack the train rows and the test rows (in that order) into one frame.
joined_data = pd.concat(objs=[train_data, test_data], axis=0)

In [None]:
# Discard every row that has no Entry_Date.
joined_data = joined_data.dropna(axis='index', subset=['Entry_Date'])

In [None]:
# Convert the Entry_Date column to pandas datetimes.
joined_data = joined_data.assign(Entry_Date=pd.to_datetime(joined_data['Entry_Date']))

In [None]:
# Order the rows chronologically (ascending Entry_Date).
joined_data = joined_data.sort_values('Entry_Date', ascending=True)

In [None]:
# Get the first report date of the IrAE. This is the date of the first note to be classified as positive.
# For every itis_category, each patient (PatNum) with more than one dated row in joined_data
# contributes one output row holding the earliest Entry_Date for that category.
first_date_frames = []
for IrAE in joined_data.itis_category.dropna().unique():
    IrAE_data = joined_data.loc[joined_data.itis_category == IrAE]
    # Number of dated rows and earliest date per patient, computed in a single groupby.
    # `min` (instead of `first`) keeps the result correct even when joined_data is not sorted.
    per_patient = IrAE_data.groupby('PatNum')['Entry_Date'].agg(['count', 'min'])
    result = (
        per_patient.loc[per_patient['count'] > 1, 'min']
        .rename('Entry_Date')
        .reset_index()
    )
    result['itis_category'] = IrAE
    first_date_frames.append(result)

# Concatenate once at the end instead of growing a DataFrame inside the loop.
if first_date_frames:
    IrAE_first_date = pd.concat(first_date_frames, ignore_index=True)
else:
    # No category at all: still expose the columns the following cells rely on.
    IrAE_first_date = pd.DataFrame(columns=['PatNum', 'Entry_Date', 'itis_category'])

In [None]:
# Read the drug table; the first CSV column becomes the index and the
# min_date / max_added columns are parsed as datetimes.
id_to_drug = pd.read_csv(
    r'./data/Id_to_drug.csv',
    index_col=0,
    parse_dates=['min_date', 'max_added'],
)

# Steroid use

* Data were extracted on treatment with medications containing the following generic substances: Prednisone, Prednisolone, Dexamethasone, Methylprednisolone, Hydrocortisone
* Oral or IV routes only, with a dose of at least 20mg of prednisone or equivalent
* Summarized in the file steroids_immune_patients.csv, which contains the Execution_Date and the PatNum.  

In [None]:
# Read the steroid administrations; the first CSV column becomes the index.
# low_memory=False makes pandas read the file in one pass rather than in chunks.
steroid_treatment = pd.read_csv(
    r'./data/steroids_immune_patients.csv',
    index_col=0,
    low_memory=False,
)

In [None]:
# Convert the Execution_Date column to pandas datetimes.
steroid_treatment['Execution_Date'] = pd.to_datetime(steroid_treatment['Execution_Date'])

In [None]:
# Get patients with at least one prescription of relevant corticosteroid treatment within 14 days
# of the IrAE diagnosis: for every (PatNum, itis_category) row of IrAE_first_date, flag whether the
# patient has a steroid Execution_Date in [Entry_Date, Entry_Date + 14 days] (both ends inclusive).
STEROID_WINDOW = pd.Timedelta(days=14)

# Group the execution dates by patient once, so the loop below does a dictionary lookup
# instead of re-scanning the whole steroid table for every IrAE row.
execution_dates_by_patient = {
    pat_num: dates
    for pat_num, dates in steroid_treatment.groupby('PatNum')['Execution_Date']
}

results = []
for row in IrAE_first_date.itertuples():
    patient_dates = execution_dates_by_patient.get(row.PatNum)
    # Patients without any steroid record are simply not treated.
    treated = patient_dates is not None and bool(
        patient_dates.between(row.Entry_Date, row.Entry_Date + STEROID_WINDOW).any()
    )
    results.append([row.PatNum, row.itis_category, treated])
results = pd.DataFrame(results, columns=['PatNum', 'itis_category', 'steroid_treatment'])

In [None]:
# Attach each patient's drug; the left join keeps every row of results and
# joins on the columns the two frames have in common.
results = pd.merge(results, id_to_drug[['PatNum', 'drug']], how='left')

In [None]:
# Count the steroid-treated rows for every (drug, itis_category) pair and print the table.
steroid_treated = results[results['steroid_treatment'] == True]
treated_counts = steroid_treated.groupby(['drug', 'itis_category'])['PatNum'].count()
print(treated_counts)

# ICI cessation

In [None]:
# Add the min_date / max_added / drug columns of id_to_drug to every IrAE first-date row
# (left join on PatNum, so all IrAE rows are kept).
ICI_stopping = pd.merge(
    IrAE_first_date,
    id_to_drug[['PatNum', 'min_date', 'max_added', 'drug']],
    how='left',
    on='PatNum',
)

In [None]:
# Preview the first five merged rows.
ICI_stopping.head(n=5)

In [None]:
# max_date is max_added shifted back by three calendar months.
ICI_stopping = ICI_stopping.assign(
    max_date=ICI_stopping.max_added - pd.DateOffset(months=3)
)

In [None]:
# Keep only the rows whose Entry_Date lies in the 30-day window ending at max_date
# (both ends inclusive).
# Stated intent: get patients whose last dose of ICI was in the month before the IrAE,
# meaning that after the IrAE they did not get an additional dose.
# NOTE(review): as written the filter keeps Entry_Date <= max_date, i.e. max_date falls on or up to
# 30 days AFTER the IrAE date - confirm this matches the stated intent.
window_end = ICI_stopping.max_date
window_start = window_end - np.timedelta64(30, 'D')
entry_in_window = ICI_stopping.Entry_Date.between(window_start, window_end)
ICI_stopping = ICI_stopping.loc[entry_in_window]

In [None]:
# Number of remaining rows for every (drug, itis_category) pair.
ICI_stopping.groupby(by=['drug', 'itis_category'])['PatNum'].agg('count')