In [103]:
import pandas as pd
import pickle

# Load scaler and imputer 

In [104]:
# Restore the preprocessing objects that were saved to disk as pickles.
# NOTE(review): pickle.load can execute code embedded in the file, so only
# load 'scaler.pkl' / 'imputer.pkl' from a trusted source.

# Scaler: used by the scaling step further down.
with open('scaler.pkl', mode='rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Imputer: used by the imputation step further down.
with open('imputer.pkl', mode='rb') as imputer_file:
    imputer = pickle.load(imputer_file)

# Load data

In [105]:
# Earlier smoke test against the project data set (kept for reference only,
# not executed):
# data = pd.read_csv('bsc_project_set.csv')
# data = data.drop(['id', 'Unnamed: 0', 'map', 'bilirubin', 'creatinine', 'platelets', 'urea', 'diastolic_blood_pressure', 'peep_regime', 'mort_28'], axis=1)

# Read the RCT data set that the rest of the notebook works on.
rct_csv_path = 'rct_data.csv'
data = pd.read_csv(rct_csv_path)

# Prepare 'sex' variable

In [None]:
# Keep only the listed columns, in this fixed order. 'sex' is still the raw
# column at this point; it is replaced by 'sex_M' when the dummies are built.
column_order = [
    'age', 'weight', 'height',
    'pf_ratio', 'po2', 'pco2', 'ph',
    'driving_pressure', 'lung_compliance', 'fio2', 'hco3',
    'heart_rate', 'minute_volume', 'peep', 'plateau_pressure',
    'respiratory_rate', 'syst_blood_pressure',
    'sex',
]
data = data.loc[:, column_order]

In [106]:
# One-hot encode 'sex' and keep only the male indicator column 'sex_M'.
rct_data = pd.get_dummies(data, columns=['sex'], drop_first=False)
rct_data = rct_data.drop(['sex_F'], axis=1, errors='ignore')

# get_dummies only creates a column for categories that actually occur in the
# data. A data set without any 'M' rows therefore has no 'sex_M' column and the
# column selection below would fail with a KeyError.
if 'sex_M' not in rct_data.columns:
    observed_sex = set(data['sex'].dropna().unique())
    if observed_sex - {'F'}:
        # Values other than 'F' without any 'M' point to a different coding of
        # the column; fail loudly (same exception type as before) instead of
        # silently encoding everyone as non-male.
        raise KeyError(
            "Cannot derive 'sex_M': expected 'sex' to be coded as 'M'/'F', found %r"
            % sorted(str(value) for value in observed_sex)
        )
    # Only 'F' (or missing) values are present: the male indicator is 0 for all rows.
    rct_data['sex_M'] = 0

# Final column order handed to the scaler and imputer ('sex' replaced by 'sex_M').
column_order = ['age','weight','height','pf_ratio','po2','pco2','ph','driving_pressure','lung_compliance','fio2','hco3','heart_rate','minute_volume','peep','plateau_pressure','respiratory_rate','syst_blood_pressure', 'sex_M']
rct_data = rct_data[column_order]

# Scale

In [107]:
# Run the loaded scaler over the whole frame and write the result back into
# the same columns, so column labels and index are kept.
# NOTE: this overwrites rct_data; re-running the cell without re-running the
# cells above applies the scaler to already-scaled values.
scaled_values = scaler.transform(rct_data)
rct_data[rct_data.columns] = scaled_values

# Impute

In [108]:
# Run the loaded imputer over the (already scaled) frame and write the result
# back into the same columns, so column labels and index are kept.
imputed_values = imputer.transform(rct_data)
rct_data[rct_data.columns] = imputed_values

# Select the features to predict on

In [109]:
# Feature subset passed to every model below; the preprocessed frame is
# reduced to exactly these columns, in this order.
selected_features = [
    'age',
    'weight',
    'pf_ratio',
    'po2',
    'ph',
    'fio2',
    'driving_pressure',
    'plateau_pressure',
]
rct_data = rct_data.loc[:, selected_features]

# S-Learner with Gradient Boosting

In [110]:
# Load the pickled model for this section (S-learner, gradient boosting --
# per the section heading and file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('s_learner_gb_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='s_learner_gb_model.csv', index=False)

# S-Learner with Linear Regression

In [111]:
# Load the pickled model for this section (S-learner, linear regression --
# per the section heading and file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('s_learner_lr_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='s_learner_lr_model.csv', index=False)

# T-Learner with Gradient Boosting

In [112]:
# Load the pickled model for this section (T-learner, gradient boosting --
# per the section heading and file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('t_learner_gb_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='t_learner_gb_model.csv', index=False)

# T-Learner with Linear Regression

In [113]:
# Load the pickled model for this section (T-learner, linear regression --
# per the section heading and file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('t_learner_lr_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='t_learner_lr_model.csv', index=False)

# DR-Learner with SVM Propensity and XGBoost Response with XGBoost Regressor Final

In [114]:
# Load the pickled model for this section (DR-learner: SVM propensity,
# XGBoost response, XGBoost final -- per the section heading and file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('dr_learner_svm_xgb_xgb_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='dr_learner_svm_xgb_xgb_model.csv', index=False)

# DR-Learner with SVM Propensity and Logistic Regression Response with XGBoost Regressor Final

In [115]:
# Load the pickled model for this section (DR-learner: SVM propensity,
# logistic-regression response, XGBoost final -- per the section heading and
# file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('dr_learner_svm_logreg_xgb_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='dr_learner_svm_logreg_xgb_model.csv', index=False)

# DR-Learner with SVM Propensity and Gradient Boosting Response with XGBoost Regressor Final

In [116]:
# Load the pickled model for this section (DR-learner: SVM propensity,
# gradient-boosting response, XGBoost final -- per the section heading and
# file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('dr_learner_svm_gb_xgb_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='dr_learner_svm_gb_xgb_model.csv', index=False)

# DR-Learner with Logistic Regression Propensity and Logistic Regression Response with Linear Regression Final

In [117]:
# Load the pickled model for this section (DR-learner: logistic-regression
# propensity, logistic-regression response, linear-regression final -- per the
# section heading and file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('dr_learner_logreg_logreg_linear_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='dr_learner_logreg_logreg_linear_model.csv', index=False)

# DR-Learner with Gradient Boosting Propensity and Gradient Boosting Response with Gradient Boosting Final

In [118]:
# Load the pickled model for this section (DR-learner: gradient boosting for
# propensity, response and final stage -- per the section heading and file
# name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('dr_learner_gb_gb_gb_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='dr_learner_gb_gb_gb_model.csv', index=False)

# DR-Learner with K-Nearest Neighbors Propensity and Gradient Boosting Response with Gradient Boosting Final

In [119]:
# Load the pickled model for this section (DR-learner: k-nearest-neighbours
# propensity, gradient-boosting response, gradient-boosting final -- per the
# section heading and file name).
# NOTE(review): pickle.load can execute code embedded in the file; only load
# model files from a trusted source.
with open('dr_learner_knn_gb_gb_model.pkl', mode='rb') as model_file:
    model = pickle.load(model_file)

# Call the model's effect() on the selected feature columns and wrap the
# result in a frame with a single 'cate' column.
cate_inputs = rct_data[selected_features]
outcomes = model.effect(cate_inputs)
outcomes_df = pd.DataFrame(data=outcomes, columns=['cate'])

# Write the estimates to CSV without the row index.
outcomes_df.to_csv(path_or_buf='dr_learner_knn_gb_gb_model.csv', index=False)