In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import brier_score_loss, roc_curve, auc

In [2]:
# Train and score one random forest for every (feature set, outcome) pair.
# Each pair has its own pre-split CSVs: data/<feature set><outcome>_train.csv
# and data/<feature set><outcome>_test.csv, with the label stored in a column
# named after the outcome.
feature_sets = ["antenatal", "antenatal_growth", "antenatal_intrapartum"]
outcomes = ['_hie', '_lapgar', '_perinataldeath', '_resus']

for data, outcome in [(fs, oc) for fs in feature_sets for oc in outcomes]:

    print("Running RF on {} for {}".format(data, outcome))

    # Load both splits (first CSV column is the index) and cast every column
    # to float32.
    train_path = "data/{}{}_train.csv".format(data, outcome)
    test_path = "data/{}{}_test.csv".format(data, outcome)
    X_train = pd.read_csv(train_path, index_col=0).astype('float32')
    X_test = pd.read_csv(test_path, index_col=0).astype('float32')

    # pop() removes the label column in place, leaving only the features.
    y_train = X_train.pop(outcome)
    y_test = X_test.pop(outcome)

    # Fixed seed so repeated runs give the same forest.
    forest = RandomForestClassifier(n_estimators=100, random_state=0)
    forest.fit(X_train, y_train)

    # Column 1 of predict_proba is the second entry of forest.classes_,
    # taken here as the positive class.
    test_probs = forest.predict_proba(X_test)[:, 1]

    # Discrimination: area under the ROC curve on the held-out split.
    fpr, tpr, _ = roc_curve(y_test, test_probs)
    auc_score = auc(fpr, tpr)

    # Probability accuracy: Brier score on the same predictions.
    brier = brier_score_loss(y_test, test_probs)

    # Save predicted probabilities next to the true labels so the DeLong
    # test (run in R) can compute confidence intervals.
    label_column = "{}{}".format(data, outcome)
    scored = pd.DataFrame({"Prob": test_probs, label_column: y_test})
    scored.to_csv("data/{}{}.RF.csv".format(data, outcome))

    print("ROC AUC: {}\nBrier loss: {}".format(np.round(auc_score, 3), np.round(brier, 3)))

Running RF on antenatal for _hie
ROC AUC: 0.585
Brier loss: 0.006
Running RF on antenatal for _lapgar
ROC AUC: 0.545
Brier loss: 0.029
Running RF on antenatal for _perinataldeath
ROC AUC: 0.628
Brier loss: 0.013
Running RF on antenatal for _resus
ROC AUC: 0.54
Brier loss: 0.052
Running RF on antenatal_growth for _hie
ROC AUC: 0.596
Brier loss: 0.007
Running RF on antenatal_growth for _lapgar
ROC AUC: 0.551
Brier loss: 0.029
Running RF on antenatal_growth for _perinataldeath
ROC AUC: 0.468
Brier loss: 0.003
Running RF on antenatal_growth for _resus
ROC AUC: 0.595
Brier loss: 0.049
Running RF on antenatal_intrapartum for _hie
ROC AUC: 0.6
Brier loss: 0.005
Running RF on antenatal_intrapartum for _lapgar
ROC AUC: 0.614
Brier loss: 0.028
Running RF on antenatal_intrapartum for _perinataldeath
ROC AUC: 0.683
Brier loss: 0.014
Running RF on antenatal_intrapartum for _resus
ROC AUC: 0.627
Brier loss: 0.05
