In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import brier_score_loss, roc_curve, auc

In [2]:
# Fit one logistic-regression model per (feature set, outcome) pair and score it
# on the matching held-out test file. Pairs are visited with the feature set
# varying slowest, so the printed log keeps the order nested loops would give.
feature_sets = ["antenatal", "antenatal_growth", "antenatal_intrapartum"]
outcomes = ['_hie', '_lapgar', '_perinataldeath', '_resus']

for data, outcome in [(fs, oc) for fs in feature_sets for oc in outcomes]:
    print("Running LR on {} for {}".format(data, outcome))

    # Load the train/test CSVs (first column is the row index) and cast every
    # column, label included, to float32.
    train = pd.read_csv("data/{}{}_train.csv".format(data, outcome), index_col=0)
    train = train.astype('float32')
    test = pd.read_csv("data/{}{}_test.csv".format(data, outcome), index_col=0)
    test = test.astype('float32')

    # Split off the label column (it is named after the outcome); what remains
    # in each frame is used as the feature matrix.
    train_y = train.pop(outcome)
    test_y = test.pop(outcome)

    # Fit the classifier on the training split.
    clf = LogisticRegression(random_state=0, solver='lbfgs', max_iter=100000)
    clf.fit(train, train_y)

    # Second column of predict_proba is the probability of clf.classes_[1]
    # (presumably the positive label -- confirm against the data encoding).
    y_test_pred = clf.predict_proba(test)[:, 1]

    # Discrimination: area under the ROC curve built from the test predictions.
    fpr, tpr, thresholds = roc_curve(test_y, y_test_pred)
    roc_auc = auc(fpr, tpr)

    # Brier score of the predicted probabilities against the true labels.
    brier_loss = brier_score_loss(test_y, y_test_pred)

    # Save predicted probabilities alongside the true labels so a DeLong test
    # (run in R) can derive confidence intervals.
    probs_out = pd.DataFrame({"Prob" : y_test_pred, "{}{}".format(data, outcome): test_y})
    probs_out.to_csv("data/{}{}.LR.csv".format(data, outcome))

    print("ROC AUC: {}\nBrier loss: {}".format(np.round(roc_auc, 3), np.round(brier_loss, 3)))

Running LR on antenatal for _hie
ROC AUC: 0.712
Brier loss: 0.146
Running LR on antenatal for _lapgar
ROC AUC: 0.633
Brier loss: 0.189
Running LR on antenatal for _perinataldeath
ROC AUC: 0.752
Brier loss: 0.18
Running LR on antenatal for _resus
ROC AUC: 0.642
Brier loss: 0.202
Running LR on antenatal_growth for _hie
ROC AUC: 0.726
Brier loss: 0.148
Running LR on antenatal_growth for _lapgar
ROC AUC: 0.612
Brier loss: 0.213
Running LR on antenatal_growth for _perinataldeath
ROC AUC: 0.555
Brier loss: 0.228
Running LR on antenatal_growth for _resus
ROC AUC: 0.634
Brier loss: 0.2
Running LR on antenatal_intrapartum for _hie
ROC AUC: 0.739
Brier loss: 0.194
Running LR on antenatal_intrapartum for _lapgar
ROC AUC: 0.664
Brier loss: 0.236
Running LR on antenatal_intrapartum for _perinataldeath
ROC AUC: 0.781
Brier loss: 0.176
Running LR on antenatal_intrapartum for _resus
ROC AUC: 0.651
Brier loss: 0.255
