In [13]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import brier_score_loss, roc_curve, auc

In [14]:
# Fit one logistic-regression model per (feature set, outcome) pair, score it
# on the matching test split, and save the predicted probabilities to CSV.
for data in ("antenatal", "antenatal_growth", "antenatal_intrapartum"):
    for outcome in ('_hie', '_lapgar', '_perinataldeath', '_resus'):

        print("Running LR on {} for {}".format(data, outcome))

        # Load the pre-split tables (first CSV column becomes the index) and
        # cast every column to float32.
        train = pd.read_csv("data/{}{}_train.csv".format(data, outcome), index_col=0)
        train = train.astype('float32')
        test = pd.read_csv("data/{}{}_test.csv".format(data, outcome), index_col=0)
        test = test.astype('float32')

        # The label column carries the outcome's name; pop() removes it from
        # the frame in place, leaving only the feature columns behind.
        train_y = train.pop(outcome)
        test_y = test.pop(outcome)

        # fit() returns the estimator itself, so construction and fitting chain.
        clf = LogisticRegression(
            random_state=0,
            solver='lbfgs',
            max_iter=100000,
        ).fit(train, train_y)

        # Keep the second probability column only
        # (presumably the positive class of a binary 0/1 label -- TODO confirm).
        y_test_pred = clf.predict_proba(test)[:, 1]

        # Discrimination: area under the ROC curve on the test split.
        fpr, tpr, thresholds = roc_curve(test_y, y_test_pred)
        roc_auc = auc(fpr, tpr)

        # Probability accuracy: Brier score on the test split.
        brier_loss = brier_score_loss(test_y, y_test_pred)

        # Save probabilities next to the true labels; per the original note
        # these feed a DeLong test (run in R) to obtain confidence intervals.
        lr_probs = pd.DataFrame({
            "Prob": y_test_pred,
            "{}{}".format(data, outcome): test_y,
        })
        lr_probs.to_csv("data/{}{}.LR.csv".format(data, outcome))

        print("ROC AUC: {}\nBrier loss: {}".format(np.round(roc_auc, 3), np.round(brier_loss, 3)))

Running LR on antenatal for _hie
ROC AUC: 0.714
Brier loss: 0.142
Running LR on antenatal for _lapgar
ROC AUC: 0.642
Brier loss: 0.19
Running LR on antenatal for _perinataldeath
ROC AUC: 0.744
Brier loss: 0.163
Running LR on antenatal for _resus
ROC AUC: 0.612
Brier loss: 0.241
Running LR on antenatal_growth for _hie
ROC AUC: 0.707
Brier loss: 0.159
Running LR on antenatal_growth for _lapgar
ROC AUC: 0.629
Brier loss: 0.206
Running LR on antenatal_growth for _perinataldeath
ROC AUC: 0.589
Brier loss: 0.215
Running LR on antenatal_growth for _resus
ROC AUC: 0.631
Brier loss: 0.235
Running LR on antenatal_intrapartum for _hie
ROC AUC: 0.733
Brier loss: 0.143
Running LR on antenatal_intrapartum for _lapgar
ROC AUC: 0.676
Brier loss: 0.205
Running LR on antenatal_intrapartum for _perinataldeath
ROC AUC: 0.764
Brier loss: 0.184
Running LR on antenatal_intrapartum for _resus
ROC AUC: 0.678
Brier loss: 0.223
