In [1]:
import numpy as np
import pandas as pd

from sklearn.linear_model import HuberRegressor, LogisticRegression, Ridge
from sklearn.svm import LinearSVC
import sklearn.metrics as metrics

import utils

In [2]:
import utils

# Label columns handled by the test-prediction subtask (one binary
# classifier is fitted per entry further down in this notebook).
TESTS = [
    'LABEL_BaseExcess',
    'LABEL_Fibrinogen',
    'LABEL_AST',
    'LABEL_Alkalinephos',
    'LABEL_Bilirubin_total',
    'LABEL_Lactate',
    'LABEL_TroponinI',
    'LABEL_SaO2',
    'LABEL_Bilirubin_direct',
    'LABEL_EtCO2',
]

# Label columns for the vital-sign targets.
VITALS = [
    'LABEL_RRate',
    'LABEL_ABPm',
    'LABEL_SpO2',
    'LABEL_Heartrate',
]

# Suffix for saved outputs -- presumably appended to file names; confirm at the point of use.
save_suffix = ""
# When True, cells print progress messages.
verbose = True

In [3]:
"""0. Data reforming"""

# Announce the stage only when progress logging is enabled.
if verbose:
    print("--- PREPARING DATASET ...")

# Read the three CSV inputs from the working directory, in this order:
# training features, training labels, test features.
feat_train, labl_train, feat_test = map(
    pd.read_csv,
    ("./train_features.csv", "./train_labels.csv", "./test_features.csv"),
)

# Bare trailing expression: the notebook renders the raw test feature frame.
feat_test

--- PREPARING DATASET ...


Unnamed: 0,pid,Time,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,...,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH
0,0,1,39.0,,,,,,,,...,,,,,,,,,,
1,0,2,39.0,,44.2,17.0,,36.0,10.2,13.0,...,119.0,100.0,,98.0,31.0,82.0,21.8,,119.0,
2,0,3,39.0,,,,,,,,...,,100.0,,,,78.0,,,125.0,7.34
3,0,4,39.0,,,,,,,,...,,100.0,,,,80.0,,,136.0,
4,0,5,39.0,,,,,,,,...,,100.0,,,,83.0,,,135.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151963,9997,8,57.0,,,,,,,,...,,100.0,,,,84.0,,,103.0,
151964,9997,9,57.0,,,,,,,,...,,100.0,,,,83.0,,,110.0,
151965,9997,10,57.0,,,,,,,,...,,100.0,,,,88.0,,,111.0,
151966,9997,11,57.0,,,,,37.0,,,...,,100.0,,,,89.0,,,118.0,


In [4]:
# Replace every missing entry with 0.0, mutating both feature frames in place.
for frame in (feat_train, feat_test):
    frame.fillna(0.0, inplace=True)

# Disabled alternatives, kept for reference only (none of this runs):
#
#   (a) forward-fill the whole frame, then zero-fill whatever is left
#       feat_train.fillna(method='ffill', inplace=True)
#       feat_train.fillna(0.0, inplace=True)
#       feat_train
#
#       feat_test.fillna(method='ffill', inplace=True)
#       feat_test.fillna(0.0, inplace=True)
#       feat_test
#
#   (b) the same forward-then-zero fill, applied per patient group
#       feat_gp = feat_train.groupby("pid", sort=False)
#       for pid, feat_data in feat_gp:
#           ind = feat_data.index
#           feat_data.fillna(method='ffill', inplace=True)
#           feat_data.fillna(0.0, inplace=True)
#           # feat_data.fillna(feat_data.mean(), inplace=True)
#           feat_train.iloc[ind,:]=feat_data
#           print(ind)

In [6]:
# Collapse the per-timestep rows into one feature vector per patient using the
# project helper, then order the rows by index (the index is later copied into
# the `pid` column, so it is presumably the patient id).
# NOTE(review): feat_train is rebuilt from its own previous value, so running
# this cell twice passes already-flattened data back into the helper.
feat_train = utils.patient_feat_flatten(feat_train)
feat_train = feat_train.sort_index()
# feat_test = utils.patient_feat_flatten(feat_test).sort_index()

# Put the labels in pid order as well (in place).
labl_train.sort_values("pid", inplace=True)

# Empty prediction table: one row per entry of the flattened feature index,
# same columns as the label table, with `pid` filled from that index.
labl_pred_train = pd.DataFrame(index=feat_train.index, columns=labl_train.columns)
labl_pred_train["pid"] = feat_train.index
# labl_pred_test = pd.DataFrame(index=feat_test.index, columns=labl_train.columns)
# labl_pred_test.pid = feat_test.index


In [7]:
"""1. Subtask - test prediction (binary classification)"""

# Hyperparameters shared by every per-label model.
# Other solvers noted as alternatives: sag, lbfgs.
logreg_params = dict(
    penalty="l2",
    C=0.1,
    fit_intercept=True,
    solver="newton-cg",
    max_iter=100,
)

for test_labl in TESTS:
    # Fit a fresh logistic regression for this label on the complete training set.
    test_classifier = LogisticRegression(**logreg_params)
    test_classifier.fit(feat_train, labl_train[test_labl])

    # Disabled alternative model:
    # test_classifier = SVC(gamma=2, C=1)
    # test_classifier.fit(feat_train, labl_train[test_labl])

    # Score the training patients: keep the second predict_proba column
    # (the probability of the class listed second in classes_).
    labl_pred_train[test_labl] = test_classifier.predict_proba(feat_train)[:, 1]
    # labl_pred_test[test_labl] = test_classifier.predict_proba(feat_test)[:, 1]

    # Disabled alternative scoring via raw decision values:
    # labl_pred_train[test_labl] = test_classifier.decision_function(feat_train)
    # labl_pred_test[test_labl] = test_classifier.decision_function(feat_test)



In [8]:
# Training-set ROC AUC for the label stored at column position 1 of both the
# ground-truth table and the prediction table (they share the same columns).
metrics.roc_auc_score(
    y_true=labl_train.iloc[:, 1],
    y_score=labl_pred_train.iloc[:, 1],
)

0.9170503214319314

In [14]:
# Same training-set ROC AUC check on column position 1, recorded for the
# newton-cg solver run.
metrics.roc_auc_score(
    y_true=labl_train.iloc[:, 1],
    y_score=labl_pred_train.iloc[:, 1],
)  # newton-cg

0.9095989705052022

In [10]:
# Display the flattened training features. The rendered output has a two-level
# column header (Measure, Time) and ends with a single Age column.
feat_train

Measure,EtCO2,EtCO2,EtCO2,EtCO2,EtCO2,EtCO2,EtCO2,EtCO2,EtCO2,EtCO2,...,pH,pH,pH,pH,pH,pH,pH,pH,pH,Age
Time,0,1,2,3,4,5,6,7,8,9,...,3,4,5,6,7,8,9,10,11,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.37,7.41,7.41,7.41,7.41,7.41,7.39,7.39,7.39,34.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,86.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,66.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.33,7.35,7.34,7.39,7.37,7.34,7.34,7.34,7.34,66.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,42.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31653,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.33,7.33,7.33,7.33,7.33,7.33,7.33,7.33,7.33,52.0
31654,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,66.0
31656,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7.28,7.28,7.34,7.34,7.33,7.33,7.33,7.33,7.33,44.0
31657,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,70.0
