In [None]:
import pandas as pd
import numpy as np
import os
from matplotlib import pyplot as plt
import seaborn as sns
from scipy.stats import norm, logistic
from scipy.optimize import minimize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, f1_score
from sklearn.preprocessing import *
from util import *

import lightgbm as lgb
from Models import lgbModel
# Print the version of the LightGBM package that was just imported.
print(lgb.__version__)



In [None]:
# Random seed (passed to train_test_split further down).
SEED = 1

# The training table is read from a directory relative to the working directory.
data_folder = 'data'
train_csv_path = os.path.join(data_folder, 'mimiciv_traindata.csv')
df = pd.read_csv(train_csv_path)

# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Model inputs: whatever compute_features returns for every column except the
# label, plus the label column itself as a NumPy array.
x = compute_features(df.drop('mortality', axis=1))
y = df.mortality.values

# Hold out 30% of the rows for evaluation; SEED fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=SEED)

# Map each feature onto a normal distribution through its empirical quantiles.
# random_state pins the subsampling the transformer performs while estimating
# those quantiles, so the scaled features are reproducible across runs.
# Alternatives considered: RobustScaler(unit_variance=True),
# PowerTransformer(method='yeo-johnson', standardize=True).
scaler = QuantileTransformer(
    output_distribution='normal',
    ignore_implicit_zeros=False,
    random_state=SEED,
)

# Fit on the training split only and reuse the fitted transform on the held-out
# split, so the test rows do not influence the preprocessing.
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

In [None]:
# Define the F1 Score metric
def f1_metric(y_true, y_pred):
  """Custom evaluation metric: F1 score of the predictions cut at 0.5.

  Args:
    y_true: ground-truth labels, forwarded unchanged to f1_score.
    y_pred: array of predicted scores; entries >= 0.5 become class 1,
      everything else class 0.

  Returns:
    The tuple ("f1", score, True). The trailing True sits in the
    is-higher-better slot of LightGBM's custom-metric return layout.
  """
  hard_labels = (y_pred >= 0.5).astype(int)
  return "f1", f1_score(y_true, hard_labels), True

# Hyper-parameters forwarded to lgbModel as keyword arguments.
params = {
    # --- task / boosting setup ---
    "objective": "binary",
    "verbosity": 1,
    "boosting_type": "dart",  # alternative: "gbdt"
    "is_unbalance": True,
    "num_boost_round": 1000,
    # NOTE(review): LightGBM reports that early stopping is not available in
    # dart mode, so this setting is probably inert with the current
    # boosting_type -- confirm, or switch to "gbdt" if early stopping is wanted.
    "early_stopping_round": 100,

    # --- tree shape ---
    # With only 10 leaves a tree can never get close to depth 100, so
    # num_leaves is the limit that actually binds here.
    "max_depth": 100,
    "num_leaves": 10,
    "learning_rate": 0.03,
    "min_data_in_leaf": 10,

    # --- row / column subsampling ---
    "bagging_fraction": 0.9,
    # LightGBM only performs bagging when bagging_freq is non-zero; without it
    # the bagging_fraction above is silently ignored.
    "bagging_freq": 1,
    "feature_fraction": 0.9,

    # --- regularisation ---
    "lambda_l1": 0,
    "lambda_l2": 2.,
    "min_split_gain": 0.1,
    "max_bin": 255,

    # --- dart-specific ---
    "drop_rate": 0.1,
    "max_drop": 50,
}



In [None]:
# Dictionary handed to LightGBM's record_evaluation callback, which is meant to
# store the per-iteration validation metrics in it during training.
eval_result = {}
record_evaluation_callback = lgb.callback.record_evaluation(eval_result=eval_result)

# The held-out split doubles as the validation set monitored during fitting.
fit_kwargs = dict(
    eval_set=[(x_test, y_test)],
    callbacks=[record_evaluation_callback],
)

# Build the project wrapper with the custom F1 metric and the params dict, then fit.
model = lgbModel(eval_metric=f1_metric, **params)
model.fit(x_train, y_train, **fit_kwargs)

In [None]:
# Plot the recorded "f1" values from eval_result against the boosting iteration.
lgb.plot_metric(booster=eval_result, metric='f1')

In [None]:
# Score the held-out split. evaluate_model is already in scope from the
# `from util import *` in the notebook's first cell, so nothing is imported here.
p = model.predict_proba(x_test)   # predict_proba output for the held-out split
y_label = model.predict(x_test)   # predict output for the held-out split
evaluate_model(y_test, y_label, p)