In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#pd.set_option('max_columns',100)

import matplotlib as mpl
mpl.style.use('ggplot')
sns.set_style('white')

import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost.sklearn import XGBClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

import sys
import os
import tempfile
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score

from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve,auc
from sklearn.metrics import accuracy_score
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss
from sklearn.metrics import (precision_score, recall_score,f1_score)
from sklearn.calibration import CalibratedClassifierCV, calibration_curve

In [2]:
from sklearn.ensemble import GradientBoostingClassifier
from bayes_opt import BayesianOptimization

In [3]:
# Development (training) and validation tables, read from CSV files in the
# current working directory.
dev_df, val_df = (pd.read_csv(csv_name) for csv_name in ('train_df.csv', 'val_df.csv'))

In [4]:
# Maps each outcome (label) column name to the ordered list of predictor
# column names used to model it. The optimize_* functions iterate over this.
label_features_dict = {
    'stay_chart_included_mdr': [
        'sodium', 'RPR', 'creatinine', 'monocytes_abs', 'mcv',
        'INR', 'mchc', 'APPT', 'fibrinogen', 'Temperature',
        'shock_index', 'bilirubin', 'D_dimer', 'PT', 'p',
        'gamma_GT', 'BMI', 'BNP', 'PCT', 'urea',
        'CRP', 'ck', 'IL_6', 'HR', 'los_hosp_beforeicu',
    ],
}

In [5]:
# label_features_dict = {
# 'stay_chart_included_mdr':['AGE',
#  'mcv',
#  'INR',
#  'creatinine',
#  'PA',
#  'rdw',
#  'APPT',
#  'ca',
#  'wbc',
#  'fibrinogen',
#  'RR',
#  'mpv',
#  'Temperature',
#  'BNP',
#  'sodium',
#  'urea',
#  'bilirubin',
#  'ck',
#  'BMI',
#  'gamma_GT',
#  'PCT',
#  'IL_6',
#  'HR',
#  'los_hosp_beforeicu',
#  'CRP'
# ]
# }

In [6]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()

# x_train = scaler.fit_transform(x_train)
# x_test = scaler.transform(x_test)

In [7]:
#lr
def lr_cv(expC):
    """Return the mean 5-fold ROC AUC of a logistic regression with C = 10 ** expC.

    Reads ``x_train`` and ``y_train`` from the surrounding scope when called.
    NOTE(review): no visible cell assigns those names at notebook level;
    confirm they exist before calling this module-level version.
    """
    model = LogisticRegression(C=10 ** expC)
    fold_auc = cross_val_score(model, x_train, y_train, scoring='roc_auc', cv=5)
    return fold_auc.mean()

# Seeded optimiser over the exponent of C (the objective uses C = 10 ** expC).
lr_op = BayesianOptimization(f=lr_cv, pbounds={'expC': (-5, 2)}, random_state=2024)

#knn
def knn_cv(n_neighbors, weights):
    """Return the mean 5-fold ROC AUC of a k-nearest-neighbours classifier.

    ``n_neighbors`` is truncated to an int; ``weights`` is rounded to 0 or 1
    and used to pick 'uniform' or 'distance' respectively.

    Reads ``x_train`` and ``y_train`` from the surrounding scope when called.
    NOTE(review): no visible cell assigns those names at notebook level.
    """
    weight_scheme = ['uniform', 'distance'][int(round(weights))]
    model = KNeighborsClassifier(n_neighbors=int(n_neighbors), weights=weight_scheme)
    fold_auc = cross_val_score(model, x_train, y_train, scoring='roc_auc', cv=5)
    return fold_auc.mean()

# Seeded optimiser for KNN; 'weights' is a continuous stand-in that the
# objective rounds to choose between the two weighting schemes.
knn_op = BayesianOptimization(
    f=knn_cv,
    pbounds={'n_neighbors': (2, 20), 'weights': (0, 1)},
    random_state=2024,
)

#svc
def svc_cv(expC, expGamma):
    """Return the mean 5-fold ROC AUC of an SVC with C = 10 ** expC, gamma = 10 ** expGamma.

    Reads ``x_train`` and ``y_train`` from the surrounding scope when called.
    NOTE(review): no visible cell assigns those names at notebook level.
    """
    model = SVC(C=10 ** expC, gamma=10 ** expGamma, random_state=2024)
    fold_auc = cross_val_score(model, x_train, y_train, scoring='roc_auc', cv=5)
    return fold_auc.mean()

# Seeded optimiser over the base-10 exponents of the SVC's C and gamma.
svc_op = BayesianOptimization(
    f=svc_cv,
    pbounds={'expC': (-5, 3), 'expGamma': (-4, -1)},
    random_state=2024,
)

#mlp
def mlp_cv(layer1, layer2):
    """Return the mean 5-fold ROC AUC of a two-hidden-layer MLP with early stopping.

    ``layer1`` and ``layer2`` are truncated to ints and used as the two
    hidden-layer sizes.

    Reads ``x_train`` and ``y_train`` from the surrounding scope when called.
    NOTE(review): no visible cell assigns those names at notebook level.
    """
    hidden_sizes = (int(layer1), int(layer2))
    model = MLPClassifier(hidden_layer_sizes=hidden_sizes, early_stopping=True, random_state=2024)
    fold_auc = cross_val_score(model, x_train, y_train, scoring='roc_auc', cv=5)
    return fold_auc.mean()

# Seeded optimiser over the two hidden-layer widths of the MLP.
mlp_op = BayesianOptimization(
    f=mlp_cv,
    pbounds={'layer1': (30, 300), 'layer2': (4, 120)},
    random_state=2024,
)

In [8]:
def optimize_lr(n_iter):
    """Tune logistic-regression regularisation per label with Bayesian optimisation.

    For every label in ``label_features_dict``, the label's feature columns
    are taken from ``dev_df`` and min-max scaled. The exponent ``expC``
    (with C = 10 ** expC) is then searched over [-5, 2] to maximise the
    mean 5-fold cross-validated ROC AUC.

    Parameters
    ----------
    n_iter : int
        Forwarded to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimizer.max}``: the best result recorded by the optimiser
        for each label (a dict with 'target' and 'params').
    """
    models_dict = {}

    def lr_cv(expC):
        # x_train / y_train are looked up in the enclosing function at call
        # time, so this always scores the data prepared for the current label.
        C = 10 ** expC
        return cross_val_score(
            LogisticRegression(C=C),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds influence the scaling range.
        # Wrapping scaler + model in a Pipeline would avoid that.
        x_train = MinMaxScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        # allow_duplicate_points matches optimize_knn: a repeated suggestion
        # (e.g. at a bound) is registered again instead of being rejected.
        lr_op = BayesianOptimization(
            lr_cv,
            {'expC': (-5, 2)},
            random_state=2024,
            allow_duplicate_points=True)

        print('---------------------------特征为：%s-------------------------------'%label)
        print('---------------------------模型为：lr-------------------------------')
        lr_op.maximize(n_iter=n_iter)
        print(lr_op.max)
        models_dict[label] = lr_op.max
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [14]:
def optimize_knn(n_iter):
    """Tune a k-nearest-neighbours classifier per label with Bayesian optimisation.

    For every label in ``label_features_dict``, the label's feature columns
    are taken from ``dev_df`` and min-max scaled. ``n_neighbors`` in [2, 50]
    and the weighting scheme are then searched to maximise the mean 5-fold
    cross-validated ROC AUC.

    Parameters
    ----------
    n_iter : int
        Forwarded to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimizer.max}``. The stored params are the raw continuous
        values: the objective truncates ``n_neighbors`` with ``int()`` and
        rounds ``weights`` to index ['uniform', 'distance'].
    """
    models_dict = {}

    def knn_cv(n_neighbors, weights):
        # x_train / y_train are looked up in the enclosing function at call
        # time, so this always scores the data prepared for the current label.
        weights = int(round(weights))
        return cross_val_score(
            KNeighborsClassifier(n_neighbors=int(n_neighbors),
                                 weights=['uniform', 'distance'][weights]),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds influence the scaling range.
        # Wrapping scaler + model in a Pipeline would avoid that.
        x_train = MinMaxScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        knn_op = BayesianOptimization(
            knn_cv,
            {'n_neighbors': (2, 50),
             'weights': (0, 1)},
            random_state=2024, allow_duplicate_points=True)

        print('---------------------------特征为：%s-------------------------------'%label)
        print('---------------------------模型为：knn-------------------------------')
        knn_op.maximize(n_iter=n_iter)
        print(knn_op.max)
        models_dict[label] = knn_op.max
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [15]:
def optimize_svc(n_iter):
    """Tune an SVC per label with Bayesian optimisation.

    For every label in ``label_features_dict``, the label's feature columns
    are taken from ``dev_df`` and min-max scaled. ``expC`` in [-3, 2] and
    ``expGamma`` in [-4, -1] (with C = 10 ** expC, gamma = 10 ** expGamma)
    are then searched to maximise the mean 5-fold cross-validated ROC AUC.

    Parameters
    ----------
    n_iter : int
        Forwarded to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimizer.max}``: the best result recorded by the optimiser
        for each label (a dict with 'target' and 'params').
    """
    models_dict = {}

    def svc_cv(expC, expGamma):
        # x_train / y_train are looked up in the enclosing function at call
        # time, so this always scores the data prepared for the current label.
        C = 10 ** expC
        gamma = 10 ** expGamma
        return cross_val_score(
            SVC(C=C, gamma=gamma, random_state=2024),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds influence the scaling range.
        # Wrapping scaler + model in a Pipeline would avoid that.
        x_train = MinMaxScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        # allow_duplicate_points matches optimize_knn: a repeated suggestion
        # (e.g. at a bound) is registered again instead of being rejected.
        svc_op = BayesianOptimization(
            svc_cv,
            {'expC': (-3, 2),
             'expGamma': (-4, -1)},
            random_state=2024,
            allow_duplicate_points=True)

        print('---------------------------特征为：%s-------------------------------'%label)
        print('---------------------------模型为：svc-------------------------------')
        svc_op.maximize(n_iter=n_iter)
        print(svc_op.max)
        models_dict[label] = svc_op.max
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [16]:
def optimize_mlp(n_iter):
    """Tune a two-hidden-layer MLP per label with Bayesian optimisation.

    For every label in ``label_features_dict``, the label's feature columns
    are taken from ``dev_df`` and min-max scaled. The hidden-layer widths
    ``layer1`` in [30, 300] and ``layer2`` in [4, 120] are then searched to
    maximise the mean 5-fold cross-validated ROC AUC.

    Parameters
    ----------
    n_iter : int
        Forwarded to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimizer.max}``. The stored params are the raw continuous
        values; the objective truncates both widths with ``int()``.
    """
    models_dict = {}

    def mlp_cv(layer1, layer2):
        # x_train / y_train are looked up in the enclosing function at call
        # time, so this always scores the data prepared for the current label.
        return cross_val_score(
            MLPClassifier(hidden_layer_sizes=(int(layer1), int(layer2)),
                          early_stopping=False, random_state=2024),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds influence the scaling range.
        # Wrapping scaler + model in a Pipeline would avoid that.
        x_train = MinMaxScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        # allow_duplicate_points matches optimize_knn: a repeated suggestion
        # (e.g. at a bound) is registered again instead of being rejected.
        mlp_op = BayesianOptimization(
            mlp_cv,
            {'layer1': (30, 300),
             'layer2': (4, 120)},
            random_state=2024,
            allow_duplicate_points=True)

        print('---------------------------特征为：%s-------------------------------'%label)
        print('---------------------------模型为：mlp-------------------------------')
        mlp_op.maximize(n_iter=n_iter)
        print(mlp_op.max)
        models_dict[label] = mlp_op.max
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [17]:
# Run the logistic-regression search, passing n_iter=45 to the optimiser.
models_dict_lr = optimize_lr(45)

---------------------------特征为：stay_chart_included_mdr-------------------------------
---------------------------模型为：lr-------------------------------
|   iter    |  target   |   expC    |
-------------------------------------
| [0m1        [0m | [0m0.7475   [0m | [0m-0.8839  [0m |
| [95m2        [0m | [95m0.7485   [0m | [95m-0.1062  [0m |
| [0m3        [0m | [0m0.7426   [0m | [0m-3.683   [0m |
| [0m4        [0m | [0m0.7426   [0m | [0m-4.693   [0m |
| [0m5        [0m | [0m0.7427   [0m | [0m-3.565   [0m |
| [0m6        [0m | [0m0.7419   [0m | [0m2.0      [0m |
| [0m7        [0m | [0m0.7467   [0m | [0m0.4267   [0m |
| [95m8        [0m | [95m0.7492   [0m | [95m-0.4058  [0m |
| [0m9        [0m | [0m0.7491   [0m | [0m-0.5113  [0m |
| [95m10       [0m | [95m0.7494   [0m | [95m-0.4408  [0m |
| [0m11       [0m | [0m0.7443   [0m | [0m-1.563   [0m |
| [0m12       [0m | [0m0.7467   [0m | [0m0.4267   [0m |
| [0m13       [0m 

In [18]:
# Run the KNN search, passing n_iter=45 to the optimiser.
models_dict_knn = optimize_knn(45)

---------------------------特征为：stay_chart_included_mdr-------------------------------
---------------------------模型为：knn-------------------------------
|   iter    |  target   | n_neig... |  weights  |
-------------------------------------------------
| [0m1        [0m | [0m0.7106   [0m | [0m30.22    [0m | [0m0.6991   [0m |
| [0m2        [0m | [0m0.6828   [0m | [0m11.03    [0m | [0m0.04381  [0m |
| [0m3        [0m | [0m0.6828   [0m | [0m11.84    [0m | [0m0.1061   [0m |
| [95m4        [0m | [95m0.7181   [0m | [95m36.91    [0m | [95m0.6794   [0m |
| [0m5        [0m | [0m0.6985   [0m | [0m24.74    [0m | [0m0.4483   [0m |
| [95m6        [0m | [95m0.7263   [0m | [95m50.0     [0m | [95m1.0      [0m |
| [0m7        [0m | [0m0.7218   [0m | [0m46.77    [0m | [0m0.0      [0m |
| [0m8        [0m | [0m0.7241   [0m | [0m50.0     [0m | [0m0.0      [0m |
[91mData point [50.  1.] is not unique. 1 duplicates registered. Continuing ...[0m

In [19]:
# Run the SVC search, passing n_iter=45 to the optimiser.
models_dict_svc = optimize_svc(45)

---------------------------特征为：stay_chart_included_mdr-------------------------------
---------------------------模型为：svc-------------------------------
|   iter    |  target   |   expC    | expGamma  |
-------------------------------------------------
| [0m1        [0m | [0m0.5699   [0m | [0m-0.05993 [0m | [0m-1.903   [0m |
| [95m2        [0m | [95m0.6622   [0m | [95m-2.059   [0m | [95m-3.869   [0m |
| [95m3        [0m | [95m0.7068   [0m | [95m-1.975   [0m | [95m-3.682   [0m |
| [0m4        [0m | [0m0.5355   [0m | [0m0.6362   [0m | [0m-1.962   [0m |
| [0m5        [0m | [0m0.6153   [0m | [0m-0.6308  [0m | [0m-2.655   [0m |
| [0m6        [0m | [0m0.6649   [0m | [0m-2.064   [0m | [0m-3.86    [0m |
| [0m7        [0m | [0m0.6456   [0m | [0m-2.222   [0m | [0m-3.323   [0m |
| [0m8        [0m | [0m0.6464   [0m | [0m-1.289   [0m | [0m-2.486   [0m |
| [0m9        [0m | [0m0.6586   [0m | [0m-1.972   [0m | [0m-3.69    [0m |
| 

In [20]:
# Run the MLP search, passing n_iter=45 to the optimiser.
models_dict_mlp = optimize_mlp(45)

---------------------------特征为：stay_chart_included_mdr-------------------------------
---------------------------模型为：mlp-------------------------------
|   iter    |  target   |  layer1   |  layer2   |
-------------------------------------------------
| [0m1        [0m | [0m0.6078   [0m | [0m188.8    [0m | [0m85.1     [0m |
| [95m2        [0m | [95m0.7296   [0m | [95m80.8     [0m | [95m9.082    [0m |
| [0m3        [0m | [0m0.7137   [0m | [0m85.36    [0m | [0m16.3     [0m |
| [0m4        [0m | [0m0.6244   [0m | [0m226.4    [0m | [0m82.81    [0m |
| [0m5        [0m | [0m0.6473   [0m | [0m157.9    [0m | [0m56.0     [0m |
| [95m6        [0m | [95m0.7432   [0m | [95m32.07    [0m | [95m4.0      [0m |
| [0m7        [0m | [0m0.706    [0m | [0m30.0     [0m | [0m120.0    [0m |
| [0m8        [0m | [0m0.7369   [0m | [0m81.57    [0m | [0m7.7      [0m |
| [0m9        [0m | [0m0.7347   [0m | [0m112.8    [0m | [0m4.325    [0m |
| 

In [21]:
from copy import deepcopy

# Deep copies are required here: the conversion cells that follow edit the
# nested 'params' dicts in place. A shallow dict.copy() would share those
# nested dicts, so the raw optimiser results would be altered as well and
# could not be used to regenerate the *_mod versions.
models_dict_lr_mod = deepcopy(models_dict_lr)
models_dict_knn_mod = deepcopy(models_dict_knn)
models_dict_svc_mod = deepcopy(models_dict_svc)
models_dict_mlp_mod = deepcopy(models_dict_mlp)

In [22]:
# Display the copied logistic-regression result (still holding the raw 'expC').
models_dict_lr_mod

{'stay_chart_included_mdr': {'target': 0.7493514249075407,
  'params': {'expC': -0.4408229265700516}}}

In [23]:
# Replace the tuned exponent with the actual regularisation strength
# C = 10 ** expC, rounded to 5 decimals.
# The 'expC' check makes the cell safe to re-run: once the key has been
# replaced, a second execution is a no-op instead of raising KeyError.
for best in models_dict_lr_mod.values():
    params = best['params']
    if 'expC' in params:
        params['C'] = round(10 ** params.pop('expC'), 5)

In [24]:
# Decode the continuous KNN parameters into the values the classifier takes.
# They must be decoded exactly as the knn_cv objective decodes them:
# n_neighbors is truncated with int(), weights is ROUNDED before indexing.
# Truncating weights (plain int()) would turn e.g. 0.7 into 'uniform' even
# though the optimiser scored that point as 'distance'.
for best in models_dict_knn_mod.values():
    params = best['params']
    params['n_neighbors'] = int(params['n_neighbors'])
    # Skip if already decoded so the cell can be re-run safely.
    if not isinstance(params['weights'], str):
        params['weights'] = ['uniform', 'distance'][int(round(params['weights']))]

In [25]:
# Replace the tuned exponents with the actual SVC hyper-parameters
# C = 10 ** expC and gamma = 10 ** expGamma, each rounded to 5 decimals.
# The key checks make the cell safe to re-run: once a key has been replaced,
# a second execution skips it instead of raising KeyError.
for best in models_dict_svc_mod.values():
    params = best['params']
    if 'expC' in params:
        params['C'] = round(10 ** params.pop('expC'), 5)
    if 'expGamma' in params:
        params['gamma'] = round(10 ** params.pop('expGamma'), 5)

In [26]:
# Truncate the continuous layer widths to ints, matching the int() casts
# applied by the mlp_cv objective.
for best in models_dict_mlp_mod.values():
    layer_params = best['params']
    for layer_key in ('layer1', 'layer2'):
        layer_params[layer_key] = int(layer_params[layer_key])

In [27]:
# Persist each model's tuned parameters to its own .npy file in the working
# directory. The saved objects are plain dicts, not numeric arrays.
for file_name, tuned in (
    ('lr_params.npy', models_dict_lr_mod),
    ('knn_params.npy', models_dict_knn_mod),
    ('svc_params.npy', models_dict_svc_mod),
    ('mlp_params.npy', models_dict_mlp_mod),
):
    np.save(file_name, tuned)

In [28]:
# total_param_dict = {}
# models_dict = {}
# n_iter = 195
# for i in label_features_dict:
#     label = i
#     features = label_features_dict[i]
#     x_train = dev_df[features]
#     y_train = dev_df[label]
#     x_test = val_df[features]
#     y_test = val_df[label]
    
#     scaler = MinMaxScaler()
#     x_train = scaler.fit_transform(x_train)
#     x_test = scaler.transform(x_test)
    
#     lr_op = BayesianOptimization(
#         lr_cv,
#         {'expC': (-5, 2)},
#     random_state=2024)
    
#     knn_op = BayesianOptimization(
#         knn_cv,
#         {'n_neighbors': (2, 20),
#         'weights': (0,1)},
#     random_state = 2024)
    
#     svc_op = BayesianOptimization(
#         svc_cv,
#         {'expC': (-3, 2), 
#          'expGamma': (-4, -1)},
#     random_state = 2024)
    
#     print('---------------------------特征为：%s-------------------------------'%label)
#     print('---------------------------模型为：l2-------------------------------')
#     lr_op.maximize(n_iter=n_iter)
#     print(lr_op.max)
#     models_dict['l2'] = lr_op.max
#     print('---------------------------模型为：KNN-------------------------------')
#     knn_op.maximize(n_iter=n_iter)
#     print(knn_op.max)
#     models_dict['knn'] = knn_op.max
#     print('---------------------------模型为：SVC-------------------------------')
#     svc_op.maximize(n_iter=n_iter)
#     print(svc_op.max)
#     models_dict['svc'] = svc_op.max
#     total_param_dict[label] = models_dict
#     print('----------------------------------------------------------------------------------------------------')