In [13]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#pd.set_option('max_columns',100)

import matplotlib as mpl
mpl.style.use('ggplot')
sns.set_style('white')

import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost.sklearn import XGBClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

import sys
import os
import tempfile
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import cross_val_score

from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve,auc
from sklearn.metrics import accuracy_score
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss
from sklearn.metrics import (precision_score, recall_score,f1_score)
from sklearn.calibration import CalibratedClassifierCV, calibration_curve

In [14]:
from sklearn.ensemble import GradientBoostingClassifier
from bayes_opt import BayesianOptimization

In [15]:
# Development cohort: the data every optimize_* function tunes on.
dev_df = pd.read_csv(filepath_or_buffer='dev_df_revise.csv')
# Validation cohort, loaded from its own file.
val_df = pd.read_csv(filepath_or_buffer='val_df_revise.csv')

In [16]:
# Maps each target column (used as y) to the predictor columns (used as X)
# that the optimize_* functions select from dev_df / val_df.
label_features_dict = {
    'stay_chart_included_mdr': [
        'dementia',
        'platelet',
        'phosphate',
        'heart_rate',
        'weight',
        'creatinine',
        'basophils_abs',
        'lymphocytes_abs',
        'icustay_rank',
        'temperature',
        'sodium',
        'history_difficulty_swallowing',
        'los_hosp_beforeicu',
        'hospstay_seq',
        'neutrophils_abs',
        'wbc',
        'mg',
        'NLR',
        'admission_age',
        'history_assistive_devices_Wheelchair',
        'mchc',
        'history_past_COPD',
        'bun',
        'rdw',
        'previous_mdr_90d',
    ],
}

In [17]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
# scaler = StandardScaler()

# x_train = scaler.fit_transform(x_train)
# x_test = scaler.transform(x_test)

In [18]:
#lr
def lr_cv(expC):
    """Score a LogisticRegression with ``C = 10 ** expC``.

    Returns the mean ``roc_auc`` from ``cross_val_score`` with ``cv=5``.
    ``x_train`` and ``y_train`` are read from the surrounding namespace.
    NOTE(review): no visible cell assigns them at module level - confirm
    they exist before calling this version.
    """
    estimator = LogisticRegression(C=10 ** expC)
    fold_scores = cross_val_score(
        estimator, x_train, y_train, scoring='roc_auc', cv=5
    )
    return fold_scores.mean()

# Seeded optimiser over expC in [-5, 2] for the module-level lr_cv objective.
lr_op = BayesianOptimization(
    f=lr_cv,
    pbounds={'expC': (-5, 2)},
    random_state=2023,
)

#knn
def knn_cv(n_neighbors, weights):
    """Score a KNeighborsClassifier for one sampled parameter pair.

    ``n_neighbors`` is truncated with ``int()``; ``weights`` is rounded to
    0 or 1 and used to pick ``'uniform'`` or ``'distance'``. Returns the mean
    ``roc_auc`` from ``cross_val_score`` with ``cv=5``.
    ``x_train`` and ``y_train`` are read from the surrounding namespace.
    NOTE(review): no visible cell assigns them at module level - confirm.
    """
    weight_choice = int(round(weights))
    neighbour_model = KNeighborsClassifier(
        n_neighbors=int(n_neighbors),
        weights=['uniform', 'distance'][weight_choice],
    )
    fold_scores = cross_val_score(
        neighbour_model, x_train, y_train, scoring='roc_auc', cv=5
    )
    return fold_scores.mean()

# Seeded optimiser for the module-level knn_cv objective.
knn_op = BayesianOptimization(
    f=knn_cv,
    pbounds={
        'n_neighbors': (2, 20),
        'weights': (0, 1),
    },
    random_state=2023,
)

#svc
def svc_cv(expC, expGamma):
    """Score an SVC with ``C = 10 ** expC`` and ``gamma = 10 ** expGamma``.

    Returns the mean ``roc_auc`` from ``cross_val_score`` with ``cv=5``.
    ``x_train`` and ``y_train`` are read from the surrounding namespace.
    NOTE(review): no visible cell assigns them at module level - confirm.
    """
    classifier = SVC(C=10 ** expC, gamma=10 ** expGamma, random_state=2023)
    fold_scores = cross_val_score(
        classifier, x_train, y_train, scoring='roc_auc', cv=5
    )
    return fold_scores.mean()

# Seeded optimiser for the module-level svc_cv objective (log10 search space).
svc_op = BayesianOptimization(
    f=svc_cv,
    pbounds={
        'expC': (-5, 3),
        'expGamma': (-4, -1),
    },
    random_state=2023,
)

#mlp
def mlp_cv(layer1, layer2):
    """Score a two-hidden-layer MLPClassifier for the sampled layer widths.

    Both widths are truncated with ``int()``. The network uses
    ``early_stopping=True`` and ``random_state=2023``. Returns the mean
    ``roc_auc`` from ``cross_val_score`` with ``cv=5``.
    ``x_train`` and ``y_train`` are read from the surrounding namespace.
    NOTE(review): no visible cell assigns them at module level - confirm.
    """
    hidden_sizes = (int(layer1), int(layer2))
    network = MLPClassifier(
        hidden_layer_sizes=hidden_sizes,
        early_stopping=True,
        random_state=2023,
    )
    fold_scores = cross_val_score(
        network, x_train, y_train, scoring='roc_auc', cv=5
    )
    return fold_scores.mean()

# Seeded optimiser for the module-level mlp_cv objective.
mlp_op = BayesianOptimization(
    f=mlp_cv,
    pbounds={
        'layer1': (30, 300),
        'layer2': (4, 120),
    },
    random_state=2023,
)

In [19]:
def optimize_lr(n_iter):
    """Tune LogisticRegression's ``C`` by Bayesian optimisation, per label.

    For every ``label -> features`` entry of ``label_features_dict`` the
    selected ``dev_df`` columns are min-max scaled and a seeded
    ``BayesianOptimization`` searches ``expC`` in [-5, 2] (``C = 10 ** expC``),
    maximising the mean ``roc_auc`` of ``cross_val_score`` with ``cv=5``.

    Parameters
    ----------
    n_iter : int
        Passed to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimiser.max}`` for every label that was tuned.
    """
    models_dict = {}

    def lr_cv(expC):
        # x_train / y_train are looked up in the enclosing function when the
        # optimiser calls this, i.e. they are the current label's data.
        C = 10 ** expC
        return cross_val_score(
            LogisticRegression(C=C),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds contribute to the min/max.
        x_train = MinMaxScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        lr_op = BayesianOptimization(
            lr_cv,
            {'expC': (-5, 2)},
            random_state=2023)

        # Banner text is Chinese: "特征为" = "feature is", "模型为" = "model is".
        print('---------------------------特征为：%s-------------------------------'%label)
        print('---------------------------模型为：lr-------------------------------')
        lr_op.maximize(n_iter=n_iter)
        best = lr_op.max
        print(best)
        models_dict[label] = best
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [20]:
def optimize_knn(n_iter):
    """Tune KNeighborsClassifier by Bayesian optimisation, per label.

    For every ``label -> features`` entry of ``label_features_dict`` the
    selected ``dev_df`` columns are min-max scaled and a seeded
    ``BayesianOptimization`` searches ``n_neighbors`` in (2, 50) and a
    continuous ``weights`` value in (0, 1), maximising the mean ``roc_auc``
    of ``cross_val_score`` with ``cv=5``.

    Parameters
    ----------
    n_iter : int
        Passed to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimiser.max}``. The stored params are the raw sampled
        floats; decode ``weights`` with ``int(round(...))`` and
        ``n_neighbors`` with ``int(...)`` to match what was scored.
    """
    models_dict = {}

    def knn_cv(n_neighbors, weights):
        # x_train / y_train are looked up in the enclosing function when the
        # optimiser calls this, i.e. they are the current label's data.
        weight_choice = int(round(weights))
        return cross_val_score(
            KNeighborsClassifier(n_neighbors=int(n_neighbors),
                                 weights=['uniform', 'distance'][weight_choice]),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds contribute to the min/max.
        x_train = MinMaxScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        knn_op = BayesianOptimization(
            knn_cv,
            {'n_neighbors': (2, 50),
             'weights': (0, 1)},
            allow_duplicate_points=True,
            random_state=2023)

        # Banner text is Chinese: "特征为" = "feature is", "模型为" = "model is".
        print('---------------------------特征为：%s-------------------------------'%label)
        print('---------------------------模型为：knn-------------------------------')
        knn_op.maximize(n_iter=n_iter)
        best = knn_op.max
        print(best)
        models_dict[label] = best
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [21]:
def optimize_svc(n_iter):
    """Tune SVC ``C`` and ``gamma`` by Bayesian optimisation, per label.

    NOTE: a later cell in this notebook defines ``optimize_svc`` again (with
    an extra kernel dimension and StandardScaler); once that cell has run,
    this version is shadowed.

    For every ``label -> features`` entry of ``label_features_dict`` the
    selected ``dev_df`` columns are min-max scaled and a seeded
    ``BayesianOptimization`` searches ``expC`` in (-3, 2) and ``expGamma`` in
    (-4, -1), with ``C = 10 ** expC`` and ``gamma = 10 ** expGamma``,
    maximising the mean ``roc_auc`` of ``cross_val_score`` with ``cv=5``.

    Parameters
    ----------
    n_iter : int
        Passed to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimiser.max}`` for every label that was tuned.
    """
    models_dict = {}

    def svc_cv(expC, expGamma):
        # x_train / y_train are looked up in the enclosing function when the
        # optimiser calls this, i.e. they are the current label's data.
        C = 10 ** expC
        gamma = 10 ** expGamma
        return cross_val_score(
            SVC(C=C, gamma=gamma, random_state=2023),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds contribute to the min/max.
        x_train = MinMaxScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        svc_op = BayesianOptimization(
            svc_cv,
            {'expC': (-3, 2),
             'expGamma': (-4, -1)},
            random_state=2023)

        # Banner text is Chinese: "特征为" = "feature is", "模型为" = "model is".
        print('---------------------------特征为：%s-------------------------------'%label)
        print('---------------------------模型为：svc-------------------------------')
        svc_op.maximize(n_iter=n_iter)
        best = svc_op.max
        print(best)
        models_dict[label] = best
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [22]:
def optimize_mlp(n_iter):
    """Tune the two hidden-layer widths of an MLPClassifier, per label.

    For every ``label -> features`` entry of ``label_features_dict`` the
    selected ``dev_df`` columns are min-max scaled and a seeded
    ``BayesianOptimization`` searches ``layer1`` in (30, 300) and ``layer2``
    in (4, 120), maximising the mean ``roc_auc`` of ``cross_val_score`` with
    ``cv=5``. Sampled widths are truncated with ``int()`` before use.

    Parameters
    ----------
    n_iter : int
        Passed to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimiser.max}``; the stored widths are the raw floats.
    """
    models_dict = {}

    def mlp_cv(layer1, layer2):
        # x_train / y_train are looked up in the enclosing function when the
        # optimiser calls this, i.e. they are the current label's data.
        return cross_val_score(
            MLPClassifier(hidden_layer_sizes=(int(layer1), int(layer2)),
                          early_stopping=False, random_state=2023),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds contribute to the min/max.
        x_train = MinMaxScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        mlp_op = BayesianOptimization(
            mlp_cv,
            {'layer1': (30, 300),
             'layer2': (4, 120)},
            random_state=2023)

        # Banner text is Chinese: "特征为" = "feature is", "模型为" = "model is".
        print('---------------------------特征为：%s-------------------------------'%label)
        print('---------------------------模型为：mlp-------------------------------')
        mlp_op.maximize(n_iter=n_iter)
        best = mlp_op.max
        print(best)
        models_dict[label] = best
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [23]:
def optimize_svc(n_iter):
    """Tune SVC ``C``, ``gamma`` and kernel by Bayesian optimisation, per label.

    This definition replaces the earlier ``optimize_svc`` in the notebook.

    For every ``label -> features`` entry of ``label_features_dict`` the
    selected ``dev_df`` columns are standardised and a seeded
    ``BayesianOptimization`` searches ``expC`` in (-3, 2), ``expGamma`` in
    (-4, -1) and a continuous ``kernel_index``, maximising the mean
    ``roc_auc`` of ``cross_val_score`` with ``cv=5``.

    ``kernel_index`` is decoded as
    ``kernel_options[min(int(kernel_index), 3)]`` with
    ``kernel_options = ['linear', 'poly', 'rbf', 'sigmoid']``. Its upper bound
    is just under 4 so each kernel owns a unit-wide slice of the range; with
    an upper bound of 3, ``'sigmoid'`` was only chosen when the sample was
    exactly 3.0.

    Parameters
    ----------
    n_iter : int
        Passed to ``BayesianOptimization.maximize`` as ``n_iter``.

    Returns
    -------
    dict
        ``{label: optimiser.max}``; params hold the raw ``expC``,
        ``expGamma`` and ``kernel_index`` floats.
    """
    models_dict = {}
    kernel_options = ['linear', 'poly', 'rbf', 'sigmoid']
    last_kernel = len(kernel_options) - 1

    def svc_cv(expC, expGamma, kernel_index):
        # x_train / y_train are looked up in the enclosing function when the
        # optimiser calls this, i.e. they are the current label's data.
        C = 10 ** expC
        gamma = 10 ** expGamma
        # int() floors the sample; min() keeps the index valid at the bound.
        kernel = kernel_options[min(int(kernel_index), last_kernel)]

        return cross_val_score(
            SVC(C=C, gamma=gamma, kernel=kernel, random_state=2023),
            x_train, y_train, scoring='roc_auc', cv=5
        ).mean()

    for label, features in label_features_dict.items():
        # NOTE(review): the scaler is fitted on all development rows before
        # cross-validation, so held-out folds contribute to mean/variance.
        x_train = StandardScaler().fit_transform(dev_df[features])
        y_train = dev_df[label]

        svc_op = BayesianOptimization(
            svc_cv,
            {'expC': (-3, 2),
             'expGamma': (-4, -1),
             # [0, 4): floor() maps it uniformly onto the four kernels.
             'kernel_index': (0, len(kernel_options) - 1e-6)},
            random_state=2023)

        # Banner text is Chinese: "特征为" = "feature is", "模型为" = "model is".
        print(f'---------------------------特征为：{label}-------------------------------')
        print('---------------------------模型为：svc-------------------------------')
        svc_op.maximize(n_iter=n_iter)
        best = svc_op.max
        print(best)
        models_dict[label] = best
        print('----------------------------------------------------------------------------------------------------')

    return models_dict

In [None]:
# Run the SVC search; 45 is forwarded as n_iter to BayesianOptimization.maximize.
models_dict_svc = optimize_svc(45)

In [None]:
# Run the logistic-regression search; 45 is forwarded as n_iter to BayesianOptimization.maximize.
models_dict_lr = optimize_lr(45)

In [None]:
# Run the KNN search; 45 is forwarded as n_iter to BayesianOptimization.maximize.
models_dict_knn = optimize_knn(45)

In [None]:
#models_dict_svc = optimize_svc(45)

In [None]:
# Run the MLP search; 45 is forwarded as n_iter to BayesianOptimization.maximize.
models_dict_mlp = optimize_mlp(45)

In [None]:
# Working copies of the raw optimiser results. dict.copy() is shallow, so the
# nested 'params' dicts would still be shared and the in-place edits made to
# the *_mod dicts would also rewrite the raw results. Copy the per-label entry
# and its 'params' dict so the raw results stay untouched.
models_dict_lr_mod = {
    label: {**best, 'params': dict(best['params'])}
    for label, best in models_dict_lr.items()
}
models_dict_knn_mod = {
    label: {**best, 'params': dict(best['params'])}
    for label, best in models_dict_knn.items()
}
models_dict_svc_mod = {
    label: {**best, 'params': dict(best['params'])}
    for label, best in models_dict_svc.items()
}
models_dict_mlp_mod = {
    label: {**best, 'params': dict(best['params'])}
    for label, best in models_dict_mlp.items()
}

In [None]:
# Replace the searched exponent with the actual regularisation strength
# C = 10 ** expC (rounded to 5 decimals). The membership check makes the cell
# safe to re-run: a second pass finds no 'expC' and leaves 'C' alone instead
# of raising KeyError.
for best in models_dict_lr_mod.values():
    lr_params = best['params']
    if 'expC' in lr_params:
        lr_params['C'] = round(10 ** lr_params.pop('expC'), 5)

In [None]:
# Decode the raw KNN samples into the values that were actually scored.
# knn_cv picks the weighting with int(round(weights)), so the same rounding is
# used here; plain int() would truncate e.g. 0.7 to 'uniform' although the
# reported score was obtained with 'distance'.
for best in models_dict_knn_mod.values():
    knn_params = best['params']
    knn_params['n_neighbors'] = int(knn_params['n_neighbors'])
    # Already-decoded entries hold a string; skip them so the cell can be re-run.
    if not isinstance(knn_params['weights'], str):
        knn_params['weights'] = ['uniform', 'distance'][int(round(knn_params['weights']))]

In [None]:
# Decode the raw SVC samples: exponents become C / gamma (rounded to 5
# decimals) and, when the search included a kernel dimension, the kernel name
# is added under 'kernel'. 'kernel_index' itself is kept so existing readers
# of that key keep working. Membership checks make the cell safe to re-run.
# This list must stay in the same order as kernel_options in optimize_svc.
svc_kernel_options = ['linear', 'poly', 'rbf', 'sigmoid']
for best in models_dict_svc_mod.values():
    svc_params = best['params']
    if 'expC' in svc_params:
        svc_params['C'] = round(10 ** svc_params.pop('expC'), 5)
    if 'expGamma' in svc_params:
        svc_params['gamma'] = round(10 ** svc_params.pop('expGamma'), 5)
    if 'kernel_index' in svc_params:
        # Same floor-based decoding as svc_cv; min() guards the upper bound.
        kernel_pos = min(int(svc_params['kernel_index']), len(svc_kernel_options) - 1)
        svc_params['kernel'] = svc_kernel_options[kernel_pos]

In [None]:
# Truncate the tuned hidden-layer widths to whole numbers, the same int()
# conversion mlp_cv applies before building the network.
for best in models_dict_mlp_mod.values():
    mlp_params = best['params']
    for width_key in ('layer1', 'layer2'):
        mlp_params[width_key] = int(mlp_params[width_key])

In [None]:
# Persist each decoded parameter dict to its own .npy file.
# NOTE(review): the payload is a Python dict, so it is stored as a pickled
# object; readers presumably need np.load(..., allow_pickle=True).item() - confirm.
np.save(file='lr_params_revise.npy', arr=models_dict_lr_mod, allow_pickle=True)
np.save(file='knn_params_revise.npy', arr=models_dict_knn_mod, allow_pickle=True)
np.save(file='svc_params_revise.npy', arr=models_dict_svc_mod, allow_pickle=True)
np.save(file='mlp_params_revise.npy', arr=models_dict_mlp_mod, allow_pickle=True)