# Model hyperparameter tuning for RBC transfusion

In [1]:
import pandas as pd
import numpy as np 
from numpy import mean
from numpy import std
import random
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this presumably also hides scikit-learn's convergence and
# failed-fit warnings raised during the grid searches below — confirm that
# hiding them is intended.
warnings.filterwarnings('ignore')

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import mutual_info_classif
from sklearn.model_selection import train_test_split

In [3]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline


In [4]:
from sklearn import metrics


from sklearn.metrics import (accuracy_score,
                             precision_score,
                             recall_score, 
                             f1_score,
                             roc_auc_score, 
                             precision_recall_curve,
                             balanced_accuracy_score,
                             auc)

In [23]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
import xgboost as xgb
import lightgbm as lgb
import catboost as cb

In [24]:
from sklearn.model_selection import StratifiedKFold

# Name of the metric handed to the grid searches in the later cells.
scoring = 'roc_auc'

# Stratified 5-fold splitter reused as the `cv` argument of the grid searches.
stratifiedkf = StratifiedKFold(n_splits=5)

## rescaledtrainX and target_trainval come from the data-processing part.
## This notebook continues from the data-processing part (1-Data analysis).

## Logistic Regression

In [2]:
# Grid search over the regularisation strength (C) and penalty type of a
# logistic regression, scored with `scoring` on the stratified 5-fold splitter.
c_values = [0.5, 0.7, 0.9, 1.0, 1.3, 1.5, 1.7, 2.0]
# Penalties covered by the search; kept as a module-level name in case later
# cells refer to it.
penalty = ['l1', 'l2', 'elasticnet', 'none']

# Every penalty is paired with a solver that supports it. The default 'lbfgs'
# solver only handles 'l2' and no penalty, so with a single flat grid the
# 'l1' and 'elasticnet' candidates fail at fit time and are reported as NaN
# (silently, because warnings are suppressed at the top of the notebook).
# NOTE(review): scikit-learn >= 1.2 deprecates the string 'none' in favour of
# None; switch the spelling when upgrading.
param_grid = [
    # Unchanged from the original search: these already worked with 'lbfgs'.
    dict(solver=['lbfgs'], penalty=['l2', 'none'], C=c_values),
    # 'saga' supports L1; a larger iteration budget helps it converge.
    dict(solver=['saga'], penalty=['l1'], C=c_values, max_iter=[1000]),
    # 'elasticnet' additionally requires l1_ratio; 0.5 is an even L1/L2 mix.
    dict(solver=['saga'], penalty=['elasticnet'], l1_ratio=[0.5],
         C=c_values, max_iter=[1000]),
]
model = LogisticRegression()

stratifiedkf = StratifiedKFold(n_splits=5)

grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=stratifiedkf)

# rescaledtrainX and target_trainval come from the data-processing part.
grid_result = grid.fit(rescaledtrainX, target_trainval)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))


## Support Vector Machine

In [3]:

# Grid search over the SVC regularisation strength (C) and kernel type.
c_values = [0.1, 0.01, 0.03]
kernel_values = ['linear', 'poly', 'rbf', 'sigmoid']
param_grid = dict(C=c_values, kernel=kernel_values)
model = SVC()

stratifiedkf = StratifiedKFold(n_splits=5)

grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))
    

## KNeighborsClassifier 

In [4]:

# Candidate neighbourhood sizes for the k-nearest-neighbours classifier.
neighbors = [3, 5, 7, 9, 15, 17, 19, 21, 23, 25]
param_grid = {'n_neighbors': neighbors}

model = KNeighborsClassifier()

# Cross-validated search scored with `scoring` on the shared stratified folds.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring=scoring,
    cv=stratifiedkf,
)
grid_result = grid.fit(rescaledtrainX, target_trainval)

# Report the best cross-validated score and the setting that produced it.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

## Naive Bayes

In [5]:

# Grid search over the class priors and variance smoothing of GaussianNB.
# NOTE(review): the first prior pair (0.68 / 0.32) presumably mirrors the
# observed class balance — confirm against the data-processing part.
priors = [[0.68, 0.32], [0.7, 0.3], [0.8, 0.2]]
var_smoothing = [1e-9, 1e-8, 1e-7]

param_grid = dict(priors=priors, var_smoothing=var_smoothing)
model = GaussianNB()

grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

## Decision tree

In [6]:
# Grid search over the maximum depth of a Gini-criterion decision tree.
# NOTE(review): DecisionTreeClassifier is created without a random_state, so
# scores may differ slightly between runs — pin one if reproducibility matters.
param_test = {'criterion': ['gini'], 'max_depth': range(10, 100, 10)}

model = DecisionTreeClassifier()

grid = GridSearchCV(estimator=model, param_grid=param_test, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

## RandomForestClassifier 

In [7]:

from sklearn.model_selection import StratifiedKFold

# Grid search over the number of trees and the maximum tree depth of a
# random forest.
# NOTE(review): RandomForestClassifier is created without a random_state, so
# scores may differ slightly between runs — pin one if reproducibility matters.
param_test1 = {'n_estimators': range(100, 500, 100), 'max_depth': range(3, 14, 2)}

model = RandomForestClassifier()

grid = GridSearchCV(estimator=model, param_grid=param_test1, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))
    

## Gradient Boosting Classifier 

In [8]:
# Grid search over the learning rate of a gradient boosting classifier.
param_test = {'learning_rate': [0.1, 0.02, 0.3, 0.01]}

model = GradientBoostingClassifier()

grid = GridSearchCV(estimator=model, param_grid=param_test, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

## XGBoosting Classifier

In [9]:
from sklearn.model_selection import StratifiedKFold

# Grid search over the learning rate of an XGBoost classifier.
learning_rate_values = [0.01, 0.02, 0.03, 0.1, 0.3]
param_grid = dict(learning_rate=learning_rate_values)

model = xgb.XGBClassifier()

grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

In [10]:
# Grid search over the number of boosting rounds and the tree depth of an
# XGBoost classifier with the learning rate fixed at 0.03.
# NOTE(review): 0.03 is presumably the best learning rate found by the
# previous cell — confirm against its output.
param_test = {'n_estimators': range(100, 700, 100), 'max_depth': range(5, 12, 2)}

model = xgb.XGBClassifier(learning_rate=0.03)

# Uses the shared `scoring` variable ('roc_auc') like every other search in
# this notebook instead of repeating the literal.
grid = GridSearchCV(estimator=model, param_grid=param_test, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

## Light Gradient Boosting Machine

In [11]:
# Grid search over the learning rate of a LightGBM classifier.
param_test = {'learning_rate': [0.1, 0.01, 0.3, 0.5, 0.2]}

model = lgb.LGBMClassifier()

grid = GridSearchCV(estimator=model, param_grid=param_test, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

In [12]:
# Grid search over the learning rate of a LightGBM classifier.
# NOTE(review): this cell repeats the previous LightGBM cell with the same
# grid and model — presumably a different parameter was meant to be tuned
# here; confirm or drop the cell.
param_test = {'learning_rate': [0.1, 0.01, 0.3, 0.5, 0.2]}

model = lgb.LGBMClassifier()

grid = GridSearchCV(estimator=model, param_grid=param_test, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

## CatboostClassifier 

In [13]:
from sklearn.model_selection import StratifiedKFold

# Grid search over the learning rate of a CatBoost classifier.
learning_rate_values = [0.01, 0.02, 0.03, 0.1]
param_grid = dict(learning_rate=learning_rate_values)

# verbose=False keeps CatBoost from printing its per-iteration training log.
model = cb.CatBoostClassifier(verbose=False)

grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))

In [14]:
from sklearn.model_selection import StratifiedKFold

stratifiedkf = StratifiedKFold(n_splits=5)

# Grid search over the number of CatBoost iterations with the learning rate
# held at 0.01.
# NOTE(review): 0.01 is presumably the best learning rate found by the
# previous cell — confirm against its output.
param_test = {'iterations': range(200, 1000, 100), 'learning_rate': [0.01]}

# verbose=False keeps CatBoost from printing its per-iteration training log.
model = cb.CatBoostClassifier(verbose=False)

grid = GridSearchCV(estimator=model, param_grid=param_test, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))
# NOTE(review): the original cell ended with the bare note "random_strength" —
# presumably a CatBoost parameter still to be tuned; confirm.

In [15]:
from sklearn.model_selection import StratifiedKFold

stratifiedkf = StratifiedKFold(n_splits=5)

# Grid search over the CatBoost tree depth with iterations fixed at 600 and
# the learning rate fixed at 0.01.
# NOTE(review): 600 iterations / 0.01 are presumably the best values from the
# previous CatBoost cells — confirm against their output.
param_test = {
    'max_depth': range(5, 13, 2),
    'iterations': [600],
    'learning_rate': [0.01],
}

# verbose=False keeps CatBoost from printing its per-iteration training log.
model = cb.CatBoostClassifier(verbose=False)

grid = GridSearchCV(estimator=model, param_grid=param_test, scoring=scoring, cv=stratifiedkf)
grid_result = grid.fit(rescaledtrainX, target_trainval)
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean and standard deviation of the cross-validated score.
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

# The loop variable is `mean_score`, not `mean`, so the `mean` function
# imported from numpy at the top of the file is not overwritten.
for mean_score, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean_score, stdev, param))
# NOTE(review): the original cell ended with the bare note "random_strength" —
# presumably a CatBoost parameter still to be tuned; confirm.