In [80]:
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Data preprocessing
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split

# Models
import xgboost
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgm
from lightgbm import LGBMClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Hyperparameter tuning
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold

# Solving Data imbalance
import imblearn
from imblearn.over_sampling import SMOTE 

# Scoring metrics
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_score

In [2]:
df = pd.read_csv("german_credit_data.csv",index_col=0)

In [3]:
print("length:", len(df), "features:", len(df.columns))
df.head()

length: 1000 features: 10


Unnamed: 0,Age,Sex,Job,Housing,Saving accounts,Checking account,Credit amount,Duration,Purpose,Risk
0,67,male,2,own,,little,1169,6,radio/TV,good
1,22,female,2,own,little,moderate,5951,48,radio/TV,bad
2,49,male,1,own,little,,2096,12,education,good
3,45,male,2,free,little,little,7882,42,furniture/equipment,good
4,53,male,2,free,little,little,4870,24,car,bad


In [8]:
# get dummies - Converting Categorical data into numerical.
df_dummies = pd.get_dummies(df, dummy_na=True)
print("length:", len(df_dummies), "features:", len(df_dummies.columns))
df_dummies.head()

length: 1000 features: 32


Unnamed: 0,Age,Job,Credit amount,Duration,Sex_female,Sex_male,Sex_nan,Housing_free,Housing_own,Housing_rent,...,Purpose_domestic appliances,Purpose_education,Purpose_furniture/equipment,Purpose_radio/TV,Purpose_repairs,Purpose_vacation/others,Purpose_nan,Risk_bad,Risk_good,Risk_nan
0,67,2,1169,6,0,1,0,0,1,0,...,0,0,0,1,0,0,0,0,1,0
1,22,2,5951,48,1,0,0,0,1,0,...,0,0,0,1,0,0,0,1,0,0
2,49,1,2096,12,0,1,0,0,1,0,...,0,1,0,0,0,0,0,0,1,0
3,45,2,7882,42,0,1,0,1,0,0,...,0,0,1,0,0,0,0,0,1,0
4,53,2,4870,24,0,1,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0


In [10]:
# defining independent and dependent variable
X = df_dummies.drop(["Risk_bad", "Risk_good","Risk_nan"], axis=1)
Y = df_dummies["Risk_good"]

In [11]:
X.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,1000.0,35.546,11.375469,19.0,27.0,33.0,42.0,75.0
Job,1000.0,1.904,0.653614,0.0,2.0,2.0,2.0,3.0
Credit amount,1000.0,3271.258,2822.736876,250.0,1365.5,2319.5,3972.25,18424.0
Duration,1000.0,20.903,12.058814,4.0,12.0,18.0,24.0,72.0
Sex_female,1000.0,0.31,0.462725,0.0,0.0,0.0,1.0,1.0
Sex_male,1000.0,0.69,0.462725,0.0,0.0,1.0,1.0,1.0
Sex_nan,1000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Housing_free,1000.0,0.108,0.310536,0.0,0.0,0.0,0.0,1.0
Housing_own,1000.0,0.713,0.452588,0.0,0.0,1.0,1.0,1.0
Housing_rent,1000.0,0.179,0.383544,0.0,0.0,0.0,0.0,1.0


## Data cleaning and pre-processing

In [12]:
# dropping simillar columns
corr_matrix = X.corr().abs()
upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(np.bool))
to_drop = [column for column in upper.columns if any(upper[column] == 1)]
X.drop(to_drop, axis=1, inplace=True)

In [13]:
print("length:", len(X), "features:", len(X.columns))

length: 1000 features: 28


In [14]:
# Label distribution
Y.value_counts()

1    700
0    300
Name: Risk_good, dtype: int64

In [15]:
# Robust transformer
RS = RobustScaler()
X_transformed = RS.fit_transform(X)
X_transformed = pd.DataFrame(X_transformed, columns = X.columns)

In [17]:
X_transformed.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,1000.0,0.169733,0.758365,-0.933333,-0.4,0.0,0.6,2.8
Job,1000.0,-0.096,0.653614,-2.0,0.0,0.0,0.0,1.0
Credit amount,1000.0,0.365113,1.082857,-0.7939,-0.365973,0.0,0.634027,6.177999
Duration,1000.0,0.241917,1.004901,-1.166667,-0.5,0.0,0.5,4.5
Sex_female,1000.0,0.31,0.462725,0.0,0.0,0.0,1.0,1.0
Sex_nan,1000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Housing_free,1000.0,0.108,0.310536,0.0,0.0,0.0,0.0,1.0
Housing_own,1000.0,-0.287,0.452588,-1.0,-1.0,0.0,0.0,0.0
Housing_rent,1000.0,0.179,0.383544,0.0,0.0,0.0,0.0,1.0
Housing_nan,1000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [18]:
# Splitting Data into train and test set.
X_train,X_test,Y_train,Y_test = train_test_split(X_transformed,Y, test_size=0.2, stratify=Y)

In [19]:
len(X_train), len(Y_train), len(X_test), len(Y_test)

(800, 800, 200, 200)

## Modelling

### XgBoost

In [89]:
np.random.seed(42)

xg_boost_clf = XGBClassifier()
xg_boost_clf.fit(X_train, Y_train)
xg_boost_clf.score(X_test, Y_test)



0.735

In [90]:
f1_score(xg_boost_clf.predict(X_test), Y_test)

0.8166089965397922

In [22]:
params = {
        'n_estimators': [500, 750, 1000],
        'min_child_weight': [1,5,10,15],
        'gamma': [1],
        'subsample': [0.4, 0.6],
        'colsample_bytree': [0.6,0.8,1],
        'max_depth': [None]
        }

folds = 3
param_comb = 5
skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 1001)

grid = GridSearchCV(estimator=XGBClassifier(), param_grid=params, scoring='roc_auc', n_jobs=1, cv=2, verbose=3)
grid.fit(X_train, Y_train)
print(grid.best_params_)

Fitting 2 folds for each of 72 candidates, totalling 144 fits
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=1, n_estimators=500, subsample=0.4;, score=0.655 total time=   0.2s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=1, n_estimators=500, subsample=0.4;, score=0.690 total time=   0.2s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=1, n_estimators=500, subsample=0.6;, score=0.646 total time=   0.2s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=1, n_estimators=500, subsample=0.6;, score=0.707 total time=   0.2s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=1, n_estimators=750, subsample=0.4;, score=0.661 total time=   0.3s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=1, n_estimators=750, subsample=0.4;, score=0.690 total time=   0.3s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_chil

[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=5, n_estimators=1000, subsample=0.4;, score=0.643 total time=   0.3s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=5, n_estimators=1000, subsample=0.4;, score=0.671 total time=   0.3s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=5, n_estimators=1000, subsample=0.6;, score=0.644 total time=   0.3s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=5, n_estimators=1000, subsample=0.6;, score=0.692 total time=   0.3s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=10, n_estimators=500, subsample=0.4;, score=0.717 total time=   0.1s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=10, n_estimators=500, subsample=0.4;, score=0.706 total time=   0.1s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=10, n_estimators=500, subsample=0.6;, score=0.6

[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=15, n_estimators=750, subsample=0.4;, score=0.707 total time=   0.1s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=15, n_estimators=750, subsample=0.6;, score=0.722 total time=   0.2s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=15, n_estimators=750, subsample=0.6;, score=0.728 total time=   0.2s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=15, n_estimators=1000, subsample=0.4;, score=0.640 total time=   0.3s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=15, n_estimators=1000, subsample=0.4;, score=0.701 total time=   0.2s
[CV 1/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=15, n_estimators=1000, subsample=0.6;, score=0.709 total time=   0.3s
[CV 2/2] END colsample_bytree=0.6, gamma=1, max_depth=None, min_child_weight=15, n_estimators=1000, subsample=0.6;, score

[CV 1/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=5, n_estimators=500, subsample=0.6;, score=0.627 total time=   0.1s
[CV 2/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=5, n_estimators=500, subsample=0.6;, score=0.699 total time=   0.1s
[CV 1/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=5, n_estimators=750, subsample=0.4;, score=0.657 total time=   0.2s
[CV 2/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=5, n_estimators=750, subsample=0.4;, score=0.632 total time=   0.2s
[CV 1/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=5, n_estimators=750, subsample=0.6;, score=0.637 total time=   0.3s
[CV 2/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=5, n_estimators=750, subsample=0.6;, score=0.696 total time=   0.3s
[CV 1/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=5, n_estimators=1000, subsample=0.4;, score=0.638 tot

[CV 2/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=10, n_estimators=1000, subsample=0.6;, score=0.677 total time=   0.3s
[CV 1/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=15, n_estimators=500, subsample=0.4;, score=0.640 total time=   0.1s
[CV 2/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=15, n_estimators=500, subsample=0.4;, score=0.707 total time=   0.1s
[CV 1/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=15, n_estimators=500, subsample=0.6;, score=0.710 total time=   0.1s
[CV 2/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=15, n_estimators=500, subsample=0.6;, score=0.728 total time=   0.1s
[CV 1/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=15, n_estimators=750, subsample=0.4;, score=0.654 total time=   0.2s
[CV 2/2] END colsample_bytree=0.8, gamma=1, max_depth=None, min_child_weight=15, n_estimators=750, subsample=0.4;, score=0.

[CV 1/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=1, n_estimators=1000, subsample=0.4;, score=0.656 total time=   0.5s
[CV 2/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=1, n_estimators=1000, subsample=0.4;, score=0.699 total time=   0.6s
[CV 1/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=1, n_estimators=1000, subsample=0.6;, score=0.647 total time=   0.6s
[CV 2/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=1, n_estimators=1000, subsample=0.6;, score=0.708 total time=   0.6s
[CV 1/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=5, n_estimators=500, subsample=0.4;, score=0.669 total time=   0.2s
[CV 2/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=5, n_estimators=500, subsample=0.4;, score=0.673 total time=   0.1s
[CV 1/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=5, n_estimators=500, subsample=0.6;, score=0.651 total time=   

[CV 2/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=10, n_estimators=750, subsample=0.4;, score=0.698 total time=   0.2s
[CV 1/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=10, n_estimators=750, subsample=0.6;, score=0.677 total time=   0.2s
[CV 2/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=10, n_estimators=750, subsample=0.6;, score=0.667 total time=   0.3s
[CV 1/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=10, n_estimators=1000, subsample=0.4;, score=0.705 total time=   0.3s
[CV 2/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=10, n_estimators=1000, subsample=0.4;, score=0.693 total time=   0.3s
[CV 1/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=10, n_estimators=1000, subsample=0.6;, score=0.686 total time=   0.4s
[CV 2/2] END colsample_bytree=1, gamma=1, max_depth=None, min_child_weight=10, n_estimators=1000, subsample=0.6;, score=0.668 total t

In [23]:
grid.best_estimator_.score(X_test, Y_test)

0.745

In [26]:
f1_score(grid.best_estimator_.predict(X_test), Y_test)

0.8259385665529011

In [24]:
grid.best_params_

{'colsample_bytree': 0.6,
 'gamma': 1,
 'max_depth': None,
 'min_child_weight': 15,
 'n_estimators': 500,
 'subsample': 0.6}

### RandomForest Classifier

In [36]:
rfc = RandomForestClassifier()
rfc .fit(X_train, Y_train)

f1_score(rfc.predict(X_test), Y_test)

0.8474576271186439

In [37]:
rfc.score(X_test, Y_test)

0.775

In [76]:
gs_grid = {"n_estimators": np.arange(50,500,50),
        "max_depth":[None],
        "min_samples_split":np.arange(2,10,2),
        "min_samples_leaf":np.arange(1,4,2)}

gs_rf = GridSearchCV(RandomForestClassifier(),
                         param_grid=gs_grid,
                         cv=5,
                         verbose=True)
gs_rf.fit(X_train,Y_train)

Fitting 5 folds for each of 72 candidates, totalling 360 fits


In [77]:
gs_rf.best_estimator_.score(X_test, Y_test)

0.74

In [78]:
f1_score(gs_rf.best_estimator_.predict(X_test), Y_test)

0.8300653594771241

In [79]:
gs_rf.best_params_

{'max_depth': None,
 'min_samples_leaf': 3,
 'min_samples_split': 8,
 'n_estimators': 400}

### Logistic Regression

In [41]:
lr_clf = LogisticRegression()
lr_clf.fit(X_train, Y_train)
lr_clf.score(X_test, Y_test)

0.735

In [42]:
f1_score(lr_clf.predict(X_test), Y_test)

0.8262295081967215

In [43]:
gs_grid_lr = {"solver" : ['newton-cg', 'lbfgs', 'liblinear'],
           "penalty": ['l2'],
           "C" : [100, 10, 1.0, 0.1, 0.01]}

gs_lr = GridSearchCV(estimator = LogisticRegression(random_state=42),
                       param_grid=gs_grid_lr,
                       cv=5,
                       verbose=True)
gs_lr.fit(X_train,Y_train)

Fitting 5 folds for each of 15 candidates, totalling 75 fits


In [44]:
gs_lr.score(X_test, Y_test)

0.725

In [45]:
f1_score(gs_lr.predict(X_test), Y_test)

0.8307692307692307

### Gradient Boosting Classifier

In [63]:
gbc = GradientBoostingClassifier()
gbc.fit(X_train, Y_train)
gbc.score(X_test, Y_test)

0.745

In [64]:
f1_score(gbc.predict(X_test), Y_test)

0.8294314381270903

In [68]:
params = {
    "loss":["deviance"],
    "learning_rate": [0.05, 0.1, 0.2,0.3],
    "min_samples_split": np.linspace(0.05, 0.5, 4),
    "min_samples_leaf": np.linspace(0.1, 1, 4),
    "max_depth":[2,3,5],
    "max_features":["sqrt"],
    "criterion": ["friedman_mse"],
    "subsample":[0.2,0.3,0.5, 0.8],
    "n_estimators":[40]
    }

folds = 3
param_comb = 5
skf = StratifiedKFold(n_splits=folds, shuffle = True, random_state = 1001)

grid = GridSearchCV(GradientBoostingClassifier(), param_grid=params, scoring='roc_auc', n_jobs=1, cv=2, verbose=3)
grid.fit(X_train, Y_train)
print(grid.best_params_)

Fitting 2 folds for each of 768 candidates, totalling 1536 fits
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.2;, score=0.546 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.2;, score=0.680 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.724 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.710 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_dept

[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=

[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5,

[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.2;, score=0.618 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.730 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.703 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.736 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=

[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=

[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5,

[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.736 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.738 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.720 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.8;, score=0.730 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=

[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.542 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5

[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.05, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=

[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.2, n_estimators=40, subsample=0.3;, score=0.744 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.2, n_estimators=40, subsample=0.3;, score=0.715 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.2, n_estimators=40, subsample=0.5;, score=0.736 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.2, n_estimators=40, subsample=0.5;, score=0.726 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.2, n_es

[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.532 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_es

[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05,

[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.751 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.734 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.746 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.741 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.5, 

[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.560 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_es

[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.2, 

[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.735 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.734 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.5, n_

[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_es

[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.35, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.35, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.1, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.35,

[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.05, n_estimators=40, subsample=0.8;, score=0.532 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.05, n_estimators=40, subsample=0.8;, score=0.568 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n

[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n

[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_

[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_es

[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35,

[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_estimators=40, subsample=0.2;, score=0.578 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_

[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.8;, score=0.530 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.8;, score=0.539 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_

[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, 

[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.2, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.05, n_e

[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.8;, score=0.530 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.2, n_estimators=40, subsample=0.8;, score=0.565 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.35, n_e

[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=2, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, 

[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.2, n_estimators=40, subsample=0.8;, score=0.698 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.2, n_estimators=40, subsample=0.8;, score=0.706 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n

[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.555 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.4, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.539 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.05, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.05, n_

[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.05, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=3, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.2, 

[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.729 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.35, n_estimators=40, subsample=0.8;, score=0.734 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.5, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.1, min_samples_split=0.5, n_

[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.2;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=0.7, min_samples_split=0.2, n_es

[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.3;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.5;, score=0.500 total time=   0.0s
[CV 1/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_estimators=40, subsample=0.8;, score=0.500 total time=   0.0s
[CV 2/2] END criterion=friedman_mse, learning_rate=0.3, loss=deviance, max_depth=5, max_features=sqrt, min_samples_leaf=1.0, min_samples_split=0.5, n_es

In [69]:
grid.best_estimator_.score(X_test, Y_test)

0.74

In [70]:
grid.best_params_

{'criterion': 'friedman_mse',
 'learning_rate': 0.3,
 'loss': 'deviance',
 'max_depth': 2,
 'max_features': 'sqrt',
 'min_samples_leaf': 0.1,
 'min_samples_split': 0.5,
 'n_estimators': 40,
 'subsample': 0.5}

In [71]:
f1_score(grid.predict(X_test), Y_test)

0.8300653594771241

## Using SMOTE

In [81]:
sm = SMOTE(random_state=42)
X_res, Y_res = sm.fit_resample(X_train, Y_train)
X_total = pd.concat([X_train, X_res])
Y_total = pd.concat([Y_train, Y_res])

In [82]:
len(X_total), len(Y_total)

(1920, 1920)

In [83]:
Y_total.value_counts()

1    1120
0     800
Name: Risk_good, dtype: int64

In [84]:
xg_boost_clf_smote = XGBClassifier()
xg_boost_clf_smote.fit(X_total, Y_total)
xg_boost_clf_smote.score(X_test, Y_test)



0.735

In [85]:
rfc_smote = RandomForestClassifier()
rfc_smote.fit(X_total, Y_total)
rfc_smote.score(X_test, Y_test)

0.775

In [86]:
lr_clf_smote = LogisticRegression()
lr_clf_smote.fit(X_total, Y_total)
lr_clf_smote.score(X_test, Y_test)

0.745

In [87]:
gbc_smote = GradientBoostingClassifier()
gbc_smote.fit(X_train, Y_train)
gbc_smote.score(X_test, Y_test)

0.745

In [88]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()

eclf = VotingClassifier(
estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
             voting='hard')

for clf, label in zip([clf1, clf2, clf3, eclf], ['Logistic Regression', 'Random Forest', 'naive Bayes', 'Ensemble']):
    scores = cross_val_score(clf, X_transformed, Y, scoring='accuracy', cv=5)
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" % (scores.mean(), scores.std(), label))

Accuracy: 0.74 (+/- 0.03) [Logistic Regression]
Accuracy: 0.74 (+/- 0.01) [Random Forest]
Accuracy: 0.70 (+/- 0.01) [naive Bayes]
Accuracy: 0.75 (+/- 0.02) [Ensemble]
