In [44]:
import pandas as pd
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression, LogisticRegression, ElasticNet, LinearRegression
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV, KFold
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, log_loss, roc_auc_score, r2_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.decomposition import PCA
from sklearn.svm import SVC

from sklearn.tree import DecisionTreeClassifier, plot_tree

from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, VotingClassifier, VotingRegressor, BaggingClassifier, BaggingRegressor, AdaBoostClassifier, AdaBoostRegressor, GradientBoostingClassifier, GradientBoostingRegressor

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier, CatBoostRegressor

import warnings
warnings.filterwarnings('ignore')

# XGBoost

In [17]:
df = pd.read_csv("Breastcancer.csv")
df.head(3)


Unnamed: 0,Code,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
0,61634,5,4,3,1,2,2,2,3,1,Benign
1,63375,9,1,2,6,4,10,7,7,2,Malignant
2,76389,10,4,7,2,2,8,6,1,1,Malignant


In [21]:
x, y = df.drop("Class", axis = 1), df["Class"]

# XGBoost requires dependent variable in hot encoded format compulsary

le = LabelEncoder()
y = le.fit_transform(y)


In [23]:
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    random_state = 24, 
                                                    test_size= 0.3,
                                                    stratify= y)


In [25]:
# 
xgbm = XGBClassifier(
    random_state = 24
)
xgbm.fit(x_train, y_train)
y_pred = xgbm.predict(x_test)
print(accuracy_score(y_test, y_pred))

0.9761904761904762


In [55]:


# ______________________________________________________________________________________
# Pipeline

# one hot encoder
ohe = OneHotEncoder(
    handle_unknown="ignore",
    sparse_output=False,
    drop='first'
).set_output(transform='pandas')

trans_ohe = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=['category', object])),
    (ohe, make_column_selector(dtype_include=['category', object])),
    verbose_feature_names_out=False
).set_output(transform='pandas')


# scaler
scl_std = StandardScaler().set_output(transform = "pandas")
scl_mm = MinMaxScaler().set_output(transform = "pandas")


# Model

xgbm = XGBClassifier(
    random_state = 24
)

pipe = Pipeline([("XGB", xgbm)])


# _____________________________________________________________________________________
# GCV

params = {
    "XGB__n_estimators" : list(range(35, 41)),
    "XGB__max_depth" : [2, 3, 4],
    "XGB__learning_rate" : np.linspace(0.001, 1, 5)
}

kfolds = StratifiedKFold(n_splits = 5,
                        random_state = 24,
                        shuffle = True)

kfold = KFold(n_splits = 5,
            random_state = 24,
            shuffle = True)

gcv = GridSearchCV(pipe,
                  param_grid = params,
                  scoring = "neg_log_loss",
                  cv = kfolds,
                  verbose = 3)


# gcv.fit(x, y)

In [57]:
gcv.fit(x_train, y_train)

Fitting 5 folds for each of 90 candidates, totalling 450 fits
[CV 1/5] END XGB__learning_rate=0.001, XGB__max_depth=2, XGB__n_estimators=35;, score=-0.622 total time=   0.0s
[CV 2/5] END XGB__learning_rate=0.001, XGB__max_depth=2, XGB__n_estimators=35;, score=-0.620 total time=   0.0s
[CV 3/5] END XGB__learning_rate=0.001, XGB__max_depth=2, XGB__n_estimators=35;, score=-0.622 total time=   0.0s
[CV 4/5] END XGB__learning_rate=0.001, XGB__max_depth=2, XGB__n_estimators=35;, score=-0.620 total time=   0.0s
[CV 5/5] END XGB__learning_rate=0.001, XGB__max_depth=2, XGB__n_estimators=35;, score=-0.617 total time=   0.0s
[CV 1/5] END XGB__learning_rate=0.001, XGB__max_depth=2, XGB__n_estimators=36;, score=-0.621 total time=   0.0s
[CV 2/5] END XGB__learning_rate=0.001, XGB__max_depth=2, XGB__n_estimators=36;, score=-0.619 total time=   0.0s
[CV 3/5] END XGB__learning_rate=0.001, XGB__max_depth=2, XGB__n_estimators=36;, score=-0.622 total time=   0.0s
[CV 4/5] END XGB__learning_rate=0.001, XGB

In [59]:
print(gcv.best_score_)
print(gcv.best_params_)

-0.10783283263987271
{'XGB__learning_rate': 0.25075, 'XGB__max_depth': 3, 'XGB__n_estimators': 38}


In [61]:
y_pred_prob = gcv.predict_proba(x_test)[:, 1]
print(roc_auc_score(y_test, y_pred_prob))

0.9913446054750402


# LightGBM

In [73]:
df = pd.read_csv("Breastcancer.csv")
df.head(3)


Unnamed: 0,Code,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
0,61634,5,4,3,1,2,2,2,3,1,Benign
1,63375,9,1,2,6,4,10,7,7,2,Malignant
2,76389,10,4,7,2,2,8,6,1,1,Malignant


In [97]:
x, y = df.drop("Class", axis = 1), df["Class"]


In [83]:
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    random_state = 24, 
                                                    test_size= 0.3,
                                                    stratify= y)


In [85]:
# 
lgbm = LGBMClassifier(
    random_state = 24,
)
lgbm.fit(x_train, y_train)
y_pred = lgbm.predict(x_test)
print(accuracy_score(y_test, y_pred))

[LightGBM] [Info] Number of positive: 169, number of negative: 320
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000436 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 258
[LightGBM] [Info] Number of data points in the train set: 489, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.345603 -> initscore=-0.638422
[LightGBM] [Info] Start training from score -0.638422
0.9714285714285714


In [87]:


# ______________________________________________________________________________________
# Pipeline

# one hot encoder
ohe = OneHotEncoder(
    handle_unknown="ignore",
    sparse_output=False,
    drop='first'
).set_output(transform='pandas')

trans_ohe = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=['category', object])),
    (ohe, make_column_selector(dtype_include=['category', object])),
    verbose_feature_names_out=False
).set_output(transform='pandas')


# scaler
scl_std = StandardScaler().set_output(transform = "pandas")
scl_mm = MinMaxScaler().set_output(transform = "pandas")


# Model

lgbm = LGBMClassifier(
    random_state = 24,
)

pipe = Pipeline([("LGB", lgbm)])


# _____________________________________________________________________________________
# GCV

params = {
    "LGB__n_estimators" : list(range(35, 41)),
    "LGB__max_depth" : [2, 3, 4],
    "LGB__learning_rate" : np.linspace(0.001, 1, 5)
}

kfolds = StratifiedKFold(n_splits = 5,
                        random_state = 24,
                        shuffle = True)

kfold = KFold(n_splits = 5,
            random_state = 24,
            shuffle = True)

gcv = GridSearchCV(pipe,
                  param_grid = params,
                  scoring = "neg_log_loss",
                  cv = kfolds,
                  verbose = 3)


# gcv.fit(x, y)

In [89]:
gcv.fit(x_train, y_train)

Fitting 5 folds for each of 90 candidates, totalling 450 fits
[LightGBM] [Info] Number of positive: 135, number of negative: 256
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000204 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 223
[LightGBM] [Info] Number of data points in the train set: 391, number of used features: 10
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.345269 -> initscore=-0.639903
[LightGBM] [Info] Start training from score -0.639903
[CV 1/5] END LGB__learning_rate=0.001, LGB__max_depth=2, LGB__n_estimators=35;, score=-0.621 total time=   0.0s
[LightGBM] [Info] Number of positive: 135, number of negative: 256
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000110 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 225
[LightGBM] [Info] Number of data points in the train set: 391, number of used

In [91]:
print(gcv.best_score_)
print(gcv.best_params_)

-0.11463206761871432
{'LGB__learning_rate': 0.25075, 'LGB__max_depth': 2, 'LGB__n_estimators': 35}


In [95]:
y_pred_prob = gcv.predict_proba(x_test)[:, 1] 
print(roc_auc_score(y_test, y_pred_prob))

0.9928542673107891


# cat Boost

In [104]:
df = pd.read_csv("Breastcancer.csv")
df.head(3)


Unnamed: 0,Code,Clump,UniCell_Size,Uni_CellShape,MargAdh,SEpith,BareN,BChromatin,NoemN,Mitoses,Class
0,61634,5,4,3,1,2,2,2,3,1,Benign
1,63375,9,1,2,6,4,10,7,7,2,Malignant
2,76389,10,4,7,2,2,8,6,1,1,Malignant


In [106]:
x, y = df.drop("Class", axis = 1), df["Class"]


In [108]:
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    random_state = 24, 
                                                    test_size= 0.3,
                                                    stratify= y)


In [110]:
# 
cbc = CatBoostClassifier(
    random_state= 24,
)
cbc.fit(x_train, y_train)
y_pred = cbc.predict(x_test)
print(accuracy_score(y_test, y_pred))

Learning rate set to 0.00759
0:	learn: 0.6772284	total: 149ms	remaining: 2m 28s
1:	learn: 0.6661382	total: 156ms	remaining: 1m 17s
2:	learn: 0.6524304	total: 162ms	remaining: 53.7s
3:	learn: 0.6403299	total: 166ms	remaining: 41.3s
4:	learn: 0.6277872	total: 170ms	remaining: 33.7s
5:	learn: 0.6147222	total: 174ms	remaining: 28.8s
6:	learn: 0.6044735	total: 177ms	remaining: 25.1s
7:	learn: 0.5908690	total: 179ms	remaining: 22.2s
8:	learn: 0.5791257	total: 182ms	remaining: 20s
9:	learn: 0.5676153	total: 185ms	remaining: 18.3s
10:	learn: 0.5541332	total: 188ms	remaining: 16.9s
11:	learn: 0.5442761	total: 190ms	remaining: 15.7s
12:	learn: 0.5332041	total: 193ms	remaining: 14.6s
13:	learn: 0.5212886	total: 195ms	remaining: 13.7s
14:	learn: 0.5098628	total: 197ms	remaining: 13s
15:	learn: 0.5016971	total: 200ms	remaining: 12.3s
16:	learn: 0.4924687	total: 202ms	remaining: 11.7s
17:	learn: 0.4839510	total: 205ms	remaining: 11.2s
18:	learn: 0.4751784	total: 207ms	remaining: 10.7s
19:	learn: 0.4

In [112]:


# ______________________________________________________________________________________
# Pipeline

# one hot encoder
ohe = OneHotEncoder(
    handle_unknown="ignore",
    sparse_output=False,
    drop='first'
).set_output(transform='pandas')

trans_ohe = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=['category', object])),
    (ohe, make_column_selector(dtype_include=['category', object])),
    verbose_feature_names_out=False
).set_output(transform='pandas')


# scaler
scl_std = StandardScaler().set_output(transform = "pandas")
scl_mm = MinMaxScaler().set_output(transform = "pandas")


# Model

cbc = CatBoostClassifier(
    random_state= 24,
)


pipe = Pipeline([("CBC", cbc)])


# _____________________________________________________________________________________
# GCV

params = {
    "CBC__n_estimators" : list(range(35, 41)),
    "CBC__max_depth" : [2, 3, 4],
    "CBC__learning_rate" : np.linspace(0.001, 1, 5)
}

kfolds = StratifiedKFold(n_splits = 5,
                        random_state = 24,
                        shuffle = True)

kfold = KFold(n_splits = 5,
            random_state = 24,
            shuffle = True)

gcv = GridSearchCV(pipe,
                  param_grid = params,
                  scoring = "neg_log_loss",
                  cv = kfolds,
                  verbose = 3)


# gcv.fit(x, y)

In [114]:
gcv.fit(x_train, y_train)

Fitting 5 folds for each of 90 candidates, totalling 450 fits
0:	learn: 0.6924082	total: 672us	remaining: 22.9ms
1:	learn: 0.6916716	total: 1.16ms	remaining: 19.2ms
2:	learn: 0.6909599	total: 1.65ms	remaining: 17.6ms
3:	learn: 0.6902478	total: 2.11ms	remaining: 16.3ms
4:	learn: 0.6895148	total: 2.56ms	remaining: 15.4ms
5:	learn: 0.6887951	total: 3.02ms	remaining: 14.6ms
6:	learn: 0.6880891	total: 3.47ms	remaining: 13.9ms
7:	learn: 0.6873562	total: 3.93ms	remaining: 13.3ms
8:	learn: 0.6867212	total: 4.42ms	remaining: 12.8ms
9:	learn: 0.6860238	total: 4.9ms	remaining: 12.3ms
10:	learn: 0.6853119	total: 5.37ms	remaining: 11.7ms
11:	learn: 0.6846440	total: 5.82ms	remaining: 11.2ms
12:	learn: 0.6840370	total: 6.3ms	remaining: 10.7ms
13:	learn: 0.6833090	total: 6.74ms	remaining: 10.1ms
14:	learn: 0.6825921	total: 7.21ms	remaining: 9.62ms
15:	learn: 0.6819023	total: 7.64ms	remaining: 9.08ms
16:	learn: 0.6811957	total: 8.09ms	remaining: 8.57ms
17:	learn: 0.6805117	total: 8.65ms	remaining: 8.17

In [116]:
print(gcv.best_score_)
print(gcv.best_params_)

-0.1050290427554565
{'CBC__learning_rate': 0.75025, 'CBC__max_depth': 4, 'CBC__n_estimators': 35}


In [118]:
y_pred_prob = gcv.predict_proba(x_test)[:, 1] 
print(roc_auc_score(y_test, y_pred_prob))

0.9942632850241546


# CatBoost without One Hot Encoding

In [10]:
df = pd.read_csv("HR_comma_sep.csv")
df.head(3)

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,Department,salary
0,0.38,0.53,2,157,3,0,1,0,sales,low
1,0.8,0.86,5,262,6,0,1,0,sales,medium
2,0.1,0.77,6,247,4,0,1,0,sales,low


In [16]:
x, y = df.drop("left", axis = 1), df["left"]

In [18]:
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    random_state= 24,
                                                    test_size= 0.3, 
                                                    stratify= y)

In [20]:
cgbm = CatBoostClassifier(
    random_state= 24,
    cat_features= ["Department", "salary"]
)

cgbm.fit(x_train, y_train)

Learning rate set to 0.028113
0:	learn: 0.6521748	total: 257ms	remaining: 4m 16s
1:	learn: 0.6108150	total: 313ms	remaining: 2m 36s
2:	learn: 0.5713910	total: 378ms	remaining: 2m 5s
3:	learn: 0.5320913	total: 440ms	remaining: 1m 49s
4:	learn: 0.5001841	total: 492ms	remaining: 1m 38s
5:	learn: 0.4684059	total: 559ms	remaining: 1m 32s
6:	learn: 0.4390173	total: 619ms	remaining: 1m 27s
7:	learn: 0.4134991	total: 680ms	remaining: 1m 24s
8:	learn: 0.3904645	total: 741ms	remaining: 1m 21s
9:	learn: 0.3706840	total: 807ms	remaining: 1m 19s
10:	learn: 0.3509874	total: 868ms	remaining: 1m 18s
11:	learn: 0.3328178	total: 930ms	remaining: 1m 16s
12:	learn: 0.3167978	total: 995ms	remaining: 1m 15s
13:	learn: 0.3023680	total: 1.06s	remaining: 1m 14s
14:	learn: 0.2893689	total: 1.12s	remaining: 1m 13s
15:	learn: 0.2766897	total: 1.18s	remaining: 1m 12s
16:	learn: 0.2649461	total: 1.25s	remaining: 1m 12s
17:	learn: 0.2543803	total: 1.32s	remaining: 1m 11s
18:	learn: 0.2438560	total: 1.38s	remaining: 

<catboost.core.CatBoostClassifier at 0x2b71ee50620>

In [22]:
y_pred = cgbm.predict(x_test)
roc_auc_score(y_test, y_pred)

0.9690463146935294

In [34]:


# ______________________________________________________________________________________
# Pipeline

# one hot encoder
ohe = OneHotEncoder(
    handle_unknown="ignore",
    sparse_output=False,
    drop='first'
).set_output(transform='pandas')

trans_ohe = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=['category', object])),
    (ohe, make_column_selector(dtype_include=['category', object])),
    verbose_feature_names_out=False
).set_output(transform='pandas')


# scaler
scl_std = StandardScaler().set_output(transform = "pandas")
scl_mm = MinMaxScaler().set_output(transform = "pandas")


# Model

cbc = CatBoostClassifier(
    random_state= 24,
    cat_features= ["Department", "salary"],
    # num_boost_round= 50
    
)


pipe = Pipeline([("CBC", cbc)])


# _____________________________________________________________________________________
# GCV

params = {
    "CBC__n_estimators" : [10, 30],
    "CBC__max_depth" : [2, 3, 4],
    "CBC__learning_rate" : np.linspace(0.001, 1, 3)
}

kfolds = StratifiedKFold(n_splits = 5,
                        random_state = 24,
                        shuffle = True)

kfold = KFold(n_splits = 5,
            random_state = 24,
            shuffle = True)

gcv = GridSearchCV(pipe,
                  param_grid = params,
                  scoring = "neg_log_loss",
                  cv = kfolds,
                  verbose = 3)


# gcv.fit(x, y)

In [36]:
gcv.fit(x_train, y_train)

Fitting 5 folds for each of 18 candidates, totalling 90 fits
0:	learn: 0.6925332	total: 15.3ms	remaining: 137ms
1:	learn: 0.6919303	total: 32.4ms	remaining: 129ms
2:	learn: 0.6913188	total: 50.3ms	remaining: 117ms
3:	learn: 0.6907183	total: 66.8ms	remaining: 100ms
4:	learn: 0.6901190	total: 84.1ms	remaining: 84.1ms
5:	learn: 0.6895093	total: 107ms	remaining: 71.3ms
6:	learn: 0.6889124	total: 123ms	remaining: 52.5ms
7:	learn: 0.6883166	total: 144ms	remaining: 35.9ms
8:	learn: 0.6877221	total: 161ms	remaining: 17.8ms
9:	learn: 0.6871287	total: 187ms	remaining: 0us
[CV 1/5] END CBC__learning_rate=0.001, CBC__max_depth=2, CBC__n_estimators=10;, score=-0.687 total time=   0.2s
0:	learn: 0.6925359	total: 11.5ms	remaining: 104ms
1:	learn: 0.6919353	total: 23.1ms	remaining: 92.5ms
2:	learn: 0.6913247	total: 41.7ms	remaining: 97.2ms
3:	learn: 0.6907265	total: 57.2ms	remaining: 85.9ms
4:	learn: 0.6901183	total: 75.2ms	remaining: 75.2ms
5:	learn: 0.6895131	total: 85.8ms	remaining: 57.2ms
6:	learn

In [38]:
print(gcv.best_score_)
print(gcv.best_params_)

-0.08817578282934348
{'CBC__learning_rate': 1.0, 'CBC__max_depth': 4, 'CBC__n_estimators': 30}


In [40]:
y_pred_prob = gcv.predict_proba(x_test)[:, 1] 
print(roc_auc_score(y_test, y_pred_prob))

0.9840273859848516


### CATboost Regressor on insurance.csv


In [47]:
df = pd.read_csv("insurance.csv")
df

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.900,0,yes,southwest,16884.92400
1,18,male,33.770,1,no,southeast,1725.55230
2,28,male,33.000,3,no,southeast,4449.46200
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.880,0,no,northwest,3866.85520
...,...,...,...,...,...,...,...
1333,50,male,30.970,3,no,northwest,10600.54830
1334,18,female,31.920,0,no,northeast,2205.98080
1335,18,female,36.850,0,no,southeast,1629.83350
1336,21,female,25.800,0,no,southwest,2007.94500


In [49]:
x, y = df.drop("charges", axis = 1), df["charges"]

In [51]:
x_train, x_test, y_train, y_test = train_test_split(x, y, 
                                                    random_state= 24,
                                                    test_size= 0.3,
                                                   )


In [107]:


# ______________________________________________________________________________________
# Pipeline

# one hot encoder
ohe = OneHotEncoder(
    handle_unknown="ignore",
    sparse_output=False,
    drop='first'
).set_output(transform='pandas')

trans_ohe = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=['category', object])),
    (ohe, make_column_selector(dtype_include=['category', object])),
    verbose_feature_names_out=False
).set_output(transform='pandas')


# scaler
scl_std = StandardScaler().set_output(transform = "pandas")
scl_mm = MinMaxScaler().set_output(transform = "pandas")


# Model

cbc = CatBoostRegressor(
    cat_features= list(x.columns[x.dtypes == object])
)


pipe = Pipeline([("CBC", cbc)])


# _____________________________________________________________________________________
# GCV

params = {
    "CBC__n_estimators" : list(range(10, 51, 10)),
    "CBC__max_depth" : [2, 3, 4],
    "CBC__learning_rate" : np.linspace(0.001, 1, 5)
}

kfolds = StratifiedKFold(n_splits = 5,
                        random_state = 24,
                        shuffle = True)

kfold = KFold(n_splits = 5,
            random_state = 24,
            shuffle = True)

gcv = GridSearchCV(pipe,
                  param_grid = params,
                  scoring = "r2",
                  cv = kfold,
                  verbose = 3)


# gcv.fit(x, y)

In [109]:
gcv.fit(x_train, y_train)

Fitting 5 folds for each of 75 candidates, totalling 375 fits
0:	learn: 11772.5646980	total: 8.13ms	remaining: 73.2ms
1:	learn: 11764.5756465	total: 22.5ms	remaining: 90.1ms
2:	learn: 11756.2369923	total: 35.5ms	remaining: 82.8ms
3:	learn: 11747.8342005	total: 48ms	remaining: 72ms
4:	learn: 11739.5133205	total: 66.6ms	remaining: 66.6ms
5:	learn: 11731.7352157	total: 85.1ms	remaining: 56.8ms
6:	learn: 11724.1653903	total: 105ms	remaining: 45.1ms
7:	learn: 11716.6375742	total: 119ms	remaining: 29.7ms
8:	learn: 11708.3066526	total: 140ms	remaining: 15.6ms
9:	learn: 11700.9764900	total: 156ms	remaining: 0us
[CV 1/5] END CBC__learning_rate=0.001, CBC__max_depth=2, CBC__n_estimators=10;, score=0.013 total time=   0.1s
0:	learn: 12211.9003974	total: 8.07ms	remaining: 72.6ms
1:	learn: 12203.6612205	total: 21ms	remaining: 84ms
2:	learn: 12195.1662551	total: 33.4ms	remaining: 78ms
3:	learn: 12186.4260032	total: 50ms	remaining: 74.9ms
4:	learn: 12178.3715362	total: 64.2ms	remaining: 64.2ms
5:	lea

In [111]:
print(gcv.best_score_)
print(gcv.best_params_)

0.8516464319036384
{'CBC__learning_rate': 0.25075, 'CBC__max_depth': 3, 'CBC__n_estimators': 40}


In [113]:
y_pred = gcv.predict(x_test)
print(r2_score(y_test, y_pred))

0.8750264001820316
