In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,StratifiedKFold,RandomizedSearchCV,KFold
from sklearn.ensemble import RandomForestRegressor,StackingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import accuracy_score,log_loss
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

In [29]:
concrete=pd.read_csv('Concrete_Data.csv')
X=concrete.drop('Strength',axis=1)
y=concrete['Strength']
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=23,test_size=0.3)

In [30]:
xgb=XGBRegressor(random_state=23)
rf=RandomForestRegressor(random_state=23)
lgbm=LGBMRegressor(random_state=23)
cgbm=CatBoostRegressor(random_state=23)

elastic=ElasticNet()
scalar=StandardScaler()
knn=KNeighborsRegressor()
pipe_knn=Pipeline([('SCL',scalar),('KNN',knn)])
dtr=DecisionTreeRegressor()


In [31]:
#Grid search CV
stack=StackingRegressor([('ELASTIC',elastic),('KNN',pipe_knn),('TREE',dtr)],final_estimator=xgb,passthrough=True)
params={'ELASTIC__alpha':np.linspace(0.001,5,5),'ELASTIC__l1_ratio':np.linspace(0,1,5),
      'TREE__max_depth':[2,3,4,5,6,None],
        'TREE__min_samples_split':[2,5,10],
        'TREE__min_samples_leaf':[1,3,5,7,10,15],
        'final_estimator':[xgb,rf,lgbm,cgbm]}
kfold=KFold(n_splits=5,shuffle=True,random_state=23)
gcv_stack=RandomizedSearchCV(stack,param_distributions=params,cv=kfold,verbose=3,n_iter=100,random_state=23)
gcv_stack.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.185095 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1636
[LightGBM] [Info] Number of data points in the train set: 824, number of used features: 11
[LightGBM] [Info] Start training from score 35.510279
[CV 1/5] END ELASTIC__alpha=2.5004999999999997, ELASTIC__l1_ratio=0.5, TREE__max_depth=6, TREE__min_samples_leaf=7, TREE__min_samples_split=5, final_estimator=LGBMRegressor(random_state=23);, score=0.905 total time=   2.5s
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000851 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1556
[LightGBM] [Info] Number of data points in the train set: 824, number of used features: 11
[LightGBM] [Info] Start 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Learning rate set to 0.03971
0:	learn: 16.1275506	total: 54.5ms	remaining: 54.4s
1:	learn: 15.7334103	total: 65.9ms	remaining: 32.9s
2:	learn: 15.3564591	total: 85ms	remaining: 28.2s
3:	learn: 14.9952988	total: 143ms	remaining: 35.7s
4:	learn: 14.6176119	total: 215ms	remaining: 42.8s
5:	learn: 14.2607172	total: 226ms	remaining: 37.4s
6:	learn: 13.9723605	total: 237ms	remaining: 33.7s
7:	learn: 13.6881027	total: 259ms	remaining: 32.1s
8:	learn: 13.3562946	total: 279ms	remaining: 30.7s
9:	learn: 13.0518367	total: 329ms	remaining: 32.6s
10:	learn: 12.7581733	total: 427ms	remaining: 38.4s
11:	learn: 12.4688825	total: 438ms	remaining: 36.1s
12:	learn: 12.2259718	total: 456ms	remaining: 34.6s
13:	learn: 11.9801995	total: 471ms	remaining: 33.2s
14:	learn: 11.7302771	total: 515ms	remaining: 33.8s
15:	learn: 11.5079882	total: 551ms	remaining: 33.9s
16:	learn: 11.2864920	total: 553ms	remaining: 32s
17:	learn: 11.0586226	total: 589ms	remaining: 32.2s
18:	learn: 10.8616150	total: 613ms	remaining: 

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Learning rate set to 0.03971
0:	learn: 16.4040543	total: 20.1ms	remaining: 20.1s
1:	learn: 15.9964078	total: 32.6ms	remaining: 16.3s
2:	learn: 15.6080204	total: 77ms	remaining: 25.6s
3:	learn: 15.2392607	total: 82.5ms	remaining: 20.5s
4:	learn: 14.8595740	total: 92.3ms	remaining: 18.4s
5:	learn: 14.5159382	total: 137ms	remaining: 22.7s
6:	learn: 14.2044078	total: 181ms	remaining: 25.7s
7:	learn: 13.9001759	total: 205ms	remaining: 25.5s
8:	learn: 13.5901873	total: 232ms	remaining: 25.5s
9:	learn: 13.3068670	total: 274ms	remaining: 27.1s
10:	learn: 13.0041068	total: 277ms	remaining: 24.9s
11:	learn: 12.7116872	total: 302ms	remaining: 24.9s
12:	learn: 12.4560234	total: 350ms	remaining: 26.6s
13:	learn: 12.1877822	total: 385ms	remaining: 27.1s
14:	learn: 11.9331855	total: 414ms	remaining: 27.2s
15:	learn: 11.6813473	total: 458ms	remaining: 28.1s
16:	learn: 11.4723682	total: 461ms	remaining: 26.7s
17:	learn: 11.2451373	total: 473ms	remaining: 25.8s
18:	learn: 11.0423371	total: 485ms	remaini

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Learning rate set to 0.03971
0:	learn: 16.1640966	total: 31.8ms	remaining: 31.8s
1:	learn: 15.7675809	total: 34.1ms	remaining: 17s
2:	learn: 15.3981987	total: 63.7ms	remaining: 21.2s
3:	learn: 15.0255496	total: 68ms	remaining: 16.9s
4:	learn: 14.6368487	total: 75.4ms	remaining: 15s
5:	learn: 14.2882788	total: 119ms	remaining: 19.7s
6:	learn: 13.9925099	total: 129ms	remaining: 18.3s
7:	learn: 13.7164859	total: 163ms	remaining: 20.2s
8:	learn: 13.4270441	total: 173ms	remaining: 19s
9:	learn: 13.1419867	total: 175ms	remaining: 17.3s
10:	learn: 12.8610408	total: 206ms	remaining: 18.5s
11:	learn: 12.5863155	total: 213ms	remaining: 17.5s
12:	learn: 12.3479377	total: 229ms	remaining: 17.4s
13:	learn: 12.0864615	total: 240ms	remaining: 16.9s
14:	learn: 11.8445199	total: 255ms	remaining: 16.8s
15:	learn: 11.5925769	total: 264ms	remaining: 16.2s
16:	learn: 11.3818106	total: 273ms	remaining: 15.8s
17:	learn: 11.1619096	total: 280ms	remaining: 15.3s
18:	learn: 10.9681309	total: 289ms	remaining: 14

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Learning rate set to 0.03971
0:	learn: 16.3081650	total: 6.13ms	remaining: 6.13s
1:	learn: 15.9060449	total: 8.78ms	remaining: 4.38s
2:	learn: 15.5194380	total: 49.8ms	remaining: 16.6s
3:	learn: 15.1042535	total: 81.8ms	remaining: 20.4s
4:	learn: 14.7318294	total: 89ms	remaining: 17.7s
5:	learn: 14.3440537	total: 97.3ms	remaining: 16.1s
6:	learn: 14.0448043	total: 104ms	remaining: 14.8s
7:	learn: 13.7391850	total: 180ms	remaining: 22.3s
8:	learn: 13.4188068	total: 189ms	remaining: 20.8s
9:	learn: 13.1221122	total: 198ms	remaining: 19.6s
10:	learn: 12.8191554	total: 265ms	remaining: 23.8s
11:	learn: 12.5417221	total: 270ms	remaining: 22.2s
12:	learn: 12.2656753	total: 273ms	remaining: 20.7s
13:	learn: 12.0130096	total: 276ms	remaining: 19.4s
14:	learn: 11.7617301	total: 279ms	remaining: 18.3s
15:	learn: 11.5207937	total: 287ms	remaining: 17.6s
16:	learn: 11.2985541	total: 290ms	remaining: 16.8s
17:	learn: 11.0714842	total: 294ms	remaining: 16s
18:	learn: 10.8757141	total: 312ms	remainin

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Learning rate set to 0.03971
0:	learn: 16.4533678	total: 24.1ms	remaining: 24s
1:	learn: 16.0012119	total: 68.7ms	remaining: 34.3s
2:	learn: 15.5952680	total: 94.3ms	remaining: 31.3s
3:	learn: 15.2140606	total: 113ms	remaining: 28.1s
4:	learn: 14.8507664	total: 146ms	remaining: 29.1s
5:	learn: 14.4848759	total: 159ms	remaining: 26.3s
6:	learn: 14.1640047	total: 200ms	remaining: 28.3s
7:	learn: 13.8291132	total: 224ms	remaining: 27.7s
8:	learn: 13.4974043	total: 239ms	remaining: 26.3s
9:	learn: 13.1839429	total: 288ms	remaining: 28.5s
10:	learn: 12.8591394	total: 294ms	remaining: 26.5s
11:	learn: 12.6013212	total: 324ms	remaining: 26.7s
12:	learn: 12.3136278	total: 327ms	remaining: 24.9s
13:	learn: 12.0542806	total: 361ms	remaining: 25.4s
14:	learn: 11.8131430	total: 387ms	remaining: 25.4s
15:	learn: 11.5612690	total: 417ms	remaining: 25.6s
16:	learn: 11.3181762	total: 441ms	remaining: 25.5s
17:	learn: 11.1025582	total: 461ms	remaining: 25.1s
18:	learn: 10.8923896	total: 497ms	remaining

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 1/5] END ELASTIC__alpha=3.75025, ELASTIC__l1_ratio=0.0, TREE__max_depth=None, TREE__min_samples_leaf=10, TREE__min_samples_split=2, final_estimator=RandomForestRegressor(random_state=23);, score=0.886 total time=   3.2s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 2/5] END ELASTIC__alpha=3.75025, ELASTIC__l1_ratio=0.0, TREE__max_depth=None, TREE__min_samples_leaf=10, TREE__min_samples_split=2, final_estimator=RandomForestRegressor(random_state=23);, score=0.903 total time=   4.6s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 3/5] END ELASTIC__alpha=3.75025, ELASTIC__l1_ratio=0.0, TREE__max_depth=None, TREE__min_samples_leaf=10, TREE__min_samples_split=2, final_estimator=RandomForestRegressor(random_state=23);, score=0.890 total time=   3.0s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 4/5] END ELASTIC__alpha=3.75025, ELASTIC__l1_ratio=0.0, TREE__max_depth=None, TREE__min_samples_leaf=10, TREE__min_samples_split=2, final_estimator=RandomForestRegressor(random_state=23);, score=0.896 total time=   4.1s


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


[CV 5/5] END ELASTIC__alpha=3.75025, ELASTIC__l1_ratio=0.0, TREE__max_depth=None, TREE__min_samples_leaf=10, TREE__min_samples_split=2, final_estimator=RandomForestRegressor(random_state=23);, score=0.849 total time=   4.9s
[CV 1/5] END ELASTIC__alpha=5.0, ELASTIC__l1_ratio=0.75, TREE__max_depth=2, TREE__min_samples_leaf=10, TREE__min_samples_split=5, final_estimator=RandomForestRegressor(random_state=23);, score=0.891 total time=   3.8s
[CV 2/5] END ELASTIC__alpha=5.0, ELASTIC__l1_ratio=0.75, TREE__max_depth=2, TREE__min_samples_leaf=10, TREE__min_samples_split=5, final_estimator=RandomForestRegressor(random_state=23);, score=0.892 total time=   4.8s
[CV 3/5] END ELASTIC__alpha=5.0, ELASTIC__l1_ratio=0.75, TREE__max_depth=2, TREE__min_samples_leaf=10, TREE__min_samples_split=5, final_estimator=RandomForestRegressor(random_state=23);, score=0.904 total time=   4.6s
[CV 4/5] END ELASTIC__alpha=5.0, ELASTIC__l1_ratio=0.75, TREE__max_depth=2, TREE__min_samples_leaf=10, TREE__min_samples_s

  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


Learning rate set to 0.041135
0:	learn: 16.2716329	total: 9.56ms	remaining: 9.55s
1:	learn: 15.8507755	total: 11.5ms	remaining: 5.74s
2:	learn: 15.4446070	total: 23.7ms	remaining: 7.88s
3:	learn: 15.0367004	total: 95.2ms	remaining: 23.7s
4:	learn: 14.6496199	total: 107ms	remaining: 21.2s
5:	learn: 14.2764142	total: 117ms	remaining: 19.4s
6:	learn: 13.9743416	total: 136ms	remaining: 19.3s
7:	learn: 13.6443764	total: 167ms	remaining: 20.7s
8:	learn: 13.3245372	total: 186ms	remaining: 20.5s
9:	learn: 13.0353002	total: 229ms	remaining: 22.6s
10:	learn: 12.7277820	total: 237ms	remaining: 21.3s
11:	learn: 12.4463510	total: 268ms	remaining: 22s
12:	learn: 12.1753672	total: 276ms	remaining: 20.9s
13:	learn: 11.9191883	total: 279ms	remaining: 19.6s
14:	learn: 11.6666754	total: 282ms	remaining: 18.5s
15:	learn: 11.4186605	total: 288ms	remaining: 17.7s
16:	learn: 11.1861668	total: 291ms	remaining: 16.8s
17:	learn: 10.9579781	total: 294ms	remaining: 16s
18:	learn: 10.7464226	total: 297ms	remaining

In [32]:
print(gcv_stack.best_params_)
print(gcv_stack.best_score_)

{'final_estimator': <catboost.core.CatBoostRegressor object at 0x7f00ff02e550>, 'TREE__min_samples_split': 5, 'TREE__min_samples_leaf': 3, 'TREE__max_depth': None, 'ELASTIC__l1_ratio': 0.0, 'ELASTIC__alpha': 1.2507499999999998}
0.9246311254352987


In [33]:
from joblib import dump
bm = gcv_stack.best_estimator_
dump(bm,"gcv_stack.joblib")

['gcv_stack.joblib']

In [34]:
X.columns

Index(['Cement', 'Blast', 'Fly', 'Water', 'Superplasticizer', 'Coarse', 'Fine',
       'Age'],
      dtype='object')