In [3]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.model_selection import train_test_split

## HyperParameter Tuning

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import GradientBoostingRegressor

In [7]:
concret_df = pd.read_csv('concrete_data.csv')
X = concret_df.loc[:,concret_df.columns != 'csMPa'].drop(['flyash','coarseaggregate','fineaggregate'], axis=1)
y = concret_df['csMPa']

X.shape,y.shape
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=1)

In [8]:
baseline = GradientBoostingRegressor(max_depth=3,n_estimators=50)
baseline.fit(X_train,y_train)

GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
                          learning_rate=0.1, loss='ls', max_depth=3,
                          max_features=None, max_leaf_nodes=None,
                          min_impurity_decrease=0.0, min_impurity_split=None,
                          min_samples_leaf=1, min_samples_split=2,
                          min_weight_fraction_leaf=0.0, n_estimators=50,
                          n_iter_no_change=None, presort='auto',
                          random_state=None, subsample=1.0, tol=0.0001,
                          validation_fraction=0.1, verbose=0, warm_start=False)

In [9]:
baseline.score(X_test,y_test)

0.8485646649762112

## Grid Search CV

In [10]:
gbr = GradientBoostingRegressor(max_depth=3)

In [11]:
parameters={'n_estimators':[1,5,10,50,100,200,300,400,500]}
gridsearch_reg = GridSearchCV(estimator=gbr,param_grid=parameters,cv=3)

In [13]:
gridsearch_reg.fit(X_train,y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=GradientBoostingRegressor(alpha=0.9,
                                                 criterion='friedman_mse',
                                                 init=None, learning_rate=0.1,
                                                 loss='ls', max_depth=3,
                                                 max_features=None,
                                                 max_leaf_nodes=None,
                                                 min_impurity_decrease=0.0,
                                                 min_impurity_split=None,
                                                 min_samples_leaf=1,
                                                 min_samples_split=2,
                                                 min_weight_fraction_leaf=0.0,
                                                 n_estimators=100,
                                                 n_iter_no_change=None,
                

In [14]:
gridsearch_reg.score(X_test,y_test)

0.9089789200557665