In [1]:
import warnings
# Install a process-wide filter that silences every warning category.
# NOTE(review): this blanket filter also hides deprecation notices raised by
# the libraries used below — consider narrowing it to a specific category.
warnings.filterwarnings('ignore')

In [9]:
import numpy as np
import pandas as pd

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor

In [3]:
# Load the Boston housing bunch and split it into a feature frame and a target series.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2 —
# confirm the pinned scikit-learn version before re-running this notebook.
boston = load_boston()

# Feature matrix, with columns labelled by the dataset's own feature names.
X = pd.DataFrame(data=boston.data, columns=boston.feature_names)

# Regression target, aligned row-for-row with X.
y = pd.Series(data=boston.target)

In [5]:
# Preview the first five feature rows.
X.head(n=5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33


In [7]:
# Preview the first five target values.
y.head(n=5)

0    24.0
1    21.6
2    34.7
3    33.4
4    36.2
dtype: float64

In [8]:
# Hold out 20% of the rows for evaluation. The split is seeded so that a
# Restart & Run All reproduces the same partition, and therefore the same
# downstream scores; without random_state every run shuffles differently.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Small boosted ensemble: three depth-2 trees with learning_rate=1.0.
# random_state pins the estimator's internal randomness so that refitting on
# the same data yields the same model.
gradientregressor = GradientBoostingRegressor(
    max_depth=2,
    n_estimators=3,
    learning_rate=1.0,
    random_state=42,
)

In [11]:
# Train on the training partition; the fitted estimator is kept as `model`.
model = gradientregressor.fit(X=X_train, y=y_train)

# Predictions for the held-out rows, used by the evaluation cell below.
y_pred = model.predict(X=X_test)

In [12]:
# Held-out R^2, expressed as a percentage.
100 * r2_score(y_true=y_test, y_pred=y_pred)

74.53190504119107

In [13]:
import matplotlib.pyplot as plt
%matplotlib inline

# Per-feature importance scores taken from the fitted model; the order follows
# the columns of the training frame.
feature_importance = model.feature_importances_
# Display the raw array.
feature_importance

array([0.01897458, 0.        , 0.        , 0.        , 0.00805087,
       0.68664342, 0.        , 0.02783836, 0.        , 0.        ,
       0.02005123, 0.        , 0.23844155])

In [14]:
# Show the feature names so the importance array above can be read by position.
boston.feature_names

array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD',
       'TAX', 'PTRATIO', 'B', 'LSTAT'], dtype='<U7')

In [15]:
# Pair each feature name with its importance score and print one line per feature.
name_importance_pairs = zip(boston.feature_names, feature_importance)
for feature_name, importance in name_importance_pairs:
    print(f"{feature_name} --> {importance:0.4f}")

CRIM --> 0.0190
ZN --> 0.0000
INDUS --> 0.0000
CHAS --> 0.0000
NOX --> 0.0081
RM --> 0.6866
AGE --> 0.0000
DIS --> 0.0278
RAD --> 0.0000
TAX --> 0.0000
PTRATIO --> 0.0201
B --> 0.0000
LSTAT --> 0.2384


In [16]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid: 4 learning rates x 4 ensemble sizes = 16 combinations.
params = {
    'learning_rate': [0.01, 0.05, 0.10, 0.15],
    'n_estimators': [100, 150, 200, 250],
}

# Cross-validated grid search scored by R^2. The base estimator is seeded so
# that the ranking of combinations is repeatable across runs.
tuning = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_grid=params,
    scoring='r2',
)
tuning.fit(X_train, y_train)

# Best combination found and its mean cross-validated R^2.
tuning.best_params_, tuning.best_score_

({'learning_rate': 0.15, 'n_estimators': 200}, 0.8706950756330691)