In [None]:
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.tree import DecisionTreeRegressor

In [None]:
from tensorflow import keras
from keras.datasets import boston_housing

# load_data() yields a (features, targets) pair for the training split followed by
# a (features, targets) pair for the test split; unpack them in two steps.
boston_train, boston_test = boston_housing.load_data()
X_train, y_train = boston_train
X_test, y_test = boston_test


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/boston_housing.npz
[1m57026/57026[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [None]:
# First weak learner: a shallow regression tree fit directly on the training targets.
# random_state is fixed because scikit-learn randomly permutes the candidate features
# at every split, so an unseeded tree can differ between runs when splits tie.
tree_reg1 = DecisionTreeRegressor(max_depth=3, random_state=1)
tree_reg1.fit(X_train, y_train)
# y1: residual error left after subtracting the first tree's training predictions
y1 = y_train - tree_reg1.predict(X_train)

In [None]:
# Second weak learner: fit on y1, the residual error of the first tree, rather than
# on the original targets.
# random_state is fixed because scikit-learn randomly permutes the candidate features
# at every split, so an unseeded tree can differ between runs when splits tie.
tree_reg2 = DecisionTreeRegressor(max_depth=4, random_state=1)
tree_reg2.fit(X_train, y1)
# y2: residual error still unexplained after the second tree
y2 = y1 - tree_reg2.predict(X_train)

In [None]:
# Third weak learner: fit on y2, the residual error left by the second tree.
# random_state is fixed because scikit-learn randomly permutes the candidate features
# at every split, so an unseeded tree can differ between runs when splits tie.
tree_reg3 = DecisionTreeRegressor(max_depth=4, random_state=1)
tree_reg3.fit(X_train, y2)
# y3: residual error still unexplained after the third tree
y3 = y2 - tree_reg3.predict(X_train)

In [None]:
# The ensemble prediction is the sum of every stage's prediction on the test features:
# the first tree predicts the target and each later tree predicts a residual correction.
stage_predictions = [stage.predict(X_test) for stage in (tree_reg1, tree_reg2, tree_reg3)]
y_pred = sum(stage_predictions)

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination (R²) of the current y_pred against the test targets.
r2_score(y_true=y_test, y_pred=y_pred)

0.7413650758685646

In [None]:
# Additional (fourth) learner: fit on y3, the residual error left by the third tree.
# random_state is fixed because scikit-learn randomly permutes the candidate features
# at every split, so an unseeded tree can differ between runs when splits tie.
tree_reg4 = DecisionTreeRegressor(max_depth=4, random_state=1)
tree_reg4.fit(X_train, y3)

In [None]:
# Recompute the ensemble prediction with the fourth tree included: sum each stage's
# prediction on the test features.
stage_predictions = [
    stage.predict(X_test)
    for stage in (tree_reg1, tree_reg2, tree_reg3, tree_reg4)
]
y_pred = sum(stage_predictions)

# R² of the four-tree ensemble on the test targets.
r2_score(y_true=y_test, y_pred=y_pred)

0.7506897413651435

In [None]:
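# We can see that the predictions got better when an additional tree was added to the ensemble:
# the test R² with four trees is slightly higher than with three (compare the two scores above).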

In [None]:
# Instead of chaining the weak learners by hand, we are going to use scikit-learn's GradientBoostingRegressor,
#  which is how gradient boosting is normally done in practice.

In [None]:
# Hyperparameters for the boosted ensemble: depth of each tree, number of boosting
# stages, per-stage learning rate, and a fixed seed for reproducibility.
boosting_params = {
    "max_depth": 3,
    "n_estimators": 10,
    "learning_rate": 0.5,
    "random_state": 1,
}
gbr = GradientBoostingRegressor(**boosting_params)

# Fit on the training data; fit() returns the estimator, which the notebook displays.
gbr.fit(X_train, y_train)

In [None]:
# Predict the test targets with the fitted gradient-boosting model.
y_pred = gbr.predict(X_test)
# R² of those predictions against the true test targets.
r2_score(y_true=y_test, y_pred=y_pred)

0.8238332541121167

In [None]:
# Note: The learning rate scales the contribution of each tree, so if you reduce the learning rate you generally need to increase the number of estimators (weak learners) to get equally good or better predictions.
# Let's see the two cases.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [None]:
from sklearn.datasets import load_iris

# Load the iris dataset and wrap it in pandas objects: a DataFrame of the features
# (columns named after iris.feature_names) and a Series of the target labels.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
y = pd.Series(data=iris.target)


In [None]:
# Hold out 40% of the iris rows for testing; the fixed seed makes the split repeatable.
# NOTE: y_train / y_test are rebound here and no longer hold the housing targets.
iris_split = train_test_split(X, y, test_size=0.4, random_state=30)
x_train, x_test, y_train, y_test = iris_split

# Show the (rows, columns) shape of the training features.
x_train.shape


(90, 4)

In [None]:
# Hyperparameters for the boosted classifier: depth of each tree, number of boosting
# stages, per-stage learning rate, and a fixed seed for reproducibility.
classifier_params = {
    "max_depth": 3,
    "n_estimators": 10,
    "learning_rate": 0.5,
    "random_state": 1,
}
# NOTE: the variable is still called `gbr` (the later prediction cell uses that name)
# even though the estimator is a classifier; it replaces the earlier regressor.
gbr = GradientBoostingClassifier(**classifier_params)

# Fit on the iris training split; fit() returns the estimator, which the notebook displays.
gbr.fit(x_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

# Predict a class label for every row of the iris test split.
y_pred = gbr.predict(x_test)
# The targets are categorical class labels, so report the fraction of correctly
# classified rows. r2_score is a regression metric: it would treat the label codes
# as numeric magnitudes and does not measure classification quality.
accuracy_score(y_test, y_pred)

0.9440298507462687