In [35]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error,  r2_score
from sklearn.datasets import make_classification


In [36]:
# Synthetic 1-D regression data for the manual gradient-boosting walkthrough.

# Seed NumPy's global generator so both random draws below are repeatable.
np.random.seed(60)

# 100 uniform draws rescaled from [0, 1) to [-3, -1); kept 2-D with shape (100, 1).
X = np.random.rand(100, 1) * 2 - 3

# Quadratic signal in the single feature plus Gaussian noise scaled by 12.
y = 12 * np.random.randn(100) + 0.7 * X[:, 0] ** 2


In [37]:
# Stage 1: fit a depth-4 regression tree directly on the raw targets.
# `fit` returns the estimator itself, so construction and fitting are chained.
tree_reg1 = DecisionTreeRegressor(random_state=60, max_depth=4).fit(X, y)
tree_reg1  # last expression: show the fitted estimator as the cell output

In [38]:
# Stage 2: the residuals left by stage 1 become the next regression target.
y2 = y - tree_reg1.predict(X)
tree_reg2 = DecisionTreeRegressor(random_state=60, max_depth=4).fit(X, y2)
tree_reg2  # last expression: show the fitted estimator as the cell output

In [39]:
# Stage 3: fit a third tree on whatever stage 2 still failed to explain.
y3 = y2 - tree_reg2.predict(X)
tree_reg3 = DecisionTreeRegressor(random_state=60, max_depth=4).fit(X, y3)
tree_reg3  # last expression: show the fitted estimator as the cell output

In [40]:
# Probe points for the hand-built ensemble.
# NOTE(review): X is generated as 2*rand - 3, i.e. inside [-3, -1), so all
# three probes fall outside the training interval; the trees can only return
# the value of their outermost leaf there.
X_new = np.array([[-4], [2], [0.2]])

# The boosted prediction is the sum of every stage's output: the first tree
# models y, the later ones model the successive residuals.
boosting_stages = [tree_reg1, tree_reg2, tree_reg3]
stage_outputs = [stage.predict(X_new) for stage in boosting_stages]
manual_predictions = sum(stage_outputs)
print("Manual Gradient Boosting Predictions:", manual_predictions)


Manual Gradient Boosting Predictions: [ 17.18469661 -18.99542987 -18.99542987]


In [41]:
# Baseline gradient-boosting regressor: 500 depth-2 trees with no shrinkage
# (learning_rate=1.0), seeded for repeatability.
gbrt = GradientBoostingRegressor(
    random_state=60,
    learning_rate=1.0,
    n_estimators=500,
    max_depth=2,
).fit(X, y)
gbrt  # last expression: show the fitted estimator as the cell output

In [42]:
# Evaluate the baseline regressor.
# NOTE: predictions are made on the same X the model was fitted on, so these
# scores describe how closely the model fits the training data, not how well
# it generalizes.
y_pred = gbrt.predict(X)

# mean_squared_error returns the MSE; the value was previously stored and
# printed as "MAE", which mislabelled the metric.
mse = mean_squared_error(y, y_pred)
print("Gradient Boosting Mean Squared Error (MSE):", mse)

r2 = r2_score(y, y_pred)
print("Gradient Boosting R² Score:", r2)

Gradient Boosting Mean Absolute Error (MAE): 1.4837433492304868e-16
Gradient Boosting R² Score: 1.0


In [43]:

# Gradient boosting with a smaller learning rate (0.05) and early stopping.
# n_estimators=500 is only an upper bound here: with n_iter_no_change=10,
# scikit-learn holds out part of the training data for validation and stops
# adding trees once the validation score has not improved for 10 iterations.
gbrt_best = GradientBoostingRegressor(
    random_state=60,
    n_iter_no_change=10,
    n_estimators=500,
    learning_rate=0.05,
    max_depth=2,
).fit(X, y)
gbrt_best  # last expression: show the fitted estimator as the cell output



In [44]:
# Evaluate the early-stopped regressor on the data it was fitted on
# (training-set scores, not a generalization estimate).
y_pred_best = gbrt_best.predict(X)

# mean_squared_error returns the MSE; the value was previously stored and
# printed as "MAE", which mislabelled the metric.
mse = mean_squared_error(y, y_pred_best)
print("Gradient Boosting  with smaller learning rate Mean Squared Error (MSE):", mse)

r2 = r2_score(y, y_pred_best)
print("Gradient Boosting  with smaller learning rate R² Score:", r2)

Gradient Boosting  with smaller learning rate Mean Absolute Error (MAE): 135.04010726901356
Gradient Boosting  with smaller learning rate R² Score: 0.07150018996815422


In [45]:
# Stochastic gradient boosting: subsample=0.25 means each tree is fitted on a
# random quarter of the training samples. Other settings match the
# learning_rate=0.05 model, but without early stopping.
gbrt_stochastic = GradientBoostingRegressor(
    random_state=60,
    subsample=0.25,
    n_estimators=500,
    learning_rate=0.05,
    max_depth=2,
).fit(X, y)
gbrt_stochastic  # last expression: show the fitted estimator as the cell output




In [46]:
# Evaluate the stochastic regressor on the data it was fitted on
# (training-set scores, not a generalization estimate).
y_pred_stochastic = gbrt_stochastic.predict(X)

# mean_squared_error returns the MSE; the value was previously stored and
# printed as "MAE", which mislabelled the metric.
mse = mean_squared_error(y, y_pred_stochastic)
print("Stochastic Gradient Boosting with subsample Mean Squared Error (MSE):", mse)

r2 = r2_score(y, y_pred_stochastic)
print("Stochastic Gradient Boosting with subsample R² Score:", r2)

Stochastic Gradient Boosting with subsample Mean Absolute Error (MAE): 57.53338160512168
Stochastic Gradient Boosting with subsample R² Score: 0.6044157919363344


In [47]:
# Synthetic classification data set: 100 samples, 20 features, fixed seed.
X_class, y_class = make_classification(
    random_state=60,
    n_features=20,
    n_samples=100,
)

In [48]:
# Baseline gradient-boosting classifier: 100 trees, learning rate 0.1.
# NOTE(review): this cell seeds with 42 while every other model in the
# notebook uses 60 — confirm whether that difference is intentional.
gb_clf = GradientBoostingClassifier(
    random_state=42,
    learning_rate=0.1,
    n_estimators=100,
).fit(X_class, y_class)
gb_clf  # last expression: show the fitted estimator as the cell output


In [49]:
# Score the baseline classifier on the samples it was fitted on.
y_pred_class = gb_clf.predict(X_class)

print("Gradient Boosting Classifier Accuracy:", accuracy_score(y_class, y_pred_class))

# Precision, recall and F1 share one call signature, so drive them from a table.
for metric_label, metric_fn in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_class, y_pred_class, average='weighted'))

Gradient Boosting Classifier Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0


In [50]:

# Gradient-boosting classifier with early stopping: n_iter_no_change=10 lets
# training end before all 100 trees are built once the validation score has
# stopped improving for 10 iterations.
gb_clf_best = GradientBoostingClassifier(
    random_state=60,
    n_iter_no_change=10,
    learning_rate=0.1,
    n_estimators=100,
).fit(X_class, y_class)
gb_clf_best  # last expression: show the fitted estimator as the cell output


In [51]:
# Score the early-stopped classifier on the samples it was fitted on.
y_pred_class_best = gb_clf_best.predict(X_class)

print("Gradient Boosting Classifier Accuracy (Early Stopping):", accuracy_score(y_class, y_pred_class_best))

# Precision, recall and F1 share one call signature, so drive them from a table.
for metric_label, metric_fn in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_class, y_pred_class_best, average='weighted'))


Gradient Boosting Classifier Accuracy (Early Stopping): 0.99
Precision: 0.9901960784313725
Recall: 0.99
F1 Score: 0.98999899989999


In [52]:
# Stochastic gradient-boosting classifier: subsample=0.25 means each of the
# 500 trees is fitted on a random quarter of the training samples.
gb_clf_stochastic = GradientBoostingClassifier(
    random_state=60,
    subsample=0.25,
    learning_rate=0.05,
    n_estimators=500,
).fit(X_class, y_class)

# Score the model on the samples it was fitted on.
y_pred_class_stochastic = gb_clf_stochastic.predict(X_class)

print("Gradient Boosting Classifier Accuracy (Stochastic):", accuracy_score(y_class, y_pred_class_stochastic))

# Precision, recall and F1 share one call signature, so drive them from a table.
for metric_label, metric_fn in (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(metric_label, metric_fn(y_class, y_pred_class_stochastic, average='weighted'))

Gradient Boosting Classifier Accuracy (Stochastic): 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
