In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

# Toy dataset (1D input, single feature, scalar target)
X = np.array([[1], [2], [3], [4], [5]])  # Feature matrix
y = np.array([3, 5, 7, 9, 11])  # Target values

In [12]:
data = pd.DataFrame({'x': X.squeeze(), 'y': y})
display(data)
display(data.mean())

Unnamed: 0,x,y
0,1,3
1,2,5
2,3,7
3,4,9
4,5,11


x    3.0
y    7.0
dtype: float64

In [14]:

# Initial prediction model (F_0(x)) - mean of y
F_0 = np.mean(y)  # Starting model: predict the average target
print(f'Initial model (F_0): {F_0}')

# Step 1: Compute gradients
gradients_1 = 2 * (F_0 - y)  # Gradient at step 1
print("Gradients at step 1 (negative error):", gradients_1)

# Fit first weak learner (h1) to the negative gradient (gradients_1)
tree_1 = DecisionTreeRegressor(max_depth=1)  # Shallow tree (weak learner)
tree_1.fit(X, gradients_1)

# Get the prediction of the weak learner h1(x)
h1 = tree_1.predict(X)
print("Weak learner prediction (h1(x)):", h1)


# For simplicity, we'll assume the step size (gamma) is 0.1
gamma = 0.1
print(f'Chosen step size (gamma): {gamma}')

# Update the model (F_1(x)) after the first step
F_1 = F_0 + gamma * h1
print(f'Updated model predictions (F_1) after step 1: {F_1}')

# Step 2: Recompute the gradients with the updated model F_1
gradients_2 = 2 * (F_1 - y)  # New gradient based on updated predictions (F_1)
print("Gradients at step 2 (negative error):", gradients_2)

# Fit the second weak learner (h2) to the new gradients (gradients_2)
tree_2 = DecisionTreeRegressor(max_depth=1)  # Second weak learner
tree_2.fit(X, gradients_2)

# Get the prediction of the weak learner h2(x)
h2 = tree_2.predict(X)
print("Weak learner prediction (h2(x)):", h2)

# Update the model after the second step using the weak learner h2(x)
F_2 = F_1 + gamma * h2
print(f'Updated model predictions (F_2) after step 2: {F_2}')

display(pd.DataFrame({"x": X.squeeze(), "F0": F_0, "h1": h1, "F1": F_1, "h2": h2, "F2": F_2, "y": y}))

Initial model (F_0): 7.0
Gradients at step 1 (negative error): [ 8.  4.  0. -4. -8.]
Weak learner prediction (h1(x)): [ 6.  6. -4. -4. -4.]
Chosen step size (gamma): 0.1
Updated model predictions (F_1) after step 1: [7.6 7.6 6.6 6.6 6.6]
Gradients at step 2 (negative error): [ 9.2  5.2 -0.8 -4.8 -8.8]
Weak learner prediction (h2(x)): [ 7.2  7.2 -4.8 -4.8 -4.8]
Updated model predictions (F_2) after step 2: [8.32 8.32 6.12 6.12 6.12]


Unnamed: 0,x,F0,h1,F1,h2,F2,y
0,1,7.0,6.0,7.6,7.2,8.32,3
1,2,7.0,6.0,7.6,7.2,8.32,5
2,3,7.0,-4.0,6.6,-4.8,6.12,7
3,4,7.0,-4.0,6.6,-4.8,6.12,9
4,5,7.0,-4.0,6.6,-4.8,6.12,11


In [40]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

# Toy dataset (1D input, single feature, scalar target)
X = np.array([[1], [2], [3], [4], [5]])  # Feature matrix
y = np.array([3, 5, 7, 9, 11])  # Target values

# Initialize the predictions matrix for storing the results
n_iterations = 100
predictions = np.zeros((len(X), n_iterations))  # (n_samples, n_iterations)

# Initial prediction model (F_0(x)) - mean of y
F_0 = np.mean(y)
print(f"Initial model (F_0): {F_0}")

# Store F_0 as the first column of predictions
predictions[:, 0] = F_0

# Learning rate (gamma)
gamma = 0.1

# Perform gradient boosting for n_iterations
for t in range(1, n_iterations):
    # Calculate gradients (negative errors)
    gradients = 2 * (predictions[:, t - 1] - y)

    # Fit the weak learner (shallow decision tree) to the negative gradient
    tree = DecisionTreeRegressor(max_depth=1)
    tree.fit(X, -gradients) # negative gradients

    # Prediction from the weak learner
    h = tree.predict(X)

    # Update model (F_t = F_(t-1) + gamma * h_t)
    predictions[:, t] = predictions[:, t - 1] + gamma * h

# Now we convert this into a pandas DataFrame for better presentation
df = pd.DataFrame(predictions, columns=[f"F_{i}" for i in range(n_iterations)])
df["x"] = X.flatten()  # Adding the input feature as a column
df["y"] = y  # Adding the target values as a column

# Reordering columns: X and y at the front
df = df[["x"] + [f"F_{i}" for i in range(0, n_iterations, 10)] + ["F_99", "y"]]

# Display the result
display(df)


Initial model (F_0): 7.0


Unnamed: 0,x,F_0,F_10,F_20,F_30,F_40,F_50,F_60,F_70,F_80,F_90,F_99,y
0,1,7.0,4.020218,3.300607,3.079666,3.019048,3.004972,3.001495,3.000364,3.000103,3.000026,3.000007,3
1,2,7.0,5.336112,5.056091,5.016796,5.003783,5.001239,5.000201,5.000044,5.000019,5.000005,5.000002,5
2,3,7.0,6.928112,6.9863,6.991985,7.001272,7.000364,6.999894,7.00002,6.999995,6.999999,7.0,7
3,4,7.0,8.864841,8.923029,8.980871,8.996527,8.99882,8.999694,8.999951,8.99997,8.999995,8.999999,9
4,5,7.0,9.850718,10.733973,10.930682,10.979369,10.994605,10.998716,10.999621,10.999913,10.999975,10.999992,11
