In [3]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

np.random.seed(42)
X_train = np.random.rand(100, 1) * 10
y_train = 3 * X_train.flatten() + 5 + np.random.randn(100) * 2

print("First 5 data points of X_train and y_train:")
print(X_train[:5], y_train[:5])

print('----------------------------------------------------------------')

# 18.1)-2 [3 pts] Compute mean of y_i, which is the first weak classifier h_0

h_0 = np.mean(y_train)

print(f"h_0 (mean of y_i): {h_0}")

H_t = np.array([h_0] * len(y_train))


First 5 data points of X_train and y_train:
[[3.74540119]
 [9.50714306]
 [7.31993942]
 [5.98658484]
 [1.5601864 ]] [16.4102977  32.92341449 27.14333981 18.9846167   9.24121544]
----------------------------------------------------------------
h_0 (mean of y_i): 19.103261581011562


In [4]:
# add weak classifier h_t

H_t = np.array([h_0] * len(y_train))
T = 3

for t in range(1, T):
    y_pred_t_minus_1 = H_t
    residuals = y_train - y_pred_t_minus_1
    print(f"Residuals (revised Y_train) at iteration {t}:")
    print(residuals[:5])

    Y_train_residuals = residuals
    model = DecisionTreeRegressor(max_depth=3)
    model.fit(X_train, Y_train_residuals)
    y_pred_residuals = model.predict(X_train)
    H_t += y_pred_residuals
    print(f"Updated predictions at iteration {t}:")
    print(H_t[:5])




Residuals (revised Y_train) at iteration 1:
[-2.69296388 13.82015291  8.04007823 -0.11864488 -9.86204614]
Updated predictions at iteration 1:
[16.84178904 32.9476559  28.27914665 21.12253359 10.18159537]
Residuals (revised Y_train) at iteration 2:
[-0.43149133 -0.02424141 -1.13580684 -2.13791689 -0.94037994]
Updated predictions at iteration 2:
[16.60094946 33.14178205 28.4732728  21.31665974  9.9407558 ]


In [5]:
from sklearn.tree import DecisionTreeRegressor, export_text

model = DecisionTreeRegressor(max_depth=3)
model.fit(X_train, Y_train_residuals)

tree_rules = export_text(model, feature_names=['X'])
print("Decision Tree Rules (Weak Classifier h_t):")
print(tree_rules)

# Make predictions using the new weak classifier h_t
y_pred_residuals = model.predict(X_train)
H_t += y_pred_residuals
print(f"Updated predictions at iteration {t} (H_t):")
print(H_t[:5])


Decision Tree Rules (Weak Classifier h_t):
|--- X <= 9.70
|   |--- X <= 9.58
|   |   |--- X <= 4.08
|   |   |   |--- value: [-0.24]
|   |   |--- X >  4.08
|   |   |   |--- value: [0.19]
|   |--- X >  9.58
|   |   |--- X <= 9.68
|   |   |   |--- value: [-2.82]
|   |   |--- X >  9.68
|   |   |   |--- value: [-0.84]
|--- X >  9.70
|   |--- X <= 9.78
|   |   |--- value: [1.81]
|   |--- X >  9.78
|   |   |--- value: [3.22]

Updated predictions at iteration 2 (H_t):
[16.36010989 33.3359082  28.66739895 21.51078589  9.69991623]


In [7]:
weighted_sum = np.sum(Y_train_residuals * y_pred_residuals)

if weighted_sum < 0:
    H_t += y_pred_residuals
    print(f"Iteration {t}: Ensemble updated. Weighted sum: {weighted_sum}")
else:
   print(f"Iteration {t}: Stopping. Final ensemble reached. Weighted sum: {weighted_sum}")


Iteration 2: Stopping. Final ensemble reached. Weighted sum: 26.8421104116092
