# Chapter 7 - Ensemble Learning and Random Forests

This notebook covers Ensemble Learning methods, including:
- Voting Classifiers
- Bagging and Pasting
- Random Forests
- Extra-Trees
- AdaBoost
- Gradient Boosting
- XGBoost
- Stacking

## Setup

In [None]:
# Python ≥3.6 is required (the cells below use f-strings)
import sys
assert sys.version_info >= (3, 6)

# Scikit-Learn ≥0.22 is required (the Stacking section imports StackingRegressor)
import re
import sklearn

# Compare the version numerically: comparing the raw strings is lexicographic,
# so e.g. "0.3" >= "0.20" would wrongly pass. Only the leading "major.minor"
# digits are parsed, which also copes with suffixes such as "1.0rc1" or ".dev0".
_sk_version = re.match(r"(\d+)\.(\d+)", sklearn.__version__)
assert _sk_version is not None
assert tuple(map(int, _sk_version.groups())) >= (0, 22)

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "ensembles"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure under IMAGES_PATH.

    The file is named ``<fig_id>.<fig_extension>`` and written with
    ``dpi=resolution``. When ``tight_layout`` is true, ``plt.tight_layout()``
    is applied just before saving.
    """
    target = os.path.join(IMAGES_PATH, "".join((fig_id, ".", fig_extension)))
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, dpi=resolution, format=fig_extension)

## Voting Classifiers

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

log_clf = LogisticRegression(solver="lbfgs", random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_clf = SVC(gamma="scale", random_state=42)

voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard')

voting_clf.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score

for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

Soft voting often achieves higher performance than hard voting because it gives more weight to highly confident votes. All you need is to replace `voting="hard"` with `voting="soft"` and ensure that all classifiers can estimate class probabilities.

In [None]:
log_clf = LogisticRegression(solver="lbfgs", random_state=42)
rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
svm_clf = SVC(gamma="scale", probability=True, random_state=42)

voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='soft')

voting_clf.fit(X_train, y_train)

for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

## Bagging and Pasting

### Bagging and Pasting in Scikit-Learn

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42), n_estimators=500,
    max_samples=100, bootstrap=True, random_state=42)
bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)
print(f"Bagging accuracy: {accuracy_score(y_test, y_pred)}")

In [None]:
tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf.fit(X_train, y_train)
y_pred_tree = tree_clf.predict(X_test)
print(f"Single Decision Tree accuracy: {accuracy_score(y_test, y_pred_tree)}")

Let's compare the decision boundaries:

In [None]:
from matplotlib.colors import ListedColormap

def plot_decision_boundary(clf, X, y, axes=(-1.5, 2.4, -1, 1.5), alpha=0.5, contour=True):
    """Plot the decision regions of a fitted two-feature classifier over the data.

    Parameters
    ----------
    clf : fitted estimator exposing ``predict``; it is evaluated on a
        100 x 100 grid of two-feature points spanning ``axes``.
    X : array indexed as ``X[:, 0]`` and ``X[:, 1]`` (the two feature columns).
    y : labels aligned with ``X``; instances with ``y == 0`` are drawn with the
        ``"yo"`` marker style and instances with ``y == 1`` with ``"bs"``.
    axes : sequence ``(x1_min, x1_max, x2_min, x2_max)`` bounding both the
        prediction grid and the plot. The default is an immutable tuple so it
        cannot be altered between calls.
    alpha : opacity of the filled decision regions.
    contour : when true, also draw the outlines of the regions.
    """
    x1s = np.linspace(axes[0], axes[1], 100)
    x2s = np.linspace(axes[2], axes[3], 100)
    x1, x2 = np.meshgrid(x1s, x2s)
    # One row per grid point, so the classifier can label the whole grid at once
    X_new = np.c_[x1.ravel(), x2.ravel()]
    y_pred = clf.predict(X_new).reshape(x1.shape)
    custom_cmap = ListedColormap(['#fafab0','#9898ff','#a0faa0'])
    plt.contourf(x1, x2, y_pred, alpha=alpha, cmap=custom_cmap)
    if contour:
        custom_cmap2 = ListedColormap(['#7d7d58','#4c4c7f','#507d50'])
        plt.contour(x1, x2, y_pred, cmap=custom_cmap2, alpha=0.8)
    plt.plot(X[:, 0][y==0], X[:, 1][y==0], "yo", alpha=0.6)
    plt.plot(X[:, 0][y==1], X[:, 1][y==1], "bs", alpha=0.6)
    plt.axis(axes)
    plt.xlabel(r"$x_1$", fontsize=18)
    plt.ylabel(r"$x_2$", fontsize=18, rotation=0)

# Side-by-side comparison: single Decision Tree (left) vs. bagging ensemble (right)
fig, axes = plt.subplots(ncols=2, figsize=(10,4), sharey=True)
plt.sca(axes[0])
plot_decision_boundary(tree_clf, X, y)
plt.title("Decision Tree", fontsize=14)
plt.sca(axes[1])
plot_decision_boundary(bag_clf, X, y)
plt.title("Decision Trees with Bagging", fontsize=14)
plt.ylabel("")  # the y axis is shared, so label it only on the left plot
save_fig("decision_tree_without_and_with_bagging_plot")
plt.show()

### Out-of-Bag Evaluation

In [None]:
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(random_state=42), n_estimators=500,
    bootstrap=True, oob_score=True, random_state=40)
bag_clf.fit(X_train, y_train)
print(f"OOB Score: {bag_clf.oob_score_}")

y_pred = bag_clf.predict(X_test)
print(f"Test Set Accuracy: {accuracy_score(y_test, y_pred)}")

In [None]:
print(f"OOB Decision Function Shape: {bag_clf.oob_decision_function_.shape}")
print(f"First 5 OOB predictions:\n{bag_clf.oob_decision_function_[:5]}")

## Random Forests

In [None]:
from sklearn.ensemble import RandomForestClassifier

rnd_clf = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, random_state=42)
rnd_clf.fit(X_train, y_train)

y_pred_rf = rnd_clf.predict(X_test)
print(f"Random Forest accuracy: {accuracy_score(y_test, y_pred_rf)}")

This is roughly equivalent to the following `BaggingClassifier`:

In [None]:
# A Random Forest searches for the best split among a random subset of the
# features at each node, so the matching base estimator is a tree with
# max_features="sqrt". (splitter="random" would instead pick random split
# thresholds, which is what Extra-Trees do.)
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(max_features="sqrt", max_leaf_nodes=16, random_state=42),
    n_estimators=500, max_samples=1.0, bootstrap=True, random_state=42)

bag_clf.fit(X_train, y_train)
y_pred = bag_clf.predict(X_test)
print(f"Equivalent Bagging accuracy: {accuracy_score(y_test, y_pred)}")

### Feature Importance

In [None]:
from sklearn.datasets import load_iris

iris = load_iris()
rnd_clf = RandomForestClassifier(n_estimators=500, random_state=42)
rnd_clf.fit(iris["data"], iris["target"])
for name, score in zip(iris["feature_names"], rnd_clf.feature_importances_):
    print(f"{name}: {score:.3f}")

In [None]:
plt.figure(figsize=(6, 4))
plt.barh(range(len(iris["feature_names"])), rnd_clf.feature_importances_)
plt.yticks(range(len(iris["feature_names"])), iris["feature_names"])
plt.xlabel("Feature Importance")
plt.title("Random Forest Feature Importance")
save_fig("rf_feature_importance_plot")
plt.show()

Let's load the MNIST dataset, train a Random Forest classifier on it, and look at the impurity-based feature importances:

In [None]:
from sklearn.datasets import fetch_openml

mnist = fetch_openml('mnist_784', version=1, as_frame=False)
mnist.target = mnist.target.astype(np.uint8)

rnd_clf = RandomForestClassifier(n_estimators=100, random_state=42)
rnd_clf.fit(mnist["data"], mnist["target"])

In [None]:
def plot_digit(data):
    """Draw ``data``, reshaped to 28x28, as a "hot" colormap image without axes."""
    pixels = data.reshape(28, 28)
    plt.imshow(pixels, interpolation="nearest", cmap=mpl.cm.hot)
    plt.axis("off")

plot_digit(rnd_clf.feature_importances_)

cbar = plt.colorbar(ticks=[rnd_clf.feature_importances_.min(), rnd_clf.feature_importances_.max()])
cbar.ax.set_yticklabels(['Not important', 'Very important'])

save_fig("mnist_feature_importance_plot")
plt.show()

## Extra-Trees

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

extra_clf = ExtraTreesClassifier(n_estimators=500, max_leaf_nodes=16, random_state=42)
extra_clf.fit(X_train, y_train)
y_pred_extra = extra_clf.predict(X_test)
print(f"Extra-Trees accuracy: {accuracy_score(y_test, y_pred_extra)}")

In [None]:
# rnd_clf was last refitted on MNIST (784 features) in the feature-importance
# section, so it cannot predict on the 2-feature moons grid. Fit a dedicated
# forest on the moons training set, with the same settings as extra_clf.
rnd_clf_moons = RandomForestClassifier(n_estimators=500, max_leaf_nodes=16, random_state=42)
rnd_clf_moons.fit(X_train, y_train)

fig, axes = plt.subplots(ncols=2, figsize=(10,4), sharey=True)

plt.sca(axes[0])
plot_decision_boundary(rnd_clf_moons, X, y)
plt.title("Random Forest", fontsize=14)

plt.sca(axes[1])
plot_decision_boundary(extra_clf, X, y)
plt.title("Extra Trees", fontsize=14)
plt.ylabel("")  # the y axis is shared, so label it only on the left plot

save_fig("random_forest_vs_extra_trees_plot")
plt.show()

## AdaBoost

In [None]:
from sklearn.ensemble import AdaBoostClassifier

ada_clf = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1), n_estimators=200,
    algorithm="SAMME.R", learning_rate=0.5, random_state=42)
ada_clf.fit(X_train, y_train)

y_pred_ada = ada_clf.predict(X_test)
print(f"AdaBoost accuracy: {accuracy_score(y_test, y_pred_ada)}")

In [None]:
plot_decision_boundary(ada_clf, X, y)
plt.title("AdaBoost", fontsize=14)
save_fig("adaboost_plot")
plt.show()

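Let's look at the AdaBoost algorithm in action. The following code trains five consecutive predictors on the moons dataset — each one a regularized SVM classifier with an RBF kernel, not a Decision Stump — with every predictor after the first trained on a reweighted training set. The left plot uses `learning_rate = 1` and the right plot uses `learning_rate = 0.5`. The weight of each training instance is represented by its circle size.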

In [None]:
m = len(X_train)

fig, axes = plt.subplots(ncols=2, figsize=(10,4), sharey=True)
for subplot, learning_rate in ((0, 1), (1, 0.5)):
    # Every run starts from uniform instance weights
    sample_weights = np.ones(m) / m
    plt.sca(axes[subplot])
    for i in range(5):
        svm_clf = SVC(kernel="rbf", C=0.2, gamma=0.6, random_state=42)
        svm_clf.fit(X_train, y_train, sample_weight=sample_weights * m)
        y_pred = svm_clf.predict(X_train)

        # AdaBoost update: the predictor's weighted error rate r gives its
        # weight alpha (scaled by the learning rate), and the misclassified
        # instances are boosted by exp(alpha). Deriving the factor from
        # 1 - learning_rate would leave the weights unchanged when
        # learning_rate == 1.
        misclassified = y_pred != y_train
        r = sample_weights[misclassified].sum() / sample_weights.sum()
        # Keep r away from 0 and 1 so the logarithm below stays finite
        r = np.clip(r, 1e-10, 1 - 1e-10)
        alpha = learning_rate * np.log((1 - r) / r)
        sample_weights[misclassified] *= np.exp(alpha)
        sample_weights /= sample_weights.sum()  # normalization step

        plot_decision_boundary(svm_clf, X, y, alpha=0.2)
        plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, s=sample_weights*1000, cmap=plt.cm.Spectral, alpha=0.7)

    plt.title("learning_rate = {}".format(learning_rate), fontsize=16)
    if subplot == 0:
        # Numbered text annotations, drawn on the left plot only
        plt.text(-0.75, -0.95, "1", fontsize=14)
        plt.text(-1.05, -0.95, "2", fontsize=14)
        plt.text(1.0, -0.95, "3", fontsize=14)
        plt.text(-1.45, -0.5, "4", fontsize=14)
        plt.text(1.36, -0.95, "5", fontsize=14)
    else:
        plt.ylabel("")

save_fig("boosting_plot")
plt.show()

## Gradient Boosting

In [None]:
np.random.seed(42)
X = np.random.rand(100, 1) - 0.5
y = 3*X[:, 0]**2 + 0.05 * np.random.randn(100)

In [None]:
from sklearn.tree import DecisionTreeRegressor

tree_reg1 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg1.fit(X, y)

In [None]:
y2 = y - tree_reg1.predict(X)
tree_reg2 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg2.fit(X, y2)

In [None]:
y3 = y2 - tree_reg2.predict(X)
tree_reg3 = DecisionTreeRegressor(max_depth=2, random_state=42)
tree_reg3.fit(X, y3)

In [None]:
X_new = np.array([[0.8]])
y_pred = sum(tree.predict(X_new) for tree in (tree_reg1, tree_reg2, tree_reg3))
print(f"Prediction: {y_pred}")

In [None]:
def plot_predictions(regressors, X, y, axes, label=None, style="r-", data_style="b.", data_label=None):
    """Plot the data points and the summed predictions of ``regressors``.

    The predictions of all regressors are added together and evaluated on 500
    evenly spaced values between ``axes[0]`` and ``axes[1]``. ``axes`` is also
    applied as the plot limits. A legend is drawn only when ``label`` or
    ``data_label`` is given.
    """
    grid = np.linspace(axes[0], axes[1], 500)
    grid_column = grid.reshape(-1, 1)
    ensemble_pred = sum(model.predict(grid_column) for model in regressors)
    plt.plot(X[:, 0], y, data_style, label=data_label)
    plt.plot(grid, ensemble_pred, style, linewidth=2, label=label)
    if label or data_label:
        plt.legend(loc="upper center", fontsize=16)
    plt.axis(axes)

fig, axes = plt.subplots(ncols=2, figsize=(10,4), sharey=True)

# Left: the first tree alone against the training set
plt.sca(axes[0])
plot_predictions([tree_reg1], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label="$h_1(x_1)$", style="g-", data_label="Training set")
plt.ylabel("$y$", fontsize=16, rotation=0)
plt.title("Residuals and tree predictions", fontsize=16)

# Right: the ensemble prediction after one, two and three trees
plt.sca(axes[1])
plot_predictions([tree_reg1], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label="$h(x_1) = h_1(x_1)$", data_label="Training set")
plot_predictions([tree_reg1, tree_reg2], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label="$h(x_1) = h_1(x_1) + h_2(x_1)$", style="b-")
plot_predictions([tree_reg1, tree_reg2, tree_reg3], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label="$h(x_1) = h_1(x_1) + h_2(x_1) + h_3(x_1)$", style="r-")
plt.xlabel("$x_1$", fontsize=16)
plt.title("Ensemble predictions", fontsize=16)

save_fig("gradient_boosting_plot")
plt.show()

A simpler way to train GBRT ensembles is to use Scikit-Learn's `GradientBoostingRegressor` class:

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

# Few trees with a high learning rate...
gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=3, learning_rate=1.0, random_state=42)
gbrt.fit(X, y)

# ...versus many trees with a low learning rate
gbrt_slow = GradientBoostingRegressor(max_depth=2, n_estimators=200, learning_rate=0.1, random_state=42)
gbrt_slow.fit(X, y)

fig, axes = plt.subplots(ncols=2, figsize=(10,4), sharey=True)

plt.sca(axes[0])
plot_predictions([gbrt], X, y, axes=[-0.5, 0.5, -0.1, 0.8], label="Ensemble predictions")
plt.title("learning_rate={}, n_estimators={}".format(gbrt.learning_rate, gbrt.n_estimators), fontsize=14)
plt.xlabel("$x_1$", fontsize=16)
plt.ylabel("$y$", fontsize=16, rotation=0)

plt.sca(axes[1])
plot_predictions([gbrt_slow], X, y, axes=[-0.5, 0.5, -0.1, 0.8])
plt.title("learning_rate={}, n_estimators={}".format(gbrt_slow.learning_rate, gbrt_slow.n_estimators), fontsize=14)
plt.xlabel("$x_1$", fontsize=16)

save_fig("gbrt_learning_rate_plot")
plt.show()

### Gradient Boosting with Early Stopping

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=49)

gbrt = GradientBoostingRegressor(max_depth=2, n_estimators=120, random_state=42)
gbrt.fit(X_train, y_train)

errors = [mean_squared_error(y_val, y_pred)
          for y_pred in gbrt.staged_predict(X_val)]
bst_n_estimators = np.argmin(errors) + 1

gbrt_best = GradientBoostingRegressor(max_depth=2, n_estimators=bst_n_estimators, random_state=42)
gbrt_best.fit(X_train, y_train)

print(f"Best number of estimators: {bst_n_estimators}")

In [None]:
min_error = np.min(errors)
n_trees = len(errors)  # one validation error per boosting stage
plt.figure(figsize=(10, 4))

plt.subplot(121)
# errors[i] is the validation error with i + 1 trees, so plot against
# 1..n_trees; otherwise the curve is shifted one tree to the left of the
# (1-based) bst_n_estimators marker.
plt.plot(np.arange(1, n_trees + 1), errors, "b.-")
plt.plot([bst_n_estimators, bst_n_estimators], [0, min_error], "k--")
plt.plot([0, n_trees], [min_error, min_error], "k--")
plt.plot(bst_n_estimators, min_error, "ko")
plt.text(bst_n_estimators, min_error*1.2, "Minimum", ha="center", fontsize=14)
plt.axis([0, n_trees, 0, 0.01])
plt.xlabel("Number of trees")
plt.ylabel("Error", rotation=0)
plt.title("Validation error", fontsize=14)

plt.subplot(122)
plot_predictions([gbrt_best], X_val, y_val, axes=[-0.5, 0.5, -0.1, 0.8])
plt.title("Best model (%d trees)" % bst_n_estimators, fontsize=14)
plt.ylabel("")
plt.xlabel("$x_1$", fontsize=16)

save_fig("early_stopping_gbrt_plot")
plt.show()

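It is also possible to implement early stopping by actually stopping training early, instead of training a large number of trees first and then looking back to find the optimal number. Setting `warm_start=True` makes Scikit-Learn keep the existing trees when `fit()` is called again, so the ensemble can be grown one tree at a time; the loop below stops once the validation error has failed to improve for five iterations in a row: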

In [None]:
# Grow the ensemble one tree at a time and stop once the validation error has
# not improved for five iterations in a row.
# warm_start=True lets repeated fit() calls reuse the trees already trained
# (see the GradientBoostingRegressor documentation).
gbrt = GradientBoostingRegressor(max_depth=2, warm_start=True, random_state=42)

min_val_error = float("inf")
error_going_up = 0  # consecutive iterations without a new best validation error
for n_estimators in range(1, 120):
    gbrt.n_estimators = n_estimators
    gbrt.fit(X_train, y_train)
    y_pred = gbrt.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)
    if val_error < min_val_error:
        min_val_error = val_error
        error_going_up = 0
    else:
        error_going_up += 1
        if error_going_up == 5:
            break  # early stopping

# Reports the ensemble size when the loop ended. After an early stop this is
# five trees past the size that achieved min_val_error; the message is also
# printed when the loop runs to completion without stopping early.
print(f"Early stopping at {gbrt.n_estimators} estimators")

### XGBoost

XGBoost (Extreme Gradient Boosting) is one of the most popular Machine Learning libraries today. Let's install it:

In [None]:
# Uncomment the following line to install XGBoost (if not already installed)
# !pip install xgboost

# Only the import is guarded: a failure while training should surface as
# itself instead of being reported as "XGBoost not installed".
try:
    import xgboost
except ImportError:
    print("XGBoost not installed. To install: pip install xgboost")
else:
    xgb_reg = xgboost.XGBRegressor(random_state=42)
    xgb_reg.fit(X_train, y_train)
    y_pred = xgb_reg.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)
    print(f"XGBoost validation error: {val_error}")

    # XGBoost with early stopping
    # NOTE(review): recent xgboost releases expect early_stopping_rounds as a
    # constructor argument rather than a fit() argument; confirm against the
    # installed version.
    xgb_reg.fit(X_train, y_train,
                eval_set=[(X_val, y_val)], early_stopping_rounds=2)
    y_pred = xgb_reg.predict(X_val)
    val_error = mean_squared_error(y_val, y_pred)
    print(f"XGBoost with early stopping validation error: {val_error}")

## Stacking

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import StackingRegressor

# Generate synthetic data for stacking demonstration
np.random.seed(42)
X_stack = np.random.rand(500, 2)
y_stack = X_stack[:, 0] + 2 * X_stack[:, 1] + 0.1 * np.random.randn(500)
X_train_stack, X_test_stack, y_train_stack, y_test_stack = train_test_split(X_stack, y_stack, random_state=42)

# Define base learners
estimators = [
    ('rf', RandomForestRegressor(n_estimators=10, random_state=42)),
    ('svr', SVR(kernel='linear'))
]

# Create stacking regressor
stacking_regressor = StackingRegressor(
    estimators=estimators,
    final_estimator=LinearRegression()
)

stacking_regressor.fit(X_train_stack, y_train_stack)
y_pred_stack = stacking_regressor.predict(X_test_stack)
print(f"Stacking Regressor MSE: {mean_squared_error(y_test_stack, y_pred_stack)}")

# Compare with individual models
for name, estimator in estimators:
    estimator.fit(X_train_stack, y_train_stack)
    y_pred_individual = estimator.predict(X_test_stack)
    print(f"{name} MSE: {mean_squared_error(y_test_stack, y_pred_individual)}")

## Ensemble Method Comparison

In [None]:
# Let's compare all ensemble methods on the moons dataset
from sklearn.metrics import accuracy_score

# Prepare data
X, y = make_moons(n_samples=500, noise=0.30, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Define ensemble methods
ensembles = {
    'Voting (Hard)': VotingClassifier(
        estimators=[('lr', LogisticRegression(solver="lbfgs", random_state=42)),
                   ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
                   ('svc', SVC(gamma="scale", random_state=42))],
        voting='hard'),
    
    'Voting (Soft)': VotingClassifier(
        estimators=[('lr', LogisticRegression(solver="lbfgs", random_state=42)),
                   ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
                   ('svc', SVC(gamma="scale", probability=True, random_state=42))],
        voting='soft'),
    
    'Bagging': BaggingClassifier(
        DecisionTreeClassifier(random_state=42), 
        n_estimators=500, random_state=42),
    
    'Random Forest': RandomForestClassifier(n_estimators=500, random_state=42),
    
    'Extra Trees': ExtraTreesClassifier(n_estimators=500, random_state=42),
    
    'AdaBoost': AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=1), 
        n_estimators=200, learning_rate=0.5, random_state=42)
}

# Train and evaluate each ensemble
results = {}
for name, ensemble in ensembles.items():
    ensemble.fit(X_train, y_train)
    y_pred = ensemble.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    results[name] = accuracy
    print(f"{name}: {accuracy:.4f}")

# Plot comparison
plt.figure(figsize=(10, 6))
methods = list(results.keys())
accuracies = list(results.values())
bars = plt.bar(methods, accuracies, color=['skyblue', 'lightgreen', 'lightcoral', 'gold', 'plum', 'orange'])
plt.ylabel('Accuracy')
plt.title('Ensemble Method Comparison')
plt.xticks(rotation=45)
plt.ylim(0.8, 1.0)

# Add value labels on bars
for bar, acc in zip(bars, accuracies):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.005, 
             f'{acc:.3f}', ha='center', va='bottom')

plt.tight_layout()
save_fig("ensemble_comparison_plot")
plt.show()

## Summary

In this chapter, we covered the main ensemble learning techniques:

### **Ensemble Methods Overview:**

1. **Voting Classifiers**
   - Hard voting: Majority vote
   - Soft voting: Average probabilities (often better)
   - Combines different algorithms

2. **Bagging (Bootstrap Aggregating)**
   - Trains multiple instances with bootstrap samples
   - Reduces overfitting
   - Can be parallelized
   - Out-of-bag evaluation available

3. **Random Forests**
   - Bagging + random feature selection
   - Very popular and effective
   - Provides feature importance
   - Works well out-of-the-box

4. **Extra-Trees (Extremely Randomized Trees)**
   - Even more randomness than Random Forest
   - Faster training
   - Often similar performance to Random Forest

5. **Boosting**
   - **AdaBoost**: Focuses on misclassified instances
   - **Gradient Boosting**: Fits new predictors to residual errors
   - **XGBoost**: Optimized gradient boosting implementation
   - Sequential training: each predictor is trained on the results of the previous one, so the predictors cannot be trained in parallel

6. **Stacking**
   - Uses meta-learner to combine base learners
   - Most complex but potentially most powerful

### **Key Principles:**

- **Wisdom of Crowds**: Many weak learners → strong learner
- **Bias-Variance Tradeoff**: 
  - Bagging reduces variance
  - Boosting reduces bias
- **Diversity**: Different models should make different errors

### **When to Use:**

- **Random Forest**: Great default choice, interpretable
- **Gradient Boosting/XGBoost**: When maximum performance needed
- **Voting**: When you have different types of algorithms
- **Bagging**: When base learner tends to overfit

### **Best Practices:**

- Start with Random Forest as baseline
- Use cross-validation for hyperparameter tuning
- Consider computational cost vs performance gain
- Monitor for overfitting (especially with boosting)
- Use early stopping with gradient boosting