# 🌲 Visualizing Random Forest on Iris Dataset with PCA

In [None]:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap

# Fetch the Iris bunch, then pull out the feature matrix, the integer
# class labels and the class names as separate module-level variables
iris = load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

# Project the features onto their first two principal components so the
# samples (and later the decision regions) can be drawn on a flat plot
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)


In [None]:

# Helper function to plot decision boundaries
def plot_decision_boundary(model, X, y, title="Decision Boundary", ax=None, h=0.02):
    """Draw a classifier's decision regions with the samples plotted on top.

    The model is evaluated on a regular grid that extends one unit past the
    data range on both axes. The grid predictions are shown as translucent
    filled contours and the samples in ``X`` are scattered over them.

    Args:
        model: Object exposing ``predict``; it is called with an
            ``(n_grid_points, 2)`` array. Presumably already fitted on
            two-feature data -- the function itself never calls ``fit``.
        X: 2-D array; column 0 is used as the horizontal coordinate and
            column 1 as the vertical one.
        y: Per-sample values used to colour the scatter points.
        title: Title set on the axes.
        ax: Axes to draw on. When ``None`` the current axes from
            ``plt.gca()`` are used.
        h: Spacing between neighbouring grid points along each axis.
            Smaller values give smoother boundaries at the cost of more
            predictions.

    Raises:
        ValueError: If ``h`` is not strictly positive.
    """
    # A zero or negative step would either fail inside np.arange or yield
    # an empty grid, so reject it up front with a clear message.
    if h <= 0:
        raise ValueError("h must be a positive grid step")

    # Pad the data range by one unit so no sample sits on the plot border.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Predict every grid point in one call, then restore the grid shape
    # so the result lines up with xx / yy for contouring.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

    if ax is None:
        ax = plt.gca()

    # Three pale colours for the regions, three saturated ones for the
    # points (assumes at most three classes -- TODO confirm for other data).
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00CC00', '#0000FF'])
    ax.contourf(xx, yy, Z, alpha=0.3, cmap=cmap_light)
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, edgecolor='k')
    ax.set_title(title)
    ax.set_xlabel("PCA Component 1")
    ax.set_ylabel("PCA Component 2")
    ax.grid(True)


In [None]:

# Visualize a few bootstrapped decision trees: each panel shows one tree
# fitted on a different resample (with replacement) of the PCA-reduced data
n_trees = 3

# Fixed seed so every run draws the same bootstrap samples and therefore
# produces the same figure
rng = np.random.default_rng(42)

# One 5x4-inch panel per tree; np.atleast_1d keeps `axes` indexable even
# when n_trees == 1, where plt.subplots returns a bare Axes object
fig, axes = plt.subplots(1, n_trees, figsize=(5 * n_trees, 4))
axes = np.atleast_1d(axes)

for i, ax in enumerate(axes):
    # Bootstrap: draw as many row indices as there are samples, with replacement
    indices = rng.choice(len(X_pca), size=len(X_pca), replace=True)
    X_sample = X_pca[indices]
    y_sample = y[indices]
    # random_state pins the tree's internal tie-breaking between splits
    tree = DecisionTreeClassifier(max_depth=3, random_state=42)
    tree.fit(X_sample, y_sample)
    plot_decision_boundary(tree, X_sample, y_sample, title=f"Tree {i + 1}", ax=ax)

fig.suptitle("Bootstrapped Trees on PCA-reduced Iris Data")
plt.tight_layout()
plt.show()


In [None]:

# Fit a 100-tree random forest (depth-limited to 3, seeded for
# reproducibility) on the 2-D PCA projection and draw its decision regions
forest = RandomForestClassifier(
    n_estimators=100,
    max_depth=3,
    random_state=42,
).fit(X_pca, y)

# NOTE(review): per the scikit-learn docs, forests combine trees by averaging
# their predicted class probabilities rather than by hard majority vote --
# confirm whether the plot title should reflect that.
plt.figure(figsize=(6, 5))
plot_decision_boundary(
    forest,
    X_pca,
    y,
    title="Random Forest (Majority Voting)",
    ax=plt.gca(),
)
plt.tight_layout()
plt.show()
