In [None]:
import numpy as np
from algorithms.random_forest import (
    RandomForestClassifier,
    train_test_split,
    accuracy,
    plot_decision_boundary,
)

# Synthetic two-class data: two Gaussian blobs.
#   class 0 -> 120 points centred on (2, 2), standard deviation 0.5
#   class 1 -> 110 points centred on (6, 6), standard deviation 0.6
# The global NumPy RNG is seeded first so the draws are reproducible.
np.random.seed(0)
A = [2, 2] + 0.5 * np.random.randn(120, 2)
B = [6, 6] + 0.6 * np.random.randn(110, 2)
X = np.concatenate((A, B), axis=0)  # rows of A first, then rows of B
y = np.repeat([0, 1], [len(A), len(B)])  # label 0 for A's rows, 1 for B's

# Split features and labels into train / test parts.
# NOTE(review): test_ratio=0.3 presumably holds out ~30% of the rows — confirm
# against the train_test_split helper in algorithms.random_forest.
X_tr, X_te, y_tr, y_te = train_test_split(
    X,
    y,
    test_ratio=0.3,
)

In [None]:
# Configure the forest, then train it on the training split. `rf` ends up
# bound to whatever `fit` returns (presumably the fitted estimator itself —
# confirm in algorithms.random_forest).
rf = RandomForestClassifier(
    n_estimators=30,
    max_depth=8,
    verbose=True,
    oob=True,
)
rf = rf.fit(X_tr, y_tr)

# Out-of-bag score on the training data, rounded to three decimals for display.
oob_acc = rf.oob_score(X_tr, y_tr).round(3)
print("\nOOB accuracy :", oob_acc)

In [None]:
# Predict on the held-out split and report accuracy rounded to three decimals.
y_pred = rf.predict(X_te)
test_acc = accuracy(y_te, y_pred).round(3)
print("Test accuracy:", test_acc)

# Hand the fitted model and the training data to the plotting helper.
plot_decision_boundary(
    rf,
    X_tr,
    y_tr,
    title="Random Forest (30 trees)",
)

In [None]:
from collections import Counter

# Tally how many times each feature appears across the entries of
# rf.features_ (one entry per tree: the feature set that tree saw).
feat_counts = Counter(
    feat for tree_feats in rf.features_ for feat in tree_feats
)
print("Feature usage count:", feat_counts)