In [None]:
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from matplotlib.colors import ListedColormap

# Decision thresholds: Gini vs. entropy decision trees

In [None]:
# Synthetic binary problem with exactly two features, so the fitted models
# can be drawn directly on a 2D plane.
X, y = make_classification(
    n_samples=300,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_clusters_per_class=1,
    class_sep=0.5,
    random_state=42,
)

# Two depth-3 trees that share every setting except the split criterion.
# `fit` returns the estimator itself, so construction and training are chained;
# the Gini tree is still fitted before the entropy tree.
clf_gini = DecisionTreeClassifier(
    criterion='gini', max_depth=3, random_state=42
).fit(X, y)
clf_entropy = DecisionTreeClassifier(
    criterion='entropy', max_depth=3, random_state=42
).fit(X, y)

# Define function to plot decision boundary
def plot_decision_boundary(clf, X, y, title, h=0.01, figsize=(6, 4)):
    """Draw a fitted classifier's decision regions over a two-feature dataset.

    The classifier is evaluated on a regular grid that covers the range of the
    first two columns of ``X`` plus a one-unit margin on every side. The
    predicted classes are drawn as filled contours, with the samples scattered
    on top and coloured by ``y``.

    Parameters
    ----------
    clf : object
        Fitted estimator exposing ``predict`` for an ``(n_points, 2)`` array.
    X : array
        Samples, indexed as ``X[:, 0]`` (horizontal axis) and ``X[:, 1]``
        (vertical axis).
    y : array
        Per-sample labels used to colour the scatter points.
    title : str
        Title placed above the axes.
    h : float, default 0.01
        Spacing between neighbouring grid points. Smaller values give a
        smoother boundary at the cost of more ``predict`` evaluations.
    figsize : tuple, default (6, 4)
        Figure size forwarded to ``plt.subplots``.

    Raises
    ------
    ValueError
        If ``h`` is not strictly positive.
    """
    # A zero or negative step cannot produce a usable grid; fail early with a
    # clear message instead of an obscure error further down in the plotting.
    if h <= 0:
        raise ValueError(f"h must be a positive grid step, got {h!r}")

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    # Predict on the flattened grid, then fold the labels back to grid shape.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    fig, ax = plt.subplots(figsize=figsize)
    ax.contourf(xx, yy, Z, alpha=0.3, cmap='coolwarm')
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap='coolwarm', edgecolors='k')
    ax.set_title(title)
    ax.set_xlabel('Feature 1')
    ax.set_ylabel('Feature 2')
    ax.grid(True)
    fig.tight_layout()
    plt.show()

# Render the decision regions of each fitted tree, Gini first.
for fitted_tree, plot_title in (
    (clf_gini, "Decision Tree with Gini"),
    (clf_entropy, "Decision Tree with Entropy"),
):
    plot_decision_boundary(fitted_tree, X, y, plot_title)

# Accuracy measured on the same samples the trees were fitted on, so these
# figures describe fit quality rather than generalisation.
gini_train_acc = accuracy_score(y, clf_gini.predict(X))
entropy_train_acc = accuracy_score(y, clf_entropy.predict(X))
print(f"Accuracy (Gini): {gini_train_acc:.2f}")
print(f"Accuracy (Entropy): {entropy_train_acc:.2f}")

In [None]:
# Ground-truth labels go first and predictions second -- the (y_true, y_pred)
# order classification_report expects -- so precision, recall and support are
# attributed to the true classes.
print(classification_report(y, clf_gini.predict(X)))

In [None]:
# Ground-truth labels go first and predictions second -- the (y_true, y_pred)
# order classification_report expects -- so precision, recall and support are
# attributed to the true classes.
print(classification_report(y, clf_entropy.predict(X)))

# Random Forest and Gradient-Boosted Trees

In [None]:
# Create a simple 2D dataset.
# NOTE: this rebinds X and y, replacing the 300-sample dataset the decision
# trees earlier in the notebook were fitted on.
X, y = make_classification(
    n_samples=500, n_features=2, n_informative=2, n_redundant=0,
    n_clusters_per_class=2, class_sep=1, random_state=21
)

# Train classifiers: both ensembles use 100 depth-3 trees and the same seed.
rf = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=21)
# `use_label_encoder` is intentionally not passed: recent XGBoost releases no
# longer use it and warn when it is supplied.
# NOTE(review): assumes XGBoost >= 1.6 -- confirm against the pinned version.
xgb = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1,
                    eval_metric='logloss', random_state=21)

# Each model gets its own timer; the elapsed time is captured right after
# `fit` returns so that only training is measured.
start_time_rf = time.perf_counter()
rf.fit(X, y)
elapsed_rf = time.perf_counter() - start_time_rf
print(f"Random Forest training time: {elapsed_rf:.4f} seconds")

start_time_xgb = time.perf_counter()
xgb.fit(X, y)
elapsed_xgb = time.perf_counter() - start_time_xgb
print(f"XGBoost training time: {elapsed_xgb:.4f} seconds")

# Label the very samples each ensemble was trained on.
y_pred_rf, y_pred_xgb = rf.predict(X), xgb.predict(X)

# Per-class precision / recall / F1 for both models, Random Forest first.
for banner, predicted in (
    ("=== Random Forest Classification Report ===", y_pred_rf),
    ("=== XGBoost Classification Report ===", y_pred_xgb),
):
    print(banner)
    print(classification_report(y, predicted))

# Regular lattice covering the data's bounding box plus a one-unit margin.
h = 0.1  # spacing between neighbouring lattice points
# X has exactly two columns, so the per-column extrema unpack into x and y.
x_min, y_min = X.min(axis=0) - 1
x_max, y_max = X.max(axis=0) + 1
x_ticks = np.arange(x_min, x_max, h)
y_ticks = np.arange(y_min, y_max, h)
xx, yy = np.meshgrid(x_ticks, y_ticks)
grid = np.c_[xx.ravel(), yy.ravel()]

# Class predicted at every lattice point, folded back to the lattice shape.
Z_rf = rf.predict(grid).reshape(xx.shape)
Z_xgb = xgb.predict(grid).reshape(xx.shape)

# Plot decision boundaries: pale colours fill the predicted regions, saturated
# colours mark the samples by their label.
cmap_light = ListedColormap(['#FFAAAA', '#AAAAFF'])
cmap_bold = ListedColormap(['#FF0000', '#0000FF'])

fig, axes = plt.subplots(1, 2, figsize=(10, 4))

# One panel per model, drawn by the same code so the two stay directly
# comparable; axis labels match the single-model decision-boundary plots.
for panel, regions, panel_title in zip(
    axes, (Z_rf, Z_xgb), ("Random Forest", "XGBoost")
):
    panel.contourf(xx, yy, regions, cmap=cmap_light, alpha=0.6)
    panel.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold, s=15)
    panel.set_title(panel_title)
    panel.set_xlabel('Feature 1')
    panel.set_ylabel('Feature 2')

# Keep the added axis labels from colliding between the two panels.
fig.tight_layout()
plt.show()
