## Gradient Boosting

In [None]:
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.tree import plot_tree

In [None]:
# Fetch the Iris bunch, then pull out the feature matrix and the label vector
data = load_iris()
X, y = data.data, data.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)

In [None]:
# Create a Gradient Boosting Classifier.
# The seed is fixed (matching the seed used for the train/test split) so that
# the fitted trees, and therefore the accuracy, confusion matrix, feature
# importances and tree plot derived from them, are identical from run to run.
clf = GradientBoostingClassifier(random_state=0)

# Train the model on the training portion only
clf.fit(X_train, y_train)

In [None]:
# Make predictions on the test set
y_pred = clf.predict(X_test)

# Accuracy (fraction of test samples classified correctly)
print("Accuracy:", clf.score(X_test, y_test))

# Generate the confusion matrix (rows: true class, columns: predicted class).
# Passing `labels` pins the row/column order to the classifier's classes and
# keeps the matrix square even if a class is absent from the test split.
cm = confusion_matrix(y_test, y_pred, labels=clf.classes_)

# Plot the confusion matrix, labelling the ticks with the species names
# instead of the bare class indices
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    cmap="Blues",
    fmt="d",
    xticklabels=data.target_names,
    yticklabels=data.target_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')

In [None]:
# Bar chart of the fitted model's importance score for each input feature
importances = clf.feature_importances_
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(x=data.feature_names, y=importances, ax=ax)
ax.set_ylabel('Importance')
ax.set_xlabel('Features')
ax.set_title('Feature Importance')

In [None]:
# Take the trees of the first boosting stage (row 0 of `estimators_`).
# For a multiclass problem scikit-learn fits one regression tree per class
# at every stage, so this is a 1-D array of trees.
tree_estimators = clf.estimators_[0, :]

# Rank those trees by depth and pick one. Gradient boosting has no per-tree
# weights, so depth is the ordering criterion here.
# `rank` is zero-based from the deepest tree: 0 = deepest, 1 = second deepest.
# NOTE(review): rank = 1 therefore plots the SECOND-deepest tree; set it to 0
# if the deepest tree is what should be shown.
rank = 1
depths = [tree.tree_.max_depth for tree in tree_estimators]
# A stable sort keeps the choice deterministic when several trees share a depth
tree_index = np.argsort(depths, kind="stable")[-rank - 1]


# Plot the selected tree on the axes created for it.
# `class_names` is intentionally not passed: the boosted trees are regressors,
# and plot_tree only uses class names for classification trees.
fig, ax = plt.subplots(figsize=(10, 10))
_ = plot_tree(
    tree_estimators[tree_index],
    feature_names=data.feature_names,
    filled=True,
    ax=ax,
)