In [None]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

In [None]:
# 1. Loading dataset
# load_wine() returns a container object; pull out the pieces used by the
# rest of the notebook.
wine = load_wine()
X, y = wine.data, wine.target
feature_names, target_names = wine.feature_names, wine.target_names

In [None]:
# 2. First look at the dataset
# Build the frame in a single expression: the feature matrix labelled with
# the feature names, plus the class label appended as a 'target' column.
df = pd.DataFrame(X, columns=feature_names).assign(target=y)

# Bare last expression so the notebook renders the frame.
df

In [None]:
# 3. Calculate feature importance for Decision Tree Classifier

# Create and fit the model (random_state is fixed so reruns give the same tree)
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X, y)

# Save the feature importances to a dataframe, most important feature first
dt_feat_imp = pd.DataFrame({
    'Feature': feature_names,
    'Importance': dt.feature_importances_
}).sort_values(by='Importance', ascending=False)

# Print the feature importances. sep="\n" puts the table on its own line;
# with the default separator the label sits on the same line as the table
# header and pushes the column names out of alignment with the values.
print("Decision Tree Feature Importance:", dt_feat_imp, sep="\n")

# Re-sort ascending for the plot: barh draws the first row at the bottom,
# so the most important feature ends up at the top of the chart.
dt_feat_imp = dt_feat_imp.sort_values(by='Importance', ascending=True)

# Plot the feature importances
fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(dt_feat_imp['Feature'], dt_feat_imp['Importance'], color='lightcoral')
ax.set_title('Decision Tree Feature Importance')
ax.set_xlabel('Importance')
plt.show()

In [None]:
# 4. Calculate feature importance for Random Forest Classifier

# Create and fit the model (random_state is fixed so reruns give the same forest)
rf = RandomForestClassifier(random_state=42)
rf.fit(X, y)

# Save the feature importances to a dataframe, most important feature first
rf_feat_imp = pd.DataFrame({
    'Feature': feature_names,
    'Importance': rf.feature_importances_
}).sort_values(by='Importance', ascending=False)

# Print the feature importances. sep="\n" puts the table on its own line;
# with the default separator the label sits on the same line as the table
# header and pushes the column names out of alignment with the values.
print("Random Forest Feature Importance:", rf_feat_imp, sep="\n")

# Re-sort ascending for the plot: barh draws the first row at the bottom,
# so the most important feature ends up at the top of the chart.
rf_feat_imp = rf_feat_imp.sort_values(by='Importance', ascending=True)

# Plot the feature importances
fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(rf_feat_imp['Feature'], rf_feat_imp['Importance'], color='mediumseagreen')
ax.set_title('Random Forest Feature Importance')
ax.set_xlabel('Importance')
plt.show()

In [None]:
# 4. Calculate feature importance for Gradient Boosting Classifier

# Create and fit the model (random_state is fixed so reruns give the same ensemble)
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X, y)

# Save the feature importances to a dataframe, most important feature first
gb_feat_imp = pd.DataFrame({
    'Feature': feature_names,
    'Importance': gb.feature_importances_
}).sort_values(by='Importance', ascending=False)

# Print the feature importances. sep="\n" puts the table on its own line;
# with the default separator the label sits on the same line as the table
# header and pushes the column names out of alignment with the values.
print("Gradient Boosting Feature Importance:", gb_feat_imp, sep="\n")

# Re-sort ascending for the plot: barh draws the first row at the bottom,
# so the most important feature ends up at the top of the chart.
gb_feat_imp = gb_feat_imp.sort_values(by='Importance', ascending=True)

# Plot the feature importances
fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(gb_feat_imp['Feature'], gb_feat_imp['Importance'], color='royalblue')
ax.set_title('Gradient Boosting Feature Importance')
ax.set_xlabel('Importance')
plt.show()

In [None]:
# 5. Calculate the average feature importance across all models

# Create unsorted dataframes: one per fitted model, each keyed by 'Feature'
# so they can be joined column-wise below.
dt_imp_df = pd.DataFrame({'Feature': feature_names, 'DecisionTree': dt.feature_importances_})
rf_imp_df = pd.DataFrame({'Feature': feature_names, 'RandomForest': rf.feature_importances_})
gb_imp_df = pd.DataFrame({'Feature': feature_names, 'GradientBoosting': gb.feature_importances_})

# Merge the dataframes on 'Feature' and add the row-wise mean of the three
# per-model importances as 'AverageImportance'.
avg_feat_imp = (
    dt_imp_df
    .merge(rf_imp_df, on='Feature')
    .merge(gb_imp_df, on='Feature')
    .assign(
        AverageImportance=lambda merged: merged[
            ['DecisionTree', 'RandomForest', 'GradientBoosting']
        ].mean(axis=1)
    )
)
avg_feat_imp_sorted = avg_feat_imp.sort_values(by='AverageImportance', ascending=False)

# Print the average feature importances, highest first. sep="\n" starts the
# table on its own line without the stray separator space that
# print("...\n", frame) inserts before the header row.
print(
    "Average Feature Importance across models:",
    avg_feat_imp_sorted[['Feature', 'AverageImportance']],
    sep="\n",
)

# Re-sort ascending for the plot: barh draws the first row at the bottom,
# so the feature with the highest average ends up at the top of the chart.
avg_feat_imp_sorted_plot = avg_feat_imp_sorted.sort_values(by='AverageImportance', ascending=True)

# Plot the average feature importances
fig, ax = plt.subplots(figsize=(10, 5))
ax.barh(avg_feat_imp_sorted_plot['Feature'], avg_feat_imp_sorted_plot['AverageImportance'], color='mediumpurple')
ax.set_title('Average Feature Importance across Models')
ax.set_xlabel('Average Importance')
plt.show()