In [3]:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

import numpy as np
# Load the iris dataset
iris = load_iris()

# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    random_state=42,
)

# Fit a 100-tree Random Forest on every feature of the training split
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Per-feature importance scores exposed by the fitted forest
importances = rf.feature_importances_

# Feature indices ordered from most important to least important
indices = np.flip(np.argsort(importances))

# Print the feature ranking (1-based rank, feature name, raw importance score)
print("Feature ranking:")
for rank, feature_idx in enumerate(indices, start=1):
    print(f"{rank}. {iris.feature_names[feature_idx]} ({importances[feature_idx]})")

# Keep only the two highest-ranked feature columns and fit a second forest on them
top_two = indices[:2]
X_train_new, X_test_new = X_train[:, top_two], X_test[:, top_two]
rf_new = RandomForestClassifier(n_estimators=100, random_state=42)
rf_new.fit(X_train_new, y_train)

# Score the reduced-feature model on the held-out test split
score = rf_new.score(X_test_new, y_test)
print(f"Accuracy: {score:.2f}")


Feature ranking:
1. petal width (cm) (0.4339818709871682)
2. petal length (cm) (0.4173081338019912)
3. sepal length (cm) (0.10410500706117767)
4. sepal width (cm) (0.04460498814966301)
Accuracy: 1.00


In [5]:
# Display the dataset's list of feature names; the ranking loop in the
# previous cell looks names up in this list by feature index.
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']