## Boruta - Exploration

In [1]:
pip install boruta

Note: you may need to restart the kernel to use updated packages.


In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

# Fetch the Iris data and split it into a feature frame and a label array
iris = load_iris()
X = pd.DataFrame(data=iris["data"], columns=iris["feature_names"])
y = iris["target"]

# Quick sanity check: first five feature rows and their labels
print(X.head(n=5))
print(y[:5])


   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2
[0 0 0 0 0]


In [None]:
from boruta import BorutaPy

# Random forest used as the base estimator for Boruta
rf = RandomForestClassifier(max_depth=5, class_weight='balanced', n_jobs=-1)

# Wrap the forest in BorutaPy with a fixed seed
boruta = BorutaPy(estimator=rf, n_estimators='auto', verbose=2, random_state=1)

# Fit on the underlying NumPy array of X together with the labels
boruta.fit(X.values, y)

# Report the features flagged by support_, then those flagged by support_weak_
print("Selected features:", X.columns[boruta.support_].tolist(), sep="\n")
print("Tentative features:", X.columns[boruta.support_weak_].tolist(), sep="\n")


In [3]:
# Tabulate every feature with its Boruta ranking and selection flag,
# ordered by ranking (ascending).
feature_ranking = pd.DataFrame({
    'Feature': X.columns,
    'Ranking': boruta.ranking_,
    'Selected': boruta.support_,
}).sort_values(by='Ranking')

print(feature_ranking)


             Feature  Ranking  Selected
0  sepal length (cm)        1      True
1   sepal width (cm)        1      True
2  petal length (cm)        1      True
3   petal width (cm)        1      True
