[Reference](https://medium.com/geekculture/feature-selection-with-boruta-in-python-676e3877e596)

In [4]:
pip install boruta

Collecting boruta
  Downloading Boruta-0.3-py3-none-any.whl (56 kB)
[?25l[K     |█████▉                          | 10 kB 16.6 MB/s eta 0:00:01[K     |███████████▋                    | 20 kB 10.1 MB/s eta 0:00:01[K     |█████████████████▍              | 30 kB 5.9 MB/s eta 0:00:01[K     |███████████████████████▏        | 40 kB 5.4 MB/s eta 0:00:01[K     |█████████████████████████████   | 51 kB 4.1 MB/s eta 0:00:01[K     |████████████████████████████████| 56 kB 2.2 MB/s 
Installing collected packages: boruta
Successfully installed boruta-0.3


In [5]:
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy
import pandas as pd
import numpy as np

# Load the diabetes dataset from scikit-learn; as_frame=True keeps the
# column names available on X for the report printed below.
X, y = load_diabetes(return_X_y=True, as_frame=True)

# Random-forest regressor that Boruta uses to score feature importance.
model = RandomForestRegressor(n_estimators=100, max_depth=5, random_state=42)

# Configure Boruta.
# random_state is fixed on BorutaPy itself: the procedure is stochastic, so
# without a seed here the confirmed/rejected sets can differ between runs.
feat_selector = BorutaPy(
    estimator=model,
    n_estimators='auto',
    max_iter=10,  # number of iterations to perform
    random_state=42,
    verbose=2,
)

# Run the selection.
# N.B.: X and y must be numpy arrays, hence the explicit conversion.
feat_selector.fit(X.to_numpy(), y.to_numpy())

# Report, for every feature, whether Boruta confirmed it and its ranking.
print("\n------Support and Ranking for each feature------")
for feature_name, is_supported, rank in zip(
    X.columns, feat_selector.support_, feat_selector.ranking_
):
    verdict = "Passes the test: " if is_supported else "Doesn't pass the test: "
    print(verdict, feature_name, " - Ranking: ", rank)

Iteration: 	1 / 10
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	2 / 10
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	3 / 10
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	4 / 10
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	5 / 10
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	6 / 10
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	7 / 10
Confirmed: 	0
Tentative: 	10
Rejected: 	0
Iteration: 	8 / 10
Confirmed: 	3
Tentative: 	2
Rejected: 	5
Iteration: 	9 / 10
Confirmed: 	3
Tentative: 	2
Rejected: 	5


BorutaPy finished running.

Iteration: 	10 / 10
Confirmed: 	3
Tentative: 	1
Rejected: 	5

------Support and Ranking for each feature------
Doesn't pass the test:  age  - Ranking:  5
Doesn't pass the test:  sex  - Ranking:  8
Passes the test:  bmi  - Ranking:  1
Passes the test:  bp  - Ranking:  1
Doesn't pass the test:  s1  - Ranking:  6
Doesn't pass the test:  s2  - Ranking:  4
Doesn't pass the test:  s3  - Ranking:  3
Doesn't pass the test:  s4 

In [7]:
# Reduce X to the columns Boruta confirmed; transform() is given a numpy array.
X_filtered = feat_selector.transform(X.to_numpy())

print("\n------Selected Features------\n")
print(X_filtered)

# Refit the random forest on the reduced feature matrix, then predict on the
# same rows it was just trained on.
model.fit(X_filtered, y)
predictions = model.predict(X_filtered)

# Place the predictions next to the observed target values for inspection.
df = pd.DataFrame({'pred': predictions, 'observed': y})

print("\n------Predizioni e valori reali------\n")
print(df)

# Root-mean-squared error between predicted and observed values.
# NOTE(review): this is an in-sample error (fit and predict use the same
# data), so it is likely optimistic as an estimate of generalisation error.
residuals = df['pred'] - df['observed']
mse = residuals.pow(2).mean()
rmse = np.sqrt(mse)
print("\n------RMSE------\n", round(rmse, 3))


------Selected Features------

[[ 0.06169621  0.02187235  0.01990842]
 [-0.05147406 -0.02632783 -0.06832974]
 [ 0.04445121 -0.00567061  0.00286377]
 ...
 [-0.01590626  0.01728186 -0.04687948]
 [ 0.03906215  0.00121513  0.04452837]
 [-0.0730303  -0.08141377 -0.00421986]]

------Predizioni e valori reali------

           pred  observed
0    196.511416     151.0
1     89.005197      75.0
2    169.873954     141.0
3    176.305353     206.0
4    108.971879     135.0
..          ...       ...
437  225.770177     178.0
438  105.125558     104.0
439   97.288380     132.0
440  193.988695     220.0
441  111.306584      57.0

[442 rows x 2 columns]

------RMSE------
 44.883
