# Feature selection and RFE

In [12]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt 
from sklearn.datasets import make_regression 
from sklearn.linear_model import LinearRegression 
from sklearn.feature_selection import RFE 

In [13]:
# Synthetic regression data: 100 samples, 10 features, noise level 10.
# The fixed random_state keeps the generated data reproducible across runs.
X, y = make_regression(
    n_samples=100,
    n_features=10,
    noise=10,
    random_state=42,
)

In [14]:
# Base estimator: handed to RFE below and later refit on the selected features.
model = LinearRegression()

## Recursive Feature Elimination

In [15]:
# Recursive feature elimination around `model`, configured to keep 5 features.
rfe = RFE(estimator=model, n_features_to_select=5)

In [None]:
# Run the elimination on the full feature matrix; this populates
# rfe.support_ and rfe.ranking_, which the following cells read.
rfe.fit(X, y)

In [None]:
# Convert the boolean support mask into the column indices of the kept features.
selected_features = np.flatnonzero(rfe.support_)
print(f"Selected Features: {selected_features}")

In [None]:
# Per-feature ranking from the fitted RFE, one entry per column of X.
print(f"Feature Rankings: {rfe.ranking_}")

In [19]:
# Keep only the columns RFE selected (all rows, chosen feature indices).
X_selected = np.take(X, selected_features, axis=1)

In [None]:
# Fit the linear model on the reduced feature matrix only.
model.fit(X_selected, y)

In [None]:
# Report the fitted parameters: one coefficient per selected feature, then the intercept.
print(
    f"Model Coefficients: {model.coef_}",
    f"Model Intercept: {model.intercept_}",
    sep="\n",
)

In [42]:
def visualize_rfe(rfe_rank,
                  title='Feature Importance Ranking',
                  x_lab='Feature Index',
                  y_lab='Ranking',
                  retained_color='grey',
                  eliminated_color='black'):
    """Show a bar chart of feature rankings, one bar per feature.

    Bars whose rank equals 1 are drawn in ``retained_color``; all other
    bars are drawn in ``eliminated_color``. The figure is displayed with
    ``plt.show()`` and nothing is returned.

    Args:
        rfe_rank: Sized sequence of ranks indexed by feature position
            (for example the ``ranking_`` attribute of a fitted RFE).
        title: Figure title.
        x_lab: Label for the x-axis (feature positions).
        y_lab: Label for the y-axis (rank values).
        retained_color: Matplotlib color for rank-1 features.
        eliminated_color: Matplotlib color for every other feature.
    """
    # Local import keeps this cell self-contained; matplotlib is already a
    # dependency of the notebook via pyplot.
    from matplotlib.ticker import MaxNLocator

    # A plain list of color names is enough for plt.bar.
    colors = [retained_color if rank == 1 else eliminated_color
              for rank in rfe_rank]

    plt.figure(figsize=(8, 5))
    plt.bar(range(len(rfe_rank)), rfe_rank, color=colors)

    # Feature indices and ranks are integers; without this the default
    # locator can place fractional ticks (e.g. 0.5, 1.5) on small inputs.
    axes = plt.gca()
    axes.xaxis.set_major_locator(MaxNLocator(integer=True))
    axes.yaxis.set_major_locator(MaxNLocator(integer=True))

    plt.xlabel(x_lab)
    plt.ylabel(y_lab)
    plt.title(title)
    plt.show()

In [None]:
# Plot the rankings from the fitted RFE using the default labels and colors.
visualize_rfe(rfe_rank=rfe.ranking_)

## Automating Feature Selection with RFECV

In [44]:
from sklearn.feature_selection import RFECV
from sklearn.model_selection import cross_val_score

In [None]:
# RFECV with a fresh LinearRegression, 5-fold cross-validation and R² as the
# scoring metric; unlike the RFE cell above, no target feature count is given.
rfecv = RFECV(estimator=LinearRegression(), 
              cv=5, 
              scoring="r2")
# Fit on the full feature matrix; cv_results_ is read from this fit afterwards.
rfecv.fit(X, y)

In [48]:
# Mean cross-validation test scores recorded by the fitted RFECV;
# plotted below against the number of features.
cv_scores = rfecv.cv_results_["mean_test_score"]

In [None]:
# x-axis: feature counts 1..len(cv_scores). This pairs entry i of cv_scores
# with i+1 features — NOTE(review): holds only while RFECV keeps its default
# step / min_features_to_select; confirm if those are ever changed.
feature_counts = range(1, len(cv_scores) + 1)

plt.figure(figsize=(8, 5))
plt.plot(
    feature_counts,
    cv_scores,
    color='black',
    linestyle='dashed',
    marker='o',
)

# Labelling and grid styling.
plt.title("Optimal Feature Selection with RFECV")
plt.xlabel("Number of Features Selected")
plt.ylabel("Cross-Validated R² Score")
plt.grid(True, linestyle="--", alpha=0.6)
plt.show()