In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_friedman1, fetch_california_housing
from sklearn.feature_selection import RFE, SelectFromModel, SequentialFeatureSelector
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# RFE (Recursive Feature Elimination)

In [2]:
data = make_friedman1()
data

(array([[0.61872753, 0.13286871, 0.06268441, 0.08696222, 0.06166789,
         0.38447693, 0.54038678, 0.69004686, 0.41164291, 0.93419334],
        [0.38038024, 0.17664941, 0.43945562, 0.30472055, 0.02851119,
         0.55151313, 0.04061533, 0.65304534, 0.90059443, 0.84054892],
        [0.08614525, 0.36064419, 0.57217889, 0.84425562, 0.74224878,
         0.08732162, 0.54437779, 0.70212643, 0.03109308, 0.83691398],
        [0.36019674, 0.60832591, 0.89833837, 0.84928749, 0.34479599,
         0.17593135, 0.80510918, 0.66120494, 0.05070365, 0.9470378 ],
        [0.50490944, 0.5079982 , 0.5289278 , 0.19919771, 0.60345661,
         0.53121016, 0.71851357, 0.38877559, 0.00742659, 0.10323672],
        [0.87414876, 0.40789045, 0.85453975, 0.58405567, 0.65091682,
         0.72165315, 0.14433221, 0.60307822, 0.75388884, 0.33347017],
        [0.77843942, 0.62854838, 0.08528813, 0.71450045, 0.7000089 ,
         0.94404287, 0.34182363, 0.71413226, 0.39340968, 0.91845437],
        [0.9591743 , 0.3219

In [3]:
X = data[0]
y = data[1]

In [4]:
print(X.shape)
print(y.shape)

(100, 10)
(100,)


In [5]:
estimator = LinearRegression()

In [6]:
selector = RFE(
    estimator=estimator,
    n_features_to_select=3,
)

In [7]:
selector = selector.fit(X, y)

In [8]:
print("Feature ranking:", selector.ranking_)
print("Selected features:", selector.support_)


Feature ranking: [1 2 6 1 1 3 8 7 5 4]
Selected features: [ True False False  True  True False False False False False]


In [9]:
X_new = selector.transform(X)
X_new.shape

(100, 3)

# SelectFromModel

In [10]:
data = fetch_california_housing()
X = data.data
y = data.target

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [12]:
model = LinearRegression()
model.fit(X_train, y_train)

In [13]:
selector = SelectFromModel(model, threshold="mean", max_features=3)
selector.fit(X_train, y_train)


In [14]:
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

In [15]:
model.fit(X_train_selected, y_train)
y_pred = model.predict(X_test_selected)

In [16]:
print("SelectFromModel - Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("SelectFromModel - Selected features mask:", selector.get_support())
print("SelectFromModel - Number of features selected:", X_train_selected.shape[1])
print("SelectFromModel - Names of features: ", selector.get_feature_names_out())

SelectFromModel - Mean Squared Error: 0.6903587848508181
SelectFromModel - Selected features mask: [ True False False  True False False False  True]
SelectFromModel - Number of features selected: 3
SelectFromModel - Names of features:  ['x0' 'x3' 'x7']


# SequentialFeatureSelection

In [17]:
model = LinearRegression()

In [18]:
selector = SequentialFeatureSelector(model, n_features_to_select=3, direction='forward')

selector.fit(X_train, y_train)

In [19]:
X_train_selected = selector.transform(X_train)
X_test_selected = selector.transform(X_test)

In [20]:
model.fit(X_train_selected, y_train)
y_pred = model.predict(X_test_selected)

In [21]:
print("SequentialFeatureSelector - Mean Squared Error:", mean_squared_error(y_test, y_pred))
print("SequentialFeatureSelector - Selected features mask:", selector.get_support())
print("SequentialFeatureSelector - Number of features selected:", X_train_selected.shape[1])
print("SequentialFeatureSelector - Names of features: ", selector.get_feature_names_out())

SequentialFeatureSelector - Mean Squared Error: 0.6334633000454126
SequentialFeatureSelector - Selected features mask: [ True  True False False False False  True False]
SequentialFeatureSelector - Number of features selected: 3
SequentialFeatureSelector - Names of features:  ['x0' 'x1' 'x6']
