# Feature selection - select from model

In [6]:
from sklearn.datasets import make_regression
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import ExtraTreesRegressor

### Create dataset

* Dataset with 10,000 samples, 50 features
* Only 10 features are informative

In [8]:
# Build a synthetic regression problem: 50 features, of which only 10 carry
# signal, leaving 40 uninformative features for the selector to discard.
# random_state is pinned so that Restart & Run All regenerates the same data
# (without it, every run draws a different dataset and the selection result
# further down cannot be reproduced).
X, y = make_regression(
    n_samples=10_000,
    n_features=50,
    n_informative=10,
    random_state=42,
)
print(f"Number of features = {X.shape[1]}")

Number of features = 50


### Use SelectFromModel

* Not as robust as RFECV
* Lower computational costs
* Better for massive datasets
* Original dataset had 10 informative features out of 50
* SelectFromModel selected 7 of them in the run shown below

In [9]:
# Fit the selector: an extra-trees regressor is trained on the full data and
# SelectFromModel keeps the features whose importance passes its default
# threshold. The estimator is seeded so the importances, and therefore the
# set of selected features, are the same on every run.
selector = SelectFromModel(
    estimator=ExtraTreesRegressor(random_state=42),
).fit(X, y)

# Keep the reduced feature array in a named variable so later cells can reuse
# it instead of re-running transform().
X_selected = selector.transform(X)

print(f"Number of features = {X_selected.shape[1]}")

Number of features = 7
