In [1]:
from sklearn.datasets import samples_generator
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier

In [2]:
X, y = samples_generator.make_classification(
    n_samples=150,
    n_features=25,
    n_classes=3,
    n_informative=6,
    n_redundant=0,
    random_state=7
)

X[:5]

array([[ 1.01856035, -0.1850947 ,  0.33953529,  0.88377939, -2.22145741,
        -0.71205954,  0.46313981, -2.42424476, -0.07998485,  0.03653191,
        -1.27561144, -1.5670243 , -0.82216114, -0.47040384,  0.98701872,
        -0.34439804,  0.02056176, -1.65437764,  0.94696772, -0.22854693,
         0.40599781,  0.16376894, -0.89722827,  2.43356744, -0.69119524],
       [-0.27783108,  0.99897481,  0.53479038,  1.37774353,  1.5508449 ,
         0.48987688,  0.43270835,  2.04438699,  0.92061477, -2.7363943 ,
         0.35109063, -0.47040716,  0.65870878, -0.77338381, -0.45308357,
         1.33497158,  1.26571843, -0.87354849, -1.15667003,  0.72074654,
        -1.04392163,  0.13473012,  0.22153721,  1.2023667 ,  0.30568521],
       [ 0.03110801,  1.14511831,  0.28923459, -1.50807294,  2.48635502,
        -1.61291006,  0.8857136 ,  2.29172098, -1.54121704, -2.00025812,
        -0.50057328,  1.30147258, -0.05141326, -0.53541711,  1.02896185,
         2.61048917,  0.95206572, -2.13238317, -0

In [3]:
y[:5]

array([0, 2, 2, 0, 2])

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [5]:
k_best_selector = SelectKBest(f_regression, k=9)
k_best_selector

SelectKBest(k=9, score_func=<function f_regression at 0x1284fe940>)

In [6]:
model = ExtraTreesClassifier(n_estimators=60, max_depth=4)

In [7]:
pipeline = Pipeline([("selector", k_best_selector), ("erf", model)])
pipeline

Pipeline(steps=[('selector',
                 SelectKBest(k=9,
                             score_func=<function f_regression at 0x1284fe940>)),
                ('erf', ExtraTreesClassifier(max_depth=4, n_estimators=60))])

In [8]:
pipeline.set_params(selector__k=7, erf__n_estimators=30)

Pipeline(steps=[('selector',
                 SelectKBest(k=7,
                             score_func=<function f_regression at 0x1284fe940>)),
                ('erf', ExtraTreesClassifier(max_depth=4, n_estimators=30))])

In [9]:
pipeline.fit(X_train, y_train)

Pipeline(steps=[('selector',
                 SelectKBest(k=7,
                             score_func=<function f_regression at 0x1284fe940>)),
                ('erf', ExtraTreesClassifier(max_depth=4, n_estimators=30))])

In [10]:
y_pred = pipeline.predict(X_test)
y_pred

array([1, 2, 0, 2, 2, 0, 0, 2, 1, 0, 0, 2, 0, 2, 0, 2, 2, 2, 2, 1, 1, 2,
       1, 1, 1, 2, 2, 0, 2, 0, 2, 0, 2, 1, 0, 0, 2, 0])

In [11]:
print("Score:", pipeline.score(X=X_test, y=y_test))

Score: 0.7368421052631579


In [12]:
status = pipeline.named_steps["selector"].get_support()

selected = [i for i, x in enumerate(status) if x]
print("\nIndices of selected features:", ", ".join([str(x) for x in selected]))


Indices of selected features: 4, 7, 8, 12, 17, 22, 23
