# Training Pipelines

Models can be concatenated.  Intermediate steps transform the data (with the `transform` method), the last step estimates it (regresses, classifies, etc.). 

In [2]:
from sklearn.datasets import samples_generator
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.pipeline import Pipeline
from sklearn.ensemble import ExtraTreesClassifier

## Generating Data

In [6]:
X, Y = samples_generator.make_classification(n_samples=150, n_features=25, n_classes=3, n_informative=6, n_redundant=0, random_state=7)

## Building the Pipeline

In [22]:
k_best = SelectKBest(f_regression, k=9)
# Select nine of the most useful features in the data set

classifier = ExtraTreesClassifier(n_estimators=60, max_depth=4, n_jobs=-1)
# Classify the data set using these features

pipeline = Pipeline([("selector", k_best), ("erf", classifier)])
# Individual parameters can be tuned, too.  Use pipeline.blockname__param
# You can cache intermediate transforms, too

pipeline.fit(X, Y)

features = pipeline.named_steps["selector"].get_support(indices=True)
# The nine features selected

## Results

In [23]:
output = pipeline.predict(X) # Classify each point

print("Score: {0:.3f}".format(pipeline.score(X, Y)))
print("Selected Features: {0}".format(features))
print("Predicted Output:\n{0}".format(output))

Score: 0.900
Selected Features: [ 4  7  8 12 14 17 18 22 23]
Predicted Output:
[1 2 2 0 2 0 2 1 0 1 1 2 1 0 2 2 1 0 0 1 0 2 1 1 2 2 0 0 1 2 0 2 1 0 2 2 1
 1 2 2 2 0 1 2 2 1 2 2 1 0 1 2 2 2 2 0 2 2 0 2 2 0 1 0 2 1 1 1 1 2 1 1 0 2
 0 0 1 2 2 0 0 2 2 2 2 0 0 0 2 2 2 1 2 0 2 0 2 2 0 0 1 1 1 1 2 2 2 2 0 1 1
 0 2 1 0 0 1 1 1 1 0 0 0 1 2 0 0 0 2 1 2 0 0 1 0 1 1 0 1 1 1 1 2 2 0 1 2 0
 2 2]
