## Automate Machine Learning Workflows with Pipelines

In [12]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [13]:
dataset = pd.read_csv('dataset/diabetes.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

In [14]:
from sklearn.model_selection import KFold, cross_val_score
kfold = KFold(n_splits=10, random_state=0, shuffle=True)

### Data Preparation and Modeling Pipeline

In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('lda', LinearDiscriminantAnalysis()))

pipeline = Pipeline(estimators)
results = cross_val_score(pipeline, X, y, cv=kfold)
print(results.mean())

0.7747265892002735


### Feature Extraction and Modeling Pipeline
<pre>
            Original X (20 features)
                  ↓
    ┌─────────────┴─────────────┐
    │                           │
 PCA(n_components=3)    SelectKBest(k=6)
    │                           │
 (n_samples, 3)           (n_samples, 6)
    └─────────────┬─────────────┘
                  ↓
   FeatureUnion → (n_samples, 9)

</pre>

In [16]:
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectKBest

features = []
features.append(('pca', PCA(n_components=3)))
features.append(('select_best', SelectKBest(k=6)))
feature_union = FeatureUnion(features)

estimators = []
estimators.append(('feature_union', feature_union))
estimators.append(('logistic', LogisticRegression()))
model = Pipeline(estimators)

results = cross_val_score(model, X, y, cv=kfold)
results.mean()

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.7747436773752563