### Algorithm Chains and Pipelines

In [2]:
# Notebook-wide setup: plotting/numeric imports and the ggplot matplotlib style.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import mglearn
from matplotlib import style
style.use("ggplot")
# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (see the message it prints below), on top of the explicit
# `np` / `plt` imports above. None of the cells visible here use those bare
# names; consider `%matplotlib inline` instead once it is confirmed that no
# other cell depends on them.
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [9]:
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import MinMaxScaler

# Baseline workflow: scale the features by hand, fit an SVC, then tune C and
# gamma with a cross-validated grid search on the pre-scaled training data.
cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state = 0)

# Fit the scaler on the training split only and reuse it for both splits.
scaler = MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm = SVC()
svm.fit(X_train_scaled, y_train)

# Single-argument print(...) is valid both as a Python 2 statement and as a
# Python 3 function call, so the cell no longer fails to parse on Python 3.
print("Test accuracy score: {:.2f}".format(svm.score(X_test_scaled, y_test)))

param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
}

# Caveat: X_train_scaled comes from a scaler fitted on *all* of X_train, so
# the held-out part of each of the 5 CV splits has already influenced the
# scaling. The cross-validation accuracy reported here can therefore be
# optimistic; searching over a Pipeline that contains the scaler avoids this.
grid = GridSearchCV(SVC(), param_grid = param_grid, cv = 5)
grid.fit(X_train_scaled, y_train)

print("Best cross-validation accuracy: {:.2f}".format(grid.best_score_))
# This is the score on the held-out test split, hence "Test set score".
print("Test set score: {:.2f}".format(grid.score(X_test_scaled, y_test)))
print("Best parameters:  {}".format(grid.best_params_))


Test accuracy score: 0.95
Best cross-validation accuracy: 0.98
Best set score: 0.97
Best parameters:  {'C': 1, 'gamma': 1}


In [11]:
from sklearn.pipeline import Pipeline

# Chain scaling and classification into one estimator: fit() fits the scaler
# and then the SVC on the scaled output; score() applies the fitted scaler to
# its input before scoring, so the raw (unscaled) arrays are passed in.
pipe = Pipeline([("scaler", MinMaxScaler()), ("svm", SVC())])
pipe.fit(X_train, y_train)
# Single-argument print(...) works as a Python 2 statement and as a Python 3
# function call alike.
print("Test score: {:.2f}".format(pipe.score(X_test, y_test)))

# Pipeline parameters are addressed as "<step name>__<parameter>"; the "svm"
# prefix matches the step name given when building `pipe`.
param_grid = {
    'svm__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'svm__gamma': [0.001, 0.01, 0.1, 1, 10, 100],
}

# The search is run on the raw X_train with the whole pipeline as estimator,
# so the scaler is re-fitted inside every cross-validation split rather than
# once on the full training set.
grid = GridSearchCV(pipe, param_grid = param_grid, cv = 5)
grid.fit(X_train, y_train)

print("Best cross-validation accuracy: {:.2f}".format(grid.best_score_))
print("Test set score: {:.2f}".format(grid.score(X_test, y_test)))
print("Best parameters: {}".format(grid.best_params_))

Test score: 0.95
Best cross-validation accuracy: 0.98
Test set score: 0.97
Best parameters: {'svm__C': 1, 'svm__gamma': 1}
