In [None]:
Pipelines: Testing Methods to Reduce Dimensionality

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA, NMF, TruncatedSVD
from sklearn.manifold import Isomap
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC, LinearSVC
%matplotlib inline


# Two-stage pipeline: a dimensionality reducer feeding a classifier.
# PCA and SVC here are only the initial occupants of each step; the step
# names ('reduce_dim', 'classify') are what a parameter grid addresses.
pipe = Pipeline(
    steps=[
        ("reduce_dim", PCA()),
        ("classify", SVC()),
    ]
)



# Fixed seed so the stochastic estimators in the grid (TruncatedSVD's
# randomized solver, LinearSVC's coordinate-descent solver) produce the same
# cross-validation scores on every fresh run of the notebook.
RANDOM_SEED = 0

# Search space: every combination of reducer x n_components x classifier x C.
# A bare step name ('reduce_dim', 'classify') swaps the whole estimator in
# that pipeline step; '<step>__<param>' sets a parameter on whichever
# estimator currently occupies the step.
param_grid = [
    {
        'reduce_dim': [
            PCA(random_state=RANDOM_SEED),
            NMF(random_state=RANDOM_SEED),
            Isomap(),  # Isomap takes no random_state argument
            TruncatedSVD(random_state=RANDOM_SEED),
        ],
        'reduce_dim__n_components': [2, 3],
        'classify': [SVC(), LinearSVC(random_state=RANDOM_SEED)],
        'classify__C': [1, 10, 100, 1000],
    },
]



# Load the dataset before building the search so the inputs are in place.
iris = load_iris()

# Exhaustive 3-fold cross-validated search over param_grid on all CPU cores.
# return_train_score=True is passed explicitly: newer scikit-learn releases
# default it to False, and the *_train_score columns of cv_results_ (shown in
# the results table of this notebook) are only computed when it is enabled.
grid = GridSearchCV(
    pipe,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
    return_train_score=True,
)
grid.fit(iris.data, iris.target)

# Parameter combination with the best mean cross-validated score.
grid.best_params_



{'classify': SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,
   decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
   max_iter=-1, probability=False, random_state=None, shrinking=True,
   tol=0.001, verbose=False),
 'classify__C': 10,
 'reduce_dim': PCA(copy=True, iterated_power='auto', n_components=3, random_state=None,
   svd_solver='auto', tol=0.0, whiten=False),
 'reduce_dim__n_components': 3}

In [3]:
# Mean cross-validated score achieved by the best parameter combination.
grid.best_score_

0.97999999999999998

In [5]:
# Tabulate every candidate the grid search evaluated: one row per parameter
# combination, with columns for fit/score timings, per-split and mean
# scores, and the rank of each candidate by mean test score.
# (pandas is imported in the notebook's top import cell.)
results_df = pd.DataFrame(grid.cv_results_)
results_df

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_classify,param_classify__C,param_reduce_dim,param_reduce_dim__n_components,params,rank_test_score,split0_test_score,split0_train_score,split1_test_score,split1_train_score,split2_test_score,split2_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,1.181333,0.002000,0.966667,0.963458,"SVC(C=10, cache_size=200, class_weight=None, c...",1,"PCA(copy=True, iterated_power='auto', n_compon...",2,"{u'classify__C': 1, u'reduce_dim': PCA(copy=Tr...",11,0.980392,0.959596,0.941176,0.979798,0.979167,0.950980,0.834724,1.123916e-07,0.018302,0.012078
1,0.030000,0.001000,0.973333,0.983363,"SVC(C=10, cache_size=200, class_weight=None, c...",1,"PCA(copy=True, iterated_power='auto', n_compon...",3,"{u'classify__C': 1, u'reduce_dim': PCA(copy=Tr...",3,0.980392,0.969697,0.960784,1.000000,0.979167,0.980392,0.019131,1.123916e-07,0.009021,0.012548
2,0.111333,0.003000,0.973333,0.976728,"SVC(C=10, cache_size=200, class_weight=None, c...",1,"NMF(alpha=0.0, beta_loss='frobenius', init=Non...",2,"{u'classify__C': 1, u'reduce_dim': NMF(alpha=0...",3,1.000000,0.969697,0.941176,0.989899,0.979167,0.970588,0.024998,0.000000e+00,0.024581,0.009320
3,0.202000,0.002000,0.900000,0.913547,"SVC(C=10, cache_size=200, class_weight=None, c...",1,"NMF(alpha=0.0, beta_loss='frobenius', init=Non...",3,"{u'classify__C': 1, u'reduce_dim': NMF(alpha=0...",60,0.960784,0.888889,0.901961,0.959596,0.833333,0.892157,0.073544,0.000000e+00,0.051766,0.032589
4,0.189000,0.009000,0.933333,0.943454,"SVC(C=10, cache_size=200, class_weight=None, c...",1,"Isomap(eigen_solver='auto', max_iter=None, n_c...",2,"{u'classify__C': 1, u'reduce_dim': Isomap(eige...",48,0.941176,0.919192,0.921569,0.979798,0.937500,0.931373,0.082426,4.320365e-03,0.008575,0.026176
5,0.043333,0.011333,0.940000,0.950386,"SVC(C=10, cache_size=200, class_weight=None, c...",1,"Isomap(eigen_solver='auto', max_iter=None, n_c...",3,"{u'classify__C': 1, u'reduce_dim': Isomap(eige...",45,0.960784,0.949495,0.921569,0.989899,0.937500,0.911765,0.005185,7.586547e-03,0.016260,0.031904
6,0.030000,0.001000,0.966667,0.966726,"SVC(C=10, cache_size=200, class_weight=None, c...",1,"TruncatedSVD(algorithm='randomized', n_compone...",2,"{u'classify__C': 1, u'reduce_dim': TruncatedSV...",11,0.980392,0.959596,0.941176,0.979798,0.979167,0.960784,0.033297,8.164374e-04,0.018302,0.009256
7,0.003333,0.000667,0.973333,0.983363,"SVC(C=10, cache_size=200, class_weight=None, c...",1,"TruncatedSVD(algorithm='randomized', n_compone...",3,"{u'classify__C': 1, u'reduce_dim': TruncatedSV...",3,0.980392,0.969697,0.960784,1.000000,0.979167,0.980392,0.000471,4.714827e-04,0.009021,0.012548
8,0.003333,0.001333,0.960000,0.973460,"SVC(C=10, cache_size=200, class_weight=None, c...",10,"PCA(copy=True, iterated_power='auto', n_compon...",2,"{u'classify__C': 10, u'reduce_dim': PCA(copy=T...",25,0.980392,0.959596,0.921569,1.000000,0.979167,0.960784,0.001247,4.713704e-04,0.027588,0.018773
9,0.003667,0.000667,0.980000,0.973460,"SVC(C=10, cache_size=200, class_weight=None, c...",10,"PCA(copy=True, iterated_power='auto', n_compon...",3,"{u'classify__C': 10, u'reduce_dim': PCA(copy=T...",1,0.980392,0.959596,0.960784,1.000000,1.000000,0.960784,0.001700,4.714266e-04,0.015925,0.018773
