# 7. Tuning + Pipeline

In [46]:
from sklearn import datasets
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LinearRegression

In [6]:
diabets = datasets.load_diabetes()

In [7]:
diabets

{'data': array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
          0.01990842, -0.01764613],
        [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
         -0.06832974, -0.09220405],
        [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
          0.00286377, -0.02593034],
        ...,
        [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
         -0.04687948,  0.01549073],
        [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
          0.04452837, -0.02593034],
        [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
         -0.00421986,  0.00306441]]),
 'target': array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
         69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
         68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
         87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
        259.,  53., 190., 142.,  75., 142., 155., 225.,  59

In [8]:
scaler = StandardScaler()

In [9]:
X = diabets.data

In [10]:
Y = diabets.target

In [12]:
scaler_x = scaler.fit_transform(X)

In [16]:
x_train, x_test, y_train, y_test = train_test_split(scaler_x,Y,test_size=0.25, random_state=0)

In [17]:
model = SVC()

In [18]:
model.fit(x_train, y_train)

SVC()

In [19]:
model.score(x_train,y_train)

0.16012084592145015

-----------

In [40]:
pipe = make_pipeline(StandardScaler(), PolynomialFeatures(), SVC())

In [41]:
pipe

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('polynomialfeatures', PolynomialFeatures()), ('svc', SVC())])

In [24]:
pipe.named_steps['svc'].get_params().keys() # SVC에선 C랑 gamma / Poly 에선 degree

dict_keys(['C', 'break_ties', 'cache_size', 'class_weight', 'coef0', 'decision_function_shape', 'degree', 'gamma', 'kernel', 'max_iter', 'probability', 'random_state', 'shrinking', 'tol', 'verbose'])

In [36]:
param_grid={'polynomialfeatures__degree': np.arange(1,6),
            'svc__C': np.arange(1,5),
            'svc__gamma':['scale','auto']}

In [37]:
param_grid

{'polynomialfeatures__degree': array([1, 2, 3, 4, 5]),
 'svc__C': array([1, 2, 3, 4]),
 'svc__gamma': ['scale', 'auto']}

In [38]:
mod = GridSearchCV(pipe, param_grid, n_jobs=-1)

In [39]:
mod.fit(diabets.data, diabets.target)



GridSearchCV(estimator=Pipeline(steps=[('standardscaler', StandardScaler()),
                                       ('polynomialfeatures',
                                        PolynomialFeatures()),
                                       ('svc', SVC())]),
             n_jobs=-1,
             param_grid={'polynomialfeatures__degree': array([1, 2, 3, 4, 5]),
                         'svc__C': array([1, 2, 3, 4]),
                         'svc__gamma': ['scale', 'auto']})

In [43]:
mod.best_estimator_  # degree 가 -1,  C =1, gamma = default 일 떄 best

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('polynomialfeatures', PolynomialFeatures(degree=1)),
                ('svc', SVC(C=1))])

In [44]:
mod.best_params_

{'polynomialfeatures__degree': 1, 'svc__C': 1, 'svc__gamma': 'scale'}

In [45]:
mod.score(diabets.data, diabets.target)

0.15610859728506787

------------

In [48]:
pipe1 = make_pipeline(StandardScaler(), PolynomialFeatures(),LinearRegression())

In [49]:
pipe1

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('polynomialfeatures', PolynomialFeatures()),
                ('linearregression', LinearRegression())])

In [53]:
pipe1.named_steps['linearregression'].get_params().keys()

dict_keys(['copy_X', 'fit_intercept', 'n_jobs', 'normalize', 'positive'])

In [55]:
param_grid={'polynomialfeatures__degree': np.arange(1,6),
            'linearregression__n_jobs': np.arange(1,5)}
        

In [56]:
param_grid

{'polynomialfeatures__degree': array([1, 2, 3, 4, 5]),
 'linearregression__n_jobs': array([1, 2, 3, 4])}

In [63]:
mod = GridSearchCV(pipe, param_grid, n_jobs=-1)

In [65]:
mod.fit(diabets.data, diabets.target)



ValueError: Invalid parameter linearregression for estimator Pipeline(steps=[('standardscaler', StandardScaler()),
                ('polynomialfeatures', PolynomialFeatures()), ('svc', SVC())]). Check the list of available parameters with `estimator.get_params().keys()`.

In [None]:
mod.best_estimator_ 