In [19]:
import numpy as np
import pandas as p
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from plot_region import *

In [6]:
X, y = load_wine(return_X_y=True)

# Make a train/test split using 30% test size
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=5)

In [9]:
from sklearn.pipeline import Pipeline
ppln = Pipeline([("scalar", StandardScaler()), ("estimator", SVC())])

In [10]:
ppln.get_params()

{'memory': None,
 'steps': [('scalar',
   StandardScaler(copy=True, with_mean=True, with_std=True)),
  ('estimator', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
     decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
     kernel='rbf', max_iter=-1, probability=False, random_state=None,
     shrinking=True, tol=0.001, verbose=False))],
 'scalar': StandardScaler(copy=True, with_mean=True, with_std=True),
 'estimator': SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
   decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
   kernel='rbf', max_iter=-1, probability=False, random_state=None,
   shrinking=True, tol=0.001, verbose=False),
 'scalar__copy': True,
 'scalar__with_mean': True,
 'scalar__with_std': True,
 'estimator__C': 1.0,
 'estimator__cache_size': 200,
 'estimator__class_weight': None,
 'estimator__coef0': 0.0,
 'estimator__decision_function_shape': 'ovr',
 'estimator__degree': 3,
 'estimator__gamma': 'auto_deprecated',
 'estimato

In [22]:
Cs = [0.01, 0.1, 1, 10, 100]
Krns = ['linear', 'poly', 'rbf', 'sigmoid']
Gammas = [1, 10, 100]
param_grid = {'estimator__C' : Cs, 'estimator__kernel': Krns, 'estimator__gamma' : Gammas}
grid_search = GridSearchCV(ppln, param_grid, iid=False, cv=5)
grid_search.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('scalar', StandardScaler(copy=True, with_mean=True, with_std=True)), ('estimator', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))]),
       fit_params=None, iid=False, n_jobs=None,
       param_grid={'estimator__C': [0.01, 0.1, 1, 10, 100], 'estimator__kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'estimator__gamma': [1, 10, 100]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [12]:
print(grid_search.best_params_)
print(grid_search.best_index_)
print(grid_search.best_estimator_)

{'estimator__C': 0.1, 'estimator__gamma': 1, 'estimator__kernel': 'linear'}
12
Pipeline(memory=None,
     steps=[('scalar', StandardScaler(copy=True, with_mean=True, with_std=True)), ('estimator', SVC(C=0.1, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=1, kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False))])


In [16]:
def GetFormatedRes(grid_search):
    return (
    p.DataFrame({
        "mean_test_score": grid_search.cv_results_["mean_test_score"],
        "mean_fit_time": grid_search.cv_results_["mean_fit_time"]})
      .join(p.io.json.json_normalize(grid_search.cv_results_["params"]).add_prefix("param_"))
)

In [21]:
GetFormatedRes(grid_search)[:5]

Unnamed: 0,mean_test_score,mean_fit_time,param_estimator__C,param_estimator__gamma,param_estimator__kernel
0,0.975641,0.003999,0.01,1,linear
1,0.959949,0.0016,0.01,1,poly
2,0.427282,0.003999,0.01,1,rbf
3,0.427282,0.003199,0.01,1,sigmoid
4,0.975641,0.002398,0.01,10,linear


In [20]:
print(accuracy_score(y_test, grid_search.best_estimator_.predict(X_test)))
print(accuracy_score(y_test, grid_search.predict(X_test)))

0.9629629629629629
0.9629629629629629


In [38]:
from sklearn.linear_model import LogisticRegression
ppln2 = Pipeline([("scalar", StandardScaler()), 
                  ("estimator", LogisticRegression(multi_class='auto',  solver='liblinear', max_iter=1000))])
Cs = [0.01, 0.1, 1, 10, 100]
Penalties = ['l1','l2']
param_grid = {'estimator__C' : Cs, 'estimator__penalty': Penalties}
grid_search2 = GridSearchCV(ppln2, param_grid, iid=False, cv=5)
grid_search2.fit(X_train, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=Pipeline(memory=None,
     steps=[('scalar', StandardScaler(copy=True, with_mean=True, with_std=True)), ('estimator', LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=1000, multi_class='auto',
          n_jobs=None, penalty='l2', random_state=None, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False))]),
       fit_params=None, iid=False, n_jobs=None,
       param_grid={'estimator__C': [0.01, 0.1, 1, 10, 100], 'estimator__penalty': ['l1', 'l2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [39]:
GetFormatedRes(grid_search2)

Unnamed: 0,mean_test_score,mean_fit_time,param_estimator__C,param_estimator__penalty
0,0.290205,0.0032,0.01,l1
1,0.967308,0.0024,0.01,l2
2,0.976282,0.0024,0.1,l1
3,0.975641,0.003997,0.1,l2
4,0.967949,0.002398,1.0,l1
5,0.983974,0.004001,1.0,l2
6,0.942949,0.0056,10.0,l1
7,0.983974,0.003199,10.0,l2
8,0.950641,0.0032,100.0,l1
9,0.983974,0.004,100.0,l2


In [40]:
from sklearn.linear_model import LogisticRegression
sc = StandardScaler()
Xft = sc.fit_transform(X_train)
ppln3 = LogisticRegression(multi_class='auto',  solver='liblinear', max_iter=1000)
Cs = [0.01, 0.1, 1, 10, 100]
Penalties = ['l1','l2']
param_grid = {'C' : Cs, 'penalty': Penalties}
grid_search3 = GridSearchCV(ppln3, param_grid, iid=False, cv=5)
grid_search3.fit(Xft, y_train)

GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=1000, multi_class='auto',
          n_jobs=None, penalty='l2', random_state=None, solver='liblinear',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid=False, n_jobs=None,
       param_grid={'C': [0.01, 0.1, 1, 10, 100], 'penalty': ['l1', 'l2']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [41]:
GetFormatedRes(grid_search3)

Unnamed: 0,mean_test_score,mean_fit_time,param_C,param_penalty
0,0.290205,0.0,0.01,l1
1,0.967308,0.0016,0.01,l2
2,0.976282,0.001599,0.1,l1
3,0.975641,0.0008,0.1,l2
4,0.967949,0.0024,1.0,l1
5,0.983974,0.004002,1.0,l2
6,0.942949,0.004,10.0,l1
7,0.983974,0.004,10.0,l2
8,0.958974,0.004001,100.0,l1
9,0.983974,0.001601,100.0,l2


In [46]:
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
c = GradientBoostingClassifier(n_jobs=2)

TypeError: __init__() got an unexpected keyword argument 'n_jobs'