In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris, load_breast_cancer
import numpy as np

In [2]:
iris = load_iris()
xtrain,xtest,ytrain,ytest = train_test_split(iris.data,iris.target,stratify=iris.target,random_state=0)

In [3]:
import numpy as np
print("unique dist {}".format(np.bincount(ytrain)))
print("unique test {}".format(np.bincount(ytest)))

unique dist [37 37 38]
unique test [13 13 12]


In [4]:
sc = StandardScaler()
xtrainscaled = sc.fit_transform(xtrain)
xtestscaled = sc.transform(xtest)

In [7]:
log = LogisticRegression()
log.fit(xtrain,ytrain)
print("log on train_data {}".format(log.score(xtrain,ytrain)))
print("log on test_data {}".format(log.score(xtest,ytest)))

log on train_data 0.9553571428571429
log on test_data 1.0


In [9]:
pipe = Pipeline([("preprocess",StandardScaler()),("classifier",LogisticRegression())])
pipe.fit(xtrain,ytrain)
print("pipe on train_data {}".format(pipe.score(xtrain,ytrain)))
print("pipe on test_data {}".format(pipe.score(xtrain,ytrain)))

pipe on train_data 0.9553571428571429
pipe on test_data 0.9553571428571429


In [10]:
param = {"classifier__C":[0.01,0.1,1,10],
        "preprocess":[None,StandardScaler()]}

In [16]:
pipe.named_steps["classifier"]

LogisticRegression()

In [17]:
grid = GridSearchCV(pipe,param_grid=param,cv=3)

In [18]:
grid.fit(xtrain,ytrain)
print("grid on train_data {}".format(grid.score(xtrain,ytrain)))
print("grid on test_data {}".format(grid.score(xtest,ytest)))

Traceback (most recent call last):
  File "C:\Users\Sidi\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\Sidi\anaconda3\lib\site-packages\sklearn\pipeline.py", line 335, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "C:\Users\Sidi\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1407, in fit
    fold_coefs_ = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\Sidi\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\Sidi\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\Sidi\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\Sidi\anaconda3\lib\site-packages\joblib\_

grid on train_data 0.9732142857142857
grid on test_data 1.0


In [22]:
grid.best_estimator_.named_steps

{'preprocess': StandardScaler(), 'classifier': LogisticRegression(C=10)}

In [23]:
import pandas as pd
df = pd.DataFrame(grid.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_classifier__C,param_preprocess,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.020686,0.009455,0.0,0.0,0.01,,"{'classifier__C': 0.01, 'preprocess': None}",0.763158,0.837838,0.810811,0.803936,0.030873,6
1,0.019137,0.007903,0.0,0.0,0.01,StandardScaler(),"{'classifier__C': 0.01, 'preprocess': Standard...",0.815789,0.837838,0.864865,0.839497,0.020069,5
2,0.031663,0.006197,0.0,0.0,0.1,,"{'classifier__C': 0.1, 'preprocess': None}",0.868421,0.945946,0.945946,0.920104,0.036546,3
3,0.008125,0.002548,0.0,0.0,0.1,StandardScaler(),"{'classifier__C': 0.1, 'preprocess': StandardS...",0.815789,0.918919,0.918919,0.884542,0.048616,4
4,0.048211,0.004473,0.0,0.0,1.0,,"{'classifier__C': 1, 'preprocess': None}",0.894737,,0.972973,,,7
5,0.004893,0.000223,0.0,0.0,1.0,StandardScaler(),"{'classifier__C': 1, 'preprocess': StandardSca...",0.894737,0.972973,0.945946,0.937885,0.032444,2
6,0.030055,0.000219,0.0,0.0,10.0,,"{'classifier__C': 10, 'preprocess': None}",0.921053,,,,,8
7,0.010767,0.000982,0.0,0.0,10.0,StandardScaler(),"{'classifier__C': 10, 'preprocess': StandardSc...",0.947368,0.972973,0.918919,0.94642,0.022078,1
