In [2]:
from sklearn.decomposition import PCA
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score, f1_score

# cross validation imports
from sklearn.model_selection import cross_val_score, StratifiedKFold


In [3]:
df = pd.read_csv('../Datasets/cases/Wisconsin/BreastCancer.csv', index_col=0)
X = df.drop('Class', axis=1)
y = df['Class']

### Hyperparameter Tuning using GridSearchCV


We don't have to create loops for each hyperparameter. Rather, we have to create a dictionary of parameters where we have to pass different options to search or range for numerical parameters and pass the dictionary to GridSearch.

The dictionary keys must match parameter's names of the estimator and the values will be the correponding possible values and range.

In [7]:
# import for gridsearchcv
from sklearn.model_selection import GridSearchCV

lr = LogisticRegression()
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=25)

params = { 'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
          'C' : np.linspace(0.001, 4, 20)
}


gcv = GridSearchCV(lr, param_grid=params, cv=kfold, scoring='roc_auc')
gcv.fit(X, y)



0,1,2
,estimator,LogisticRegression()
,param_grid,"{'C': array([1.0000...00000000e+00]), 'solver': ['lbfgs', 'liblinear', ...]}"
,scoring,'roc_auc'
,n_jobs,
,refit,True
,cv,StratifiedKFo... shuffle=True)
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,np.float64(0.2114736842105263)
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,100


In [8]:
print('best params', gcv.best_params_)
print('best roc_auc score', gcv.best_score_)

best params {'C': np.float64(0.2114736842105263), 'solver': 'lbfgs'}
best roc_auc score 0.9945549588684017


Tuning with max_iter

In [11]:
params = { 'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
          'C' : np.linspace(0.001, 4, 20),
          'max_iter': [100, 2000, 2500, 3000, 3500, 4000]
}


gcv = GridSearchCV(lr, param_grid=params, cv=kfold, scoring='roc_auc')
gcv.fit(X, y)



0,1,2
,estimator,LogisticRegression()
,param_grid,"{'C': array([1.0000...00000000e+00]), 'max_iter': [100, 2000, ...], 'solver': ['lbfgs', 'liblinear', ...]}"
,scoring,'roc_auc'
,n_jobs,
,refit,True
,cv,StratifiedKFo... shuffle=True)
,verbose,0
,pre_dispatch,'2*n_jobs'
,error_score,
,return_train_score,False

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,np.float64(0.2114736842105263)
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'sag'
,max_iter,2000


In [12]:
print('best params', gcv.best_params_)
print('best roc_auc score', gcv.best_score_)

best params {'C': np.float64(0.2114736842105263), 'max_iter': 2000, 'solver': 'sag'}
best roc_auc score 0.9946908893753392


### TYPES OF GRID SEARCH

1. Randomized

In [21]:
from sklearn.model_selection import RandomizedSearchCV

params = { 'solver': ['lbfgs', 'liblinear', 'newton-cg', 'newton-cholesky', 'sag', 'saga'],
          'C' : np.linspace(0.001, 4, 20),
          'max_iter': [100, 2000, 2500, 3000, 3500, 4000]
}


rgvc = RandomizedSearchCV(lr, param_distributions=params, cv=kfold, scoring='roc_auc', n_iter=10, random_state=25, verbose=3)

rgvc.fit(X,y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END C=2.947631578947368, max_iter=4000, solver=liblinear;, score=0.993 total time=   0.0s
[CV 2/5] END C=2.947631578947368, max_iter=4000, solver=liblinear;, score=0.996 total time=   0.0s
[CV 3/5] END C=2.947631578947368, max_iter=4000, solver=liblinear;, score=0.990 total time=   0.0s
[CV 4/5] END C=2.947631578947368, max_iter=4000, solver=liblinear;, score=0.999 total time=   0.0s
[CV 5/5] END C=2.947631578947368, max_iter=4000, solver=liblinear;, score=0.994 total time=   0.0s
[CV 1/5] END C=3.7895263157894736, max_iter=2500, solver=liblinear;, score=0.993 total time=   0.0s
[CV 2/5] END C=3.7895263157894736, max_iter=2500, solver=liblinear;, score=0.996 total time=   0.0s
[CV 3/5] END C=3.7895263157894736, max_iter=2500, solver=liblinear;, score=0.990 total time=   0.0s
[CV 4/5] END C=3.7895263157894736, max_iter=2500, solver=liblinear;, score=0.999 total time=   0.0s
[CV 5/5] END C=3.7895263157894736, max_iter=



[CV 2/5] END C=0.8428947368421053, max_iter=2000, solver=lbfgs;, score=0.997 total time=   0.0s
[CV 3/5] END C=0.8428947368421053, max_iter=2000, solver=lbfgs;, score=0.989 total time=   0.0s
[CV 4/5] END C=0.8428947368421053, max_iter=2000, solver=lbfgs;, score=0.999 total time=   0.0s
[CV 5/5] END C=0.8428947368421053, max_iter=2000, solver=lbfgs;, score=0.994 total time=   0.0s
[CV 1/5] END C=2.3162105263157895, max_iter=3500, solver=newton-cg;, score=0.993 total time=   0.0s
[CV 2/5] END C=2.3162105263157895, max_iter=3500, solver=newton-cg;, score=0.997 total time=   0.0s
[CV 3/5] END C=2.3162105263157895, max_iter=3500, solver=newton-cg;, score=0.989 total time=   0.0s
[CV 4/5] END C=2.3162105263157895, max_iter=3500, solver=newton-cg;, score=0.999 total time=   0.0s
[CV 5/5] END C=2.3162105263157895, max_iter=3500, solver=newton-cg;, score=0.995 total time=   0.0s
[CV 1/5] END C=2.5266842105263154, max_iter=4000, solver=lbfgs;, score=0.993 total time=   0.0s
[CV 2/5] END C=2.526

0,1,2
,estimator,LogisticRegression()
,param_distributions,"{'C': array([1.0000...00000000e+00]), 'max_iter': [100, 2000, ...], 'solver': ['lbfgs', 'liblinear', ...]}"
,n_iter,10
,scoring,'roc_auc'
,n_jobs,
,refit,True
,cv,StratifiedKFo... shuffle=True)
,verbose,3
,pre_dispatch,'2*n_jobs'
,random_state,25

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,np.float64(3.7895263157894736)
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'liblinear'
,max_iter,2500


In [18]:
print(rgvc.best_params_)
print(rgvc.best_score_)

{'solver': 'saga', 'max_iter': 3500, 'C': np.float64(1.8952631578947368)}
0.9945554565591168
