In [58]:
import sklearn
import numpy as np
from sklearn.datasets import load_iris
import pandas as pd

from sklearn.model_selection import (cross_val_score,
                                     GridSearchCV,
                                     RandomizedSearchCV)

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

from pprint import pprint

In [21]:
# Load the iris dataset. Note that `df` is the dict-like container returned
# by load_iris() (it supports both df['data'] and df.data), not a DataFrame.
df = load_iris()
# Feature matrix and class labels.
X = df.data
y = df.target

In [24]:
# Turn the label vector into a single-column 2-D array so it can be joined
# to the feature matrix column-wise in the next cell.
y = y.reshape(-1, 1)

In [25]:
# Append the label column to the right of the feature columns.
data = np.hstack((X, y))
data.shape

(150, 5)

In [26]:
# Wrap the combined array in a DataFrame (default integer column labels)
# and preview the first five rows.
df_ = pd.DataFrame(data=data)
df_.head(n=5)

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,0.0
1,4.9,3.0,1.4,0.2,0.0
2,4.7,3.2,1.3,0.2,0.0
3,4.6,3.1,1.5,0.2,0.0
4,5.0,3.6,1.4,0.2,0.0


In [22]:
# 5-fold cross-validation scores for one hand-picked SVM configuration.
cross_val_score(
    SVC(C=10, kernel='linear', gamma='auto'),
    df.data,
    df.target,
    cv=5,
)

array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [40]:
# Exhaustive search over every C/kernel combination (3 x 2 = 6 candidates),
# each evaluated with 5-fold cross-validation.
clf = GridSearchCV(
    estimator=SVC(gamma='auto'),
    param_grid={'C': [1, 10, 20], 'kernel': ['rbf', 'linear']},
    cv=5,
    return_train_score=False,
)

In [41]:
%%capture
clf.fit(df.data,df.target)

In [42]:
# Pretty-print the raw cross-validation results dictionary of the fitted grid search.
pprint(clf.cv_results_)

{'mean_fit_time': array([0.00138078, 0.00061669, 0.00073366, 0.00061307, 0.0007328 ,
       0.00064335]),
 'mean_score_time': array([0.00050678, 0.00032048, 0.00035429, 0.00031228, 0.00035119,
       0.00033221]),
 'mean_test_score': array([0.98      , 0.98      , 0.98      , 0.97333333, 0.96666667,
       0.96666667]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
             mask=[False, False, False, False, False, False],
       fill_value='?',
            dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
             mask=[False, False, False, False, False, False],
       fill_value='?',
            dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
            {'C': 1, 'kernel': 'linear'},
            {'C': 10, 'kernel': 'rbf'},
            {'C': 10, 'kernel': 'linear'},
            {'C': 20, 'kernel': 'rbf'},
            {'C': 20, 'kernel': 'linear'}],
 'rank_test_score': array([1, 1, 1, 4, 5, 6], dtype=int32),
 'sp

In [43]:
# Tabular view of the grid-search results: one row per candidate.
result_df = pd.DataFrame(data=clf.cv_results_)

In [44]:
# Preview the first five candidates.
result_df.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001381,0.000679,0.000507,0.000167,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.000617,1.9e-05,0.00032,1e-05,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.000734,2.8e-05,0.000354,1.6e-05,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.000613,2.2e-05,0.000312,8e-06,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.000733,1.3e-05,0.000351,9e-06,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5


In [45]:
# Keep only the hyper-parameter columns and the mean cross-validation score.
result_df.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


In [52]:
# Summarise the winning grid-search candidate, one field per line.
# The fields are joined with sep="\n" instead of embedding "\n" in each
# string: with the default " " separator every line after the first was
# printed with a stray leading space, followed by an extra blank line.
print(
    f"best Score :: \t{clf.best_score_}",
    f"best Estimator:: {clf.best_estimator_}",
    f"best Params:: {clf.best_params_}",
    f"best Index:: \t{clf.best_index_}",
    sep="\n",
)

best Score :: 	0.9800000000000001
 best Estimator:: SVC(C=1, gamma='auto')
 best Params:: {'C': 1, 'kernel': 'rbf'}
 best Index:: 	0



### Randomized Search CV

In [54]:
# Randomized search: instead of trying all six C/kernel combinations, sample
# only n_iter=2 of them and score each with 5-fold cross-validation.
rs = RandomizedSearchCV(
    SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear']
    },
    cv=5,
    return_train_score=False,
    n_iter=2,
    # Fix the seed so the same two candidates are drawn on every
    # Restart & Run All; without it the results change between runs.
    random_state=42,
)

In [56]:
# Fit the sampled candidates on the full iris data.
rs.fit(X=df.data, y=df.target)

RandomizedSearchCV(cv=5, estimator=SVC(gamma='auto'), n_iter=2,
                   param_distributions={'C': [1, 10, 20],
                                        'kernel': ['rbf', 'linear']})

In [57]:
# Tabular view of the randomized-search results (one row per sampled candidate).
pd.DataFrame(data=rs.cv_results_).head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001159,0.00067,0.0006,0.000279,linear,20,"{'kernel': 'linear', 'C': 20}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,2
1,0.000624,3.6e-05,0.000311,8e-06,linear,10,"{'kernel': 'linear', 'C': 10}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,1


## Comparing different models

In [59]:
# Candidate models and the hyper-parameter grid to search for each one.
# Each entry maps a display name to:
#   'model'  - an unfitted estimator instance
#   'params' - the parameter grid handed to GridSearchCV
model_params = {
    'svm': {
        'model': SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 30],
            'kernel': ['rbf', 'linear']
        }
    },
    'random_forest': {
        # Seeded so the forest (and therefore its best score / best params)
        # is reproducible across runs; an unseeded forest gives results that
        # change every time the notebook is re-executed.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [1, 5, 10]
        }
    },
    'logistic Regression': {
        'model': LogisticRegression(solver='liblinear', multi_class='auto'),
        'params': {
            'C': [1, 5, 10]
        }
    }
}

In [60]:
# Run a 5-fold grid search for every candidate model and collect, per model,
# the best mean cross-validation score and the parameters that achieved it.
scores = []

for model_name, mp in model_params.items():
    # Use a dedicated name rather than rebinding `clf`: the earlier cells
    # that inspect `clf` (cv_results_, best_params_, ...) would otherwise
    # silently report on the last model of this loop when re-run.
    search = GridSearchCV(
        mp['model'],
        mp['params'],
        cv=5,
        return_train_score=False,
    )
    search.fit(df.data, df.target)

    scores.append({
        'model': model_name,
        'best_score': search.best_score_,
        'best_params': search.best_params_,
    })

In [67]:
# Show the best score and parameters collected for each model.
pprint(scores)

[{'best_params': {'C': 1, 'kernel': 'rbf'},
  'best_score': 0.9800000000000001,
  'model': 'svm'},
 {'best_params': {'n_estimators': 5},
  'best_score': 0.9666666666666668,
  'model': 'random_forest'},
 {'best_params': {'C': 5},
  'best_score': 0.9666666666666668,
  'model': 'logistic Regression'}]
