# **Load Dataset**

In [39]:
import pandas as pd
from sklearn.datasets import load_iris
# Load the bundled iris dataset and wrap its feature matrix in a DataFrame
# whose columns carry the dataset's own feature names.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# End the cell with the expression (rather than print) so the notebook
# renders the preview with its rich table display.
df.head()

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2


In [40]:
# Attach the species name for every row. Indexing the target_names array
# with the integer target array performs the code -> name lookup for all
# rows at once, instead of calling a Python lambda once per row.
df['flower'] = iris.target_names[iris.target]
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


# **Cross Validation**

In [41]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
# 3-fold cross-validation scores for a linear-kernel SVC with C=10.
cross_val_score(
    estimator=SVC(kernel='linear', C=10, gamma='auto'),
    X=iris.data,
    y=iris.target,
    cv=3,
)

array([1.  , 0.94, 0.98])

In [42]:
# Same 3-fold evaluation as the linear case, now with the RBF kernel.
cross_val_score(
    estimator=SVC(kernel='rbf', C=10, gamma='auto'),
    X=iris.data,
    y=iris.target,
    cv=3,
)

array([0.98, 0.98, 0.98])

# **Find the Average from Scratch**

In [43]:
import numpy as np
# Manual grid search: evaluate every (kernel, C) pair with 3-fold
# cross-validation and keep the mean score of each pair.
kernal = ['linear', 'rbf']
c = [1, 10, 20]
avg_score = {}  # maps "<kernel> <C>" -> mean cross-validation score
for k_val in kernal:
    for c_val in c:
        c_score = cross_val_score(
            SVC(kernel=k_val, C=c_val, gamma='auto'), iris.data, iris.target, cv=3
        )
        label = k_val + ' ' + str(c_val)
        # Store into the dict rather than rebinding avg_score to a list, so
        # every combination's result is still available after the loop.
        avg_score[label] = np.average(c_score)
        # Printed in the same [label, mean] form as before.
        print([label, avg_score[label]])

['linear 1', 0.9933333333333333]
['linear 10', 0.9733333333333333]
['linear 20', 0.9666666666666667]
['rbf 1', 0.9733333333333333]
['rbf 10', 0.98]
['rbf 20', 0.9666666666666667]


# **Find the Average with GridSearchCV**

In [44]:
from sklearn.model_selection import GridSearchCV
# Let GridSearchCV run the same C x kernel sweep that the manual loop did,
# using 3 folds and skipping the training-score columns.
clf = GridSearchCV(
    estimator=SVC(gamma='auto'),
    param_grid={'C': [1, 10, 20], 'kernel': ['rbf', 'linear']},
    cv=3,
    return_train_score=False,
)

In [45]:
# Fit the grid search on the full iris data, then display the raw
# cv_results_ dictionary (timings, parameters and per-split test scores).
clf.fit(iris.data,iris.target)
clf.cv_results_

{'mean_fit_time': array([0.00114544, 0.0009052 , 0.00103982, 0.00099818, 0.00119781,
        0.00092371]),
 'std_fit_time': array([7.06498078e-05, 2.96332544e-05, 4.11248821e-05, 9.94390005e-05,
        1.15828742e-04, 3.77627400e-05]),
 'mean_score_time': array([0.00078058, 0.00066781, 0.00080156, 0.000995  , 0.00133197,
        0.00064278]),
 'std_score_time': array([1.67584124e-05, 3.17428172e-05, 4.62654923e-05, 2.53449703e-04,
        8.16704582e-04, 2.27645009e-05]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'ker

In [46]:
# Turn the cv_results_ dictionary into a table, one row per parameter
# combination.
# NOTE(review): this rebinds `df`, which until now held the iris
# measurements; the cells below rely on the new meaning.
df=pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001145,7.1e-05,0.000781,1.7e-05,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.98,0.98,0.96,0.973333,0.009428,3
1,0.000905,3e-05,0.000668,3.2e-05,1,linear,"{'C': 1, 'kernel': 'linear'}",1.0,1.0,0.98,0.993333,0.009428,1
2,0.00104,4.1e-05,0.000802,4.6e-05,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.98,0.98,0.98,0.98,0.0,2
3,0.000998,9.9e-05,0.000995,0.000253,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,0.94,0.98,0.973333,0.024944,3
4,0.001198,0.000116,0.001332,0.000817,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.98,0.94,0.98,0.966667,0.018856,5
5,0.000924,3.8e-05,0.000643,2.3e-05,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,0.94,0.96,0.966667,0.024944,5


In [47]:
# Keep only the searched parameters and the mean test score of each row.
df.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.973333
1,1,linear,0.993333
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


In [48]:
# List every attribute and method name on the fitted search object: an
# interactive exploration aid for finding fields such as best_score_ and
# best_params_.
dir(clf)

['__abstractmethods__',
 '__annotations__',
 '__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__sklearn_clone__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_abc_impl',
 '_build_request_for_signature',
 '_check_feature_names',
 '_check_n_features',
 '_check_refit_for_multimetric',
 '_estimator_type',
 '_format_results',
 '_get_default_requests',
 '_get_metadata_request',
 '_get_param_names',
 '_get_tags',
 '_more_tags',
 '_parameter_constraints',
 '_repr_html_',
 '_repr_html_inner',
 '_repr_mimebundle_',
 '_required_parameters',
 '_run_search',
 '_select_best_index',
 '_validate_data',
 '_validate_params',
 'best_estimator_',
 'best_index_',
 'best_params_',
 'best_score_',
 '

In [49]:
# Best score found by the grid search; it equals the highest
# mean_test_score in the results table.
clf.best_score_

0.9933333333333333

In [50]:
# Parameter combination that achieved the best score.
clf.best_params_

{'C': 1, 'kernel': 'linear'}

# **RandomizedSearchCV**

In [51]:
from sklearn.model_selection import RandomizedSearchCV
# Randomized search: rather than trying all 6 (C, kernel) combinations,
# evaluate only n_iter=2 sampled ones with 3-fold cross-validation.
# random_state pins the sampling so that re-running the notebook
# evaluates the same candidates and reproduces the same results.
clf = RandomizedSearchCV(
    SVC(gamma='auto'),
    {
        'C': [1, 10, 20],
        'kernel': ['rbf', 'linear'],
    },
    cv=3,
    return_train_score=False,
    n_iter=2,
    random_state=42,
)

In [52]:
# Fit the randomized search on the full iris data, then display the raw
# cv_results_ dictionary for the sampled candidates.
clf.fit(iris.data,iris.target)
clf.cv_results_

{'mean_fit_time': array([0.00100207, 0.00094517]),
 'std_fit_time': array([1.02462494e-04, 1.35351403e-05]),
 'mean_score_time': array([0.0006752 , 0.00066765]),
 'std_score_time': array([4.21297615e-05, 1.00123188e-05]),
 'param_kernel': masked_array(data=['linear', 'rbf'],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'param_C': masked_array(data=[1, 10],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'params': [{'kernel': 'linear', 'C': 1}, {'kernel': 'rbf', 'C': 10}],
 'split0_test_score': array([1.  , 0.98]),
 'split1_test_score': array([1.  , 0.98]),
 'split2_test_score': array([0.98, 0.98]),
 'mean_test_score': array([0.99333333, 0.98      ]),
 'std_test_score': array([0.00942809, 0.        ]),
 'rank_test_score': array([1, 2], dtype=int32)}

In [53]:
# Tabulate the randomized-search results, one row per sampled candidate.
# NOTE(review): `df` is rebound again here, replacing its previous contents.
df=pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,mean_test_score,std_test_score,rank_test_score
0,0.001002,0.000102,0.000675,4.2e-05,linear,1,"{'kernel': 'linear', 'C': 1}",1.0,1.0,0.98,0.993333,0.009428,1
1,0.000945,1.4e-05,0.000668,1e-05,rbf,10,"{'kernel': 'rbf', 'C': 10}",0.98,0.98,0.98,0.98,0.0,2


In [54]:
# Narrow the table to the sampled parameters and their mean test score.
df.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.993333
1,10,rbf,0.98


In [55]:
# Best score among the candidates the randomized search evaluated.
clf.best_score_

0.9933333333333333

# **Hyperparameter Tuning**

In [56]:
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier


In [57]:
# Candidate estimators and the hyperparameter grid to search for each one.
# Every entry maps a display name to:
#   'model'  - an unfitted estimator instance
#   'params' - the parameter grid to search for that estimator
models_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params': {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear'],
        },
    },
    'random_forest': {
        # Fixed seed: without it the forest is built differently on every
        # run, so the reported best score and best n_estimators would drift.
        'model': RandomForestClassifier(random_state=42),
        'params': {
            'n_estimators': [1, 5, 10],
        },
    },
    'logistic_regression': {
        # multi_class='auto' is omitted: it only restated the default and
        # the argument is deprecated in newer scikit-learn releases.
        'model': LogisticRegression(solver='liblinear'),
        'params': {
            'C': [1, 5, 10],
        },
    },
}



In [58]:
# Run a 5-fold grid search for each candidate model and collect one summary
# record (model name, best score, best parameters) per model.
scores = []
for model_name, mp in models_params.items():
    clf = GridSearchCV(
        estimator=mp['model'],
        param_grid=mp['params'],
        cv=5,
        return_train_score=False,
    )
    clf.fit(iris.data, iris.target)
    record = dict(
        model=model_name,
        best_score=clf.best_score_,
        best_params=clf.best_params_,
    )
    scores.append(record)

In [59]:
# Present the per-model summary records as a table with a fixed column order.
summary_columns = ['model', 'best_score', 'best_params']
df = pd.DataFrame(data=scores, columns=summary_columns)
df

Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.966667,{'n_estimators': 10}
2,logistic_regression,0.966667,{'C': 5}
