# **Machine Learning - Hyperparameter Tuning - GridSearchCV**
-----
-----

- ## Import Required Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

- ## Loading the Dataset

In [2]:
from sklearn import datasets

In [3]:
# Load the Iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

In [4]:
# Echo the loaded dataset object to inspect its raw contents.
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

- ## Creating DataFrame 

In [5]:
# Wrap the raw feature matrix in a DataFrame, labelling each column with the
# dataset's own feature names.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Show the resulting frame.
df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


- ### Adding the **flower** Column & Assigning Values

In [6]:
# Build the 'flower' column in one step: start from the integer class codes
# (aligned to df's index) and translate each code to its species name.
df['flower'] = (
    pd.Series(iris.target, index=df.index)
    .map(lambda code: iris.target_names[code])
)


In [7]:
# Preview the first five rows, now including the 'flower' label column.
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


### Apply *EDA*

In [8]:
# Preview the last five rows of the frame.
df.tail()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


In [9]:
# (number of rows, number of columns)
df.shape

(150, 5)

In [10]:
# Summary statistics. Only the four numeric measurement columns appear in the
# result; the string-valued 'flower' column is not summarised.
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [11]:
# Column dtypes, non-null counts and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   flower             150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [12]:
# Count missing values per column.
df.isnull().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
flower               0
dtype: int64

- ## Data Preparation

In [13]:
# Feature matrix and integer class labels, taken directly from the dataset
# object (not from the DataFrame built above).
X, y = iris.data, iris.target


- ## Splitting the dataset

In [14]:
# Hold out 30% of the samples for testing. random_state pins the shuffle so
# the split -- and every score computed from it -- is identical on each
# Restart & Run All; without it the reported score changes from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)


- ## Model **(SVC)** Creation 

In [15]:
from sklearn import svm

In [16]:
# Baseline: a single RBF-kernel SVC with hand-picked hyperparameters.
model = svm.SVC(C=30, kernel='rbf', gamma='auto')
model.fit(X_train, y_train)

# Score the fitted model on the held-out test split.
test_score = model.score(X_test, y_test)
print("Score is : ", test_score)


Score is :  0.9333333333333333


- ## Using Cross Validation 

In [17]:
from sklearn.model_selection import cross_val_score 

In [18]:
# Experiment: linear kernel, C = 10, 5-fold cross-validation on the full data.
# (gamma is the kernel coefficient for rbf/poly/sigmoid per the scikit-learn
# docs, so it should have no effect on a linear kernel; it is passed only to
# mirror the other experiments.)
linear_svc_c10 = svm.SVC(C=10, kernel='linear', gamma='auto')
sc1 = cross_val_score(linear_svc_c10, X, y, cv=5)

# One score per fold.
print(sc1)

[1.         1.         0.9        0.96666667 1.        ]


In [19]:
# Experiment: linear kernel, C = 20, 5-fold cross-validation on the full data.
# (gamma is the kernel coefficient for rbf/poly/sigmoid per the scikit-learn
# docs, so it should have no effect on a linear kernel; it is passed only to
# mirror the other experiments.)
linear_svc_c20 = svm.SVC(C=20, kernel='linear', gamma='auto')
sc1 = cross_val_score(linear_svc_c20, X, y, cv=5)

# One score per fold.
print(sc1)

[1.         1.         0.9        0.93333333 1.        ]


In [20]:
# Experiment: RBF kernel, C = 20, gamma = 'auto', 5-fold cross-validation.
rbf_svc_c20 = svm.SVC(C=20, kernel='rbf', gamma='auto')
sc1 = cross_val_score(rbf_svc_c20, X, y, cv=5)

# One score per fold.
print(sc1)

[0.96666667 1.         0.9        0.96666667 1.        ]


In [21]:
# Experiment: RBF kernel, C = 10, gamma = 'auto', 10-fold cross-validation.
rbf_svc_c10 = svm.SVC(C=10, kernel='rbf', gamma='auto')
sc1 = cross_val_score(rbf_svc_c10, X, y, cv=10)

# One score per fold (ten values this time).
print(sc1)

[1.         0.93333333 1.         1.         0.86666667 1.
 0.93333333 1.         1.         1.        ]


In [22]:
# Experiment: RBF kernel, C = 10, explicit numeric gamma = 2.5 (a float value
# instead of the 'auto' setting used above), 5-fold cross-validation.
rbf_svc_gamma_2_5 = svm.SVC(C=10, kernel='rbf', gamma=2.5)
sc1 = cross_val_score(rbf_svc_gamma_2_5, X, y, cv=5)

# One score per fold.
print(sc1)

[1.         0.96666667 0.9        0.93333333 1.        ]


## Approach : Use **GridSearchCV**

In [23]:
from sklearn.model_selection import GridSearchCV 


In [24]:

# Search space: every combination of C and kernel is tried (3 x 2 = 6 candidates).
d = dict(C=[1, 10, 20], kernel=['rbf', 'linear'])

# Exhaustive search with 5-fold cross-validation for each candidate.
# Training-fold scores are not recorded, only the test-fold scores.
gscv = GridSearchCV(
    estimator=svm.SVC(gamma='auto'),
    param_grid=d,
    cv=5,
    return_train_score=False,
)

gscv.fit(X, y)

In [25]:
# Raw per-candidate results of the search: timings, parameter values and
# per-split test scores, keyed by name.
gscv.cv_results_

{'mean_fit_time': array([0.00039954, 0.        , 0.        , 0.00281763, 0.00039983,
        0.00039988]),
 'std_fit_time': array([0.00048934, 0.        , 0.        , 0.00414953, 0.00048969,
        0.00048975]),
 'mean_score_time': array([0.00072675, 0.        , 0.        , 0.        , 0.0006    ,
        0.00020013]),
 'std_score_time': array([0.00039169, 0.        , 0.        , 0.        , 0.0004899 ,
        0.00040026]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value=999999),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],
 'split0_test_score': ar

- ### Use GridSearchCV, **Creating DataFrame with results** 

In [26]:
# Tabulate the grid-search results: one row per parameter combination.
df2 = pd.DataFrame(data=gscv.cv_results_)

# Display the full results table.
df2

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.0004,0.000489,0.000727,0.000392,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.0,0.0,0.0,0.0,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.0,0.0,0.0,0.0,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.002818,0.00415,0.0,0.0,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.0004,0.00049,0.0006,0.00049,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5
5,0.0004,0.00049,0.0002,0.0004,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6


- ### Filtering Some important Columns

In [27]:
# Keep only the columns needed to compare candidates: the two tuned
# parameters and the mean cross-validated test score.
summary_columns = ['param_C', 'param_kernel', 'mean_test_score']
result = df2.loc[:, summary_columns]

result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


### Find: Best Parameters Found by the Grid Search

In [28]:
# Parameter combination selected by the search. Note that the results table
# shows three candidates tied on mean test score; the one reported here is the
# first of them.
gscv.best_params_

{'C': 1, 'kernel': 'rbf'}

### Find: Best Mean Cross-Validation Score from the Grid Search

In [29]:
# Mean cross-validated test score of the selected parameter combination.
gscv.best_score_

0.9800000000000001

## **Use RandomizedSearchCV**

In [35]:
from sklearn import svm, datasets 
import pandas as pd 
from sklearn.model_selection import RandomizedSearchCV 
 
iris = datasets.load_iris()

# Take features and labels straight from the freshly loaded dataset so this
# cell does not depend on X / y left behind by an earlier cell.
X, y = iris.data, iris.target

# Search space: 3 values of C x 2 kernels = 6 candidate combinations.
d = {
    'C': [1, 10, 20],
    'kernel': ['rbf', 'linear'],
}

# Randomized search evaluates only n_iter=2 of the 6 combinations, each with
# 5-fold cross-validation. random_state fixes which two are drawn so the
# printed table is reproducible across runs.
rs = RandomizedSearchCV(
    svm.SVC(gamma='auto'),
    d,
    cv=5,
    return_train_score=False,
    n_iter=2,
    random_state=42,
)

rs.fit(X, y)

cols = ['param_C', 'param_kernel', 'mean_test_score']

# Store the results under their own name rather than overwriting `df`, which
# holds the iris measurements elsewhere in this notebook.
rs_results = pd.DataFrame(rs.cv_results_, columns=cols)

print(rs_results)


   param_C param_kernel  mean_test_score
0       10          rbf             0.98
1        1       linear             0.98
