# **Machine Learning: GridSearchCV**
----
-----

- ### Import Required Libraries

In [1]:
import pandas as pd
import numpy as np


- ### Loading the Dataset

In [2]:
from sklearn import datasets

In [3]:
# Load the iris dataset that ships with scikit-learn's `datasets` module.
iris = datasets.load_iris()

# Bare last expression: display the loaded object in full.
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

- ### Creating DataFrame

In [4]:
# Wrap the raw measurement matrix in a DataFrame, labelling each column
# with the matching entry of iris.feature_names.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


- ### Adding a **flower** column with the target values

In [5]:
# Attach the class codes from iris.target as a new 'flower' column.
df['flower'] = iris.target

# Preview the first rows to confirm the column was added.
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


### Replacing the numeric codes with species names in the **flower** column

In [6]:
# Translate the integer class codes into species names by indexing
# iris.target_names with iris.target directly. Reading the codes from
# iris.target (rather than from the column's current contents) keeps this
# cell idempotent: the previous `.apply` indexed target_names with whatever
# was in df['flower'], so a second run failed once the column held strings.
df['flower'] = iris.target_names[iris.target]

df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


- ### Applying **EDA**

In [7]:
# Rows 98-102 by position: the stretch where the label switches from
# versicolor to virginica.
df.iloc[98:103]

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),flower
98,5.1,2.5,3.0,1.1,versicolor
99,5.7,2.8,4.1,1.3,versicolor
100,6.3,3.3,6.0,2.5,virginica
101,5.8,2.7,5.1,1.9,virginica
102,7.1,3.0,5.9,2.1,virginica


In [8]:
# (number of rows, number of columns) of the frame.
df.shape

(150, 5)

In [9]:
# Summary statistics (count, mean, std, quartiles, min/max) per numeric column.
df.describe()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [10]:
# Column names, non-null counts, dtypes and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
 4   flower             150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [11]:
# Number of missing values in each column.
df.isnull().sum()

sepal length (cm)    0
sepal width (cm)     0
petal length (cm)    0
petal width (cm)     0
flower               0
dtype: int64

- ## Data Preparation

In [12]:
# Features and labels are taken from the raw iris arrays, not from `df`.
X, y = iris.data, iris.target

- ## Splitting the Dataset

In [13]:
from sklearn.model_selection import train_test_split

In [14]:
# Pin the shuffle with random_state so the train/test partition -- and every
# score computed from it -- is identical on each Restart & Run All. Without
# it the split is re-drawn on every execution and the scores drift.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)


### Model Creation with **SVC**

In [15]:
from sklearn import svm

In [16]:
# Support vector classifier with a linear kernel and C=10.
# NOTE(review): scikit-learn documents gamma as the kernel coefficient for
# 'rbf', 'poly' and 'sigmoid', so it presumably has no effect with
# kernel='linear' -- confirm before relying on it.
svm1 = svm.SVC(kernel = 'linear' , C = 10 , gamma = 'auto')

In [17]:
# Train on the training split, then report the score on the held-out split.
# fit() returns the estimator itself, so the two calls can be chained.
svm1.fit(X_train, y_train).score(X_test, y_test)

0.9736842105263158

In [18]:
# Same experiment with an RBF kernel: build, train on the training split,
# and score on the held-out split.
svm2 = svm.SVC(kernel='rbf', C=10, gamma='auto')
svm2.fit(X_train, y_train)

svm2.score(X_test, y_test)

0.9473684210526315

## Using **Cross Validation**

In [19]:
from sklearn.model_selection import cross_val_score

In [20]:
# Per-fold scores of an RBF-kernel SVC (C=10) under 5-fold cross-validation
# on the full X, y.
model1 = cross_val_score(
    svm.SVC(kernel='rbf', C=10, gamma='auto'),
    X,
    y,
    cv=5,
)

model1

array([0.96666667, 1.        , 0.96666667, 0.96666667, 1.        ])

In [21]:

# Per-fold scores of a linear-kernel SVC (C=10) under 5-fold cross-validation
# on the full X, y.
model2 = cross_val_score(
    svm.SVC(kernel='linear', C=10, gamma='auto'),
    X,
    y,
    cv=5,
)

model2

array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [22]:

# Per-fold scores of an RBF-kernel SVC with a larger C (30) under 5-fold
# cross-validation on the full X, y.
model3 = cross_val_score(
    svm.SVC(kernel='rbf', C=30, gamma='auto'),
    X,
    y,
    cv=5,
)

model3

array([0.96666667, 1.        , 0.9       , 0.93333333, 1.        ])

## **Use GridSearchCV**

In [23]:
from sklearn.model_selection import GridSearchCV

In [24]:

# Candidate hyper-parameters: every C is paired with every kernel
# (3 x 2 = 6 combinations).
d = {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']}

# Exhaustive search over that grid with 5-fold cross-validation.
gscv = GridSearchCV(
    estimator=svm.SVC(gamma='auto'),
    param_grid=d,
    cv=5,
    return_train_score=False,
)

gscv.fit(X, y)

In [25]:
# Raw per-candidate results of the grid search (timings, parameters, per-split scores).
gscv.cv_results_

{'mean_fit_time': array([0.00014143, 0.        , 0.        , 0.        , 0.        ,
        0.00319662]),
 'std_fit_time': array([0.00028286, 0.        , 0.        , 0.        , 0.        ,
        0.00639324]),
 'mean_score_time': array([0., 0., 0., 0., 0., 0.]),
 'std_score_time': array([0., 0., 0., 0., 0., 0.]),
 'param_C': masked_array(data=[1, 1, 10, 10, 20, 20],
              mask=[False, False, False, False, False, False],
        fill_value=999999),
 'param_kernel': masked_array(data=['rbf', 'linear', 'rbf', 'linear', 'rbf', 'linear'],
              mask=[False, False, False, False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'C': 1, 'kernel': 'rbf'},
  {'C': 1, 'kernel': 'linear'},
  {'C': 10, 'kernel': 'rbf'},
  {'C': 10, 'kernel': 'linear'},
  {'C': 20, 'kernel': 'rbf'},
  {'C': 20, 'kernel': 'linear'}],
 'split0_test_score': array([0.96666667, 0.96666667, 0.96666667, 1.        , 0.96666667,
        1.        ]),
 'split1_test_score': arr

### Creating a DataFrame from the GridSearchCV results

In [26]:
# Tabulate the grid-search results: one row per parameter combination.
df2 = pd.DataFrame(data=gscv.cv_results_)

df2

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.000141,0.000283,0.0,0.0,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.0,0.0,0.0,0.0,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.0,0.0,0.0,0.0,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
3,0.0,0.0,0.0,0.0,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
4,0.0,0.0,0.0,0.0,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5
5,0.003197,0.006393,0.0,0.0,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6


### Keep Only the Important Columns

In [27]:
# Keep only the tuned parameters and the mean cross-validated score.
result = df2.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


- ### Find the Best Parameters

In [28]:
# Parameter combination ranked first by the grid search.
gscv.best_params_

{'C': 1, 'kernel': 'rbf'}

In [29]:
# Mean cross-validated score achieved by the best parameter combination.
gscv.best_score_

0.9800000000000001

## Use **RandomizedSearchCV**

In [30]:
from sklearn.model_selection import RandomizedSearchCV

In [31]:

# Search space: 3 values of C x 2 kernels = 6 possible combinations.
d = {
    'C': [1, 10, 20],
    'kernel': ['rbf', 'linear']
}

# n_iter=2 evaluates only two randomly drawn combinations instead of all six.
# random_state pins which two are drawn, so the sampled candidates -- and
# therefore best_params_ / best_score_ -- are the same on every run. Without
# it each execution could report a different "best" configuration.
rscv = RandomizedSearchCV(
    svm.SVC(gamma='auto'),
    d,
    n_iter=2,
    cv=5,
    return_train_score=False,
    random_state=42,
)

rscv.fit(X, y)

In [32]:
# Raw results for the sampled candidates (timings, parameters, per-split scores).
rscv.cv_results_

{'mean_fit_time': array([0.00076661, 0.        ]),
 'std_fit_time': array([0.00116193, 0.        ]),
 'mean_score_time': array([0.        , 0.00231032]),
 'std_score_time': array([0.        , 0.00462065]),
 'param_kernel': masked_array(data=['rbf', 'linear'],
              mask=[False, False],
        fill_value='?',
             dtype=object),
 'param_C': masked_array(data=[20, 10],
              mask=[False, False],
        fill_value=999999),
 'params': [{'kernel': 'rbf', 'C': 20}, {'kernel': 'linear', 'C': 10}],
 'split0_test_score': array([0.96666667, 1.        ]),
 'split1_test_score': array([1., 1.]),
 'split2_test_score': array([0.9, 0.9]),
 'split3_test_score': array([0.96666667, 0.96666667]),
 'split4_test_score': array([1., 1.]),
 'mean_test_score': array([0.96666667, 0.97333333]),
 'std_test_score': array([0.03651484, 0.03887301]),
 'rank_test_score': array([2, 1])}

### Creating a DataFrame from the **RandomizedSearchCV** results

In [33]:
# Tabulate the randomized-search results: one row per sampled combination.
df3 = pd.DataFrame(data=rscv.cv_results_)

df3

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.000767,0.001162,0.0,0.0,rbf,20,"{'kernel': 'rbf', 'C': 20}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,2
1,0.0,0.0,0.00231,0.004621,linear,10,"{'kernel': 'linear', 'C': 10}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,1


- ### Filter Important Columns

In [34]:
# Keep only the tuned parameters and the mean cross-validated score.
result = df3.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,20,rbf,0.966667
1,10,linear,0.973333


In [35]:
# Best parameter combination among the sampled candidates.
rscv.best_params_

{'kernel': 'linear', 'C': 10}

In [36]:
# Mean cross-validated score of the best sampled combination.
rscv.best_score_

0.9733333333333334