<a href="https://colab.research.google.com/github/AyushamMishra/AyushamMishra/blob/main/CV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Hyperparameter tuning using Grid Search CV

In [69]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


In [70]:
df=sns.load_dataset('iris')

In [71]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [72]:
df.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


In [73]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [74]:
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [75]:

df.value_counts('species')

Unnamed: 0_level_0,count
species,Unnamed: 1_level_1
setosa,50
versicolor,50
virginica,50


In [76]:
df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [77]:
X=df.drop('species',axis=1)
y=df['species']

In [78]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [79]:
from sklearn.neighbors import KNeighborsClassifier
knn_model=KNeighborsClassifier(n_neighbors=3)

In [80]:
knn_model.fit(X_train,y_train)

In [81]:
knn_model.score(X_test,y_test)

1.0

In [82]:
from sklearn.svm import SVC
svm_model=SVC(gamma='auto')

In [83]:
svm_model.fit(X_train,y_train)

In [84]:
svm_model.score(X_test,y_test)

1.0

## Lets use grid search cv

In [85]:
from sklearn.model_selection import GridSearchCV

In [86]:
classifier=GridSearchCV(svm_model,{
    'C':[1,10,20,30],'kernel':['rbf','linear']
},cv=5,return_train_score=False)

In [87]:
classifier.fit(X,y)

In [88]:
import pandas as pd

result = pd.DataFrame(classifier.cv_results_)
result[['param_C', 'param_kernel', 'mean_test_score']]


Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667
6,30,rbf,0.96
7,30,linear,0.96


## Hence the grid search cross validation removes overfitting of earlier svm model

## Applying grid search cv on KNN model

In [89]:
knn_classifier = GridSearchCV(
    knn_model,
    {
        'n_neighbors': [3, 5, 7, 9, 13, 15, 17, 25],
        'weights': ['uniform', 'distance'],
        'metric': ['euclidean', 'manhattan', 'minkowski']
    },
    cv=5,
    return_train_score=False,
    n_jobs=-1
)

In [92]:
knn_classifier.fit(X_train,y_train)

In [95]:
knn_result = pd.DataFrame(knn_classifier.cv_results_)
knn_result


Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_metric,param_n_neighbors,param_weights,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.004873,0.001188,0.00524,0.000608,euclidean,3,uniform,"{'metric': 'euclidean', 'n_neighbors': 3, 'wei...",0.958333,1.0,0.875,1.0,0.958333,0.958333,0.045644,1
1,0.003982,0.000738,0.004013,0.001131,euclidean,3,distance,"{'metric': 'euclidean', 'n_neighbors': 3, 'wei...",0.958333,1.0,0.875,1.0,0.958333,0.958333,0.045644,1
2,0.004464,0.001148,0.005394,0.00115,euclidean,5,uniform,"{'metric': 'euclidean', 'n_neighbors': 5, 'wei...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,22
3,0.003813,0.0011,0.004319,0.001574,euclidean,5,distance,"{'metric': 'euclidean', 'n_neighbors': 5, 'wei...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,22
4,0.004492,0.002501,0.005052,0.000995,euclidean,7,uniform,"{'metric': 'euclidean', 'n_neighbors': 7, 'wei...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,22
5,0.004001,0.001084,0.004557,0.001754,euclidean,7,distance,"{'metric': 'euclidean', 'n_neighbors': 7, 'wei...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,22
6,0.003524,0.000261,0.005395,0.001512,euclidean,9,uniform,"{'metric': 'euclidean', 'n_neighbors': 9, 'wei...",0.958333,0.916667,0.833333,1.0,0.958333,0.933333,0.056519,46
7,0.003311,0.000123,0.004293,0.000966,euclidean,9,distance,"{'metric': 'euclidean', 'n_neighbors': 9, 'wei...",0.958333,0.958333,0.833333,1.0,0.958333,0.941667,0.056519,22
8,0.004095,0.001001,0.005207,0.000696,euclidean,13,uniform,"{'metric': 'euclidean', 'n_neighbors': 13, 'we...",1.0,0.916667,0.833333,1.0,0.958333,0.941667,0.062361,22
9,0.003264,0.000307,0.003947,0.000898,euclidean,13,distance,"{'metric': 'euclidean', 'n_neighbors': 13, 'we...",1.0,0.958333,0.833333,1.0,0.958333,0.95,0.061237,7


In [97]:
knn_result[['param_metric'	,'param_n_neighbors'	,'param_weights','mean_test_score']]

Unnamed: 0,param_metric,param_n_neighbors,param_weights,mean_test_score
0,euclidean,3,uniform,0.958333
1,euclidean,3,distance,0.958333
2,euclidean,5,uniform,0.941667
3,euclidean,5,distance,0.941667
4,euclidean,7,uniform,0.941667
5,euclidean,7,distance,0.941667
6,euclidean,9,uniform,0.933333
7,euclidean,9,distance,0.941667
8,euclidean,13,uniform,0.941667
9,euclidean,13,distance,0.95


## The overfit knn model is rectified.