<a href="https://colab.research.google.com/github/RifatMuhtasim/Machine_Learning/blob/main/Miscellaneous_Topics/Hyper_Parameter_Tuning_using_GridSearchCV.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Load the Iris dataset that ships with scikit-learn.
# (load_iris is imported in the notebook's top import cell.)
iris = load_iris()

# List the attributes exposed by the returned dataset object.
dir(iris)

['DESCR',
 'data',
 'data_module',
 'feature_names',
 'filename',
 'frame',
 'target',
 'target_names']

# Using train_test_split to hold out a test set

In [4]:
# Hold out 25% of the samples for evaluation. A fixed random_state makes the
# split (and therefore the score displayed below) identical on every run, and
# stratify preserves the class proportions in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    iris['data'],
    iris['target'],
    test_size=0.25,
    random_state=42,
    stratify=iris['target'],
)

# Fit an RBF-kernel SVM on the training part and report its accuracy on the
# held-out part.
svm_model = SVC(kernel="rbf", C=10, gamma='auto')
svm_model.fit(X_train, y_train)
svm_model.score(X_test, y_test)

0.9210526315789473

# Using cross_val_score

In [5]:
# 5-fold cross-validated score of a linear-kernel SVM on the full dataset;
# the displayed array holds one score per fold.
cross_val_score(
    estimator=SVC(C=10, kernel="linear", gamma="auto"),
    X=iris['data'],
    y=iris['target'],
    cv=5,
)

array([1.        , 1.        , 0.9       , 0.96666667, 1.        ])

In [6]:
# Manual grid search: cross-validate every (kernel, C) combination and store
# the mean of the five fold scores under a "<kernel>_<C>" key.
kernel_values = ['linear', 'rbf']
C_values = [1, 10, 20, 30]

avg_score = {
    f"{kernel}_{c}": cross_val_score(
        SVC(kernel=kernel, C=c, gamma="auto"), iris['data'], iris['target'], cv=5
    ).mean()
    for kernel in kernel_values
    for c in C_values
}

avg_score

{'linear_1': 0.9800000000000001,
 'linear_10': 0.9733333333333334,
 'linear_20': 0.9666666666666666,
 'linear_30': 0.96,
 'rbf_1': 0.9800000000000001,
 'rbf_10': 0.9800000000000001,
 'rbf_20': 0.9666666666666668,
 'rbf_30': 0.96}

# GridSearchCV

In [7]:
# Candidate values for the exhaustive search: 3 C values x 2 kernels.
C_values = [1, 10, 20]
kernel_values = ['linear', 'rbf']

# Evaluate every combination with 5-fold cross-validation. fit() returns the
# fitted search object, so construction and fitting are chained.
clf = GridSearchCV(
    estimator=SVC(gamma='auto'),
    param_grid={'C': C_values, 'kernel': kernel_values},
    cv=5,
    return_train_score=False,
).fit(iris['data'], iris['target'])

# One row per parameter combination (timings, per-split scores, rank).
df = pd.DataFrame(clf.cv_results_)
df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.008205,0.006621,0.00195,0.001232,1,linear,"{'C': 1, 'kernel': 'linear'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
1,0.0074,0.004887,0.004963,0.004754,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
2,0.006182,0.00323,0.003229,0.001436,10,linear,"{'C': 10, 'kernel': 'linear'}",1.0,1.0,0.9,0.966667,1.0,0.973333,0.038873,4
3,0.005952,0.002538,0.002898,0.000781,10,rbf,"{'C': 10, 'kernel': 'rbf'}",0.966667,1.0,0.966667,0.966667,1.0,0.98,0.01633,1
4,0.0043,0.001258,0.003439,0.000467,20,linear,"{'C': 20, 'kernel': 'linear'}",1.0,1.0,0.9,0.933333,1.0,0.966667,0.042164,6
5,0.00438,0.000616,0.003288,0.001558,20,rbf,"{'C': 20, 'kernel': 'rbf'}",0.966667,1.0,0.9,0.966667,1.0,0.966667,0.036515,5


In [8]:
# Narrow the results table to the searched parameters, mean CV score and rank.
df.loc[:, ['param_C', 'param_kernel', 'mean_test_score', 'rank_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score,rank_test_score
0,1,linear,0.98,1
1,1,rbf,0.98,1
2,10,linear,0.973333,4
3,10,rbf,0.98,1
4,20,linear,0.966667,6
5,20,rbf,0.966667,5


In [9]:
# Report the winning mean cross-validation score and the parameter
# combination that produced it.
for label, value in (
    ("Best Score:", clf.best_score_),
    ("Best Params:", clf.best_params_),
):
    print(label, value)

Best Score: 0.9800000000000001
Best Params: {'C': 1, 'kernel': 'linear'}


# RandomizedSearchCV

In [10]:
# Candidate values: 2 kernels x 6 C values = 12 possible combinations.
kernel_values = ['rbf', 'linear']
C_values = [1, 10, 20, 30, 40, 50]

# Evaluate only n_iter=2 randomly drawn combinations instead of all of them.
# random_state pins which combinations are drawn, so re-running the notebook
# reproduces the same table rather than a different pair each time.
rscv_clf = RandomizedSearchCV(
    SVC(gamma='auto'),
    {
        "kernel": kernel_values,
        "C": C_values,
    },
    cv=5,
    return_train_score=False,
    n_iter=2,
    random_state=42,
)

rscv_clf.fit(iris['data'], iris['target'])

# Show just the sampled parameters and their mean cross-validation score.
pd.DataFrame(rscv_clf.cv_results_)[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.98
1,40,rbf,0.96


# Compare the results across different models

In [11]:
# Estimators to compare, each paired with the parameter grid to search.
# Layout: {display name: {"model": estimator, "params": grid}}.
model_params = {
    "svm": {
        "model": SVC(gamma='auto'),
        "params": {
            'C': [1, 10, 20],
            'kernel': ['rbf', 'linear']
        }
    },
    "random_forest": {
        # Forest construction is randomised; pin the seed so the reported
        # best score and best n_estimators are stable across runs.
        "model": RandomForestClassifier(random_state=42),
        "params": {
            "n_estimators": [1, 5, 10]
        }
    },
    "logistic_regression": {
        # Raise the iteration cap above the default: on unscaled features the
        # solver may stop before converging, and the resulting warning would
        # be hidden by the notebook's global warnings filter.
        "model": LogisticRegression(max_iter=1000),
        "params": {
            "C": [1, 5, 10, 20]
        }
    }
}

In [12]:
# Run a 5-fold grid search for each candidate model and record its best mean
# CV score and parameters. The search object gets its own name so the fitted
# SVC search held in `clf` from the GridSearchCV section is not overwritten
# (re-running the "best value" cell would otherwise report a different model).
scores = []

for model_name, mp in model_params.items():
  search = GridSearchCV(mp['model'], mp['params'], cv=5, return_train_score=False)
  search.fit(iris['data'], iris['target'])
  scores.append({
      'model': model_name,
      'best_score': search.best_score_,
      'best_params': search.best_params_
  })


In [13]:
# Tabulate the per-model results with an explicit column order for display.
df = pd.DataFrame(
    data=scores,
    columns=['model', 'best_score', 'best_params'],
)
df

Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.953333,{'n_estimators': 5}
2,logistic_regression,0.98,{'C': 10}
