In [11]:
# Load dataset
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris

# Load the iris data and assemble one frame: a column per feature name,
# followed by a 'target' column holding iris.target.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(target=iris.target)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [12]:
# Separate the feature columns (X) from the 'target' column (Y)
X = df.drop(columns=['target'])
Y = df['target']

## Hyperparameter tuning using GridSearchCV

In [16]:
# Approach 1 :- Looping through the hyperparameters
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC

kernels = ['rbf', 'linear']
C = [1, 10, 20]

# Average 5-fold cross-validation score for every (kernel, C) pair,
# keyed as '<kernel>_<C>'. Kernels vary slowest, C values fastest.
avg_scores = {
    kernel + '_' + str(c_value): np.average(
        cross_val_score(SVC(kernel=kernel, C=c_value, gamma="auto"), X, Y, cv=5)
    )
    for kernel in kernels
    for c_value in C
}

avg_scores

{'rbf_1': np.float64(0.9800000000000001),
 'rbf_10': np.float64(0.9800000000000001),
 'rbf_20': np.float64(0.9666666666666668),
 'linear_1': np.float64(0.9800000000000001),
 'linear_10': np.float64(0.9733333333333334),
 'linear_20': np.float64(0.9666666666666666)}

In [17]:
# Approach 2 :- Using GridSearchCV
from sklearn.model_selection import GridSearchCV

# Candidate values for the search; every C / kernel combination is evaluated.
params_grid = {'C': [1, 10, 20], 'kernel': ['rbf', 'linear']}

gcv = GridSearchCV(
    estimator=SVC(gamma="auto"),
    param_grid=params_grid,
    cv=5,
    return_train_score=False,
)
gcv.fit(X, Y)

# Display only the columns needed to compare the candidates.
report_columns = ['param_C', 'param_kernel', 'mean_test_score']
pd.DataFrame(gcv.cv_results_)[report_columns]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,rbf,0.98
1,1,linear,0.98
2,10,rbf,0.98
3,10,linear,0.973333
4,20,rbf,0.966667
5,20,linear,0.966667


In [18]:
# Report the winning parameter combination and its mean cross-validation score
for label, value in (
    ("Best Params :- ", gcv.best_params_),
    ("Best Score :- ", gcv.best_score_),
):
    print(label, value)

Best Params :-  {'C': 1, 'kernel': 'rbf'}
Best Score :-  0.9800000000000001


In [20]:
# Approach 3 :- Using RandomizedSearchCV to reduce the no. of iterations by trying random combinations of parameters
from sklearn.model_selection import RandomizedSearchCV

# Search space to sample from: 3 values of C x 2 kernels = 6 possible combinations.
params_grid = {
    'C':[1, 10, 20],
    'kernel':['rbf', 'linear']
}

# n_iter=2 evaluates only 2 of the 6 combinations instead of all of them.
# random_state pins which combinations are drawn, so re-running the cell
# (or Restart & Run All) always evaluates the same candidates.
rcv = RandomizedSearchCV(
    SVC(gamma="auto"),
    params_grid,
    n_iter=2,
    cv=5,
    return_train_score=False,
    random_state=42,
)
rcv.fit(X, Y)

# One row per sampled combination with its mean cross-validation score.
pd.DataFrame(rcv.cv_results_)[['param_C', 'param_kernel', 'mean_test_score']]

Unnamed: 0,param_C,param_kernel,mean_test_score
0,20,rbf,0.966667
1,10,rbf,0.98


## Hyperparameter tuning with different models

In [23]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Candidate models for the comparison. Each entry maps a display name to:
#   'model'  - the estimator instance handed to the search
#   'params' - the grid of hyperparameter values to try for that estimator
model_params = {
    'svm':{
        'model':SVC(gamma="auto"),
        'params':{
            'C':[1, 10, 20],
            'kernel':['rbf', 'linear']
        }
    },
    'random_forest':{
        # A random forest is stochastic; seeding it keeps its score and the
        # selected n_estimators identical from one run to the next.
        'model':RandomForestClassifier(random_state=42),
        'params':{
            'n_estimators':[1, 5, 10]
        }
    },
    'logistic_regression':{
        'model':LogisticRegression(solver='liblinear'),
        'params': {
            'C': [1,5,10]
        }
    }
}

In [25]:
# Run a 5-fold grid search per candidate model and keep one summary record each.
scores = []
for model_name, spec in model_params.items():
  # spec holds the estimator under 'model' and its search grid under 'params'
  clf = GridSearchCV(
      estimator=spec['model'],
      param_grid=spec['params'],
      cv=5,
      return_train_score=False,
  ).fit(X, Y)
  record = {
      'model': model_name,
      'best_score': clf.best_score_,
      'best_params': clf.best_params_
  }
  scores.append(record)

# One row per model: its best mean CV score and the parameters that achieved it.
pd.DataFrame(scores, columns=['model','best_score','best_params'])

Unnamed: 0,model,best_score,best_params
0,svm,0.98,"{'C': 1, 'kernel': 'rbf'}"
1,random_forest,0.973333,{'n_estimators': 1}
2,logistic_regression,0.966667,{'C': 5}


SVM with C=1 and kernel='rbf' is the best model for solving this problem.

## Hyperparameter tuning for different models for digits dataset

In [30]:
# Load dataset
import pandas as pd
from sklearn.datasets import load_digits

# Load the digits data and assemble one frame: a column per feature name,
# followed by a 'target' column holding digits.target.
# Note: this rebinds `df`, replacing the iris frame used earlier in the notebook.
digits = load_digits()
df = pd.DataFrame(data=digits.data, columns=digits.feature_names).assign(target=digits.target)
df.head()

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4


In [31]:
# Separate the pixel feature columns (X) from the 'target' column (Y)
X = df.drop(columns=['target'])
Y = df.loc[:, 'target']

In [33]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB

# Defining model parameters
# Each entry maps a display name to:
#   'model'  - the estimator instance handed to the search
#   'params' - the grid of hyperparameter values to try (an empty dict means
#              the estimator is evaluated once with its defaults)
model_params = {
    'Logistic_regression':{
        'model':LogisticRegression(solver='liblinear'),
        'params':{
            'C':[1, 5, 10]
        }
    },
    'Decision_tree':{
        # Seeded so the fitted trees, and therefore the reported score,
        # are the same on every run.
        'model':DecisionTreeClassifier(random_state=42),
        'params':{
            'criterion':['gini', 'entropy'],
        }
    },
    'Random_forest':{
        # Seeded for the same reason: a forest is stochastic, and without a
        # seed the best score and chosen parameters can change between runs.
        'model':RandomForestClassifier(random_state=42),
        'params':{
            'n_estimators':[10, 50, 100],
            'criterion':['gini', 'entropy']
        }
    },
    'SVM':{
        'model':SVC(gamma="auto"),
        'params':{
            'C':[1, 5, 10],
            'kernel':['rbf', 'linear']
        }
    },
    'Gaussian_NB':{
        'model':GaussianNB(),
        'params':{}
    },
    'Multinomial_NB':{
        'model':MultinomialNB(),
        'params':{}
    }
}

In [34]:
# Run a 5-fold grid search per candidate model and keep one summary record each.
scores = []
for model_name, spec in model_params.items():
  # spec holds the estimator under 'model' and its search grid under 'params'
  clf = GridSearchCV(
      estimator=spec['model'],
      param_grid=spec['params'],
      cv=5,
      return_train_score=False,
  ).fit(X, Y)
  record = {
      'model': model_name,
      'best_score': clf.best_score_,
      'best_params': clf.best_params_
  }
  scores.append(record)

# One row per model: its best mean CV score and the parameters that achieved it.
pd.DataFrame(scores, columns=['model','best_score','best_params'])

Unnamed: 0,model,best_score,best_params
0,Logistic_regression,0.922114,{'C': 1}
1,Decision_tree,0.815269,{'criterion': 'entropy'}
2,Random_forest,0.943258,"{'criterion': 'gini', 'n_estimators': 50}"
3,SVM,0.947697,"{'C': 1, 'kernel': 'linear'}"
4,Gaussian_NB,0.806928,{}
5,Multinomial_NB,0.87035,{}
