In [None]:
# https://www.mygreatlearning.com/blog/gridsearchcv/

# Here we are going to train the model twice:
# once without GridSearchCV (using the default hyperparameters), and
# once with GridSearchCV to find the optimal hyperparameter values for the dataset at hand.

In [3]:
#import all necessary libraries
import sklearn
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import classification_report, confusion_matrix 
from sklearn.svm import SVC 
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split 


In [4]:
# Load the scikit-learn breast cancer dataset. Only loading happens here;
# the features/labels are extracted and split into train/test sets in the
# cells that follow.
dataset = load_breast_cancer()


In [5]:
# Feature matrix and target labels taken from the loaded dataset.
X, y = dataset.data, dataset.target


In [6]:
# Hold out 30% of the samples for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=101,
)

In [7]:
# Baseline: fit an SVC with its default hyperparameters (no GridSearchCV)
# on the training split.
model = SVC()
model.fit(X=X_train, y=y_train)
   

In [8]:
# Predict on the held-out test split with the baseline model and print the
# per-class precision/recall/F1 report.
y_pred = model.predict(X_test)
baseline_report = classification_report(y_test, y_pred)
print(baseline_report)

              precision    recall  f1-score   support

           0       0.95      0.85      0.90        66
           1       0.91      0.97      0.94       105

    accuracy                           0.92       171
   macro avg       0.93      0.91      0.92       171
weighted avg       0.93      0.92      0.92       171



In [None]:
#*****************************************************
# train the model on the train set using GridSearchCV

In [9]:
# Search space explored by GridSearchCV.
#
# C is the SVC regularization parameter: it is the penalty on the error term
# and so influences the width of the margin. Every value listed under 'C' is
# tried by the grid search.
#
# NOTE: a dict literal keeps only the LAST value given for a repeated key, so
# 'gamma' must appear exactly once. A numeric gamma list that used to precede
# the ['scale', 'auto'] entry was silently discarded; the grid below is the
# one that was actually searched (4 C values x 2 gamma values x 1 kernel =
# 8 candidates).
#
# NOTE(review): per the scikit-learn SVC docs, gamma is the kernel coefficient
# for the 'rbf', 'poly' and 'sigmoid' kernels, so with only 'linear' listed the
# two gamma values should produce equivalent fits. Add e.g. 'rbf' to 'kernel'
# if gamma is meant to be tuned - confirm intent.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'kernel': ['linear'],
}
   

In [11]:
# Build the grid search over param_grid, using a fresh SVC as the estimator.
#
# refit=True: refit an estimator using the best found parameters on the
# whole dataset.
#
# verbose (int): controls the verbosity; a higher value means more output.
#   >1 : the computation time for each fold and parameter candidate is shown
#   >2 : the score is shown as well
#   >3 : the fold and candidate parameter indexes are shown too, together
#        with the starting time of the computation
#
# n_jobs=-1: number of CPU cores to use during the grid search; -1 means
# use all available cores.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    refit=True,
    verbose=3,
    n_jobs=-1,
)

In [12]:
# Run the grid search on the training split.
grid.fit(X_train, y_train)

# Show the best hyperparameter combination found by the search.
best_params = grid.best_params_
print(best_params)

# Predict on the held-out test split with the tuned search object and print
# the classification report.
y_pred_grid = grid.predict(X_test)
tuned_report = classification_report(y_test, y_pred_grid)
print(tuned_report)


Fitting 5 folds for each of 8 candidates, totalling 40 fits
{'C': 100, 'gamma': 'scale', 'kernel': 'linear'}
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        66
           1       0.94      0.98      0.96       105

    accuracy                           0.95       171
   macro avg       0.96      0.95      0.95       171
weighted avg       0.95      0.95      0.95       171



In [None]:
# It may be tempting to conclude that {'C': 100, 'gamma': 'scale', 'kernel': 'linear'} are the best hyperparameter values for any SVM model.
# That is not the case: these values are only the best among the candidates searched for the dataset we are working on.
# For any other dataset, the SVM model may need different hyperparameter values to reach its best performance.