**Hyperparameter Tuning:**
1. GridSearchCV
2. RandomizedSearchCV

Importing the Dependencies

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

Load the breast cancer dataset

In [3]:
# load the breast cancer dataset that ships with scikit-learn;
# the returned object exposes the feature matrix (.data), the class
# labels (.target) and the column names (.feature_names) used below
breast_cancer_dataset = datasets.load_breast_cancer()

In [4]:
# dump the raw dataset object to inspect its structure ('data', 'target', ...)
print(breast_cancer_dataset)

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 

In [5]:
# wrap the feature matrix in a DataFrame, one named column per feature
dataset = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

In [8]:
# preview the first 5 rows of the feature DataFrame
dataset.head(n=5)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [9]:
# append the class labels as a trailing 'label' column
dataset = dataset.assign(label=breast_cancer_dataset.target)

In [10]:
# preview the first 5 rows again to confirm the 'label' column was added
dataset.head(n=5)

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [11]:
# number of rows and columns in this dataset (features plus the 'label' column)
dataset.shape

(569, 31)

In [12]:
# count the missing values in every column
dataset.isna().sum()

Unnamed: 0,0
mean radius,0
mean texture,0
mean perimeter,0
mean area,0
mean smoothness,0
mean compactness,0
mean concavity,0
mean concave points,0
mean symmetry,0
mean fractal dimension,0


In [13]:
# checking the class distribution of the target variable
# (how many samples carry label 1 versus label 0)
dataset['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,357
0,212


Separating the features and target

In [14]:
# target: the 'label' column; features: every remaining column
y = dataset['label']
X = dataset.drop(columns=['label'])

In [15]:
# plain-text view of the feature matrix (pandas truncates the middle rows)
print(X)

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0          17.99         10.38          122.80     1001.0          0.11840   
1          20.57         17.77          132.90     1326.0          0.08474   
2          19.69         21.25          130.00     1203.0          0.10960   
3          11.42         20.38           77.58      386.1          0.14250   
4          20.29         14.34          135.10     1297.0          0.10030   
..           ...           ...             ...        ...              ...   
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean concavity  mean concave points  mea

In [16]:
# plain-text view of the target labels
print(y)

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: label, Length: 569, dtype: int64


## **GridSearchCV**

---



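GridSearchCV exhaustively evaluates every combination of the supplied hyperparameter values using cross-validation, and is used to determine the best parameters for our model.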

In [17]:
# create a support vector classifier with default settings;
# it is the base estimator handed to the search below
model = SVC()

In [24]:
# search space: every kernel is paired with every regularization strength C
parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)

In [25]:
# exhaustive grid search over `parameters`, scored with 5-fold cross-validation
classifier = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=5,
)

In [26]:
# run the search: each of the 4 x 4 kernel/C combinations is
# cross-validated on the full feature matrix X and labels y
classifier.fit(X, y)

  _data = np.array(data, dtype=dtype, copy=copy,


In [27]:
# raw per-candidate results of the grid search (timings, parameters, scores)
classifier.cv_results_

{'mean_fit_time': array([1.72919483e+00, 6.85782433e-03, 6.93764687e-03, 1.68001652e-02,
        3.26020713e+00, 5.86557388e-03, 6.27403259e-03, 1.56593323e-02,
        5.80672607e+00, 1.06853008e-02, 1.01833820e-02, 2.45304585e-02,
        7.92414041e+00, 6.65020943e-03, 6.47039413e-03, 1.54261112e-02]),
 'std_fit_time': array([5.08401760e-01, 1.39852779e-03, 8.30814094e-04, 6.61082142e-04,
        7.90675759e-01, 3.08621814e-04, 4.61627612e-04, 8.77555112e-04,
        1.28827923e+00, 1.35501029e-04, 8.41825449e-05, 3.42588192e-03,
        2.72549484e+00, 5.40086508e-04, 5.28199375e-04, 8.88107542e-04]),
 'mean_score_time': array([0.00358438, 0.00322118, 0.00327544, 0.00577712, 0.00268035,
        0.00262671, 0.0032187 , 0.00509715, 0.00340586, 0.00450869,
        0.00535092, 0.00723338, 0.00286574, 0.00277276, 0.00304704,
        0.00502849]),
 'std_score_time': array([1.04016505e-03, 4.37981837e-04, 1.06474908e-04, 5.44496611e-04,
        4.79365797e-05, 7.86115480e-05, 5.43491504e-

In [28]:
# best parameters: the kernel/C combination selected by the grid search
best_parameters = classifier.best_params_

In [29]:
# display the selected hyperparameter combination
best_parameters

{'C': 10, 'kernel': 'linear'}

In [30]:
# highest accuracy: mean cross-validation score of the best combination
highest_accuracy = classifier.best_score_

In [31]:
# display the best mean cross-validation score
highest_accuracy

0.9525694767893185

In [32]:
# tabulate the grid-search results: one row per evaluated combination
result = pd.DataFrame(data=classifier.cv_results_)

In [33]:
# preview the first 5 evaluated combinations
result.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.729195,0.508402,0.003584,0.00104,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.006858,0.001399,0.003221,0.000438,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.006938,0.000831,0.003275,0.000106,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.0168,0.000661,0.005777,0.000544,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,3.260207,0.790676,0.00268,4.8e-05,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2


In [34]:
# keep only the searched parameters and the mean cross-validation score
grid_search_result = result.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

In [35]:
# display the condensed grid-search summary
grid_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.945536
1,1,poly,0.908663
2,1,rbf,0.912172
3,1,sigmoid,0.460441
4,5,linear,0.950815
5,5,poly,0.922729
6,5,rbf,0.931501
7,5,sigmoid,0.411178
8,10,linear,0.952569
9,10,poly,0.920975


Highest Accuracy ≈ 95.3% (mean 5-fold cross-validation accuracy, 0.9526)

Best Parameters = {'C':10, 'kernel':'linear'}

## **RandomizedSearchCV**

---



In [36]:
 # create a fresh support vector classifier for the randomized search
 # NOTE(review): this cell is indented by one space; IPython appears to
 # tolerate it, but a plain .py export would likely fail - consider dedenting
 model = SVC()

In [37]:
# search space the randomized search samples from (same values as the grid search)
parameters = dict([
    ('kernel', ['linear', 'poly', 'rbf', 'sigmoid']),
    ('C', [1, 5, 10, 20]),
])

In [38]:
# randomized search: cross-validate (5 folds) a random sample of the kernel/C
# combinations (n_iter defaults to 10) instead of the full grid.
# random_state pins the sample so that Restart & Run All evaluates the same
# candidates, and reports the same best result, on every run.
classifier = RandomizedSearchCV(model, parameters, cv=5, random_state=42)

In [39]:
# run the search: each sampled kernel/C combination is
# cross-validated on the full feature matrix X and labels y
classifier.fit(X, y)

In [40]:
# raw per-candidate results of the randomized search (timings, parameters, scores)
classifier.cv_results_

{'mean_fit_time': array([0.01018043, 1.73995724, 0.00599904, 0.00708947, 0.01543255,
        0.00627341, 0.00579667, 4.83851237, 3.30030332, 0.0176775 ]),
 'std_fit_time': array([7.68653681e-04, 8.43908319e-01, 1.39541195e-04, 5.93920762e-04,
        7.09136046e-04, 1.80684705e-04, 2.92424562e-04, 1.03792030e+00,
        1.06202830e+00, 3.56368784e-03]),
 'mean_score_time': array([0.00502419, 0.00283699, 0.00298524, 0.00352993, 0.00502176,
        0.00256991, 0.00259352, 0.00324807, 0.00289288, 0.00503836]),
 'std_score_time': array([7.54619865e-04, 4.24327321e-04, 6.65610702e-05, 2.03051327e-04,
        2.42179927e-04, 3.98579963e-05, 8.74872095e-05, 7.21493992e-04,
        5.06454930e-04, 1.56397034e-04]),
 'param_kernel': masked_array(data=['poly', 'linear', 'rbf', 'rbf', 'sigmoid', 'poly',
                    'poly', 'linear', 'linear', 'sigmoid'],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',


In [41]:
# best parameters: the best kernel/C combination among the sampled candidates
best_parameters = classifier.best_params_

In [42]:
# display the selected hyperparameter combination
best_parameters

{'kernel': 'linear', 'C': 10}

In [43]:
# highest accuracy: mean cross-validation score of the best sampled combination
highest_accuracy = classifier.best_score_

In [44]:
# display the best mean cross-validation score
highest_accuracy

0.9525694767893185

In [45]:
# tabulate the randomized-search results: one row per sampled combination
result = pd.DataFrame(data=classifier.cv_results_)

In [46]:
# preview the first 5 sampled combinations
result.head(n=5)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.01018,0.000769,0.005024,0.000755,poly,1,"{'kernel': 'poly', 'C': 1}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,8
1,1.739957,0.843908,0.002837,0.000424,linear,1,"{'kernel': 'linear', 'C': 1}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,3
2,0.005999,0.00014,0.002985,6.7e-05,rbf,10,"{'kernel': 'rbf', 'C': 10}",0.877193,0.921053,0.912281,0.95614,0.946903,0.922714,0.027879,5
3,0.007089,0.000594,0.00353,0.000203,rbf,1,"{'kernel': 'rbf', 'C': 1}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,7
4,0.015433,0.000709,0.005022,0.000242,sigmoid,20,"{'kernel': 'sigmoid', 'C': 20}",0.473684,0.403509,0.421053,0.342105,0.353982,0.398867,0.04764,10


In [47]:
# keep only the searched parameters and the mean cross-validation score
randomized_search_result = result.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

In [48]:
# display the condensed randomized-search summary
randomized_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,poly,0.908663
1,1,linear,0.945536
2,10,rbf,0.922714
3,1,rbf,0.912172
4,20,sigmoid,0.398867
5,20,poly,0.919221
6,5,poly,0.922729
7,10,linear,0.952569
8,5,linear,0.950815
9,10,sigmoid,0.402391


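Highest Accuracy ≈ 95.3% (mean 5-fold cross-validation accuracy, 0.9526, among the 10 sampled combinations in the run shown above)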

Best Parameters = {'C':10, 'kernel':'linear'}