###### GridSearchCV and RandomizedSearchCV

In [5]:
# Import the dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
import sklearn.datasets

In [6]:
# Load the breast cancer dataset that ships with scikit-learn.
# The returned object exposes `.data`, `.feature_names` and `.target`,
# which the following cells read.
breast_cancer_dataset=sklearn.datasets.load_breast_cancer()

In [7]:
# Wrap the raw feature matrix in a DataFrame, one named column per feature,
# then preview the first rows.
df = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [8]:
# Append the dataset's target vector to the frame as a 'label' column
# (this mutates `df` in place), then preview the last rows.
df['label']=breast_cancer_dataset.target
df.tail()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
564,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,...,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,0
565,20.13,28.25,131.2,1261.0,0.0978,0.1034,0.144,0.09791,0.1752,0.05533,...,38.25,155.0,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,0
566,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,...,34.12,126.7,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782,0
567,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,0.2397,0.07016,...,39.42,184.6,1821.0,0.165,0.8681,0.9387,0.265,0.4087,0.124,0
568,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,0.1587,0.05884,...,30.37,59.16,268.6,0.08996,0.06444,0.0,0.0,0.2871,0.07039,1


In [9]:
# Summarise the frame: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   mean radius              569 non-null    float64
 1   mean texture             569 non-null    float64
 2   mean perimeter           569 non-null    float64
 3   mean area                569 non-null    float64
 4   mean smoothness          569 non-null    float64
 5   mean compactness         569 non-null    float64
 6   mean concavity           569 non-null    float64
 7   mean concave points      569 non-null    float64
 8   mean symmetry            569 non-null    float64
 9   mean fractal dimension   569 non-null    float64
 10  radius error             569 non-null    float64
 11  texture error            569 non-null    float64
 12  perimeter error          569 non-null    float64
 13  area error               569 non-null    float64
 14  smoothness error         5

In [10]:
# Separate the features from the target:
#   X - every column except 'label'
#   y - the 'label' column
# `columns=` already identifies the axis, so the redundant `axis=1` is dropped.
X = df.drop(columns='label')
y = df['label']

In [11]:
# Convert the features and the target to NumPy arrays
X, y = np.asarray(X), np.asarray(y)

In [12]:
# Inspect the feature array (NumPy abbreviates the middle rows/columns with "...")
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

##### A. GridSearchCV

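GridSearchCV exhaustively evaluates every combination of the supplied hyperparameter values using cross-validation, and reports the combination that gives the best score for our model.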

In [13]:
# Instantiate a support vector classifier with its default hyperparameters;
# this unfitted estimator is handed to the search objects below.
model=SVC()

In [14]:
# Hyperparameter search space: four kernels crossed with four values of C
# (4 x 4 = 16 candidate combinations).
parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)

In [15]:
# Exhaustive grid search over `parameters`, scored with 5-fold cross-validation
classifier = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=5,
)

In [16]:
# Run the grid search: every parameter combination is fitted and scored
# on each of the 5 cross-validation folds.
classifier.fit(X,y)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [1, 5, 10, 20],
                         'kernel': ['linear', 'poly', 'rbf', 'sigmoid']})

In [18]:
# Parameter combination that achieved the best mean cross-validated score
best_parameters=classifier.best_params_
print(best_parameters)

{'C': 10, 'kernel': 'linear'}


In [19]:
# Highest accuracy: mean cross-validated score of the best parameter combination
highest_accuracy=classifier.best_score_
print(highest_accuracy)

0.9525694767893185


In [17]:
# Full cross-validation results for every candidate (not only the best one):
# a dict of arrays holding fit/score timings, parameters and per-split scores.
classifier.cv_results_

{'mean_fit_time': array([1.67936430e+00, 4.43601608e-03, 5.59196472e-03, 1.32475853e-02,
        3.62701182e+00, 4.67376709e-03, 5.60588837e-03, 1.38700962e-02,
        5.09917574e+00, 4.60462570e-03, 4.48927879e-03, 1.13557816e-02,
        7.63086939e+00, 4.92701530e-03, 5.41558266e-03, 1.18056774e-02]),
 'std_fit_time': array([6.84708113e-01, 4.86690871e-04, 3.74560447e-04, 1.04660917e-03,
        7.33493953e-01, 1.93027804e-04, 7.22330586e-04, 9.87476182e-04,
        1.14182976e+00, 2.62483952e-04, 1.60726847e-04, 6.09823919e-04,
        2.39467651e+00, 3.66850403e-04, 5.84057310e-04, 7.01701427e-04]),
 'mean_score_time': array([0.00307579, 0.00142717, 0.00273046, 0.00317698, 0.00113082,
        0.0012104 , 0.00297155, 0.00321121, 0.00084734, 0.00107217,
        0.00206909, 0.00283198, 0.00084281, 0.00121856, 0.00198803,
        0.00306511]),
 'std_score_time': array([3.69112866e-03, 2.66160191e-04, 3.88699729e-04, 5.87444026e-05,
        3.73466917e-04, 1.90318691e-04, 5.10124140e-

In [20]:
# Load the results into a pandas DataFrame (one row per candidate combination)
result=pd.DataFrame(classifier.cv_results_)

In [21]:
# Preview the first candidates with their timings, per-split scores and rank
result.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.679364,0.684708,0.003076,0.003691,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.004436,0.000487,0.001427,0.000266,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.005592,0.000375,0.00273,0.000389,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.013248,0.001047,0.003177,5.9e-05,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,3.627012,0.733494,0.001131,0.000373,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2


In [22]:
# Keep only the searched parameters and the mean CV score of each candidate
grid_search_result = result[[
    'param_C',
    'param_kernel',
    'mean_test_score',
]]

In [23]:
# Compare the mean test score across every (C, kernel) combination
grid_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.945536
1,1,poly,0.908663
2,1,rbf,0.912172
3,1,sigmoid,0.460441
4,5,linear,0.950815
5,5,poly,0.922729
6,5,rbf,0.931501
7,5,sigmoid,0.411178
8,10,linear,0.952569
9,10,poly,0.920975


**Highest accuracy** ≈ 95.3% (mean 5-fold cross-validation score of 0.9526)

**Best parameters** = `{'C': 10, 'kernel': 'linear'}`

##### B. RandomizedSearchCV

In [24]:
# Hyperparameter search space for the randomized search:
# the same four kernels and four values of C used for the grid search.
parameters_randomCV = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)

In [25]:
# Randomized search: instead of trying every combination, it samples a subset
# of candidates (the recorded results contain 10 of the 16) and scores each
# with 5-fold cross-validation.
# - Uses the search space declared for this section (`parameters_randomCV`)
#   rather than the grid-search `parameters` dict.
# - Fixes `random_state` so the same candidates are sampled on every run;
#   without it the "best" result can change between executions.
classifier_randomCV = RandomizedSearchCV(
    model,
    parameters_randomCV,
    cv=5,
    random_state=42,
)

In [26]:
# Run the randomized search on the feature array and labels
classifier_randomCV.fit(X,y)

RandomizedSearchCV(cv=5, estimator=SVC(),
                   param_distributions={'C': [1, 5, 10, 20],
                                        'kernel': ['linear', 'poly', 'rbf',
                                                   'sigmoid']})

In [27]:
# Best parameter combination among the randomly sampled candidates
best_parameters_randomCV=classifier_randomCV.best_params_
print(best_parameters_randomCV)

{'kernel': 'linear', 'C': 10}


In [28]:
# Highest accuracy: mean cross-validated score of the best sampled candidate
highest_accuracy_randomCV=classifier_randomCV.best_score_
print(highest_accuracy_randomCV)

0.9525694767893185


In [29]:
# Full cross-validation results for every sampled candidate (not only the best one)
classifier_randomCV.cv_results_

{'mean_fit_time': array([5.36904335e-03, 4.73732948e-03, 1.08853340e-02, 1.20227814e-02,
        4.26340103e-03, 4.68768373e+00, 5.35302162e-03, 1.19493961e-02,
        4.72936630e-03, 5.00521660e-03]),
 'std_fit_time': array([5.92159599e-04, 3.06807914e-04, 4.40500185e-04, 1.95024155e-03,
        3.75248152e-04, 9.12511647e-01, 6.00849635e-05, 5.62287453e-04,
        2.53274767e-04, 1.92197946e-04]),
 'mean_score_time': array([0.00257535, 0.00128446, 0.00285153, 0.00283766, 0.00115499,
        0.00085497, 0.00270391, 0.00300164, 0.00126829, 0.00247455]),
 'std_score_time': array([1.61839571e-04, 1.78560497e-04, 7.92392891e-05, 1.69781685e-04,
        7.58906374e-05, 1.12689040e-05, 2.33449996e-04, 7.25506443e-05,
        1.05169740e-04, 3.15117431e-04]),
 'param_kernel': masked_array(data=['rbf', 'poly', 'sigmoid', 'sigmoid', 'poly', 'linear',
                    'rbf', 'sigmoid', 'poly', 'rbf'],
              mask=[False, False, False, False, False, False, False, False,
             

In [30]:
# Load the results into a pandas DataFrame (one row per sampled candidate)
result_randomCV=pd.DataFrame(classifier_randomCV.cv_results_)

In [31]:
# Preview the first sampled candidates with their per-split scores and rank
result_randomCV.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.005369,0.000592,0.002575,0.000162,rbf,5,"{'kernel': 'rbf', 'C': 5}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,2
1,0.004737,0.000307,0.001284,0.000179,poly,10,"{'kernel': 'poly', 'C': 10}",0.885965,0.921053,0.903509,0.938596,0.955752,0.920975,0.024701,5
2,0.010885,0.000441,0.002852,7.9e-05,sigmoid,5,"{'kernel': 'sigmoid', 'C': 5}",0.491228,0.421053,0.421053,0.350877,0.371681,0.411178,0.048578,9
3,0.012023,0.00195,0.002838,0.00017,sigmoid,20,"{'kernel': 'sigmoid', 'C': 20}",0.473684,0.403509,0.421053,0.342105,0.353982,0.398867,0.04764,10
4,0.004263,0.000375,0.001155,7.6e-05,poly,5,"{'kernel': 'poly', 'C': 5}",0.885965,0.912281,0.921053,0.938596,0.955752,0.922729,0.023689,3
