In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

In [2]:
# Load the breast cancer dataset shipped with scikit-learn (features in .data, labels in .target).
breast_dataset = load_breast_cancer()

In [3]:
# Dump the whole dataset object (feature array, target array, ...) to stdout.
# NOTE(review): the output is very long; inspecting breast_dataset.keys() would be easier to read.
print(breast_dataset)

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]]), 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 

In [4]:
# Wrap the raw feature matrix in a DataFrame, labelling each column with its feature name.
breast_df = pd.DataFrame(
    breast_dataset.data,
    columns=breast_dataset.feature_names,
)

In [5]:
# Plain-text dump of the feature frame (the recorded output shows index 0-568).
# NOTE(review): breast_df.head() in a later cell gives a more readable preview.
print(breast_df)

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0          17.99         10.38          122.80     1001.0          0.11840   
1          20.57         17.77          132.90     1326.0          0.08474   
2          19.69         21.25          130.00     1203.0          0.10960   
3          11.42         20.38           77.58      386.1          0.14250   
4          20.29         14.34          135.10     1297.0          0.10030   
..           ...           ...             ...        ...              ...   
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean concavity  mean concave points  mea

In [6]:
# Append the class labels as a 'target' column; this mutates breast_df in place.
breast_df['target'] = breast_dataset.target

In [7]:
# Preview the first five rows, now including the 'target' column.
breast_df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [8]:
# Preview the last five rows.
breast_df.tail()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
564,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,...,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,0
565,20.13,28.25,131.2,1261.0,0.0978,0.1034,0.144,0.09791,0.1752,0.05533,...,38.25,155.0,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,0
566,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,...,34.12,126.7,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782,0
567,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,0.2397,0.07016,...,39.42,184.6,1821.0,0.165,0.8681,0.9387,0.265,0.4087,0.124,0
568,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,0.1587,0.05884,...,30.37,59.16,268.6,0.08996,0.06444,0.0,0.0,0.2871,0.07039,1


In [9]:
# Class balance of the label column (number of rows per target value).
breast_df['target'].value_counts()

target
1    357
0    212
Name: count, dtype: int64

Target encoding (counts from the cell above):

- 1 --> Benign (357 samples)
- 0 --> Malignant (212 samples)

In [10]:
# Feature matrix: every column except the label.
X = breast_df.drop(columns='target')

# Label vector (0/1 class codes).
Y = breast_df['target']

In [11]:
# Plain-text dump of the feature frame, i.e. breast_df without the 'target' column.
print(X)

     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0          17.99         10.38          122.80     1001.0          0.11840   
1          20.57         17.77          132.90     1326.0          0.08474   
2          19.69         21.25          130.00     1203.0          0.10960   
3          11.42         20.38           77.58      386.1          0.14250   
4          20.29         14.34          135.10     1297.0          0.10030   
..           ...           ...             ...        ...              ...   
564        21.56         22.39          142.00     1479.0          0.11100   
565        20.13         28.25          131.20     1261.0          0.09780   
566        16.60         28.08          108.30      858.1          0.08455   
567        20.60         29.33          140.10     1265.0          0.11780   
568         7.76         24.54           47.92      181.0          0.05263   

     mean compactness  mean concavity  mean concave points  mea

In [12]:
# Plain-text dump of the label series (569 entries in the recorded output).
print(Y)

0      0
1      0
2      0
3      0
4      0
      ..
564    0
565    0
566    0
567    0
568    1
Name: target, Length: 569, dtype: int32


In [13]:
# Convert the pandas feature frame and label series into plain NumPy arrays for scikit-learn.
X, Y = np.asarray(X), np.asarray(Y)

Grid Search CV

In [14]:
# Base estimator with default settings; kernel and C are supplied by the search grid.
model = SVC()

In [20]:
# Search space for the SVC: 4 kernels x 4 values of C = 16 candidate combinations.
parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)

In [21]:
# Exhaustive search: every kernel/C combination is evaluated with 5-fold cross-validation.
classifier = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=5,
)

In [22]:
# Run the grid search: fits all 16 kernel/C candidates on each of the 5 folds.
# NOTE(review): X is the raw, unscaled feature matrix, and the recorded mean_fit_time shows the
# linear-kernel candidates taking seconds while the others take milliseconds. Standardizing the
# features (e.g. a StandardScaler step in a Pipeline) would likely help - TODO confirm.
classifier.fit(X,Y)

In [23]:
# Raw per-candidate cross-validation results (timings, parameters, per-split scores) as a dict of arrays.
# NOTE(review): this output is long; the DataFrame view built in a later cell is easier to read.
classifier.cv_results_

{'mean_fit_time': array([7.56392384e-01, 1.33781433e-03, 0.00000000e+00, 3.12714577e-03,
        2.66644330e+00, 3.22504044e-03, 4.36506271e-03, 1.12952709e-02,
        3.78841729e+00, 2.12769508e-03, 1.69882774e-03, 3.21760178e-03,
        4.21778207e+00, 9.25302505e-04, 2.44040489e-03, 6.36672974e-03]),
 'std_fit_time': array([3.33458406e-01, 1.71659321e-03, 0.00000000e+00, 6.25429153e-03,
        9.82079023e-01, 3.95012300e-03, 3.69024555e-03, 3.59224878e-03,
        2.36284133e+00, 4.25539017e-03, 2.31807103e-03, 3.23844664e-03,
        2.27645307e+00, 1.85060501e-03, 4.88080978e-03, 7.85999701e-03]),
 'mean_score_time': array([0.        , 0.        , 0.00181012, 0.00312505, 0.00078425,
        0.00171175, 0.00565948, 0.00152607, 0.        , 0.        ,
        0.        , 0.00050135, 0.        , 0.        , 0.        ,
        0.00031257]),
 'std_score_time': array([0.        , 0.        , 0.00362024, 0.0062501 , 0.00129421,
        0.00297924, 0.00631284, 0.00305214, 0.        , 

In [24]:
# Hyperparameter combination with the highest mean cross-validation score.
best_params = classifier.best_params_

print(best_params)

{'C': 10, 'kernel': 'linear'}


In [25]:
# Mean cross-validated score of the best candidate (equals the top mean_test_score in cv_results_).
highest_acc = classifier.best_score_

print(highest_acc)

0.9525694767893185


In [26]:
# Tabulate the grid-search results: one row per candidate, one column per cv_results_ key.
result = pd.DataFrame(classifier.cv_results_)

In [27]:
# Preview the first five candidates with their timings, per-split scores and rank.
result.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.756392,0.333458,0.0,0.0,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.001338,0.001717,0.0,0.0,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.0,0.0,0.00181,0.00362,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.003127,0.006254,0.003125,0.00625,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,2.666443,0.982079,0.000784,0.001294,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2


In [28]:
# Keep only the searched hyperparameters and the mean CV score for a compact comparison table.
grid_search_result = result.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

In [29]:
# Show the mean CV score of all 16 kernel/C combinations.
# NOTE(review): the sigmoid rows (~0.40-0.46) score below the majority-class rate (357/569 ~ 0.63);
# presumably a symptom of the unscaled features - TODO confirm.
print(grid_search_result)

   param_C param_kernel  mean_test_score
0        1       linear         0.945536
1        1         poly         0.908663
2        1          rbf         0.912172
3        1      sigmoid         0.460441
4        5       linear         0.950815
5        5         poly         0.922729
6        5          rbf         0.931501
7        5      sigmoid         0.411178
8       10       linear         0.952569
9       10         poly         0.920975
10      10          rbf         0.922714
11      10      sigmoid         0.402391
12      20       linear         0.949061
13      20         poly         0.919221
14      20          rbf         0.920944
15      20      sigmoid         0.398867


RandomizedSearchCV

In [30]:
# Fresh default SVC for the randomized search (rebinds `model` from the grid-search section).
model = SVC()

In [31]:
# Candidate pool for the randomized search: the same 4 kernels x 4 values of C as the grid search.
parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)

In [32]:
# Randomized search over the kernel/C pool with 5-fold cross-validation.
# n_iter is left at its default; the recorded results show 10 sampled candidates out of the 16 possible.
# random_state pins which candidates are sampled, so Restart & Run All reproduces the same
# best_params_ / best_score_ instead of a different random subset on every run.
classifier = RandomizedSearchCV(model, parameters, cv=5, random_state=42)

In [33]:
# Run the randomized search: fits each sampled kernel/C candidate on each of the 5 folds.
classifier.fit(X,Y)

In [34]:
# Raw cross-validation results for the sampled candidates as a dict of arrays.
# NOTE(review): this output is long; the DataFrame view built in a later cell is easier to read.
classifier.cv_results_

{'mean_fit_time': array([6.05378151e-03, 1.04664326e-02, 4.54654694e-03, 2.05979347e-03,
        2.85644531e-03, 6.68298178e+00, 2.65302658e-03, 2.09517479e-03,
        1.09806061e-03, 1.42450585e+00]),
 'std_fit_time': array([1.27812919e-03, 1.37135250e-03, 2.23673458e-03, 1.93557684e-03,
        3.71707421e-03, 4.27999771e+00, 1.34483356e-03, 2.59389351e-03,
        1.78664881e-03, 2.87178239e-01]),
 'mean_score_time': array([0.00220637, 0.00525651, 0.00085268, 0.00031447, 0.00077467,
        0.00038018, 0.        , 0.00092955, 0.        , 0.        ]),
 'std_score_time': array([0.00272088, 0.00757725, 0.00170536, 0.00062895, 0.00154934,
        0.00076036, 0.        , 0.00185909, 0.        , 0.        ]),
 'param_kernel': masked_array(data=['sigmoid', 'sigmoid', 'sigmoid', 'poly', 'rbf',
                    'linear', 'poly', 'rbf', 'poly', 'linear'],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',

In [35]:
# Best combination among the sampled candidates only; it can differ from the grid-search optimum
# when that combination was not sampled.
best_params = classifier.best_params_
print(best_params)

{'kernel': 'linear', 'C': 5}


In [36]:
# Mean cross-validated score of the best sampled candidate.
highest_acc = classifier.best_score_

print(highest_acc)

0.9508150908244062


In [37]:
# Tabulate the randomized-search results; rebinds `result`, replacing the grid-search table.
result = pd.DataFrame(classifier.cv_results_)

In [40]:
# Keep only the sampled hyperparameters and their mean CV score for a compact comparison table.
randomized_search_result = result.loc[:, ['param_C', 'param_kernel', 'mean_test_score']]

In [41]:
# Show the mean CV score of each sampled kernel/C combination.
print(randomized_search_result)

  param_C param_kernel  mean_test_score
0       5      sigmoid         0.411178
1      10      sigmoid         0.402391
2      20      sigmoid         0.398867
3      20         poly         0.919221
4       5          rbf         0.931501
5      20       linear         0.949061
6      10         poly         0.920975
7       1          rbf         0.912172
8       5         poly         0.922729
9       5       linear         0.950815
