In [1]:
import numpy as np
import pandas as pd
import sklearn.datasets
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [2]:
# Load the breast cancer dataset that ships with scikit-learn.
breast_cancer_dataset = sklearn.datasets.load_breast_cancer()

In [3]:
# Inspect the raw loaded object; the output below shows it is dict-like
# with (at least) 'data' and 'target' entries.
breast_cancer_dataset

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [4]:
# Wrap the feature matrix in a DataFrame, using the dataset's own feature
# names as column labels.
dataframe = pd.DataFrame(
    data=breast_cancer_dataset.data,
    columns=breast_cancer_dataset.feature_names,
)

In [5]:
# Preview the feature frame (features only; no label column yet).
dataframe

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [6]:
# Attach the class labels as a 'label' column. This mutates `dataframe` in
# place, so the preview displayed before this cell no longer reflects it.
dataframe['label'] = breast_cancer_dataset['target']

In [7]:
# Preview the frame again, now with the trailing 'label' column.
dataframe

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
0,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,...,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890,0
1,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,...,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902,0
2,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,...,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,...,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300,0
4,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,...,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,...,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115,0
565,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,...,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637,0
566,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,...,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820,0
567,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,...,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400,0


In [8]:
# Count missing values per column (every count in the recorded output is 0).
dataframe.isna().sum()

mean radius                0
mean texture               0
mean perimeter             0
mean area                  0
mean smoothness            0
mean compactness           0
mean concavity             0
mean concave points        0
mean symmetry              0
mean fractal dimension     0
radius error               0
texture error              0
perimeter error            0
area error                 0
smoothness error           0
compactness error          0
concavity error            0
concave points error       0
symmetry error             0
fractal dimension error    0
worst radius               0
worst texture              0
worst perimeter            0
worst area                 0
worst smoothness           0
worst compactness          0
worst concavity            0
worst concave points       0
worst symmetry             0
worst fractal dimension    0
label                      0
dtype: int64

In [9]:
# Separate the target from the predictors: Y is the 'label' column,
# X is every remaining column.
Y = dataframe['label']
X = dataframe.drop(columns=['label'])

In [10]:
# Inspect the feature frame and label series side by side (still pandas objects here).
X,Y

(     mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
 0          17.99         10.38          122.80     1001.0          0.11840   
 1          20.57         17.77          132.90     1326.0          0.08474   
 2          19.69         21.25          130.00     1203.0          0.10960   
 3          11.42         20.38           77.58      386.1          0.14250   
 4          20.29         14.34          135.10     1297.0          0.10030   
 ..           ...           ...             ...        ...              ...   
 564        21.56         22.39          142.00     1479.0          0.11100   
 565        20.13         28.25          131.20     1261.0          0.09780   
 566        16.60         28.08          108.30      858.1          0.08455   
 567        20.60         29.33          140.10     1265.0          0.11780   
 568         7.76         24.54           47.92      181.0          0.05263   
 
      mean compactness  mean concavity  mean conca

In [11]:
# Convert features and labels to plain NumPy arrays. Note that X and Y are
# rebound: from here on they are ndarrays, not pandas objects.
X, Y = np.asarray(X), np.asarray(Y)

In [12]:
# Inspect the converted NumPy arrays.
X,Y

(array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
 

In [13]:
# Base support-vector classifier with default settings; the searches below
# vary its `kernel` and `C` values.
# NOTE(review): X is passed to the searches without any feature scaling —
# confirm this is intentional, since the columns span very different ranges.
model = SVC()

In [14]:
# Hyperparameter search space shared by the grid and randomized searches:
# 4 kernels x 4 values of C = 16 candidate combinations.
parameters = dict(
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
    C=[1, 5, 10, 20],
)

In [15]:
# Exhaustive search: evaluate every kernel/C combination in `parameters`
# with 5-fold cross-validation.
classifier = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    cv=5,
)

In [16]:
# Run the grid search on the full data set. The recorded timings below show
# the linear-kernel fits take seconds while the other kernels take milliseconds.
classifier.fit(X,Y)

In [17]:
# Raw per-candidate cross-validation results as a dict of arrays
# (fit/score timings, parameters, per-split and aggregate scores).
classifier.cv_results_

{'mean_fit_time': array([1.05944123e+00, 2.27437019e-03, 2.44703293e-03, 5.82275391e-03,
        2.18065343e+00, 2.39210129e-03, 2.16875076e-03, 5.41458130e-03,
        3.25468926e+00, 2.63905525e-03, 2.18873024e-03, 5.41014671e-03,
        5.22248240e+00, 2.98881531e-03, 2.30951309e-03, 5.39393425e-03]),
 'std_fit_time': array([4.09302479e-01, 4.86695426e-05, 5.77041133e-05, 1.31758710e-04,
        4.39122678e-01, 1.96397377e-04, 8.93390525e-05, 2.17069693e-04,
        8.24221510e-01, 8.00125079e-05, 8.11876562e-05, 2.65060708e-04,
        1.65331630e+00, 2.40762396e-04, 1.22436555e-04, 2.69869755e-04]),
 'mean_score_time': array([0.0006372 , 0.00060344, 0.00113783, 0.00136962, 0.00068235,
        0.00057917, 0.0009562 , 0.00125284, 0.00065112, 0.00055451,
        0.00091534, 0.00124264, 0.00064735, 0.0006433 , 0.00089741,
        0.00123625]),
 'std_score_time': array([5.09441091e-05, 9.63399887e-06, 3.48664165e-05, 2.11742545e-05,
        8.51834584e-05, 7.80819937e-06, 2.92132720e-

In [18]:
# Parameter combination ranked first by mean cross-validated test score.
classifier.best_params_

{'C': 10, 'kernel': 'linear'}

In [19]:
# Mean cross-validated test score of the best-ranked candidate.
classifier.best_score_

0.9525694767893185

In [20]:
# Tabulate the grid-search results: one row per candidate, one column per
# cv_results_ key.
result = pd.DataFrame.from_dict(classifier.cv_results_, orient="columns")

In [21]:
# Show the full grid-search results table.
result

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,1.059441,0.409302,0.000637,5.1e-05,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.002274,4.9e-05,0.000603,1e-05,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.002447,5.8e-05,0.001138,3.5e-05,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.005823,0.000132,0.00137,2.1e-05,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,2.180653,0.439123,0.000682,8.5e-05,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2
5,0.002392,0.000196,0.000579,8e-06,5,poly,"{'C': 5, 'kernel': 'poly'}",0.885965,0.912281,0.921053,0.938596,0.955752,0.922729,0.023689,6
6,0.002169,8.9e-05,0.000956,2.9e-05,5,rbf,"{'C': 5, 'kernel': 'rbf'}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,5
7,0.005415,0.000217,0.001253,1.3e-05,5,sigmoid,"{'C': 5, 'kernel': 'sigmoid'}",0.491228,0.421053,0.421053,0.350877,0.371681,0.411178,0.048578,14
8,3.254689,0.824222,0.000651,4.8e-05,10,linear,"{'C': 10, 'kernel': 'linear'}",0.938596,0.938596,0.973684,0.947368,0.964602,0.952569,0.0142,1
9,0.002639,8e-05,0.000555,1e-05,10,poly,"{'C': 10, 'kernel': 'poly'}",0.885965,0.921053,0.903509,0.938596,0.955752,0.920975,0.024701,8


In [22]:
# Keep only the columns needed to compare candidates: the two searched
# parameters and the mean cross-validated test score.
grid_search_result = result.loc[
    :,
    ['param_C', 'param_kernel', 'mean_test_score'],
]
grid_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,1,linear,0.945536
1,1,poly,0.908663
2,1,rbf,0.912172
3,1,sigmoid,0.460441
4,5,linear,0.950815
5,5,poly,0.922729
6,5,rbf,0.931501
7,5,sigmoid,0.411178
8,10,linear,0.952569
9,10,poly,0.920975


In [23]:
# Randomized search over the same `parameters` space with 5-fold CV.
# Unlike the grid search it samples only a subset of the 16 kernel/C
# combinations (the recorded run evaluated 10 candidates), so which candidates
# get tried depends on the random draw. random_state is pinned so that the
# sampled candidates, and therefore best_params_ / best_score_, are
# reproducible under Restart Kernel -> Run All.
# Note: this rebinds `classifier`, replacing the fitted grid-search object.
classifier = RandomizedSearchCV(model, parameters, cv=5, random_state=42)

In [24]:
# Run the randomized search on the full data set.
classifier.fit(X,Y)

In [25]:
# Raw cross-validation results for the sampled candidates only
# (the recorded output holds 10 entries per array).
classifier.cv_results_

{'mean_fit_time': array([0.00682578, 0.00564113, 0.00222383, 0.00235186, 0.00556722,
        0.00236835, 0.00282183, 0.00229034, 2.18568459, 0.00582891]),
 'std_fit_time': array([1.27969362e-03, 3.11499749e-04, 6.86467664e-05, 2.45610280e-04,
        3.46956678e-04, 1.67623461e-04, 1.09654985e-04, 1.32406749e-04,
        4.26077599e-01, 1.25636719e-04]),
 'mean_score_time': array([0.00174928, 0.00131845, 0.00103817, 0.00056405, 0.00126858,
        0.00090203, 0.00053539, 0.00061321, 0.00065703, 0.00138783]),
 'std_score_time': array([4.79948100e-04, 4.62455842e-05, 9.21598807e-05, 1.59711266e-05,
        3.04009025e-05, 4.23893483e-05, 3.37338615e-05, 5.99278271e-05,
        2.81412308e-05, 1.86045948e-05]),
 'param_kernel': masked_array(data=['sigmoid', 'sigmoid', 'rbf', 'poly', 'sigmoid', 'rbf',
                    'poly', 'poly', 'linear', 'sigmoid'],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?'

In [62]:
# Best-ranked parameter combination among the candidates the randomized
# search actually sampled (not necessarily the best of the whole grid).
best_parameters = classifier.best_params_
best_parameters

{'kernel': 'linear', 'C': 5}

In [27]:
# Mean cross-validated test score of the best sampled candidate.
# NOTE(review): no `scoring` argument is passed to the search, so this is the
# estimator's default score — presumably accuracy, as the name suggests; confirm.
highest_accuracy = classifier.best_score_
highest_accuracy

0.9508150908244062

In [28]:
# Tabulate the randomized-search results, one row per sampled candidate.
# This rebinds `result`, replacing the grid-search table built earlier.
result = pd.DataFrame.from_dict(classifier.cv_results_, orient="columns")

In [29]:
# Show the full randomized-search results table.
result

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_kernel,param_C,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.006826,0.00128,0.001749,0.00048,sigmoid,20,"{'kernel': 'sigmoid', 'C': 20}",0.473684,0.403509,0.421053,0.342105,0.353982,0.398867,0.04764,10
1,0.005641,0.000311,0.001318,4.6e-05,sigmoid,5,"{'kernel': 'sigmoid', 'C': 5}",0.491228,0.421053,0.421053,0.350877,0.371681,0.411178,0.048578,8
2,0.002224,6.9e-05,0.001038,9.2e-05,rbf,5,"{'kernel': 'rbf', 'C': 5}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,2
3,0.002352,0.000246,0.000564,1.6e-05,poly,5,"{'kernel': 'poly', 'C': 5}",0.885965,0.912281,0.921053,0.938596,0.955752,0.922729,0.023689,3
4,0.005567,0.000347,0.001269,3e-05,sigmoid,10,"{'kernel': 'sigmoid', 'C': 10}",0.482456,0.403509,0.421053,0.342105,0.362832,0.402391,0.048906,9
5,0.002368,0.000168,0.000902,4.2e-05,rbf,20,"{'kernel': 'rbf', 'C': 20}",0.877193,0.921053,0.921053,0.947368,0.938053,0.920944,0.024105,4
6,0.002822,0.00011,0.000535,3.4e-05,poly,20,"{'kernel': 'poly', 'C': 20}",0.877193,0.921053,0.903509,0.938596,0.955752,0.919221,0.0273,5
7,0.00229,0.000132,0.000613,6e-05,poly,1,"{'kernel': 'poly', 'C': 1}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,6
8,2.185685,0.426078,0.000657,2.8e-05,linear,5,"{'kernel': 'linear', 'C': 5}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,1
9,0.005829,0.000126,0.001388,1.9e-05,sigmoid,1,"{'kernel': 'sigmoid', 'C': 1}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,7


In [46]:
# Reduce the randomized-search table to the searched parameters and the
# mean cross-validated test score for easy comparison.
randomized_search_result = result.loc[
    :,
    ['param_C', 'param_kernel', 'mean_test_score'],
]
randomized_search_result

Unnamed: 0,param_C,param_kernel,mean_test_score
0,20,sigmoid,0.398867
1,5,sigmoid,0.411178
2,5,rbf,0.931501
3,5,poly,0.922729
4,10,sigmoid,0.402391
5,20,rbf,0.920944
6,20,poly,0.919221
7,1,poly,0.908663
8,5,linear,0.950815
9,1,sigmoid,0.460441
