In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV

In [None]:
dataset = datasets.load_breast_cancer()
print(dataset)

In [5]:
dataframe = pd.DataFrame(dataset.data, columns = dataset.feature_names)
dataframe['label'] = dataset.target
dataframe.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,label
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [6]:
dataframe.shape

(569, 31)

In [7]:
dataframe['label'].value_counts()

label
1    357
0    212
Name: count, dtype: int64

In [None]:
X = dataframe.drop(columns='label', axis=1)
y = dataframe['label']
print(X)

In [14]:
X = np.asarray(X)
y = np.asarray(y)

numpy.ndarray

## GridSearchCV

In [15]:
model = SVC()

In [16]:
parameters = {
    'kernel':['linear', 'poly', 'rbf', 'sigmoid'],
    'C':[1,5,10,20]
}

In [17]:
classifier = GridSearchCV(model,parameters, cv=5, verbose=1)

In [18]:
# fit the data to model
classifier.fit(X,y)

Fitting 5 folds for each of 16 candidates, totalling 80 fits


In [19]:
classifier.best_params_

{'C': 10, 'kernel': 'linear'}

In [20]:
classifier.cv_results_

{'mean_fit_time': array([8.03153563e-01, 2.52442360e-03, 2.52532959e-03, 7.34233856e-03,
        1.84525547e+00, 2.29983330e-03, 2.37951279e-03, 6.38980865e-03,
        2.41145563e+00, 2.70586014e-03, 9.01889801e-03, 8.05025101e-03,
        3.97779508e+00, 2.86035538e-03, 2.76207924e-03, 7.08165169e-03]),
 'std_fit_time': array([2.93737434e-01, 5.84883887e-04, 4.64902462e-04, 1.21686089e-03,
        6.92416250e-01, 7.51573802e-04, 8.22058888e-04, 2.78431190e-04,
        5.14746782e-01, 6.81783320e-04, 8.80887743e-03, 2.69806085e-03,
        1.22321513e+00, 5.43258994e-04, 3.77283461e-04, 4.04010299e-04]),
 'mean_score_time': array([0.00092859, 0.0008811 , 0.00240722, 0.00223794, 0.00060215,
        0.00072532, 0.00164027, 0.00172486, 0.00042748, 0.00120268,
        0.00242567, 0.00199614, 0.00041099, 0.00019894, 0.00181661,
        0.00223122]),
 'std_score_time': array([0.00141023, 0.00092829, 0.00080914, 0.00038501, 0.00049346,
        0.00064378, 0.00059732, 0.00035188, 0.00052385, 

In [21]:
# best parameters
best_params = classifier.best_params_

In [23]:
# loading result to pandas dataframe
result = pd.DataFrame(classifier.cv_results_)
result.head(16)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.803154,0.293737,0.000929,0.00141,1,linear,"{'C': 1, 'kernel': 'linear'}",0.947368,0.929825,0.973684,0.921053,0.955752,0.945536,0.018689,4
1,0.002524,0.000585,0.000881,0.000928,1,poly,"{'C': 1, 'kernel': 'poly'}",0.842105,0.885965,0.929825,0.947368,0.938053,0.908663,0.039382,12
2,0.002525,0.000465,0.002407,0.000809,1,rbf,"{'C': 1, 'kernel': 'rbf'}",0.850877,0.894737,0.929825,0.947368,0.938053,0.912172,0.035444,11
3,0.007342,0.001217,0.002238,0.000385,1,sigmoid,"{'C': 1, 'kernel': 'sigmoid'}",0.54386,0.45614,0.464912,0.385965,0.451327,0.460441,0.050253,13
4,1.845255,0.692416,0.000602,0.000493,5,linear,"{'C': 5, 'kernel': 'linear'}",0.947368,0.938596,0.973684,0.929825,0.964602,0.950815,0.016216,2
5,0.0023,0.000752,0.000725,0.000644,5,poly,"{'C': 5, 'kernel': 'poly'}",0.885965,0.912281,0.921053,0.938596,0.955752,0.922729,0.023689,6
6,0.00238,0.000822,0.00164,0.000597,5,rbf,"{'C': 5, 'kernel': 'rbf'}",0.885965,0.929825,0.938596,0.947368,0.955752,0.931501,0.024358,5
7,0.00639,0.000278,0.001725,0.000352,5,sigmoid,"{'C': 5, 'kernel': 'sigmoid'}",0.491228,0.421053,0.421053,0.350877,0.371681,0.411178,0.048578,14
8,2.411456,0.514747,0.000427,0.000524,10,linear,"{'C': 10, 'kernel': 'linear'}",0.938596,0.938596,0.973684,0.947368,0.964602,0.952569,0.0142,1
9,0.002706,0.000682,0.001203,0.000401,10,poly,"{'C': 10, 'kernel': 'poly'}",0.885965,0.921053,0.903509,0.938596,0.955752,0.920975,0.024701,8


In [24]:
print("higest accuracy is : ", classifier.best_score_)

higest accuracy is :  0.9525694767893185


## Randomized SearchCV

In [25]:
model = SVC()

In [26]:
classifier = RandomizedSearchCV(model, parameters, cv=5, verbose=1)

In [27]:
classifier.fit(X, y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


In [28]:
classifier.cv_results_

{'mean_fit_time': array([4.30874825e-03, 3.52859497e-03, 3.91564369e-03, 3.21230888e-03,
        1.89804335e+00, 7.39150047e-03, 5.63874245e-03, 2.46081352e-03,
        4.18133235e+00, 2.43605013e+00]),
 'std_fit_time': array([7.50406218e-04, 8.38231239e-04, 1.01882109e-03, 6.81893218e-04,
        6.87810651e-01, 6.05726251e-04, 6.91472423e-04, 4.90565044e-04,
        1.37794122e+00, 4.80854329e-01]),
 'mean_score_time': array([0.00180731, 0.00119352, 0.00266919, 0.00169811, 0.00080099,
        0.00210657, 0.00158916, 0.0016717 , 0.0007266 , 0.00084853]),
 'std_score_time': array([0.00051579, 0.00074241, 0.00053772, 0.0004064 , 0.00074901,
        0.00049063, 0.00048367, 0.00059092, 0.00063231, 0.00072998]),
 'param_kernel': masked_array(data=['poly', 'poly', 'rbf', 'poly', 'linear', 'sigmoid',
                    'sigmoid', 'rbf', 'linear', 'linear'],
              mask=[False, False, False, False, False, False, False, False,
                    False, False],
        fill_value='?',


In [29]:
classifier.best_params_

{'kernel': 'linear', 'C': 10}

In [30]:
classifier.best_score_

0.9525694767893185