# SVM notebook

### Imports

In [2]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

### Load datasets

In [3]:
# Read the comma-separated dataset file; the first row of the file supplies
# the column names.
wpbc_original = pd.read_csv('../dataset/wpbc_original_B.csv', sep=',', header=0)

# Bare last expression so the notebook renders a preview of the frame.
wpbc_original

Unnamed: 0,OUTCOME,RADIUS_1,TEXTURE_1,PERIMETER_1,AREA_1,SMOOTHNESS_1,COMPACTNESS_1,CONCAVITY_1,CONCAVE_POINTS_1,SYMMETRY_1,FRACTAL_DIMENSIONS_1,RADIUS_2,TEXTURE_2,PERIMETER_2,AREA_2,SMOOTHNESS_2,COMPACTNESS_2,CONCAVITY_2,CONCAVE_POINTS_2,SYMMETRY_2,FRACTAL_DIMENSIONS_2,RADIUS_3,TEXTURE_3,PERIMETER_3,AREA_3,SMOOTHNESS_3,COMPACTNESS_3,CONCAVITY_3,CONCAVE_POINTS_3,SYMMETRY_3,FRACTAL_DIMENSIONS_3,TUMOR_SIZE,LYMPH_NODE_STATUS
0,0,18.020000,27.600000,117.500000,1013.000000,0.094890,0.103600,0.108600,0.070550,0.186500,0.063330,0.624900,1.890000,3.972000,71.550000,0.004433,0.014210,0.032330,0.009854,0.016940,0.003495,21.630000,37.080000,139.700000,1436.000000,0.119500,0.192600,0.314000,0.117000,0.267700,0.081130,5.000000,5.000000
1,0,17.990000,10.380000,122.800000,1001.000000,0.118400,0.277600,0.300100,0.147100,0.241900,0.078710,1.095000,0.905300,8.589000,153.400000,0.006399,0.049040,0.053730,0.015870,0.030030,0.006193,25.380000,17.330000,184.600000,2019.000000,0.162200,0.665600,0.711900,0.265400,0.460100,0.118900,3.000000,2.000000
2,0,21.370000,17.440000,137.500000,1373.000000,0.088360,0.118900,0.125500,0.081800,0.233300,0.060100,0.585400,0.610500,3.928000,82.150000,0.006167,0.034490,0.033000,0.018050,0.030940,0.005039,24.900000,20.980000,159.100000,1949.000000,0.118800,0.344900,0.341400,0.203200,0.433400,0.090670,2.500000,0.000000
3,0,11.420000,20.380000,77.580000,386.100000,0.142500,0.283900,0.241400,0.105200,0.259700,0.097440,0.495600,1.156000,3.445000,27.230000,0.009110,0.074580,0.056610,0.018670,0.059630,0.009208,14.910000,26.500000,98.870000,567.700000,0.209800,0.866300,0.686900,0.257500,0.663800,0.173000,2.000000,0.000000
4,1,20.290000,14.340000,135.100000,1297.000000,0.100300,0.132800,0.198000,0.104300,0.180900,0.058830,0.757200,0.781300,5.438000,94.440000,0.011490,0.024610,0.056880,0.018850,0.017560,0.005115,22.540000,16.670000,152.200000,1575.000000,0.137400,0.205000,0.400000,0.162500,0.236400,0.076780,3.500000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297,1,20.026955,26.447778,135.085597,1257.432103,0.096783,0.184876,0.172653,0.109357,0.185664,0.060010,0.809096,1.300255,6.351477,101.157407,0.006550,0.046907,0.037969,0.015499,0.023355,0.004790,24.725514,33.595926,171.289300,1882.995881,0.132166,0.485126,0.405787,0.191754,0.303419,0.091121,5.218107,9.917695
298,1,17.433955,21.816099,115.199748,959.636269,0.092011,0.127620,0.100975,0.075580,0.210960,0.055866,0.818805,1.564022,5.987450,90.700076,0.006973,0.053250,0.044579,0.020488,0.027770,0.005470,20.617928,27.004074,138.999653,1321.191743,0.116899,0.275903,0.234718,0.152020,0.310379,0.075122,3.900032,1.199937
299,1,24.450253,18.960836,161.187481,1966.519188,0.090793,0.119089,0.180771,0.112794,0.160575,0.052170,1.666931,0.897773,11.155016,302.785102,0.005101,0.013678,0.024063,0.014892,0.016122,0.002287,34.988185,24.029564,231.395676,3854.528911,0.117644,0.193159,0.308795,0.185801,0.237269,0.063876,3.070555,4.000000
300,1,19.635022,23.339172,128.800549,1192.500748,0.100584,0.116314,0.175905,0.100470,0.187260,0.055531,0.545039,1.829727,4.045063,61.922863,0.008326,0.026489,0.046259,0.016526,0.026990,0.002548,22.095327,32.810923,148.704440,1480.529682,0.137292,0.265190,0.435569,0.186070,0.316944,0.070050,1.749875,10.001497


In [4]:
# Fixed seed so the shuffled split -- and every metric computed from it -- is
# identical after "Restart Kernel -> Run All".
RANDOM_SEED = 42

# Hold out 25% of the rows for evaluation and keep the remaining 75% for training.
(original_train, original_test) = train_test_split(
    wpbc_original,
    test_size=0.25,
    train_size=0.75,
    shuffle=True,
    random_state=RANDOM_SEED,
)

# Column 0 is the index that was saved into the CSV ('Unnamed: 0') and column 1
# is the OUTCOME label, so the feature columns start at position 2.
original_train_labels = original_train['OUTCOME']
original_train_features = original_train.iloc[:, 2:]
original_test_labels = original_test['OUTCOME']
original_test_features = original_test.iloc[:, 2:]

# Model development

## Original dataset

### SVC development

In [5]:
# Linear-kernel SVM with C=5.5 and class_weight='balanced'. gamma is passed
# through unchanged, although it only applies to the 'rbf', 'poly' and
# 'sigmoid' kernels. fit() returns the estimator itself, so the fitted model
# is bound directly to svc_classifier.
svc_classifier = SVC(
    kernel='linear', C=5.5, gamma='scale', class_weight='balanced'
).fit(original_train_features, original_train_labels)

# Predict the held-out rows and print the per-class precision / recall / F1 table.
predictions = svc_classifier.predict(original_test_features)
report = classification_report(original_test_labels, predictions)
print(report)

              precision    recall  f1-score   support

           0       0.76      0.54      0.63        41
           1       0.60      0.80      0.68        35

    accuracy                           0.66        76
   macro avg       0.68      0.67      0.66        76
weighted avg       0.68      0.66      0.65        76

