In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_curve

In [2]:
df = pd.read_csv('californiabin.csv')
df.head()

Unnamed: 0,4.168499999999999872e+00,2.000000000000000000e+01,6.700636942675159524e+00,1.178343949044585948e+00,7.450000000000000000e+02,2.372611464968152895e+00,3.879999999999999716e+01,-1.211500000000000057e+02,1.000000000000000000e+00
0,2.8194,24.0,4.7625,1.020833,608.0,2.533333,36.75,-119.85,0.0
1,2.425,46.0,4.927711,1.018072,772.0,2.325301,36.33,-119.31,0.0
2,3.1205,16.0,3.728477,1.101545,1260.0,2.781457,33.87,-117.99,0.0
3,4.3889,41.0,5.741007,1.199041,837.0,2.007194,34.15,-118.4,1.0
4,2.9934,52.0,3.441379,1.02069,318.0,2.193103,37.57,-122.32,1.0


In [3]:
data = df.values
data.shape

(1999, 9)

In [4]:
attributes = data[:, :-1]
targets = data[:, -1]

In [5]:
hyperparameters = {
    'svc__C' : [2**i for i in range(-1, 7, 2)],
    'svc__kernel' : ['rbf'],
    'svc__gamma' : [2**i for i in range(-7, 1, 2)]
}

{'svc__C': [0.5, 2, 8, 32],
 'svc__kernel': ['rbf'],
 'svc__gamma': [0.0078125, 0.03125, 0.125, 0.5]}

In [19]:
hyperparameters

{'svc__C': [0.5, 2, 8, 32],
 'svc__kernel': ['rbf'],
 'svc__gamma': [0.0078125, 0.03125, 0.125, 0.5]}

In [6]:
train_attributes, test_attributes, train_targets, test_targets = train_test_split(attributes, targets, test_size=0.2, random_state=42)

In [7]:
kfold = KFold(n_splits= 10, shuffle= True, random_state= 42)

In [8]:
svc = SVC(kernel='rbf')

In [9]:
pipe = Pipeline([('scaler', MinMaxScaler()), ('svc', svc)])

In [10]:
gs = GridSearchCV(estimator= pipe, param_grid= hyperparameters, cv= 5, n_jobs=-1)

In [11]:
gs.fit(train_attributes, train_targets)

In [12]:
gs.score(test_attributes, test_targets)

0.8075

In [13]:
cv_results = pd.DataFrame(gs.cv_results_)
cv_results.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_svc__C,param_svc__gamma,param_svc__kernel,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,0.133262,0.005147,0.072617,0.009573,0.5,0.007812,rbf,"{'svc__C': 0.5, 'svc__gamma': 0.0078125, 'svc_...",0.509375,0.50625,0.50625,0.50625,0.507837,0.507192,0.001252,16
1,0.11957,0.008321,0.068245,0.00195,0.5,0.03125,rbf,"{'svc__C': 0.5, 'svc__gamma': 0.03125, 'svc__k...",0.79375,0.834375,0.8125,0.815625,0.793103,0.809871,0.015372,14
2,0.111688,0.01,0.059081,0.004137,0.5,0.125,rbf,"{'svc__C': 0.5, 'svc__gamma': 0.125, 'svc__ker...",0.81875,0.840625,0.83125,0.85625,0.836991,0.836773,0.012244,11
3,0.083228,0.007869,0.040518,0.002459,0.5,0.5,rbf,"{'svc__C': 0.5, 'svc__gamma': 0.5, 'svc__kerne...",0.834375,0.83125,0.834375,0.875,0.840125,0.843025,0.016243,7
4,0.1128,0.007897,0.068292,0.006125,2.0,0.007812,rbf,"{'svc__C': 2, 'svc__gamma': 0.0078125, 'svc__k...",0.79375,0.83125,0.8125,0.815625,0.793103,0.809246,0.014396,15


In [14]:
best_model = gs.best_estimator_
best_model.fit(train_attributes, train_targets)


prediction = best_model.predict(test_attributes)

In [15]:
roc = roc_curve(test_targets, prediction)
roc

(array([0.       , 0.1745283, 1.       ]),
 array([0.        , 0.78723404, 1.        ]),
 array([2., 1., 0.]))

In [16]:
prec = precision_score(test_targets, prediction)
prec

0.8

In [17]:
f1 = f1_score(test_targets, prediction)
f1

0.7935656836461126

In [18]:
from sklearn.metrics import RocCurveDisplay, PrecisionRecallDisplay

metrics = [
    [None, None, None, None],
    [None, None, None, None]
]