In [159]:
import numpy as np
from sklearn.svm import SVC
import sklearn.preprocessing as preprocessing
from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix
import joblib

## load data and label

In [160]:
spam_data = np.loadtxt('./spambase.data', delimiter=',')
spam_X = spam_data[:, :-1]
spam_Y = spam_data[:, -1]

## prepare the K fold and preprocess step

In [161]:
kfold = KFold(n_splits=10, shuffle=False)

In [162]:
scaler = preprocessing.RobustScaler()
spam_X = scaler.fit_transform(spam_X)

## SVM

In [163]:
classifier = SVC(kernel='linear', gamma='scale')

In [164]:
error = 1
model_num = 0
error_list = []
fp_list = []
fn_list = []
for k, (train, test) in enumerate(kfold.split(spam_X, spam_Y)):
    classifier.fit(spam_X[train], spam_Y[train])
    Y_pred = classifier.predict(spam_X[test])
    tn, fp, fn, tp = confusion_matrix(spam_Y[test], Y_pred, labels=[0,1]).ravel()
    fp_list.append(fp / len(Y_pred))
    fn_list.append(fn / len(Y_pred))
    error_list.append((fp + fn) / len(Y_pred))
    if error > error_list[-1]:
        error = error_list[-1]
        model_num = k
        joblib.dump(classifier, './best_model.pkl')

## test on the bets model

In [165]:
best_classifier = joblib.load('./best_model.pkl')
best_error_list = []
best_fp_list = []
best_fn_list = []
for k, (train, test) in enumerate(kfold.split(spam_X, spam_Y)):
    Y_pred = best_classifier.predict(spam_X[test])
    tn, fp, fn, tp = confusion_matrix(spam_Y[test], Y_pred, labels=[0,1]).ravel()
    best_error_list.append((fp + fn) / len(Y_pred))
    best_fp_list.append(fp / len(Y_pred))
    best_fn_list.append(fn / len(Y_pred))

## show results

In [166]:
import pandas as pd

In [167]:
show_frame = np.concatenate((fp_list, fn_list, error_list)).reshape(3, 10).T
best_model_info = [[sum(best_fp_list)/10, sum(best_fn_list)/10, sum(best_error_list)/10]]
show_frame = np.concatenate((show_frame, best_model_info))

In [168]:
pd.DataFrame(show_frame, columns=["fp", "fn", "error rate"])

Unnamed: 0,fp,fn,error rate
0,0.0,0.149675,0.149675
1,0.0,0.086957,0.086957
2,0.0,0.1,0.1
3,0.004348,0.16087,0.165217
4,0.041304,0.0,0.041304
5,0.065217,0.0,0.065217
6,0.128261,0.0,0.128261
7,0.047826,0.0,0.047826
8,0.043478,0.0,0.043478
9,0.180435,0.0,0.180435
