In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    recall_score,
    precision_score,
    f1_score,
    confusion_matrix
)

from sklearn.svm import SVC

In [2]:
# Read the header-less CSV that sits one directory above the current working
# directory and label its 11 columns (10 feature columns followed by 'class').
# NOTE(review): file name and column names look like the UCI MAGIC Gamma
# Telescope dataset — confirm provenance.
data_path = Path(".").resolve().parent.joinpath("magic04.data")
data = pd.read_csv(data_path, header=None).set_axis(
    [
        'fLength',
        'fWidth',
        'fSize',
        'fConc',
        'fConc1',
        'fAsym',
        'fM3Long',
        'fM3Trans',
        'fAlpha',
        'fDist',
        'class',
    ],
    axis="columns",
)

In [3]:
# Encode the label column numerically ('g' -> 1, 'h' -> 0) as int8.
# `assign` returns a new frame, so `data` keeps its original string labels.
data2 = data.assign(
    **{"class": data["class"].map({'g': 1, 'h': 0}).astype('int8')}
)

In [4]:
# Split features and label 70/30; the fixed random_state makes the split
# reproducible. The label is kept as a single-column DataFrame.
X_train, X_test, Y_train, Y_test = train_test_split(
    data2.drop(columns="class"),
    data2.loc[:, ["class"]],
    train_size=0.7,
    random_state=0,
)

# Standardise the features. The scaler is fitted on the training split only and
# then applied unchanged to the test split; set_output keeps the results as
# pandas DataFrames instead of bare arrays.
scaler = StandardScaler().set_output(transform="pandas")
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
# Keyword arguments forwarded verbatim to the SVC constructor.
# NOTE(review): C and gamma presumably come from an earlier hyperparameter
# search — confirm and record where the values originated.
hyperparameters = dict(
    kernel="rbf",
    C=92.193361,
    gamma=0.106529,
)

In [10]:
# Fit the SVM on the scaled training features and score it on both splits.
#
# The label frames hold a single column, so they are flattened to 1-D once and
# the same vectors are used for fitting and for every metric.
y_train_true = Y_train.values.ravel()
y_test_true = Y_test.values.ravel()

model = SVC(**hyperparameters).fit(X_train, y_train_true)
Y_pred = model.predict(X_train)
Y_pred_test = model.predict(X_test)

# Metric name -> scoring function. Dict order fixes the print order; recall,
# precision and f1 use scikit-learn's binary default, i.e. they score label 1.
metric_functions = {
    "accuracy": accuracy_score,
    "recall": recall_score,
    "precision": precision_score,
    "f1": f1_score,
}

# Print each metric for train and then test so the two can be compared directly.
for metric_name, metric_fn in metric_functions.items():
    print(f"Train {metric_name}: {metric_fn(y_train_true, Y_pred):.3f}")
    print(f"Test {metric_name}: {metric_fn(y_test_true, Y_pred_test):.3f}")

Train accuracy: 0.903
Test accuracy: 0.876
Train recall: 0.974
Test recall: 0.957
Train precision: 0.887
Test precision: 0.865
Train f1: 0.929
Test f1: 0.909


In [8]:
# Confusion matrices for both splits. scikit-learn puts true labels on the rows
# and predicted labels on the columns, ordered by label value (0 then 1).
train_confusion = confusion_matrix(Y_train, Y_pred)
test_confusion = confusion_matrix(Y_test, Y_pred_test)

print(f"Train confusion matrix:\n {train_confusion}")
print(f"Test confusion matrix:\n {test_confusion}")

Train confusion matrix:
 [[3602 1069]
 [ 223 8420]]
Test confusion matrix:
 [[1466  551]
 [ 157 3532]]
