# SVM notebook

### Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.svm import SVC, NuSVC, LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, accuracy_score, recall_score, precision_score
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB

### Load datasets

In [5]:
wpbc_original = pd.read_csv(
    '../dataset/wpbc_original_B.csv',
    sep=',',
    header=0
)
wpbc_original

In [7]:
(original_train, original_test) = train_test_split(wpbc_original, test_size=0.25, train_size=0.75, shuffle=True)

original_train_labels = original_train['OUTCOME']
original_train_features = original_train.iloc[:,2:]
original_test_labels = original_test['OUTCOME']
original_test_features = original_test.iloc[:,2:]

# Model development

## Original dataset

### SVC dev

In [8]:
svc_classifier = SVC(
    C=5.5,
    kernel='linear',
    gamma='scale',
    class_weight='balanced'
)

svc_classifier.fit(
    X=original_train_features,
    y=original_train_labels
)

predictions = svc_classifier.predict(
    X=original_test_features
)
acc = accuracy_score(y_true=original_test_labels, y_pred=predictions)
f1 = f1_score(y_true=original_test_labels, y_pred=predictions, zero_division=0)
rec = recall_score(y_true=original_test_labels, y_pred=predictions, zero_division=0)
prec = precision_score(y_true=original_test_labels, y_pred=predictions, zero_division=0)
print(acc, f1, rec, prec)

0.7631578947368421 0.7272727272727273 0.6666666666666666 0.8
