# Support Vector Machine Exhibition

In [None]:
# Import relevant packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

import from_scratch.split_data as split_data
import from_scratch.svm as svm


# 2D Data

In [None]:
# Create data: two Gaussian blobs in 2D.
# A fixed random_state makes the dataset (and everything trained on it)
# identical on every Restart & Run All.
TwoDFeatures, TwoDTargets = make_blobs(
    n_samples=200, n_features=2, centers=2, cluster_std=2, random_state=42)

# make_blobs labels the two clusters 0 and 1; relabel 0 -> -1 so the targets
# are in the {-1, +1} form used by the rest of the notebook.
ClassA = TwoDTargets == 0
TwoDTargets[ClassA] = -1
plt.scatter(TwoDFeatures[ClassA, 0], TwoDFeatures[ClassA, 1],
            color="red", label="class -1")

ClassB = TwoDTargets == 1
plt.scatter(TwoDFeatures[ClassB, 0], TwoDFeatures[ClassB, 1],
            color="blue", label="class +1")

plt.legend()
# plt.plot() with no arguments draws nothing and only echoes an empty list as
# the cell output; plt.show() is the call that renders the figure.
plt.show()


## From Scratch Implementation

In [None]:
# Add bias to features: append a column of ones as the last feature so the
# model can learn its intercept as an ordinary weight.
TwoDFeatures_modded = np.column_stack(
    [TwoDFeatures, np.ones(TwoDFeatures.shape[0])])


In [None]:
# Train/test split
# The project splitter is handed the transposed feature matrix and the targets
# as a single-row matrix. Its results are unpacked as (train features,
# train targets, test features, test targets) - a different order from
# sklearn's train_test_split.
train_features, train_targets, test_features, test_targets = (
    split_data.train_test_split(
        TwoDFeatures_modded.T,
        TwoDTargets.reshape(1, -1),
    )
)


In [None]:
# Train model
# The SVM is constructed with the size of the first axis of the training
# matrix (presumably one weight per feature row, bias row included).
num_feature_rows = train_features.shape[0]
svm_model = svm.SVM(num_feature_rows)
svm_model.fit(train_features, train_targets)


In [None]:
# Test model
predictions = svm_model.predict(test_features)

# Element-wise hits divided by the size of axis 1 (presumably one column per
# test sample), expressed as a percentage.
accuracy = np.sum(predictions == test_targets) / predictions.shape[1] * 100
# Fixed the "Accuray" typo in the printed label.
print(f"Accuracy = {accuracy:2.3f}%")


In [None]:
# Visualize model
# The weights are read as [w_1, w_2, w_0]: the bias weight comes last because
# the constant-1 feature was appended after the two real features.
line = svm_model.weights.T
w_0 = line[2]
w_1 = line[0]
w_2 = line[1]

# Decision boundary w_1*x + w_2*y + w_0 = 0, solved for y.
discriminator = -(w_1/w_2)*test_features[0, :] - (w_0/w_2)

test_ClassA = test_targets[0, :] == -1
plt.scatter(test_features[0, test_ClassA],
            test_features[1, test_ClassA], color="red", label="class -1")

test_ClassB = test_targets[0, :] == 1
plt.scatter(test_features[0, test_ClassB],
            test_features[1, test_ClassB], color="blue", label="class +1")

plt.plot(test_features[0, :], discriminator,
         color="black", label="decision boundary")
plt.legend()
# plt.plot() with no arguments draws nothing and only echoes an empty list;
# plt.show() is the call that renders the figure.
plt.show()


## Sklearn Implementation

In [None]:
# Train-test split
# Seeded so the same samples land in the train and test sets on every run;
# without random_state the reported accuracy changes from run to run.
train_features, test_features, train_targets, test_targets = train_test_split(
    TwoDFeatures, TwoDTargets, random_state=42)


In [None]:
# Train model
# random_state pins the data shuffling done by LinearSVC's dual solver so the
# fitted coefficients are reproducible across runs.
linear_svm = LinearSVC(loss="hinge", max_iter=5000, random_state=42)
linear_svm.fit(train_features, train_targets)


In [None]:
# Test model
predictions = linear_svm.predict(test_features)

# Element-wise hits divided by the number of predictions, as a percentage.
accuracy = np.sum(predictions == test_targets) / predictions.shape[0] * 100
# Fixed the "Accuray" typo in the printed label.
print(f"Accuracy = {accuracy:2.3f}%")


In [None]:
# Visualize model
# LinearSVC keeps the feature weights in coef_ and the bias in intercept_.
w_1 = linear_svm.coef_[0, 0]
w_2 = linear_svm.coef_[0, 1]
w_0 = linear_svm.intercept_[0]

# Decision boundary w_1*x + w_2*y + w_0 = 0, solved for y.
discriminator = -(w_1/w_2)*test_features[:, 0] - (w_0/w_2)

test_ClassA = test_targets == -1
plt.scatter(test_features[test_ClassA, 0],
            test_features[test_ClassA, 1], color="red", label="class -1")

test_ClassB = test_targets == 1
plt.scatter(test_features[test_ClassB, 0],
            test_features[test_ClassB, 1], color="blue", label="class +1")

plt.plot(test_features[:, 0], discriminator,
         color="black", label="decision boundary")
plt.legend()
# plt.plot() with no arguments draws nothing and only echoes an empty list;
# plt.show() is the call that renders the figure.
plt.show()


# Breast Cancer Data

In [None]:
# Load data
# Comma is read_csv's default delimiter, so it is not spelled out.
original_data = pd.read_csv("data/breast_cancer.csv")
original_data.head()


In [None]:
# Clean data
# Encode the diagnosis letters as the +1 / -1 targets used by the SVMs.
diagnosis_map = {'M': 1, 'B': -1}
original_data["diagnosis"] = original_data["diagnosis"].map(diagnosis_map)

# Remove the last and the first column of the frame.
# NOTE(review): this cell mutates original_data in place, so running it a
# second time re-maps the already-encoded diagnosis and drops two more columns.
columns_to_drop = original_data.columns[[-1, 0]]
original_data.drop(columns=columns_to_drop, inplace=True)
original_data.head()


In [None]:
# Split into features and targets and normalize
Y = original_data["diagnosis"]
# Every column after the first is treated as a feature (assumes "diagnosis"
# is the first remaining column - TODO confirm against the cleaned frame).
X = original_data.iloc[:, 1:]

# Min-max scaling, column by column.
column_min = X.min()
column_span = X.max() - column_min
X_normalized = (X - column_min) / column_span
X_norm_copy = X_normalized.copy()
X_normalized.head()

## From Scratch Implementation

In [None]:
# Train/test split
# As in the 2D section, the from-scratch SVM gets its bias through an extra
# constant-1 feature, so that column must be on the matrix that is split here.
# The original cell inserted it into X_normalized but then split the bias-free
# X_norm_copy, leaving this model without a bias and handing the redundant
# column to the sklearn section, which reads X_normalized.
# assign() returns a new frame with the column appended last, so
# X_normalized and X_norm_copy stay untouched and the cell can be re-run
# (DataFrame.insert raises if the column already exists).
X_with_bias = X_norm_copy.assign(intercept=1)  # add bias to features

train_features, train_targets, test_features, test_targets = split_data.train_test_split(
    X_with_bias.values.T, Y.values.reshape((1, Y.shape[0])))


In [None]:
# Train model
# The SVM is constructed with the size of the first axis of the training
# matrix (presumably one weight per feature row).
num_feature_rows = train_features.shape[0]
svm_model = svm.SVM(num_feature_rows)
svm_model.fit(train_features, train_targets)


In [None]:
# Test model
predictions = svm_model.predict(test_features)

# Element-wise hits divided by the size of axis 1 (presumably one column per
# test sample), expressed as a percentage.
accuracy = np.sum(predictions == test_targets) / predictions.shape[1] * 100
# Fixed the "Accuray" typo in the printed label.
print(f"Accuracy = {accuracy:2.3f}%")


## Sklearn Implementation

In [None]:
# Train-test split
# Seeded so the same rows land in the train and test sets on every run;
# without random_state the reported accuracy changes from run to run.
train_features, test_features, train_targets, test_targets = train_test_split(
    X_normalized.values, Y, random_state=42)


In [None]:
# Train model
# random_state pins the data shuffling done by LinearSVC's dual solver so the
# fitted coefficients are reproducible across runs.
linear_svm = LinearSVC(loss="hinge", max_iter=5000, random_state=42)
linear_svm.fit(train_features, train_targets)


In [None]:
# Test model
predictions = linear_svm.predict(test_features)

# Element-wise hits divided by the number of predictions, as a percentage.
accuracy = np.sum(predictions == test_targets) / predictions.shape[0] * 100
# Fixed the "Accuray" typo in the printed label.
print(f"Accuracy = {accuracy:2.3f}%")
