# Support Vector Machine From Scratch

In [1]:
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
style.use("ggplot")

In [2]:
class SupportVectorMachine:
    """Linear two-class SVM for 2-feature samples, trained by grid search.

    The weight vector is restricted to the form ``[v, v]`` with each
    component optionally sign-flipped, and the bias ``b`` is scanned over a
    fixed grid.  Among all ``(w, b)`` pairs satisfying
    ``yi * (xi . w + b) >= 1`` for every training sample, the one with the
    smallest ``||w||`` is kept.
    """

    def __init__(self, visualization=True):
        """Store plotting options and, if requested, create a figure.

        Args:
            visualization: When true, a matplotlib figure with a single
                subplot is created and stored on ``self.figure`` /
                ``self.axis``.
        """
        self.visualization = visualization
        # Plot colour per class label.
        self.colors = {1: "r", -1: "b"}
        if self.visualization:
            self.figure = plt.figure()
            self.axis = self.figure.add_subplot(1, 1, 1)

    # Train
    def fit(self, data):
        """Search for the minimum-norm separating hyperplane.

        Sets ``self.data``, ``self.max_feature_value``,
        ``self.min_feature_value`` and, on success, ``self.w`` and ``self.b``.

        Args:
            data: Mapping from class label to an iterable of 2-feature
                samples.  The label is used directly as ``yi`` in the
                constraint, so labels are expected to be ``1`` and ``-1``.

        Raises:
            ValueError: If ``data`` holds no feature values, if every feature
                is zero (the search grid would have zero size), or if no
                ``(w, b)`` on the searched grid separates the classes.
        """
        self.data = data
        # { ||w||: [w, b] } -- must be a dict: it is keyed by the float norm.
        opt_dict = {}
        transforms = [[1, 1], [-1, 1], [-1, -1], [1, -1]]

        all_data = [
            feature
            for yi in self.data
            for feature_set in self.data[yi]
            for feature in feature_set
        ]
        if not all_data:
            raise ValueError("fit() requires at least one training sample")
        self.max_feature_value = max(all_data)
        self.min_feature_value = min(all_data)
        del all_data

        # Flatten the training set once so every candidate can be tested
        # against all samples with array operations.
        samples = np.array(
            [xi for yi in self.data for xi in self.data[yi]], dtype=float
        )
        labels = np.array(
            [yi for yi in self.data for _ in self.data[yi]], dtype=float
        )

        # Size the grid on the largest feature magnitude.  This equals
        # max_feature_value for the usual positive-dominated data, and stays
        # positive when the largest feature is zero or negative (a
        # non-positive step would keep the search loop from terminating).
        scale = max(abs(self.max_feature_value), abs(self.min_feature_value))
        if scale == 0:
            raise ValueError("all feature values are zero; cannot size the search grid")

        # Support vectors satisfy yi * (xi . w + b) = 1, so a good (w, b)
        # gives a value close to 1 for the nearest sample of each class.
        step_size = [
            scale * 0.1,
            scale * 0.01,
            # Point of expense:
            scale * 0.001,  # Add scale * 0.0001 to be more precise.
        ]
        # Extremely expensive: b is scanned over +/- scale * b_range_multiple.
        b_range_multiple = 5
        # No need to take as small of steps with b as with w.
        b_multiple = 5
        latest_optimum = scale * 10

        for step in step_size:
            # The b grid depends only on the step, so build it once per pass.
            b_values = np.arange(
                -1 * scale * b_range_multiple,
                scale * b_range_multiple,
                step * b_multiple,
            )
            w = np.array([latest_optimum, latest_optimum])
            # The search relies on the problem being convex: each finer pass
            # only needs to look below the previous pass's optimum.
            optimized = False
            while not optimized:
                self._record_feasible(
                    opt_dict, w, transforms, samples, labels, b_values
                )
                if w[0] < 0:
                    optimized = True
                    print("Optimized a step.")
                else:
                    w = w - step

            if not opt_dict:
                # Nothing feasible at this resolution; retry with the finer
                # step from the same starting magnitude.
                continue

            # ||w|| : [w, b] -- pick the smallest norm found so far.
            opt_choice = opt_dict[min(opt_dict)]
            self.w = opt_choice[0]
            self.b = opt_choice[1]
            # Restart slightly above the optimum's magnitude.  The chosen w
            # may have a negative first component, so take its absolute value;
            # otherwise the next pass would start below zero and do nothing.
            latest_optimum = abs(opt_choice[0][0]) + step * 2

        if not opt_dict:
            raise ValueError(
                "no (w, b) on the searched grid separates the training data"
            )

    @staticmethod
    def _record_feasible(opt_dict, w, transforms, samples, labels, b_values):
        """Store the feasible candidate for ``w`` in ``opt_dict``, if any.

        Every sign-flipped variant of ``w`` is tested against every bias in
        ``b_values``.  When several (bias, variant) pairs are feasible, the
        one with the largest bias (and, for that bias, the last variant in
        ``transforms``) is stored, matching a nested loop over biases then
        variants in which later hits overwrite earlier ones.

        Builds one ``len(samples) x len(b_values)`` array per variant.
        """
        candidates = [w * transformation for transformation in transforms]
        # feasible[t, j] is True when variant t with bias j satisfies
        # yi * (xi . w + b) >= 1 for every sample.
        feasible = np.array([
            (
                labels[:, None]
                * ((samples @ w_t)[:, None] + b_values[None, :])
                >= 1
            ).all(axis=0)
            for w_t in candidates
        ])
        feasible_biases = np.flatnonzero(feasible.any(axis=0))
        if feasible_biases.size == 0:
            return
        b_index = feasible_biases[-1]
        t_index = np.flatnonzero(feasible[:, b_index])[-1]
        w_t = candidates[t_index]
        # Keyed by the magnitude of the vector.
        opt_dict[np.linalg.norm(w_t)] = [w_t, b_values[b_index]]

    def predict(self, features):
        """Classify ``features`` as ``sign(x . w + b)``.

        Args:
            features: One 2-feature sample, or a sequence of such samples.

        Returns:
            The sign of the decision function (``0`` exactly on the
            boundary); one value per sample when several are given.
        """
        # sign(x.w + b)
        classification = np.sign(np.dot(np.array(features), self.w) + self.b)
        return classification

In [3]:
# Toy training set: class label -> array with one 2-feature sample per row.
data_dict = {
    -1: np.array([[1, 7], [2, 8], [3, 8]]),
    1: np.array([[5, 1], [6, -1], [7, 3]]),
}