# Design of the linear SVM model
Here we design by ourselves a linear SVM model adapted to our dataset

## Importing the necessary libraries

In [37]:
import numpy as np
import pandas as pd
import dill

## Preparing the data for training and testing
### Importing the datasets

In [40]:
train_data = pd.read_csv("Datasets\\Train\\train_data.csv", index_col=0)
test_data = pd.read_csv("Datasets\\Test\\test_data.csv", index_col=0)

### Creating X and Y for each dataset

In [100]:
X_train = train_data["HOG"]
Y_train = train_data["label"]
X_test = test_data["HOG"]
Y_test = test_data["label"]

### Designing the SVM model through object-oriented programming

In [103]:
class Linear_SVM:
    # The init function will be useful to create the SVM object and will do the fitting as well
    def __init__(self, X, y, iter_nb, lr, C):
        
        self.X = np.array(X)
        self.y = np.array(y)
        self.iter_nb = iter_nb
        # This is simply the number of iterations to do
        self.lr = lr
        # lr is the learning rate for the gradient descent approach used here
        self.C = C
        # C is a coeff defining the impact of each support vector on the updating of the vector w
        
        # Initialize normal vector by setting all its element at 1, then w will be reduced during the fitting phase
        # The vector w defines the hyperplane separating the 'pos' samples from the 'neg' ones
        # (w is actually the vector normal to the hyperplane)
        # Its length has to be the same as any vector sample from the training set as it belongs to the same space
        self.w = np.ones(len(self.X[0]))
    
    
    def distances(self, w):
        # We compute the distances between the sample and the current hyperplane defined by the current vector w
        # We know that (xi.w +b) >= 1 or <= -1 according to the sample being 'pos' or 'neg'
        # Since, for all 'neg' samples, yi = -1, and for all 'pos' samples yi = 1
        # The absolute distance of a sample (xi, yi) to the hyperplane's margin corresponds to yi*(xi.w + b) - 1
        # (This is true because we considered the width of the margin to be 1)
        distances = self.y * (np.dot(self.X, w)) - 1
        
        # The support vectors (located between the two margins) have a distance inferior to 0
        # (Such distances exist in our dataset only if w is not optimal yet)
        distances[distances > 0] = 0
        # All vectors that aren't on support vectors now have distances equal to 0
        # This will allow us to identify which vectors are on the support vector
        # And will allow us to apply the Lagrange multiplier

        return distances
    
    def get_cost_grads(self, X, w, y):
        
        # We compute a list of distances defined by the previous function
        distances = self.distances(w)

        # L is the current cost defined by the following formula:
        # L = |w|²/2 - sum(distance_i*coeff_i)
        L = 1 / 2 * np.dot(w, w) - self.C * np.sum(distances)
        
        dw = np.zeros(len(w))
        
        # We use the index and value of each element in the list "distances"
        for index, val in enumerate(distances):
            if val == 0:  
                # As said earlier, this means the sample is not on the support vector
                # (alpha = 0 for Lagrange multiplier)
                di = w  # We keep the initial value of w as di, no need to change it here
                # In other words, the current value of w "works" for this sample
                # (we are applying the gradient descent method with the Lagrange multiplier)
            else:
                # Here we have a support vector, it's not supposed to happen with an optimal value of w
                # (alpha = 1 for Lagrange multiplier)
                di = w - (self.C * y[index] * X[index]) # We compute an other value of a vector normal to
                # the optimal hyperplane, according to the gradient descent method
            dw += di
        return L, dw / len(X) #dw corresponds to a mean of all the di, and will be used to update the value of w
        # L is the objective cost function, and dw/len(X) is the gradient
    
    def fit(self):   
        # This is the fitting phase, where we find the optimal 
        # We obviously do a number of iterations equal to iter_nb
        # In reality we rarely can get the optimal value of w but we can get very close
        for i in range(self.iter_nb):
            L, dw = self.get_cost_grads(self.X, self.w, self.y)
            # We compute the cost function and the gradient of w at each iteration
            self.w = self.w - self.lr * dw
            # We update the value of w using the learning rate and the gradient
            # The gradient and cost values change at each iteration because w changes
            # (And because the "distances" list changes as well as a result of w being updated)

    def predict(self, X):
        return np.sign(X @ self.w)
        # The value of y_i corresponds to the sign of x_i.w, as it has been explained before
    
    def test(self, X, Y):
        comp = self.predict(X)-Y.flatten()
        # We make sure Y is a one-dimension array, or else the comparison won't work
        accuracy = len(np.where(comp == 0)[0]) / len(comp)
        # The accuracy depends on the comparison between the prediction made from X and the real value of Y
        print("Test Accuracy :", accuracy)

In [105]:
SVM = Linear_SVM(X=X_train, y=Y_train, iter_nb=10000, lr=1e-3, C=30)
SVM.fit()

ValueError: shapes (14596,) and (71,) not aligned: 14596 (dim 0) != 71 (dim 0)

'[0.         0.         0.         ... 0.13701964 0.04555695 0.        ]'