# Linear Support Vector Machine 

In [2]:
#imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cvxpy as cp

In [3]:
# Train dataset: Xsvm.csv supplies the feature rows and ysvm.csv the labels.
# header=None makes pandas read the first row as data rather than column names.
X = pd.read_csv('Xsvm.csv', header=None)
y = pd.read_csv('ysvm.csv', header=None)
x_train = np.asarray(X)
y_train = np.asarray(y).flatten()  # collapse the label table into a 1-D vector
N = len(x_train)  # number of training samples (500 for the provided files)
# Fail early if features and labels are misaligned: the later cells pair
# x_train[i] with y_train[i] elementwise.
assert y_train.shape == (N,), "expected %d labels, got shape %s" % (N, y_train.shape)
print(x_train.shape)

(500, 2)


In [52]:
# Query points to classify with the fitted model (one 2-D point per row)
x_test = np.array(
    [
        [2, 0.5],
        [0.8, 0.7],
        [1.58, 1.33],
        [0.008, 0.001],
    ]
)

##### Observing the training dataset
We are given 2-D vectors whose labels belong to one of two classes, y = 1 and y = -1. The scatter plot below shows these vectors in the plane, colored by class:
* <span style="color:#FF0000">*Red* : y = 1</span>
* <span style="color:#9400D3">*Violet* : y = -1 </span>

In [None]:
# Scatter the training points colored by label. The rainbow colormap sends the
# smallest label value to violet and the largest to red.
fig, ax = plt.subplots()
ax.scatter(x_train[:, 0], x_train[:, 1], s=15, c=y_train, cmap=plt.cm.rainbow)
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set_xlabel("$x_1$")
ax.set_ylabel("$x_2$")
ax.set_title("Training data by class label")
plt.show()

In [19]:
# SVM dual problem: one Lagrange multiplier per training sample.
alpha = cp.Variable(N)
sum1 = cp.sum(alpha)  # first term in Ld

# Second term in Ld: 0.5 * || sum_i alpha_i * y_i * x_i ||^2.
# cp.multiply scales each label by its multiplier elementwise, so no N x N
# diagonal matrix has to be built, and cp.sum_squares is the squared
# Euclidean norm of the resulting 2-vector.
weighted_labels = cp.multiply(alpha, y_train)
sum2 = 0.5 * cp.sum_squares(cp.matmul(x_train.T, weighted_labels))

# Maximize the dual objective subject to alpha_i >= 0 and sum_i alpha_i * y_i = 0.
Ld = sum1 - sum2
goal = cp.Maximize(Ld)
constraints = [alpha >= 0, cp.sum(weighted_labels) == 0]
problem = cp.Problem(goal, constraints)
dual_optimum = problem.solve(verbose=True)

# alpha.value is only usable when the solver reached an optimum; stop here with
# a clear message instead of letting a later cell fail on a missing value.
if problem.status not in ("optimal", "optimal_inaccurate"):
    raise RuntimeError("SVM dual was not solved to optimality (status: %s)" % problem.status)

# Keep the optimal objective value as the cell's displayed output.
dual_optimum


ECOS 2.0.4 - (C) embotech GmbH, Zurich Switzerland, 2012-15. Web: www.embotech.com/ECOS

It     pcost       dcost      gap   pres   dres    k/t    mu     step   sigma     IR    |   BT
 0  +0.000e+00  -1.047e+00  +2e+03  1e+00  8e-01  1e+00  3e+00    ---    ---    1  1  - |  -  - 
 1  -7.040e+02  -2.216e+02  +1e+03  8e+01  6e+01  6e+02  2e+00  0.4946  4e-01   1  2  2 |  0  0
 2  -2.717e+01  -2.927e+00  +8e+02  6e-01  4e-01  3e+01  2e+00  0.9890  7e-01   2  2  2 |  0  0
 3  -4.351e+01  -7.816e+00  +4e+02  5e-01  3e-01  4e+01  8e-01  0.6145  2e-01   2  2  2 |  0  0
 4  -7.101e+01  -3.516e+01  +2e+02  1e+00  3e-01  4e+01  4e-01  0.8705  4e-01   2  1  1 |  0  0
 5  -2.276e+01  -2.158e+01  +2e+02  9e-02  2e-01  2e+00  3e-01  0.5327  7e-01   2  2  1 |  0  0
 6  -6.649e+01  -6.505e+01  +5e+01  6e-02  7e-02  2e+00  1e-01  0.9890  3e-01   2  1  1 |  0  0
 7  -6.052e+01  -6.015e+01  +1e+01  1e-02  1e-02  4e-01  2e-02  0.8228  4e-02   3  1  1 |  0  0
 8  -6.507e+01  -6.494e+01  +5e+00  3e-03  7e-

69.4370298544791

In [47]:
# Recover the primal weight vector from the dual solution:
#   w = sum_i alpha_i * y_i * x_i
Alpha = np.asarray(alpha.value)
# Scale every sample (a column of x_train.T) by alpha_i * y_i, then add up the columns.
weights = np.multiply(Alpha * y_train, x_train.T).sum(axis=1)
print(weights)

[-0.11843836 11.78388371]


In [49]:
# Bias from the KKT condition: a sample with a non-zero multiplier satisfies
# y_i * (w . x_i + w0) = 1, hence w0 = 1 / y_i - w . x_i.
# The sample with the largest multiplier is used as that reference point.
i_max = Alpha.argmax()
w0 = 1 / y_train[i_max] - weights.dot(x_train[i_max])

In [51]:
def prediction(_x):
    """Classify one point, or a batch of points, with the linear decision rule.

    Uses the module-level ``weights`` and ``w0`` computed in the earlier cells.

    Parameters
    ----------
    _x : array-like
        A single feature vector, or a 2-D array holding one feature vector
        per row.

    Returns
    -------
    The sign of ``w . x + w0``: 1.0 or -1.0, or 0.0 for a point lying exactly
    on the decision boundary. A scalar for a single vector, a 1-D array for a
    batch.
    """
    # With _x as the left operand the same product covers a single vector and
    # a row-per-sample matrix; for a single vector the value is unchanged.
    score = np.dot(_x, weights) + w0
    return np.sign(score)

In [55]:
# Classify every test point in turn and show the predicted labels
results = list(map(prediction, x_test))
print(results)

[1.0, 1.0, 1.0, 1.0]
