# Importing packages.


In [1]:
import numpy as np
from cvxopt import matrix,solvers

In [28]:
np.set_printoptions(threshold=5)

# Loading our dataset. Taken from kaggle https://www.kaggle.com/itsmesunil/bank-loan-modelling

In [29]:
data = np.genfromtxt('bank.csv', delimiter=',', skip_header=True)
data.shape


(5000, 14)

In [30]:
data = np.delete(data,0,1) #deleting the first column since it is just serial number
data

array([[25.,  1., 49., ...,  0.,  0.,  0.],
       [45., 19., 34., ...,  0.,  0.,  0.],
       [39., 15., 11., ...,  0.,  0.,  0.],
       ...,
       [63., 39., 24., ...,  0.,  0.,  0.],
       [65., 40., 49., ...,  0.,  1.,  0.],
       [28.,  4., 83., ...,  0.,  1.,  1.]])

In [70]:
X = data[:,[0,1,2,3,4,5,6,7,9,10,11,12]]
Y = data[:,[8]]
Y = np.ravel(Y)

In [71]:
Y.shape

(5000,)

 RBF Kernel used in this implementation.
![](gaussian-radial-basis-function-RBF.png)

In [37]:
def rbf_kernel_matrix(X_i,X_j,gamma=0.3):
    K = np.zeros((X_i.shape[0], X_j.shape[0]))
    for idx, x_i in enumerate(X_i):
            for jdx, x_j in enumerate(X_j):
                K[idx, jdx] = np.exp(-gamma * np.linalg.norm(x_i - x_j) ** 2)
        
    return K

In [75]:
def train_svm():
    C = 100
    n, k = X.shape
    
    y_matrix = Y.reshape(1, -1)
    H = np.dot(y_matrix.T, y_matrix) * rbf_kernel_matrix(X, X)
    P = matrix(H)
    q = matrix(-np.ones((n, 1)))
    G = matrix(np.vstack((-np.eye((n)), np.eye(n))))
    h = matrix(np.vstack((np.zeros((n,1)), np.ones((n,1)) * C)))
    A = matrix(y_matrix)
    b = matrix(np.zeros(1))
    
    solvers.options['abstol'] = 1e-10
    solvers.options['reltol'] = 1e-10
    solvers.options['feastol'] = 1e-10

    return solvers.qp(P, q, G, h, A, b)

In [76]:
svm_parameters = train_svm()
print(svm_parameters)

     pcost       dcost       gap    pres   dres
 0: -2.2826e+05 -2.3326e+07  2e+07  1e-02  1e-14
 1: -2.3252e+05 -6.8119e+05  4e+05  3e-04  5e-15
 2: -4.1092e+05 -4.5502e+05  4e+04  2e-05  9e-15
 3: -4.5159e+05 -4.5207e+05  5e+02  2e-07  2e-15
 4: -4.5200e+05 -4.5200e+05  5e+00  2e-09  9e-15
 5: -4.5200e+05 -4.5200e+05  5e-02  2e-11  3e-15
 6: -4.5200e+05 -4.5200e+05  5e-04  2e-13  2e-15
 7: -4.5200e+05 -4.5200e+05  5e-06  2e-15  2e-14
Optimal solution found.
{'x': <5000x1 matrix, tc='d'>, 'y': <1x1 matrix, tc='d'>, 's': <10000x1 matrix, tc='d'>, 'z': <10000x1 matrix, tc='d'>, 'status': 'optimal', 'gap': 4.8143803378981185e-06, 'relative gap': 1.0651283933499717e-11, 'primal objective': -451999.99999589217, 'dual objective': -452000.00000069465, 'primal infeasibility': 1.8224139585119536e-15, 'dual infeasibility': 1.713840376470244e-14, 'primal slack': 1.3303551890890983e-13, 'dual slack': 1.3358934343279811e-12, 'iterations': 7}


In [78]:
def get_parameters(alphas):
    threshold = 1e-5 # Values greater than zero (some floating point tolerance)
    S = (alphas > threshold).reshape(-1, )
    w = np.dot(X.T, alphas * Y)
    b = Y[S] - np.dot(X[S], w) # b calculation
    b = np.mean(b)
    return w, b, S

alphas = np.array(svm_parameters['x'])[:, 0]
w, b, S = get_parameters(alphas)

In [79]:
print('Alphas:', alphas[S][0:20])
print('w and b', w, b)

Alphas: [100. 100. 100. ... 100. 100. 100.]
w and b [8.30415189e-23 3.58628147e-23 2.71188047e-22 ... 5.54852477e-25
 1.13624505e-24 5.69245383e-25] -1.625619421064422e-14
