# Importing packages.


In [1]:
import numpy as np
from cvxopt import matrix,solvers
import numexpr as ne

In [2]:
np.set_printoptions(threshold=5)

# Loading our dataset. Taken from kaggle https://www.kaggle.com/itsmesunil/bank-loan-modelling

In [3]:
data = np.genfromtxt('bank.csv', delimiter=',', skip_header=True)
data.shape


(5000, 14)

In [4]:
data = np.delete(data,0,1) #deleting the first column since it is just serial number
data

array([[25.,  1., 49., ...,  0.,  0.,  0.],
       [45., 19., 34., ...,  0.,  0.,  0.],
       [39., 15., 11., ...,  0.,  0.,  0.],
       ...,
       [63., 39., 24., ...,  0.,  0.,  0.],
       [65., 40., 49., ...,  0.,  1.,  0.],
       [28.,  4., 83., ...,  0.,  1.,  1.]])

In [5]:
X = data[:,[0,1,2,3,4,5,6,7,9,10,11,12]]
Y = data[:,[8]]
Y = Y.ravel()

In [6]:
from sklearn.preprocessing import StandardScaler

In [7]:
scaler = StandardScaler()

In [8]:
# for i,j in enumerate(Y):
#     if j == -1:
#         Y[i] = 0

In [9]:
scaler.fit(X)
X_tansf = scaler.transform(X)

In [10]:
X_tansf

array([[-1.77441684, -1.66607847, -0.53822878, ..., -0.25354028,
        -1.21661774, -0.64531434],
       [-0.02952359, -0.09633022, -0.8641094 , ..., -0.25354028,
        -1.21661774, -0.64531434],
       [-0.55299157, -0.44516316, -1.36379301, ..., -0.25354028,
        -1.21661774, -0.64531434],
       ...,
       [ 1.54088033,  1.64783451, -1.08136314, ..., -0.25354028,
        -1.21661774, -0.64531434],
       [ 1.71536965,  1.73504275, -0.53822878, ..., -0.25354028,
         0.82195086, -0.64531434],
       [-1.51268285, -1.40445376,  0.20043396, ..., -0.25354028,
         0.82195086,  1.54963239]])

In [11]:
Y

array([-1., -1., -1., ..., -1., -1., -1.])

 RBF Kernel used in this implementation.
![](gaussian-radial-basis-function-RBF.png)

In [12]:
# def rbf_kernel_matrix(X_i,X_j,gamma=0.5):
#     K = np.zeros((X_i.shape[0], X_j.shape[0]))
#     for idx, x_i in enumerate(X_i):
#             for jdx, x_j in enumerate(X_j):
#                 K[idx, jdx] = np.exp(-gamma * np.linalg.norm(x_i - x_j) ** 2)
        
#     return K

In [13]:
def rbf_kernel(X):
    X_norm = np.sum(X ** 2, axis = -1)
    var = 5
    gamma = 0.01
    K = ne.evaluate('v * exp(-g * (A + B - 2 * C))', {
        'A' : X_norm[:,None],
        'B' : X_norm[None,:],
        'C' : np.dot(X, X.T),
        'g' : gamma,
        'v' : var
    })
    return K

In [14]:
def train_svm():
    C = 100
    n, k = X_tansf.shape
    
    y_matrix = Y.reshape(1, -1)
    H = np.dot(y_matrix.T, y_matrix) * rbf_kernel(X_tansf)
    P = matrix(H)
    q = matrix(-np.ones((n, 1)))
    G = matrix(np.vstack((-np.eye((n)), np.eye(n))))
    h = matrix(np.vstack((np.zeros((n,1)), np.ones((n,1)) * C)))
    A = matrix(y_matrix)
    b = matrix(np.zeros(1))
    
    solvers.options['abstol'] = 1e-10
    solvers.options['reltol'] = 1e-10
    solvers.options['feastol'] = 1e-10

    return solvers.qp(P, q, G, h, A, b)

In [15]:
svm_parameters = train_svm()
print(svm_parameters)

     pcost       dcost       gap    pres   dres
 0:  1.6979e+05 -2.4562e+07  6e+07  6e-01  4e-12
 1:  2.3927e+05 -6.9609e+06  1e+07  9e-02  4e-12
 2:  1.4048e+05 -2.0957e+06  3e+06  2e-02  3e-12
 3:  8.9833e+04 -1.0854e+06  1e+06  8e-03  3e-12
 4:  5.3916e+04 -6.5005e+05  8e+05  3e-03  3e-12
 5:  2.5892e+04 -3.5655e+05  4e+05  1e-03  3e-12
 6:  8.2962e+03 -1.9663e+05  2e+05  5e-04  3e-12
 7: -3.2737e+03 -9.0647e+04  9e+04  1e-04  3e-12
 8: -7.7322e+03 -5.5517e+04  5e+04  5e-05  3e-12
 9: -8.6378e+03 -5.0730e+04  4e+04  4e-05  3e-12
10: -1.0717e+04 -3.9661e+04  3e+04  2e-05  3e-12
11: -1.1636e+04 -3.5751e+04  2e+04  1e-05  3e-12
12: -1.2528e+04 -3.1582e+04  2e+04  6e-06  3e-12
13: -1.3421e+04 -2.7422e+04  1e+04  2e-06  4e-12
14: -1.4229e+04 -2.4507e+04  1e+04  9e-07  4e-12
15: -1.4643e+04 -2.2515e+04  8e+03  3e-07  4e-12
16: -1.5060e+04 -2.1462e+04  6e+03  2e-07  4e-12
17: -1.6028e+04 -1.8972e+04  3e+03  2e-08  5e-12
18: -1.6182e+04 -1.8626e+04  2e+03  8e-09  4e-12
19: -1.6560e+04 -1.79

In [16]:
def get_parameters(alphas):
    threshold = 1e-4 
    S = (alphas > threshold).flatten()
    w = np.dot(X_tansf.T, alphas * Y)
    b = Y[S] - np.dot(X_tansf[S], w)
    b = np.mean(b)
    return w, b, S

alphas = np.array(svm_parameters['x'])[:, 0]
w, b, S = get_parameters(alphas)

In [17]:
print('Alphas:', alphas[S][0:20])
print('w and b', w, b)

Alphas: [ 92.11760169 100.         100.         ... 100.         100.
  12.85531111]
w and b [  8.21197566  27.59608188  48.37508838 ...  23.37312929 -24.63812018
  -1.2421954 ] -38.5833688992389


In [18]:
def predict(x):
    result = np.sign(np.dot(x,w) + b)
    return result

In [19]:
y_preds = predict(X)

In [20]:
y_preds

array([-1., -1., -1., ..., -1., -1., -1.])

In [21]:
from sklearn.metrics import accuracy_score

In [22]:
print("training accuracy is {}".format(accuracy_score(y_preds,Y)))

training accuracy is 0.904
