In [1]:
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics.pairwise import polynomial_kernel as sk_polynomial_kernel

from svm import SVM
from utils.data import create_data
from utils.kernels import rbf_kernel
from utils.utils import scikit_svm, predict, accuracy, Q_matrix, is_pos_def

In [2]:
# Use the interactive widget (ipympl) backend so figures render as canvases in the notebook
%matplotlib widget

In [3]:
# Load the saved train/test split from 'data_38.npz'.
# The commented-out call below presumably regenerates that file — confirm in utils.data.
#create_data(file_name = 'data_38', classes = [3,8])
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open,
# so the arrays are read inside a `with` block to guarantee the handle is
# released. After the block, `load_data` is closed; use the arrays below.
with np.load('data_38.npz') as load_data:
    train_X, train_y = load_data['train_X'], load_data['train_y']
    test_X, test_y = load_data['test_X'], load_data['test_y']

# Checking if data is linearly separable

In [52]:
# Baseline score from the project helper scikit_svm (presumably a scikit-learn
# SVM trained on the train split and scored on the test split — confirm in utils.utils).
# Takes a bit to run
# The linear-kernel run is disabled; uncomment both `linear` lines to compare kernels.
#linear = scikit_svm('linear', train_X, train_y, test_X, test_y)
rbf = scikit_svm('rbf', train_X, train_y, test_X, test_y)
#print(f'Accuracy using linear kernel: {linear}')
print(f'Accuracy using gaussian kernel: {rbf}')

Accuracy using gaussian kernel: 0.968


In [4]:
# Create mini dataset for testing
# Keeps only the first 1000 training and the first 500 test examples.
# NOTE: the full arrays are overwritten under the same names; re-run the
# loading cell to get the complete dataset back.
train_X, train_y = train_X[:1000,:], train_y[:1000]
test_X, test_y = test_X[:500,:], test_y[:500]

# Running SMO

## Get accuracy before training

In [4]:
# Untrained baseline: every alpha is set to 1 and the bias b to 0, then the
# project helpers predict/accuracy are used to score the test labels.
num_training_examples = train_y.shape[0]

alpha = np.ones(num_training_examples)
b = 0
# Kernel evaluated between the training inputs and the test inputs
# (presumably one row per training example — confirm in utils.kernels).
kernel_matrix = rbf_kernel(train_X, test_X)

pred_y = predict(alpha, kernel_matrix, b, train_y)

print(f'Accuracy before training: {accuracy(pred_y, test_y)}')

Accuracy before training: 0.5090725806451613


# Speed test: SMO fast and slow

In [5]:
# Time one complete run of the "slow" configuration: model construction,
# training, and the test-set accuracy evaluation are all inside the timed span.
# time.perf_counter() is used instead of time.time() because it is a monotonic,
# high-resolution clock intended for measuring elapsed intervals, whereas the
# wall clock can jump if the system time is adjusted.
start = time.perf_counter()
svm = SVM(optim = 'SMO',
          kernel = 'rbf',
          C = 1,
          max_passes = 10,
          calc_g_iterates = True)  # presumably what makes this the "slow" variant — confirm in svm.py
svm.fit(train_X, train_y)
print(svm.get_accuracy(test_X, test_y))
end = time.perf_counter()
print(f'Time taken with slow implementation: {end-start}s')

0.916
Time taken with slow implementation: 20.7358238697052s


# Check if Q is positive definite
$Q$ is the matrix with elements $Q_{ij} = y_i y_j \, k(x_i, x_j)$.

In [18]:
# Build Q from the training inputs/labels stored on the fitted model and the RBF kernel function
Q_mat = Q_matrix(svm.train_X, svm.train_y, rbf_kernel)

In [19]:
# Project helper; presumably returns True when Q_mat is positive definite — confirm in utils.utils
is_pos_def(Q_mat)

True

# Checking the convergence of g

\begin{equation}
  g(\alpha) = \sum_{i=1}^m \alpha_i - \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^m \alpha_i \alpha_j y_i y_j k(x_i,x_j)
   \label{eq:svoptim_problem}
\end{equation}

In [6]:
# Draw the model's convergence plot (presumably the values of g recorded during
# fit because calc_g_iterates was enabled — confirm in svm.py), then label the x-axis.
svm.plot_convergence()
plt.xlabel('Steps')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Text(0.5, 0, 'Steps')

In [8]:
import sklearn

In [42]:
# Reference degree-3 polynomial kernel matrix computed by scikit-learn on the
# training inputs; used to validate the hand-written polynomial_kernel in this
# notebook. The function is imported explicitly (as sk_polynomial_kernel)
# because a bare `import sklearn` does not guarantee that the
# sklearn.metrics.pairwise submodule has been loaded in a fresh kernel.
a = sk_polynomial_kernel(train_X, train_X, degree=3, gamma=None, coef0=1)

In [29]:
def polynomial_kernel(X_train, X_test, degree = 2, gamma = None, c = 1):
    """Polynomial kernel matrix between the rows of two sample matrices.

    Entry ``(i, j)`` of the result is
    ``(gamma * <X_train[i], X_test[j]> + c) ** degree``.

    Parameters
    ----------
    X_train : array with one sample per row (features along axis 1).
    X_test : array with one sample per row and the same number of features.
    degree : exponent applied element-wise to the shifted, scaled products.
    gamma : scale applied to the inner products. When ``None`` it falls back
        to ``1 / X_train.shape[1]``, i.e. one over the number of features.
    c : constant added before exponentiation.

    Returns
    -------
    Array whose rows correspond to ``X_train`` samples and whose columns
    correspond to ``X_test`` samples.
    """
    # Resolve the default scale lazily so an explicit gamma never touches .shape
    scale = 1/X_train.shape[1] if gamma is None else gamma

    # Pairwise inner products between every X_train row and every X_test row
    inner_products = X_train @ np.transpose(X_test)

    shifted = scale*inner_products + c
    return np.power(shifted, degree)

In [43]:
# Validate the hand-written kernel against the scikit-learn reference matrix `a`.
# The comparison uses a floating-point tolerance instead of exact `==`, and it
# raises AssertionError on any mismatch (it is silent on success), so the check
# fails loudly under Restart & Run All instead of needing a boolean matrix inspected by eye.
np.testing.assert_allclose(polynomial_kernel(train_X, train_X, degree=3), a)

In [44]:
# Display the reference kernel matrix computed with scikit-learn's polynomial_kernel
a

array([[1.54801477, 1.31978044, 1.26963979, ..., 1.40757825, 1.20765103,
        1.39950812],
       [1.31978044, 1.42531284, 1.28761843, ..., 1.32361529, 1.19533134,
        1.27979038],
       [1.26963979, 1.28761843, 1.5381469 , ..., 1.2536647 , 1.13069621,
        1.22508666],
       ...,
       [1.40757825, 1.32361529, 1.2536647 , ..., 1.58472563, 1.23701806,
        1.38876392],
       [1.20765103, 1.19533134, 1.13069621, ..., 1.23701806, 1.30223601,
        1.25802409],
       [1.39950812, 1.27979038, 1.22508666, ..., 1.38876392, 1.25802409,
        1.57825307]])

In [45]:
# Display the degree-3 kernel matrix from the notebook's own polynomial_kernel for visual comparison
polynomial_kernel(train_X, train_X, degree=3)

array([[1.54801477, 1.31978044, 1.26963979, ..., 1.40757825, 1.20765103,
        1.39950812],
       [1.31978044, 1.42531284, 1.28761843, ..., 1.32361529, 1.19533134,
        1.27979038],
       [1.26963979, 1.28761843, 1.5381469 , ..., 1.2536647 , 1.13069621,
        1.22508666],
       ...,
       [1.40757825, 1.32361529, 1.2536647 , ..., 1.58472563, 1.23701806,
        1.38876392],
       [1.20765103, 1.19533134, 1.13069621, ..., 1.23701806, 1.30223601,
        1.25802409],
       [1.39950812, 1.27979038, 1.22508666, ..., 1.38876392, 1.25802409,
        1.57825307]])