In [1]:
import numpy as np

from utils.data import create_data
from utils.utils import scikit_svm, predict, accuracy, Q_matrix, is_pos_def
from utils.kernels import rbf_kernel

from optimisation.backtracking import backtracking
from optimisation.newton import newton_method

from svm import SVM

import matplotlib.pyplot as plt

import time

In [2]:
%matplotlib widget

In [3]:
# create_data(file_name = 'data_38', classes = [3,8])  # uncomment to regenerate data_38.npz
# np.load on an .npz archive returns a lazily-read NpzFile that keeps the
# underlying file open. Pull the arrays out inside a context manager so the
# handle is closed as soon as they are in memory (load_data is closed afterwards).
with np.load('data_38.npz') as load_data:
    train_X, train_y = load_data['train_X'], load_data['train_y']
    test_X, test_y = load_data['test_X'], load_data['test_y']

# Checking if data is linearly separable

In [13]:
# Slow cell: baseline accuracy from the project's scikit_svm helper.
# The linear-kernel run is disabled; re-enable both of its lines together.
# linear = scikit_svm('linear', train_X, train_y, test_X, test_y)
# print(f'Accuracy using linear kernel: {linear}')
rbf = scikit_svm(
    'rbf',
    train_X, train_y,
    test_X, test_y,
)
print(f'Accuracy using gaussian kernel: {rbf}')

Accuracy using gaussian kernel: 0.9924395161290323


In [4]:
# Shrink the data to a small subset for quick experiments:
# the first 1000 training rows and the first 500 test rows.
train_X = train_X[:1000, :]
train_y = train_y[:1000]
test_X = test_X[:500, :]
test_y = test_y[:500]

# Running SMO

## Get accuracy before training

In [4]:
# Untrained reference point: alpha is all ones and b is zero.
num_training_examples = len(train_y)
alpha = np.ones(num_training_examples)
b = 0

# Kernel evaluated between the training inputs and the test inputs.
kernel_matrix = rbf_kernel(train_X, test_X)
pred_y = predict(alpha, kernel_matrix, b, train_y)

print(f'Accuracy before training: {accuracy(pred_y, test_y)}')

Accuracy before training: 0.5090725806451613


# Speed test: SMO fast and slow

In [7]:
# Time one SMO run end to end: build the model, fit on train_X/train_y and
# score on test_X/test_y (the scoring and its print are inside the timed span).
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start = time.perf_counter()
svm = SVM(optim = 'SMO',
          kernel = 'rbf',
          C = 1,
          max_passes = 10,
          calc_g_iterates = False)
svm.fit(train_X, train_y)
print(svm.get_accuracy(test_X, test_y))
end = time.perf_counter()
print(f'Time taken: {end-start}s')

# Full dataset

0.936
Time taken: 0.562298059463501s


# Check if Q is positive definite
$Q$ is the matrix with elements $Q_{ij} = y_iy_jk(x_i,x_j)$

In [18]:
# Build Q from the training data stored on the fitted model, using the same
# rbf_kernel function that the rest of the notebook uses.
Q_mat = Q_matrix(
    svm.train_X,
    svm.train_y,
    rbf_kernel,
)

In [19]:
# Display the result of the project's is_pos_def helper for Q_mat
# (the recorded run shows True).
is_pos_def(Q_mat)

True

# Checking the convergence of g

\begin{equation}
  g(\alpha) = \sum_{i=1}^m \alpha_i - \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^m \alpha_i \alpha_j y_i y_j k(x_i,x_j)
   \label{eq:svoptim_problem}
\end{equation}

In [15]:
# Plot the recorded values of g over the optimisation steps.
# NOTE(review): the saved output of this cell is "IndexError: list index out of range".
# The SVM above was constructed with calc_g_iterates = False, so there may be no
# iterates to plot — confirm, and re-fit with calc_g_iterates = True before running this.
svm.plot_convergence()
plt.xlabel('Steps')

IndexError: list index out of range

# Backtracking

In [1]:
import numpy as np

from utils.data import create_data
from utils.utils import scikit_svm, predict, accuracy, Q_matrix, is_pos_def
from utils.kernels import rbf_kernel

from optimisation.backtracking import backtracking
from optimisation.newton import newton_method

from svm import SVM

import matplotlib.pyplot as plt

import time

In [2]:
class TestFunction:
    """Quadratic test objective f(x) = x^T Q x with a random 2x2 matrix Q.

    Q is drawn with np.random.rand, so it is in general not symmetric. The
    derivatives therefore use the symmetric part Q + Q^T; the shortcut
    2 Q x / 2 Q is only correct for a symmetric Q and would hand the optimiser
    a wrong gradient and Hessian here.
    """

    def __init__(self):
        # Entries are uniform on [0, 1); no seed is set here, so Q differs between runs.
        self.Q = np.random.rand(2,2)

    def f(self, x):
        """Return the quadratic form x^T Q x evaluated at x."""
        return np.transpose(x) @ self.Q @ x

    def df(self, x):
        """Return the gradient of f at x, i.e. (Q + Q^T) x."""
        return (self.Q + self.Q.T) @ x

    def d2f(self, x):
        """Return the Hessian of f, the constant matrix Q + Q^T.

        x is unused; it is kept so f, df and d2f share the same call signature.
        """
        return self.Q + self.Q.T

In [8]:
# Inputs for the Newton's-method experiment below.
F = TestFunction()

# A has a single non-zero entry in its top-left corner.
# (Alternative that was tried: A = np.random.rand(2,2))
A = np.diag([1.0, 0.0])
print(A)

# Starting point (10, 0).
x = np.array([10.0, 0.0])
print(x)

# Right-hand side (1, 0).
b = np.array([1.0, 0.0])

[[1. 0.]
 [0. 0.]]
[10.  0.]


In [9]:
# Run newton_method on the test function with the backtracking routine passed in;
# the returned value is displayed as the cell output.
newton_method(
    F, x, A, b, backtracking,
    max_iter=200,
    tol=1e-6,
    alpha0 = 1,
)

(array([ 11.        , -38.64502415]), 2)

In [12]:
# Sanity check of the pseudo-inverse on a singular matrix: the all-ones 2x2
# matrix applied to the all-ones vector.
ones_matrix = np.ones((2, 2))
ones_vector = np.ones(2)
np.linalg.pinv(ones_matrix) @ ones_vector

array([0.5, 0.5])