In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
#from sklearn.preprocessing import PolynomialFeatures

In [2]:
iris = datasets.load_iris()
X = iris.data[50:]
y = iris.target[50:]
y[:50] = -1
y[50:] = 1
X_B = np.c_[np.ones((100, 1)), X]

In [3]:
epochs = 7000
eta = 0.01
weights = np.zeros((5, 1))
for epoch in range(epochs):
    grad = np.zeros((5, 1))
    z = np.zeros((5, 1))
    hinge_error = 0
    for point, target in zip(X_B, y):
        hinge_error += max(0, 1 - target * point.dot(weights)) / 100
        grad += 0 if 1 - target * point.dot(weights) <= 0 else (-target) * point.reshape(-1, 1) / 100
        z += grad ** 2
    # print("EPOCH: {} --- ERROR: {}".format(epoch, hinge_error))
    weights -= eta * grad / np.sqrt(z + 1e-7)

In [4]:
print("ACCURACY: ", np.sum(np.sign(X_B.dot(weights)) == y.reshape(-1, 1)),'%')

ACCURACY:  96 %


# Trick 1
В случае когда мы дополняем наши исходные фичи различными комбинациями(между собой), размерность вектора весов может стать очень большой, поэтому используется следующий метод:  
Вектор весов W в линейном классификаторе это есть взвешенная сумма: ∑alphaᵢ * Xᵢ alpha - становится параметром   
Получается, мы будем хранить n-мерный вектор, а не вектор размером 2ᵈ  
1-ый трюк - это обучение линейной модели без фактического хранения и вычисления экстремально большого вектора весов W. Мы храним вектор alpha по 1 для каждой точки данных.  
y_test = sign(∑alphaᵢ * X_Bᵢ.dot(x_test))

In [5]:
alpha = np.zeros((X_B.shape[0], 1))
gamma = np.zeros((X_B.shape[0], 1))
eta = 0.01
for epoch in range(epochs):
    hinge_error = 0
    w = np.sum(alpha * X_B, axis=0).reshape(-1, 1)
    for idx, point, target in zip(range(100), X_B, y):
        hinge_error += max(0, 1 - point.dot(w) * target)
        gamma[idx] = 0 if point.dot(w) * target >= 1 else np.sum((-target) * point) / 100
    # print("EPOCH: {} --- ERROR: {}".format(epoch, hinge_error / 100))
    alpha -= eta * gamma

In [6]:
w = np.sum(alpha * X_B, axis=0).reshape(-1, 1)
print("ACCURACY: ",np.sum(np.sign(X_B.dot(w)) == y.reshape(-1, 1)),'%')

ACCURACY:  97 %


# Linear kernel  
K(x, z) = xᵀz

In [7]:
lin_k = np.c_[X_B, X.dot(X.T)] # linear kernel matrix
n = lin_k.shape

In [8]:
weights = np.random.randn(n[1], 1)
epochs = 15000
eta_0 = 10
for epoch in range(epochs):
    hinge_error = 0
    eta = eta_0 / (epoch + 10)
    grad = np.zeros((n[1], 1))
    for point, target in zip(lin_k, y):
        hinge_error += max(0, 1 - point.dot(weights) * target) / n[0]
        grad += (-point.reshape(-1, 1) * target) / n[0] if point.dot(weights) * target < 1 else 0
    # print("EPOCH: {} --- ERROR: {}".format(epoch, hinge_error))
    weights -= eta * grad

In [9]:
print("ACCURACY: ",np.sum(np.sign(lin_k.dot(weights)) == y.reshape(-1, 1)), '%')

ACCURACY:  96 %


# Polynomial kernel  
K(x, z) = (1 + xᵀz)ᴾ

In [10]:
for p in [2, 3, 4, 5]:
    print("### {} ###".format(p))
    poly_k = np.zeros((X_B.shape[0], X_B.shape[0]))
    for i in range(X.shape[0]):
        for j in range(X.shape[0]):
            poly_k[i][j] = (1 + X_B[i].dot(X_B[j].T)) ** p # Polynomial kernel matrix. p - degree
    n = poly_k.shape
    
    weights = np.random.randn(n[1], 1)
    epochs = 20000
    eta_0 = 10
    for epoch in range(1, epochs+1):
        hinge_error = 0
        eta = eta_0 / (epoch + 100)
        grad = np.zeros((n[1], 1))
        for point, target in zip(poly_k, y):
            hinge_error += max(0, 1 - point.dot(weights) * target) / n[0]
            grad += (-point.reshape(-1, 1) * target) / n[0] if point.dot(weights) * target < 1 else 0
        # if epoch % 5000 == 0:
        #     print("EPOCH: {} --- ERROR: {}".format(epoch, hinge_error))
        weights -= eta * grad
    print("ACCURACY: ",np.sum(np.sign(poly_k.dot(weights)) == y.reshape(-1, 1)), '%')
    print()

### 2 ###
ACCURACY:  98 %

### 3 ###
ACCURACY:  98 %

### 4 ###
ACCURACY:  98 %

### 5 ###
ACCURACY:  96 %



# RBF  
K(x, z) = e^(-||x - z||^2 / gamma\^2 / 2)

In [11]:
rbf_k = np.zeros((X_B.shape[0], X_B.shape[0]))
n = rbf_k.shape
for s in [0.1, 0.5, 1, 3, 5, 10]:
    print("s: {}".format(s))
    s **= 2
    for i in range(X.shape[0]):
        for j in range(X.shape[0]):
            rbf_k[i][j] = np.exp(-1 * (np.sum((X_B[i] - X_B[j]) ** 2)) / s / 2)
    weights = np.random.randn(n[1], 1)
    epochs = 15000
    eta_0 = 10
    eta = 0.01
    for epoch in range(1, epochs+1):
        hinge_error = 0
        #eta = eta_0 / (epoch + 10)
        grad = np.zeros((n[1], 1))
        for point, target in zip(rbf_k, y):
            hinge_error += max(0, 1 - point.dot(weights) * target) / n[0]
            grad += (-point.reshape(-1, 1) * target) / n[0] if point.dot(weights) * target < 1 else 0
        #print("EPOCH: {} --- ERROR: {}".format(epoch, hinge_error))
        weights -= eta * grad
    print("ACCURACY: ",np.sum(np.sign(rbf_k.dot(weights)) == y.reshape(-1, 1)), '%')
    print()

s: 0.1
ACCURACY:  95 %

s: 0.5
ACCURACY:  96 %

s: 1
ACCURACY:  98 %

s: 3
ACCURACY:  95 %

s: 5
ACCURACY:  91 %

s: 10
ACCURACY:  85 %

