# Exercises

In this section we have two exercises:
1. Implement the polynomial kernel.
2. Implement the multiclass C-SVM.

## Polynomial kernel

You need to extend the ``build_kernel`` function and implement the polynomial kernel if the ``kernel_type`` is set to 'poly'. The equation that needs to be implemented:
\begin{equation}
K=(X^{T}*Y)^{d}.
\end{equation}

In [58]:
import numpy as np

In [59]:
def build_kernel(data_set, kernel_type='linear'):
    kernel = np.dot(data_set, data_set.T)
    if kernel_type == 'rbf':
        sigma = 1.0
        objects_count = len(data_set)
        b = np.ones((len(data_set), 1))
        kernel -= 0.5 * (np.dot((np.diag(kernel)*np.ones((1, objects_count))).T, b.T)
                         + np.dot(b, (np.diag(kernel) * np.ones((1, objects_count))).T.T))
        kernel = np.exp(kernel / (2. * sigma ** 2))
    elif kernel_type == 'poly':
        bias = 0
        dimens = 2
        return (kernel + bias) ** dimens
    return kernel

## Implement a multiclass C-SVM

Use the classification method that we used in notebook 7.3 and IRIS dataset to build a multiclass C-SVM classifier. Most implementation is about a function that will return the proper data set that need to be used for the prediction. You need to implement:
- ``choose_set_for_label``
- ``get_labels_count``

In [60]:
import cvxopt

In [61]:
def choose_set_for_label(data_set, labels, current_label):
    binary_labels = np.where(labels == current_label, 1, -1)
    return data_set, binary_labels.reshape(-1 ,1)

In [62]:
def get_labels_count(data_set):
    return len(np.unique(data_set))

In [63]:
def train(train_data_set, train_labels, kernel_type='linear', C=10, threshold=1e-5):
    objects_count = len(train_data_set)  # I added here object_count, because it was used in the function, but not defined

    kernel = build_kernel(train_data_set, kernel_type=kernel_type)

    P = train_labels * train_labels.transpose() * kernel
    q = -np.ones((objects_count, 1))
    G = np.concatenate((np.eye(objects_count), -np.eye(objects_count)))
    h = np.concatenate((C * np.ones((objects_count, 1)), np.zeros((objects_count, 1))))

    A = train_labels.reshape(1, objects_count)
    A = A.astype(float)
    b = 0.0

    sol = cvxopt.solvers.qp(cvxopt.matrix(P), cvxopt.matrix(q), cvxopt.matrix(G), cvxopt.matrix(h), cvxopt.matrix(A), cvxopt.matrix(b))

    lambdas = np.array(sol['x'])

    support_vectors_id = np.where(lambdas > threshold)[0]
    vector_number = len(support_vectors_id)
    support_vectors = train_data_set[support_vectors_id, :]

    lambdas = lambdas[support_vectors_id]
    targets = train_labels[support_vectors_id]

    b = np.sum(targets)
    for n in range(vector_number):
        b -= np.sum(lambdas * targets * np.reshape(kernel[support_vectors_id[n], support_vectors_id], (vector_number, 1)))
    b /= len(lambdas)

    return lambdas, support_vectors, support_vectors_id, b, targets, vector_number

def build_kernel(data_set, kernel_type='linear'):
    kernel = np.dot(data_set, data_set.T)
    if kernel_type == 'rbf':
        sigma = 1.0
        objects_count = len(data_set)
        b = np.ones((len(data_set), 1))
        kernel -= 0.5 * (np.dot((np.diag(kernel)*np.ones((1, objects_count))).T, b.T)
                         + np.dot(b, (np.diag(kernel) * np.ones((1, objects_count))).T.T))
        kernel = np.exp(kernel / (2. * sigma ** 2))
    return kernel

def classify_rbf(test_data_set, train_data_set, lambdas, targets, b, vector_number, support_vectors, support_vectors_id):
    kernel = np.dot(test_data_set, support_vectors.T)
    sigma = 1.0
    c = (1. / sigma * np.sum(test_data_set ** 2, axis=1) * np.ones((1, np.shape(test_data_set)[0]))).T
    c = np.dot(c, np.ones((1, np.shape(kernel)[1])))
    sv = (np.diag(np.dot(train_data_set, train_data_set.T))*np.ones((1,len(train_data_set)))).T[support_vectors_id]
    aa = np.dot(sv,np.ones((1,np.shape(kernel)[0]))).T
    kernel = kernel - 0.5 * c - 0.5 * aa
    kernel = np.exp(kernel / (2. * sigma ** 2))

    y = np.zeros((np.shape(test_data_set)[0], 1))
    for j in range(np.shape(test_data_set)[0]):
        for i in range(vector_number):
            y[j] += lambdas[i] * targets[i] * kernel[j, i]
        y[j] += b
    return np.sign(y)

In [64]:
def multiclass_train(data_set, labels, kernel_type='linear', C=10, threshold=1e-5):

    unique_labels = np.unique(labels)
    classifiers = {}

    for current_label in unique_labels:
        binary_data, binary_labels = choose_set_for_label(data_set, labels, current_label)

        lambdas, support_vectors, support_vectors_id, b, targets, vector_number = train(
            binary_data, binary_labels, kernel_type, C, threshold)

        classifiers[current_label] = {
            'lambdas': lambdas,
            'support_vectors': support_vectors,
            'support_vectors_id': support_vectors_id,
            'b': b,
            'targets': targets,
            'vector_number': vector_number,
            'train_data': binary_data
        }

    return classifiers

def multiclass_predict(test_data_set, classifiers):

    decision_scores = np.zeros((len(test_data_set), len(classifiers)))

    for i, (label, clf) in enumerate(classifiers.items()):
        pred = classify_rbf(
            test_data_set,
            clf['train_data'],
            clf['lambdas'],
            clf['targets'],
            clf['b'],
            clf['vector_number'],
            clf['support_vectors'],
            clf['support_vectors_id']
        )
        decision_scores[:, i] = pred.flatten()

    predicted_labels = np.array(list(classifiers.keys()))[np.argmax(decision_scores, axis=1)]
    return predicted_labels


In [65]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
iris = load_iris()
X = iris.data
y = iris.target

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Linear kernel
classifiers = multiclass_train(X_train, y_train, kernel_type="linear")
predictions = multiclass_predict(X_test, classifiers)
print("Accuracy:", accuracy_score(y_test, predictions))

# RBF kernel
classifiers_3 = multiclass_train(X_train, y_train, kernel_type="rbf")
predictions_3 = multiclass_predict(X_test, classifiers_3)
print("Accuracy with RBF kernel:", accuracy_score(y_test, predictions_3))


     pcost       dcost       gap    pres   dres
 0: -3.1833e+01 -6.0498e+03  2e+04  6e-01  2e-14
 1:  2.1268e+01 -1.1621e+03  2e+03  6e-02  2e-14
 2:  2.5693e+01 -1.3301e+02  2e+02  5e-03  9e-15
 3:  6.0334e+00 -1.0693e+01  2e+01  4e-05  4e-15
 4:  2.7843e-01 -2.0051e+00  2e+00  2e-16  3e-15
 5: -3.0543e-01 -1.2125e+00  9e-01  2e-16  1e-15
 6: -6.7226e-01 -1.1977e+00  5e-01  2e-16  8e-16
 7: -8.6027e-01 -1.1273e+00  3e-01  2e-16  7e-16
 8: -9.7222e-01 -9.7933e-01  7e-03  2e-16  1e-15
 9: -9.7522e-01 -9.7531e-01  1e-04  2e-16  1e-15
10: -9.7525e-01 -9.7525e-01  1e-06  2e-16  1e-15
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0: -4.1986e+02 -9.8623e+03  2e+04  6e-01  4e-14
 1: -4.1145e+02 -2.7612e+03  2e+03  2e-14  3e-14
 2: -5.0487e+02 -9.0258e+02  4e+02  4e-14  3e-14
 3: -6.2535e+02 -7.8161e+02  2e+02  2e-15  4e-14
 4: -6.6315e+02 -7.2609e+02  6e+01  1e-14  4e-14
 5: -6.7853e+02 -7.0520e+02  3e+01  1e-14  4e-14
 6: -6.8574e+02 -6.9508e+02  9e+00  2e-15  4e-1