# Multiclass SVM: One To Rest

This is the notebook used to train the multiclass classifier that uses One To Rest SVM.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [3]:
genre_names = ['blues', 'classical', 'country', 'disco', 'hipop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

# Dataset import
data = pd.read_csv('extracted_dataset.csv')

# Dataset normalization
data_mean = data.mean()
data_std = data.std()

data_normalized = (data - data_mean) / data_std

dataset = data_normalized.to_numpy()[:, 1:4]
labels = data.to_numpy()[:, 4]

## Separation of the training set and the validation set

In [67]:
#Indexes extraction
indexes_train = np.random.choice(1000, 800, replace=False)
indexes_valid= np.setdiff1d(np.array([i for i in range(1000)]), indexes_train)
np.random.shuffle(indexes_valid)

dataset_train = dataset[indexes_train, :]
dataset_valid = dataset[indexes_valid, :]

labels_train = labels[indexes_train]
labels_valid = labels[indexes_valid]

## Dual with kernel trick

The methodologies used is the resolution of the dual problem of the SVM algorithm with the usage of the kernel trick.

In [68]:
import jax
import jax.numpy as jnp
import scipy.optimize as opt

In [75]:
#The hessian is always equal to zero because the constraint is linear
def hessian(x):
  return np.zeros((800, 800))

#This is the kernel function for the matrices
def kernel_mat(xi, xj):
  return (jnp.dot(xi, xj) + np.ones((N, N)))**3

#This is the kernel function for the vectors
def kernel_vec(xi, xj):
  return (np.dot(xi, xj) + 1)**3

#This is the objective function of the dual problem with the usage of the kernel trick
def obj_kernel(c):
  partial_1 = -jnp.sum(c)

  c_outer = jnp.outer(c, c)
  c_outer = jnp.triu(c_outer)

  y_outer = jnp.outer(in_labels, in_labels)
  y_outer = jnp.triu(y_outer)

  K= kernel_mat(in_data, in_data.T)

  partial_2= 0.5 * jnp.sum(c_outer * y_outer * K)

  return partial_1 + partial_2

In [70]:
N= dataset_train.shape[0]

#The weight used for the method
w_phi = np.zeros(N)

#The biases computed and saved in the .csv
B = np.zeros(len(genre_names))

#The results of the dual problem computed and saved in the .csv
C= np.zeros((len(genre_names), N))

#Iteration on the genres
for i in range(10):
  #Data of the genre considered
  class_genre = dataset_train[labels_train==i, :]

  #Data of the other genres
  class_others = dataset_train[labels_train!=i, :]

  #Data and labels used in the resolution of the dual problem
  in_data = np.concatenate((class_genre, class_others), axis=0)
  in_labels = np.concatenate((np.ones(class_genre.shape[0]), -1 * np.ones(class_others.shape[0])), axis=None)

  a = np.zeros(N)
  obj_k_jit=jax.jit(obj_kernel)

  linear_constraint = opt.LinearConstraint(in_labels, 0, 0, keep_feasible=True)
  res = opt.minimize(obj_k_jit, a, method='trust-constr', jac='2-point', hess=hessian, constraints=[linear_constraint], options={'maxiter': 1000}, bounds=opt.Bounds(np.zeros(N), np.ones(N)*np.inf))

  C[i, :] = np.array(res.x)

  index_non_zero = -1
  for j in range(N):
    w_phi[j] = np.sum(np.array([C[i, k]*in_labels[k]*kernel_vec(dataset_train[j, :], dataset_train[k, :]) for k in range(N)]))
    if C[i, j]>0 and index_non_zero<0:
      index_non_zero = j

  B[i] = - in_labels[index_non_zero] + w_phi[index_non_zero]

  confusion_mat = np.zeros((2,2))

  print('%s done, start testing' % genre_names[i])

  #Computation of the confusion matrix for one genre
  for test_index in range(class_genre.shape[0]):
    predicted = 0 if np.sign(w_phi[test_index] - B[i]) >= 0 else 1
    confusion_mat[0,predicted] += 1

  for test_index in range(class_genre.shape[0], class_genre.shape[0] + class_others.shape[0]):
    predicted = 0 if np.sign(w_phi[test_index] - B[i]) >= 0 else 1
    confusion_mat[1,predicted] += 1
    
  print(confusion_mat)
  print(confusion_mat.trace() / confusion_mat.sum())

blues done, start testing
[[ 28.  54.]
 [176. 542.]]
0.7125
classical done, start testing
[[ 32.  52.]
 [204. 512.]]
0.68
country done, start testing
[[ 35.  44.]
 [232. 489.]]
0.655
disco done, start testing
[[ 28.  50.]
 [185. 537.]]
0.70625
hipop done, start testing
[[ 32.  52.]
 [204. 512.]]
0.68
jazz done, start testing
[[ 32.  52.]
 [204. 512.]]
0.68
metal done, start testing
[[ 34.  49.]
 [209. 508.]]
0.6775
pop done, start testing
[[ 28.  50.]
 [185. 537.]]
0.70625
reggae done, start testing
[[ 22.  51.]
 [145. 582.]]
0.755
rock done, start testing
[[ 21.  54.]
 [126. 599.]]
0.775


In this section we save the results of the training to use them in the classifier

In [71]:
df_C = pd.DataFrame(C)
df_C.to_csv('C_kernel.csv')

df_B = pd.DataFrame(B)
df_B.to_csv('B_kernel.csv')

df_indexes = pd.DataFrame(indexes_train)
df_indexes.to_csv('indexes_kernel.csv')

This part implements the classifier one to rest with all the genres passed through the method paramenters. 
At the end, the one with higher "score" will be considered the most probably correct.

In [72]:
def classifier_oneToRest(input, genres=genre_names):
  classifications = -1000 * np.ones(len(genre_names))

  for genre in genres:
    genre_index = genre_names.index(genre)

    in_labels = np.zeros(N)
    in_labels[labels_train==genre_index]=1
    in_labels[labels_train!=genre_index]=-1

    w_phi = np.sum(np.array([C[genre_index, i]*in_labels[i]*kernel_vec(input, dataset_train[i, :]) for i in range(N)]))
    b = B[genre_index]

    classifications[genre_index] = w_phi - b

  return genre_names[np.argmax(classifications)]

### Confusion matrix and accuracy for 10 genres

In [73]:
confusion_matrix = np.zeros((len(genre_names), len(genre_names)))

for i in range(dataset_valid.shape[0]):
  predicted = genre_names.index(classifier_oneToRest(dataset_valid[i, :]))
  confusion_matrix [labels_valid[i].astype(int), predicted] += 1

print(confusion_matrix)
print(confusion_matrix.trace()/confusion_matrix.sum())

[[ 0.  3.  9.  0.  0.  0.  4.  2.  0.  0.]
 [ 0.  9.  6.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  2. 19.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  4.  7.  0.  0.  0.  5.  4.  2.  0.]
 [ 0.  1.  9.  0.  4.  0.  1.  1.  0.  0.]
 [ 0.  5. 10.  0.  0.  0.  1.  0.  0.  0.]
 [ 0.  0.  3.  0.  0.  0. 12.  2.  0.  0.]
 [ 0.  2.  1.  0.  1.  0.  1. 17.  0.  0.]
 [ 0.  8. 15.  0.  3.  0.  1.  0.  0.  0.]
 [ 0.  1. 13.  0.  1.  0. 10.  0.  0.  0.]]
0.305


### Confusion matrix and accuracy for 4 genres

In [76]:
confusion_matrix = np.zeros((len(genre_names), len(genre_names)))

dataset_reduced = dataset_valid[np.logical_or(np.logical_or(labels_valid==genre_names.index('blues'), labels_valid==genre_names.index('country')), np.logical_or(labels_valid==genre_names.index('disco'), labels_valid==genre_names.index('metal'))), :]
labels_reduced = labels_valid[np.logical_or(np.logical_or(labels_valid==genre_names.index('blues'), labels_valid==genre_names.index('country')), np.logical_or(labels_valid==genre_names.index('disco'), labels_valid==genre_names.index('metal')))]

for i in range(dataset_reduced.shape[0]):
  predicted = genre_names.index(classifier_oneToRest(dataset_reduced[i, :], ['blues',  'country', 'disco', 'metal']))
  confusion_matrix [labels_reduced[i].astype(int), predicted] += 1

print(confusion_matrix)
print(confusion_matrix.trace()/confusion_matrix.sum())

[[ 0.  0. 14.  0.  0.  0.  4.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0. 21.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0. 16.  0.  0.  0.  6.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  4.  0.  0.  0. 13.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]
0.4358974358974359
