# Multiclass SVM: One To Rest

This is the notebook used to train the multiclass classifier that uses One To Rest SVM.

In [51]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [52]:
genre_names = ['blues', 'classical', 'country', 'disco', 'hipop', 'jazz', 'metal', 'pop', 'reggae', 'rock']

# Dataset import
data = pd.read_csv('extracted_dataset.csv')

# Dataset normalization
data_mean = data.mean()
data_std = data.std()

data_normalized = (data - data_mean) / data_std

dataset = data_normalized.to_numpy()[:, 1:4]
labels = data.to_numpy()[:, 4]

## Separation of the training set and the validation set

In [53]:
#Indexes extraction
indexes_train = np.random.choice(1000, 800, replace=False)
indexes_valid= np.setdiff1d(np.array([i for i in range(1000)]), indexes_train)
np.random.shuffle(indexes_valid)

dataset_train = dataset[indexes_train, :]
dataset_valid = dataset[indexes_valid, :]

labels_train = labels[indexes_train]
labels_valid = labels[indexes_valid]

## Dual with kernel trick

The methodologies used is the resolution of the dual problem of the SVM algorithm with the usage of the kernel trick.

In [54]:
import jax
import jax.numpy as jnp
import scipy.optimize as opt

In [55]:
#The hessian is always equal to zero because the constraint is linear
def hessian(x):
  return np.zeros((800, 800))

#This is the kernel function for the matrices
def kernel_mat(xi, xj):
  return (jnp.dot(xi, xj) + np.ones((N, N)))**3

#This is the kernel function for the vectors
def kernel_vec(xi, xj):
  return (np.dot(xi, xj) + 1)**3

#This is the objective function of the dual problem with the usage of the kernel trick
def obj_kernel(c):
  partial_1 = -jnp.sum(c)

  c_outer = jnp.outer(c, c)
  c_outer = jnp.triu(c_outer)

  y_outer = jnp.outer(in_labels, in_labels)
  y_outer = jnp.triu(y_outer)

  K= kernel_mat(in_data, in_data.T)

  partial_2= 0.5 * jnp.sum(c_outer * y_outer * K)

  return partial_1 + partial_2

# Training with 10 genres

In [57]:
N= dataset_train.shape[0]

B = np.zeros(len(genre_names))

#The results of the dual problem computed
C = np.zeros((len(genre_names), N))

#Iteration on the genres
for i in range(10):
  #Data of the genre considered
  class_genre = dataset_train[labels_train==i, :]

  #Data of the other genres
  class_others = dataset_train[labels_train!=i, :]

  #Data and labels used in the resolution of the dual problem
  in_data = np.concatenate((class_genre, class_others), axis=0)
  in_labels = np.concatenate((np.ones(class_genre.shape[0]), -1 * np.ones(class_others.shape[0])), axis=None)

  a = np.zeros(N)
  obj_k_jit=jax.jit(obj_kernel)

  linear_constraint = opt.LinearConstraint(in_labels, 0, 0, keep_feasible=True)
  res = opt.minimize(obj_k_jit, a, method='trust-constr', jac='2-point', hess=hessian, constraints=[linear_constraint], options={'maxiter': 1000}, bounds=opt.Bounds(np.zeros(N), np.ones(N)*np.inf))

  C[i, :] = np.array(res.x)

  index_non_zero = -1
  for j in range(N):
    if C[i, j] > 0 and index_non_zero < 0:
      index_non_zero = j
      break

  w_phi = np.sum(np.array([C[i, j]*in_labels[j]*kernel_vec(dataset_train[index_non_zero, :], dataset_train[j, :]) for j in range(N)]))

  B[i] = - in_labels[index_non_zero] + w_phi

  print('%s done' % genre_names[i])

blues done
classical done
country done
disco done
hipop done
jazz done
metal done
pop done
reggae done
rock done


This part implements the classifier one to rest with all the genres passed through the method paramenters. 
At the end, the one with higher "score" will be considered the most probably correct.

In [58]:
def classifier_oneToRest(input):
  classifications = -1000 * np.ones(len(genre_names))

  for genre in genre_names:
    genre_index = genre_names.index(genre)

    in_labels = np.zeros(N)
    in_labels[labels_train==genre_index]=1
    in_labels[labels_train!=genre_index]=-1

    w_phi = np.sum(np.array([C[genre_index, i]*in_labels[i]*kernel_vec(input, dataset_train[i, :]) for i in range(N)]))

    classifications[genre_index] = w_phi - B[genre_index]

  return genre_names[np.argmax(classifications)]

### Confusion matrix and accuracy for 10 genres

In [59]:
confusion_matrix = np.zeros((len(genre_names), len(genre_names)))

for i in range(dataset_valid.shape[0]):
  predicted = genre_names.index(classifier_oneToRest(dataset_valid[i, :]))
  confusion_matrix [labels_valid[i].astype(int), predicted] += 1

print(confusion_matrix)
print('Accuracy %1.2f %%' % (confusion_matrix.trace()/confusion_matrix.sum() * 100.0))

[[ 5.  3.  0.  1.  1.  0.  5.  1.  0.  4.]
 [ 0. 15.  0.  1.  0.  5.  3.  0.  0.  1.]
 [ 3.  4.  2.  1.  0.  4.  6.  0.  0.  3.]
 [ 0.  3.  0.  3.  0.  0.  9.  2.  0.  0.]
 [ 0.  3.  0.  0.  0.  0.  8. 12.  0.  0.]
 [ 0. 11.  1.  0.  0.  4.  4.  0.  0.  1.]
 [ 0.  0.  1.  0.  0.  0. 12.  0.  0.  0.]
 [ 1.  0.  0.  0.  1.  1.  3. 13.  0.  0.]
 [ 0.  5.  0.  6.  2.  1.  5.  2.  0.  1.]
 [ 0.  4.  1.  1.  0.  0. 10.  0.  0.  1.]]
Accuracy 27.50 %


# 6 genres

In [60]:
genres = ['blues', 'classical', 'country', 'disco', 'pop', 'rock']

# Indices of genres to classify
genre_indices = list()
for genre_n in genres: genre_indices.append(genre_names.index(genre_n))

# Filter to only select samples from the input genres
training_set_filter = np.zeros(dataset_train.shape[0], dtype='bool')
for j in range(dataset_train.shape[0]):
  training_set_filter[j] = labels_train[j] in genre_indices

# Application of filter on the test set
partial_training_set = dataset_train[training_set_filter,:]
partial_training_labels = labels_train[training_set_filter]

N= partial_training_set.shape[0]

In [61]:
#The hessian is always equal to zero because the constraint is linear
def hessian(x):
  return np.zeros((partial_training_set.shape[0], partial_training_set.shape[0]))

In [62]:
#The results of the dual problem computed
C= np.zeros((len(genres), N))

B = np.zeros(len(genres))

#Iteration on the genres
for i in range(len(genres)):
  #Data of the genre considered
  class_genre = partial_training_set[partial_training_labels==i, :]

  #Data of the other genres
  class_others = partial_training_set[partial_training_labels!=i, :]

  #Data and labels used in the resolution of the dual problem
  in_data = np.concatenate((class_genre, class_others), axis=0)
  in_labels = np.concatenate((np.ones(class_genre.shape[0]), -1 * np.ones(class_others.shape[0])), axis=None)

  a = np.zeros(N)
  obj_k_jit=jax.jit(obj_kernel)

  linear_constraint = opt.LinearConstraint(in_labels, 0, 0, keep_feasible=True)
  res = opt.minimize(obj_k_jit, a, method='trust-constr', jac='2-point', hess=hessian, constraints=[linear_constraint], options={'maxiter': 1000}, bounds=opt.Bounds(np.zeros(N), np.ones(N)*np.inf))

  C[i, :] = np.array(res.x)

  index_non_zero = -1
  for j in range(N):
    if C[i, j] > 0 and index_non_zero < 0:
      index_non_zero = j
      break

  w_phi = np.sum(np.array([C[i, j]*in_labels[j]*kernel_vec(dataset_train[index_non_zero, :], dataset_train[j, :]) for j in range(N)]))

  B[i] = - in_labels[index_non_zero] + w_phi

  print('%s done' % genres[i])

blues done, start testing
classical done, start testing
country done, start testing
disco done, start testing
pop done, start testing
rock done, start testing


In [63]:
def classifier_oneToRest(input):
  classifications = -1000 * np.ones(len(genres))

  for genre in genres:
    genre_index = genres.index(genre)

    in_labels = np.zeros(N)
    in_labels[partial_training_labels==genre_index]=1
    in_labels[partial_training_labels!=genre_index]=-1

    w_phi = np.sum(np.array([C[genre_index, i]*in_labels[i]*kernel_vec(input, partial_training_set[i, :]) for i in range(N)]))

    classifications[genre_index] = w_phi - B[genre_index]

  return genres[np.argmax(classifications)]

In [64]:
confusion_matrix = np.zeros((len(genre_names), len(genre_names)))

# Filter to only select samples from the input genres
valid_set_filter = np.zeros(dataset_valid.shape[0], dtype='bool')
for j in range(dataset_valid.shape[0]):
  valid_set_filter[j] = labels_valid[j] in genre_indices

# Application of filter on the test set
partial_valid_set = dataset_valid[valid_set_filter,:]
partial_valid_labels = labels_valid[valid_set_filter]

for i in range(partial_valid_set.shape[0]):
  predicted = genre_names.index(classifier_oneToRest(partial_valid_set[i, :]))
  confusion_matrix [partial_valid_labels[i].astype(int), predicted] += 1

print(confusion_matrix)
print('Accuracy %1.2f %%' % (confusion_matrix.trace()/confusion_matrix.sum() * 100.0))

[[ 1.  3.  0.  0.  0.  0.  0. 16.  0.  0.]
 [ 0. 17.  0.  2.  0.  0.  0.  6.  0.  0.]
 [ 0.  4.  1.  1.  0.  0.  0. 17.  0.  0.]
 [ 0.  3.  0.  2.  0.  0.  0. 12.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0. 19.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  4.  0.  2.  0.  0.  0. 11.  0.  0.]]
Accuracy 33.06 %


# 2 genres

In [65]:
genres = ['classical', 'pop']

# Indices of genres to classify
genre_indices = list()
for genre_n in genres: genre_indices.append(genre_names.index(genre_n))

# Filter to only select samples from the input genres
training_set_filter = np.zeros(dataset_train.shape[0], dtype='bool')
for j in range(dataset_train.shape[0]):
  training_set_filter[j] = labels_train[j] in genre_indices

# Application of filter on the test set
partial_training_set = dataset_train[training_set_filter,:]
partial_training_labels = labels_train[training_set_filter]

N= partial_training_set.shape[0]

In [66]:
#The results of the dual problem computed
C= np.zeros((len(genres), N))

B = np.zeros(len(genres))

#Iteration on the genres
for i in range(len(genres)):
  #Data of the genre considered
  class_genre = partial_training_set[partial_training_labels==i, :]

  #Data of the other genres
  class_others = partial_training_set[partial_training_labels!=i, :]

  #Data and labels used in the resolution of the dual problem
  in_data = np.concatenate((class_genre, class_others), axis=0)
  in_labels = np.concatenate((np.ones(class_genre.shape[0]), -1 * np.ones(class_others.shape[0])), axis=None)

  a = np.zeros(N)
  obj_k_jit=jax.jit(obj_kernel)

  linear_constraint = opt.LinearConstraint(in_labels, 0, 0, keep_feasible=True)
  res = opt.minimize(obj_k_jit, a, method='trust-constr', jac='2-point', hess=hessian, constraints=[linear_constraint], options={'maxiter': 1000}, bounds=opt.Bounds(np.zeros(N), np.ones(N)*np.inf))

  C[i, :] = np.array(res.x)

  index_non_zero = -1
  for j in range(N):
    if C[i, j] > 0 and index_non_zero < 0:
      index_non_zero = j
      break

  w_phi = np.sum(np.array([C[i, j]*in_labels[j]*kernel_vec(dataset_train[index_non_zero, :], dataset_train[j, :]) for j in range(N)]))

  B[i] = - in_labels[index_non_zero] + w_phi

  print('%s done' % genres[i])

classical done
pop done


In [79]:
def classifier_oneToRest(input):
  classifications = -1000 * np.ones(len(genres))

  for genre in genres:
    genre_index = genres.index(genre)

    in_labels = np.zeros(N)
    in_labels[partial_training_labels==genre_index]=1
    in_labels[partial_training_labels!=genre_index]=-1

    w_phi = np.sum(np.array([C[genre_index, i]*in_labels[i]*kernel_vec(input, partial_training_set[i, :]) for i in range(N)]))

    classifications[genre_index] = w_phi - B[genre_index]

  return genres[np.argmax(classifications)]

In [80]:
confusion_matrix = np.zeros((len(genre_names), len(genre_names)))

# Filter to only select samples from the input genres
valid_set_filter = np.zeros(dataset_valid.shape[0], dtype='bool')
for j in range(dataset_valid.shape[0]):
  valid_set_filter[j] = labels_valid[j] in genre_indices

# Application of filter on the test set
partial_valid_set = dataset_valid[valid_set_filter,:]
partial_valid_labels = labels_valid[valid_set_filter]

for i in range(partial_valid_set.shape[0]):
  predicted = genre_names.index(classifier_oneToRest(partial_valid_set[i, :]))
  confusion_matrix [partial_valid_labels[i].astype(int), predicted] += 1

print(confusion_matrix)
print('Accuracy %1.2f %%' % (confusion_matrix.trace()/confusion_matrix.sum() * 100.0))

[[ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0. 25.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0. 19.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]]
Accuracy 56.82 %
