# EEG Biometrics


In [None]:
# Run this cell to load required libraries and mount your Drive folder
import numpy as np
from matplotlib import pyplot as plt
from google.colab import drive
import os
from sklearn.svm import SVC
import pandas as pd
import itertools

# drive.mount('/content/drive')

In [None]:

class pairwiseSVM:
  """
  SVM wrapper that handles the pairwise manipulation, training & prediction.

  Observations are combined into pairs by concatenating their feature vectors, and a binary
  SVM is trained to predict whether the two members of a pair share the same class label.

  Attributes set by `read_train_data` / `fit`:
    x_train:  2D array of training features (one row per observation).
    y_train:  1D array of training class labels.
    train_id: 1D array of trial IDs (only set by `read_train_data`).
  """
  def __init__(self, C=1.0, kernel='rbf', degree=3, random_state=None):
    self.svm = SVC(C=C, kernel=kernel, degree=degree, random_state=random_state)

  def read_train_data(self, filename, label_col = 'labels', id_col = 'id'):
    """
    Read the training data from a CSV file and store it on the instance.

    The file must have a header row and include a class label column and a trial ID column;
    every remaining column is treated as a feature.

    Args:
      filename: Path (or any object accepted by `pd.read_csv`) of the CSV file.
      label_col: Name of the column holding the class labels.
      id_col: Name of the column holding the trial IDs.
    """
    data = pd.read_csv(filename)

    # Separate the trial IDs, class labels and feature data into separate numpy arrays.
    self.train_id = np.array(data[id_col])
    self.y_train = np.array(data[label_col])
    # Everything that is neither a label nor an ID is a training feature.
    self.x_train = np.array(data.drop([label_col, id_col], axis=1))

  def construct_pairs(self, x_train=None, y_train=None, x_test=None, y_test=None):
    """
    Construct pairs of observations from the training and (optionally) testing data.

    If `x_test` is not supplied, every ordered pair (i, j) with i != j of training rows is
    built, so both (i, j) and (j, i) are included. If `x_test` is supplied, every training
    row is paired with every test row. Pairs are ordered with the training index i varying
    slowest.

    Args:
      x_train: 2D array of training features. If omitted (together with `y_train`), the
        training data stored on the instance is used.
      y_train: 1D array of training class labels.
      x_test: Optional 2D array of test features with the same number of columns as `x_train`.
      y_test: Optional 1D array of test class labels (only used when `x_test` is supplied).

    Returns:
      A tuple `(x_pairs, y_pairs, training_label)`:
        x_pairs: float array of shape (n_pairs, 2 * n_features), the concatenated feature vectors.
        y_pairs: int8 array of 0/1 flags (1 where both members share a class), or None when
          `x_test` is supplied without `y_test`.
        training_label: class label of the training-data member of each pair, with the same
          dtype as `y_train`.

    Raises:
      ValueError: If only one of `x_train`/`y_train` is supplied, or the feature arrays are
        not 2D / do not have matching numbers of columns.
      AttributeError: If no training data is supplied and none is stored on the instance.
    """
    if (x_train is None) ^ (y_train is None):
      raise ValueError("Both x_train and y_train datasets should be supplied, or neither.")
    if x_train is None:
      x_train = getattr(self, 'x_train', None)
      y_train = getattr(self, 'y_train', None)
      if x_train is None or y_train is None:
        raise AttributeError("No training data available: call fit() or read_train_data() first, "
                             "or supply x_train and y_train.")

    # The pair matrix is always float64, regardless of the input dtype.
    x_train = np.asarray(x_train, dtype=np.float64)
    y_train = np.asarray(y_train)
    if x_train.ndim != 2:
      raise ValueError("x_train must be a 2D array of shape (n_samples, n_features).")
    n_train = len(x_train)

    if x_test is None:
      # All (i, j) with i != j, in row-major order (i varies slowest).
      i_idx, j_idx = np.nonzero(~np.eye(n_train, dtype=bool))
      x_other, y_other = x_train, y_train
    else:
      x_other = np.asarray(x_test, dtype=np.float64)
      if x_other.ndim != 2 or x_other.shape[1] != x_train.shape[1]:
        raise ValueError("x_test must be a 2D array with the same number of features as x_train.")
      n_test = len(x_other)
      # Cartesian product of training and test indexes (training index varies slowest).
      i_idx = np.repeat(np.arange(n_train), n_test)
      j_idx = np.tile(np.arange(n_test), n_train)
      # The similarity flag can only be computed when the test labels are known.
      y_other = None if y_test is None else np.asarray(y_test)

    # Concatenate the feature vectors for each pair.
    x_pairs = np.hstack((x_train[i_idx], x_other[j_idx]))
    # Keep the label dtype of y_train so labels outside the int8 range are not corrupted.
    training_label = y_train[i_idx]
    if y_other is None:
      y_pairs = None
    else:
      # 1 where the pair comes from the same class, 0 otherwise.
      y_pairs = (y_train[i_idx] == y_other[j_idx]).astype(np.int8)

    return x_pairs, y_pairs, training_label


  def fit(self, x_train = None, y_train = None):
    """
    Fit the SVM on all pairwise combinations of the training data.

    Args:
      x_train: 2D array of training features. If supplied (together with `y_train`), it
        replaces the training data stored on the instance.
      y_train: 1D array of training class labels.

    Raises:
      ValueError: If only one of `x_train`/`y_train` is supplied.
    """
    if (x_train is None) ^ (y_train is None):
      raise ValueError("Either both the x_train and y_train datasets should be supplied, or neither.")

    if x_train is not None:
      self.x_train = x_train
      self.y_train = y_train

    # Get all pairwise combinations of the (now stored) training data.
    x_pairs, y_pairs, _ = self.construct_pairs()
    self.svm.fit(x_pairs, y_pairs)

  def predict_pairwise(self, x_test, y_test=None):
    """
    Predict the pairwise class similarity between the stored training data and `x_test`.

    Returns:
      A tuple of the similarity predictions, the ground truth similarities (None when `y_test`
      is not given), and the class label of the training observation used in each pair.
    """
    x_pairs, y_pairs, training_label = self.construct_pairs(x_test=x_test, y_test=y_test)

    return self.svm.predict(x_pairs), y_pairs, training_label
  
  def predict_class(self, x_test, y_test=None):
    """
    Predict class labels given a set of feature data.

    Not implemented yet: the pairwise predictions are computed but no voting scheme is
    applied, so this method currently returns None.
    """
    y_pairs_pred, y_pairs_true, training_label = self.predict_pairwise(x_test, y_test)

    # TODO: Implement voting scheme to decide on class label.

  def add_class(self, new_train, new_class):
    """ Add new participant for prediction purposes. (Not implemented yet.) """
    pass

  def tune_hyperparameters(self, x_validation, y_validation):
    """ Optimise the values of C and the degree using the validation set. (Not implemented yet.) """
    pass

In [None]:
# Smoke test of the pairwise SVM on a tiny hand-made dataset.
psvm = pairwiseSVM(C=10.0, kernel='rbf', degree=3, random_state=None) # Create a test instance of the class.

# # Some small test data
# a = np.array([[1,2,3],[4,5,6],[7,8,9],[9,10,11],[2,9,10]])
# b = np.array([[901,801,701],[602,603,604]])
# y_a = np.array([0,1,1,2,2])
# y_b = np.array([1,1])

# Toy dataset: 10 observations with a single feature each, labelled with 5 classes (0-4).
a = np.array([[1],[2.5],[3.0],[3.7],[5.2],[5.8],[7.1],[7.2],[7.4],[10]])
y_a = np.array([0,1,1,1,2,2,3,3,3,4])

# Build the training pairs explicitly so they can be inspected and plotted in the next cell.
x_pairs, y_pairs, training_label = psvm.construct_pairs(x_train=a,y_train=y_a)
# Fit the SVM; fit() stores a/y_a on the instance and constructs the same pairs internally.
psvm.fit(a,y_a)
# psvm.svm.fit(a, y_a) # Test a regular SVM to separate the classes without a pairwise approach.
# Pair every training row with every row of `a` used as test data; being the last expression,
# the returned (predictions, ground truth, training labels) tuple is shown as the cell output.
psvm.predict_pairwise(a,y_a)

In [None]:
# Each row of x_pairs is the concatenation of the two feature vectors of a pair. The demo data
# has one feature per observation, so column 0 is the first member and column 1 the second.
x = [z[0] for z in x_pairs]
y = [z[1] for z in x_pairs]
# Colour each pair by its same-class flag.
plt.scatter(x,y, c=y_pairs)

def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Plot the decision function for a 2D SVC.

    The decision function is evaluated on a 30x30 grid spanning the current axis limits, and
    the decision boundary (level 0) and margins (levels -1 and 1) are drawn as contours.

    Args:
        model: Fitted classifier exposing `decision_function` and, when `plot_support` is
            true, `support_vectors_`. It must have been fitted on two features.
        ax: Axes to draw on. Defaults to the current axes (`plt.gca()`).
        plot_support: Whether to circle the support vectors.
    """
    if ax is None:
        ax = plt.gca()
    # Remember the limits so drawing the contours and markers does not rescale the axes.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # create grid to evaluate model
    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)

    # plot decision boundary and margins
    ax.contour(X, Y, P, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    # plot support vectors
    if plot_support:
        # An explicit edge colour is required: with facecolors='none' the default edge colour
        # ('face') would also be 'none', leaving the hollow markers invisible.
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=300, linewidth=1, facecolors='none', edgecolors='k')
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

# Overlay the fitted pairwise SVM's decision boundary, margins and support vectors on the
# current axes (no `ax` is passed, so the function falls back to plt.gca()).
plot_svc_decision_function(psvm.svm)

In [None]:
# Structure to evaluate classification performance
from sklearn.metrics import classification_report, confusion_matrix

def print_results(Y_test, predictions, label_names):
    """Print a classification report and plot the confusion matrix.

    Args:
        Y_test: Ground-truth class labels.
        predictions: Predicted class labels, in the same order as `Y_test`.
        label_names: Display names of the classes, used in the report and as tick labels.
    """
    # scikit-learn expects (y_true, y_pred); swapping them exchanges precision and recall
    # and transposes the confusion matrix.
    print(classification_report(Y_test, predictions,
                                target_names=label_names))

    # Rows are the true classes, columns the predicted classes.
    conf_mat = confusion_matrix(Y_test, predictions)

    fig = plt.figure(figsize=(6,6))

    res = plt.imshow(np.array(conf_mat), cmap=plt.cm.summer, interpolation='nearest')
    # Annotate every non-empty cell with its count.
    for i, row in enumerate(conf_mat):
        for j, c in enumerate(row):
            if c>0:
                plt.text(j-.2, i+.1, c, fontsize=16)

    fig.colorbar(res)
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    # One tick per class, however many classes there are.
    tick_positions = range(len(label_names))
    _ = plt.xticks(tick_positions, label_names, rotation=90)
    _ = plt.yticks(tick_positions, label_names)

In [None]:
# Run this cell to save the changes

# drive.flush_and_unmount()
# print('All changes made in this colab session should now be visible in Drive.')