#### Classification using a Logistic Model

In [1]:
import numpy as np
import pandas as pd

from scipy.io import loadmat
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#### Data Loading and Transformation

In [2]:
eeg = loadmat('WLDataAll.mat')

In [3]:
data = eeg['data']
labels = eeg['label']

In [4]:
data[:2]

array([[[ 4.8829541e+00,  9.8011837e+00, -6.1279667e+01, ...,
         -2.3868773e+01,  1.0183847e+01,  6.5834889e+00],
        [ 1.5645766e+01,  9.2562323e+00, -5.9352314e+01, ...,
          6.3903937e+00, -3.4080923e-01,  7.2190175e+00],
        [ 1.8576738e+01,  9.6379747e+00, -5.8535995e+01, ...,
          3.4576672e+01, -3.2299006e+00,  4.3821239e-01],
        ...,
        [ 1.3261520e+01, -4.5882652e+01,  1.3235929e+01, ...,
          1.9744860e+01, -1.1103893e+01,  5.8311663e+00],
        [ 1.2880102e+01, -5.4907242e+01,  1.1796134e+01, ...,
          2.2629681e+01, -7.8801956e+00,  9.1314917e+00],
        [ 1.1365375e+01, -6.0526150e+01,  8.0718441e+00, ...,
          1.9972019e+01, -9.1800272e-02,  2.4537776e+00]],

       [[ 6.2141876e+00,  2.3298359e+01, -4.9112263e+01, ...,
         -2.7136732e+01,  1.3108792e+01, -1.0402801e-02],
        [ 2.0816244e+01,  2.2637253e+01, -4.7099533e+01, ...,
          2.6988206e+00,  1.7181083e+00,  4.6453357e-01],
        [ 2.5134890e+01, 

In [5]:
labels[:2]

array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

In [6]:
labels[labels == 1] = 0
labels[labels == 2] = 1

print(data.shape)
print(labels.shape)

(62, 512, 360)
(1, 360)


In [9]:
rawdata = np.copy(data)
data = rawdata.reshape(-1, rawdata.shape[-1])
data.shape

(31744, 360)

##### Checking if there are any nan values in the dataset

In [10]:
np.isnan(data).sum()

0

##### Shuffling the dataset using index

In [11]:
idx = np.random.permutation(data.shape[1])
X,y = data[:, idx], labels[:, idx]

print(X.shape)
print(y.shape)

(31744, 360)
(1, 360)


##### Log transformation is required to handle negative values in the data and to compute cost function with errors

In [17]:
minimumvalue = np.amin(data)
data = data+ abs(minimumvalue) + 1

##### Splitting the data into train and test set and consequently scaling them

In [12]:
X_train = data[:,:288]
y_train = labels[:,:288]
X_test = data[:,288:]
y_test = labels[:,288:]

print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train.T).T
X_test = scaler.transform(X_test.T).T

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

(31744, 288)
(1, 288)
(31744, 72)
(1, 72)


##### Creating a Logistic Model

In [18]:
def sigmoid(z):
    #Compute the sigmoid of z. z is a scalar or numpy array of any size
    s = 1/(1 + np.exp(-z))
    return s

def initialize_random_weights(m):
    w = np.random.rand(m).reshape(m,1)
    b = 0
    return w, b

def initialize_with_zeros(m): 
    w = np.zeros((m, 1))
    b = 0
    return w, b

def optimize(w, b, X, Y): 
    m = X.shape[1]

    # FORWARD PROPAGATION (FROM X TO COST)
    A = sigmoid(np.dot(w.T, X)+ b) # compute activation
    cost = -(1/m)*(np.sum((Y*np.log(A)) + (1-Y) *np.log(1-A)))

    # BACKWARD PROPAGATION (TO FIND GRAD)
    dw = (1/m)* np.dot(X, ((A-Y).T))
    db = (1/m) * np.sum(A-Y)
    gradient = {'dw': dw, 'db': db}

    return gradient, cost

def optimizer(w, b, X, Y, learning_rate, no_iterations, print_cost):
    costs = []
    for i in range(no_iterations):
        grads, cost = optimize(w,b,X,Y)
        dw = grads["dw"]
        db = grads["db"]
        
        #weight update
        w = w - (learning_rate * dw)
        b = b - (learning_rate * db)
        
        if (i % 100 == 0):
            costs.append(cost)
            if(print_cost):
                print("Cost after %i iteration is %f" %(i, cost))
    
    #final parameters
    coeff = {"w": w, "b": b}
    gradient = {"dw": dw, "db": db}
    return coeff, gradient, costs

def predict(w, b, X):
    m = X.shape[1]
    Y_prediction = np.zeros((1,m))
    w = w.reshape(X.shape[0], 1)

    A = sigmoid(np.dot(w.T, X) + b)

    for i in range(A.shape[1]):
        # Convert probabilities A[0,i] to actual predictions p[0,i]
        Y_prediction[0,i] = 1 if A[0, i] > 0.5 else 0
    pass

    return Y_prediction

def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.5, num_iterations = 2000, print_cost = False):
    w, b = initialize_random_weights(X_train.shape[0])
    
    # Gradient descent
    coeff, gradient, costs = optimizer(w, b, X_train, Y_train, learning_rate, num_iterations, print_cost)

    # Retrieve parameters w and b from dictionary “parameters”
    w = coeff['w']
    b = coeff['b']

    # Predict test/train set examples
    Y_prediction_test = predict(w, b, X_test)
    Y_prediction_train = predict(w, b, X_train)
    # Print train/test Errors
    print('train accuracy: {} %'.format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print('test accuracy: {} %'.format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
    params = {'costs': costs,
    'Y_prediction_test': Y_prediction_test, 
    'Y_prediction_train' : Y_prediction_train, 
    'w' : w, 
    'b' : b,
    'learning_rate' : learning_rate,
    'num_iterations': num_iterations}

    return params

In [19]:
parameters = model(X_train, y_train, X_test, y_test)

  cost = -(1/m)*(np.sum((Y*np.log(A)) + (1-Y) *np.log(1-A)))
  cost = -(1/m)*(np.sum((Y*np.log(A)) + (1-Y) *np.log(1-A)))
  s = 1/(1 + np.exp(-z))


train accuracy: 68.40277777777777 %
test accuracy: 68.05555555555556 %


In [20]:
preds = parameters['Y_prediction_test']
print(preds)

[[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.
  0. 1. 1. 1. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0. 0. 1. 1. 0. 1. 1. 0. 1. 1. 1.]]


In [21]:
print('Test Accuracy {} %'.format(accuracy_score(preds[0,:], y_test[0,:])*100))

Test Accuracy 68.05555555555556 %
