This project tries to classify the data from EEG signals as high Valance or low Valance.

In [2]:
import pandas as pd
import numpy as np
import pickle
import math
import statistics as stat
import scipy.stats as scStat

### The DEAP Dataset

The dataset contains 40 experiments for each of the 32 participants. The labels array contain the valence, arousal, dominance and liking ratings for each participant for each of the 40 experiments. The data array contains 8064 physiological/EEG signal data from 40 different channels for each of the 40 experiments for each of the 32 participants.

### Feature Extraction

We divide the 8064 readings per channel, into 10 batches of approximately 807 readings each. For each batch we extract the mean, median, maximum, minimum, standard deviation, variance, range, skewness and kurtosis values for the 807 readings. Hence for each of the 10 batches of a single channel we extract 9 values mentioned above, we get 90 values as our processed dataset. We further add the net mean, median, maximum, minimum, standard deviation, variance, range, skewness and kurtosis values for the entire 8064 readings along with the experiment and participant number to our dataset, bringing it up to 101 values per channel.

In [3]:
def extract_features(data, trial, participantNumber):
    extData = []
    for x in np.array_split(data, 10):
        extData.extend(calc_features(x))
    extData.extend(calc_features(data))
    extData.append(participantNumber)
    extData.append(participantNumber)
    return extData

In [4]:
def calc_features(array):
    return [stat.mean(array),
                stat.median(array),
                stat.variance(array),
                stat.stdev(array),
                max(array),
                min(array),
                scStat.mode(array)[0][0],
                scStat.kurtosis(array),
                scStat.skew(array, axis=0, bias=True)]

Features from each channel are extracted and appended to a df so that it can be stored into a csv file and accessed later.

In [5]:
def process_data_file(fileName, participantNumber):
    with open(fileName, 'rb') as f: content = pickle.load(f, encoding='latin1')
    data = content['data']
    labels = content['labels']
    extracted_features = []
    for index, trialData in enumerate(data):
        for i, channelData in enumerate(trialData):
            extracted_features.append(extract_features(channelData, index, participantNumber))
    df = pd.DataFrame(extracted_features)
    df['Valance Label'] = list(labels[:,0])*int(len(df)/len(labels))
    df['Arousal Label'] = list(labels[:,1])*int(len(df)/len(labels))
    return df

### Reading Data from DEAP Dataset

The data from DEAP Dataset .dat files are read one by one and the extracted features are appended into a csv file.

In [6]:
files = ['s05.dat']
participants = [int(x.split('.')[0][-1]) for x in files]
for f, participantNumber in zip(files, participants):
    process_data_file(f, participantNumber).to_csv('ExtractedFeatures.csv', mode='a', index=False, header=False)

FileNotFoundError: [Errno 2] No such file or directory: 's05.dat'

### Learning from extracted features

If you're reading from the provided csv file, learning starts here

In [7]:
columns = [str(i) for i in range(0,99)]
columns.extend(['experiment No', 'participant No', 'Valance Label', 'Arousal Label'])
df = pd.read_csv('ExtractedFeatures.csv', header=None, names=columns)

In [8]:
df['Valance Label'] = df['Valance Label'].apply(lambda x: 1 if x>5 else 0)
df['Arousal Label'] = df['Arousal Label'].apply(lambda x: 1 if x>5 else 0)

In [9]:
X = df[df.columns.difference(['Valance Label', 'Arousal Label'])].values

y_valance = np.array([x for i, x in enumerate(df['Valance Label'].values) if i%1640 < 40])
y_arousal = np.array([x for i, x in enumerate(df['Arousal Label'].values) if i%1640 < 40])

In [10]:
X = X.reshape(int(X.shape[0]/40),40,101)

In [11]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Dropout, Flatten
from sklearn.model_selection import KFold
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split

### Building basic CNN Model
This model follows the setup recommended in the paper.

In [12]:
def build_model():
    model = Sequential()
    
    model.add(Conv2D(100, (3,3), padding="valid", activation='tanh', input_shape = (40, 101, 1)))
    
    model.add(Conv2D(100, (3, 3), activation='tanh' ))
    
    model.add(MaxPooling2D(pool_size = (2, 2)))
    model.add(Dropout(0.25))
    
    model.add(Flatten())
    
    model.add(Dense(128, activation='tanh'))
    model.add(Dropout(0.50))
    
    model.add(Dense(2, activation='softplus'))
    
    sgd = SGD(lr = 0.00001, momentum = 0.9, nesterov = True)
    model.compile(loss ='categorical_crossentropy', optimizer = sgd,metrics=['accuracy'])
    
    return model

###### Building a model for tuning using hyperparameters
The function sets hyperparameters for the following:
- No. of input layers into 1st Conv layer
- No. subsequent conv layers
- No. of units in these layers
- No. of Dense layers
- Learning rate

In [39]:
def build_hyperparameter_model(hp):
    model = Sequential()
    
    model.add(Conv2D(hp.Int("input_units", min_value=50, max_value=500, step=10),
                     (3,3), padding="valid", activation='tanh', input_shape = (40, 101, 1)))
    
    for i in range(hp.Int("conv_layers",1, 8)):
        model.add(Conv2D(hp.Int(f"conv_{i}_units", min_value=32, max_value=512, step=32),
                         (3, 3), activation='tanh' ))
    
    model.add(MaxPooling2D(pool_size = (2, 2)))
    model.add(Dropout(0.50))
    
    model.add(Flatten())
    for i in range(hp.Int("dense_layers",0, 8)):
        model.add(Dense(hp.Int(f"dense_{i}_units", min_value=64, max_value=512, step=32), activation='tanh'))
    
    model.add(Dropout(0.50))
    
    model.add(Dense(2, activation='softplus'))
    
    hp_learning_rate = hp.Choice('learning_rate', values = [1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]) 
    
    sgd = SGD(lr = hp_learning_rate, momentum = 0.9, nesterov = True)
    model.compile(loss ='categorical_crossentropy', optimizer = sgd,metrics=['accuracy'])
    
    return model

### One-Hot encoding labels

In [14]:
y_valance = to_categorical(y_valance, num_classes=2)
y_arousal = to_categorical(y_arousal, num_classes=2)

### KFold cross validation before hyperparameter tuning

In [41]:
def kfold_validate(X, y, model):
    model = build_model()
    kf = KFold(n_splits=int(len(X)/40), shuffle=False)
    losses = []
    accuracies = []
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        
        X_train = X_train.reshape(X_train.shape[0], 40, 101, 1)
        X_test = X_test.reshape(X_test.shape[0], 40, 101, 1)
        
        history = model.fit(X_train, y_train, batch_size = 50, 
                            epochs = 10, validation_data = (X_test, y_test))
        score = model.evaluate(X_test, y_test, batch_size = 1)
        losses.append(score[0])
        accuracies.append(score[1])
        
        return (losses, accuracies)

In [28]:
losses, accuracies = kfold_validate(X, y_valance, build_model())
print("max loss = ",max(losses))
print("max accuracy = ",max(accuracies))

Train on 200 samples, validate on 40 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
max loss =  0.7398222625255585
max accuracy =  0.475


## Tuning using Keras-Tuner
- RandomSearch
- Bayesian optimisation
- Hyperband Tuning

In [24]:
import kerastuner as kt
from kerastuner.tuners import RandomSearch, BayesianOptimization, Hyperband
import time

X_train, X_test, y_train, y_test = train_test_split(X, y_valance, random_state=0)

X_train = X_train.reshape(X_train.shape[0], 40, 101, 1)
X_test = X_test.reshape(X_test.shape[0], 40, 101, 1)

LOG_DIR = f'{int(time.time())}'

rand_tuner = RandomSearch(build_hyperparameter_model, 
                    objective='acc',
                    max_trials=5,
                    executions_per_trial = 1,
                    directory=LOG_DIR)

rand_tuner.search(x=X_train,
            y=y_train,
            epochs=10,
            batch_size=50,
            validation_data=(X_test, y_test))

Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Oracle triggered exit


In [33]:
best_model_randSearch = tuner.get_best_models(1)[0]



In [36]:
bayesian_tuner = kt.BayesianOptimization(build_hyperparameter_model,
                                         objective='val_acc', 
                                         max_trials=5,
                                         executions_per_trial = 1,
                                         directory=LOG_DIR)

bayesian_tuner.search(x=X_train,
                      y=y_train,
                      epochs=10,
                      batch_size=50,
                      validation_data=(X_test, y_test))

Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Oracle triggered exit


In [37]:
best_model_bayesian = bayesian_tuner.get_best_models(1)[0]

In [38]:
best_model_bayesian.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 38, 99, 360)       3600      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 36, 97, 128)       414848    
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 95, 224)       258272    
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 93, 128)       258176    
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 30, 91, 448)       516544    
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 28, 89, 224)       903392    
_________________________________________________________________
flatten (Flatten)            (None, 558208)            0

In [49]:
hyperband_tuner = kt.Hyperband(build_hyperparameter_model,
                               objective='val_acc', 
                               max_epochs=5, 
                               directory=LOG_DIR)

hyperband_tuner.search(x=X_train,
                      y=y_train,
                      epochs=10,
                      batch_size=50,
                      validation_data=(X_test, y_test))

INFO:tensorflow:Reloading Oracle from existing project 1600680976\untitled_project\oracle.json
INFO:tensorflow:Reloading Tuner from 1600680976\untitled_project\tuner0.json
Train on 180 samples, validate on 60 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


Train on 180 samples, validate on 60 samples
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Train on 180 samples, validate on 60 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


INFO:tensorflow:Oracle triggered exit


In [50]:
best_model_hyperband = hyperband_tuner.get_best_models(1)[0]
best_model_hyperband.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 38, 99, 310)       3100      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 36, 97, 448)       1250368   
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 34, 95, 288)       1161504   
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 17, 47, 288)       0         
_________________________________________________________________
dropout (Dropout)            (None, 17, 47, 288)       0         
_________________________________________________________________
flatten (Flatten)            (None, 230112)            0         
_________________________________________________________________
dense (Dense)                (None, 224)               5

In [44]:
losses, accuracies = kfold_validate(X, y_valance, best_model_randSearch)
print("mean loss = ",np.mean(losses))
print("mean accuracy = ",np.mean(accuracies))

mean loss =  0.7141988947987556
mean accuracy =  0.35


In [45]:
losses, accuracies = kfold_validate(X, y_valance, best_model_bayesian)
print("mean loss = ",np.mean(losses))
print("mean accuracy = ",np.mean(accuracies))

Train on 200 samples, validate on 40 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
mean loss =  0.7239915445446968
mean accuracy =  0.475


In [51]:
losses, accuracies = kfold_validate(X, y_valance, best_model_hyperband)
print("mean loss = ",np.mean(losses))
print("mean accuracy = ",np.mean(accuracies))

Train on 200 samples, validate on 40 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
mean loss =  0.7540453724563122
mean accuracy =  0.475
