In [19]:
import os
import sys

import pandas as pd
import numpy as np

import pywt
import scipy.io as spio
from scipy.stats import entropy
from collections import Counter

import mne

from sklearn import svm
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import classification_report

import timeit

In [2]:
WAVELET = "db6"
MAX_LEVEL = 5
cwd = os.getcwd()

In [3]:
dataset_dir = "datasets\\EEG\\SEED-V"
working_dir = os.path.join(cwd, dataset_dir)


c:\00_Main\Repos\Biorobotics_term_Project\datasets\EEG\SEED-V


In [14]:
# reading the channel order for dataframe
channel_order_path = os.path.join(working_dir, "Channel Order.xlsx")
channel_order = pd.read_excel(channel_order_path, header=None)
channel_order.head()

Unnamed: 0,0
0,FP1
1,FPZ
2,FP2
3,AF3
4,AF4


In [16]:
files = os.listdir("EEG_raw")

del files[-1]
files

['10_1_20180507.cnt',
 '10_2_20180524.cnt',
 '10_3_20180626.cnt',
 '11_1_20180510.cnt',
 '11_2_20180508.cnt',
 '11_3_20180522.cnt',
 '12_1_20180515.cnt',
 '12_2_20180508.cnt',
 '12_3_20180517.cnt',
 '13_1_20180720.cnt',
 '13_2_20180806.cnt',
 '13_3_20180725.cnt',
 '14_1_20180420.cnt',
 '14_2_20180423.cnt',
 '14_3_20180427.cnt',
 '15_1_20180724.cnt',
 '15_2_20180807.cnt',
 '15_3_20180730.cnt',
 '16_1_20180805.cnt',
 '16_2_20180815.cnt',
 '16_3_20180813.cnt',
 '1_1_20180804.cnt',
 '1_2_20180810.cnt',
 '1_3_20180808.cnt',
 '2_1_20180416.cnt',
 '2_2_20180419.cnt',
 '2_3_20180425.cnt',
 '3_1_20180414.cnt',
 '3_2_20180419.cnt',
 '3_3_20180424.cnt',
 '4_1_20180414.cnt',
 '4_2_20180417.cnt',
 '4_3_20180501.cnt',
 '5_1_20180719.cnt',
 '5_2_20180728.cnt',
 '5_3_20180723.cnt',
 '6_1_20180713.cnt',
 '6_2_20180731.cnt',
 '6_3_20180802.cnt',
 '7_1_20180411.cnt',
 '7_2_20180418.cnt',
 '7_3_20180422.cnt',
 '8_1_20180717.cnt',
 '8_2_20180802.cnt',
 '8_3_20180726.cnt',
 '9_1_20180724.cnt',
 '9_2_2018080

**Feature extraction**

In [12]:
def calc_wavelet_energy(data_set):
  """
    Input : 1 * N vector
    Output: Float with the wavelet energy of the input vector,
    rounded to 3 decimal places.
  """
  # p_sqr = [i ** 2 for i in data_set]
  wavelet_energy = np.nansum(np.log2(np.square(data_set)))
  return round(wavelet_energy, 3)

In [13]:
def calc_shannon_entropy(data_set):
    """
        Input : 1 * N vector
        Output: Float with the wavelet entropy of the input vector,
                rounded to 3 decimal places.
    """
    # probability = [i ** 2 for i in data_set]
    probability = np.square(data_set)
    shannon_entropy = -np.nansum(probability * np.log2(probability))
    return round(shannon_entropy, 3)

In [43]:
def load_cnt(file):
    eeg_raw = mne.io.read_raw_cnt(file)
    useless_ch = ['M1', 'M2', 'VEO', 'HEO']
    eeg_raw.drop_channels(useless_ch)
    new_ch = eeg_raw.ch_names
    data_matrix = eeg_raw.get_data()
    print(data_matrix)
    data_pd = pd.DataFrame(data_matrix)
    return data_pd

In [44]:
matrix = load_cnt("EEG_raw\\10_2_20180524.cnt")
matrix

  eeg_raw = mne.io.read_raw_cnt(file)


[[-7.42077827e-06 -8.79168510e-06 -9.44733620e-06 ... -7.42971897e-05
  -7.48932362e-05 -7.56084919e-05]
 [-5.48362732e-06 -6.04987144e-06 -6.70552254e-06 ...  3.33786011e-06
   2.98023224e-06  2.38418579e-06]
 [ 2.17556953e-06  1.04308128e-06 -5.36441803e-07 ... -9.13739204e-05
  -9.19997692e-05 -9.25362110e-05]
 ...
 [-1.22189522e-06 -1.49011612e-06 -2.11596489e-06 ... -1.03831291e-04
  -1.04516745e-04 -1.05023384e-04]
 [ 1.19209290e-06  4.17232513e-07 -5.66244125e-07 ... -5.82933426e-05
  -5.89191914e-05 -5.96046448e-05]
 [-1.15036964e-05 -1.18911266e-05 -1.32024288e-05 ... -1.05142593e-04
  -1.05738640e-04 -1.06394291e-04]]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3269990,3269991,3269992,3269993,3269994,3269995,3269996,3269997,3269998,3269999
0,-0.000007,-8.791685e-06,-9.447336e-06,-0.000010,-0.000010,-0.000010,-0.000011,-0.000011,-0.000011,-0.000011,...,-0.000073,-0.000073,-0.000073,-0.000073,-0.000073,-0.000074,-0.000074,-0.000074,-0.000075,-0.000076
1,-0.000005,-6.049871e-06,-6.705523e-06,-0.000007,-0.000007,-0.000008,-0.000009,-0.000010,-0.000010,-0.000011,...,0.000003,0.000003,0.000003,0.000003,0.000003,0.000003,0.000003,0.000003,0.000003,0.000002
2,0.000002,1.043081e-06,-5.364418e-07,-0.000002,-0.000002,-0.000002,-0.000001,-0.000001,-0.000002,-0.000003,...,-0.000089,-0.000089,-0.000089,-0.000090,-0.000090,-0.000091,-0.000091,-0.000091,-0.000092,-0.000093
3,-0.000012,-1.159310e-05,-1.189113e-05,-0.000012,-0.000012,-0.000013,-0.000013,-0.000013,-0.000013,-0.000013,...,-0.000073,-0.000073,-0.000073,-0.000074,-0.000074,-0.000074,-0.000075,-0.000075,-0.000076,-0.000076
4,0.000007,6.020069e-06,5.245209e-06,0.000005,0.000006,0.000007,0.000007,0.000008,0.000007,0.000007,...,-0.000054,-0.000054,-0.000055,-0.000055,-0.000055,-0.000056,-0.000056,-0.000056,-0.000057,-0.000057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,-0.000007,-7.361174e-06,-7.659197e-06,-0.000008,-0.000008,-0.000009,-0.000009,-0.000011,-0.000014,-0.000015,...,-0.000074,-0.000074,-0.000074,-0.000075,-0.000075,-0.000076,-0.000076,-0.000076,-0.000077,-0.000077
58,-0.000002,-2.413988e-06,-2.980232e-06,-0.000003,-0.000003,-0.000004,-0.000004,-0.000004,-0.000005,-0.000005,...,-0.000045,-0.000045,-0.000046,-0.000046,-0.000046,-0.000046,-0.000046,-0.000047,-0.000048,-0.000048
59,-0.000001,-1.490116e-06,-2.115965e-06,-0.000003,-0.000004,-0.000004,-0.000005,-0.000006,-0.000007,-0.000008,...,-0.000101,-0.000101,-0.000101,-0.000102,-0.000102,-0.000103,-0.000103,-0.000104,-0.000105,-0.000105
60,0.000001,4.172325e-07,-5.662441e-07,-0.000002,-0.000003,-0.000004,-0.000005,-0.000006,-0.000006,-0.000006,...,-0.000056,-0.000056,-0.000057,-0.000057,-0.000057,-0.000058,-0.000058,-0.000058,-0.000059,-0.000060


In [None]:
participant_trial = []
features_table = pd.DataFrame(columns=range(620))

for file in files:
  mat_file = spio.loadmat("temp/Preprocessed_EEG/" + file)
  keys = [key for key, values in mat_file.items() if key != '__header__' and key != '__version__' and key != '__globals__' ]
  for data_file in keys:
    data_df = pd.DataFrame(mat_file[data_file])
    # print(data_df)
    M = data_df.shape[0]
    N = data_df.shape[1]
    # Feature extraction part of the module
    Entropy = []
    Energy = []
    for channel in data_df.iterrows():  # Iterating through the 62 channels
      dwt_bands = []
      data = channel[1]
      int_ent = []
      int_eng = []
      for band in range(MAX_LEVEL):
        (data, coeff_d) = pywt.dwt(data, WAVELET)
        dwt_bands.append(coeff_d)
      
      for band in range(len(dwt_bands)):  # DWT_bands = 23504, 11755
        int_ent.append(calc_shannon_entropy(dwt_bands[len(dwt_bands) - band - 1]))
        int_eng.append(calc_wavelet_energy(dwt_bands[len(dwt_bands) - band - 1]))
      
      Entropy.append(int_ent)
      Energy.append(int_eng)
    
    unroll_entropy = []
    unroll_energy = []
    '''
        Transforming 2D array into 1D vector of features and then 
        combining the two 1D arrays.
    '''
    for i in range(len(Entropy)):
      for j in range(len(Entropy[0])):
        unroll_entropy.append(Entropy[i][j])
        unroll_energy.append(Energy[i][j])

    features = unroll_entropy + unroll_energy
    participant_trial.append(features)
    features_table.loc[len(features_table.index)] = features
    # print(data_file)
    # print(features)
  print(file)


In [63]:
file_dir = dir + "/temp/analysis/"

In [60]:
features_table.to_csv(file_dir + "features" + WAVELET + ".csv", index=False)

**Principal Components Analysis**

In [64]:
data = pd.read_csv(file_dir + "features" + WAVELET + ".csv")

In [101]:
# 1. Normalising data and getting transpose
normalised = pd.DataFrame(normalize(data, axis = 0))

# 2. Finding covariance matrix
covariance_df = normalised.cov()

# 3. Eigen Vectors
u, s, v = np.linalg.svd(covariance_df)

# 4. Principal Components
data_reduced = normalised @ u
data_reduced.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,...,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619
0,0.097817,0.130165,0.069822,-0.339419,0.012962,0.022875,0.032319,0.154534,0.035682,0.005306,-0.06354,-0.102065,0.086618,-0.008909,-0.099754,0.011716,-0.068401,-0.072563,0.017964,-0.005755,0.082702,0.09963,0.121874,0.025797,0.02966,0.00987,0.134024,-0.091542,0.047139,0.029237,0.042781,-0.112607,-0.017711,-0.050721,0.176584,-0.003242,0.045504,-0.097517,-0.068512,-0.044981,...,9e-06,1.86227e-07,-1.6e-05,-1.3e-05,-2e-06,8e-06,9.287625e-07,1e-06,2.044501e-07,-6e-06,3.825408e-07,-5.865877e-07,-3.348752e-07,3.027798e-07,1.279925e-07,1e-06,-1.216881e-06,3.88667e-07,-6.618711e-07,7.504141e-07,4.407065e-08,-4.451369e-07,1.713568e-07,1.621219e-07,3.818677e-07,4.69184e-08,-4.66707e-08,-1.047121e-07,1.437236e-07,1.539457e-07,-2.098129e-08,-5.612988e-08,3.211213e-08,-4.723769e-08,4.073753e-09,4.360708e-08,-7.719521e-08,-1.279139e-08,1.84098e-08,-2.671326e-08
1,0.07913,0.107703,0.013614,-0.3237,0.005515,0.015283,0.045864,0.142307,0.047709,0.004523,-0.064133,-0.093897,0.073178,-0.009482,-0.094226,0.00071,-0.05041,-0.06825,0.021639,-0.003977,0.076888,0.088081,0.117455,0.01789,0.024536,0.014868,0.114364,-0.088827,0.03416,0.028324,0.048997,-0.09166,-0.030368,-0.049431,0.159302,-0.001226,0.05124,-0.097318,-0.065109,-0.037735,...,1e-06,-2.432004e-06,-1.6e-05,-1.1e-05,-3e-06,8e-06,1.278023e-06,2e-06,2.228182e-06,-6e-06,1.297098e-06,-7.741014e-07,-3.410565e-07,-1.614335e-07,4.022831e-07,1e-06,-8.737522e-07,3.870899e-07,-4.521406e-07,6.239186e-07,2.837787e-08,-3.984898e-07,2.10395e-07,7.799253e-08,4.492663e-07,9.70427e-08,-7.896122e-08,-9.481077e-08,1.457714e-07,1.83517e-07,-6.724168e-08,-9.023388e-08,3.03957e-08,-2.052639e-08,-9.275702e-09,3.880146e-08,-8.299719e-08,-5.382707e-09,1.8425e-08,-3.027882e-08
2,0.088409,0.118777,0.08946,-0.286453,0.008892,0.020814,0.014377,0.147221,0.029138,0.000999,-0.051779,-0.074488,0.067087,-0.006247,-0.075656,0.008401,-0.052398,-0.066528,0.020673,0.002985,0.060274,0.084171,0.109292,0.020673,0.027263,0.015637,0.11286,-0.074215,0.03939,0.02314,0.054142,-0.095459,-0.013652,-0.055456,0.135946,-0.00832,0.040155,-0.075728,-0.054987,-0.025159,...,4e-06,-9.94869e-07,-1.4e-05,-1.2e-05,-3e-06,9e-06,1.189258e-06,1e-06,1.448787e-06,-5e-06,8.777831e-07,-5.567351e-07,-2.543641e-07,1.289873e-07,1.330648e-07,1e-06,-1.041733e-06,4.109182e-07,-6.141837e-07,6.911247e-07,-4.15012e-08,-4.8743e-07,2.016586e-07,1.082429e-07,4.237289e-07,9.627468e-08,-5.019041e-08,-1.015426e-07,1.195451e-07,1.916321e-07,-3.923679e-08,-6.696104e-08,3.048998e-08,-3.824638e-08,-4.436387e-09,4.483001e-08,-8.183916e-08,-1.037805e-08,2.075966e-08,-3.009659e-08
3,0.08927,0.120594,0.047669,-0.31896,0.012963,0.019192,0.046841,0.154494,0.034705,3e-05,-0.060654,-0.093238,0.075378,-0.007603,-0.087917,0.001959,-0.049254,-0.079916,0.022041,0.008019,0.07783,0.087366,0.126025,0.012981,0.021779,0.01952,0.122227,-0.099641,0.050041,0.010458,0.063402,-0.098252,-0.033244,-0.050116,0.151246,0.003756,0.053185,-0.095541,-0.069392,-0.033584,...,6e-06,1.539661e-06,-1.7e-05,-1.2e-05,-6e-06,8e-06,1.530125e-07,2e-06,8.928652e-07,-5e-06,4.4757e-07,-6.17204e-07,2.534976e-07,-9.811037e-08,8.963929e-08,1e-06,-7.367891e-07,5.55598e-07,-7.257258e-07,6.343024e-07,1.047278e-07,-3.759031e-07,2.353655e-07,1.287926e-07,4.991714e-07,3.138122e-08,4.562266e-08,-1.862962e-07,1.02222e-07,2.629627e-07,-6.193438e-08,-7.203549e-08,3.924766e-08,-2.518006e-08,3.521498e-09,2.864041e-08,-7.464052e-08,-7.413186e-09,1.996893e-08,-3.272369e-08
4,0.578583,0.076831,0.016236,-0.230923,-0.222639,-0.038365,0.126182,0.150741,0.017501,-0.008049,-0.046198,-0.09868,0.065333,-0.012777,-0.09003,-0.044711,-0.069119,-0.067042,0.041172,-0.021609,0.074078,0.085064,0.094152,0.032143,0.056237,-0.029938,0.042653,-0.110838,0.011368,-0.007524,0.058345,-0.15711,-0.026012,-0.074466,0.086068,0.031457,0.015314,-0.093344,-0.07737,-0.030921,...,6e-06,5.35658e-07,-1.5e-05,-1.2e-05,-3e-06,7e-06,1.467248e-06,1e-06,1.23129e-06,-6e-06,7.304807e-07,-7.374172e-07,-1.559129e-07,8.989781e-08,1.324989e-07,1e-06,-9.338416e-07,4.267613e-07,-6.008731e-07,6.108701e-07,-6.640622e-09,-4.502359e-07,2.06136e-07,1.152751e-07,4.441744e-07,7.309165e-08,-5.875483e-08,-1.204628e-07,1.133269e-07,1.979997e-07,-4.002713e-08,-6.928006e-08,3.225721e-08,-3.729608e-08,-3.529057e-09,3.982864e-08,-7.999112e-08,-8.889159e-09,1.980401e-08,-3.009316e-08


In [103]:
data_reduced.to_csv(file_dir + "pc" + WAVELET + ".csv", index=False)

In [122]:
file_dir

'/content/drive/MyDrive/EEG-emotion-python/seed dataset/SEED/temp/analysis/'

**Running data through classifiers**


1.   SVM



In [124]:
# Reading data and splitting 
pcs = pd.read_csv(file_dir + "pc" + WAVELET + ".csv")
outputs = pd.read_csv(file_dir + "outputs_main.csv", header=None)

X = pcs.iloc[:, :].values
Y = outputs.iloc[:, :].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state=42)

In [129]:
svc = SVC()
parameters = {"C": (100, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9), "gamma": (1e-08, 1e-7, 1e-6, 1e-5)}
grid_search = GridSearchCV(svc, parameters, n_jobs=-1, cv=5)
start_time = timeit.default_timer()
grid_search.fit(X_train, Y_train)
print("--- {0:.3f} seconds ---".format(timeit.default_timer() - start_time))
print(grid_search.best_params_)
svc_best = grid_search.best_estimator_
accuracy = svc_best.score(X_test, Y_test)
print("Accuracy on the testing set is: {0:.1f}%".format(accuracy*100))
prediction = svc_best.predict(X_test)
report = classification_report(Y_test, prediction)
print(report)


  y = column_or_1d(y, warn=True)


--- 22.287 seconds ---
{'C': 10000000.0, 'gamma': 1e-05}
Accuracy on the testing set is: 69.7%
              precision    recall  f1-score   support

          -1       0.59      0.59      0.59        44
           0       0.59      0.56      0.57        41
           1       0.88      0.91      0.90        47

    accuracy                           0.70       132
   macro avg       0.69      0.69      0.69       132
weighted avg       0.69      0.70      0.69       132

