In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import os


# Seed both NumPy and TensorFlow so that data splits and weight
# initialisation are repeatable between runs.
random_seed = 37
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

# Image classes kept for the experiment; a class's position in this list
# is used as its integer label.
used_classes = [
    'Airliner',
    'Sorrel',
    'Jack-o’-lantern',
    'Panda',
    'Anemone fish',
]

num_classes = len(used_classes)
num_chanels = 14  # feature dimension of the LSTM input (name kept as spelled; other cells use it)
eeg_samples_cutoff = 120  # rows kept per recording; shorter recordings are dropped by the loader

# Root folder of the processed recordings; the loader appends 'ds/' and 'ds1/'
# by plain string concatenation, so the trailing slash matters.
data_dir = 'eeg_processed/'


In [None]:
def get_data(data_root=None, classes=None, cutoff=None):
    """Load the per-recording EEG CSV files into NumPy arrays.

    Every ``.csv`` file in the ``ds/`` and ``ds1/`` sub-directories of the data
    root is read with its first CSV column as the index. A file is kept when it
    has at least ``cutoff`` rows and the first value of its ``class`` column is
    one of ``classes``.

    Parameters
    ----------
    data_root : str, optional
        Directory prefix containing ``ds/`` and ``ds1/``. It is concatenated
        as-is, so it must end with a path separator. Defaults to the
        notebook-level ``data_dir``.
    classes : sequence of str, optional
        Class names to keep; a name's position is its integer label.
        Defaults to the notebook-level ``used_classes``.
    cutoff : int, optional
        Minimum number of rows a file must have, and the number of leading rows
        taken from it. Defaults to the notebook-level ``eeg_samples_cutoff``.

    Returns
    -------
    X : numpy.ndarray
        One entry per kept file: its first ``cutoff`` rows and the columns
        ``1:-3`` (counted after the index column has been removed).
    y : numpy.ndarray
        Shape ``(n_kept, 2)``. Column 0 is the class position in ``classes``,
        column 1 is the first ``img_num`` value of the file.
    """
    root = data_dir if data_root is None else data_root
    classes = list(used_classes if classes is None else classes)
    cutoff = eeg_samples_cutoff if cutoff is None else cutoff

    # Paths are built by plain concatenation and sorted as strings so that the
    # load order (and therefore any seeded split downstream) stays the same.
    eeg_files = sorted(
        root + sub_dir + name
        for sub_dir in ('ds/', 'ds1/')
        for name in os.listdir(root + sub_dir)
        if name.endswith(".csv")
    )

    X = []
    y = []
    for file_name in eeg_files:
        df = pd.read_csv(file_name, index_col=0)
        if df.empty or len(df.index) < cutoff:
            continue
        # Positional access: the index comes from the CSV, so a row labelled
        # 0 is not guaranteed to exist.
        label = df['class'].iloc[0]
        if label not in classes:
            continue
        X.append(df.iloc[:cutoff, 1:-3].to_numpy())
        y.append(classes.index(label))
        y.append(df['img_num'].iloc[0])

    # y is still flat here (label, img_num, label, img_num, ...), so this
    # prints twice the number of kept recordings.
    print(len(y))
    y = np.array(y).reshape(len(y) // 2, 2)
    X = np.asarray(X)
    return X, y


def split_ds(X, y):
    """Split samples into train / test / validation sets and one-hot the labels.

    20% of the data is held out as the test set; 20% of what remains is then
    held out as the validation set. Both splits use ``random_seed``.

    Only column 0 of ``y`` (the class index) is kept and converted to a
    one-hot matrix with ``num_classes`` columns; other columns are dropped.

    Returns
    -------
    tuple
        ``(X_train, X_test, X_valid, y_train, y_test, y_valid)``.
    """
    def one_hot(label_rows):
        # Column 0 carries the integer class label.
        return to_categorical(label_rows[:, 0], num_classes)

    # Stage 1: carve out the test set.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, y, test_size=0.2, random_state=random_seed
    )
    # Stage 2: carve the validation set out of the remainder.
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_rest, y_rest, test_size=0.2, random_state=random_seed
    )

    return (
        X_train,
        X_test,
        X_valid,
        one_hot(y_train),
        one_hot(y_test),
        one_hot(y_valid),
    )


In [None]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, LSTM, Input, Reshape, Lambda, Concatenate
from tensorflow.keras.optimizers import Adam, Adadelta


def stack_model(l1=28, l2=28, lr=0.001, dropout=0.1):
    """Build and compile a two-layer stacked LSTM classifier.

    Parameters
    ----------
    l1 : int
        Units in the first LSTM layer, which returns the full sequence.
    l2 : int
        Units in the second LSTM layer, which returns only its last output.
    lr : float
        Learning rate passed to the Adam optimizer.
    dropout : float
        Rate used for both ``dropout`` and ``recurrent_dropout`` in both
        LSTM layers.

    Returns
    -------
    tensorflow.keras.models.Sequential
        Model taking inputs of shape ``(eeg_samples_cutoff, num_chanels)`` per
        sample and ending in a softmax over ``num_classes``, compiled with
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential()
    model.add(LSTM(l1, input_shape=(eeg_samples_cutoff, num_chanels), return_sequences=True, dropout=dropout, recurrent_dropout=dropout))
    # Only the first layer of a Sequential model defines the input shape; the
    # second LSTM receives the first layer's output sequence.
    model.add(LSTM(l2, return_sequences=False, dropout=dropout, recurrent_dropout=dropout))
    model.add(Dense(num_classes, activation='softmax'))
    # `lr` is a deprecated alias that newer Keras releases no longer accept;
    # `learning_rate` is the supported keyword.
    opt = Adam(learning_rate=lr)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model


In [None]:
# Load the recordings and split them into train / test / validation sets.
dt = get_data()
X_train, X_test, X_valid, y_train, y_test, y_valid = split_ds(dt[0], dt[1])

# Fit the standardisation on the training rows only, stacking all training
# samples into one 2-D (rows, channels) matrix.
scaler = StandardScaler()
scaler.fit(X_train.reshape(-1, X_train.shape[-1]))


def scale_epochs(epochs):
    """Apply the fitted ``scaler`` to a 3-D (samples, rows, channels) array.

    The array is flattened to 2-D, transformed in a single call and reshaped
    back. The scaler works per column, so this gives the same values as
    transforming each sample separately.
    """
    flat = epochs.reshape(-1, epochs.shape[-1])
    return scaler.transform(flat).reshape(epochs.shape)


X_train_scld = scale_epochs(X_train)
X_test_scld = scale_epochs(X_test)
# The validation split is scaled as well, so it is on the same scale as the
# training data if it is later passed to the model.
X_valid_scld = scale_epochs(X_valid)

In [None]:
from sklearn.metrics import classification_report


# Hyper-parameters for this training run.
lr = 0.0003
dropout = 0.2
epoch = 80
b_s = 2    # batch size
d_n = 56   # units of the second LSTM layer (passed as l2)
l_n = 56   # units of the first LSTM layer (passed as l1)

st_model = stack_model(l_n, d_n, lr, dropout)
st_model.summary()
history = st_model.fit(X_train_scld, y_train, epochs=epoch, batch_size=b_s)

# The model is compiled with metrics=['accuracy'], so scores is [loss, accuracy].
scores = st_model.evaluate(X_test_scld, y_test, verbose=1)

print("Accuracy: %.2f%%" % (scores[1]*100))

y_pred = st_model.predict(X_test_scld, verbose=1)

# Collapse softmax outputs / one-hot targets back to integer class indices.
y_pred_bool = np.argmax(y_pred, axis=1)
y_test_bool = np.argmax(y_test, axis=1)

# Pin the label set explicitly: without `labels`, classification_report infers
# the classes from the data and raises a ValueError if any class is missing
# from both the test targets and the predictions, because the inferred count
# no longer matches len(target_names).
print(classification_report(y_test_bool, y_pred_bool, labels=list(range(num_classes)), target_names=used_classes))

filename = 'encoder_final.h5'
st_model.save(filename)