## Here we compare models of different depth and architecture on clear data.

In [1]:
import warnings
warnings.filterwarnings("ignore")

import matplotlib as mpl
mpl.use("TkAgg")

from sklearn.preprocessing import normalize#, MinMaxScaler
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import h5py
import mne
import keras
import keras.backend as K
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Reshape, GRU, LSTM, Lambda, RepeatVector, Reshape, Dropout, Conv1D, UpSampling1D, Bidirectional

from os import walk, listdir


%matplotlib inline
data_path = "../../data/train/"
clear_data_path = "/media/valbub/Docs/data/train/"
raw_data_path = "../../data/resting_state/"

Using TensorFlow backend.


In [2]:
class AutoEncoder(object):
    def __init__(self, 
                 input_dim = (7, 58), 
                 encoded_dim = (7, 29), 
                 loss="mse", 
                 optimizer="adadelta", 
                 activation=("elu", "sigmoid"),
                 kernel = 7):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim
            
            class MinMaxScaler():

                def __init__(self, minimum=None, maximum=None):
                    self.minimum = minimum
                    self.maximum = maximum

                def fit_transform(self, X):
                    if self.minimum is None or self.maximum is None:
                        self.minimum = np.min(X, axis=(0, 1))
                        self.maximum = np.max(X, axis=(0, 1))
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def transform(self, X):
                    return (np.array(X) - self.minimum) / (self.maximum - self.minimum)

                def reverse_transform(self, X_scl):
                    return X_scl * (self.maximum - self.minimum) + self.minimum

            self.scaler = MinMaxScaler()
    
    def prepare_clear_data(self, data_path, limit=2):
        train_eeg_dir = data_path
        train_eeg_names = [x for x in listdir(train_eeg_dir) 
                         if x[-3:] == ".h5"]
        data = np.zeros((0, self.input_dim[0], self.input_dim[1]))

        flag = 0
        err_files = 0
        for eeg_name in train_eeg_names:
            if flag == limit:
                break
            flag += 1
            h5_file = h5py.File(train_eeg_dir + eeg_name, 'r')
            a_group_key = list(h5_file.keys())[0]
            eeg_data = np.array(h5_file[a_group_key]).T
            batches = np.array(self._getBatches(eeg_data, batch_size=self.input_dim[0]))
            data = np.concatenate((data, batches), axis=0)
        return data
    
    def prepare_raw_data(self, data_path, limit=2):
        def preparefile(file_path):
            raw = np.array(mne.io.read_raw_brainvision(file_path, preload=True).to_data_frame())
            batches = np.array(self._getBatches(raw, batch_size=self.input_dim[0]))
            del raw
            return batches
        files = []
        data = []
        for elem in walk(data_path):
            for file in elem[-1]:
                if file[-4:] == "vhdr":
                    files.append(file)
        data = np.ndarray(shape=(0, self.input_dim[0], self.input_dim[1]))
        flag = 0
        for file in files:
            flag += 1
            file_name = data_path + file
            if flag == limit:
                break
            batches =  preparefile(file_name)
            data = np.concatenate((data, batches), axis=0)
        return data
    
    def fit(self, X_train, epochs=50):
        X_scaled = self.scaler.fit_transform(X_train)
        return self.autoencoder.fit(X_scaled, X_scaled, epochs = epochs)
    
    def fit_scaler(self, X_train):
        self.scaler.fit_transform(X_train)
    
    def encode(self, df):
        return self._predict(df, self.encoder, self.input_dim[0])
    
    def decode(self, df):
        return self._predict(df, self.decoder, self.encoded_dim[1])
    
    def run(self, df):
        return self._predict(df, self.autoencoder, self.input_dim[0])
    
    def save(self, path, part="autoencoder"):
        if part == "encoder":
            self.encoder.save(path)
        elif part == "decoder":
            self.decoder.save(path)
        elif part == "autoencoder":
            self.autoencoder.save(path)
        pass
    
    def load(self, path, part="autoencoder", X_train=None):
        if part == "encoder":
            self.encoder = keras.models.load_model(path)
        elif part == "decoder":
            self.decoder = keras.models.load_model(path)
        elif part == "autoencoder":
            self.autoencoder = keras.models.load_model(path)
        if x_train is not None:
            self.fit_scaler(X_train)
    

    def _predict(self, df, model):
        batches = self.scaler.transform(df)
        batches = tuple(self._predictBatch(batch.reshape((1, *batch.shape)), model) for batch in batches)
        batches = self._concatBatches(batches) 
        return self.scaler.reverse_transform(batches)
    
    def _predictBatch(self, batch, model):
        return model.predict(batch)
    
    def _getBatches(self, arr, batch_size, axis=0):
        n_batches = arr.shape[axis] // batch_size
        result = np.array_split(arr, n_batches, axis=axis)
        i = 0
        while result[i].shape[0] != batch_size:
            i += 1
        result = result[i:]
        return result
    
    def _concatBatches(self, batches, axis=0):
        return np.concatenate(batches, axis=axis)


In [3]:
class AU_Stage_1(AutoEncoder):
        def __init__(self, 
                 input_dim = (7, 58), 
                 encoded_dim = (7, 29), 
                 loss="mse", 
                 optimizer="adadelta", 
                 activation=("elu", "sigmoid"),
                 kernel = 7):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim

            #Encoder
            self._inputs = Input(shape=input_dim)
            self._flat = Flatten()(self._inputs)
            self._dense = Dense(units=np.prod(encoded_dim), activation=activation[0])(self._flat)
            self._encoded = Reshape(encoded_dim)(self._dense)

            #Decoder
            self._encoded_inputs = Input(shape=(encoded_dim[0]*encoded_dim[1],))
            self._flat_decoded = Dense(input_dim[0]*input_dim[1], activation=activation[1])(self._encoded_inputs)
            self._decoded = Reshape(input_dim)(self._flat_decoded)

            #Models
            self.encoder = Model(self._inputs, self._encoded)
            self.decoder = Model(self._encoded_inputs, self._decoded)
            self.autoencoder = Model(self._inputs, self.decoder(self.encoder(self._inputs)))
            
            self.autoencoder.compile(optimizer=optimizer, loss=loss)

            class MinMaxScaler():

                def __init__(self, minimum=None, maximum=None):
                    self.minimum = minimum
                    self.maximum = maximum

                def fit_transform(self, X):
                    if self.minimum is None or self.maximum is None:
                        self.minimum = np.min(X, axis=(0, 1))
                        self.maximum = np.max(X, axis=(0, 1))
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def transform(self, X):
                    return (np.array(X) - self.minimum) / (self.maximum - self.minimum)

                def reverse_transform(self, X_scl):
                    return X_scl * (self.maximum - self.minimum) + self.minimum

            self.scaler = MinMaxScaler()

In [4]:
class AU_Stage_2(AutoEncoder):
        def __init__(self, 
                 input_dim = (7, 58), 
                 encoded_dim = (7, 29), 
                 loss="mse", 
                 optimizer="adadelta", 
                 activation=("elu", "sigmoid"),
                 kernel = 7):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim

            #Encoder
            self._inputs = Input(shape=input_dim)
            self._conv = Conv1D(filters=encoded_dim[1], kernel_size=kernel)(self._inputs) 
            self._dense = Dense(units=np.prod(encoded_dim), activation=activation[0])(self._conv)
            self._encoded = Reshape(encoded_dim)(self._dense)

            #Decoder
            self._encoded_inputs = Input(shape=encoded_dim)
            self._flat_decoded = Dense(units=np.prod(input_dim), activation=activation[1])(self._encoded_inputs)
            self._decoded = Reshape(input_dim)(self._flat_decoded)

            #Models
            self.encoder = Model(self._inputs, self._encoded)
            self.decoder = Model(self._encoded_inputs, self._decoded)
            self.autoencoder = Model(self._inputs, self.decoder(self.encoder(self._inputs)))

            self.autoencoder.compile(optimizer=optimizer, loss=loss)
            
            class MinMaxScaler():

                def __init__(self, minimum=None, maximum=None):
                    self.minimum = minimum
                    self.maximum = maximum

                def fit_transform(self, X):
                    if self.minimum is None or self.maximum is None:
                        self.minimum = np.min(X, axis=(0, 1))
                        self.maximum = np.max(X, axis=(0, 1))
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def transform(self, X):
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def reverse_transform(self, X_scl):
                    return X_scl * (self.maximum - self.minimum) + self.minimum

            self.scaler = MinMaxScaler()


In [5]:
class AU_Stage_3(AutoEncoder):
        def __init__(self, 
                 input_dim = (7, 58), 
                 encoded_dim = (7, 29), 
                 loss="mse", 
                 optimizer="adadelta", 
                 activation=("elu", "sigmoid"),
                 kernel = 7,
                 folds=100):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim

            #Encoder
            self._inputs = Input(shape=input_dim)
            self._lambda = Lambda(lambda x: K.round(x * folds) / folds)(self._inputs)
            self._lstm = LSTM(input_dim[1], return_sequences=True, dropout=0, recurrent_dropout=0.1)(self._lambda)
            self._conv = Conv1D(filters=encoded_dim[1], kernel_size=input_dim[0])(self._lstm)
            self._dense = Dense(units=np.prod(encoded_dim), activation=activation[0])(self._conv)
            self._encoded = Reshape(encoded_dim)(self._dense)
            
            #Decoder
            self._encoded_inputs = Input(shape=encoded_dim)
            self._flat_decoded_1 = Dense(units=np.prod(input_dim), activation=activation[1])(self._encoded_inputs)
            self._flat_decoded_2 = UpSampling1D(size=input_dim[0] // encoded_dim[0])(self._flat_decoded_1)
            self._lstm_2 = LSTM(input_dim[1], return_sequences=True)(self._flat_decoded_2)
            self._decoded = Reshape(input_dim)(self._lstm_2)
            
            #Models
            self.encoder = Model(self._inputs, self._encoded)
            self.decoder = Model(self._encoded_inputs, self._decoded)
            self.autoencoder = Model(self._inputs, self.decoder(self.encoder(self._inputs)))
            
            self.autoencoder.compile(optimizer=optimizer, loss=loss)
            
            class MinMaxScaler():

                def __init__(self, minimum=None, maximum=None):
                    self.minimum = minimum
                    self.maximum = maximum

                def fit_transform(self, X):
                    if self.minimum is None or self.maximum is None:
                        self.minimum = np.min(X, axis=(0, 1))
                        self.maximum = np.max(X, axis=(0, 1))  + 1e-10
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def transform(self, X):
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def reverse_transform(self, X_scl):
                    return X_scl * (self.maximum - self.minimum) + self.minimum

            self.scaler = MinMaxScaler()

In [6]:
class AU_Stage_3_5(AutoEncoder):
        def __init__(self, 
                 input_dim = (7, 58), 
                 encoded_dim = (7, 29), 
                 loss="mse", 
                 optimizer="adadelta", 
                 activation=("elu", "sigmoid"),
                 kernel = 7):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim

            #Encoder
            self._inputs = Input(shape=input_dim)
            self._lstm = GRU(input_dim[1], return_sequences=True, dropout=0, recurrent_dropout=0.1)(self._inputs)
            self._conv = Conv1D(filters=encoded_dim[1], kernel_size=input_dim[0])(self._lstm)
            self._dense = Dense(units=np.prod(encoded_dim), activation=activation[0])(self._conv)
            self._encoded = Reshape(encoded_dim)(self._dense)

            #Decoder
            self._encoded_inputs = Input(shape=encoded_dim)
            self._flat_decoded_1 = Dense(units=np.prod(input_dim), activation=activation[1])(self._encoded_inputs)
            self._flat_decoded_2 = UpSampling1D(size=input_dim[0] // encoded_dim[0])(self._flat_decoded_1)
            self._lstm_2 = GRU(input_dim[1], return_sequences=True)(self._flat_decoded_2)
            self._decoded = Reshape(input_dim)(self._lstm_2)

            #Models
            self.encoder = Model(self._inputs, self._encoded)
            self.decoder = Model(self._encoded_inputs, self._decoded)
            self.autoencoder = Model(self._inputs, self.decoder(self.encoder(self._inputs)))

            self.autoencoder.compile(optimizer=optimizer, loss=loss)

            class MinMaxScaler():

                def __init__(self, minimum=None, maximum=None):
                    self.minimum = minimum
                    self.maximum = maximum

                def fit_transform(self, X):
                    if self.minimum is None or self.maximum is None:
                        self.minimum = np.min(X, axis=(0, 1))
                        self.maximum = np.max(X, axis=(0, 1)) + 1e-10
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def transform(self, X):
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def reverse_transform(self, X_scl):
                    return X_scl * (self.maximum - self.minimum) + self.minimum

            self.scaler = MinMaxScaler()

In [7]:
class AU_Stage_4(AutoEncoder):
        def __init__(self, 
                 input_dim = (7, 58), 
                 encoded_dim = (7, 29), 
                 loss="mse", 
                 optimizer="adadelta", 
                 activation=("elu", "sigmoid"),
                 kernel = 7,
                 folds = 100):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim

            #Encoder
            self._inputs = Input(shape=input_dim)
            self._lambda = Lambda(lambda x: K.round(x * folds) / folds)(self._inputs)
            self._lstm = LSTM(input_dim[1], return_sequences=True, dropout=0, recurrent_dropout=0.1)(self._lambda)
            self._reshape = Reshape((-1, input_dim[1] * input_dim[0]))(self._lstm)
            self._dense = Dense(units=np.prod(encoded_dim), activation=activation[0])(self._reshape)
            self._encoded = Reshape(encoded_dim)(self._dense)

            #Decoder
            self._encoded_inputs = Input(shape=encoded_dim)
            self._flat_decoded_1 = Dense(units=np.prod(input_dim), activation=activation[1])(self._encoded_inputs)
            self._lstm_2 = LSTM(input_dim[1] * input_dim[0], return_sequences=True)(self._flat_decoded_1)
            self._decoded = Reshape((-1, input_dim[1]))(self._lstm_2)

            #Models
            self.encoder = Model(self._inputs, self._encoded)
            self.decoder = Model(self._encoded_inputs, self._decoded)
            self.autoencoder = Model(self._inputs, self.decoder(self.encoder(self._inputs)))

            self.autoencoder.compile(optimizer=optimizer, loss=loss)

            class MinMaxScaler():

                def __init__(self, minimum=None, maximum=None):
                    self.minimum = minimum
                    self.maximum = maximum

                def fit_transform(self, X):
                    if self.minimum is None or self.maximum is None:
                        self.minimum = np.min(X, axis=(0, 1))
                        self.maximum = np.max(X, axis=(0, 1)) + 1e-10
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def transform(self, X):
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def reverse_transform(self, X_scl):
                    return X_scl * (self.maximum - self.minimum) + self.minimum

            self.scaler = MinMaxScaler()

In [8]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

epoch_numb = 50
limit = 14

In [9]:
# here we get data for all experiments in future
au_1 = AU_Stage_1()
data_set = au_1.prepare_clear_data(clear_data_path, limit=limit)
train_data, test_data = train_test_split(data_set, random_state=0, test_size=0.3)
data_set.shape

ValueError: Dimensions must be equal, but are 29 and 203 for 'model_2/dense_2/MatMul' (op: 'MatMul') with input shapes: [?,29], [203,406].

In [None]:
au_1.fit(train_data, epochs=epoch_numb)
folder = "./model_stage_1/"
au_1.save(folder + "au")
au_1.save(folder + "en")
au_1.save(folder + "de")

In [None]:
pr = au_1._predict(test_data, au_1.autoencoder)
ds = np.concatenate(test_data)
p = np.concatenate(pr)
print('\nDense')
print('\nClear data')
print(r2_score(ds, p))
print(mean_absolute_error(ds, p))
print(mean_squared_error(ds, p))
print()

In [None]:
del au_1

In [12]:
au_2 = AU_Stage_2()
# here we assume that batches are the same and use data from previous model to compare models correctly
# data_set = au_2.prepare_clear_data(clear_data_path, limit=limit)
# train_data, test_data = train_test_split(data_set, random_state=0, test_size=0.3)

au_2.fit(train_data, epochs=epoch_numb)
folder = "./model_stage_2/"
au_2.save(folder + "au")
au_2.save(folder + "en")
au_2.save(folder + "de")

ValueError: total size of new array must be unchanged

In [None]:
pr = au_2._predict(test_data, au_2.autoencoder)
ds = np.concatenate(test_data)
p = np.concatenate(pr)
print('\nDense+Conv')
print('\nClear data')
print(r2_score(ds, p))
print(mean_absolute_error(ds, p))
print(mean_squared_error(ds, p))
print()

In [None]:
del au_2

In [13]:
au_3 = AU_Stage_3()
# here we assume that batches are the same and use data from previous model to compare models correctly
# data_set = au_3.prepare_clear_data(clear_data_path, limit=limit)
# train_data, test_data = train_test_split(data_set, random_state=0, test_size=0.3)

au_3.fit(train_data, epochs=epoch_numb)
folder = "./model_stage_3/"
au_3.save(folder + "au")
au_3.save(folder + "en")
au_3.save(folder + "de")

NameError: name 'train_data' is not defined

In [None]:
pr = au_3._predict(test_data, au_3.autoencoder)
ds = np.concatenate(test_data)
p = np.concatenate(pr)
print("\nDense+Conv+LSTM")
print('\nClear data')
print(r2_score(ds, p))
print(mean_absolute_error(ds, p))
print(mean_squared_error(ds, p))
print()

In [None]:
del au_3

In [None]:
au_3_5 = AU_Stage_3_5()
# here we assume that batches are the same and use data from previous model to compare models correctly
# data_set = au_3_5.prepare_clear_data(clear_data_path, limit=limit)
# train_data, test_data = train_test_split(data_set, random_state=0, test_size=0.3)

au_3_5.fit(train_data, epochs=epoch_numb)
folder = "./model_stage_3_5/"
au_3_5.save(folder + "au")
au_3_5.save(folder + "en")
au_3_5.save(folder + "de")

In [None]:
pr = au_3_5._predict(test_data, au_3_5.autoencoder)
ds = np.concatenate(test_data)
p = np.concatenate(pr)
print("\nDense+Conv+GRU")
print('\nClear data')
print(r2_score(ds, p))
print(mean_absolute_error(ds, p))
print(mean_squared_error(ds, p))
print()

In [None]:
del au_3_5

In [None]:
au_4 = AU_Stage_4()
# here we assume that batches are the same and use data from previous model to compare models correctly
# data_set = au_4.prepare_clear_data(clear_data_path, limit=limit)
# train_data, test_data = train_test_split(data_set, random_state=0, test_size=0.3)

au_4.fit(train_data, epochs=epoch_numb)
folder = "./model_stage_4/"
au_4.save(folder + "au")
au_4.save(folder + "en")
au_4.save(folder + "de")

In [None]:
pr = au_4._predict(test_data, au_4.autoencoder)
ds = np.concatenate(test_data)
p = np.concatenate(pr)
print("\nDense+LSTM")
print('\nClear data')
print(r2_score(ds, p))
print(mean_absolute_error(ds, p))
print(mean_squared_error(ds, p))
print()

In [None]:
del au_4

TODO:
    + попробовать обучить всё это же, но без лямбды
    + попробовать размер батча 5
    + попробовать сжимать из (7, 58) в (7, 29)