## Here we compare different batch sizes on clear data.

In [1]:
import warnings
warnings.filterwarnings("ignore")

import matplotlib as mpl
mpl.use("TkAgg")

from sklearn.preprocessing import normalize#, MinMaxScaler
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import h5py
import mne
import keras
from keras.models import Model
from keras.layers import Input, Dense, Flatten, Reshape, LSTM, RepeatVector, Reshape, Dropout, Conv1D, UpSampling1D, Bidirectional

from os import walk, listdir


%matplotlib inline
data_path = "../../data/train/"
clear_data_path = "/media/valbub/Docs/data/train/"
raw_data_path = "../../data/resting_state/"

Using TensorFlow backend.


In [49]:
class AutoEncoder(object):
    def __init__(self, 
             input_dim = (5, 58), 
             encoded_dim = (1, 58), 
             loss="mse", 
             optimizer="adadelta", 
             activation=("relu", "sigmoid", "tanh", "elu"),
             act_idx=(0, 0),
             kernel = 3):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim
    
    def prepare_clear_data(self, data_path, limit=2):
        train_eeg_dir = data_path
        train_eeg_names = [x for x in listdir(train_eeg_dir) 
                         if x[-3:] == ".h5"]
        data = np.zeros((0, self.input_dim[0], self.input_dim[1]))

        flag = 0
        for eeg_name in train_eeg_names:
            if flag == limit:
                break
            flag += 1
            h5_file = h5py.File(train_eeg_dir + eeg_name, 'r')
            a_group_key = list(h5_file.keys())[0]
            eeg_data = np.array(h5_file[a_group_key]).T
            batches = np.array(self._getBatches(eeg_data, batch_size=self.input_dim[0]))
            if len(batches.shape) == 3:
                data = np.concatenate((data, batches), axis=0)
            else:
                flag -= 1
        return data
    
    def prepare_raw_data(self, data_path, limit=2):
        def preparefile(file_path):
            raw = np.array(mne.io.read_raw_brainvision(file_path, preload=True).to_data_frame())
            batches = np.array(self._getBatches(raw, batch_size=self.input_dim[0]))
            del raw
            return batches
        files = []
        data = []
        for elem in walk(data_path):
            for file in elem[-1]:
                if file[-4:] == "vhdr":
                    files.append(file)
        data = np.ndarray(shape=(0, self.input_dim[0], self.input_dim[1]))
        flag = 0
        for file in files:
            file_name = data_path + file
            if flag == limit:
                break
            batches =  preparefile(file_name)
            data = np.concatenate((data, batches), axis=0)
            flag += 1
        return data
    
    def fit(self, X_train, epochs=50):
        X_scaled = self.scaler.fit_transform(X_train)
        self.autoencoder.fit(X_scaled, X_scaled, epochs = epochs)
    
    def encode(self, df):
        return self._predict(df, self.encoder, self.input_dim[0])
    
    def decode(self, df):
        return self._predict(df, self.decoder, self.encoded_dim[1])
    
    def run(self, df):
        return self._predict(df, self.autoencoder, self.input_dim[0])
    
    def save(self, path, part="autoencoder"):
        if part == "encoder":
            self.encoder.save(path)
        elif part == "decoder":
            self.decoder.save(path)
        elif part == "autoencoder":
            self.autoencoder.save(path)
        elif part == "model":
            self.save(path)
        pass
    
    def load(self, path, part="autoencoder"):
        if part == "encoder":
            self.encoder = keras.models.load_model(path)
        elif part == "decoder":
            self.decoder = keras.models.load_model(path)
        elif part == "autoencoder":
            self.autoencoder = keras.models.load_model(path)
        elif part == "model":
            self = keras.models.load_model(path)
        pass
    

    def _predict(self, df, model, batch_size):
        batches = self.scaler.transform(df)
        batches = tuple(self._predictBatch(batch.reshape((1, *batch.shape)), model) for batch in batches)
        batches = self._concatBatches(batches) 
        return self.scaler.reverse_transform(batches)
    
    def _predictBatch(self, batch, model):
        return model.predict(batch)
    
    def _getBatches(self, arr, batch_size, axis=0):
        n_batches = arr.shape[axis] // batch_size
        return np.array_split(arr, n_batches, axis=axis)
    
    def _concatBatches(self, batches, axis=0):
        return np.concatenate(batches, axis=axis)



In [None]:
class AU_Stage_1(AutoEncoder):
        def __init__(self, 
                 input_dim = (7, 58), 
                 encoded_dim = (1, 58), 
                 loss="mse", 
                 optimizer="adadelta", 
                 activation=("elu", "sigmoid"),
                 kernel = 7):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim

            #Encoder
            self._inputs = Input(shape=input_dim)
            self._flat = Flatten()(self._inputs)
            self._dense = Dense(units=np.prod(encoded_dim), activation=activation[0])(self._flat)
            self._encoded = Reshape(encoded_dim)(self._dense)

            #Decoder
            self._encoded_inputs = Input(shape=(encoded_dim[0]*encoded_dim[1],))
            self._flat_decoded = Dense(input_dim[0]*input_dim[1], activation=activation[1])(self._encoded_inputs)
            self._decoded = Reshape(input_dim)(self._flat_decoded)

            #Models
            self.encoder = Model(self._inputs, self._encoded)
            self.decoder = Model(self._encoded_inputs, self._decoded)
            self.autoencoder = Model(self._inputs, self.decoder(self.encoder(self._inputs)))
            
            self.autoencoder.compile(optimizer=optimizer, loss=loss)

            class MinMaxScaler():

                def __init__(self, minimum=None, maximum=None):
                    self.minimum = minimum
                    self.maximum = maximum

                def fit_transform(self, X):
                    if self.minimum is None or self.maximum is None:
                        self.minimum = np.min(X, axis=(0, 1))
                        self.maximum = np.max(X, axis=(0, 1))
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def transform(self, X):
                    return (np.array(X) - self.minimum) / (self.maximum - self.minimum)

                def reverse_transform(self, X_scl):
                    return X_scl * (self.maximum - self.minimum) + self.minimum

            self.scaler = MinMaxScaler()

In [41]:
class AU_Stage_2(AutoEncoder):
        def __init__(self, 
                 input_dim = (5, 58), 
                 encoded_dim = (1, 58), 
                 loss="mse", 
                 optimizer="adadelta", 
                 activation=("elu", "sigmoid"),
                 kernel = 5):
        
            self.input_dim = input_dim
            self.encoded_dim = encoded_dim

            #Encoder
            self._inputs = Input(shape=input_dim)
            self._conv = Conv1D(filters=encoded_dim[1], kernel_size=kernel)(self._inputs) 
            self._encoded = Dense(units=encoded_dim[1], activation=activation[0])(self._conv)

            #Decoder
            self._encoded_inputs = Input(shape=encoded_dim)
            self._flat_decoded = Dense(units=np.prod(input_dim), activation=activation[1])(self._encoded_inputs)
            self._decoded = Reshape(input_dim)(self._flat_decoded)

            #Models
            self.encoder = Model(self._inputs, self._encoded)
            self.decoder = Model(self._encoded_inputs, self._decoded)
            self.autoencoder = Model(self._inputs, self.decoder(self.encoder(self._inputs)))

            self.autoencoder.compile(optimizer=optimizer, loss=loss)
            
            class MinMaxScaler():

                def __init__(self, minimum=None, maximum=None):
                    self.minimum = minimum
                    self.maximum = maximum

                def fit_transform(self, X):
                    if self.minimum is None or self.maximum:
                        self.minimum = np.min(X, axis=(0, 1))
                        self.maximum = np.max(X, axis=(0, 1))
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def transform(self, X):
                    return (X - self.minimum) / (self.maximum - self.minimum)

                def reverse_transform(self, X_scl):
                    return X_scl * (self.maximum - self.minimum) + self.minimum

            self.scaler = MinMaxScaler()

In [42]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

epoch_numb = 50

In [43]:
au_model = AU_Stage_2(input_dim=(5, 58), kernel=5)
au_model.decoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_48 (InputLayer)        (None, 1, 58)             0         
_________________________________________________________________
dense_48 (Dense)             (None, 1, 290)            17110     
_________________________________________________________________
reshape_13 (Reshape)         (None, 5, 58)             0         
Total params: 17,110
Trainable params: 17,110
Non-trainable params: 0
_________________________________________________________________


In [44]:
import sklearn.metrics

bs = [3, 5]
history = []

for batch_size in bs:
        au_model = AU_Stage_2(input_dim=(batch_size, 58), kernel=batch_size)
        data_set = au_model.prepare_clear_data(clear_data_path, limit=2)
        train_data, test_data = train_test_split(data_set, random_state=0, test_size=0.3)
        
        au_model.fit(train_data, epochs=50)

        pr = au_model._predict(test_data, au_model.autoencoder, batch_size=batch_size)
        ds = np.concatenate(test_data)
        p = np.concatenate(pr)
        history.append({'batch_size': batch_size, 'r2': sklearn.metrics.r2_score(ds, p), 'MSE': sklearn.metrics.mean_squared_error(ds, p), 'MAE': sklearn.metrics.mean_absolute_error(ds, p)})
        print(batch_size, ':')
        print(sklearn.metrics.r2_score(ds, p))
        print(sklearn.metrics.mean_absolute_error(ds, p))
        print(sklearn.metrics.mean_squared_error(ds, p))
        print()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
3 :
0.9355267609929022
1.4353611306033853e-06
1.4855373024882978e-11

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50


Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
5 :
0.885035668950525
1.9683108128532147e-06
2.2997870921230004e-11



ValueError: zero-size array to reduction operation minimum which has no identity

In [46]:
bs = [7, 10, 12]

for batch_size in bs:
        au_model = AU_Stage_2(input_dim=(batch_size, 58), kernel=batch_size)
        data_set = au_model.prepare_clear_data(clear_data_path, limit=4)
        train_data, test_data = train_test_split(data_set, random_state=0, test_size=0.3)
        
        au_model.fit(train_data, epochs=50)

        pr = au_model._predict(test_data, au_model.autoencoder, batch_size=batch_size)
        ds = np.concatenate(test_data)
        p = np.concatenate(pr)
        history.append({'batch_size': batch_size, 'r2': sklearn.metrics.r2_score(ds, p), 'MSE': sklearn.metrics.mean_squared_error(ds, p), 'MAE': sklearn.metrics.mean_absolute_error(ds, p)})
        print(batch_size, ':')
        print(sklearn.metrics.r2_score(ds, p))
        print(sklearn.metrics.mean_absolute_error(ds, p))
        print(sklearn.metrics.mean_squared_error(ds, p))
        print()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
7 :
0.9048461376510634
1.045874503390704e-06
1.8217765913821238e-12

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
E

Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
10 :
0.8393327698740133
2.2765445397243576e-06
2.5172499444772298e-11

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
12 :
0.8133912910470965
2.4991187545520596e-06
2.2961386736280264e-11



In [48]:
history.sort(key=lambda x: x['MSE'])

for i in history:
    print(i['batch_size'])
    print('r2:', i['r2'])
    print('MSE', i['MSE'])
    print('MAE', i['MAE'])
    print()

7
r2: 0.9048461376510634
MSE 1.8217765913821238e-12
MAE 1.045874503390704e-06

3
r2: 0.9355267609929022
MSE 1.4855373024882978e-11
MAE 1.4353611306033853e-06

12
r2: 0.8133912910470965
MSE 2.2961386736280264e-11
MAE 2.4991187545520596e-06

5
r2: 0.885035668950525
MSE 2.2997870921230004e-11
MAE 1.9683108128532147e-06

10
r2: 0.8393327698740133
MSE 2.5172499444772298e-11
MAE 2.2765445397243576e-06



====================================================================

In [None]:
# bs = [3, 5, 7, 10, 12]

# for batch_size in bs:
#         au_model = AU_Stage_1(input_dim=(batch_size, 58), kernel=batch_size)
#         data_set = au_model.prepare_clear_data(clear_data_path, limit=4)
#         train_data, test_data = train_test_split(data_set, random_state=0, test_size=0.3)
        
#         au_model.fit(train_data, epochs=50)

#         pr = au_model._predict(test_data, au_model.autoencoder, batch_size=batch_size)
#         ds = np.concatenate(test_data)
#         p = np.concatenate(pr)
#         history.append({'batch_size': batch_size, 'r2': sklearn.metrics.r2_score(ds, p), 'MSE': sklearn.metrics.mean_squared_error(ds, p), 'MAE': sklearn.metrics.mean_absolute_error(ds, p)})
#         print(batch_size, ':')
#         print(sklearn.metrics.r2_score(ds, p))
#         print(sklearn.metrics.mean_absolute_error(ds, p))
#         print(sklearn.metrics.mean_squared_error(ds, p))
#         print()
#         del data_set
#         del train_data
#         del test_data