In [None]:
import tensorflow as tf
# Cap this process at 15% of each visible GPU's memory so the device can be
# shared with other jobs, then hand the resulting session to tf.keras.
# NOTE(review): the model in this notebook is built with the standalone `keras`
# package, while the session is registered with `tf.keras` — confirm that the
# limit is picked up as intended.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.15)
)
session = tf.Session(config=config)
tf.keras.backend.set_session(session)

In [None]:
import numpy as np
import os
from keras.models import Sequential, Model
from keras.layers import Conv1D, Flatten, Dense, Activation, MaxPooling1D, UpSampling1D, Cropping1D, ZeroPadding1D, Input, multiply, GlobalMaxPooling1D, Reshape
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam, RMSprop
from keras.callbacks import ModelCheckpoint
from sklearn.metrics import classification_report
from random import shuffle
from tqdm import tqdm

In [None]:
def auto_encoder():
    """Build the (uncompiled) 1D convolutional auto-encoder.

    The network takes inputs of shape (251, 2). The sequence is zero-padded
    by 3 steps on the left and 2 on the right (251 -> 256) so that it can be
    halved five times by the encoder and doubled five times by the decoder;
    the final cropping layer removes the same 3/2 steps again, so the output
    has the same shape as the input.

    Returns:
        A Keras ``Sequential`` model. Compiling it is left to the caller.
    """
    net = Sequential()

    def add_conv_pair(n_filters, width):
        # Two identical ReLU convolutions; 'same' padding keeps the length.
        for _ in range(2):
            net.add(Conv1D(filters=n_filters, kernel_size=width,
                           activation='relu', padding='same'))

    # (filters, kernel width) per stage; only the outermost stage uses the
    # wide 15-sample kernel.
    encoder_stages = ((16, 15), (32, 3), (64, 3), (128, 3), (256, 3))
    decoder_stages = ((128, 3), (64, 3), (32, 3), (16, 3), (2, 15))

    # --- encoder: pad, then five conv/conv/pool stages ---
    net.add(ZeroPadding1D((3, 2), input_shape=(251, 2), name='Padding'))
    for n_filters, width in encoder_stages:
        add_conv_pair(n_filters, width)
        net.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))

    # --- decoder: one conv pair at the bottleneck resolution, then five
    # upsample/conv/conv stages ending in the 2 output channels ---
    add_conv_pair(256, 3)
    for n_filters, width in decoder_stages:
        net.add(UpSampling1D(2))
        add_conv_pair(n_filters, width)

    # Undo the initial padding.
    net.add(Cropping1D((3, 2), name='Cropping'))

    return net

In [None]:
# Instantiate the auto-encoder and configure it for reconstruction training:
# mean squared error between input and output, Adam with default settings.
model = auto_encoder()
model.compile(optimizer=Adam(), loss='mean_squared_error')

# Print the layer-by-layer overview (output shapes and parameter counts).
model.summary()

In [None]:
# Training hyperparameters used by the training loop further down.
epochs = 50        # number of full passes over all training files
batch_size = 128   # samples per gradient update, passed to model.fit

Because the data set is too large to fit into memory on most systems, we load 50 files at a time, train on them, then load the next 50, and so on. The order in which the files are loaded is shuffled every epoch, and the samples within each group of 50 files are shuffled during training. After each training phase (50 files) we save the current model weights.

In [None]:
data_path = 'path_to_training_files'

# Number of files that are loaded into memory and trained on in one phase.
files_per_phase = 50

# Only regular files are training data; sub-directories (for example
# Jupyter's .ipynb_checkpoints) would make np.load fail.
files = [name for name in os.listdir(data_path)
         if os.path.isfile(os.path.join(data_path, name))]

for e in range(epochs):

    print("EPOCH: " + str(e+1) + "-"*100)

    # Visit the files in a different order every epoch.
    shuffle(files)

    # The context manager closes the bar at the end of the epoch, so a fresh
    # bar per epoch does not leave unfinished bars behind.
    with tqdm(total=len(files)) as pbar:
        for i in range(0, len(files), files_per_phase):
            # Release the previous phase's data before loading the next one
            # so that two full chunks are never held at the same time.
            x_train = None

            # Load every file of this phase, then concatenate once; repeated
            # concatenation would recopy the growing array for each file.
            arrays = []
            for name in files[i:i + files_per_phase]:
                arrays.append(np.load(os.path.join(data_path, name)))
                pbar.update(1)
            x_train = np.concatenate(arrays)
            # Drop the per-file arrays so only the merged copy stays in
            # memory during training.
            del arrays

            # Auto-encoder: the input is also the target. fit() runs a single
            # pass over this chunk and shuffles the samples within it.
            model.fit(x_train, x_train, batch_size=batch_size, verbose=1, shuffle=True)

            # Checkpoint after every phase so an interrupted run can resume
            # from the latest weights.
            model.save_weights('latest_autoencoder.h5')