# A Simple MLP

In [None]:
# mlp for multi-output regression
import time
import numpy as np
from numpy import mean
from numpy import std
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from sklearn.model_selection import RepeatedKFold
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import *
from keras import Input
from keras.callbacks import CSVLogger, EarlyStopping

from scipy import signal

In [None]:
# Open the train and test Excel workbooks (train first, then test) so later
# cells can read their sheets from the already-open handles.
xls_train, xls_test = (
    pd.ExcelFile(workbook_path)
    for workbook_path in ('./data/height/train.xlsx', './data/height/test.xlsx')
)

In [None]:
def norm_data(data):
    """Standardize ``data`` along axis 0.

    Returns:
        A 3-tuple ``(standardized, mean, std)`` where ``mean`` and ``std`` are
        the results of ``data.mean(axis=0)`` and ``data.std(axis=0)`` and
        ``standardized`` is ``(data - mean) / std``.
    """
    center = data.mean(axis=0)
    spread = data.std(axis=0)
    return (data - center) / spread, center, spread

def preprocess_data(sheet, features_all):
    """Rename a sheet's columns and shift its ``'x'`` column to start at zero.

    The sheet is modified in place and is also returned.

    Args:
        sheet: DataFrame holding one worksheet; it must have as many columns
            as ``features_all`` has names.
        features_all: Column names to assign, in order. Must include ``'x'``.

    Returns:
        The same ``sheet`` object, with the new column names and with ``'x'``
        offset so that its first row is 0. An empty sheet is only renamed.
    """
    sheet.columns = features_all

    # NOTE: the sensor column '1' is passed through unchanged; no
    # normalization is applied to it here.

    # Make positions relative to the first row. ``.iloc[0]`` is positional, so
    # this also works when the index does not contain the label 0 (for example
    # after filtering rows or concatenating frames).
    if len(sheet):
        sheet['x'] = sheet['x'] - sheet['x'].iloc[0]

    # Down-sampling with scipy.signal.decimate along axis 0 (factors 10, 10, 2)
    # was tried at this point and is currently disabled.

    return sheet

def augment_data(data, features_all, n_repeats=10):
    """Oversample the second half of ``data`` and return it as a DataFrame.

    The rows from position ``data.shape[0] // 2`` to the end are appended to
    ``data`` ``n_repeats`` times.

    Args:
        data: 2-D array (anything with ``.shape`` that can be row-sliced and
            passed to ``np.concatenate``), one column per name in
            ``features_all``.
        features_all: Column names for the returned DataFrame.
        n_repeats: Number of copies of the second half to append (default 10,
            the value that used to be hard-coded).

    Returns:
        A new DataFrame containing the original rows followed by the repeated
        second-half rows.
    """
    half = data.shape[0] // 2
    tail = data[half:]
    # Build the result with a single concatenate instead of re-copying the
    # growing array on every loop iteration.
    stacked = np.concatenate([data] + [tail] * n_repeats, axis=0)
    return pd.DataFrame(stacked, columns=features_all)

def load_data(xls, features_all):
    """Read every sheet of an Excel workbook and build feature/target arrays.

    Each sheet is passed through ``preprocess_data`` and the results are
    stacked row-wise. The feature and target columns are selected with the
    module-level ``features_considered`` and ``outputs_considered``.

    Args:
        xls: Workbook to read; forwarded to ``pd.read_excel``.
        features_all: Column names assigned to every sheet.

    Returns:
        A 4-tuple ``(X, y, mean, std)``: the feature array, the target array,
        and the per-feature mean and standard deviation of ``X`` (``X`` itself
        is returned un-normalized).
    """
    sheets = pd.read_excel(xls, sheet_name=None)
    print("Read {} sheets from excel file".format(len(sheets)))

    # Preprocess all sheets first and concatenate once; calling pd.concat
    # inside the loop re-copies the accumulated rows on every iteration.
    frames = [preprocess_data(sheet, features_all) for sheet in sheets.values()]
    new_df = pd.concat(frames)

    print("DataFrame Shape: {} rows, {} columns".format(*new_df.shape))
    # `display` is not imported in this file; presumably the IPython/Jupyter
    # builtin -- this function is expected to run inside a notebook.
    display(new_df.head())

    X = new_df[features_considered].values
    y = new_df[outputs_considered].values

    print(X.shape, y.shape)

    # The bare names `mean` and `std` resolve to the functions imported from
    # numpy at the top of the file, so returning them handed callers two
    # function objects. Return the actual statistics of the features instead.
    return X, y, X.mean(axis=0), X.std(axis=0)

In [None]:
# Column names assigned to every sheet, in sheet order.
features_all = ['t', '1', 'x', 'z', 'zi']
# Columns fed to the model as inputs.
features_considered = ['t', '1', 'x']
# Column used as the training target: the last entry of features_all.
outputs_considered = features_all[-1]

# Build the training arrays from the train workbook.
(X_train, y_train,
 mean_train, std_train) = load_data(xls_train, features_all)
print('Train data loaded')

# Build the test arrays from the test workbook in the same way.
(X_test, y_test,
 mean_test, std_test) = load_data(xls_test, features_all)
print('Test data loaded')

In [None]:
# Use the "lr_schedule" callback below to find the optimal learning rate:
# run the model for a small number of epochs and plot "learning rate" vs. "loss".
# lr_schedule = tf.keras.callbacks.LearningRateScheduler(
#                     lambda epoch: 1e-8 * 10**(epoch/20))
# Optimizer and loss parameters
# loss = tf.keras.losses.Huber()
# optimizer = tf.keras.optimizers.SGD(lr=1e-8, momentum=0.9)
# optimizer = 'adam'

# get the model
def get_model(n_inputs, n_outputs, optimizer):
    """Build and compile the MLP regressor.

    Architecture: input -> BatchNorm -> three (Dense + ReLU -> BatchNorm)
    stages of 8, 8 and 4 units -> linear Dense layer with ``n_outputs`` units.
    The model is compiled with MAE loss and tracks MAE and MSE as metrics.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of units in the output layer.
        optimizer: Optimizer passed straight to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # (units, dense layer name, batch-norm layer name) for each hidden stage.
    # The names skip 2-4 because those wider stages (16/32/16 units) are
    # currently disabled. A second variant (8-16-8 units with Dropout(0.20)
    # after each Dense and MSE loss) was also tried and is disabled as well.
    hidden_stages = (
        (8, 'dense_1', 'batch_norm_1'),
        (8, 'dense_5', 'batch_norm_5'),
        (4, 'dense_6', 'batch_norm_6'),
    )

    model = Sequential()
    model.add(Input(shape=(n_inputs,)))
    model.add(BatchNormalization(name='batch_norm_0'))
    for units, dense_name, norm_name in hidden_stages:
        model.add(Dense(units, name=dense_name,
                        kernel_initializer='he_uniform', activation='relu'))
        model.add(BatchNormalization(name=norm_name))
    model.add(Dense(n_outputs, name='dense_7'))

    model.compile(loss='mae', optimizer=optimizer, metrics=['mae', 'mse'])
    return model

In [None]:
EPOCHS = 1
# NOTE(review): BATCH_SIZE is defined but never passed to model.fit below, so
# training runs with Keras' default batch size. Confirm whether
# `batch_size=BATCH_SIZE` was intended before wiring it in.
BATCH_SIZE = 1024

# define model
optimizer = 'adam'
# optimizer = tf.keras.optimizers.SGD(lr=1e-3, momentum=0.8)
print("X_train shape:", X_train.shape, "y_train shape:", y_train.shape)
n_inputs, n_outputs = X_train.shape[1], 1
print(n_inputs, n_outputs)
model = get_model(n_inputs, n_outputs, optimizer)
# summary() prints the table itself and returns None; wrapping it in print()
# only added a stray "None" line.
model.summary()

# Train
modelstart = time.time()

# NOTE(review): the same `model` instance keeps training across all folds, so
# later folds are evaluated on rows the model has already been fitted on. The
# scores below are therefore optimistic and not an independent
# cross-validation estimate. This is left as-is because the saved model (and
# the prediction cell that uses `model`) relies on this cumulative training.
results = list()
rkf = RepeatedKFold(n_splits=4, n_repeats=8, random_state=1)
for train_ix, test_ix in rkf.split(X_train):
    # split() only yields valid row positions of X_train, so the indices need
    # no additional range filtering.
    print("Filtered: TRAIN:", train_ix.shape, "TEST:", test_ix.shape)
    # prepare data
    X_train_, X_test_ = X_train[train_ix], X_train[test_ix]
    y_train_, y_test_ = y_train[train_ix], y_train[test_ix]

    # fit model
    model.fit(X_train_, y_train_, verbose=1, epochs=EPOCHS)
    # evaluate model on the held-out fold; with metrics=['mae', 'mse'] the
    # return value is [loss, mae, mse], so keep only the MSE entry. Appending
    # the whole list made the summary average loss, MAE and MSE together.
    _, _, mse = model.evaluate(X_test_, y_test_, verbose=1)
    # store result
    results.append(mse)

print('MSE: %.3f (%.3f)' % (mean(results), std(results)))

print("\nModel Runtime: %0.2f Minutes"%((time.time() - modelstart)/60))
model.save('mlp_full_train.h5')

In [None]:
from skimage.measure import block_reduce

def moving_average(y_pred, w):
    """Return the forward-looking rolling maximum of ``y_pred``.

    Despite the name, no averaging is performed: entry ``i`` of the result is
    the maximum over all elements of ``y_pred[i:i + w]``. Windows near the end
    of the array are shorter because the slice is truncated.

    Args:
        y_pred: NumPy array; windows are taken along axis 0.
        w: Window length, an integer of at least 1.

    Returns:
        An array with the same shape and dtype as ``y_pred``.

    Raises:
        ValueError: If ``w`` is smaller than 1.
    """
    if w < 1:
        raise ValueError("window size w must be at least 1, got {}".format(w))

    rolled = np.zeros_like(y_pred)
    for start in range(y_pred.shape[0]):
        rolled[start] = np.max(y_pred[start:start + w])
    return rolled

def post_processing(y_pred, scale=4.5):
    """Min-max rescale the magnitudes of ``y_pred`` to the range ``[0, scale]``.

    The absolute value of ``y_pred`` is taken first; the minimum and maximum
    are then computed along axis 0, so each column of a 2-D input is rescaled
    independently.

    Args:
        y_pred: Array-like of predictions, 1-D or 2-D.
        scale: Upper bound of the output range (default 4.5, the value that
            used to be hard-coded).

    Returns:
        Array with the same shape as ``y_pred``. A column whose magnitudes are
        all equal maps to 0 instead of NaN.
    """
    magnitude = np.absolute(y_pred)
    # ndarray reductions instead of the builtin min()/max(), which iterate in
    # Python and raise on arrays with more than one column.
    lowest = magnitude.min(axis=0)
    span = magnitude.max(axis=0) - lowest
    # Avoid 0/0 for constant input: divide by 1 there so the result is 0.
    span = np.where(span == 0, np.ones_like(span), span)
    normalized = (magnitude - lowest) / span
    return scale * normalized

def plot_test_results(name, t, y_pred, y_test):
    """Print the sheet name and plot predicted vs. real z against ``t``.

    Args:
        name: Sheet identifier, printed before plotting.
        t: Values for the horizontal axis.
        y_pred: Predicted values, drawn in red.
        y_test: Reference values, drawn in green.
    """
    print("Sheet No.", name)

    _, axes = plt.subplots(figsize=(4, 6))
    # Same styling for both curves; only colour and legend label differ.
    for series, colour, label in ((y_pred, 'r', 'predicted z'),
                                  (y_test, 'g', 'real z')):
        axes.plot(t, series, colour, label=label, linewidth=3)
    axes.legend(loc='upper left')
    plt.show()

In [None]:
#### Load test data
xls_train = pd.ExcelFile('./data/height/train.xlsx')
xls_test = pd.ExcelFile('./data/height/test.xlsx')
df_test = pd.read_excel(xls_test, sheet_name=None)
print("Read {} sheets from excel file".format(len(df_test)))

# Write one result sheet per test sheet using the openpyxl engine. The context
# manager saves and closes the workbook on exit; ExcelWriter.save() no longer
# exists in pandas 2.x.
with pd.ExcelWriter('./data/height/test_results.xlsx', engine='openpyxl') as writer:
    for name, sheet in df_test.items():
        print("DataFrame Shape: {} rows, {} columns".format(*sheet.shape))
        new_data = preprocess_data(sheet, features_all)
        new_sheet = pd.DataFrame(new_data, columns=features_all)

        # Plain ndarrays from here on: indexing a Series with
        # [:, np.newaxis] is not supported by current pandas.
        x = new_sheet['x'].values
        X_test = new_sheet[features_considered].values
        # y_test = new_sheet[outputs_considered].values
        # The reference curve is column 'z' here, not the training target.
        y_test = new_sheet['z'].values

        y_pred = post_processing(np.asarray(model.predict(X_test)))
        plot_test_results(name, x, y_pred, y_test)

        # column_stack turns the 1-D x and y_test into columns and appends the
        # 2-D prediction array next to them.
        combined = np.column_stack([x, y_test, y_pred])
        df = pd.DataFrame(combined, columns=['x', 'real_z', 'pred_z'])
        df.to_excel(writer, index=False, sheet_name=name)