In [None]:
from resources.ml_mlp import MLP_Model
from resources.ml_mlp import Linear_Model
from resources.ml_mlp import MLP_Win_Model
from resources.ml_mlp import LSTM_Model
import resources
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scikeras.wrappers import KerasRegressor
import tensorflow as tf
tf.get_logger().setLevel('ERROR')
import multiprocessing
# force=True keeps this cell re-runnable: without it, a second call in the same
# kernel raises "RuntimeError: context has already been set".
multiprocessing.set_start_method("spawn", force=True)
from launch_tb import launch_tb

In [None]:
plt.rcParams.update({
    # "text.usetex": True,
    # "font.family": "serif",
    # "font.serif": ["Computer Modern Roman"],
    "font.size": 12,
})
%matplotlib inline

## Pre-process data 
Load the feature datasets, remove the wear-in phase from each, and combine them into a single table.

In [None]:
# Load the experiment objects for Tests 5, 7, 8 and 9, in that order.
exp5, exp7, exp8, exp9 = (
    resources.load(f'Test {test_no}') for test_no in (5, 7, 8, 9)
)

In [None]:
# Per-experiment feature tables. Only Test 5 has the rows labelled 23 and 24 removed.
exp5_features = exp5.features.drop([23, 24])
dfs = [exp5_features, exp7.features, exp8.features, exp9.features]

In [None]:
# Remove the rows labelled 0-3 from every experiment (the wear-in phase,
# according to the section description).
# errors='ignore' keeps the cell idempotent: `dfs` is overwritten with its own
# trimmed version, so re-running the cell would otherwise raise KeyError
# because those labels have already been dropped.
WEAR_IN_LABELS = [0, 1, 2, 3]
dfs = [df.drop(index=WEAR_IN_LABELS, errors='ignore') for df in dfs]

In [None]:
# Stack all experiments into one table, discard the columns that are not used
# here, and renumber the rows 0..n-1.
main_df = (
    pd.concat(dfs)
    .drop(columns=['Runout', 'Form error', 'Peak radius', 'Radius diff'])
    .reset_index(drop=True)
)
print(f'Main df : {main_df.shape[0]} rows x {main_df.shape[1]} cols')
main_df.head()

In [None]:
def pred_plot(y: np.ndarray, y_pred: np.ndarray, title: str = ''):
    """Draw a predicted-vs-actual scatter and a histogram of the prediction error.

    Two separate figures are created: the scatter (with a y = x reference
    line) and the error histogram.

    Args:
        y: Actual target values. Any array-like; flattened before use.
        y_pred: Predicted values. Any array-like, e.g. an (n, 1) column
            vector; flattened before use.
        title: Prefix used in the titles of both figures.

    Returns:
        None. The figures are left open for the notebook backend to render.
    """
    # Flatten both inputs so they are compared element-wise. Without this, a
    # 1-D `y` minus an (n, 1) `y_pred` broadcasts to an (n, n) matrix and the
    # histogram no longer shows one error per sample.
    y = np.asarray(y).ravel()
    y_pred = np.asarray(y_pred).ravel()

    fig1, ax1 = plt.subplots()
    fig2, ax2 = plt.subplots()
    ax1.scatter(y, y_pred)

    # Fixed axis limits shared by both axes so the plot is square and
    # comparable between models. To derive them from the data instead, use
    # main_df['Mean radius'].values.max() / .min().
    xmax = 0.68
    xmin = 0.6

    ax1.set_xlim([xmin, xmax])
    ax1.set_ylim([xmin, xmax])

    # End points of the y = x reference line: the common extent of both axes.
    lims = [
        np.min([ax1.get_xlim(), ax1.get_ylim()]),
        np.max([ax1.get_xlim(), ax1.get_ylim()]),
    ]
    ax1.set_axisbelow(True)
    ax1.grid()
    ax1.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
    ax1.set_aspect('equal')
    ax1.set_xlabel('Actual Y (mm)')
    ax1.set_ylabel('Predicted Y (mm)')
    ax1.set_title(f'{title} - Predictions')

    # Scale by 1000: mm -> um, per the axis labels.
    diff = (y - y_pred) * 1000

    ax2.hist(diff, bins=30)
    ax2.set_xlabel('Prediction Error / um')
    ax2.set_ylabel('No Occurances')
    ax2.set_title(f'{title} - Histogram')

    fig1.tight_layout()
    fig2.tight_layout()


## MLP

In [None]:
# Hyper-parameters handed to the plain MLP regressor.
mlp_params = {
    'loss': 'mse',
    'no_layers': 3,
    'no_nodes': 128,
    'epochs': 100,
    'dropout': 0.01,
    'batch_size': 20,
    'init_mode': 'glorot_uniform',
}

# Plain MLP on the combined feature table with 'Mean radius' as target.
# tb=True with tb_logdir='tmux-test' presumably enables TensorBoard logging
# under that directory; confirm in resources.ml_mlp.
mlp_reg = MLP_Model(
    feature_df=main_df,
    target='Mean radius',
    tb=True,
    tb_logdir='tmux-test',
    params=mlp_params,
    random_state=11,
)

In [None]:
# Presumably starts TensorBoard for the MLP run logged under 'tmux-test';
# confirm against launch_tb.
launch_tb('MLP/tmux-test')

In [None]:
# Single training run of the plain MLP; cross-validation and scoring are
# currently disabled (commented out) for this model.
# mlp_reg.cv(n_splits=10, n_repeats=10)
# validation_split=0.1: presumably holds out 10% of the data for validation;
# TODO confirm in resources.ml_mlp.
mlp_reg.fit(validation_split=0.1, verbose=0)
# mlp_reg.score(plot_fig=False);

In [None]:
# y = mlp_reg.val_data[1].values
# y_pred = mlp_reg.model.predict(mlp_reg.val_data[0].values, verbose=0)
# pred_plot(y, y_pred, 'MLP')

## MLP with Window

In [None]:
# Hyper-parameters handed to the windowed MLP regressor.
mlp_win_params = {
    'seq_len': 5,
    'loss': 'mse',
    'epochs': 2000,
    'no_nodes': 128,
    'no_layers': 3,
    'batch_size': 20,
    'init_mode': 'glorot_uniform',
    'dropout': 0.01,
}

# Windowed MLP on the combined feature table with 'Mean radius' as target.
# tb=False, so tb_logdir is presumably unused here; confirm in resources.ml_mlp.
mlp_win_reg = MLP_Win_Model(
    feature_df=main_df,
    target='Mean radius',
    tb=False,
    tb_logdir='tmux-test',
    params=mlp_win_params,
    random_state=11,
)

In [None]:
# Runs cross-validation, then a single fit, then scoring, in that order.
# NOTE(review): n_splits=10 with n_repeats=10 is presumably 100 separate fits
# of a model configured for 2000 epochs; expect a long runtime. Confirm in
# resources.ml_mlp.
mlp_win_reg.cv(n_splits=10, n_repeats=10)
# validation_split=0.2: presumably holds out 20% of the data for validation;
# TODO confirm.
mlp_win_reg.fit(validation_split=0.2, verbose=0)
# The trailing semicolon suppresses the notebook's echo of the return value.
mlp_win_reg.score(plot_fig=False);

In [None]:
# y = mlp_win_reg.val_data[1]
# y_pred = mlp_win_reg.model.predict(mlp_win_reg.val_data[0], verbose=0)
# pred_plot(y, y_pred, 'MLP_WIN')

## LSTM

In [None]:
# Hyper-parameters handed to the LSTM regressor.
lstm_params = {
    'seq_len': 15,
    'loss': 'mse',
    'epochs': 1500,
    'no_layers': 3,
    'no_dense': 1,
    'no_nodes': 128,
    'batch_size': 10,
    'init_mode': 'glorot_uniform',
    'dropout': 0.01,
}

# LSTM on the combined feature table with 'Mean radius' as target.
# tb=False, so tb_logdir is presumably unused here; confirm in resources.ml_mlp.
lstm_reg = LSTM_Model(
    feature_df=main_df,
    target='Mean radius',
    tb=False,
    tb_logdir='testing',
    params=lstm_params,
    random_state=11,
    shuffle=True,
)

In [None]:
# Runs cross-validation, then a single fit, then scoring, in that order.
# NOTE(review): n_splits=10 with n_repeats=10 is presumably 100 separate fits
# of a model configured for 1500 epochs; expect a long runtime. Confirm in
# resources.ml_mlp.
lstm_reg.cv(n_splits=10, n_repeats=10)
# validation_split=0.2: presumably holds out 20% of the data for validation;
# TODO confirm.
lstm_reg.fit(validation_split=0.2, verbose=0)
# The trailing semicolon suppresses the notebook's echo of the return value.
lstm_reg.score(plot_fig=False);

In [None]:
# y = lstm_reg.val_data[1]
# y_pred = lstm_reg.model.predict(lstm_reg.val_data[0], verbose=0)
# pred_plot(y, y_pred, 'LSTM')

## Linear Model

In [None]:
# lin_reg = Linear_Model(feature_df=main_df, target='Mean radius')

In [None]:
# lin_reg.fit()
# lin_reg.score(plot_fig=False);

In [None]:
# y = lin_reg.val_data[1].values
# y_pred = lin_reg.model.predict(lin_reg.val_data[0].values)
# pred_plot(y, y_pred, 'Linear')

## Figures for Journal

In [None]:
def pred_real_plot(y, y_pred, ax):
    """Scatter predicted against actual values on an existing axes.

    The axes is given fixed, equal x/y limits (0.6 to 0.68), a grid drawn
    beneath the data, and a dashed y = x reference line.

    Args:
        y: Actual values (x coordinates of the scatter).
        y_pred: Predicted values (y coordinates of the scatter).
        ax: Axes to draw on.

    Returns:
        The same `ax` that was passed in.
    """
    lower, upper = 0.6, 0.68

    ax.scatter(y, y_pred, marker='+', alpha=0.8, s=50)
    ax.set_xlim([lower, upper])
    ax.set_ylim([lower, upper])

    # The reference diagonal spans the common extent of both axes.
    axis_limits = [ax.get_xlim(), ax.get_ylim()]
    diagonal = [np.min(axis_limits), np.max(axis_limits)]

    ax.set_axisbelow(True)
    ax.grid()
    ax.plot(diagonal, diagonal, 'k--', alpha=0.75, zorder=0, linewidth=1)
    ax.set_aspect('equal')
    return ax


# Fitted regressors to compare, keyed by the panel title used in the figures.
models = {
    'MLP': mlp_reg,
    'MLP_WIN': mlp_win_reg,
    'LSTM': lstm_reg,
}

# One figure per data split: validation first, then training. Each model's
# split attribute is indexed as (inputs, targets).
for split_attr in ('val_data', 'train_data'):
    y = {name: getattr(mod, split_attr)[1] for name, mod in models.items()}
    y_pred = {
        name: mod.model.predict(getattr(mod, split_attr)[0], verbose=0)
        for name, mod in models.items()
    }

    # One predicted-vs-actual panel per model, sharing the y axis.
    fig, ax = plt.subplots(1, len(models), figsize=(12, 5), sharey=True, dpi=100)
    ax = ax.ravel()
    for i, m in enumerate(models):
        ax[i] = pred_real_plot(y[m], y_pred[m], ax[i])
        ax[i].set_title(m, fontsize=12)
        ax[i].set_xlabel('Actual Y (mm)', fontsize=12)
    ax[0].set_ylabel('Predicted Y (mm)', fontsize=12)
    fig.tight_layout()

#### Show prediction of entire wear cycle

In [None]:
from collections import deque
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def sequence_data(d: np.ndarray, mod):
    """Turn a row-wise array into overlapping windows of `mod.seq_len` rows.

    A window is emitted for every row once `mod.seq_len` rows have been seen,
    so an input with n rows yields n - seq_len + 1 windows (none if the input
    is shorter than one window).

    Args:
        d: Iterable of rows, e.g. a 2-D array of shape (n_rows, n_features).
        mod: Any object exposing an integer `seq_len` attribute.

    Returns:
        A list in which each element is a single-item list holding one
        (seq_len, n_features) array, so `np.asarray` of the result has a
        singleton second axis.
    """
    window_len = mod.seq_len
    window = deque(maxlen=window_len)  # the oldest row falls out automatically
    windows = []

    for row in d:
        window.append(list(row))
        if len(window) != window_len:
            continue  # still filling the first window
        windows.append([np.array(window)])
    return windows


# Untrimmed per-experiment feature tables. The rows labelled 0-3 are removed
# again inside the loop, so this cell does not rely on the earlier `dfs`.
dfs = [exp5.features.drop([23, 24]), exp7.features, exp8.features, exp9.features]

# Loop-invariant lookups, hoisted out of the per-experiment loop.
mods = {'MLP': mlp_reg,
        'MLP_WIN': mlp_win_reg,
        'LSTM': lstm_reg,
        }
mlp_win_slen = mods['MLP_WIN'].seq_len
lstm_slen = mods['LSTM'].seq_len

fig, ax = plt.subplots(1, len(dfs), figsize=(15, 5), dpi=300)
ax = ax.ravel()

for i, df in enumerate(dfs):

    # Same column and row trimming as used for the training table.
    df = (
        df.drop(columns=['Runout', 'Form error', 'Peak radius', 'Radius diff'])
        .drop([0, 1, 2, 3])
        .reset_index(drop=True)
    )

    x = df.drop(columns=['Mean radius']).to_numpy()
    y = df['Mean radius'].to_numpy()

    # Each model scales the raw features with its own scaler.
    x_mlp = mods['MLP'].scaler.transform(x)

    # Windowed MLP: one flat row per window.
    # presumably _no_features == seq_len * n_features; confirm in resources.ml_mlp
    x_mlp_win = sequence_data(mods['MLP_WIN'].scaler.transform(x), mods['MLP_WIN'])
    x_mlp_win = np.asarray(x_mlp_win).reshape(len(x_mlp_win), mods['MLP_WIN']._no_features)

    # LSTM: squeeze out the singleton axis added by sequence_data.
    x_lstm = sequence_data(mods['LSTM'].scaler.transform(x), mods['LSTM'])
    x_lstm = np.asarray(x_lstm).squeeze()

    y_pred_mlp = mods['MLP'].model.predict(x_mlp, verbose=0)
    y_pred_mlp_win = mods['MLP_WIN'].model.predict(x_mlp_win, verbose=0)
    y_pred_lstm = mods['LSTM'].model.predict(x_lstm, verbose=0)

    # Optional per-test scoring (disabled).
    # print(f'Test {i + 1}')
    # print('-' * 65)
    # print('\tMLP')
    # mods['MLP'].score(X=x_mlp, y=y, plot_fig=False)
    # print('-' * 65)
    # print('\tMLP_WIN')
    # mods['MLP_WIN'].score(X=x_mlp_win, y=y[-len(x_mlp_win):], plot_fig=False)
    # print('-' * 65)
    # print('\tLSTM')
    # mods['LSTM'].score(X=x_lstm, y=y[-len(x_lstm):], plot_fig=False)
    # print('\n')

    ax[i].plot(y, label='Actual')
    ax[i].plot(y_pred_mlp, label='MLP')
    # The windowed models produce no prediction for the first seq_len - 1 cuts,
    # so pad the front with NaN to keep the curves aligned on the x axis.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    ax[i].plot(np.insert(y_pred_mlp_win, 0, [np.nan] * (mlp_win_slen - 1)), label='MLP_WIN')
    ax[i].plot(np.insert(y_pred_lstm, 0, [np.nan] * (lstm_slen - 1)), label='LSTM')
    ax[i].legend(fontsize=8)
    ax[i].set_xlabel('Cut No.')
    # NOTE(review): panels are titled Test 1-4 although the data come from
    # Tests 5, 7, 8 and 9; confirm this renumbering is intended.
    ax[i].set_title(f'Test {i+1}')
    ax[i].autoscale(enable=True, axis='x', tight=True)

ax[0].set_ylabel('Mean radius (mm)')
fig.tight_layout()

# fig, ax = plt.subplots()
# pred_real_plot(y[-len(y_pred_lstm):], y_pred_lstm, ax)

#### Train MLP models on all but one Test

In [None]:
# from collections import deque
# from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# def sequence_data(d: np.ndarray, mod):
#     seq_data = []
#     seq_len = mod.seq_len
#     prev_points = deque(maxlen=seq_len)

#     for i in d:
#         prev_points.append([n for n in i])
#         if len(prev_points) == seq_len:
#             seq_data.append([np.array(prev_points)])
#     return seq_data

# mod = lstm_reg
# for i, val_df in enumerate(dfs):
#     tr_df = dfs[:i] + dfs[i + 1:]
#     tr_df = pd.concat(tr_df)
#     tr_df = tr_df.drop(columns=['Runout', 'Form error', 'Peak radius', 'Radius diff'])
#     tr_df.reset_index(drop=True, inplace=True)

#     mod.main_df = tr_df
#     mod.pre_process(val_frac=0.1)
#     mod.fit(verbose=0)
#     mod.score(plot_fig=False)

#     val_df = val_df.drop(columns=['Runout', 'Form error', 'Peak radius', 'Radius diff'])
#     val_x = val_df.drop(columns=['Mean radius']).to_numpy()
#     val_x = sequence_data(mod.scaler.transform(x), mod)
#     val_x = np.asarray(val_x).squeeze()
#     val_y = val_df['Mean radius'].to_numpy()
    
#     val_pred = mod.model.predict(val_x, verbose=0)
#     fig, ax = plt.subplots()
#     ax.plot(val_y, label='Actual')
#     ax.plot(np.insert(val_pred, 0, [np.NaN] * (mod.seq_len - 1)), label='Predicted')
#     ax.legend(fontsize=8)
#     ax.set_xlabel('Cut No.')
#     ax.set_ylabel('Mean radius (mm)')
#     ax.set_title(f'Val data {i+1}')
#     ax.autoscale(enable=True, axis='x', tight=True)
