### Compare new model with prediction

Compare the predictions of a newly trained model with the predictions of the model running in the app, using a defined set of validation data.
- You need to provide:
    - a path to the validation data (CSV files)
    - a path to the pipeline that is used to transform the validation data
    - a path to the model that should be compared with the online prediction
    - `window` and `shift`, which control how the data is transformed into the windowed format expected by the LSTM

In [None]:
# --- User configuration for this notebook -------------------------------
# Location of the validation data; handed to readIn() further below.
dataPath = "X:\\KI Praktikum\\validate_Data\\2023_08_23_filtered_data\\interpoliert"
# Columns of the validation data that are used as model input features.
parameters = ["t_bett", "t_motor", "t_spindle", "M8", "M121", "M127", "M7"]
# Serialized preprocessing pipeline (loaded with joblib and applied via .transform()).
pipePath = "X:\\KI Praktikum\\pipeline.p"
# Keras model file of the new model that is compared with the online prediction.
modelPath = "C:\\Users\\wch002\\Desktop\\training_data\\23-09-08_model_train_s6_w60_maxErr-9-44.h5"
# Number of time steps per sample passed to generateDataSource().
window = 60
# Distance in rows between two consecutive time steps inside one sample.
shift = 6

In [None]:
import numpy as np
import pandas as pd
import glob
import os

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.decomposition import PCA
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split


from joblib import dump, load
 
import tensorflow as tf
import keras as ks
from keras.models import Sequential
from keras.layers import Dense, Input, GRU, LSTM
from keras.activations import relu, tanh, linear
from keras.layers import Dropout

from keras.callbacks import EarlyStopping, CSVLogger, TerminateOnNaN
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.utils import shuffle
from sklearn.metrics import make_scorer

import json

from collections import Counter
from scipy.ndimage import convolve1d

import plotly.graph_objects as go
from plotly.subplots import make_subplots


In [None]:
from weightedValues import weightValues
from readIn import readIn

In [None]:
# generateDataSource() -> 3-D array built from the data
def generateDataSource(signal_input=None, input_columns: list = None, output_length: int = 1, signal_output=None, window=1, shift=1, sample_rate=1):
    """Cut a multivariate signal into strided windows for a sequence model.

    Sample ``i`` starts at row ``i * sample_rate`` of ``signal_input`` and
    consists of the ``window`` rows ``start + j * shift`` for
    ``j = 0 .. window - 1``. Its target is the ``output_length`` values of
    ``signal_output`` beginning at the last row of that window.

    Args:
        signal_input: 2-D array-like of shape (time steps, features).
        input_columns: Unused; kept only for backward compatibility.
        output_length: Number of consecutive target values per sample.
        signal_output: 1-D array-like (or a single-column 2-D array) with one
            target value per time step.
        window: Number of time steps per sample.
        shift: Distance in rows between consecutive time steps of a sample.
        sample_rate: Distance in rows between the starts of two samples.

    Returns:
        Tuple ``(x, y)`` of float arrays with shapes
        ``(num_samples, window, features)`` and
        ``(num_samples, output_length, 1)``. Both are empty along the first
        axis when the signal is shorter than ``window * shift`` rows.

    Raises:
        ValueError: If an input is missing or has the wrong rank, if a size
            argument is smaller than 1, or if ``signal_output`` is too short
            to provide ``output_length`` targets for every window.
    """
    if signal_input is None or signal_output is None:
        raise ValueError("signal_input and signal_output are both required")
    if min(window, shift, sample_rate, output_length) < 1:
        raise ValueError("window, shift, sample_rate and output_length must be >= 1")

    features = np.asarray(signal_input, dtype=float)
    if features.ndim != 2:
        raise ValueError("signal_input must be 2-D (time steps, features)")

    targets = np.asarray(signal_output, dtype=float)
    if targets.ndim == 2 and targets.shape[1] == 1:
        targets = targets[:, 0]
    if targets.ndim != 1:
        raise ValueError("signal_output must hold one target value per time step")

    # The sample count is deliberately based on window * shift (not on
    # (window - 1) * shift + 1) so that it stays identical to the count
    # other code derives with the same formula.
    subsequence_len = window * shift
    signal_length = features.shape[0]
    if signal_length < subsequence_len:
        num_samples = 0
    else:
        # Floor division: int(a / b) would round a negative quotient up to 0
        # and silently produce one invalid sample.
        num_samples = 1 + (signal_length - subsequence_len) // sample_rate

    starts = np.arange(num_samples) * sample_rate
    # Row indices of every window, shape (num_samples, window).
    window_rows = starts[:, np.newaxis] + np.arange(window) * shift
    # Target indices start at the last row of each window.
    target_rows = (starts + (window - 1) * shift)[:, np.newaxis] + np.arange(output_length)

    if target_rows.size and target_rows[-1, -1] >= targets.shape[0]:
        raise ValueError(
            "signal_output is too short to provide output_length targets for every window"
        )

    x = features[window_rows]
    y = targets[target_rows][:, :, np.newaxis]
    return x, y

In [None]:

# Load the validation data through the project helper readIn().
# NOTE(review): the meaning of the two positional False flags is defined in
# readIn and is not visible in this notebook - confirm before changing them.
df_val_data = readIn(dataPath, False, False)

# Feature matrix: only the configured input columns, as a plain ndarray.
x_validation = df_val_data.loc[:, parameters].to_numpy()

# Measured target and the prediction stored in the data, both scaled by 1000.
y_validation = df_val_data["welle_z_ipo"] * 1000
prediction = df_val_data["prediction"] * 1000

In [None]:
# Load the serialized preprocessing pipeline with joblib.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load pipeline files from a trusted source.
pipeline = load(pipePath)
# compile = False loads the model without compiling it; the cells shown here
# only call model.predict().
model = tf.keras.models.load_model(modelPath, compile = False)

In [None]:
# Apply the loaded pipeline to the feature matrix.
# NOTE: x_validation is rebound to the transformed data, so re-running this
# cell without reloading the data transforms already-transformed values.
x_validation = pipeline.transform(x_validation)

In [None]:
# Cut the transformed features and the target into windows: x gets one block
# of `window` rows (spaced `shift` rows apart) per sample, y one target value
# per sample. Both names are rebound, so do not re-run this cell on its own
# output.
x_validation, y_validation = generateDataSource(signal_input = x_validation, signal_output = y_validation, window = window, shift = shift, sample_rate=1)

In [None]:
# The online prediction holds one value per raw time step, while the LSTM
# input is windowed and therefore yields fewer samples. Keep only the online
# value at the last time step of every window so that both predictions have
# the same length (same index arithmetic as generateDataSource with
# output_length = 1).
sample_rate = 1
subsequence_len = window * shift
Signal_Length = prediction.shape[0]
num_samples = 1 + int((Signal_Length - subsequence_len) / sample_rate)

# Offset of the last time step inside a window.
last_step_offset = (window - 1) * shift

p = np.zeros(shape=(num_samples, 1, 1))
for i in range(num_samples):
    first = i * sample_rate + last_step_offset
    p[i] = prediction[first:first + 1]

# Collapse the two singleton axes to get a flat vector.
prediction = p[:, 0, 0]

In [None]:
# Drop the two trailing axes of the windowed target so it becomes a 1-D vector.
# NOTE: re-running this cell fails, because the 1-D result can no longer be
# indexed with [:, 0, 0].
y_validation = y_validation[:,0,0]
# Run the new model on the windowed validation features.
y_pred = model.predict(x_validation)

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as ex
import plotly.io as pio

# Scatter both predictions against the measured value. Points on the
# "optimal line" are perfect predictions; the two black lines mark an offset
# of +5 / -5 around it.
scatter_mode = 'markers'

nrrows = 1
fig = make_subplots(rows=nrrows, cols=1, shared_xaxes=True, print_grid=True, vertical_spacing=0.01)

comparison_traces = [
    go.Scatter(x=y_validation, y=y_pred.flatten(), name="y_pred", mode=scatter_mode),
    go.Scatter(x=y_validation, y=prediction, name="y_online", mode=scatter_mode),
    go.Scatter(x=y_validation, y=y_validation, name="optimal line", mode="lines"),
    go.Scatter(x=y_validation, y=y_validation + 5, name="Upper bound", mode="lines", line_color="black"),
    go.Scatter(x=y_validation, y=y_validation - 5, name="Lower bound", mode="lines", line_color="black"),
]
for trace in comparison_traces:
    fig.add_trace(trace, row=1, col=1)

fig.update_layout(height=700, width=800, title_text="Compare prediction by my model and model on the machine")
fig.show()

In [None]:
# Flatten the model output once so it lines up with the 1-D target vector.
y_pred_flat = y_pred.flatten()

# Mean absolute error of the online prediction and of the new model against
# the measured values. The argument order is kept as it was: MAE is
# symmetric, and swapping it could change the dtype Keras computes in.
abs_err_online = tf.keras.losses.mean_absolute_error(prediction, y_validation)
abs_err_model = tf.keras.losses.mean_absolute_error(y_pred_flat, y_validation)
print("abs_err_online: ", abs_err_online)
print("abs_err_model: ", abs_err_model)

# Largest absolute deviation. np.max/np.abs run vectorized and propagate NaN,
# whereas the builtin max() iterates element by element and returns an
# order-dependent result when NaN values are present.
differenceModel = y_validation - y_pred_flat
differenceOnline = y_validation - prediction
max_err_online = np.max(np.abs(differenceOnline))
max_err_model = np.max(np.abs(differenceModel))
print("max_err_online: ", max_err_online)
print("max_err_model: ", max_err_model)


In [None]:
# Plot both predictions and the measured values over the sample index.
scatter_mode = 'lines'

nrrows = 1
fig = make_subplots(rows=nrrows, cols=1, shared_xaxes=True, print_grid=True, vertical_spacing=0.01)

# (legend name, values, drawing mode) for every curve, in drawing order.
series_to_plot = (
    ("y_pred", y_pred.flatten(), scatter_mode),
    ("y_online", prediction, scatter_mode),
    ("validation", y_validation, "lines"),
)
for trace_name, values, trace_mode in series_to_plot:
    fig.add_trace(go.Scatter(y=values, name=trace_name, mode=trace_mode), row=1, col=1)

fig.update_layout(height=700, width=1200, title_text="Compare prediction by my model and model on the machine")
fig.show()