### CV for different data settings for an LSTM to predict the magnet temperature

The script loads JSON files with different data settings (loading a preprocessing pipeline from a defined path is currently commented out).

Then cross-validation is performed for each of these data settings.

Currently the found models are **not** saved; only the parameters of the best models are printed.

In [None]:
import numpy as np
import pandas as pd
import sklearn as sk

import glob
import os

import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as ex
import plotly.io as pio

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.decomposition import PCA

from joblib import dump, load
 
import tensorflow as tf
import keras as ks
from keras.models import Sequential
from keras.layers import Dense, Input, GRU, LSTM
from keras.activations import relu, tanh, linear
from keras.layers import Dropout
from sklearn.neural_network import MLPRegressor

from keras.callbacks import EarlyStopping, CSVLogger, TerminateOnNaN, ModelCheckpoint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.utils import shuffle
from sklearn.metrics import make_scorer

from scikeras.wrappers import KerasRegressor

import json

In [None]:
#pipelinePath = "X:\\RotorTempKI\\pipeline.p"

In [None]:
# def usePipeline(pipelinePath, x_train, x_test):
#     newpipeline = load(pipelinePath)
#     newpipeline.steps[2][1].explained_variance_ratio_
#     x_transform = newpipeline.fit_transform(x_train)
#     x_validation = newpipeline.transform(x_test)

#     return x_transform, x_validation

In [None]:
# Callbacks shared by all training runs started further below.

# Aborts a run as soon as the loss becomes NaN.
stopNaN = TerminateOnNaN()
#csvLogger = CSVLogger('X:\\KI Praktikum\\csvLoggerCustomLoss.xlsx')

# Watches 'val_loss' with a patience of 100 epochs before stopping a run.
earlyStop = EarlyStopping(
    monitor='val_loss',
    patience=100,
    verbose=1,
    mode="auto",
)


In [None]:
def get_lstm_model(filepath, layers=(5, 5, 5), dropout=0, activation = "relu", input_shape = (1, 1), loss ="mean_squared_error"):
    """Build and compile a stacked LSTM network with a single linear output.

    Args:
        filepath: Not used inside this function; kept in the signature so
            existing callers that pass it keep working.
        layers: Sequence with the number of units of each LSTM layer, in
            stacking order. One LSTM layer is added per entry.
        dropout: Dropout value forwarded to every LSTM layer.
        activation: Activation forwarded to every LSTM layer.
        input_shape: Shape handed to the ``Input`` layer.
        loss: Loss passed to ``model.compile``.

    Returns:
        The compiled ``Sequential`` model (optimizer "Nadam").
    """
    model = Sequential()
    model.add(Input(input_shape))

    last_index = len(layers) - 1
    for index, units in enumerate(layers):
        # Only the final LSTM layer collapses the sequence; all earlier ones
        # must return sequences so the next LSTM layer receives 3D input.
        model.add(LSTM(units,
                       stateful=False,
                       dropout=dropout,
                       activation=activation,
                       return_sequences=index != last_index))

    model.add(Dense(1, activation="linear"))
    model.compile(loss=loss, optimizer="Nadam")
    return model

In [None]:
def OpenJsontoArr(path):
    """Load a JSON file and return its content as a NumPy array.

    Args:
        path: Path of the JSON file to read.

    Returns:
        ``numpy.ndarray`` built from the parsed JSON content via ``np.asarray``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the file even when parsing fails.
    with open(path) as file:
        x_3d = json.load(file)
    return np.asarray(x_3d)

In [None]:
def customLoss(y_true, y_pred):
    """Custom loss to reduce the absolute error of the individual samples as
    well as the maximum error.

    The absolute errors are averaged with weights ``exp(1.5 * |error|)``
    (sum of weighted errors divided by sum of weights), and the result of
    ``tf.losses.mean_absolute_error`` is added on top.
    """
    weight = 1.5
    abs_error = tf.abs(y_true - y_pred)
    # Exponential weights grow with the error, so large errors dominate the average.
    exp_weights = tf.exp(tf.multiply(weight, abs_error))
    weighted_sum = tf.reduce_sum(tf.multiply(abs_error, exp_weights))
    weight_total = tf.reduce_sum(exp_weights)
    mean_abs_error = tf.losses.mean_absolute_error(y_true, y_pred)
    return tf.add(weighted_sum / weight_total, mean_abs_error)

In [None]:
# Candidate loss functions offered to the hyper-parameter search.
losses = [customLoss,
          tf.keras.losses.MeanSquaredError(),
          tf.keras.losses.MeanAbsoluteError()]

# Directories holding the JSON exports of the training and validation data.
_train_json_dir = "C:\\Users\\wch002\\Desktop\\RotorTempDRZ\\8Tempsensors\\LSTMjsonTrain"
_val_json_dir = "C:\\Users\\wch002\\Desktop\\RotorTempDRZ\\8Tempsensors\\LSTMjsonVal"

# glob.glob returns matches in arbitrary order. The four lists (and `windows`)
# are paired by index later on, so each list is sorted to make that pairing
# deterministic.
x_files_train = sorted(glob.glob(os.path.join(_train_json_dir, "*_x.json")))
y_files_train = sorted(glob.glob(os.path.join(_train_json_dir, "*_y.json")))
x_files_val = sorted(glob.glob(os.path.join(_val_json_dir, "*_x.json")))
y_files_val = sorted(glob.glob(os.path.join(_val_json_dir, "*_y.json")))

# Window length used as the first input dimension for the i-th data setting.
# NOTE(review): assumes the entries line up with the sorted file order - confirm.
windows = [50, 60, 70, 50, 60, 70, 50, 60, 70]
i = 0

In [None]:
# The file lists and `windows` are paired by position, so make sure they
# line up before starting any (long-running) training.
if not (len(x_files_train) == len(y_files_train) == len(x_files_val) == len(y_files_val)):
    raise ValueError(
        "Mismatching number of data files: "
        f"{len(x_files_train)} train x, {len(y_files_train)} train y, "
        f"{len(x_files_val)} val x, {len(y_files_val)} val y"
    )
if len(x_files_train) > len(windows):
    raise ValueError(
        f"Found {len(x_files_train)} data settings but only {len(windows)} window lengths"
    )

# Run one randomized hyper-parameter search per data setting and report the
# errors of the best model on the training and the validation files.
for i, (x_train_path, y_train_path, x_val_path, y_val_path) in enumerate(
        zip(x_files_train, y_files_train, x_files_val, y_files_val)):
    print(x_train_path)
    x_cv = OpenJsontoArr(x_train_path)
    x_test = OpenJsontoArr(x_val_path)
    y_cv = OpenJsontoArr(y_train_path).flatten()
    y_test = OpenJsontoArr(y_val_path).flatten()

    # The model file name reuses a fixed-position slice of the training file name.
    filepath = os.path.join("C:\\Users\\wch002\\Desktop\\RotorTempDRZ\\combined\\modelle",
                            "model_" + x_train_path[-19:-7] + ".h5")
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=0, save_best_only=True, mode='min')

    # `loss` must be a single loss here; the search below overrides it with
    # one entry of `losses` for every sampled configuration.
    model = KerasRegressor(build_fn=get_lstm_model, filepath=filepath, verbose=0, layers=[5], dropout=0,
                           activation="relu", input_shape=(windows[i], 6), loss=losses[0])

    grid = dict(layers=[[1], [5], [1, 2], [2, 2], [5, 2]],
                dropout=[0],
                activation=["relu", "tanh", "selu", "elu"],
                loss=losses)

    searcher = RandomizedSearchCV(estimator=model, n_jobs=1, cv=3, param_distributions=grid,
                                  scoring='neg_mean_absolute_error', n_iter=5)
    # NOTE(review): neither validation_data nor validation_split is passed, so
    # 'val_loss' (monitored by earlyStop and checkpoint) does not appear to be
    # available during training - confirm whether a validation split is intended.
    # `use_multiprocessing` was dropped: it only concerns generator/Sequence
    # input and is not accepted by newer Keras versions of `fit`.
    searchResults = searcher.fit(x_cv, y_cv, batch_size=500, epochs=500,
                                 callbacks=[earlyStop, stopNaN, checkpoint])

    bestScore = searchResults.best_score_
    bestParams = searchResults.best_params_
    bestModel = searchResults.best_estimator_
    print(f"[INFO] best score is {bestScore:.2f} using {bestParams}")

    # Predictions are flattened so that subtracting them from the 1D targets
    # can never broadcast into a 2D matrix.
    best_pred_train = np.ravel(bestModel.predict(x_cv))
    difference_train = y_cv - best_pred_train
    error_avg_train = np.mean(np.abs(difference_train))
    error_max_train = np.max(np.abs(difference_train))
    print("Error over the training data:")
    print("max: ", error_max_train)
    print("avg: ", error_avg_train)

    best_pred_test = np.ravel(bestModel.predict(x_test))
    difference_test = y_test - best_pred_test
    error_avg_test = np.mean(np.abs(difference_test))
    error_max_test = np.max(np.abs(difference_test))
    print("Validation error:")
    print("max: ", error_max_test)
    print("avg: ", error_avg_test)