In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as ex
import plotly.io as pio
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA, KernelPCA,IncrementalPCA,SparsePCA
from joblib import dump, load
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from datetime import datetime
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
import glob
import os
from scipy.signal import savgol_filter
import numpy as np
import seaborn as sn
import onnx
import tf2onnx
%load_ext tensorboard


In [None]:
# Release tag; it names the per-version asset directory below.
version= 'v0.0.7'
# Single root for every path of this notebook, so relocating the workspace
# needs one edit instead of three.
workspace_root= "G:\\Innovations@HELLER\\DN\\KI\\Zollern-FH-MillTrunMotor\\Datasets\\workspace\\"
# Parent directory of the training\, testing\ and validation_data folders.
workspace= workspace_root + "data\\"
validation_data= workspace + "validation_data"
# Output directory for figures, the fitted preprocessor and the saved models.
assests_dir= workspace_root + 'assets\\' + version + '\\'
if not os.path.exists(assests_dir):
    print('Directory: {dir} is not exist! Creating Directory'.format(dir= assests_dir))
# exist_ok=True removes the race between the existence check and the creation.
os.makedirs(name= assests_dir, exist_ok= True)

Read the files

In [None]:
# Load every validation CSV into one frame; each row is tagged with the name
# of the file it came from so experiments can be separated again later.
os.chdir(validation_data)
df:pd.DataFrame= None
validation_frames= []
# sorted() gives a reproducible file order (glob returns files in arbitrary order).
for file in sorted(glob.glob('*.csv')):
    df_file = pd.read_csv(file)
    df_file['file_name']= file
    print('Current File: ', file)
    validation_frames.append(df_file)
# Fail here with a clear message instead of leaving df as None for later cells.
if not validation_frames:
    raise FileNotFoundError('No *.csv files found in {dir}'.format(dir= validation_data))
# One concat at the end avoids re-copying the growing frame on every iteration.
df= pd.concat(validation_frames, axis=0)


In [None]:
#Calculate the cooling power
#df['kuehleistung']=(df['Ruecklauftemperatur']-df['Vorlauftempertatur'])*df['Volumenstrom_Kuehlung'] * 1.16 * 60/1000

In [None]:
# Show the column labels of the concatenated frame.
df.columns

In [None]:
# Print dtypes, non-null counts and memory usage of the concatenated frame.
df.info()

In [None]:
# Keep only the rows whose motor temperature reading is not exactly zero.
# NOTE(review): presumably a zero marks a missing sensor value - confirm.
motor_temp_is_nonzero= df['T_MOTOR'].ne(0)
df= df.loc[motor_temp_is_nonzero]

Drawing signals for each experiment

In [None]:
# Columns to plot and, in the same order, the titles shown above their subplots.
selected_columns= ['DRZ5', 'T_KLEMMUNG', 'T_LAGER', 'T_MOTOR',
       'T_BETT', 'magnet_temperature']
selected_columns_with_units= ['Drehzahl (RPM)', 'Temp_Klemmung (C°)','Temp_Lager (C°)','Temp_Motor (C°)','T_BETT (C°)','Temperatur_Magnet (C°)']
experiments= list(df['file_name'].unique())
n_rows= len(selected_columns)
# One figure per source file, one stacked subplot per selected column.
for experiment in experiments:
    current_df= df[df['file_name'] == experiment]
    fig= make_subplots(rows=n_rows, cols=1, shared_xaxes= True, print_grid= True, subplot_titles= selected_columns_with_units, vertical_spacing=0.02)
    for row, column in enumerate(selected_columns, start=1):
        trace= go.Scatter(x= current_df['date'], y= current_df[column], name= column, mode= 'lines')
        fig.add_trace(trace, row= row, col= 1)
    # Only the bottom subplot carries the shared x-axis title.
    fig.update_xaxes(title_text= 'Zeit ', row= n_rows, col= 1)
    fig.update_layout(height=1200, width=1200, title_text='MillTurn-Motor {experiment}'.format(experiment= experiment))
    fig.show()

Draw Distribution of Signals

In [None]:
#df= df[df['file_name']=='S1_500_M3_k.xlsx']

In [None]:
# Histogram (50 bins) of every numeric column of df; df.hist returns the grid of axes.
ax= df.hist(figsize= (15,15),bins=50,xlabelsize=10, ylabelsize= 10)
# Handle of the figure that holds the grid; it is not used again in this cell.
fig= ax[0][0].get_figure()
# NOTE(review): plt.xlabel/plt.ylabel act on the current axes only, so
# presumably just one panel receives these labels - confirm this is intended.
plt.xlabel('values')
plt.ylabel('counts')
# Saves the current pyplot figure into the versioned asset directory.
plt.savefig(assests_dir + 'row_data_hist.jpg')
plt.show()

Drawing Correlation Heatmap

In [None]:
# Pairwise correlation of the speed signal, the temperature signals and the
# magnet temperature, drawn as an annotated heat map.
neededColumns= [ 'DRZ5', 'T_KLEMMUNG', 'T_LAGER', 'T_MOTOR', 'T_BETT',  'magnet_temperature']
# df['T_MOTOR']= df['T_MOTOR'] -df['T_BETT']
# df['T_LAGER']= df['T_LAGER'] -df['T_BETT']
correlations= df[neededColumns].corr()
fig, ax = plt.subplots(figsize=(15, 15))
ax.set_title(' Heat-Map für die Korrelationsmatrix')
# The colour scale is pinned to [-1, 1], the full range of a correlation coefficient.
ax= sn.heatmap(correlations, ax=ax, annot=True, vmin=-1, vmax=1, cmap='rainbow', annot_kws={"size": 15, 'color': 'black'})
fig.savefig(assests_dir +'Korreation_Heatmap.jpg')
plt.show()

Signalglättung

In [None]:
# Same heat map restricted to three measured temperatures and the magnet temperature.
neededColumns= [ 'T_MOTOR', 'T_LAGER', 'T_KLEMMUNG', 'magnet_temperature']
correlations= df[neededColumns].corr()
fig, ax = plt.subplots(figsize=(15, 15))
ax.set_title(' Heat-Map für die Korrelationsmatrix')
# The colour scale is pinned to [-1, 1], the full range of a correlation coefficient.
ax= sn.heatmap(correlations, ax=ax, annot=True, vmin=-1, vmax=1, cmap='rainbow', annot_kws={"size": 10, 'color': 'black'})
fig.savefig(assests_dir +'Korreation_Heatmap_kleinerform.jpg')
plt.show()

In [None]:
# Exploratory PCA: project the three measured temperatures onto three principal
# components and attach the magnet temperature for the correlation plot below.
neededColumns= [ 'T_MOTOR', 'T_LAGER', 'T_KLEMMUNG']
# reset_index() without drop keeps the old index as an extra column; the target
# values are read back by column name below.
target= df['magnet_temperature'].reset_index()
# drop=True is essential here: an in-place reset_index() without it would turn
# the old row index into a fourth input column, so the PCA would be fitted on
# the row numbers as well as on the temperatures.
reduced_data= df[neededColumns].reset_index(drop= True)
pca1=PCA(n_components=3)
transformed_data= pca1.fit_transform(reduced_data)
new_df= pd.DataFrame(transformed_data,columns=['PC1', 'PC2', 'PC3'])
# Both frames carry a fresh 0..n-1 index, so this assignment aligns row by row.
new_df['Temperatur_Magnet']= target['magnet_temperature']

In [None]:
# Total share of the input variance captured by the fitted components of pca1.
pca1.explained_variance_ratio_.sum()

In [None]:
# Correlation between the principal components and the magnet temperature.
correlations= new_df.corr()
fig, ax = plt.subplots(figsize=(15, 15))
ax.set_title(' Heat-Map für die Korrelationsmatrix')
ax= sn.heatmap(correlations, ax=ax, annot=True, vmin=-1, vmax=1, cmap='rainbow', annot_kws={"size": 10, 'color': 'black'})
# Saved next to the other figures in the versioned asset directory; the data
# directory (workspace) should only contain input data.
fig.savefig(assests_dir +'pca_Korrelation_heatmap.jpg' )
plt.show()

In [None]:
#assests_dir= 'G:\\Innovations@HELLER\\DN\\KI\\Zollern-FH-MillTrunMotor\\Datasets\\workspace\\assets\\'
# File name under which the fitted preprocessing pipeline is dumped inside assests_dir.
preprocessor_name= 'preprocessor.p'
# Windowing settings passed to generateDataSource when the sequences are built:
# number of time steps in one input sequence,
window=20
# row distance between two consecutive steps inside one sequence,
shift=1
# row distance between the starts of two consecutive sequences.
sample_rate=1

In [None]:
from sklearn.preprocessing import FunctionTransformer

In [None]:
def rounding(signals: np.ndarray):
    """Return ``signals`` with every value rounded to three decimal places.

    Used as the last step of the preprocessing pipeline via
    ``FunctionTransformer``.
    """
    rounded = np.around(signals, decimals=3)
    return rounded

In [None]:
### window      number of time steps in each generated input sequence
### shift       row distance between two consecutive steps of one sequence
### sample_rate row distance between the starts of two consecutive sequences
def generateDataSource(signal_input=None, input_columns: list = None, output_length: int = 1, signal_output=None, window=1, shift=1, sample_rate=1):
    """Cut a multivariate signal into fixed-length sequences with their targets.

    Sequence ``i`` starts at row ``i * sample_rate`` and contains the rows
    ``start, start + shift, ..., start + (window - 1) * shift``. Its target is
    the ``output_length`` values of ``signal_output`` beginning at the last row
    of the sequence.

    Parameters
    ----------
    signal_input : array-like of shape (n_rows, n_features)
        Input signal, one row per time step.
    input_columns : list, optional
        Not used; kept so existing calls keep working.
    output_length : int
        Number of target values per sequence.
    signal_output : array-like with one value per row of ``signal_input``
        Target signal (1-D, or 2-D with a single column; a pandas Series works).
    window, shift, sample_rate : int
        See the comment above the function.

    Returns
    -------
    x : ndarray of shape (num_samples, window, n_features), float64
    y : ndarray of shape (num_samples, output_length, 1), float64
        Both are empty (``num_samples == 0``) when the signal is too short
        for a single sequence.

    Raises
    ------
    ValueError
        If a signal is missing or malformed, or a size parameter is < 1.
    """
    if signal_input is None or signal_output is None:
        raise ValueError('signal_input and signal_output are both required')
    if min(window, shift, sample_rate, output_length) < 1:
        raise ValueError('window, shift, sample_rate and output_length must all be >= 1')

    features = np.asarray(signal_input, dtype=np.float64)
    if features.ndim != 2:
        raise ValueError('signal_input must be 2-D: (n_rows, n_features)')
    targets = np.asarray(signal_output, dtype=np.float64)
    if targets.ndim == 0 or targets.size != targets.shape[0]:
        raise ValueError('signal_output must hold exactly one value per time step')
    targets = targets.reshape(-1)

    Signal_Length = features.shape[0]
    if targets.shape[0] < Signal_Length:
        raise ValueError('signal_output is shorter than signal_input')

    #subsequence_len= (window -1) *shift + 1
    subsequence_len = window * shift
    last_step = (window - 1) * shift

    # Number of sequences allowed by the input signal (same rule as before) ...
    by_window = 0
    if Signal_Length >= subsequence_len:
        by_window = 1 + (Signal_Length - subsequence_len) // sample_rate
    # ... and by the targets, which matters once output_length exceeds shift.
    by_target = 0
    if Signal_Length >= last_step + output_length:
        by_target = 1 + (Signal_Length - last_step - output_length) // sample_rate
    num_samples = min(by_window, by_target)

    # Row indices of every step of every sequence: (num_samples, window).
    starts = np.arange(num_samples) * sample_rate
    step_rows = starts[:, None] + np.arange(window)[None, :] * shift
    x = features[step_rows]

    # Row indices of the targets of every sequence: (num_samples, output_length).
    target_rows = starts[:, None] + last_step + np.arange(output_length)[None, :]
    y = targets[target_rows][:, :, None]

    return x, y

Build a preprocessor

In [None]:
# Load every training CSV into one frame; it is used to fit the preprocessor.
os.chdir(workspace+'training\\')
df:pd.DataFrame= None
training_frames= []
# sorted() gives a reproducible file order (glob returns files in arbitrary order).
for file in sorted(glob.glob('*.csv')):
    df_file = pd.read_csv(file)
    df_file['file_name']= file
    print('Current File: ', file)
    training_frames.append(df_file)
# Fail here with a clear message instead of leaving df as None for later cells.
if not training_frames:
    raise FileNotFoundError('No *.csv files found in {dir}'.format(dir= workspace+'training\\'))
# One concat at the end avoids re-copying the growing frame on every iteration.
df= pd.concat(training_frames, axis=0)
###Filtering motor Temperature
df= df[df['T_MOTOR'] != 0]

In [None]:
def remove_t_bett(data: np.ndarray):
    """Drop the first column and subtract it from the next two columns.

    Column 0 of ``data`` is the reference signal (the bed temperature, going
    by the function name). The result holds ``data[:, 1] - data[:, 0]`` and
    ``data[:, 2] - data[:, 0]`` followed by any remaining columns unchanged.

    Parameters
    ----------
    data : ndarray of shape (n_rows, n_columns)

    Returns
    -------
    ndarray of shape (n_rows, n_columns - 1)
        A new array; ``data`` itself is left untouched.
    """
    # Copy first: data[:, 1:] on its own is a view, so writing into it would
    # silently overwrite the caller's array.
    result = np.array(data[:, 1:], copy=True)
    # data[:, :1] keeps a column shape so it broadcasts across both columns.
    result[:, :2] = result[:, :2] - data[:, :1]
    return result

In [None]:
# Preprocessing chain applied to the raw temperatures before windowing:
# standardise -> project onto three principal components -> round to 3 decimals.
scaler = StandardScaler(with_mean=True, with_std=True)
# Second scaler instance; it is not part of the pipeline built below.
scaler2 = StandardScaler(with_mean=True, with_std=True)
pca= PCA(n_components= 3)
preprocessing_steps = [
    ('stdscaler', scaler),
    ('pca', pca),
    ('rounding2', FunctionTransformer(rounding)),
]  #('t_bett_removal', FunctionTransformer(remove_t_bett)),
pipeline = Pipeline(steps=preprocessing_steps)

In [None]:
# Input columns of the model, in the order the fitted pipeline will expect them.
neededColumns= ['T_LAGER', 'T_MOTOR', 'T_KLEMMUNG']
# reset_index() without drop keeps the old index as an extra column next to
# the rounded target values.
target= df['magnet_temperature'].reset_index().round(decimals=2)
# drop=True yields the three feature columns on a fresh 0..n-1 index in one
# step, without mutating a slice of df and without an index column to remove.
reduced_data= df[neededColumns].reset_index(drop= True)

In [None]:
# Display the feature frame that the preprocessor is fitted on.
reduced_data

In [None]:
# Manual check of remove_t_bett; the result is only displayed, not stored.
# NOTE(review): reduced_data holds T_LAGER, T_MOTOR, T_KLEMMUNG (no T_BETT
# column), so here the first column being subtracted is T_LAGER - confirm.
remove_t_bett(reduced_data.to_numpy())

In [None]:
# Fit the preprocessing pipeline on the training features and persist it so the
# identical transformation can be reloaded later.
preprocessor_path = assests_dir + preprocessor_name
transformed_data= pipeline.fit_transform(reduced_data)
dump(pipeline, preprocessor_path)

In [None]:
# Display the output of the fitted pipeline for the training features.
transformed_data

In [None]:
#pipeline.steps[1][1].explained_variance_ratio_

In [None]:
# new_df= pd.DataFrame(transformed_data,columns=['PC1', 'PC2', 'PC3'])
# new_df['Temperatur_Magnet']= target['Temperatur_Magnet']
# correlations= new_df.corr()
# fig, ax = plt.subplots(figsize=(15, 15))
# plt.title(' Heat-Map für die Korrelationsmatrix')
# ax= sn.heatmap(correlations, annot=True, vmin=-1, vmax=1, cmap='rainbow', annot_kws={"size": 10, 'color': 'black'})
# plt.savefig(assests_dir +'best_pca_Korrelation_heatmap.jpg')
# plt.show()

Prepare Input dataset

In [None]:
# Build the training tensors: every file is transformed and windowed on its own
# so that no sequence spans two recordings; the parts are joined afterwards.
# NOTE(review): unlike the frame used to fit the pipeline, rows with
# T_MOTOR == 0 are not removed here - confirm this is intended.
os.chdir(workspace+'training\\')
df:pd.DataFrame= None
x= None
y= None
# Upper bound only; the loop lowers it to the row count of the shortest file.
batch_size= 1000000000
x_parts= []
y_parts= []
n_sequences= 0
# sorted() gives a reproducible sample order (glob returns files in arbitrary order).
for file in sorted(glob.glob('*.csv')):
    print('Read File= ', file)
    df_file = pd.read_csv(file)
    rough_data= df_file[neededColumns]
    target= df_file['magnet_temperature']
    batch_size= min(batch_size, len(df_file))
    transformed_data= pipeline.transform(rough_data)
    partitions, target = generateDataSource(signal_input=transformed_data, input_columns=neededColumns, output_length=1, signal_output=target, window=window, shift=shift, sample_rate=sample_rate)
    x_parts.append(partitions)
    y_parts.append(target)
    # Running shapes, printed exactly as if the arrays were already joined.
    n_sequences+= partitions.shape[0]
    print('X: ', (n_sequences,) + partitions.shape[1:], ' y:', (n_sequences,) + target.shape[1:])
    print('batch_size= ', batch_size)
if not x_parts:
    raise FileNotFoundError('No *.csv files found in {dir}'.format(dir= workspace+'training\\'))
# One concatenate at the end avoids re-copying the growing arrays per file.
x= np.concatenate(x_parts, axis= 0)
y= np.concatenate(y_parts, axis= 0)

Read Testing datasets

In [None]:
# Build the test tensors with the pipeline and windowing used for training;
# every file is windowed on its own so that no sequence spans two recordings.
os.chdir(workspace+'testing\\')
df:pd.DataFrame= None
xtest= None
ytest= None
xtest_parts= []
ytest_parts= []
n_sequences= 0
# sorted() gives a reproducible sample order (glob returns files in arbitrary order).
for file in sorted(glob.glob('*.csv')):
    print('Read File= ', file)
    df_file = pd.read_csv(file)
    rough_data= df_file[neededColumns]
    target= df_file['magnet_temperature']
    transformed_data= pipeline.transform(rough_data)
    partitions, target = generateDataSource(signal_input=transformed_data, input_columns=neededColumns, output_length=1, signal_output=target, window=window, shift=shift, sample_rate=sample_rate)
    xtest_parts.append(partitions)
    ytest_parts.append(target)
    # Running shapes, printed exactly as if the arrays were already joined.
    n_sequences+= partitions.shape[0]
    print('X_test: ', (n_sequences,) + partitions.shape[1:], ' y_test:', (n_sequences,) + target.shape[1:])
if not xtest_parts:
    raise FileNotFoundError('No *.csv files found in {dir}'.format(dir= workspace+'testing\\'))
# One concatenate at the end avoids re-copying the growing arrays per file.
xtest= np.concatenate(xtest_parts, axis= 0)
ytest= np.concatenate(ytest_parts, axis= 0)

Splitting to training and testing dataset

In [None]:
from keras.activations import selu
from keras.models import Sequential
from keras.layers import LSTM, Input, Activation, Dense
from keras.losses import LossFunctionWrapper, mean_absolute_error, mean_squared_error
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.regularizers import L1L2
import tensorflow as tf


In [None]:
def max_loss(y_true: tf.Tensor, y_pred: tf.Tensor):
    """Largest absolute error plus the mean absolute error.

    ``tf.reduce_max`` runs over every element of ``|y_true - y_pred|``; the
    resulting scalar is added to the value of ``mean_absolute_error``.
    """
    worst_case = tf.reduce_max(tf.abs((y_true - y_pred)))
    error = tf.add(worst_case, mean_absolute_error(y_true, y_pred))
    return error


def smoothed_max_loss(y_true: tf.Tensor, y_pred: tf.Tensor):
    """Differentiable stand-in for ``max_loss``.

    The hard maximum is replaced by a Boltzmann-weighted mean of the absolute
    errors ``d``: ``sum(d * exp(a * d)) / sum(exp(a * d))`` with ``a = 0.5``.
    Large errors dominate the mean without removing the gradient of the
    other samples. The mean absolute error is added on top.
    """
    a=0.5
    diffs= tf.abs((y_true - y_pred))
    muls= tf.multiply(a, diffs)
    # Shift the exponents by their maximum before exponentiating. The factor
    # exp(-max) cancels between numerator and denominator, so the value (and,
    # thanks to stop_gradient, the gradient) is unchanged, but exp() can no
    # longer overflow to inf and turn the loss into NaN for large errors.
    exps= tf.exp(muls - tf.stop_gradient(tf.reduce_max(muls)))
    weighted_muls= tf.multiply(diffs, exps)
    nominater= tf.reduce_sum(weighted_muls)
    denominator= tf.reduce_sum(exps)
    boltzmann_operator= nominater/denominator
    error = tf.add(boltzmann_operator, mean_absolute_error(y_true, y_pred))
    return error


# Loss object handed to model.compile in build_Model_LSTM.
special_loss = LossFunctionWrapper(smoothed_max_loss)

In [None]:
def build_Model_LSTM(input_shape, activation_Function = selu,
    dropout = 0.0,
    l1_v = 0.00,
    l2_v = 0.00,
    structure= [],
    optimizer= 'Nadam'):
    """Build and compile a stacked-LSTM regressor with a single linear output.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to the ``Input`` layer.
    activation_Function
        Activation applied in a separate ``Activation`` layer after every LSTM.
    dropout : float
        Dropout rate passed to every LSTM layer.
    l1_v, l2_v : float
        L1 / L2 factors of the kernel regulariser of every LSTM layer.
    structure : list of int
        Units per LSTM layer, in stacking order.
    optimizer
        Optimizer passed to ``model.compile``.

    Returns
    -------
    The compiled model; it is trained with ``special_loss`` and reports
    ``max_loss`` as a metric.
    """
    model = Sequential()
    model.add(Input(shape=input_shape))
    last_index = len(structure) - 1
    for index, layer_size in enumerate(structure):
        # Every layer but the last returns the whole sequence so that the next
        # LSTM receives one vector per time step.
        model.add(LSTM(
            layer_size,
            stateful=False,
            return_sequences=index != last_index,
            unroll=False,
            kernel_initializer='he_normal',
            dropout=dropout,
            kernel_regularizer=L1L2(l1=l1_v, l2=l2_v),
        ))
        model.add(Activation(activation=activation_Function))
    model.add(Dense(1, activation='linear'))
    model.compile(optimizer=optimizer, loss= special_loss, metrics=[ max_loss])
    return model

In [None]:
def map(old_value, old_min, old_max, new_max, new_min):
    """Linearly rescale ``old_value`` from [old_min, old_max] to [new_min, new_max].

    Note the parameter order: ``new_max`` comes before ``new_min``. The name
    shadows the built-in ``map``; it is kept because callers use it.

    When the source range is empty (``old_max == old_min``) there is nothing
    to interpolate, so ``new_min`` is returned instead of dividing by zero.
    """
    old_span = old_max - old_min
    if old_span == 0:
        # Multiplying by 0.0 keeps the shape of array inputs.
        return (old_value - old_min) * 0.0 + new_min
    new_value= ( (old_value - old_min) / old_span ) * (new_max - new_min) + new_min
    return new_value

In [None]:
def calculate_weights(y_train: np.ndarray, occurance_threshold= 0):
    """Compute per-sample weights that favour rare label values, and plot them.

    Labels are rounded to three decimals; a label that occurs ``k`` times
    among ``N`` samples gets the raw weight ``N / (k + 1)``. The raw weights
    are then rescaled linearly into [1.2, 1.5] with ``map``.

    Parameters
    ----------
    y_train : ndarray
        Labels with ``N`` entries along the first axis and ``N`` values in total.
    occurance_threshold : int
        Currently without effect: rare and frequent labels use the same formula.

    Returns
    -------
    ndarray of shape (N,), float32
        The rescaled weights. NaN labels are not expected; they would be
        counted as one shared value.
    """
    y_train_rounded = np.round(y_train,decimals=3)
    N = len(y_train_rounded)
    y_train_rounded = np.reshape(y_train_rounded, (N,))
    if N == 0:
        # Nothing to weight or plot.
        return np.ones(shape=(0,), dtype=np.float32)

    # Count every distinct label once and map the counts back to the samples,
    # instead of re-scanning the whole array for each sample.
    _, inverse, counts = np.unique(y_train_rounded, return_inverse=True, return_counts=True)
    occurences = counts[np.reshape(inverse, (N,))]
    weights = (N / (occurences + 1)).astype(np.float32)

    old_min= weights.min()
    old_max= weights.max()
    new_min = 1.2
    new_max = 1.5
    weights_scaled= np.asanyarray(map(weights, old_min, old_max, new_max, new_min)).reshape(weights.shape)

    # Weights on top, labels below, sharing the sample axis.
    fig= make_subplots(rows=2,cols=1,shared_xaxes= True, print_grid= True,  vertical_spacing=0.02)
    fig.add_trace(go.Scatter(y=weights_scaled, name='weight of Labels'), row= 1, col= 1)
    fig.add_trace(go.Scatter(y=np.reshape(y_train, (N,)), name='Labels'), row= 2, col= 1)
    fig.update_layout(height=900, width=900, title_text= 'weight of Labels')
    fig.show()

    return weights_scaled

In [None]:
# Callback that writes the complete model to best_model.h5 whenever the
# validation loss reaches a new minimum.
# NOTE(review): the model.fit call in this notebook passes only earlyStopping
# as a callback, so this checkpoint is configured but never triggered - confirm.
best_model_path = assests_dir + 'best_model.h5'
checkpoint = ModelCheckpoint(
    filepath=best_model_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    save_weights_only=False,
    mode='min',
    save_freq='epoch',
)
# Stop training once the validation loss has not improved for 1000 epochs.
earlyStopping = EarlyStopping(monitor='val_loss', mode='min', patience=1000, verbose=1)

In [None]:
#xtrain, xtest, ytrain, ytest = train_test_split(xtest,ytest,shuffle= False, test_size=0.1, random_state=49)
# Train on all sequences built from the training folder; the test set comes
# from its own folder, so no split is made here.
xtrain, ytrain = x, y

In [None]:
import tensorflow as tf
# Let TensorFlow allocate GPU memory on demand instead of reserving it all.
physical_devices = tf.config.list_physical_devices('GPU')
# Apply the setting to every visible GPU. With no GPU present the loop simply
# does nothing, where indexing physical_devices[0] would raise IndexError.
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(gpu, enable=True)

In [None]:
#weights = calculate_weights(y_train=ytrain, occurance_threshold= 5)
# Single LSTM layer with 5 units; the input shape is (time steps, features).
model = build_Model_LSTM( (xtrain.shape[1], xtrain.shape[2]),activation_Function = 'elu',
    dropout = 0.0,
    l1_v = 0.001,
    l2_v = 0.001,
    structure=[5],
    optimizer= 'Nadam')
# True trains from scratch; False is meant to resume from stored weights
# (the loading calls are currently commented out).
reset= True
try:
    if not reset:
        #model.set_weights(model_w)
        #model.load_weights(filepath=weights_path)
        print('Previous weights loaded Successfully')
except Exception:
    # Catch Exception only, so KeyboardInterrupt/SystemExit are not swallowed.
    print('No Previous weights')
print('input shape ', model.input_shape)
print(model.output_shape)
# summary() prints by itself; wrapping it in print() would add a stray "None".
model.summary()
# Half the shortest training file per batch, but never fewer than one sample.
training_batch_size= max(1, batch_size // 2)
# NOTE(review): the `checkpoint` callback defined in this notebook is not
# passed here, so best_model.h5 is never written - confirm this is intended.
summary = model.fit(x=xtrain, y=ytrain, shuffle=False, batch_size= training_batch_size,  epochs=15000, validation_data=(xtest, ytest),
                    callbacks=[earlyStopping], verbose=2, workers=32, use_multiprocessing=True)#, sample_weight=weights)

In [None]:
# Persist the trained model twice: as a Keras HDF5 file and as an ONNX graph.
model_path = assests_dir + 'model.h5'
onnx_path = assests_dir + 'model.onnx'
model.save(model_path, save_format='h5')
# The ONNX input is named 'x'; the batch dimension is left open (None).
sequence_length, n_features = xtrain.shape[1], xtrain.shape[2]
input_signature = [tf.TensorSpec([None, sequence_length, n_features], tf.float32, name='x')]
onnx_model, _ = tf2onnx.convert.from_keras(model=model, input_signature=input_signature, opset=10)
onnx.save_model(onnx_model, onnx_path)
print(".onnx model saved successfully")

In [None]:
#model.save_weights(filepath=weights_path)
metric_loss = summary.history['loss']
metric_val_loss = summary.history['val_loss']
fig = make_subplots(rows=1, cols=1, shared_xaxes=True,
                    print_grid=True,  vertical_spacing=0.02)
fig.add_trace(go.Line(y=metric_loss, name='Training Loss'), row=1, col=1)
fig.add_trace(go.Line(y=metric_val_loss , name='Validation Loss'), row=1, col=1)
#fig.add_trace(go.Line(y=mae_loss, name='Training {}'.format('mae')), row=1, col=1)
#fig.add_trace(go.Line(y=val_mae_loss, name='Validation {}'.format('mae')), row=1, col=1)
fig.update_xaxes(title_text='Epochs', row=1, col=1)
fig.update_yaxes(title_text='Loss', row=1, col=1)
fig.update_layout(height=900, width=900, title_text='Training Curve')
fig.show()
pio.write_image(fig, str(assests_dir+'trainingCurve.jpg'), format='jpg')

Results of training set

In [None]:

# Predicted vs. true magnet temperature on the training set.
# Support points of the diagonal "perfect prediction" line (0 .. 130).
x_steps = np.linspace(0, 130, 10)
# summary() prints by itself; wrapping it in print() would add a stray "None".
model.summary()
pred = model.predict(xtrain)
fig, ax = plt.subplots(figsize=(15, 15))
plt.title('Results of LSTM Algorithm')
plt.xlabel('True (C°)')
plt.ylabel('Predicted (C°)')
# reshape returns a new array, so the result has to be assigned to take effect.
pred = pred.reshape((-1,))
plt.scatter(np.ravel(ytrain), pred, label='predictions_train', color='blue')
plt.plot(x_steps, x_steps, label='Optimal line', color='red')
plt.legend()
plt.savefig(str(assests_dir+'training_results.jpg'))
plt.show()

Results of testing set

In [None]:
# Print the test targets produced by generateDataSource for inspection.
print(ytest)

In [None]:

# Predicted vs. true magnet temperature on the test set.
# Support points of the diagonal "perfect prediction" line (0 .. 130).
x_steps = np.linspace(0, 130, 10)
# summary() prints by itself; wrapping it in print() would add a stray "None".
model.summary()
pred = model.predict(xtest)
fig, ax = plt.subplots(figsize=(15, 15))
plt.title('Results of LSTM Algorithm')
plt.xlabel('True (C°)')
plt.ylabel('Predicted (C°)')
# reshape returns a new array, so the result has to be assigned to take effect.
pred = pred.reshape((-1,))
print(ytest.shape)
plt.scatter(np.ravel(ytest), pred, label='predictions_Testing', color='blue')
plt.plot(x_steps, x_steps, label='Optimal line', color= 'red')
plt.legend()
plt.savefig(str(assests_dir+'testing_results.jpg'))
plt.show()

Load assets

In [None]:
from keras.models import load_model
# Reload the stored model and preprocessor from the versioned asset directory.
model_path = assests_dir + 'model.h5'
preprocessor_path = assests_dir + preprocessor_name
# compile=False: the model is loaded without compiling it.
model= load_model(model_path, compile= False)
# joblib.load unpickles the file - only load preprocessor files you trust.
# NOTE(review): the pipeline contains FunctionTransformer(rounding), so
# presumably `rounding` must already be defined when this runs - confirm.
pipeline= load(preprocessor_path)
print(model.input_shape)

Testing Experiments

In [None]:
# Run the model on every validation recording and plot prediction vs. truth
# (top) and the prediction error (bottom); one figure and one JPG per file.
os.chdir(validation_data)
df:pd.DataFrame= None
x= None
ytest= None
for file in glob.glob('*.csv'):
    print('Read File= ', file)
    df_file = pd.read_csv(file)
    rough_data= df_file[neededColumns]
    target= df_file['magnet_temperature']
    transformed_data= pipeline.transform(rough_data)
    partitions, target = generateDataSource(signal_input=transformed_data, input_columns=neededColumns, output_length=1, signal_output=target, window=window, shift=shift, sample_rate=sample_rate)
    pred= model.predict(partitions)
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, print_grid=True,  vertical_spacing=0.02)
    # go.Scatter replaces the deprecated go.Line, which plotly only kept as a
    # dict-based compatibility shim for scatter traces.
    fig.add_trace(go.Scatter(y=target.flatten(), name='Magent_temp_True'), row=1, col=1)
    fig.add_trace(go.Scatter(y=pred.flatten(), name='Magent_temp_Pred'), row=1, col=1)
    fig.update_yaxes(title_text='Magnet Temp (C°)', row=1, col=1)
    fig.add_trace(go.Scatter(y=target.flatten() - pred.flatten(), name='Prediction Error (True -Predicted)'), row=2, col=1)
    fig.update_yaxes(title_text='Prediction Error (C°)', row=2, col=1)
    fig.update_xaxes(title_text='Zeit 1 = 2 Sek', row=2, col=1)
    fig.update_layout(height=900, width=900, title_text='Vorhersage der Magnet-Temp {file}'.format(file = file))
    fig.show()
    pio.write_image(fig, str(assests_dir+'{file}.jpg'.format(file= file)), format='jpg')