# A machine learning approach for predicting power curves of manufacturing processes

This version (v1.0) was published alongside the CIRP paper of the same name: <a href="url">link text</a>.
The program is part of an ongoing research project of the Laboratory for Machine Tools and Production Engineering WZL, RWTH Aachen University, Aachen, Germany. Further adaptations and other use cases are foreseen.

<style type="text/css">
.tg  {border-collapse:collapse;border-spacing:0;}
.tg td{font-family:Arial, sans-serif;font-size:14px;padding:10px 5px;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:black;}
.tg th{font-family:Arial, sans-serif;font-size:14px;font-weight:normal;padding:10px 5px;border-style:solid;border-width:1px;overflow:hidden;word-break:normal;border-color:black;}
.tg .tg-5ua9{font-weight:bold;text-align:left}
.tg .tg-s268{text-align:left !important;} 
</style>
<table class="tg">
  <tr>
    <th class="tg-5ua9">Name</th>
    <th class="tg-5ua9">Input</th>
    <th class="tg-5ua9">Output</th>
    <th class="tg-5ua9">Description</th>
  </tr>
  <tr>
    <td class="tg-5ua9">data_import_and_preparation</td>
    <td class="tg-s268">-</td>
    <td class="tg-s268">np.array of all measurements, length of the longest print</td>
    <td class="tg-s268">import data from .csv file, one hot encoding, shuffle series of measurements</td>
  </tr>
  <tr>
    <td class="tg-5ua9">seperate_Xy</td>
    <td class="tg-s268">measurement series</td>
    <td class="tg-s268">Print settings (input), Energy measurements (output)</td>
    <td class="tg-s268">separating input and output of the datafile</td>
  </tr>
  <tr>
    <td class="tg-5ua9">normalization</td>
    <td class="tg-s268">measurement series, scaler (the scale mode is read from the global <code>scale_mode</code>)</td>
    <td class="tg-s268">Normalized train- &amp; testdata, original input and output values</td>
    <td class="tg-s268">normalize the measurements to make the NN learn better</td>
  </tr>
  <tr>
    <td class="tg-5ua9">DNN_model</td>
    <td class="tg-s268">various NN settings</td>
    <td class="tg-s268">DNN model</td>
    <td class="tg-s268">creates a sequential model from the given input settings</td>
  </tr>
  <tr>
    <td class="tg-5ua9">train_DNN</td>
    <td class="tg-s268">model, X_train, y_train, epochs, verbose,NAME</td>
    <td class="tg-s268">model, history</td>
    <td class="tg-s268">trains the NN with the given settings</td>
  </tr>
  <tr>
    <td class="tg-5ua9">plot_model_history</td>
    <td class="tg-s268">history, NAME</td>
    <td class="tg-s268">-</td>
    <td class="tg-s268">plots the accuracy and loss values for train and validation data of the model</td>
  </tr>
  <tr>
    <td class="tg-5ua9">descale</td>
    <td class="tg-s268">predictions, upper_limit</td>
    <td class="tg-s268">predictions</td>
    <td class="tg-s268">takes the predicted values of the normalized inputs and converts them back to the original scale</td>
  </tr>
  <tr>
    <td class="tg-5ua9">rmse_mae</td>
    <td class="tg-s268">predictions,y_test,lower_limit, upper_limit, return_single_values</td>
    <td class="tg-s268">rmses, maes</td>
    <td class="tg-s268">takes the predicted values and calculates the rmses and maes of each measurement.</td>
  </tr>
  <tr>
    <td class="tg-5ua9">prediction_graph</td>
    <td class="tg-s268">predictions, lower_limit, upper_limit</td>
    <td class="tg-s268">-</td>
    <td class="tg-s268">plots predictions together with the original data</td>
  </tr>
  <tr>
    <td class="tg-5ua9">get_X_labels</td>
    <td class="tg-s268">X_values</td>
    <td class="tg-s268">NAMES</td>
    <td class="tg-s268">Converts the one hot encoding back to normal job labels</td>
  </tr>
  <tr>
    <td class="tg-5ua9">model_1</td>
    <td class="tg-s268">input_layer_size,hidden_layers,hidden_layer_size,epochs,optimizer</td>
    <td class="tg-s268">model, history, prediction</td>
    <td class="tg-s268">creates a DNN via the integrated approach and makes test predictions</td>
  </tr>
  <tr>
    <td class="tg-5ua9">y_single_value_filter</td>
    <td class="tg-s268">y_data, timestep</td>
    <td class="tg-s268">y_single_value</td>
    <td class="tg-s268">return all measurements for a specific point of time</td>
  </tr>
  <tr>
    <td class="tg-5ua9">model_2</td>
    <td class="tg-s268">input_layer_size, hidden_layers, hidden_layer_size, epochs, optimizer, upper_limit</td>
    <td class="tg-s268">predictions_descaled</td>
    <td class="tg-s268">creates models for each measurement timestep and returns predictions on these models on the test data</td>
  </tr>
  <tr>
    <td class="tg-5ua9">One Run --&gt; Excel</td>
    <td class="tg-s268">runs</td>
    <td class="tg-s268"></td>
    <td class="tg-s268">Makes Predictions on both models for shuffled data various times</td>
  </tr>
</table>

## Imports

In [1]:
import pandas as pd  
import numpy as np
from matplotlib import pyplot
from math import exp
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import random
from sklearn import preprocessing 
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from keras import metrics
import warnings
import time
from time import sleep
import sys


# Print numpy arrays with three decimal places.
np.set_printoptions(precision=3)

# Global layout constants that the functions below read by name.
# Number of one-hot encoded printer columns (Drucker_1 .. Drucker_N).
num_of_printers = 5
# Column index of the measured value in the reordered dataset
# (job id, filament density, N printer columns, volume, two geometry columns).
y_position_in_csv = num_of_printers + 5
# Number of input columns that precede the measurements in one series.
attributes_of_part = num_of_printers + 4

Using TensorFlow backend.


## Preparation Functions

In [2]:
def data_import_and_preparation(csv_path='DruckerDaten_DOE_V6.csv', seed=None):
    """Load the measurement CSV and build one shuffled row per print job.

    The CSV is read with ';' as separator and must provide the columns
    'Auftrag', 'Filamentdichte', 'DruckerID', 'Volumen', 'Geometrie' and
    'Wirkleistung'. 'Geometrie' is one-hot encoded into 'Quader' and 'Kegel'
    (any other value yields 0.0 in both), 'DruckerID' into
    'Drucker_1' .. 'Drucker_<num_of_printers>'.

    Consecutive rows that share the same 'Auftrag' value form one job. Each job
    becomes one output row: the settings of the job's first CSV row followed by
    its 'Wirkleistung' values, right-padded with zeros to the longest job.

    Parameters
    ----------
    csv_path : str, optional
        Path of the CSV file; defaults to the file name used by the notebook.
    seed : int or None, optional
        If given, the job order is shuffled with a private ``random.Random(seed)``
        so the result is reproducible. If None, the global ``random`` state is
        used, as before.

    Returns
    -------
    all_data : numpy.ndarray
        2-D float32 array, one shuffled row per job.
    longest_print : int
        Number of measurements of the longest job.

    Raises
    ------
    ValueError
        If the CSV contains no data rows.
    """
    dataset = pd.read_csv(csv_path, header=0, sep=';')
    if dataset.empty:
        raise ValueError("no measurement rows found in %r" % (csv_path,))

    # One-hot encode the geometry.
    geometry = dataset.pop('Geometrie')
    dataset['Quader'] = (geometry == 'Quader')*1.0
    dataset['Kegel'] = (geometry == 'Kegel')*1.0

    # One-hot encode the printer id; the column list follows num_of_printers
    # instead of being spelled out by hand.
    printer_id = dataset.pop('DruckerID')
    printer_columns = ['Drucker_%s' % (i+1) for i in range(num_of_printers)]
    for i, column in enumerate(printer_columns):
        dataset[column] = (printer_id == (i+1))*1.0

    dataset = dataset[['Auftrag', 'Filamentdichte'] + printer_columns
                      + ['Volumen', 'Quader', 'Kegel', 'Wirkleistung']]
    print(dataset.sample(min(3, len(dataset))),"\n\n")

    feature_columns = [c for c in dataset.columns if c not in ('Auftrag', 'Wirkleistung')]

    # Number the runs of consecutive equal job ids. Grouping by run (instead of
    # de-duplicating the settings) keeps X and y aligned even when two jobs share
    # identical settings, and it also keeps a trailing job that has only one row.
    job_ids = dataset['Auftrag']
    run_ids = (job_ids != job_ids.shift()).cumsum().to_numpy()

    settings, curves = [], []
    for _, job in dataset.groupby(run_ids, sort=False):
        settings.append(job[feature_columns].iloc[0].tolist())
        curves.append(job['Wirkleistung'].tolist())

    longest_print = max(len(curve) for curve in curves)

    # Settings first, then the power curve padded with zeros to a common length.
    all_data = [
        features + curve + [0] * (longest_print - len(curve))
        for features, curve in zip(settings, curves)
    ]

    if seed is None:
        random.shuffle(all_data)
    else:
        random.Random(seed).shuffle(all_data)

    all_data = np.array(all_data, dtype="f")
    return all_data, longest_print

In [3]:
def seperate_Xy(all_data):
    """Split every measurement series into its input and output part.

    The first ``y_position_in_csv - 1`` values of each series are the inputs,
    the remaining values are the outputs.

    Returns
    -------
    (X_data, y_data) : tuple of numpy.ndarray
        Inputs and outputs, one row per series, in the order of ``all_data``.
    """
    split_at = y_position_in_csv - 1
    X_data = np.array([series[:split_at] for series in all_data])
    y_data = np.array([series[split_at:] for series in all_data])
    return X_data, y_data

In [4]:
def normalization(all_data, scaler):
    """Scale the measurement series and split them 90/10 into train and test.

    The scaling strategy is selected by the module-level ``scale_mode``:

    * ``"x_y_seperate"`` - ``scaler`` is fitted on the inputs and then fitted
      again on the outputs, so afterwards it holds the output fit.
    * ``"x_y_together"`` - ``scaler`` is fitted on the transposed data, i.e.
      every measurement series is one column of the matrix handed to the scaler.
    * ``"x_only"`` - only the inputs are scaled, the outputs stay untouched.

    In every mode the scaler is fitted on all rows, including the ones that end
    up in the test split. The split is positional: the first 90 % of the rows
    are training data, the rest is test data.

    Parameters
    ----------
    all_data : numpy.ndarray
        2-D array, one measurement series per row.
    scaler : object
        Scaler offering ``fit_transform``.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray
        Scaled splits.
    org_X_test, org_y_test : numpy.ndarray
        Unscaled inputs and outputs of the test rows.

    Raises
    ------
    ValueError
        If ``scale_mode`` is not one of the three supported values.
    """
    X_data, y_data = seperate_Xy(all_data)

    if scale_mode == "x_y_seperate":
        scaled_X_data = scaler.fit_transform(X_data)
        scaled_y_data = scaler.fit_transform(y_data)
    elif scale_mode == "x_y_together":
        all_scaled_data = scaler.fit_transform(all_data.transpose()).transpose()
        scaled_X_data, scaled_y_data = seperate_Xy(all_scaled_data)
    elif scale_mode == "x_only":
        scaled_X_data = scaler.fit_transform(X_data)
        scaled_y_data = y_data
    else:
        # Previously an unknown mode only surfaced later as an unbound local.
        raise ValueError(
            "unknown scale_mode %r; expected 'x_y_seperate', 'x_y_together' or 'x_only'"
            % (scale_mode,)
        )

    # 90 percent of the rows train the model, the last 10 percent are test data.
    split_index = round(0.9 * len(X_data))

    org_X_test = np.array(X_data[split_index:])
    org_y_test = np.array(y_data[split_index:])

    X_train = np.array(scaled_X_data[:split_index])
    X_test = np.array(scaled_X_data[split_index:])

    y_train = np.array(scaled_y_data[:split_index])
    y_test = np.array(scaled_y_data[split_index:])

    print("#Trainingsdaten:",X_train.shape[0])
    print("#Testdaten:",X_test.shape[0], "\n")

    return X_train, X_test, y_train, y_test, org_X_test, org_y_test

## DNN Functions

In [5]:
import tensorflow as tf
import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard

#Open a console, change to your working directory, and type: tensorboard --logdir=logs/

In [6]:
def DNN_model(input_layer_size, dense_layers, hidden_layer_size, output_layer_size, optimizer):
    """Assemble and compile a fully connected sequential network.

    Layer stack: one ReLU input layer with ``input_layer_size`` units, then
    ``dense_layers`` repetitions of (ReLU layer with ``hidden_layer_size`` units,
    dropout of 0.25), then a linear layer with ``output_layer_size`` units.
    The input shape is taken from the module-level ``X_train``.

    The model is compiled with MSE loss and the metrics accuracy, MSE and MAE.
    """
    stack = [Dense(input_layer_size, activation='relu', input_shape=(X_train.shape[1:]))]

    for _ in range(dense_layers):
        stack.append(Dense(hidden_layer_size, activation='relu'))
        # A BatchNormalization(axis=1) layer at this position is currently disabled.
        stack.append(Dropout(0.25))

    stack.append(Dense(output_layer_size))

    model = Sequential(stack)
    model.compile(loss='mse',
                  optimizer=optimizer,
                  metrics=['accuracy', 'mse', 'mae'])
    return model

In [7]:
def train_DNN(model, X_train, y_train, epochs, verbose, NAME):
    """Fit ``model`` on the training data and log the run for TensorBoard.

    The batch size is one third of the number of training rows (rounded), 10 %
    of the training data is held out for validation, and the data is shuffled.
    TensorBoard logs are written to ``logs/<NAME>``.

    Returns
    -------
    (model, history)
        The fitted model and the object returned by ``model.fit``.
    """
    tensorboard_logger = TensorBoard(log_dir="logs/{}".format(NAME))
    fit_options = dict(
        batch_size=round(X_train.shape[0]/3),
        validation_split=0.1,
        epochs=epochs,
        verbose=verbose,
        shuffle=True,
        callbacks=[tensorboard_logger],
    )
    history = model.fit(X_train, y_train, **fit_options)
    return model, history

In [8]:
def plot_model_history(history, NAME):
    """Plot training and validation loss and accuracy of one training run.

    Parameters
    ----------
    history : object
        Object whose ``history`` attribute maps metric names to per-epoch
        values, as returned by ``model.fit``. It must contain 'loss',
        'val_loss' and the accuracy metric, logged either as
        'acc'/'val_acc' or as 'accuracy'/'val_accuracy'.
    NAME : str
        Run name, used as title of both figures.
    """
    log = history.history

    # Figure 100: loss curves.
    pyplot.figure(100)
    pyplot.plot(log['loss'], label='training')
    pyplot.plot(log['val_loss'], label='validation')
    pyplot.title(NAME)
    pyplot.xlabel('epoch')
    pyplot.ylabel('loss')
    # The curves carry labels, so the figure needs its own legend.
    pyplot.legend()

    # The accuracy metric is logged under a different key depending on the
    # Keras version; accept both spellings instead of failing with a KeyError.
    acc_key = 'acc' if 'acc' in log else 'accuracy'

    # Figure 101: accuracy curves.
    pyplot.figure(101)
    pyplot.plot(log[acc_key], label='training')
    pyplot.plot(log['val_' + acc_key], label='validation')
    pyplot.title(NAME)
    pyplot.xlabel('epoch')
    pyplot.ylabel('accuracy')
    pyplot.legend()
    pyplot.show()

In [9]:
def descale(predictions, upper_limit):
    """Convert scaled predictions back to the original scale.

    Depends on the module-level ``scale_mode`` and ``scaler``:

    * ``"x_y_seperate"`` - the predictions are passed to
      ``scaler.inverse_transform`` directly.
    * ``"x_y_together"`` - the matrix the scaler was fitted on is rebuilt from
      the globals ``X_train``, ``y_train`` (first ``upper_limit`` columns) and
      ``X_test`` with the predictions in place of the test outputs. It is
      inverse-transformed in transposed form, and the prediction part is cut
      out again (rows after the training rows, columns from
      ``attributes_of_part`` on).
    * any other mode - the predictions are returned unchanged.
    """
    if scale_mode == "x_y_seperate":
        predictions = scaler.inverse_transform(predictions)

    if scale_mode == "x_y_together":
        n_train = X_train.shape[0]
        test_block = np.concatenate((X_test, predictions), axis=1)
        train_block = np.concatenate((X_train, y_train[:, :upper_limit]), axis=1)
        full_matrix = np.concatenate((train_block, test_block), axis=0)
        # One series per column, matching the orientation used for fitting.
        rescaled = scaler.inverse_transform(full_matrix.transpose()).transpose()
        predictions = rescaled[n_train:, attributes_of_part:]

    return predictions

In [10]:
def rmse_mae(predictions, y_test, lower_limit, upper_limit, return_single_values):
    """Compute RMSE and MAE of the predictions against the original test curves.

    The reference values are taken from the module-level ``org_y_test``
    (columns ``lower_limit:upper_limit``). The ``y_test`` argument is accepted
    but not used.

    Parameters
    ----------
    predictions : numpy.ndarray
        Predicted curves, one row per test series.
    return_single_values : bool
        True: return one RMSE and one MAE per row (two lists).
        False: return a single RMSE and a single MAE over all rows.
        Any other value: two empty lists are returned.

    Returns
    -------
    (rmses, maes)
    """
    rmses, maes = [], []

    if return_single_values == True:
        targets = [org_y_test[row, lower_limit:upper_limit]
                   for row in range(predictions.shape[0])]
        rmses = [sqrt(mean_squared_error(predicted, target))
                 for predicted, target in zip(predictions, targets)]
        maes = [mean_absolute_error(predicted, target)
                for predicted, target in zip(predictions, targets)]

    if return_single_values == False:
        target = org_y_test[:, lower_limit:upper_limit]
        rmses = sqrt(mean_squared_error(predictions, target))
        maes = mean_absolute_error(predictions, target)

    return rmses, maes

In [11]:
def prediction_graph(predictions, lower_limit, upper_limit):
    """Plot every predicted curve together with the measured curve.

    One figure is created per entry of the module-level ``y_test``. The
    measured curve comes from ``org_y_test`` (columns
    ``lower_limit:upper_limit``) and the figure title is the matching row of
    ``org_X_test``. All figures are shown at the end.
    """
    number_of_series = len(y_test)
    for series in range(number_of_series):
        measured = org_y_test[series, lower_limit:upper_limit]
        pyplot.figure(series)
        pyplot.plot(predictions[series], 'r', label='Vorhersage')
        pyplot.plot(measured, label='Leistungskurve')
        pyplot.legend()
        pyplot.ylabel('Wirkleistung [W]')
        pyplot.xlabel('Zeitschritte [min]')
        pyplot.title(org_X_test[series])
    pyplot.show()

In [12]:
def get_X_labels(X_values):
    """Turn one-hot encoded input rows back into readable job labels.

    Column layout read from each row: column 0 is printed after 'F', columns
    ``1 .. num_of_printers`` are the printer flags, column
    ``num_of_printers + 1`` is printed after 'V', and column
    ``num_of_printers + 2`` is the 'Quader' flag (anything other than 1 is
    labelled 'Kegel').

    Parameters
    ----------
    X_values : numpy.ndarray
        2-D array of unscaled input rows.

    Returns
    -------
    list of str
        One label per row, formatted as '<printer> <geometry> F<..> V<..>'.
        A row without any printer flag set to 1 is labelled 'PrinterUnknown'
        instead of raising or reusing the previous row's printer.
    """
    NAMES = []
    for row in X_values:
        # Evaluate the printer per row so nothing leaks over from the row before.
        active = [j + 1 for j in range(num_of_printers) if row[j + 1] == 1]
        # If several flags are set, the highest printer number wins.
        printer = "Printer%s" % active[-1] if active else "PrinterUnknown"
        geometry = "Quader" if row[num_of_printers + 2] == 1 else "Kegel"
        NAMES.append("%s %s F%s V%s" % (printer, geometry, row[0], row[num_of_printers + 1]))
    return NAMES

## Model_01 - Integrated Approach

In [13]:
def model_1 (input_layer_size,hidden_layers,hidden_layer_size,epochs,optimizer):
    """Integrated approach: one network predicts the whole power curve.

    Builds a network with ``longest_print`` outputs, trains it on the
    module-level ``X_train``/``y_train``, predicts ``X_test`` and converts the
    prediction back with ``descale``.

    Parameters
    ----------
    input_layer_size, hidden_layers, hidden_layer_size, optimizer
        Forwarded to ``DNN_model``.
    epochs : int
        Number of training epochs.

    Returns
    -------
    (model, history, prediction)
        The trained model, the training history, and the descaled test
        predictions.
    """
    # Run name handed to train_DNN; the timestamp keeps repeated runs apart.
    NAME = "%s-inlay_%s-hlayers_%s-hsize_%s"%(input_layer_size,hidden_layers,hidden_layer_size,int(time.time()))
    upper_limit = longest_print
    output_layer_size = longest_print  # one output per time step
    verbose = 0

    print("Training model_1 (appx. 15s)...")
    model = DNN_model(input_layer_size, hidden_layers, hidden_layer_size, output_layer_size, optimizer)
    model, history = train_DNN(model, X_train, y_train, epochs, verbose, NAME)

    prediction = descale(model.predict(X_test), upper_limit)

    return model, history, prediction

## Model_02 - Multi Net Approach

In [14]:
def y_single_value_filter(y_data, timestep):
    """Return the measurements of all series at one time step.

    For a 2-D ``y_data`` (one series per row) the result is a new array of
    shape ``(number_of_series, 1)`` holding column ``timestep``.
    """
    values_at_step = y_data.T[timestep]
    # Copy the values, add a leading axis and flip it to the end.
    return np.array(values_at_step)[np.newaxis].T

In [15]:
def train_model_02(model, X_train, y_train, epochs, verbose):
    """Fit ``model`` and predict the module-level ``X_test``.

    Training uses a batch size of one third of the training rows (rounded), a
    10 % validation split and shuffling.

    Returns
    -------
    (prediction, history)
        Prediction for the global ``X_test`` and the object returned by
        ``model.fit``.
    """
    batch = round(X_train.shape[0]/3)
    history = model.fit(
        X_train, y_train,
        batch_size=batch,
        validation_split=0.1,
        epochs=epochs,
        verbose=verbose,
        shuffle=True,
    )
    return model.predict(X_test), history

In [None]:
def model_2(input_layer_size, hidden_layers, hidden_layer_size, epochs, optimizer, upper_limit):
    """Multi-net approach: predict the power curve one time step at a time.

    For every time step in ``range(0, upper_limit)`` the network is trained on
    that single output column of the module-level ``y_train`` and then predicts
    ``X_test``. The per-step predictions are joined column-wise and converted
    back with ``descale``.

    NOTE(review): a single model instance is created before the loop and trained
    further at every time step, so each step starts from the weights of the
    previous one. Confirm that this warm start is intended, as opposed to a
    fresh network per time step.

    Parameters
    ----------
    input_layer_size, hidden_layers, hidden_layer_size, optimizer
        Forwarded to ``DNN_model``.
    epochs : int
        Training epochs per time step.
    upper_limit : int
        Number of time steps to predict.

    Returns
    -------
    numpy.ndarray
        Descaled predictions, one row per test series and one column per time
        step.
    """
    # Run name handed to train_DNN; identical for all time steps of this call.
    NAME = "%s-inlay_%s-hlayers_%s-hsize_%s"%(input_layer_size,hidden_layers,hidden_layer_size,int(time.time()))
    lower_limit = 0
    output_layer_size = 1  # one value per network call
    verbose = 0

    model = DNN_model (input_layer_size, hidden_layers, hidden_layer_size, output_layer_size, optimizer)
    print("Training model_2 (appx. 5min)",end="")

    step_predictions = []
    for y_stelle in range(lower_limit, upper_limit):
        print(".", end="")
        y_single_train = y_single_value_filter(y_train, y_stelle)
        model, history = train_DNN(model, X_train, y_single_train, epochs, verbose, NAME)
        step_predictions.append(model.predict(X_test))
    print()  # finish the progress line

    if step_predictions:
        # Join once at the end; float64 is what the previous implementation
        # produced by concatenating onto an integer seed column.
        predictions = np.concatenate(step_predictions, axis=1).astype(np.float64)
    else:
        predictions = np.empty((X_test.shape[0], 0))

    predictions_descaled = descale(predictions, upper_limit)

    return predictions_descaled

## One Run -->  Excel

In [17]:
def to_excel(runs):
    """Train both models once and write their test predictions to an Excel file.

    Reads the module-level ``y_test``, ``org_X_test``, ``org_y_test`` and
    ``longest_print``. The workbook ``run_<runs+1>.xlsx`` gets one sheet
    ``Run_<runs+1>`` with three columns per test series:

    * ``<label>``        - measured curve, followed by the two filler codes,
    * ``<label>-pre_m1`` - model_1 prediction, followed by its RMSE and MAE,
    * ``<label>-pre_m2`` - model_2 prediction, followed by its RMSE and MAE.

    Test series that produce the same label share a column name, so a later
    one overwrites the earlier one.

    Parameters
    ----------
    runs : int
        Zero-based run index; ``runs + 1`` appears in the file and sheet name.
    """
    optimizer = 'adam'
    # Filler values appended to the measured curve so its column is as long as
    # the prediction columns, which carry RMSE and MAE in their last two rows.
    codes = [666,999]

    model_m1, history_m1, predictions_m1 = model_1(16,2,400,1000, optimizer)
    rmse_m1, mae_m1 = rmse_mae(predictions_m1, y_test,0, longest_print, True)

    predictions_m2 = model_2(64,1,64,100, optimizer, longest_print)
    rmse_m2, mae_m2 = rmse_mae(predictions_m2, y_test, 0, longest_print, True)

    labels = get_X_labels(org_X_test)

    # Collect all columns first and build the frame in one go.
    columns = {}
    for i in range(org_y_test.shape[0]):
        columns["%s" %(labels[i])] = np.concatenate((org_y_test[i],codes),axis=None)
        columns["%s-pre_m1" %(labels[i])] = np.concatenate((predictions_m1[i], [rmse_m1[i], mae_m1[i]]),axis=None)
        columns["%s-pre_m2" %(labels[i])] = np.concatenate((predictions_m2[i], [rmse_m2[i], mae_m2[i]]), axis=None)
    df_model = pd.DataFrame(columns)

    # Open the workbook only once the results exist; the context manager saves
    # and closes it (ExcelWriter.save() is no longer available in pandas 2).
    with pd.ExcelWriter('run_%s.xlsx'%(runs+1)) as writer:
        df_model.to_excel(writer, sheet_name='Run_%s'%(runs+1))
    print("Run_%s saved"%(runs+1),"\n")

# Run Code

In [18]:
# scaler and scale_mode are module-level settings; normalization() and descale()
# read scale_mode (and descale() also scaler) as globals.
scaler = StandardScaler()
scale_mode = "x_y_together"

# One pass = import and shuffle the data, scale and split it, train both models
# and write one Excel file. The names assigned here are read as globals by the
# model, metric and plotting functions.
for i in range(1):
    all_data, longest_print = data_import_and_preparation() #shuffled data as a numpy array
    X_train, X_test, y_train, y_test, org_X_test, org_y_test = normalization(all_data, scaler)
    print("Starting Run_%s"%(i+1))
    to_excel(i)

      Auftrag  Filamentdichte  Drucker_1  Drucker_2  Drucker_3  Drucker_4  \
6560       39             100        0.0        0.0        0.0        1.0   
9516       61              10        0.0        0.0        0.0        1.0   
8659       53              10        0.0        0.0        1.0        0.0   

      Drucker_5  Volumen  Quader  Kegel  Wirkleistung  
6560        0.0  125.000     1.0    0.0     60.565895  
9516        0.0   67.000     1.0    0.0     63.602010  
8659        0.0   19.635     0.0    1.0     56.347512   


#Trainingsdaten: 79
#Testdaten: 9 

Starting Run_1
Training model_1...
Training model_2:........................................................................................................................................................................................................................................................................................................................................................................................

KeyboardInterrupt: 