# Train Neural Network
## Parse and prepare the data

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import joblib
import pandas as pd
import tensorflow as tf
import os
import fileinput
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler, normalize
from tensorflow import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers import Ftrl
from tensorflow.keras.layers import Dropout

import time

# Record the TensorFlow version in the cell output so the run environment is visible
print(f"TensorFlow version = {tf.__version__}\n")

# A fixed random seed gives the same random numbers on every run of the notebook.
# NOTE(review): the seeding statements at the top of the next cell are commented
# out, so no seed is actually set at the moment.

TensorFlow version = 2.6.2



In [2]:
# SEED = 1337
# np.random.seed(SEED)
# tf.random.set_seed(SEED)

# Each CSV file in the data directory represents one class: the class name is
# the file name without its extension, and the names are kept in sorted order.
CLASSES = sorted(
    os.path.splitext(file)[0]
    for file in os.listdir("../data/17-04")
    if file.endswith(".csv")
)

print(CLASSES)

['box-own', 'laptop-own', 'monitor-combined', 'pc-combined', 'phone-own', 'printer-combined', 'switch-afstand', 'tv_combined']


In [3]:
NUM_CLASSES = len(CLASSES)  # get the number of classes

# one-hot encoded matrix: row i is the expected network output for class i
ONE_HOT_ENCODED_CLASSES = np.eye(NUM_CLASSES)

# determines how many consecutive CSV rows (samples) make up one network input
SAMPLES_PER_CLASS = 30

# CSV columns that are discarded before training
UNUSED_COLUMNS = ['time', 'Voltage', 'Factor']
# features fed to the network; this order defines the layout of every input row
FEATURE_COLUMNS = ['ApparentPower', 'Current', 'Power', 'ReactivePower']

# per-class blocks of inputs / outputs, concatenated into single arrays below
inputs = []
outputs = []

# read each csv file and add one block of inputs and outputs per class
for class_index, objectClass in enumerate(CLASSES):
    df = pd.read_csv("../data/17-04/" + objectClass + ".csv")
    # drop any unused parameters
    df = df.drop(columns=UNUSED_COLUMNS)
    # get rid of empty value lines of csv which cause NaN inputs to TensorFlow
    df = df.dropna().reset_index(drop=True)

    # number of complete recordings in the file; leftover rows that do not fill
    # a whole recording are ignored
    num_recordings = df.shape[0] // SAMPLES_PER_CLASS
    print(f"\u001b[32;4m{objectClass}\u001b[0m class will be output \u001b[32m{class_index}\u001b[0m of the classifier")
    print(f"{num_recordings} samples captured for training with inputs {list(df)} \n")

    # Vectorised equivalent of appending the four features of each of the
    # SAMPLES_PER_CLASS rows one by one: a row-major reshape lays every recording
    # out as [row0 features..., row1 features..., ...].
    usable_rows = num_recordings * SAMPLES_PER_CLASS
    samples = df[FEATURE_COLUMNS].to_numpy(dtype=np.float64)[:usable_rows]
    inputs.append(samples.reshape(num_recordings, SAMPLES_PER_CLASS * len(FEATURE_COLUMNS)))
    # every recording of this class shares the same one-hot output row
    outputs.append(np.tile(ONE_HOT_ENCODED_CLASSES[class_index], (num_recordings, 1)))

# stack the per-class blocks into one array each
inputs = np.concatenate(inputs)
outputs = np.concatenate(outputs)

# Scale the columns of X to be between 0 and 1 using MinMaxScaler
# NOTE(review): the scaler is fitted on all recordings before the train/test/
# validation split in the next cell, so test and validation statistics influence
# the scaling. Consider fitting on the training portion only.
scaler = MinMaxScaler()
inputs_scaled = scaler.fit_transform(inputs)
# persist the fitted scaler to 'scaler.gz'
joblib.dump(scaler, 'scaler.gz')

# print shape of an input
print(inputs[0].shape)
print(CLASSES)


[32;4mbox-own[0m class will be output [32m0[0m of the classifier
2629 samples captured for training with inputs ['ApparentPower', 'Current', 'Power', 'ReactivePower'] 

[32;4mlaptop-own[0m class will be output [32m1[0m of the classifier
1037 samples captured for training with inputs ['ApparentPower', 'Current', 'Power', 'ReactivePower'] 

[32;4mmonitor-combined[0m class will be output [32m2[0m of the classifier
7143 samples captured for training with inputs ['ApparentPower', 'Current', 'Power', 'ReactivePower'] 

[32;4mpc-combined[0m class will be output [32m3[0m of the classifier
17245 samples captured for training with inputs ['ApparentPower', 'Current', 'Power', 'ReactivePower'] 

[32;4mphone-own[0m class will be output [32m4[0m of the classifier
226 samples captured for training with inputs ['ApparentPower', 'Current', 'Power', 'ReactivePower'] 

[32;4mprinter-combined[0m class will be output [32m5[0m of the classifier
9933 samples captured for training with

In [4]:
# Shuffle the recordings so every class is spread evenly over the training,
# testing and validation sets
# https://stackoverflow.com/a/37710486/2020087
num_inputs = len(inputs)
randomize = np.arange(num_inputs)
np.random.shuffle(randomize)

# Re-index inputs and outputs with the same shuffled order so that every input
# stays paired with its expected output
inputs_scaled, outputs = inputs_scaled[randomize], outputs[randomize]

# Split boundaries: the first 60% is for training, the next 20% for testing and
# the remainder for validation
TRAIN_SPLIT = int(0.6 * num_inputs)
TEST_SPLIT = int(0.2 * num_inputs + TRAIN_SPLIT)

inputs_train = inputs_scaled[:TRAIN_SPLIT]
inputs_test = inputs_scaled[TRAIN_SPLIT:TEST_SPLIT]
inputs_validate = inputs_scaled[TEST_SPLIT:]

outputs_train = outputs[:TRAIN_SPLIT]
outputs_test = outputs[TRAIN_SPLIT:TEST_SPLIT]
outputs_validate = outputs[TEST_SPLIT:]

print("Data set randomization and splitting complete.")
print(inputs_test)
print(num_inputs)

Data set randomization and splitting complete.
[[0.02554028 0.0294365  0.01126126 ... 0.02850755 0.01121076 0.03661972]
 [0.01571709 0.01654051 0.0045045  ... 0.01453326 0.0044843  0.01971831]
 [0.01178782 0.01261564 0.00225225 ... 0.00894354 0.         0.01408451]
 ...
 [0.18860511 0.22960471 0.20045045 ... 0.23420906 0.19955157 0.11267606]
 [0.00589391 0.006448   0.00225225 ... 0.00503074 0.00224215 0.0084507 ]
 [0.00392927 0.00364452 0.00225225 ... 0.00670766 0.00224215 0.01126761]]
44679


# Test code to convert the time in the CSV

# Build & Train the Model
Build and train a TensorFlow model using the high-level Keras API.

In [None]:
# https://www.kaggle.com/getting-started/174307
from sklearn.metrics import accuracy_score

# callback to only print metrics every X epochs
class Callback(tf.keras.callbacks.Callback):
    """Print loss, validation loss and validation accuracy every few epochs.

    Args:
        X_val: validation inputs that are passed to ``predict``.
        y_val: one-hot encoded expected outputs for ``X_val``; they are reduced
            to class indices once, at construction time.
        every: report interval in epochs (default 10). Epoch 0 is never reported.

    Raises:
        ValueError: if ``every`` is smaller than 1.
    """

    def __init__(self, X_val, y_val, every=10):
        super().__init__()
        if every < 1:
            raise ValueError("every must be at least 1")
        self.X = X_val
        self.y = y_val.argmax(axis=1)
        self.every = every  # change this to report more or less often

    def on_epoch_end(self, epoch, logs=None):
        """Report the metrics when ``epoch`` is a non-zero multiple of ``every``."""
        if epoch == 0 or epoch % self.every != 0:
            return
        # Keras may call this hook without a logs dict
        logs = logs or {}
        # use the model this callback is attached to (set by Keras) instead of
        # relying on a notebook-global variable named `model`
        pred = self.model.predict(self.X, verbose=0)
        print('epoch: ',epoch, ' | loss: ', str(logs.get('loss')), '| val_loss: ', str(logs.get('val_loss')), '| accuracy: ', accuracy_score(self.y,pred.argmax(axis=1)))

# early stopping -> stops training if the training loss shows no improvement for 10 epochs
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)

# build the model and train it
model = tf.keras.Sequential()
# model.add(Dropout(0.5))
model.add(tf.keras.layers.Dense(100, activation='relu')) # relu is used for performance (50
model.add(Dropout(0.2))
model.add(tf.keras.layers.Dense(200, activation='relu')) #30
model.add(Dropout(0.2))
model.add(tf.keras.layers.Dense(100, activation='relu')) #20
model.add(Dropout(0.2))
# softmax is used, because we only expect one class to occur per input
model.add(tf.keras.layers.Dense(NUM_CLASSES, activation='softmax'))
# NOTE(review): 'mse' loss with a softmax output and one-hot targets works, but
# 'categorical_crossentropy' is the usual choice for this setup. Left unchanged
# so that results stay comparable with earlier runs.
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

# The early-stopping callback was previously created but never handed to fit(),
# so it had no effect; it is now passed together with the progress printer.
history = model.fit(
    inputs_train,
    outputs_train,
    epochs=150,
    batch_size=16,
    validation_data=(inputs_validate, outputs_validate),
    callbacks=[Callback(inputs_validate, outputs_validate), callback],
    verbose=0,
)
model.summary() #500 32

2023-05-04 13:52:03.659199: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2023-05-04 13:52:03.659217: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: 9712c8604b07
2023-05-04 13:52:03.659221: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: 9712c8604b07
2023-05-04 13:52:03.659262: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 525.105.17
2023-05-04 13:52:03.659272: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 525.105.17
2023-05-04 13:52:03.659275: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 525.105.17
2023-05-04 13:52:03.659469: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU in

epoch:  10  | loss:  0.012057509273290634 | val_loss:  0.010248799808323383 | accuracy:  0.9500951102159562
epoch:  20  | loss:  0.010103769600391388 | val_loss:  0.008686400949954987 | accuracy:  0.9572563500055947
epoch:  30  | loss:  0.005903718527406454 | val_loss:  0.004120195750147104 | accuracy:  0.9782924918876581
epoch:  40  | loss:  0.005252835340797901 | val_loss:  0.004258202388882637 | accuracy:  0.9806422736936332
epoch:  50  | loss:  0.004474283661693335 | val_loss:  0.003557030111551285 | accuracy:  0.9824325836410428
epoch:  60  | loss:  0.004522552713751793 | val_loss:  0.003913470543920994 | accuracy:  0.9817612174107643
epoch:  70  | loss:  0.003772989846765995 | val_loss:  0.0033340773079544306 | accuracy:  0.9846704710753049
epoch:  80  | loss:  0.003979701083153486 | val_loss:  0.00329806050285697 | accuracy:  0.9837753161016001
epoch:  90  | loss:  0.003955136518925428 | val_loss:  0.003283480415120721 | accuracy:  0.9850061541904442


In [None]:
# per-epoch training and validation loss recorded during fitting
training_history = history.history
loss = training_history['loss']
val_loss = training_history['val_loss']

# x axis: epoch numbers starting at 1
epochs = range(1, len(loss) + 1)

# draw both loss curves on one set of axes
fig, ax = plt.subplots()
ax.plot(epochs, loss, 'g.', label="Training loss")
ax.plot(epochs, val_loss, 'b.', label="val loss")

ax.set_xlabel("epochs")
ax.set_ylabel("loss")
ax.legend()
plt.show()

# Run with Test Data

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# run the trained model on the test inputs
predictions = model.predict(inputs_test)
print(inputs_test.shape)
# show the (rounded) predictions next to the expected outputs
print("predictions =\n", predictions.round(decimals=3))
print("actual =\n", outputs_test)

# reduce predictions and one-hot expected outputs to class indices
y_prediction = predictions.argmax(axis=1)
y_test = outputs_test.argmax(axis=1)
# confusion matrix, normalized over the predicted labels (columns)
result = confusion_matrix(y_test, y_prediction, normalize='pred')
disp = ConfusionMatrixDisplay(confusion_matrix=result)
print(result)
disp.plot()

In [None]:
# write the trained model to its export directory
model_export_dir = 'classification_17-04V3/'
model.save(model_export_dir)

In [None]:
# save the notebook
!tar chvfz notebook.tar.gz *