**Import of Modules**

In [24]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.python.client import device_lib

**For Import of Custom Modules in Google Colab**
  - Set the variable path_of_PS_IndustriALL (str) below to the directory that contains the PS_IndustriALL folder

In [23]:
# Directory that contains the PS_IndustriALL project folder
# (here: the Google Drive mount point used in Colab).
path_of_PS_IndustriALL = "/content/drive/MyDrive"

# Make the project's helper modules importable. The membership check keeps
# sys.path free of duplicate entries when this cell is executed more than once.
_custom_modules_dir = os.path.join(path_of_PS_IndustriALL, "PS_IndustriALL/src/ultlities_modules")
if _custom_modules_dir not in sys.path:
  sys.path.insert(0, _custom_modules_dir)

from note_taker import NoteTaker  # used below to record and save notebook results
from ultlities import test  # evaluation helper called on the test split

**Object to Write Important Information to a Text File:**

In [19]:
# Single NoteTaker instance shared by the whole notebook: later cells record
# key/value pairs with write_line(...) and persist them with save(...).
note_taker = NoteTaker()

**Load the data from the csv file:**

In [4]:
# Read the raw CSV stored inside the project folder.
dataframe_data = pd.read_csv(
  os.path.join(path_of_PS_IndustriALL, "PS_IndustriALL/src/data/data.csv")
)

# Record the number of rows and show the frame as loaded (timestamp still present).
data_size = dataframe_data.shape[0]
note_taker.write_line("Total amount data", data_size)
display(dataframe_data)

# The timestamp is not fed to the network; every remaining column counts as an input.
# Note that the remaining columns still include "target_iALL_PS", so the current
# target value is itself one of the model inputs.
dataframe_data = dataframe_data.drop(columns=["timestamp"])
number_input = dataframe_data.shape[1]
note_taker.write_line("Number of Inputs", number_input)

Total amount data: 119103


Unnamed: 0,timestamp,target_iALL_PS,TAG_iALL_PS_00,TAG_iALL_PS_04,TAG_iALL_PS_05,TAG_iALL_PS_06,TAG_iALL_PS_10,TAG_iALL_PS_11,TAG_iALL_PS_12,TAG_iALL_PS_13,TAG_iALL_PS_22,TAG_iALL_PS_23,TAG_iALL_PS_26,TAG_iALL_PS_28,TAG_iALL_PS_37,TAG_iALL_PS_48,TAG_iALL_PS_50,TAG_iALL_PS_51
0,2018-04-01 00:00:00,0.0,4.548754,1011.733181,97.284889,33.248746,71.130001,123.338718,58.828461,9.271792,843.930188,2220.187843,1842.687291,1463.647280,156.908398,310.022461,426.651658,410.820484
1,2018-04-01 00:01:00,0.0,7.887998,1358.466600,202.583688,32.494870,98.697513,74.897785,63.746416,7.989979,952.461448,2345.102397,1810.309041,1437.274642,160.845276,306.084796,375.316113,143.620728
2,2018-04-01 00:02:00,0.0,4.975919,1056.489015,147.098428,36.402837,39.329786,62.800193,65.154836,8.771413,1096.085499,2426.781646,1508.671751,1565.113515,157.841402,353.863854,444.809188,618.898000
3,2018-04-01 00:03:00,0.0,6.304142,1619.924847,307.722320,34.283344,126.751846,96.756731,86.551891,9.234399,781.719110,1811.656947,1483.740243,1966.270851,156.106507,301.563110,414.052496,427.323378
4,2018-04-01 00:04:00,0.0,1.671733,591.648283,-7.684779,30.980682,41.526627,124.592349,83.649076,9.101509,1001.099246,2267.969902,1600.106478,1396.339609,163.462727,298.957820,431.548430,514.659884
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119098,2018-07-09 12:39:00,0.0,5.191826,1963.160736,236.208078,28.848354,80.550983,101.501932,36.386412,6.794132,1201.115673,1781.618524,2215.462868,1479.371410,86.868269,301.394243,1170.190231,460.248620
119099,2018-07-09 12:40:00,0.0,6.849015,1136.882850,110.471072,23.773084,135.584829,107.572776,47.251239,7.719693,862.235189,2001.807009,1711.979868,2012.456761,110.493838,261.988969,1249.646796,548.666329
119100,2018-07-09 12:41:00,0.0,2.832383,383.606596,151.194093,41.133065,55.090741,114.515366,69.833234,7.779601,811.071846,2099.537425,1250.751776,1886.055740,92.372019,344.363066,1186.408225,526.184433
119101,2018-07-09 12:42:00,0.0,6.284776,1081.530595,106.541216,31.526591,131.685119,3.078141,76.481285,7.962738,1143.806963,1725.221142,2174.796109,1699.671456,106.665885,392.541203,1247.186205,88.419456


Number of Inputs: 17


**Set the analysis period (in minutes) for the RNN**
  - analysis_period (int)

In [20]:
# Number of consecutive rows (time steps) in every input window fed to the LSTM.
# The data shown above has one row per minute, so this is also the look-back in minutes.
analysis_period = 10

**Split the Dataset According to the Prediction Minute:**
  - 80% Train
  - 10% Validation
  - 10% Test

In [6]:
# Prediction horizon in rows (one row per minute); selected in data_preprocessing.ipynb.
predict_minute = 63

# Inputs: every row except the last `predict_minute` ones (they have no future target).
# Outputs: the same frame shifted `predict_minute` rows towards the past, so row i of
# output_data holds the values observed `predict_minute` rows after row i of input_data.
input_data = dataframe_data.iloc[:-predict_minute]
output_data = dataframe_data.shift(-predict_minute).dropna()

# Split sizes: 80% train, 10% validation, the remainder is test.
train_size = int(0.8 * data_size)
val_size = int(0.1 * data_size)
test_size = data_size - train_size - val_size

# Split with positional, end-exclusive slicing (.iloc).
# Label-based .loc slices include their end label, which would make the train and
# validation splits one row longer than train_size / val_size and would place the
# boundary row in two neighbouring splits at once.
# The test split deliberately starts `analysis_period` rows before the end of the
# validation range so that the first test target still has a full input window.
input_train_data = input_data.iloc[:train_size].to_numpy()
input_val_data = input_data.iloc[train_size:train_size + val_size].to_numpy()
input_test_data = input_data.iloc[train_size + val_size - analysis_period:].to_numpy()

# Only the target column is kept as the value to predict.
output_train_data = output_data["target_iALL_PS"].iloc[:train_size].to_numpy()
output_val_data = output_data["target_iALL_PS"].iloc[train_size:train_size + val_size].to_numpy()
output_test_data = output_data["target_iALL_PS"].iloc[train_size + val_size - analysis_period:].to_numpy()


**Dataset Normalization:**

In [7]:
# Inputs: one MinMaxScaler for all feature columns. It is fitted on the training
# split only (training minimum -> 0, training maximum -> 1) and then applied
# unchanged to the validation and test splits.
input_scaler = MinMaxScaler(feature_range=(0, 1))
input_train_data = input_scaler.fit_transform(input_train_data)
input_val_data, input_test_data = (
  input_scaler.transform(split) for split in (input_val_data, input_test_data)
)

# Targets: a separate scaler, also fitted on the training split only.
# reshape(-1, 1) turns each target array into a single-column 2-D array
# before it is handed to the scaler.
output_scaler = MinMaxScaler(feature_range=(0, 1))
output_train_data = output_scaler.fit_transform(output_train_data.reshape(-1, 1))
output_val_data, output_test_data = (
  output_scaler.transform(split.reshape(-1, 1)) for split in (output_val_data, output_test_data)
)


**Generating Input and Output**

In [8]:
def _make_windows(features, targets, window, stop):
  """Build sliding-window samples for one dataset split.

  For every position i in range(window, stop) the sample input is
  features[i - window:i] (the `window` rows that precede position i) and
  the sample label is targets[i].

  Args:
    features: sliceable sequence of input rows (here a 2-D NumPy array).
    targets: indexable sequence of labels, positionally aligned with `features`.
    window: number of rows in each sample.
    stop: exclusive upper bound of the positions i that are used.

  Returns:
    Tuple (inputs, labels) of NumPy arrays created with np.array from the
    collected windows and labels.
  """
  positions = range(window, stop)
  inputs = np.array([features[i - window:i] for i in positions])
  labels = np.array([targets[i] for i in positions])
  return inputs, labels


# train: positions analysis_period .. train_size - 1
input_train, output_train = _make_windows(
  input_train_data, output_train_data, analysis_period, train_size
)

# validation: positions analysis_period .. val_size - 1
input_val, output_val = _make_windows(
  input_val_data, output_val_data, analysis_period, val_size
)

# test: every position that has a target in the test split
input_test, output_test = _make_windows(
  input_test_data, output_test_data, analysis_period, len(output_test_data)
)

**Construction of the Predictive Model Architecture**

In [9]:
# Plain stack of layers: LSTM -> LSTM -> Dense.
model = Sequential()

# First LSTM: 128 units, fed with windows of shape (analysis_period, number_input).
# return_sequences=True makes it emit one 128-vector per time step, so the next
# LSTM receives a whole sequence.
model.add(LSTM(128, return_sequences=True, input_shape=(analysis_period, number_input)))

# Second LSTM: 128 units; return_sequences=False keeps only the output of the last
# time step. No input_shape is given here: only the first layer of a Sequential
# model needs one, later layers take their input shape from the previous layer.
model.add(LSTM(128, return_sequences=False))

# Single sigmoid unit: one output value in (0, 1) per window.
model.add(Dense(1, activation='sigmoid'))

# Mean squared error loss, Adam optimizer.
model.compile(loss='mse', optimizer='adam')

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10, 128)           74752     
                                                                 
 lstm_1 (LSTM)               (None, 128)               131584    
                                                                 
 dense (Dense)               (None, 1)                 129       
                                                                 
Total params: 206465 (806.50 KB)
Trainable params: 206465 (806.50 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


**Checking if GPU is Available**

  - In Google Colab, access: Runtime -> Notebook Setting

In [10]:
# Ask TensorFlow for the GPU device name. Anything other than the first GPU
# ('/device:GPU:0') is treated as "no GPU" and the CPU is selected instead.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
  print(f'Found GPU at: {device_name}')
else:
  device_name = "/device:CPU:0"
  print(f'Not Found GPU, device at in CPU: {device_name}')

Not Found GPU, device at in CPU: /device:CPU:0


**Model Training:**

In [None]:
# Weights folder. It is created up front so that a missing directory cannot make
# save_weights fail after the whole training run has already finished.
path_of_weights = os.path.join(path_of_PS_IndustriALL, "PS_IndustriALL/src/weights")
os.makedirs(path_of_weights, exist_ok=True)

# Training hyperparameters.
epochs = 300
batch_size = 32

# Train on the device selected in the previous cell (GPU when found, otherwise CPU).
with tf.device(device_name):
  train_result = model.fit(input_train, output_train, validation_data = (input_val, output_val), epochs=epochs, batch_size=batch_size)

# Save the trained weights in the weights folder.
model.save_weights(os.path.join(path_of_weights, "main_weights.h5"))

# Plot the training and validation loss per epoch.
plt.plot(train_result.history["loss"], label='train_loss')
plt.plot(train_result.history["val_loss"], label='val_loss')
plt.xlabel("epoch")
plt.ylabel("loss (MSE)")
plt.title("Training and validation loss")
plt.legend()
plt.show()

**Or Load an Already Trained Model**

In [11]:
# Folder in which the training cell stores the weights file.
path_of_weights = os.path.join(path_of_PS_IndustriALL, "PS_IndustriALL/src/weights")

# Load previously saved weights into the already-built model instead of training it;
# per the original author's note, this file comes from a 300-epoch training run.
model.load_weights(os.path.join(path_of_weights, "main_weights.h5"))

**Test of Model with Test Dataset**
  - The metric used is precision (true positives divided by the number of predictions)

In [12]:
# Evaluate the model on the test windows with the project's `test` helper
# (imported from the ultlities module), running on the selected device.
# The result is iterated with .items() in the Results cell, so it is expected
# to be a mapping of metric name -> value.
with tf.device(device_name):
  test_result = test(model, input_test, output_test, analysis_period, number_input)

**Results**

In [16]:
# Record every metric of the test run through the note taker, one line per metric.
for metric_name, metric_value in test_result.items():
  note_taker.write_line(metric_name, metric_value)

total_amount: 11848
amount_class_0: 10265
amount_class_1: 1583
precision_total: 0.9753544902093181
precision_class_0: 0.9880175353141744
precision_class_1: 0.8932406822488945


**Save the Annotations**

In [27]:
# Persist what was recorded with write_line(...). The arguments are a file name and
# the annotations folder inside the project.
# NOTE(review): presumably this writes <annotations folder>/train_main_model.txt — confirm in note_taker.py.
note_taker.save("train_main_model.txt", os.path.join(path_of_PS_IndustriALL, "PS_IndustriALL/src/annotations"))