# First load imports needed for the project

In [33]:
# Necessary to use this notebook in google colab

import os

colab = True
cwd = os.getcwd()
cuda = None

if colab is True and cwd != "/content/Bsc_Thesis":
  ! git clone https://github.com/SergioTallo/Bsc_Thesis.git
  % cd Bsc_Thesis

print(cwd)

/content/Bsc_Thesis


In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import torch
from torch import device, tensor
import torch.nn as nn
import utils_bsc
from TransformerTallo import Transformer as TransformerTallo

if torch.cuda.is_available():
  device = torch.device('cuda:0')
  print('Device: GPU =', torch.cuda.get_device_name(0))
else:
  device = torch.device('cpu')
  print('Device: CPU')


Device: GPU = Tesla P100-PCIE-16GB


In [35]:
utils_bsc.print_versions()

versions of packages:
Python: 3.7.12
Pandas: 1.3.5
Numpy: 1.21.5
PyTorch: 1.10.0+cu111
Sklearn: 1.0.2


# Data loading and preparation

Now, we should create a dataset with all the data stored in the .csv file

Description of the data:

*   time: Timestamp (YYYY-MM-DD HH:MM:SS)
*   PLN1: Power in the phase 1 (W)
*   PLN2: Power in the phase 2 (W)
*   PLN3: Power in the phase 3 (W)
*   ULL1: Current Voltage between 2 phases (V)
*   ULL2: Current Voltage between 2 phases (V)
*   ULL3: Current Voltage between 2 phases (V)
*   COS_PHI1: Phase shift (Cos)
*   COS_PHI2: Phase shift (Cos)
*   COS_PHI3: Phase shift (Cos)
*   FREQ: Electricity Frequency (Hz)
*   RC_DC: Fault currents
*   RC_AC: Fault currents
*   RC_50Hz: Fault currents
*   RC_150Hz: Fault currents
*   RC_<100Hz: Fault currents
*   RC_100Hz-1kHz: Fault currents
*   RC_>10kHz: Fault currents


In [36]:
dataset = pd.read_csv('data_factory.csv')
dataset.head()

Unnamed: 0,time,PLN1,PLN2,PLN3,ULL1,ULL2,ULL3,COS_PHI1,COS_PHI2,COS_PHI3,FREQ,RC_DC,RC_AC,RC_50Hz,RC_150Hz,RC_<100Hz,RC_100Hz-1kHz,RC_>1kHz,RC_>10kHz
0,2020-06-01 00:00:00,1141.0819,519.5034,482.9381,398.8613,400.1982,395.601,0.8091,0.6864,0.4875,49.9927,4.0,91.0,10.0,39.0,36.0,86.0,82.0,7.0
1,2020-06-01 00:01:00,1145.1162,519.1807,491.4436,398.6934,400.1579,395.5431,0.808,0.6903,0.4904,49.9779,5.0,64.0,7.0,27.0,25.0,60.0,55.0,2.0
2,2020-06-01 00:02:00,1140.9558,743.3837,484.9942,398.4367,400.1205,395.5259,0.8113,0.9274,0.4806,49.9782,4.0,64.0,7.0,27.0,25.0,60.0,55.0,2.0
3,2020-06-01 00:03:00,1151.9409,741.4836,487.4224,398.98,400.4375,395.8621,0.8249,0.9123,0.4778,49.985,5.0,66.0,8.0,28.0,25.0,61.0,57.0,2.0
4,2020-06-01 00:04:00,1142.1594,741.9858,486.7629,398.7133,400.3145,395.6446,0.8081,0.9291,0.4552,49.9856,4.0,85.0,11.0,45.0,41.0,75.0,68.0,6.0


Once we have the dataset, we should prepare it. Finding the missing or the NaN values and replace them with suitable values (in this case we use the previous value).

In [37]:
# Replace all mising values with NaN
dataset = dataset.replace(' ', np.nan)
# Search for all the rows with NaN values
nan_values = dataset[dataset.isna().any(axis=1)]
# Print the shape to know how many are there
print(f'Number of rows with NaN values before cleaning: {nan_values.shape[0]}') 

# Fill all NaN values with the previous row value
dataset_clean = dataset.fillna(method='ffill')

# Check that there isn't any NaN values
nan_values = dataset_clean[dataset_clean.isna().any(axis=1)]
# Print the shape to know how many are there
print(f'Number of rows with NaN values after cleaning: {nan_values.shape[0]}') 

#Total number of samples
print(f'Total number of samples: {dataset_clean.shape[0]}')
print(f'Number of features: {dataset_clean.shape[1]}')

# Set to True to print the graphs
print_graphs = False

Number of rows with NaN values before cleaning: 2546
Number of rows with NaN values after cleaning: 0
Total number of samples: 63360
Number of features: 19


# Distribution of the data

Now we look at the distribution of the different features of the data over different time intervals

In [38]:
if print_graphs is True:

    # PLN_1 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 1, 'PLN_1')

    # PLN_1 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 1, 'PLN_1')

    # PLN_2 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 2, 'PLN_2')

    # PLN_2 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 2, 'PLN_2')

    # PLN_3 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 3, 'PLN_3')

    # PLN_3 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 3, 'PLN_3')

    # ULL1 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 4, 'ULL1')

    # ULL1 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 4, 'ULL1')

    # ULL2 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 5, 'ULL2')

    # ULL2 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 5, 'ULL2')

    # ULL3 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 6, 'ULL3')

    # ULL3 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 6, 'ULL3')

    # COS_PHI1 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 7, 'COS_PHI1')

    # COS_PHI1 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 7, 'COS_PHI1')

    # COS_PHI2 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 8, 'COS_PHI2')

    # COS_PHI2 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 8, 'COS_PHI2')

    # COS_PHI3 in a weekly interval

    utils_bsc.week_plot(dataset_clean, 9, 'COS_PHI3')

    # COS_PHI3 in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 9, 'COS_PHI3')

    # FREQ in a weekly interval

    utils_bsc.week_plot(dataset_clean, 10, 'FREQ')

    # FREQ in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 10, 'FREQ')

    # RC_DC in a weekly interval

    utils_bsc.week_plot(dataset_clean, 11, 'RC_DC')

    # RC_DC in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 11, 'RC_DC')

    # RC_AC in a weekly interval

    utils_bsc.week_plot(dataset_clean, 12, 'RC_AC')

    # RC_AC in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 12, 'RC_AC')

    # RC_50Hz in a weekly interval

    utils_bsc.week_plot(dataset_clean, 13, 'RC_50Hz')

    # RC_50Hz in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 13, 'RC_50Hz')

    # RC_150Hz in a weekly interval

    utils_bsc.week_plot(dataset_clean, 14, 'RC_150Hz')

    # RC_150Hz in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 14, 'RC_150Hz')

    # RC_100Hz_1kHz in a weekly interval

    utils_bsc.week_plot(dataset_clean, 15, 'RC_100Hz_1kHz')

    # RC_100Hz_1kHz in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 15, 'RC_100Hz_1kHz')

    # RC_100Hz_1kHz in a weekly interval

    utils_bsc.week_plot(dataset_clean, 16, 'RC_100Hz_1kHz')

    # RC_100Hz_1kHz in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 16, 'RC_100Hz_1kHz')

    # RC_more_1kHz in a weekly interval

    utils_bsc.week_plot(dataset_clean, 17, 'RC_more_1kHz')

    # RC_more_1kHz in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 17, 'RC_more_1kHz')

    # RC_more_10kHz in a weekly interval

    utils_bsc.week_plot(dataset_clean, 18, 'RC_more_10kHz')

    # RC_more_10kHz in a daily interval (only the values of weekdays between 4:00 and 19:30)

    utils_bsc.daily_plot(dataset_clean, 18, 'RC_more_10kHz')

# Preparation Training and Test set

Once the dataset is prepared, make batches of data,put them togheter in an array and split them into train and test sets.
After looking through the dataset and the features, i decided to takeonly the values with a timestap of a weekday between 4:00 and 19:30. In many of the features in the interval otside those timestamps there i only noise, which can be a sign that the machine is off in that time interval.

In [39]:
# Create 63300 batches of longitud 60

endset = utils_bsc.create_batches(dataset_clean, 60, device)
    
print(f'{len(endset)} sequences of longitud {endset[0].shape[0]} with {endset[0].shape[1]} features')

# Spliting into train and test sets

training_data, testing_data = train_test_split(endset, test_size=0.99, random_state=25)

train_set = torch.stack(training_data).float().to(device)
test_set = torch.stack(testing_data).float().to(device)

print(f'length of training set: {train_set.shape[0]}')
print(f'length of test set: {test_set.shape[0]}')

cuda:0
GPU
27840 sequences of longitud 60 with 18 features
length of training set: 278
length of test set: 27562


# Model settings

Now, we define a class with the transformer model that we are going to use:

Using the already written pytorch library for Transformers:

1) torch.nn.TransformerEncoderLayer

*   d_model –> the number of expected features in the input (required).
*   nhead –> the number of heads in the multiheadattention models (required).
*   dropout –> the dropout value (default=0.1).
*   activation –> the activation function of the intermediate layer, can be a string (“relu” or “gelu”) or a unary callable. (default: relu)
*   layer_norm_eps –> the eps value in layer normalization components (default=1e-5).
*   batch_first –> If True, then the input and output tensors are provided as (batch, seq, feature). (default: False)
*   norm_first –> if True, layer norm is done prior to attention and feedforward operations, respectivaly. Otherwise it’s done after. (default: False (after))

2) torch.nn.TransformerDecoderLayer

* d_model –> the number of expected features in the input (required).
* nhead –> the number of heads in the multiheadattention models (required).
* dim_feedforward –> the dimension of the feedforward network model (default=2048).
* dropout –> the dropout value (default=0.1).
* activation –> the activation function of the intermediate layer, can be a string (“relu” or “gelu”) or a unary callable. Default: relu
* layer_norm_eps –> the eps value in layer normalization components (default=1e-5).
* batch_first –> If True, then the input and output tensors are provided as (batch, seq, feature). Default: False.
* norm_first –> if True, layer norm is done prior to self attention, multihead attention and feedforward operations, respectivaly. Otherwise it’s done after. Default: False (after).

3) torch.nn.TransformerEncoder

* encoder_layer –> an instance of the TransformerEncoderLayer() class (required).
* num_layers –> the number of sub-encoder-layers in the encoder (required).
* norm –> the layer normalization component (optional).


4) torch.nn.TransformerDecoder

* decoder_layer – an instance of the TransformerDecoderLayer() class (required).
* num_layers – the number of sub-decoder-layers in the decoder (required).
* norm – the layer normalization component (optional).


In [40]:
class Transformer(nn.Module):
    def __init__(self, feature_size, output_size, num_encoder_layers, num_enc_heads, num_dec_heads, num_decoder_layers, device, dropout=0.1):
        super(Transformer, self).__init__()
        
        encoder_layer = nn.TransformerEncoderLayer(d_model= feature_size, nhead= feature_size, dropout=dropout, device=device)
        decoder_layer = nn.TransformerDecoderLayer(d_model= feature_size, nhead= feature_size, dropout=dropout, device=device)
        
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers= num_encoder_layers)
        self.decoder = nn.TransformerDecoder(decoder_layer, num_layers= num_decoder_layers)

        self.output_size = output_size
        self.device = device

    def generate_square_mask(self, dim):
        return torch.triu(torch.ones(dim, dim) * float('-inf'), diagonal=1)
        
    def forward (self, src):
        mask = self.generate_square_mask(len(src)).to(self.device)
        print('mask')
        output = self.encoder (src, mask)
        print('encoder')
        output = self.decoder (output, src)
        print('decoder')
        return output

In [41]:
from time import sleep

# Transformer Encoder

model = Transformer (num_encoder_layers=1,
                     num_decoder_layers=1,
                     feature_size=18,
                     output_size=18,
                     num_enc_heads=1,
                     num_dec_heads=1,
                     device = device)

model.forward(train_set)


278
mask
encoder
decoder


tensor([[[-0.5048,  1.4552,  0.0311,  ...,  1.1788, -1.1941, -0.5325],
         [-0.2750,  1.0815, -0.2695,  ...,  1.1060, -0.4341, -0.0790],
         [-0.3090,  1.7259, -0.0692,  ...,  1.0031, -0.7117, -0.8636],
         ...,
         [-0.2621,  1.1847,  0.0406,  ...,  1.1765, -1.1251, -0.3163],
         [-0.4983,  1.5050,  0.0466,  ...,  1.2540, -1.0932, -0.5231],
         [-0.3616,  1.4076,  0.5884,  ...,  0.4921, -1.5822, -0.4251]],

        [[-0.7988,  1.1786,  0.4170,  ...,  0.6609, -0.9185, -0.3317],
         [-0.2227,  1.3878, -0.3199,  ...,  1.2745, -0.4876, -0.0926],
         [-0.3079,  1.7133,  0.5300,  ...,  1.1362, -0.5735, -0.1992],
         ...,
         [-0.1393,  0.7656, -0.5794,  ...,  0.9155, -0.0573, -0.2670],
         [-1.0570,  1.1683, -0.2892,  ...,  1.2640, -1.4210, -0.5937],
         [-0.4628, -0.1701,  0.4189,  ...,  2.4020, -1.3045, -1.1958]],

        [[-0.7574,  1.3791, -0.8945,  ...,  1.1985, -1.4474, -0.3978],
         [-0.4140,  0.2241, -0.2071,  ...,  0

  0%|          | 0/278 [00:00<?, ?it/s]


RuntimeError: ignored

Ideas, things to remember, to search, etc...

reconstruction, vergelich mit base line model