In [17]:
import torch
from torch.nn import functional as F
from torch import nn
from torch.utils.data import DataLoader, Dataset
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import pyreadr
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:
# load data
df = pd.DataFrame()

for path in glob.glob("data/*.RData"):
    _df = pyreadr.read_r(path)
    k = list(_df.keys())[0]
    _df =  _df[k]
    df = pd.concat([df, _df])

df = df.reset_index()
df

Unnamed: 0,index,faultNumber,simulationRun,sample,xmeas_1,xmeas_2,xmeas_3,xmeas_4,xmeas_5,xmeas_6,...,xmv_2,xmv_3,xmv_4,xmv_5,xmv_6,xmv_7,xmv_8,xmv_9,xmv_10,xmv_11
0,0,0.0,1.0,1,0.25171,3672.4,4466.3,9.5122,27.057,42.473,...,54.494,24.527,59.710,22.357,40.149,40.074,47.955,47.300,42.100,15.345
1,1,0.0,1.0,2,0.25234,3642.2,4568.7,9.4145,26.999,42.586,...,53.269,24.465,60.466,22.413,39.956,36.651,45.038,47.502,40.553,16.063
2,2,0.0,1.0,3,0.24840,3643.1,4507.5,9.2901,26.927,42.278,...,54.000,24.860,60.642,22.199,40.074,41.868,44.553,47.479,41.341,20.452
3,3,0.0,1.0,4,0.25153,3628.3,4519.3,9.3347,26.999,42.330,...,53.860,24.553,61.908,21.981,40.141,40.066,48.048,47.440,40.780,17.123
4,4,0.0,1.0,5,0.21763,3655.8,4571.0,9.3087,26.901,42.402,...,53.307,21.775,61.891,22.412,37.696,38.295,44.678,47.530,41.089,18.681
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15329995,4999995,20.0,500.0,496,0.23419,3655.3,4461.7,9.3448,27.008,42.481,...,53.670,23.350,61.061,20.719,40.999,38.653,47.386,47.528,40.212,17.659
15329996,4999996,20.0,500.0,497,0.26704,3647.4,4540.2,9.3546,27.034,42.671,...,54.650,26.362,60.020,20.263,41.579,33.624,47.536,47.647,41.199,18.741
15329997,4999997,20.0,500.0,498,0.26543,3630.3,4571.6,9.4089,27.129,42.470,...,54.274,26.521,59.824,20.189,41.505,40.967,52.437,47.802,41.302,23.199
15329998,4999998,20.0,500.0,499,0.27671,3655.7,4498.9,9.3781,27.353,42.281,...,53.506,26.781,62.818,20.453,40.208,40.957,47.628,48.086,40.510,15.932


In [3]:
# preprocess data
df["faultNumber"] = df["faultNumber"].astype(int)
df = df.drop(["simulationRun", "sample", "index"], axis=1)

In [4]:
# remove 3, 9 and 15
mask = ~df["faultNumber"].isin([3, 9, 15])

In [5]:
df = df[mask]
df["faultNumber"].unique()

array([ 0,  1,  2,  4,  5,  6,  7,  8, 10, 11, 12, 13, 14, 16, 17, 18, 19,
       20])

In [6]:
# features
X = df.loc[:, df.columns != "faultNumber"].values
# labels
y = df["faultNumber"].values

X.shape, y.shape

((13140000, 52), (13140000,))

In [7]:
def create_samples(X, y, lookback=5):
    x_out = []
    y_out = []
    with tqdm(total=len(X)-lookback-1) as pbar:
        for i in range(len(X)-lookback-1):
            _x = X[i:i+lookback, :]
            _y = y[i+lookback+1]
            x_out.append(_x)
            y_out.append(_y)
            pbar.update(1)

    print("Preparing numpy return. This could take some seconds.")
    return np.array(x_out), np.array(y_out)

In [8]:
_x, _y = create_samples(X, y)
_x.shape, _y.shape

100%|█████████████████████████████████████████████████████████████████| 13139994/13139994 [00:05<00:00, 2405361.40it/s]


Preparing numpy return. This could take some seconds.


((13139994, 5, 52), (13139994,))

### Scaler
The recommended way (see 'Elements of Statistical Learning', chapter 'The Wrong and Right Way to Do Cross-validation') is to calculate the **mean** and the **standard deviation** of the values in the **training set** and then **apply them for standardizing both the training and testing sets**.

The idea behind this is to preven**t data leaka**ge from the testing to the training set because the aim of model validation is to subject the testing data to the same conditions as the data used for the model training.

[Link](https://datascience.stackexchange.com/questions/63717/how-to-use-standardization-standardscaler-for-train-and-test)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(_x, _y, test_size=0.3, random_state=1)

# LSTM VAE

### Encoder

In [10]:
class Encoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=False
        )

    def forward(self, X):
        output, (hidden_state, cell_state) = self.lstm(X)
        return hidden_state

### Decoder

In [11]:
class Decoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=False
        )
        # reconstruction
        self.linear_recon = nn.Linear(hidden_size, output_size)

    def forward(self, X):
        output, (hidden_state, cell_state) = self.lstm(X)
        return self.linear_recon(output)

### VAE

In [12]:
class VAE(nn.Module):
    def __init__(self, input_size, hidden_size, latent_size, num_layers, device):
        super(VAE, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.latent_size = latent_size
        self.device = device

        self.encoder = Encoder(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers
        ).to(self.device)

        self.decoder = Decoder(
            input_size=latent_size, # compressed vector size
            hidden_size=hidden_size,
            output_size=input_size, # reconstruction of features at timestep
            num_layers=num_layers
        ).to(self.device)

        self.mean_linear = nn.Linear((self.hidden_size*self.num_layers), self.latent_size).to(self.device)
        self.logvar_linear = nn.Linear((self.hidden_size*self.num_layers), self.latent_size).to(self.device)
        #self.compressed_linear = nn.Linear(self.latent_size, self.hidden_size).to(self.device)

    def reparametrize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        # Gaussian (normal)
        noise = torch.randn_like(std, device=self.device)
        return mu + (noise * std)

    def forward(self, X):
        batch_size, seq_len, features_dim = X.shape

        # encode
        # tensor of shape: 1*num_layers, batch_size, hidden_size
        enc_hidden = self.encoder(X)
        enc_hidden = enc_hidden.transpose(0, 1).contiguous().view(batch_size, -1) # (batch_size, hidden_size*num_layers)

        # extract latent variable z (hidden to latent)
        _mean = self.mean_linear(enc_hidden)
        _logvar = self.logvar_linear(enc_hidden)
        _z = self.reparametrize(_mean, _logvar) # Shape: batch_size, latent_size
        _z = _z.unsqueeze(1).repeat(1, seq_len, 1)

        # hidden state
        #_h = self.compressed_linear(_z) # batch_size, hidden_size
        #_h = _h.unsqueeze(1).repeat(1, seq_len, 1)

        pred = self.decoder(_z)

        return pred, _mean, _logvar


In [32]:
def vae_loss(pred, label, _mean, _logvar):
    #reconstruction_loss = nn.BCELoss(pred, label)
    reconstruction_loss = F.mse_loss(pred, label)
    kld_loss = torch.mean(
            -0.5 * torch.sum(1 + _logvar - _mean**2 - _logvar.exp(), dim=1), dim=0
        )
    loss = reconstruction_loss + kld_loss #*kld_weight
    return loss, reconstruction_loss

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [18]:
class TEP(Dataset):
    def __init__(self, x, y):
        super(TEP, self).__init__()
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        _x = self.x[idx]
        _y = self.y[idx]
        return _x, _y

In [48]:
train_data = TEP(X_train, y_train)
trainloader = DataLoader(train_data, batch_size=64, shuffle=True)

In [22]:
sample_x, samle_y = next(iter(trainloader))

In [23]:
sample_x.shape

torch.Size([1, 5, 52])

## Overfitting on one example

In [42]:
num_epochs = 2000

sample_x = sample_x.to(torch.float32).to(device)

learning_rate = 0.002

model = VAE(input_size=52, hidden_size=128, latent_size=16, num_layers=12, device=device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

model.train()
for i in range(num_epochs):
    pred, _mean, _logvar = model(sample_x)
    loss, reconstruction_loss = vae_loss(pred, sample_x, _mean, _logvar)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    #print(f"Reconstruction Loss: {reconstruction_loss}")

print(reconstruction_loss)

tensor(879209.2500, device='cuda:0', grad_fn=<MseLossBackward0>)


In [63]:
num_epochs = 10

# trainloader
train_data = TEP(X_train, y_train)
trainloader = DataLoader(train_data, batch_size=256, shuffle=True)

# model
learning_rate = 0.0002
model = VAE(input_size=52, hidden_size=128, latent_size=32, num_layers=8, device=device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

model.train()
for i in range(num_epochs):
    r_losses = []
    loop = tqdm(enumerate(trainloader))
    for _, (x, y) in loop:
        x = x.to(torch.float32).to(device)
        pred, _mean, _logvar = model(x)
        loss, reconstruction_loss = vae_loss(pred, x, _mean, _logvar)
    
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        _r = reconstruction_loss.detach().item()
        r_losses.append(_r)

        loop.set_postfix(loss=_r)

    print(f"Reconstruction Loss: {np.mean(r_losses)}")

7958it [02:17, 58.03it/s, loss=9.9e+5] 


KeyboardInterrupt: 