In [1]:
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import easydict


In [25]:
# https://github.com/JoungheeKim/autoencoder-lstm/blob/main/autoencoderLSTM_tutorial(english).ipynb

In [2]:
class Encoder(nn.Module):
    def __init__(self,inputSize=3,hiddenSize=64,layers=2):
        super(Encoder, self).__init__()
        self.hidden = hiddenSize
        self.layers = layers
        self.lstm = nn.LSTM(inputSize, hiddenSize, layers, batch_first=True, dropout=0.1, bidirectional=False)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        output, (hidden, cell) = self.lstm(x)
        return (hidden, cell)

In [3]:
class Decoder(nn.Module):
    def __init__(self,inputSize=3,hiddenSize=64,layers=2,outputSize=3):
        super(Decoder, self).__init__()
        self.hidden = hiddenSize
        self.layers = layers
        self.lstm = nn.LSTM(inputSize, hiddenSize, layers, batch_first=True, dropout=0.1, bidirectional=False)
        self.fc = nn.Linear(hiddenSize, outputSize)
        self.relu = nn.ReLU()

    def forward(self, x, hidden):
        output, (hidden, cell) = self.lstm(x, hidden)
        prediction = self.fc(output)
        return prediction, (hidden, cell)

In [4]:
class Autoencoder(nn.Module):
    def __init__(self,args):
        super(Autoencoder,self).__init__()
        self.encoder = Encoder(args.inputSize, args.hiddenSize, args.layers)
        self.decoder = Decoder(args.inputSize, args.hiddenSize, args.layers, args.inputSize) # inputSize = outputSize - as you want to reconstruct the input
        self.criterion = nn.MSELoss()
        self.layers = args.layers

    def forward(self, x, hidden=None):
        batch_size = x.size(0)
        if hidden is None:
            _, (hidden, cell) = self.encoder(x)
            hidden = hidden.repeat(self.layers, 1, 1)
            cell = cell.repeat(self.layers, 1, 1)
        else:
            hidden, cell = hidden

        decode, _ = self.decoder(x, (hidden, cell))
        return decode

In [29]:
#model = Autoencoder(inputSize=3, hiddenSize=64, layers=2)

In [16]:
def train(args,model,train,test):
    optim = torch.optim.Adam(model.parameters(), lr=args.learningRate)

    epochs = range(args.maxIter//len(train)+1) #fix var name of max_iter

    count = 0
    for epoch in epochs:
        model.train()
        optim.zero_grad()
        for i, data in enumerate(train):
            if count > args.maxIter:
                return model
            count += 1

            input_data = data.unsqueeze(1).to(args.device)
            #print("Shape of input_data:", input_data.shape)  # Add this line to print the shape of input_data

            # Initialize hidden and cell states
            batch_size = input_data.size(0)
            hidden = torch.zeros((args.layers * 1, batch_size, args.hiddenSize)).to(args.device)
            cell = torch.zeros((args.layers * 1, batch_size, args.hiddenSize)).to(args.device)

            # Forward pass
            output = model(input_data, (hidden, cell))
            loss = model.criterion(output, input_data)

            optim.zero_grad()
            loss.backward()
            optim.step()

        model.eval()
        evalLoss = 0
        with torch.no_grad():
            for i, data in enumerate(test):
                input_data = data.unsqueeze(1).to(args.device)
                # Initialize hidden and cell states
                batch_size = input_data.size(0)
                hidden = torch.zeros((args.layers * 1, batch_size, args.hiddenSize)).to(args.device)
                cell = torch.zeros((args.layers * 1, batch_size, args.hiddenSize)).to(args.device)

                # Forward pass
                output = model(input_data, (hidden, cell))
                loss = model.criterion(output, input_data)
                evalLoss += loss.item()
        evalLoss /= len(test)
        print(f"Epoch {epoch} - Eval Loss: {evalLoss}")
    return model
    


In [6]:
## option Setting
args = easydict.EasyDict({
    "batch_size": 64, ## batch size setting
    "device": torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu'), ## whether use GPU
    "inputSize": 3, ## input dimension setting (image is 64x64 = 4096)
    "hiddenSize": 64, ## Hidden dimension setting
    "outputSize": 3, ## output dimension setting
    "layers": 2,     ## number of LSTM layer
    "learningRate" : 0.0005, ## learning rate setting
    "maxIter" : 100, ## max iteration setting
})

In [7]:
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader

# Step 1: Prepare the Dataset
clear = pd.read_csv('testClear.csv')
# Assuming your data is in a single column and each row represents a timestep
timeseries_tensorC = torch.tensor(clear[['value-0','value-1','value-2']].values, dtype=torch.float32)

anom = pd.read_csv('testAnom.csv')
# Assuming your data is in a single column and each row represents a timestep
timeseries_tensorA = torch.tensor(anom[['value-0','value-1','value-2']].values, dtype=torch.float32)



# Step 2: Create a Custom Dataset Class
class TimeSeriesDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Step 3: Use DataLoader to Load the Data
datasetC = TimeSeriesDataset(timeseries_tensorC)
datasetA = TimeSeriesDataset(timeseries_tensorA)
batch_size = 64  # Set your desired batch size
shuffle = False  # Set to True to have the data reshuffled at every epoch

dataLoaderC = DataLoader(datasetC, batch_size=batch_size, shuffle=shuffle)
dataLoaderA = DataLoader(datasetA, batch_size=1, shuffle=shuffle)

# Usage example:
# Iterate through the data_loader to get batches of data
#for batch in data_loader:
#    # Process each batch as needed
#    print(batch)


In [8]:
model = Autoencoder(args)
model.to(args.device)

Autoencoder(
  (encoder): Encoder(
    (lstm): LSTM(3, 64, num_layers=2, batch_first=True, dropout=0.1)
    (relu): ReLU()
  )
  (decoder): Decoder(
    (lstm): LSTM(3, 64, num_layers=2, batch_first=True, dropout=0.1)
    (fc): Linear(in_features=64, out_features=3, bias=True)
    (relu): ReLU()
  )
  (criterion): MSELoss()
)

In [17]:
model = train(args,model,dataLoaderC,dataLoaderC)

Epoch 0 - Eval Loss: 0.0019196676148567349
Epoch 1 - Eval Loss: 0.0018189580005127937
Epoch 2 - Eval Loss: 0.0017798810877138749
Epoch 3 - Eval Loss: 0.0017344260340905748
Epoch 4 - Eval Loss: 0.0016503385122632608
Epoch 5 - Eval Loss: 0.0016008839666028507


In [24]:
# Now you can use the trained model to make predictions on dataLoaderA

model.eval()
with open('output.csv', 'w') as f:
    f.write("timestamp,reconstructionError-0,reconstructionError-1,reconstructionError-2\n")
    with torch.no_grad():
        for i, data in enumerate(dataLoaderA):
            input_data = data.unsqueeze(1).to(args.device)
            # Initialize hidden and cell states
            batch_size = input_data.size(0)
            hidden = torch.zeros((args.layers * 1, batch_size, args.hiddenSize)).to(args.device)
            cell = torch.zeros((args.layers * 1, batch_size, args.hiddenSize)).to(args.device)

            # Forward pass
            output = model(input_data, (hidden, cell))
            outputIndex1 = output[:,:,0] # This would be looped over for each output but can be static for now
            outputIndex2 = output[:,:,1]
            outputIndex3 = output[:,:,2]
            loss1 = model.criterion(outputIndex1, input_data)
            loss2 = model.criterion(outputIndex2, input_data)
            loss3 = model.criterion(outputIndex3, input_data)
            f.write(f"{i},{loss1.item()},{loss2.item()},{loss3.item()}\n")
            

  return F.mse_loss(input, target, reduction=self.reduction)
