<h1>Dual Adversarial Decoders with Adaptive Bottlenecks for time series forecasting</h1>

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime

import sys
from tqdm import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler

plt.style.use('dark_background')

# Creating a data loader

In [6]:
class TimeSeriesDataset(Dataset):
    def __init__(self, path, seq_length, step_size, split='train', train_size=0.7, val_size=0.15, scaler=None):
        '''
        Args:
            path (str): path to csv file with data
            seq_length (int): sequence length for input data
            step_size (int): steps count forward for prediction (horizon)
            split (str): type of sample ('train', 'val', 'test')
            train_size (float): data proportion for train sample
            val_size (float): data proportion for val sample -> test_size = 1 - train_size - val_size
            scaler (sklearn.preprocessing)
        '''
        
        cols = ['time', 'temp_ice', 'flood']
        self.data = pd.read_csv(path, usecols=cols)
        self.seq_length = seq_length
        self.step_size = step_size
        self.scaler = scaler

        self.data['time'] = pd.to_datetime(self.data['time'])
        self.data['min_sin'] = np.sin(2 * np.pi * self.data['time'].dt.minute / 60)
        self.data['min_cos'] = np.cos(2 * np.pi * self.data['time'].dt.minute / 60)
        self.data['hour_sin'] = np.sin(2 * np.pi * self.data['time'].dt.hour / 24)
        self.data['hour_cos'] = np.cos(2 * np.pi * self.data['time'].dt.hour / 24)
        self.data['dow_sin'] = np.sin(2 * np.pi * self.data['time'].dt.day_of_week / 7)
        self.data['dow_cos'] = np.cos(2 * np.pi * self.data['time'].dt.day_of_week / 7)

        self.features = ['temp_ice', 'flood', 'min_sin', 'min_cos', 'hour_sin', 'hour_cos', 'dow_sin', 'dow_cos']
        
        self.scaler_features = ['temp_ice']

        # split train, validation and test sampling
        n = len(self.data)
        train_end = int(n * train_size)
        val_end = train_end + int(n * val_size)

        if split == 'train':
            self.data = self.data[:train_end]
            self.scaler = MinMaxScaler()
            self.scaler.fit(self.data[self.scaler_features])
        elif split == 'val':
            self.data = self.data[train_end:val_end]
        elif split == 'test':
            self.data = self.data[val_end:]
        else:
            raise ValueError('split must be "train", "val" or "test"')

        if self.scaler is not None:
            self.data[self.scaler_features] = self.scaler.transform(self.data[self.scaler_features])
        else:
            raise ValueError('Scaler not defined, create a train_dataset instance for it')
        

    def __len__(self):
        return len(self.data) - self.seq_length - self.step_size + 1
    

    def __getitem__(self, idx):
        x = self.data.iloc[idx: idx + self.seq_length][self.features].values
        y = self.data.iloc[idx + self.seq_length + self.step_size - 1]['flood']

        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)
    


path = '../data/resurfacing_marking_data.csv'
seq_length = 60
step_size = 30
batch_size = 256

train_dataset = TimeSeriesDataset(path, seq_length, step_size, split='train', train_size=0.7, val_size=0.15)
val_dataset = TimeSeriesDataset(path, seq_length, step_size, split='val', train_size=0.7, val_size=0.15, scaler=train_dataset.scaler)
test_dataset = TimeSeriesDataset(path, seq_length, step_size, split='test', train_size=0.7, val_size=0.15, scaler=train_dataset.scaler)

In [7]:
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cpu'

# Creating a model

In [10]:
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_prob=0.2):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        # LSTM слой
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_prob)
        
        # Полносвязный слой для преобразования выхода LSTM
        self.fc = nn.Linear(hidden_size, output_size)
        
        # Сигмоидная функция активации на выходе
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        # Инициализация скрытых и ячеечных состояний
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        c0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        
        # Прямое распространение через LSTM
        out, _ = self.lstm(x, (h0, c0))
        
        # Получение последнего временного шага
        out = out[:, -1, :]
        
        # Пропуск через полносвязный слой
        out = self.fc(out)
        
        # Применение сигмоидной активации
        out = self.sigmoid(out)
        return out

# Параметры модели
input_size = 10  # Количество входных признаков
hidden_size = 64  # Количество нейронов в LSTM слое
num_layers = 2  # Количество слоев LSTM
output_size = 1  # Размер выхода (вероятность)

# Инициализация модели
lstm_model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)
bce_loss_model = nn.BCELoss()
opt = torch.optim.Adam(lstm_model.parameters(), lr=0.001)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(opt, mode='min', factor=0.1, patience=5)


In [20]:
tensor = torch.rand([1, 60, 8], dtype=torch.float32)  # batch_size=1, sequence_length=60, input_size=8

In [22]:
tensor = torch.rand([256, 60, 8], dtype=torch.float32).to(device)
lstm_model(tensor)

RuntimeError: input.size(-1) must be equal to input_size. Expected 10, got 8

In [129]:
# Предположим, x имеет размер [batch_size, seq_length, input_dim]
x = torch.randn(32, 100, input_dim)  # Пример входных данных

# Получение выходных данных модели
output_normal, output_anomaly = dada_model(x)

# Вывод результатов
print("Normal Output:", output_normal)
print("Anomaly Output:", output_anomaly)

RuntimeError: The size of tensor a (32) must match the size of tensor b (64) at non-singleton dimension 1