# Example of usage for simple forecasting

### Import

In [1]:
import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import mean_squared_error

from cesnet_tszoo.utils.enums import FillerType, ScalerType
from cesnet_tszoo.benchmarks import load_benchmark

### Preparing dataset

In [2]:
benchmark = load_benchmark(identifier="0d523e69c328",  data_root="/some_directory/")
dataset = benchmark.get_initialized_dataset()

File size: 0.01GB
Remaining: 0.01GB


100%|██████████| 9.59M/9.59M [00:00<00:00, 25.2MB/s]
100%|██████████| 283/283 [00:03<00:00, 71.15it/s]


Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_DAY
    Source: SourceType.INSTITUTIONS

    Time series
        Time series IDS: [ 30 222 276  48 243 ... 112  19  15 101 117], Length=283
        Test time series IDS: None
    Time periods
        Train time periods: range(0, 168)
        Val time periods: range(161, 196)
        Test time periods: range(189, 280)
        All time periods: range(0, 280)
    Features
        Taken features: ['n_bytes']
        Default values: [nan]
        Time series ID included: False
        Time included: False
    Sliding window
        Sliding window size: 7
        Sliding window prediction size: 1
        Sliding window step size: 1
        Set shared size: 7
    Fillers
        Filler type: None
    Scalers
        Scaler type: None
    Batch sizes
        Train batch size: 32
        Val batch size: 64
        Test batch size: 128
        All batch size: 128
    Default workers
        Init wor




### Changing used config values

In [3]:
# (optional) Set default value for missing data 
dataset.set_default_values(0)

# (optional) Set filler for filling missing data 
dataset.apply_filler(FillerType.MEAN_FILLER)

# (optional) Set scaller for data
dataset.apply_scaler(ScalerType.MIN_MAX_SCALER)

# (optional) Change sliding window setting
dataset.set_sliding_window(sliding_window_size=24, sliding_window_prediction_size=1, sliding_window_step=1, set_shared_size=24)

# (optional) Change batch sizes
dataset.set_batch_sizes()

# Display final config
dataset.display_config()

100%|██████████| 283/283 [00:03<00:00, 72.20it/s]
100%|██████████| 283/283 [00:04<00:00, 70.63it/s]
100%|██████████| 283/283 [00:03<00:00, 70.90it/s]
100%|██████████| 283/283 [00:03<00:00, 72.05it/s]


Config Details
    Used for database: CESNET-TimeSeries24
    Aggregation: AgreggationType.AGG_1_DAY
    Source: SourceType.INSTITUTIONS

    Time series
        Time series IDS: [ 30 222 276  48 243 ... 112  19  15 101 117], Length=283
        Test time series IDS: None
    Time periods
        Train time periods: range(0, 168)
        Val time periods: range(144, 196)
        Test time periods: range(172, 280)
        All time periods: range(0, 280)
    Features
        Taken features: ['n_bytes']
        Default values: [0.]
        Time series ID included: False
        Time included: False
    Sliding window
        Sliding window size: 24
        Sliding window prediction size: 1
        Sliding window step size: 1
        Set shared size: 24
    Fillers
        Filler type: mean_filler
    Scalers
        Scaler type: min_max_scaler
        Is scaler per Time series: True
        Are scalers premade: False
        Are premade scalers partial_fitted: False
    Batch sizes
      




### Using simple LSTM model

#### Creating class for model

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SimpleLSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=8, output_size=1):
        super(SimpleLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        _, (h_n, _) = self.lstm(x)  # h_n: (1, batch, hidden)
        out = self.fc(h_n[-1])      # (batch, output_size)
        return out.unsqueeze(1)     # (batch, 1, output_size)
    
    def fit(self, train_dataloader, val_dataloader, n_epochs, device):
        self.train()
        criterion = nn.MSELoss()
        optimizer = optim.Adam(self.parameters(), lr=0.01)
        for epoch in range(n_epochs):
            train_losses = []
            val_losses = []
            for (batch_train, batch_val) in zip(train_dataloader, val_dataloader):
                batch_x, batch_y = batch_train
                batch_x = torch.tensor(batch_x, dtype=torch.float32).to(device)
                batch_y = torch.tensor(batch_y, dtype=torch.float32).to(device)

                optimizer.zero_grad()
                output = self(batch_x)
                loss = criterion(output, batch_y)
                loss.backward()
                optimizer.step()
                train_losses.append(loss.item())

                # validation loss
                with torch.no_grad():
                    batch_x_val, batch_y_val = batch_val
                    batch_x_val = torch.tensor(batch_x_val, dtype=torch.float32).to(device)
                    batch_y_val = torch.tensor(batch_y_val, dtype=torch.float32).to(device)
                    val_output = self(batch_x_val)
                    val_loss = criterion(val_output, batch_y_val)
                    val_losses.append(val_loss.item())

    
    def predict(self, test_dataloader, device):
        self.eval()
        all_preds = []
        all_targets = []

        with torch.no_grad():
            for batch_x_test, batch_y_test in test_dataloader:
                batch_x_test = torch.tensor(batch_x_test, dtype=torch.float32).to(device)
                batch_y_test = torch.tensor(batch_y_test, dtype=torch.float32).to(device)

                output = self(batch_x_test)
                all_preds.append(output.cpu().numpy().flatten())
                all_targets.append(batch_y_test.cpu().numpy().flatten())

        y_pred = np.concatenate(all_preds)
        y_true = np.concatenate(all_targets)
        return y_pred, y_true

#### Training model

In [5]:
results = []
for ts_id in tqdm.tqdm(dataset.get_data_about_set(about='train')['ts_ids']):
    model = SimpleLSTM().to(device)
    model.fit(
        dataset.get_train_dataloader(ts_id), 
        dataset.get_val_dataloader(ts_id), 
        n_epochs=5, 
        device=device,
    )
    y_pred, y_true = model.predict(
        dataset.get_test_dataloader(ts_id), 
        device=device,
    )
    
    rmse = mean_squared_error(y_true, y_pred)
    results.append(rmse)


100%|██████████| 283/283 [02:08<00:00,  2.21it/s]


#### Final prediction scores on test set

In [6]:
print(f"Mean RMSE: {np.mean(results):.6f}")
print(f"Std RMSE: {np.std(results):.6f}") 

Mean RMSE: 0.082187
Std RMSE: 0.146893
