# PatchTST for the forecast of German Energy consumption

TODO
- Import necessary code
- See Nick's structure and write a similar notebook with similar sections.
- Check where the predictions are produced in the original PatchTST repo code
- Check whether I can simply discard the first 56 outputs
- Run the model with my dataset and with the regular dataset while discarding 56 outputs, and see which is more accurate
- If I can't find it, just run the model with my code, verify it works (how?) and stick to that way of windowing
- Create new dataset class for the supermarket data (gap?)
- Order everything in this notebook

In [None]:
import random

import numpy as np
import pandas as pd
import torch

import PatchTST
from utils.tools import EarlyStopping, adjust_learning_rate


## Loading data

PyTorch uses `Dataset` and `DataLoader` classes to feed data to the model. To prepare for these classes, we load the data and store it as a .csv file.

In [None]:
# Seed every RNG used in this notebook (PyTorch, Python stdlib, NumPy) with the same value
SEED = 42
for _seed_fn in (torch.manual_seed, random.seed, np.random.seed):
    _seed_fn(SEED)

In [None]:
# Compute device: CUDA when PyTorch reports it as available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
# Parameters

# PatchTST parameters
seq_len = 672  # context window
pred_len = 96  # forecast horizon
patch_len = 16
stride = 8
# PatchTST defaults
enc_in = 1
e_layers = 3
n_heads = 16
d_model = 128
d_ff = 256
dropout = 0.2
fc_dropout = 0.2
head_dropout = 0
individual_head = 0  # True 1 False 0
padding_patch = 'end'  # end: padding on the end
revin = 1  # True 1 False 0
affine = 0  # True 1 False 0
subtract_last = 0  # 0: subtract mean; 1: subtract last
decomposition = 0  # decomposition; True 1 False 0
kernel_size = 25

# Training parameters
batch_size = 32  # default 128
learning_rate = 0.0001
num_epochs = 100
label_len = 0  # start token length | default 48
pct_start = 0.3  # for cosine warmup
patience = 20
features = 'S'  # 'S' single variable; 'MS' multivariate
debugging = True
if debugging:
    # Shrink the run for a quick smoke test. The overrides must rebind the
    # variables above (the ones copied into `configs` and `setting`); assigning
    # them as attributes on the imported PatchTST module would leave the model
    # at its full size.
    num_epochs = 2
    batch_size = 2
    e_layers = 1
    d_model = 8
    d_ff = 16
    n_heads = 2
    num_workers = 0
else:
    num_workers = 10
# drop_last=True


In [None]:
# model_id_name='SMARD_PatchTST_seq' + str(seq_len) + '_pred' + str(pred_len)+'_patch'+str(patch_len)+'_stride'+str(stride)

In [None]:
from argparse import Namespace

# Collect the model hyper-parameters by name, then expose them as attributes
# on the object that is later passed to PatchTST.Model.
_model_hparams = {
    "enc_in": enc_in,
    "seq_len": seq_len,
    "pred_len": pred_len,
    "e_layers": e_layers,
    "n_heads": n_heads,
    "d_model": d_model,
    "d_ff": d_ff,
    "dropout": dropout,
    "fc_dropout": fc_dropout,
    "head_dropout": head_dropout,
    "individual": individual_head,  # stored under the shorter key `individual`
    "patch_len": patch_len,
    "stride": stride,
    "padding_patch": padding_patch,
    "revin": revin,
    "affine": affine,
    "subtract_last": subtract_last,
    "decomposition": decomposition,
    "kernel_size": kernel_size,
}
configs = Namespace(**_model_hparams)


In [None]:
# Loading from the raw source is deferred; for now read data already stored as .csv
# Both names refer to the directory that holds the data file.
root_path_name = data_path = '../experiments/PatchTST_supervised/dataset/'
data_path_name = 'SMARD_converted.csv'  # file name inside that directory

In [None]:
# Run identifier: data file name followed by tagged hyper-parameters, joined by
# underscores. It is used as the checkpoint sub-folder name.
setting = "_".join([
    f"{data_path_name}",
    f"ft{features}",
    f"sl{seq_len}",
    f"ll{label_len}",
    f"pl{pred_len}",
    f"dm{d_model}",
    f"nh{n_heads}",
    f"el{e_layers}",
    f"df{d_ff}",
])

In [None]:
from datasets import Dataset_Custom, Dataset_SMARD


def _load_split(flag):
    """Build the Dataset_SMARD for one split; only `flag` differs between splits."""
    return Dataset_SMARD(
        root_path=root_path_name,
        data_path=data_path_name,
        flag=flag,
        size=[seq_len, label_len, pred_len],
        features=features,
        target='OT',
        split_mode='fixed',
    )


# Load data
train_data = _load_split('train')
val_data = _load_split('val')
test_data = _load_split('test')

In [None]:
from torch.utils.data import DataLoader


def _make_loader(dataset, shuffle):
    """Wrap `dataset` in a DataLoader using the notebook-wide batch settings.

    drop_last=True is applied to every loader, so a final incomplete batch is
    discarded for validation and test as well as for training.
    """
    return DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        drop_last=True,
    )


# Only the training loader shuffles (open question from the author: should this be False?)
train_loader = _make_loader(train_data, shuffle=True)
val_loader = _make_loader(val_data, shuffle=False)
test_loader = _make_loader(test_data, shuffle=False)

In [None]:
# Print the leading three rows of the input and target for the first three test samples
for i in (0, 1, 2):
    seq_x, seq_y = test_data[i]
    for _label, _seq in (
        (f"Sample {i} — seq_x[0:3]:\n", seq_x),
        (f"Sample {i} — seq_y[0:3]:\n", seq_y),
    ):
        print(_label, _seq[:3])


In [None]:
# Pull one batch from the test loader to sanity-check shapes and sizes
examples = iter(test_loader)
example_batch = next(examples)
print(f"shape of this example: {example_batch[0].shape}")
# example_batch is the (inputs, targets) pair, so len(example_batch) is the
# number of tensors in the pair; the number of samples is the leading dimension
# of the input tensor.
print(f"num of examples in each batch: {example_batch[0].shape[0]}")

# Ask the loader itself for the batch count rather than its iterator
print(f"num of batches: {len(test_loader)}")


In [None]:
import os

# Initialize model
model = PatchTST.Model(configs).to(device)
print(model)

# Checkpoint folder for this run. exist_ok=True creates it when missing and is a
# no-op when it already exists, without a separate exists() check that could race.
path = os.path.join('./checkpoints/', setting)
os.makedirs(path, exist_ok=True)

# Number of batches per epoch; the LR schedule below is sized from it
train_steps = len(train_loader)

# Define loss function and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# One-cycle schedule spanning num_epochs * train_steps scheduler steps,
# peaking at learning_rate after the pct_start fraction of them
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                                steps_per_epoch=train_steps,
                                                pct_start=pct_start,
                                                epochs=num_epochs,
                                                max_lr=learning_rate)
early_stopping = EarlyStopping(patience=patience, verbose=True)


In [None]:
def vali(vali_loader, criterion):
    """Return the average per-batch loss of the global ``model`` on ``vali_loader``.

    The model is switched to eval mode and gradients are disabled for the pass;
    it is switched back to train mode before returning. Reads the notebook
    globals ``model``, ``device``, ``features`` and ``pred_len``.

    Args:
        vali_loader: iterable yielding ``(batch_x, batch_y)`` tensor pairs.
        criterion: callable ``criterion(pred, true)`` returning a scalar tensor.

    Returns:
        The unweighted mean of the per-batch losses (NumPy scalar).
    """
    batch_losses = []
    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in vali_loader:
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float()

            outputs = model(batch_x)

            # Keep the last pred_len steps; for 'MS' keep only the last feature
            # channel, otherwise keep all channels.
            f_dim = -1 if features == 'MS' else 0
            # The loss is computed on the CPU, so only the predictions need moving
            pred = outputs[:, -pred_len:, f_dim:].detach().cpu()
            true = batch_y[:, -pred_len:, f_dim:].detach().cpu()

            # Store plain floats rather than 0-d tensors
            batch_losses.append(criterion(pred, true).item())
    total_loss = np.average(batch_losses)
    model.train()
    return total_loss

In [None]:
# Train the model

import time

for epoch in range(num_epochs):
    iter_count = 0
    train_loss = []
    time_now = time.time()
    model.train()
    epoch_time = time.time()
    for i, (batch_x, batch_y) in enumerate(train_loader):
        iter_count += 1
        optimizer.zero_grad()
        batch_x = batch_x.float().to(device)
        batch_y = batch_y.float().to(device)

        # The model is called with the input window only
        outputs = model(batch_x)

        # Keep the last pred_len steps; for 'MS' keep only the last feature
        # channel, otherwise keep all channels.
        f_dim = -1 if features == 'MS' else 0
        outputs = outputs[:, -pred_len:, f_dim:]
        batch_y = batch_y[:, -pred_len:, f_dim:]
        loss = criterion(outputs, batch_y)
        train_loss.append(loss.item())

        # Progress report every 100 iterations, with a rough remaining-time estimate
        if (i + 1) % 100 == 0:
            print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
            speed = (time.time() - time_now) / iter_count
            left_time = speed * ((num_epochs - epoch) * train_steps - i)
            print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
            iter_count = 0
            time_now = time.time()

        loss.backward()
        optimizer.step()

        # The schedule is sized in batches (steps_per_epoch * epochs), so it
        # advances once per optimizer step. The scheduler writes the new LR into
        # the optimizer itself; no manual param_group update is needed.
        scheduler.step()

    print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
    train_loss = np.average(train_loss)
    vali_loss = vali(val_loader, criterion)
    test_loss = vali(test_loader, criterion)

    print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
        epoch + 1, train_steps, train_loss, vali_loss, test_loss))
    early_stopping(vali_loss, model, path)
    if early_stopping.early_stop:
        print("Early stopping")
        break

    print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))

# Reload the checkpoint from this run's folder (presumably the best model, written
# by EarlyStopping — confirm in utils.tools). map_location keeps the load working
# when the checkpoint tensors were saved from a different device than `device`.
best_model_path = path + '/' + 'checkpoint.pth'
model.load_state_dict(torch.load(best_model_path, map_location=device))