In [None]:
from torch import rand
from argparse import Namespace
import random

import numpy as np
import pandas as pd
import torch
import os

import datetime
from deutschland import feiertage
from deutschland.feiertage.api import default_api
configuration = feiertage.Configuration(
    host = "https://feiertage-api.de/api"
)

import PatchTST
from utils.tools import EarlyStopping
from datasets import Dataset_Custom, Dataset_SMARD
from torch.utils.data import DataLoader

import time
from scipy import stats

import seaborn as sns
import matplotlib.pyplot as plt

from matplotlib.colors import TwoSlopeNorm
import matplotlib.dates as mdates


In [None]:
def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    The same value is applied to PyTorch (CPU generator and all CUDA
    devices), Python's ``random`` module and NumPy's global generator.
    cuDNN is then put in deterministic mode with benchmarking disabled.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42.
    """
    # Seeding order is kept: torch, random, numpy, then the CUDA generators.
    seeders = (
        torch.manual_seed,
        random.seed,
        np.random.seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [None]:
def get_configs(
    name,
    enc_in=1,
    seq_len=672,
    pred_len=96,
    e_layers=3,
    n_heads=16,
    d_model=128,
    d_ff=256,
    dropout=0.2,
    fc_dropout=0.2,
    head_dropout=0,
    individual_head=0,
    patch_len=16,
    stride=8,
    padding_patch='end',
    revin=1,
    affine=0,
    subtract_last=0,
    decomposition=0,
    kernel_size=25,  # last of the PatchTST model parameters
    label_len=0,
    features='S',
    batch_size=128,
    learning_rate=0.0001,
    num_epochs=100,
    pct_start=0.4,  # used by the learning-rate scheduler
    patience=20,
    root_path_name='./dataset/',
    data_path_name='SMARD_converted.csv',
    num_workers=10,
    window_split=1,
    shuffle_train=True,
):
    """Bundle all experiment settings into a single ``argparse.Namespace``.

    Every argument is stored under an attribute of the same name, with one
    exception: ``individual_head`` is exposed as ``configs.individual``.

    Args:
        name: Experiment label; the other functions in this notebook use it
            as the prefix of the checkpoint directory name.
        enc_in ... kernel_size: Settings handed to ``PatchTST.Model``.
        label_len, features, window_split: Dataset construction options.
        batch_size, num_workers, shuffle_train: Data-loader options.
        learning_rate, num_epochs, pct_start, patience: Training, scheduler
            and early-stopping options.
        root_path_name, data_path_name: Location of the input CSV file.

    Returns:
        Namespace holding one attribute per setting, in declaration order.
    """
    settings = {
        # --- model ---
        "name": name,
        "enc_in": enc_in,
        "seq_len": seq_len,
        "pred_len": pred_len,
        "e_layers": e_layers,
        "n_heads": n_heads,
        "d_model": d_model,
        "d_ff": d_ff,
        "dropout": dropout,
        "fc_dropout": fc_dropout,
        "head_dropout": head_dropout,
        "individual": individual_head,  # attribute name differs from the argument
        "patch_len": patch_len,
        "stride": stride,
        "padding_patch": padding_patch,
        "revin": revin,
        "affine": affine,
        "subtract_last": subtract_last,
        "decomposition": decomposition,
        "kernel_size": kernel_size,
        # --- data and training ---
        "label_len": label_len,
        "features": features,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
        "num_epochs": num_epochs,
        "pct_start": pct_start,
        "patience": patience,
        "root_path_name": root_path_name,
        "data_path_name": data_path_name,
        "num_workers": num_workers,
        "window_split": window_split,
        "shuffle_train": shuffle_train,
    }
    return Namespace(**settings)

In [None]:
def get_datasets(configs):
    """Create the train, validation and test ``Dataset_SMARD`` splits.

    All three splits read the same file with the same window sizes, feature
    mode, target column ('OT'), fixed split mode and scaling enabled. Only
    the training split additionally receives ``configs.window_split``.

    Args:
        configs: Settings object providing ``root_path_name``,
            ``data_path_name``, ``seq_len``, ``label_len``, ``pred_len``,
            ``features`` and ``window_split``.

    Returns:
        Tuple ``(train_data, val_data, test_data)``.
    """
    def _make_split(flag, **extra):
        # The size list is rebuilt on every call so no split shares it.
        return Dataset_SMARD(
            root_path=configs.root_path_name,
            data_path=configs.data_path_name,
            flag=flag,
            size=[configs.seq_len, configs.label_len, configs.pred_len],
            features=configs.features,
            target='OT',
            split_mode='fixed',
            scale=True,
            **extra,
        )

    train_data = _make_split('train', window_split=configs.window_split)
    val_data = _make_split('val')
    test_data = _make_split('test')
    return train_data, val_data, test_data

In [None]:
# def get_non_augmented_train_dataset(configs):
#     train_data = Dataset_SMARD(root_path=configs.root_path_name,
#                                 data_path=configs.data_path_name,
#                                 flag='train',
#                                 size=[configs.seq_len, configs.label_len, configs.pred_len],
#                                 features=configs.features, 
#                                 target='OT',
#                                 split_mode='fixed',
#                                 scale=True)
    
#     return train_data

In [None]:
def get_dataloaders(configs, train_data, val_data, test_data):
    """Wrap the three dataset splits in PyTorch ``DataLoader`` objects.

    All loaders share ``configs.batch_size`` and ``configs.num_workers``.
    The training loader shuffles according to ``configs.shuffle_train`` and
    drops the last incomplete batch; the validation and test loaders keep
    the original order and every sample.

    Args:
        configs: Settings object providing ``batch_size``, ``num_workers``
            and ``shuffle_train``.
        train_data: Dataset used for training.
        val_data: Dataset used for validation.
        test_data: Dataset used for testing.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    def _build(dataset, shuffle, drop_last):
        return DataLoader(
            dataset,
            batch_size=configs.batch_size,
            shuffle=shuffle,
            num_workers=configs.num_workers,
            drop_last=drop_last,
        )

    # Shuffling of the training windows is an experiment setting (see shuffle_train).
    train_loader = _build(train_data, shuffle=configs.shuffle_train, drop_last=True)
    val_loader = _build(val_data, shuffle=False, drop_last=False)
    test_loader = _build(test_data, shuffle=False, drop_last=False)
    return train_loader, val_loader, test_loader

In [None]:
# def get_no_shuffle_train_loader(configs, train_data):

#         train_loader = DataLoader(
#                 train_data,
#                 batch_size=configs.batch_size,
#                 shuffle=False,
#                 num_workers=configs.num_workers,
#                 drop_last=True)

#         return train_loader

In [None]:
def initialize_model(configs, device, train_loader, seed):
    """Create the model and everything needed to train it for one seed.

    Builds a ``PatchTST.Model`` on ``device``, makes sure the checkpoint
    directory for this configuration/seed exists, and sets up the MSE loss,
    an Adam optimizer, a one-cycle learning-rate schedule that spans
    ``configs.num_epochs`` epochs of ``len(train_loader)`` steps each, and
    an ``EarlyStopping`` helper.

    Args:
        configs: Settings object (see ``get_configs``).
        device: Torch device the model is moved to.
        train_loader: Training loader; only its length is used here.
        seed: Random seed of this run; becomes part of the directory name.

    Returns:
        Tuple ``(model, criterion, optimizer, scheduler, early_stopping,
        weights_path)`` where ``weights_path`` is the checkpoint directory.
    """
    model = PatchTST.Model(configs).to(device)
    print(model)

    # Directory name encodes the hyper-parameters that vary between experiments.
    setting = f'{configs.name}ft{configs.features}_pl{configs.patch_len}_st{configs.stride}_dm{configs.d_model}_nh{configs.n_heads}_el{configs.e_layers}_df{configs.d_ff}_ds{configs.dropout}_eb{configs.batch_size}_seed{seed}'

    weights_path = os.path.join('./checkpoints/', setting)
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(weights_path, exist_ok=True)

    train_steps = len(train_loader)
    # Define loss function and optimizer
    criterion = torch.nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=configs.learning_rate)
    # The scheduler is meant to be stepped once per batch, hence steps_per_epoch.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer=optimizer,
                                                    steps_per_epoch=train_steps,
                                                    pct_start=configs.pct_start,
                                                    epochs=configs.num_epochs,
                                                    max_lr=configs.learning_rate)
    early_stopping = EarlyStopping(patience=configs.patience, verbose=True)

    return model, criterion, optimizer, scheduler, early_stopping, weights_path


In [None]:
def vali(model, configs, device, val_loader, criterion):
    """Return the mean per-batch loss of ``model`` over ``val_loader``.

    The model is switched to eval mode, evaluated without gradients, and
    switched back to train mode before returning. The result is the
    unweighted average of the individual batch losses (a smaller final
    batch counts as much as a full one).

    Args:
        model: Network called as ``model(batch_x)``.
        configs: Settings object providing ``features`` and ``pred_len``.
        device: Torch device the batches are moved to.
        val_loader: Iterable yielding ``(batch_x, batch_y)`` pairs.
        criterion: Loss callable applied as ``criterion(prediction, target)``.

    Returns:
        NumPy float with the average batch loss.
    """
    # With 'MS' only the last channel is scored, otherwise all channels.
    f_dim = -1 if configs.features == 'MS' else 0

    batch_losses = []
    model.eval()
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            outputs = model(batch_x)[:, -configs.pred_len:, f_dim:]
            targets = batch_y[:, -configs.pred_len:, f_dim:]

            # Store plain floats (as train_model does) instead of tensors.
            batch_losses.append(criterion(outputs, targets).item())

    model.train()
    return np.average(batch_losses)

In [None]:

def train_model(configs, model, train_loader, val_loader, test_loader, device, criterion, optimizer, scheduler, early_stopping, weights_path):
    """Train ``model`` for up to ``configs.num_epochs`` epochs with early stopping.

    Each epoch runs one optimisation step per training batch, stepping the
    learning-rate scheduler after every batch. After the epoch the
    validation loss is computed with ``vali`` and handed to
    ``early_stopping(val_loss, model, weights_path)``. When the helper sets
    ``early_stop``, the test loss is evaluated once, a summary line is
    printed and training ends.

    Args:
        configs: Settings object providing ``num_epochs``, ``features`` and
            ``pred_len``.
        model: Network to optimise (updated in place).
        train_loader: Iterable of ``(batch_x, batch_y)`` training batches.
        val_loader: Batches used for the per-epoch validation loss.
        test_loader: Batches used for the test loss reported on early stop.
        device: Torch device the batches are moved to.
        criterion: Loss callable.
        optimizer: Optimizer bound to ``model``'s parameters.
        scheduler: Learning-rate scheduler, stepped once per batch.
        early_stopping: Callable helper exposing an ``early_stop`` flag.
            NOTE(review): presumably it also writes the best checkpoint to
            ``weights_path`` (defined in utils.tools) - confirm.
        weights_path: Directory passed through to ``early_stopping``.
    """
    # With 'MS' only the last channel is scored, otherwise all channels.
    f_dim = -1 if configs.features == 'MS' else 0

    for epoch in range(configs.num_epochs):
        train_loss = []
        model.train()
        epoch_time = time.time()
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            outputs = model(batch_x)[:, -configs.pred_len:, f_dim:]
            targets = batch_y[:, -configs.pred_len:, f_dim:]
            loss = criterion(outputs, targets)
            train_loss.append(loss.item())

            loss.backward()
            optimizer.step()

            # The scheduler alone owns the learning rates. The former manual
            # override copied the first group's rate into every param group,
            # which would clobber per-group rates for chainable schedulers.
            scheduler.step()

        print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
        train_loss = np.average(train_loss)
        val_loss = vali(model, configs, device, val_loader, criterion)

        early_stopping(val_loss, model, weights_path)
        if early_stopping.early_stop:
            print("Early stopping")
            # The test split is only evaluated once, when training stops early.
            test_loss = vali(model, configs, device, test_loader, criterion)
            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
            epoch + 1, len(train_loader), train_loss, val_loss, test_loss))
            break

        print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))

In [None]:
def test(configs, model, loader, device):
    preds = []
    trues = []
    inputx = []

    model.eval()
    with torch.no_grad():
        for i, (batch_x, batch_y) in enumerate(loader):
            batch_x = batch_x.float().to(device)
            batch_y = batch_y.float().to(device)

            outputs = model(batch_x)
            f_dim = -1 if configs.features == 'MS' else 0
            outputs = outputs[:, -configs.pred_len:, f_dim:]
            batch_y = batch_y[:, -configs.pred_len:, f_dim:].to(device)
            outputs = outputs.detach().cpu().numpy()
            batch_y = batch_y.detach().cpu().numpy()

            pred = outputs
            true = batch_y

            preds.append(pred)
            trues.append(true)
            inputx.append(batch_x.detach().cpu().numpy())

    preds = np.concatenate(preds, axis=0)
    trues = np.concatenate(trues, axis=0)
    inputx = np.concatenate(inputx, axis=0)

    return preds, trues, inputx


In [None]:
def read_SMARD_prediction_data(path, remove_bad_columns=False):
    """Load a SMARD forecast CSV export into a DataFrame with UTC timestamps.

    The file is read as semicolon-separated text with German number
    formatting ('.' as thousands separator, ',' as decimal mark). The
    "Datum" and "Anfang" columns are merged into a single "Date" column,
    interpreted as Europe/Berlin local time and converted to UTC. The load
    columns are renamed to English names.

    Args:
        path: File path or URL accepted by ``pandas.read_csv``.
        remove_bad_columns: When truthy, drop the 'Residual Load [MWh]' and
            'Ende' columns from the result.

    Returns:
        DataFrame with the remaining source columns followed by "Date".

    Raises:
        KeyError: If ``remove_bad_columns`` is set and one of the columns to
            drop is missing from the file.
    """
    df = pd.read_csv(path, delimiter=';', thousands='.', decimal=',', dtype={"Datum": str})

    local_start = pd.to_datetime(df.pop("Datum") + ' ' + df.pop("Anfang"), format="%d.%m.%Y %H:%M")
    # ambiguous='infer' resolves the repeated hour at the end of daylight
    # saving time from the order of the rows.
    df["Date"] = local_start.dt.tz_localize("Europe/Berlin", ambiguous='infer').dt.tz_convert('UTC')

    df = df.rename(
        columns={
            'Gesamt (Netzlast) [MWh] Originalauflösungen': 'Total Load [MWh]',
            'Residuallast [MWh] Originalauflösungen': 'Residual Load [MWh]'
        }
    )
    if remove_bad_columns:
        df = df.drop(columns=['Residual Load [MWh]', 'Ende'])
    return df

In [None]:
def organize_results(val_dates, test_dates, val_trues, test_trues, val_results, test_results):
    """Combine model forecasts with the official SMARD forecast in one table.

    One frame is built for the validation period and one for the test
    period. Each holds the dates, the true values and, per model, its
    forecast, absolute error and absolute percentage error. The SMARD
    forecast files for 2021-2023 are then downloaded, the values falling
    inside each period are attached as "SMARD Forecast", both frames are
    stacked (validation first, original row labels kept) and the SMARD error
    columns are added. Summary statistics are shown with ``display`` along
    the way (the notebook display function).

    Args:
        val_dates: ISO 8601 timestamps of the validation forecasts.
        test_dates: ISO 8601 timestamps of the test forecasts.
        val_trues: True values for the validation period.
        test_trues: True values for the test period.
        val_results: One forecast sequence per model for the validation period.
        test_results: One forecast sequence per model for the test period.

    Returns:
        DataFrame with validation rows followed by test rows.
    """
    def _period_frame(dates, trues, forecasts):
        # Columns are added group by group (all forecasts, then all absolute
        # errors, then all percentage errors) to keep the column order.
        frame = pd.DataFrame({
            "Date": pd.to_datetime(dates, format="ISO8601"),
            "True Value": trues
        })
        labels = []
        for number, forecast in enumerate(forecasts, start=1):
            labels.append(f"Model {number}")
            frame[f"Model {number} Forecast"] = forecast
        for label in labels:
            frame[f"{label} Absolute Error"] = np.abs(frame["True Value"] - frame[f"{label} Forecast"])
        for label in labels:
            frame[f"{label} Absolute Percentage Error"] = (abs(frame["True Value"] - frame[f"{label} Forecast"])/frame["True Value"]*100)
        return frame

    all_val_results = _period_frame(val_dates, val_trues, val_results)
    all_test_results = _period_frame(test_dates, test_trues, test_results)

    display(all_val_results.describe())
    display(all_test_results.describe())

    url="https://raw.githubusercontent.com/koljaeger/smardcast/main/data/Prognostizierter_Stromverbrauch_"
    yearly_files = (
        "202101010000_202112312359_Viertelstunde.csv",
        "202201010000_202212312359_Viertelstunde.csv",
        "202301010000_202312312359_Viertelstunde.csv",
    )
    SMARD_prediction_df = pd.concat(
        [read_SMARD_prediction_data(url+file_name, remove_bad_columns=True) for file_name in yearly_files]
    )

    def _smard_forecast_for(frame):
        # SMARD values whose date lies inside the frame's first..last date
        # (both ends included); assigned positionally by the caller.
        first_date = frame["Date"].iloc[0]
        last_date = frame["Date"].iloc[-1]
        smard_dates = SMARD_prediction_df["Date"]
        inside = (smard_dates >= first_date) & (smard_dates <= last_date)
        return list(SMARD_prediction_df["Total Load [MWh]"][inside])

    all_val_results["SMARD Forecast"] = _smard_forecast_for(all_val_results)
    all_test_results["SMARD Forecast"] = _smard_forecast_for(all_test_results)

    results = pd.concat([all_val_results, all_test_results])
    smard_abs_error = abs(results["True Value"] - results["SMARD Forecast"])
    results["SMARD Absolute Error"] = smard_abs_error
    results["SMARD Absolute Percentage Error"] = (smard_abs_error/results["True Value"]*100)

    # NOTE(review): "Date" already comes from pd.to_datetime above, so this
    # conversion looks redundant - kept unchanged; confirm before removing.
    results["Date"] = pd.to_datetime(results["Date"], format="%d.%m.%Y %H:%M")
    display(results.describe())

    return results


In [None]:
def do_experiment(configs, train= True, debug=False, random_seeds=(42, 1337, 987654)):
    """Train and/or evaluate one configuration over several random seeds.

    For every seed a fresh model is created; when ``train`` is true it is
    trained and the wall-clock training time recorded. In both cases the
    weights stored in ``<checkpoint dir>/checkpoint.pth`` are then loaded and
    the model is evaluated on the validation and test loaders. Forecasts are
    mapped back to the original scale with the datasets'
    ``inverse_transform`` and only the last feature column is kept.
    Finally the per-model results are merged with ``organize_results`` and a
    95% t-confidence interval of the per-model MAPEs is computed.

    Args:
        configs: Settings object (see ``get_configs``).
        train: Train before evaluating; when false, existing checkpoints are
            loaded and ``elapsed_times`` stays empty.
        debug: Currently unused.
        random_seeds: Seeds to run, one model per seed. With a single seed
            the confidence interval is not defined (NaN).

    Returns:
        Tuple ``(ci, results, elapsed_times, params, all_params)``: the MAPE
        confidence interval, the results DataFrame, the list of training
        durations in seconds, and the trainable / total parameter counts of
        the last model.

    Raises:
        ValueError: If ``random_seeds`` is empty.
    """
    random_seeds = list(random_seeds)
    if not random_seeds:
        raise ValueError("random_seeds must contain at least one seed")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    train_data, val_data, test_data = get_datasets(configs)
    train_loader, val_loader, test_loader = get_dataloaders(configs, train_data, val_data, test_data)

    def _original_scale_last_feature(dataset, values):
        # Flatten to (rows, features), undo the scaling, keep the last feature.
        return dataset.inverse_transform(values.reshape(-1, values.shape[-1]))[:, -1]

    elapsed_times = []
    val_results = []
    test_results = []
    # Dates and true values are identical for every seed; filled on the first pass.
    val_dates = test_dates = None
    val_trues_1_dim = test_trues_1_dim = None

    for seed in random_seeds:
        set_seed(seed)

        #create model
        model, criterion, optimizer, scheduler, early_stopping, path = initialize_model(configs, device, train_loader, seed)

        if train:
            #train model
            start = time.time()
            train_model(configs, model, train_loader, val_loader, test_loader, device, criterion, optimizer, scheduler, early_stopping, path)
            elapsed_times.append(time.time() - start)

        # Always evaluate the stored checkpoint. map_location lets weights
        # saved on a GPU be loaded on a CPU-only machine (and vice versa).
        checkpoint_file = os.path.join(path, 'checkpoint.pth')
        model.load_state_dict(torch.load(checkpoint_file, map_location=device))

        print(path)

        #test model
        val_preds, val_trues, _ = test(configs, model, val_loader, device)
        test_preds, test_trues, _ = test(configs, model, test_loader, device)

        #store results
        if val_dates is None:
            val_dates = val_data.get_timestamps().flatten()  # get timestamps for prediction sequences
            test_dates = test_data.get_timestamps().flatten()  # get timestamps for prediction sequences
            val_trues_1_dim = _original_scale_last_feature(val_data, val_trues)
            test_trues_1_dim = _original_scale_last_feature(test_data, test_trues)
        val_results.append(_original_scale_last_feature(val_data, val_preds))
        test_results.append(_original_scale_last_feature(test_data, test_preds))

    #Organize results
    results = organize_results(val_dates, test_dates, val_trues_1_dim, test_trues_1_dim, val_results, test_results)

    mapes = [results[col].mean() for col in results.columns if 'Model' in col and 'Absolute Percentage Error' in col]
    print("MAPEs for each model: ", mapes)
    mean = np.mean(mapes)
    sem= stats.sem(mapes)
    ci = stats.t.interval(0.95, len(mapes)-1, loc=mean, scale=sem)
    print(f"Overall MAPE 95% confidence interval: {ci}")

    # Parameter counts refer to the model of the last seed (same architecture for all).
    params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Number of trainable parameters: ", params)

    all_params = sum(p.numel() for p in model.parameters())
    print("Total number of parameters: ", all_params)

    return ci, results, elapsed_times, params, all_params


In [None]:
# Settings shared by the "electricity"-style experiments.
electricity_overrides = dict(enc_in=321, pct_start=0.2, patience=10, batch_size=32)

# One row per experiment:
# (name, overrides passed to get_configs, file storing the elapsed training times).
# Keeping the three pieces on one row guarantees the derived lists stay aligned.
experiment_table = [
    ("ettm_configs", {}, 'elapsed_default.npy'),  #0
    ("electricity_configs", {**electricity_overrides}, 'e_elapsed.npy'),  #1
    ("ettm_configs_multivariate", {"features": 'M'}, 'elapsed_times_default_M.npy'),  #2
    ("electricity_configs_multivariate", {**electricity_overrides, "features": 'M'}, 'elapsed_times_electricity_M.npy'),  #3
    ("not_augmented", {"window_split": 96}, 'elapsed_na.npy'),  #4
    ("32_patches_electricity_configs", {**electricity_overrides, "patch_len": 72, "stride": 20}, 'elapsed_32.npy'),  #5
    ("64_patches_electricity_configs", {**electricity_overrides, "patch_len": 52, "stride": 10}, 'elapsed_64.npy'),  #6
    ("one_head_electricity_configs", {**electricity_overrides, "n_heads": 1}, 'elapsed_1_head.npy'),  #7
    ("eight_heads_electricity_configs", {**electricity_overrides, "n_heads": 8}, 'elapsed_8_heads.npy'),  #8
    ("32_heads_electricity_configs", {**electricity_overrides, "n_heads": 32}, 'elapsed_32_heads.npy'),  #9
    ("train_set_not_shuffled_electricity_configs", {**electricity_overrides, "shuffle_train": False}, 'elapsed_no_shuffle.npy'),  #10
    ("patch_len_2_stride=1_electricity_configs", {**electricity_overrides, "patch_len": 2, "stride": 1}, 'elapsed_l2_s1.npy'),  #11
    ("patch_len_1,_stride_1_electricity_configs", {**electricity_overrides, "patch_len": 1, "stride": 1}, 'elapsed_l1_s1.npy'),  #12
    ("32_dims_electricity_configs", {**electricity_overrides, "d_model": 32}, 'elapsed_model_dim_32.npy'),  #13
    # NOTE(review): the file name really starts with "eelapsed"; it is kept
    # because it must match the file on disk - confirm whether it is a typo.
    ("32_patches_electricity_configs_Multivariate", {**electricity_overrides, "patch_len": 72, "stride": 20, "features": 'M'}, 'eelapsed_32_patches_M.npy'),  #14
    # NOTE(review): unlike the other electricity-style runs this one keeps the
    # default batch_size instead of 32 - confirm that this is intentional.
    ("16_patches_electricity_configs", {"enc_in": 321, "pct_start": 0.2, "patience": 10, "patch_len": 112, "stride": 40}, 'elapsed_16_patches_S.npy'),  #15
]

experiment_names = [row_name for row_name, _, _ in experiment_table]
experiment_configs = [get_configs(name=row_name, **row_overrides) for row_name, row_overrides, _ in experiment_table]
elapsed_np_names = [row_file for _, _, row_file in experiment_table]

# Accumulators filled by the experiment loop, one entry per experiment.
cis, results, elapsed_times = [], [], []
num_trainable_params, num_all_params = [], []

# False: load existing checkpoints and saved timings instead of training.
train = False



In [None]:
# last_experiment_index = np.load('last_experiment_index.npy')
# if last_experiment_index +1 >= len(experiment_configs):
#     train = False
#     print("All experiments have already been run. Loading results.")

# print(last_experiment_index)


In [None]:
# Run every experiment in order and collect its confidence interval, result
# table, training times and parameter counts.
for i, (experiment, experiment_config, elapsed_file) in enumerate(
        zip(experiment_names, experiment_configs, elapsed_np_names)):
    print(f"Starting experiment {i}: {experiment}")
    # if i > last_experiment_index:
    ci, result, elapsed, params, all_params = do_experiment(configs=experiment_config, train=train)

    if train:
        # Persist the timings so a later evaluation-only run can reload them.
        np.save(elapsed_file, np.array(elapsed))
    else:
        # NOTE(review): allow_pickle=True permits arbitrary code execution when
        # loading untrusted files - only load timing files created by this notebook.
        elapsed = np.load(elapsed_file, allow_pickle=True)

    cis.append(ci)
    results.append(result)
    elapsed_times.append(elapsed)
    num_trainable_params.append(params)
    num_all_params.append(all_params)

        # np.save('last_experiment_index.npy', np.array(i))   

In [None]:
# i = 3; #change this to run a specific experiment
# ci, result, elapsed, params, all_params = do_experiment(configs = experiment_configs[i], train=train)

# if train:
#     np.save(elapsed_np_names[i], np.array(elapsed))
# else:
#     elapsed = np.load(elapsed_np_names[i], allow_pickle=True)

In [None]:
# Show the training durations recorded for each experiment, one experiment per line.
for experiment_elapsed in elapsed_times:
    print(experiment_elapsed)