# Informer Demo

## Download code and dataset

In [None]:
!git clone https://github.com/zhouhaoyi/ETDataset.git


In [None]:
import sys

# Make the 'Informer2020' directory importable, adding it at most once.
if 'Informer2020' not in sys.path:
    sys.path.append('Informer2020')

In [None]:
!pip install -r ./requirements.txt

In [None]:
import pandas as pd

# Load the raw dataset.
data = pd.read_csv('./part4.csv')

# Drop every column whose name STARTS WITH 'lat_' or 'lon_'.
# DataFrame.filter(regex=...) matches with re.search, so the pattern must be
# anchored with '^'. Unanchored, it would also drop columns that merely
# contain 'lat_'/'lon_' somewhere inside their name (e.g. 'flat_rate').
coordinate_columns = data.filter(regex=r'^(?:lat_|lon_)').columns
data = data.drop(columns=coordinate_columns)

# Save the reduced dataset without writing the index as a column.
data.to_csv('./Data2023.csv', index=False)


## Experiments: Train and Test

In [1]:
# Project-local modules: they must be importable from the working directory
# or from an entry on sys.path.
from utils.tools import dotdict
from exp.exp_informer import Exp_Informer
import torch
import wandb
import os
# Left disabled. NOTE(review): an empty CUDA_VISIBLE_DEVICES presumably hides all GPUs - confirm before enabling.
#os.environ['CUDA_VISIBLE_DEVICES'] = ''
import gc
# NOTE(review): presumably keeps the W&B sweep agent from shutting itself down
# when several runs fail in quick succession ("flapping") - confirm against the
# W&B environment-variable documentation.
os.environ['WANDB_AGENT_DISABLE_FLAPPING'] = 'true'


In [2]:
# Experiment configuration, stored on a `dotdict` for attribute-style access.
# Values in parentheses are carried over from the original inline notes and
# presumably record earlier/default settings.
args = dotdict()

# ---- model ---------------------------------------------------------------
# model of experiment, options: [informer, informerstack, informerlight(TBD)]
args.model = 'informer'

# ---- data ----------------------------------------------------------------
# dataset key; also looked up in `data_parser` in a later cell
args.data = 'custom'
# root path of the data file
args.root_path = './'
# data file (replaced by the `data_parser` preset when args.data is listed there)
args.data_path = 'Data.csv'
# forecasting task, options: [M, S, MS]
#   M : multivariate predict multivariate
#   S : univariate predict univariate
#   MS: multivariate predict univariate
args.features = 'MS'
# target feature in S or MS task (replaced by the `data_parser` preset as well)
args.target = 'south'
# freq for time features encoding, options: [s:secondly, t:minutely, h:hourly,
# d:daily, b:business days, w:weekly, m:monthly]; a more detailed freq such as
# 15min or 3h is also possible
args.freq = 'h'
# location of model checkpoints
args.checkpoints = './informer_checkpoints'

# ---- sequence lengths ------------------------------------------------------
# Informer decoder input: concat[start token series(label_len), zero padding series(pred_len)]
args.seq_len = 45    # input sequence length of Informer encoder
args.label_len = 3   # start token length of Informer decoder
args.pred_len = 1    # prediction sequence length

# ---- architecture ----------------------------------------------------------
# enc_in / dec_in / c_out are replaced by the `data_parser` preset in a later cell.
args.enc_in = 32     # encoder input size (7)
args.dec_in = 32     # decoder input size (7)
args.c_out = 1       # output size (7)
args.factor = 4      # probsparse attn factor (was 5 - 7)
args.d_model = 1078  # dimension of model
args.n_heads = 11    # num of heads (8)
args.e_layers = 4    # num of encoder layers (2)
args.d_layers = 2    # num of decoder layers
args.d_ff = 2048     # dimension of fcn in model (2048)
args.dropout = 0.25  # dropout (0.05; original note: "best for me 0.1")
args.attn = 'prob'   # attention used in encoder, options: [prob, full]
args.embed = 'timeF' # time features encoding, options: [timeF, fixed, learned]
args.activation = 'gelu'      # activation
args.distil = True            # whether to use distilling in encoder
args.output_attention = True  # whether to output attention in encoder
args.mix = True
args.padding = 0

# ---- optimisation ----------------------------------------------------------
args.batch_size = 76
args.learning_rate = 0.0001  # original note: 191
args.loss = 'mse'
args.lradj = 'type1'
args.use_amp = False         # whether to use automatic mixed precision training

args.inverse = True
args.scale = True

# ---- run control -----------------------------------------------------------
args.num_workers = 0
args.itr = 1
args.train_epochs = 100
args.patience = 100
args.des = 'exp'

# ---- devices ---------------------------------------------------------------
args.use_gpu = torch.cuda.is_available()
args.gpu = 0

args.use_multi_gpu = False
args.devices = '0,1,2,3'


In [3]:
# The GPU is used only when CUDA is available AND it was requested in args.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

if args.use_gpu and args.use_multi_gpu:
    # Turn a string such as '0, 1,2' into integer device ids; the first id
    # becomes the primary GPU.
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = list(map(int, device_ids))
    args.gpu = args.device_ids[0]

In [4]:
# Per-dataset presets, keyed by args.data:
#   'data' -> data file, 'T' -> target column,
#   'M' / 'S' / 'MS' -> [enc_in, dec_in, c_out] for that forecasting task.
data_parser = {
    'custom': {
        'data': 'Data.csv',
        'T': 'Avg',
        'M': [34, 34, 34],
        'S': [1, 1, 1],
        'MS': [34, 34, 1],
    },
}

# A matching preset overrides the values configured by hand on args.
if args.data in data_parser:
    data_info = data_parser[args.data]
    args.data_path = data_info['data']
    args.target = data_info['T']
    args.enc_in, args.dec_in, args.c_out = data_info[args.features]

In [5]:
# Remember the full frequency string in args.detail_freq, then cut args.freq
# down to its final character (for 'h' both stay 'h').
requested_freq = args.freq
args.detail_freq = requested_freq
args.freq = requested_freq[-1:]

In [6]:
# Show the final configuration: header on one line, the args mapping on the next.
print('Args in experiment:', args, sep='\n')

Args in experiment:
{'model': 'informer', 'data': 'custom', 'root_path': './', 'data_path': 'Data.csv', 'features': 'MS', 'target': 'Avg', 'freq': 'h', 'checkpoints': './informer_checkpoints', 'seq_len': 45, 'label_len': 3, 'pred_len': 1, 'enc_in': 34, 'dec_in': 34, 'c_out': 1, 'factor': 4, 'd_model': 1078, 'n_heads': 11, 'e_layers': 4, 'd_layers': 2, 'd_ff': 2048, 'dropout': 0.25, 'attn': 'prob', 'embed': 'timeF', 'activation': 'gelu', 'distil': True, 'output_attention': True, 'mix': True, 'padding': 0, 'batch_size': 76, 'learning_rate': 0.0001, 'loss': 'mse', 'lradj': 'type1', 'use_amp': False, 'inverse': True, 'scale': True, 'num_workers': 0, 'itr': 1, 'train_epochs': 100, 'patience': 100, 'des': 'exp', 'use_gpu': True, 'gpu': 0, 'use_multi_gpu': False, 'devices': '0,1,2,3', 'detail_freq': 'h'}


In [7]:
# Short alias for the experiment class; the cells below instantiate it as Exp(args).
Exp = Exp_Informer

In [8]:
# W&B sweep definition: Bayesian search that minimises the logged 'RMSE' metric.
# Parameters given as min/max are searched as ranges; parameters given as
# 'values' are picked from the listed candidates.
sweep_config = dict(
    method='bayes',
    metric=dict(name='RMSE', goal='minimize'),
    parameters=dict(
        batch_size=dict(min=8, max=64),
        n_heads=dict(values=list(range(8, 17))),              # 8 .. 16
        factor=dict(min=3, max=9),
        d_model=dict(values=list(range(678, 1579, 100))),     # 678, 778, ..., 1578
        d_ff=dict(min=1648, max=2648),
        seq_len=dict(values=[24 * 3 * 3, 12 * 3 * 3]),
        label_len=dict(values=[24 * 3, 12 * 3]),
        pred_len=dict(values=[24, 12]),
        # Further parameters to optimise can be added here.
    ),
)

In [None]:
def train():
    """Run one W&B sweep trial: train and test an Informer experiment.

    Copies the hyper-parameters proposed by the sweep from ``run.config`` onto
    the module-level ``args``, records the resulting configuration in W&B and
    then runs ``args.itr`` train/test iterations (plus a prediction pass when
    ``args.do_predict`` is truthy).

    Takes no arguments and returns ``None``; it is meant to be passed to
    ``wandb.agent``. It reads and mutates the global ``args``.
    """
    # Every hyper-parameter the sweep may propose. Previously only the first
    # four were copied, so trials proposing d_ff / seq_len / label_len /
    # pred_len still ran with the stale values already stored on args.
    swept_params = (
        'n_heads', 'batch_size', 'd_model', 'factor',
        'd_ff', 'seq_len', 'label_len', 'pred_len',
    )

    # The context manager finishes the run on exit, so no explicit
    # run.finish() is needed.
    with wandb.init() as run:
        # Update args with the sweep parameters that are present for this trial.
        for name in swept_params:
            if name in run.config:
                setattr(args, name, run.config[name])

        wandb.config.update(args)

        for ii in range(args.itr):
            # setting record of experiments
            setting = '{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_at{}_fc{}_eb{}_dt{}_mx{}_{}_{}12'.format(
                args.model, args.data, args.features, args.seq_len, args.label_len, args.pred_len,
                args.d_model, args.n_heads, args.e_layers, args.d_layers, args.d_ff, args.attn, args.factor,
                args.embed, args.distil, args.mix, args.des, ii)

            # set experiments
            exp = Exp(args)

            # train
            print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
            exp.train(setting)

            # test
            print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
            exp.test(setting)
            # NOTE(review): args.do_predict is not set in the visible
            # configuration; presumably it resolves to a falsy value - confirm.
            if args.do_predict:
                print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
                exp.predict(setting, True)

            # Drop the experiment reference first so the collector can reclaim it.
            del exp
            gc.collect()

    # Release cached GPU memory before the agent starts the next trial.
    torch.cuda.empty_cache()
 
# Register the sweep with W&B ...
sweep_id = wandb.sweep(
    sweep=sweep_config,
    project='Elprices',
    entity='ossyandlars',
)

# ... and start an agent that calls `train` for each trial.
wandb.agent(sweep_id, function=train)

In [9]:

# Single (non-sweep) experiment tracked in W&B.
# The run is closed by the context manager only after ALL iterations are done.
# Previously run.finish() and torch.cuda.empty_cache() were inside the loop
# body, so with args.itr > 1 the run was already closed after the first
# iteration.
with wandb.init(project='Elprices', entity='ossyandlars') as run:
    wandb.config.update(args)

    for ii in range(args.itr):
        # setting record of experiments
        setting = '{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_at{}_fc{}_eb{}_dt{}_mx{}_{}_{}relu2021_2023_1HE100P3'.format(
            args.model, args.data, args.features,
            args.seq_len, args.label_len, args.pred_len,
            args.d_model, args.n_heads, args.e_layers, args.d_layers, args.d_ff,
            args.attn, args.factor, args.embed, args.distil, args.mix, args.des, ii)

        # set experiments
        exp = Exp(args)

        # train
        print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
        exp.train(setting)

        # test
        print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
        exp.test(setting)
        if args.do_predict:
            print('>>>>>>>predicting : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
            exp.predict(setting, True)

# Release cached GPU memory once the whole experiment is finished.
torch.cuda.empty_cache()

Use GPU: cuda:0
>>>>>>>start training : informer_custom_ftMS_sl45_ll3_pl1_dm1078_nh11_el4_dl2_df2048_atprob_fc4_ebtimeF_dtTrue_mxTrue_exp_0relu2021_2023_1HE100P3>>>>>>>>>>>>>>>>>>>>>>>>>>
train 7010
val 393
test 391
Epoch: 1 cost time: 21.177915573120117
Epoch: 1, Steps: 92 | Train Loss: 863.2930183 Vali Loss: 287.8845551 Test Loss: 202.1205246
Validation loss decreased (inf --> 287.884555).  Saving model ...
Updating learning rate to 0.0001
Epoch: 2 cost time: 21.381032705307007
Epoch: 2, Steps: 92 | Train Loss: 347.7395459 Vali Loss: 248.7390961 Test Loss: 200.7934998
Validation loss decreased (287.884555 --> 248.739096).  Saving model ...
Updating learning rate to 5e-05
Epoch: 3 cost time: 20.975331783294678
Epoch: 3, Steps: 92 | Train Loss: 261.0310777 Vali Loss: 264.8907227 Test Loss: 176.0059174
EarlyStopping counter: 1 out of 100
Updating learning rate to 2.5e-05
Epoch: 4 cost time: 20.667306900024414
Epoch: 4, Steps: 92 | Train Loss: 240.5367307 Vali Loss: 232.7378693 Test Loss

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
MAE,▁
MAE_Vali,█▇▂▃▂▂▃▂▂▂▂▂▂▂▃▂▃▂▁▂▁▂▂▂▃▂▁▂▂▃▂▃▃▂▁▁▂▂▂▂
MAPE,▁
MAPE_Vali,██▂▃▂▂▃▂▂▃▂▁▂▂▁▃▁▃▂▃▁▂▃▂▃▂▂▂▃▂▂▃▃▃▂▂▃▃▂▃
MSE,▁
MSE_Vali,█▅▃▄▃▃▄▄▃▄▃▃▄▃▄▂▄▄▂▂▂▃▃▃▄▄▂▄▃▄▃▄▄▄▂▁▄▄▃▄
MSPE,▁
MSPE_Vali,█▆▃▃▃▃▃▃▂▃▃▂▃▃▁▃▁▃▃▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
RMSE,▁
RMSE_Vali,█▅▃▄▃▃▄▄▄▄▃▃▄▃▄▂▄▄▂▂▂▄▃▃▄▄▂▄▃▄▃▄▄▄▂▁▄▄▃▄

0,1
MAE,10.29834
MAE_Vali,10.65829
MAPE,0.11471
MAPE_Vali,0.25825
MSE,186.00745
MSE_Vali,250.39656
MSPE,0.02081
MSPE_Vali,0.40789
RMSE,13.63845
RMSE_Vali,15.82392


## Prediction

In [None]:
import os

# Name of the experiment ("setting") whose saved artefacts the following cells use.
# NOTE(review): the name encodes sl720/ll48/pl24/dm1360/nh14, which differs from
# the args configured earlier in this notebook (seq_len=45, d_model=1078,
# n_heads=11) - confirm it refers to an existing, compatible run.
setting = 'informer_custom_ftMS_sl720_ll48_pl24_dm1360_nh14_el4_dl1_df2048_atprob_fc7_ebtimeF_dtTrue_mxTrue_exp_0relu38'
# Checkpoint path for this setting (left disabled):
#path = os.path.join(args.checkpoints,setting,'checkpoint.pth')

In [None]:
# If you already have a trained model, set the arguments and the model path, then create an experiment and use it to predict.
# The prediction is a sequence that directly follows the last date in the data and does not exist in the data itself.
# For more details about prediction, see `predict()` in `exp/exp_informer.py` and the class `Dataset_Pred` in `data/data_loader.py`.

exp = Exp(args)

# NOTE(review): the second argument presumably asks predict() to load the saved checkpoint for `setting` - confirm in exp/exp_informer.py.
exp.predict(setting, True)

In [None]:
# The prediction is stored under ./results/{setting}/real_prediction.npy
import numpy as np

prediction = np.load(f'./results/{setting}/real_prediction.npy')

# Display the array dimensions as the cell output.
prediction.shape

### More details about Prediction - prediction function

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the CSV with the flattened predictions and ground truths of this setting.
data = pd.read_csv('./results/'+setting+'/predictions_and_ground_truths.csv')  # Replace with your file path

# Ensure only numeric columns are used for plotting
numeric_data = data.select_dtypes(include='number')
if numeric_data.empty:
    raise ValueError('predictions_and_ground_truths.csv contains no numeric rows to plot')

# Each row holds two equally wide halves (save_to_csv writes the predictions
# first and the ground truths second). Derive the half width from the data
# instead of hard-coding 24, so other prediction lengths plot correctly too.
half_width = numeric_data.shape[1] // 2

# Pick up to 6 random rows; fewer when the file is shorter than that.
n_rows = min(6, len(numeric_data))
random_rows_numeric = numeric_data.sample(n=n_rows)

# squeeze=False + ravel() always yields a 1-D array of axes, even for one row.
fig, axs = plt.subplots(n_rows, figsize=(12, 18), squeeze=False)
axs = axs.ravel()

for i, (idx, row) in enumerate(random_rows_numeric.iterrows()):
    # Splitting the row into its two halves
    first_half = row.iloc[:half_width]
    second_half = row.iloc[half_width:2 * half_width]

    # Plotting both halves on the same axis
    axs[i].plot(first_half.values, label=f'First {half_width} Columns', marker='o')
    axs[i].plot(second_half.values, label=f'Second {half_width} Columns', marker='x')
    axs[i].set_title(f'Row {idx}')
    axs[i].legend()

plt.tight_layout()
plt.show()


### More details about Prediction - prediction dataset

You can give the `root_path` and `data_path` of the data you want to forecast, and set `seq_len`, `label_len`, `pred_len` and the other arguments as for the other Dataset classes. The difference is that you can set a more detailed freq such as `15min` or `3h` to generate the timestamps of the prediction series.

`Dataset_Pred` only has one sample (including `encoder_input: [1, seq_len, dim]`, `decoder_token: [1, label_len, dim]`, `encoder_input_timestamp: [1, seq_len, date_dim]`, `decoder_input_timestamp: [1, label_len+pred_len, date_dim]`). It takes the last sequence of the given data (`seq_len` data points) to forecast the unseen future sequence (`pred_len` data points).

## Visualization

In [None]:
# Imports come first so this cell also works on its own: previously `np` was
# used before the cell's own `import numpy as np`.
import numpy as np
import pandas as pd

# Once exp.train(setting) and exp.test(setting) have finished, there is a trained model and the results of the test experiment.
# The test results are saved in ./results/{setting}/pred.npy (prediction of the test dataset) and ./results/{setting}/true.npy (ground truth of the test dataset)

preds = np.load('./results/'+setting+'/pred.npy')
trues = np.load('./results/'+setting+'/true.npy')
metrics = np.load('./results/'+setting+'/metrics.npy')
#real = np.load('./results/'+setting+'/real_prediction.npy')

def save_to_csv(setting):
    """Export the test predictions and ground truths of a run to one CSV file.

    Reads ``pred.npy`` and ``true.npy`` from ``./results/{setting}/``, flattens
    every 3-D sample into a single row and writes the predictions followed by
    the ground truths, side by side, to
    ``./results/{setting}/predictions_and_ground_truths.csv`` (without an
    index column).

    Args:
        setting: Name of the results sub-directory to read from and write to.
    """
    result_dir = './results/' + setting
    predicted = np.load(result_dir + '/pred.npy')
    actual = np.load(result_dir + '/true.npy')

    # Both arrays are flattened using the dimensions of the prediction array;
    # the ground truths are assumed to share that shape.
    n_rows, horizon, n_channels = predicted.shape
    row_width = horizon * n_channels

    table = pd.DataFrame(
        np.hstack((
            predicted.reshape(n_rows, row_width),
            actual.reshape(n_rows, row_width),
        ))
    )
    table.to_csv(result_dir + '/predictions_and_ground_truths.csv', index=False)

# Example usage: export the predictions and ground truths stored under ./results/{setting}/ to CSV.

save_to_csv(setting)


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# The first five entries of `metrics` are read, in this order, as
# MAE, MSE, RMSE, MAPE and MSPE.
mae, mse, rmse, mape, mspe = metrics[:5]

for metric_name, metric_value in (
    ('MAE', mae),
    ('MSE', mse),
    ('RMSE', rmse),
    ('MAPE', mape),
    ('MSPE', mspe),
):
    print(f'{metric_name}: {metric_value}')


In [None]:
# Predicted vs. true values for the last channel of the test results.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(preds[:, :, -1], label='Predicted', color='blue')
ax.plot(trues[:, :, -1], label='True', color='green')
ax.set_xlabel('Time Steps')
ax.set_ylabel('Values')
ax.set_title('Predicted vs True Values')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# Ground truth vs. prediction for channel 0 of the test results.
fig, ax = plt.subplots()
ax.plot(trues[:, :, 0], label='GroundTruth')
ax.plot(preds[:, :, 0], label='Prediction')
ax.legend()
plt.show()

In [None]:
from data.data_loader import Dataset_Daily
from torch.utils.data import DataLoader

# Use the dataset class that this cell actually imports. The cell previously
# referenced Dataset_ETT_hour, which it never imports (NameError on a fresh kernel).
Data = Dataset_Daily
# Time-feature encoding flag: 1 when args.embed is 'timeF', otherwise 0.
timeenc = 0 if args.embed!='timeF' else 1
# Read the test split one sample at a time, in order.
flag = 'test'; shuffle_flag = False; drop_last = True; batch_size = 1

# NOTE(review): unlike the Dataset_Custom cell further down, no `target=` is
# passed here, so the dataset class default applies - confirm that is intended.
data_set = Data(
    root_path=args.root_path,
    data_path=args.data_path,
    flag=flag,
    size=[args.seq_len, args.label_len, args.pred_len],
    features=args.features,
    timeenc=timeenc,
    freq=args.freq
)
data_loader = DataLoader(
    data_set,
    batch_size=batch_size,
    shuffle=shuffle_flag,
    num_workers=args.num_workers,
    drop_last=drop_last)

In [None]:
import os

# The model must return its attention maps alongside the outputs for the
# visualisation cells below.
args.output_attention = True

exp = Exp(args)

model = exp.model

setting = 'informer_Data_ftMS_sl60_ll15_pl1_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_exp_0'
path = os.path.join(args.checkpoints,setting,'checkpoint.pth')
# map_location lets a checkpoint saved on a GPU be restored on whatever device
# this experiment runs on (e.g. a CPU-only machine).
model.load_state_dict(torch.load(path, map_location=exp.device))
# Inference only: eval mode disables dropout so the attention maps are deterministic.
model.eval()

In [None]:
# attention visualization
# Run the model on the batch at position `idx` and keep its attention maps in `attn`.
idx = 0
with torch.no_grad():  # inference only, no autograd graph is needed
    for i, (batch_x,batch_y,batch_x_mark,batch_y_mark) in enumerate(data_loader):
        if i!=idx:
            continue
        batch_x = batch_x.float().to(exp.device)
        batch_y = batch_y.float()

        batch_x_mark = batch_x_mark.float().to(exp.device)
        batch_y_mark = batch_y_mark.float().to(exp.device)

        # Decoder input: the first label_len steps of batch_y followed by
        # zeros for the pred_len steps to be predicted.
        dec_inp = torch.zeros_like(batch_y[:,-args.pred_len:,:]).float()
        dec_inp = torch.cat([batch_y[:,:args.label_len,:], dec_inp], dim=1).float().to(exp.device)

        outputs,attn = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

        # Stop after the requested batch; the loop previously kept reading
        # every remaining batch of the loader for nothing.
        break

In [None]:
# Display the shapes of the first two attention maps returned by the model.
attn[0].shape, attn[1].shape #, attn[2].shape

In [None]:
# One heat map per attention head of the selected layer (first sample of the batch).
layer = 0
distil = 'Distil' if args.distil else 'NoDistil'
# attn[layer] is indexed as [sample, head] below, so axis 1 is the head axis.
# Iterate over every head actually present instead of a hard-coded 8, which
# skipped heads for n_heads > 8 and raised an IndexError for n_heads < 8.
head_count = attn[layer].shape[1]
for h in range(head_count):
    plt.figure(figsize=[10,8])
    plt.title('Informer, {}, attn:{} layer:{} head:{}'.format(distil, args.attn, layer, h))
    A = attn[layer][0,h].detach().cpu().numpy()
    # Colour scale starts at 0 and ends slightly above this head's maximum.
    sns.heatmap(A, vmin=0, vmax=A.max()+0.01)
    plt.show()

In [None]:
# One heat map per attention head of the selected layer (first sample of the batch).
layer = 1
distil = 'Distil' if args.distil else 'NoDistil'
# attn[layer] is indexed as [sample, head] below, so axis 1 is the head axis.
# Iterate over every head actually present instead of a hard-coded 8, which
# skipped heads for n_heads > 8 and raised an IndexError for n_heads < 8.
head_count = attn[layer].shape[1]
for h in range(head_count):
    plt.figure(figsize=[10,8])
    plt.title('Informer, {}, attn:{} layer:{} head:{}'.format(distil, args.attn, layer, h))
    A = attn[layer][0,h].detach().cpu().numpy()
    # Colour scale starts at 0 and ends slightly above this head's maximum.
    sns.heatmap(A, vmin=0, vmax=A.max()+0.01)
    plt.show()

## Custom Data

Custom data (xxx.csv) has to include at least 2 features: `date`(format: `YYYY-MM-DD hh:mm:ss`) and `target feature`.


In [None]:
from data.data_loader import Dataset_Custom
from torch.utils.data import DataLoader
import pandas as pd
import os

In [None]:
# Custom data lives in a CSV file (xxx.csv) with the column layout
# ['date', ...(other features), target feature].

# ETTh2 serves as the example here.
args.root_path = './ETDataset/ETT-small/'
args.data_path = 'ETTh2.csv'

csv_path = os.path.join(args.root_path, args.data_path)
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows of the loaded CSV.
df.head()

In [None]:
'''
We set 'HULL' as target instead of 'OT'

The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
'''

# Forecast target and sampling frequency for this example.
args.target = 'HULL'
args.freq = 'h'

Data = Dataset_Custom
# Time-feature encoding flag: 1 when args.embed is 'timeF', otherwise 0.
timeenc = 1 if args.embed == 'timeF' else 0

# Read the test split one sample at a time, in order.
flag = 'test'
shuffle_flag = False
drop_last = True
batch_size = 1

data_set = Data(
    root_path=args.root_path,
    data_path=args.data_path,
    flag=flag,
    size=[args.seq_len, args.label_len, args.pred_len],
    features=args.features,
    timeenc=timeenc,
    target=args.target,  # 'HULL' in this example
    freq=args.freq,      # 'h': hourly, 't': minutely
)
data_loader = DataLoader(
    data_set,
    batch_size=batch_size,
    shuffle=shuffle_flag,
    num_workers=args.num_workers,
    drop_last=drop_last,
)

In [None]:
# Fetch the first sample of the dataset directly (without the DataLoader) and unpack its four parts.
batch_x,batch_y,batch_x_mark,batch_y_mark = data_set[0]