# Informer on S&P 500 Dataset

Custom data (xxx.csv) must include at least two columns: a date column (format: YYYY-MM-DD hh:mm:ss) and the target feature.

## Set Path and install requirements

In [1]:
import sys

# Put the 'SNP500' directory on the import path exactly once, so re-running
# this cell does not add duplicate entries.
if 'SNP500' not in sys.path:
    sys.path.append('SNP500')

In [3]:
!pip install -r ./Informer2020/requirements.txt

## Importing Necessary Libraries

In [2]:
from utils.tools import dotdict
from exp.exp_informer import Exp_Informer
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

  from .autonotebook import tqdm as notebook_tqdm


# Experiments: Train and Test

## Single Feature

In [3]:
# Experiment configuration for the single-feature (univariate) run.
# Keys are assigned in a fixed order so the printed args stay comparable between runs.
args = dotdict()

args.model = 'informer' # model of experiment, options: [informer, informerstack, informerlight(TBD)]

args.data = 'custom' # data
args.root_path = 'data' # root path of data file
args.data_path = 'sp500_close_nrm.csv' # data file
args.features = 'S' # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate
args.target = 'Close' # target feature in S or MS task
args.freq = 'd' # freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h
args.checkpoints = 'informer_checkpoints' # location of model checkpoints

args.seq_len = 32 # input sequence length of Informer encoder
args.label_len = 8 # start token length of Informer decoder
args.pred_len = 2 # prediction sequence length
# Informer decoder input: concat[start token series(label_len), zero padding series(pred_len)]

# enc_in / dec_in / c_out are placeholders here: the data_parser cell further
# down reassigns all three from the entry selected by args.data and args.features.
args.enc_in = 5 # encoder input size
args.dec_in = 5 # decoder input size
args.c_out = 5 # output size
args.factor = 5 # probsparse attn factor
args.d_model = 512 # dimension of model
# NOTE(review): d_model (512) is not a multiple of n_heads (10) - confirm the attention layers handle the uneven split.
args.n_heads = 10 # num of heads
args.e_layers = 4 # num of encoder layers
args.d_layers = 2 # num of decoder layers
args.d_ff = 2048 # dimension of fcn in model
args.dropout = 0.05 # dropout
args.attn = 'prob' # attention used in encoder, options:[prob, full]
args.embed = 'timeF' # time features encoding, options:[timeF, fixed, learned]
args.activation = 'gelu' # activation
args.distil = True # whether to use distilling in encoder
args.output_attention = False # whether to output attention in encoder
args.mix = True
args.padding = 0

args.batch_size = 32
args.learning_rate = 0.0001
# NOTE(review): the recorded training log shows L1LossBackward0, so the experiment code may not honour this value - confirm.
args.loss = 'mse'
args.lradj = 'type1'
args.use_amp = False # whether to use automatic mixed precision training

args.num_workers = 0
args.itr = 1
args.train_epochs = 20
args.patience = 15
args.des = 'exp'

# is_available() already returns a bool, so no conditional expression is needed.
args.use_gpu = torch.cuda.is_available()
args.gpu = 0

# NOTE(review): the device list assumes four visible GPUs - confirm for the target machine.
args.use_multi_gpu = True
args.devices = '0,1,2,3'


In [4]:
# Use the GPU only when it was requested *and* CUDA is actually available.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

# Multi-GPU: strip spaces from the comma-separated device string, parse it into
# integer ids and make the first listed device the primary one.
if args.use_gpu and args.use_multi_gpu:
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = list(map(int, device_ids))
    args.gpu = args.device_ids[0]

In [5]:
# Set arguments from the dataset name.
# Each entry holds the csv file ('data'), the target column ('T') and, per
# forecasting task (M / S / MS), the [enc_in, dec_in, c_out] sizes.
data_parser = {
    'ETTh1': {'data': 'ETTh1.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTh2': {'data': 'ETTh2.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTm1': {'data': 'ETTm1.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTm2': {'data': 'ETTm2.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'custom': {'data': 'sp500_close_nrm.csv', 'T': 'Close', 'M': [6, 6, 6], 'S': [1, 1, 1], 'MS': [6, 6, 1]},
}

# A known dataset name overrides the file, target and model I/O sizes set by hand earlier.
if args.data in data_parser:
    data_info = data_parser[args.data]
    args.data_path, args.target = data_info['data'], data_info['T']
    args.enc_in, args.dec_in, args.c_out = data_info[args.features]

In [6]:
# Keep the full frequency string in detail_freq, then reduce freq itself to
# its final character (e.g. '3h' -> 'h').
args.detail_freq = args.freq
args.freq = args.detail_freq[-1:]

In [7]:
# Show the resolved configuration: header line first, then the args mapping.
print('Args in experiment:', args, sep='\n')

Args in experiment:
{'model': 'informer', 'data': 'custom', 'root_path': 'data', 'data_path': 'sp500_close_nrm.csv', 'features': 'S', 'target': 'Close', 'freq': 'd', 'checkpoints': 'informer_checkpoints', 'seq_len': 32, 'label_len': 8, 'pred_len': 2, 'enc_in': 1, 'dec_in': 1, 'c_out': 1, 'factor': 5, 'd_model': 512, 'n_heads': 10, 'e_layers': 4, 'd_layers': 2, 'd_ff': 2048, 'dropout': 0.05, 'attn': 'prob', 'embed': 'timeF', 'activation': 'gelu', 'distil': True, 'output_attention': False, 'mix': True, 'padding': 0, 'batch_size': 32, 'learning_rate': 0.0001, 'loss': 'mse', 'lradj': 'type1', 'use_amp': False, 'num_workers': 0, 'itr': 1, 'train_epochs': 20, 'patience': 15, 'des': 'exp', 'use_gpu': True, 'gpu': 0, 'use_multi_gpu': True, 'devices': '0,1,2,3', 'device_ids': [0, 1, 2, 3], 'detail_freq': 'd'}


In [8]:
# Alias for the experiment class; the training cell instantiates it as Exp(args).
Exp = Exp_Informer

In [9]:
# Run identifier: dataset prefix, then the main hyper-parameters in a fixed
# order, then the description and a trailing run index (0).
setting = 'sp500_close_nrm_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_at{}_fc{}_eb{}_dt{}_mx{}_{}_{}'.format(
    *[
        getattr(args, field)
        for field in (
            'model', 'data', 'features',
            'seq_len', 'label_len', 'pred_len',
            'd_model', 'n_heads', 'e_layers', 'd_layers', 'd_ff',
            'attn', 'factor', 'embed', 'distil', 'mix', 'des',
        )
    ],
    0,
)

In [10]:
# Run args.itr independent train/test repetitions of the experiment.
for ii in range(args.itr):
    # setting record of experiments
    # `setting` was built with a fixed trailing run index; swap in the current
    # iteration so each repetition passes a distinct name to train()/test()
    # (presumably used to name checkpoints/results - otherwise repetitions
    # would all share one name). For ii == 0 the name is unchanged.
    run_setting = '{}_{}'.format(setting.rsplit('_', 1)[0], ii)

    # set experiments
    exp = Exp(args)

    # train
    print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(run_setting))
    exp.train(run_setting)

    # test
    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(run_setting))
    exp.test(run_setting)

    # Release cached GPU memory before the next repetition.
    torch.cuda.empty_cache()

Use GPU: cuda:0
>>>>>>>start training : sp500_close_nrm_informer_custom_ftS_sl32_ll8_pl2_dm512_nh10_el4_dl2_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_exp_0>>>>>>>>>>>>>>>>>>>>>>>>>>
data.data_loader.Dataset_Custom
train
data_path ->  sp500_close_nrm.csv
scaling
data.data_loader.Dataset_Custom
val
data_path ->  sp500_close_nrm.csv
scaling
data.data_loader.Dataset_Custom
test
data_path ->  sp500_close_nrm.csv
scaling
loss ->  tensor(0.7330, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(2.1224, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.8331, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.7487, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.7461, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.2582, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.3856, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.3786, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.2237, device='cuda:0'

## Multiple Features

In [43]:
# Experiment configuration for the multi-feature (MS: multivariate -> univariate) run.
# Keys are assigned in a fixed order so the printed args stay comparable between runs.
args = dotdict()

args.model = 'informer' # model of experiment, options: [informer, informerstack, informerlight(TBD)]

args.data = 'custom' # data
args.root_path = 'data' # root path of data file
args.data_path = 'sp500_nrm.csv' # data file
args.features = 'MS' # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate
args.target = 'Close' # target feature in S or MS task
args.freq = 'd' # freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h
args.checkpoints = 'informer_checkpoints' # location of model checkpoints

args.seq_len = 72 # input sequence length of Informer encoder
args.label_len = 16 # start token length of Informer decoder
args.pred_len = 48 # prediction sequence length
# Informer decoder input: concat[start token series(label_len), zero padding series(pred_len)]

# enc_in / dec_in / c_out are placeholders here: the data_parser cell further
# down reassigns all three from the entry selected by args.data and args.features.
args.enc_in = 5 # encoder input size
args.dec_in = 5 # decoder input size
args.c_out = 5 # output size
args.factor = 5 # probsparse attn factor
args.d_model = 512 # dimension of model
# NOTE(review): d_model (512) is not a multiple of n_heads (10) - confirm the attention layers handle the uneven split.
args.n_heads = 10 # num of heads
args.e_layers = 4 # num of encoder layers
args.d_layers = 2 # num of decoder layers
args.d_ff = 2048 # dimension of fcn in model
args.dropout = 0.05 # dropout
args.attn = 'prob' # attention used in encoder, options:[prob, full]
args.embed = 'timeF' # time features encoding, options:[timeF, fixed, learned]
args.activation = 'gelu' # activation
args.distil = True # whether to use distilling in encoder
args.output_attention = False # whether to output attention in encoder
args.mix = True
args.padding = 0

args.batch_size = 32
args.learning_rate = 0.0001
# NOTE(review): the recorded training log shows L1LossBackward0, so the experiment code may not honour this value - confirm.
args.loss = 'mse'
args.lradj = 'type1'
args.use_amp = False # whether to use automatic mixed precision training

args.num_workers = 0
args.itr = 1
args.train_epochs = 20
args.patience = 5
args.des = 'exp'

# is_available() already returns a bool, so no conditional expression is needed.
args.use_gpu = torch.cuda.is_available()
args.gpu = 0

# NOTE(review): the device list assumes four visible GPUs - confirm for the target machine.
args.use_multi_gpu = True
args.devices = '0,1,2,3'


In [44]:
# Use the GPU only when it was requested *and* CUDA is actually available.
args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

# Multi-GPU: strip spaces from the comma-separated device string, parse it into
# integer ids and make the first listed device the primary one.
if args.use_gpu and args.use_multi_gpu:
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = list(map(int, device_ids))
    args.gpu = args.device_ids[0]

In [45]:
# Set arguments from the dataset name.
# Each entry holds the csv file ('data'), the target column ('T') and, per
# forecasting task (M / S / MS), the [enc_in, dec_in, c_out] sizes.
data_parser = {
    'ETTh1': {'data': 'ETTh1.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTh2': {'data': 'ETTh2.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTm1': {'data': 'ETTm1.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTm2': {'data': 'ETTm2.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'custom': {'data': 'sp500_nrm.csv', 'T': 'Close', 'M': [5, 5, 5], 'S': [1, 1, 1], 'MS': [5, 5, 1]},
}

# A known dataset name overrides the file, target and model I/O sizes set by hand earlier.
if args.data in data_parser:
    data_info = data_parser[args.data]
    args.data_path, args.target = data_info['data'], data_info['T']
    args.enc_in, args.dec_in, args.c_out = data_info[args.features]

In [46]:
# Keep the full frequency string in detail_freq, then reduce freq itself to
# its final character (e.g. '3h' -> 'h').
args.detail_freq = args.freq
args.freq = args.detail_freq[-1:]

In [47]:
# Show the resolved configuration: header line first, then the args mapping.
print('Args in experiment:', args, sep='\n')

Args in experiment:
{'model': 'informer', 'data': 'custom', 'root_path': 'data', 'data_path': 'sp500_nrm.csv', 'features': 'MS', 'target': 'Close', 'freq': 'd', 'checkpoints': 'informer_checkpoints', 'seq_len': 72, 'label_len': 16, 'pred_len': 48, 'enc_in': 5, 'dec_in': 5, 'c_out': 1, 'factor': 5, 'd_model': 512, 'n_heads': 10, 'e_layers': 4, 'd_layers': 2, 'd_ff': 2048, 'dropout': 0.05, 'attn': 'prob', 'embed': 'timeF', 'activation': 'gelu', 'distil': True, 'output_attention': False, 'mix': True, 'padding': 0, 'batch_size': 32, 'learning_rate': 0.0001, 'loss': 'mse', 'lradj': 'type1', 'use_amp': False, 'num_workers': 0, 'itr': 1, 'train_epochs': 20, 'patience': 5, 'des': 'exp', 'use_gpu': True, 'gpu': 0, 'use_multi_gpu': True, 'devices': '0,1,2,3', 'device_ids': [0, 1, 2, 3], 'detail_freq': 'd'}


In [48]:
# Alias for the experiment class; the training cell instantiates it as Exp(args).
Exp = Exp_Informer

In [49]:
# Run identifier: dataset prefix, then the main hyper-parameters in a fixed
# order, then the description and a trailing run index (0).
setting = 'sp500_nrm_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_at{}_fc{}_eb{}_dt{}_mx{}_{}_{}'.format(
    *[
        getattr(args, field)
        for field in (
            'model', 'data', 'features',
            'seq_len', 'label_len', 'pred_len',
            'd_model', 'n_heads', 'e_layers', 'd_layers', 'd_ff',
            'attn', 'factor', 'embed', 'distil', 'mix', 'des',
        )
    ],
    0,
)

In [50]:
# Run args.itr independent train/test repetitions of the experiment.
for ii in range(args.itr):
    # setting record of experiments
    # `setting` was built with a fixed trailing run index; swap in the current
    # iteration so each repetition passes a distinct name to train()/test()
    # (presumably used to name checkpoints/results - otherwise repetitions
    # would all share one name). For ii == 0 the name is unchanged.
    run_setting = '{}_{}'.format(setting.rsplit('_', 1)[0], ii)

    # set experiments
    exp = Exp(args)

    # train
    print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(run_setting))
    exp.train(run_setting)

    # test
    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(run_setting))
    exp.test(run_setting)

    # Release cached GPU memory before the next repetition.
    torch.cuda.empty_cache()

Use GPU: cuda:0
>>>>>>>start training : sp500_nrm_informer_custom_ftMS_sl72_ll16_pl48_dm512_nh10_el4_dl2_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_exp_0>>>>>>>>>>>>>>>>>>>>>>>>>>
data.data_loader.Dataset_Custom
train
data_path ->  sp500_nrm.csv
scaling
data.data_loader.Dataset_Custom
val
data_path ->  sp500_nrm.csv
scaling
data.data_loader.Dataset_Custom
test
data_path ->  sp500_nrm.csv
scaling
loss ->  tensor(0.8338, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.9607, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.5505, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.4041, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.5191, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.3444, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.3331, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.3815, device='cuda:0', grad_fn=<L1LossBackward0>)
loss ->  tensor(0.4321, device='cuda:0', grad_fn=<L1LossBack