In [1]:
# Setup commands, left commented out. Run them once (in a shell, or by
# uncommenting them here) to fetch the Informer2020 code, the ETDataset data
# and the Python packages this notebook imports.
# !git clone https://github.com/zhouhaoyi/Informer2020.git
# !git clone https://github.com/zhouhaoyi/ETDataset.git

# pip install numpy==1.* torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128

# pip install pandas

In [2]:
import sys

# Put the Informer2020 directory on the import path so that its `utils` and
# `exp` packages can be imported. The membership test keeps a re-run of this
# cell from adding the same entry a second time.
if 'Informer2020' not in sys.path:
    sys.path.append('Informer2020')

from utils.tools import dotdict
from exp.exp_informer import Exp_Informer
import numpy as np
import torch

# Environment report, one value per line: the PyTorch version string, whether
# CUDA is available, and torch.version.cuda.
print(
    torch.__version__,
    f"torch.cuda.is_available: {torch.cuda.is_available()}",
    f"torch.version.cuda: {torch.version.cuda}",
    sep="\n",
)

2.7.0+cu128
torch.cuda.is_available: True
torch.version.cuda: 12.8


In [3]:
# Experiment configuration handed to Exp_Informer. Every option is stored as
# an attribute on a single dotdict instance.
args = dotdict()

# --- Model variant ---
args.model = 'informer'  # model of experiment, options: [informer, informerstack, informerlight(TBD)]

# --- Data ---
args.data = 'ETTm1'  # dataset name; our data: NIFTY100_1m, test data: ETTm1
args.root_path = './ETDataset/ETT-small/'  # root path of the data file
# Forecasting task, options: [M, S, MS]
#   M:  multivariate predict multivariate
#   S:  univariate predict univariate
#   MS: multivariate predict univariate
args.features = 'M'
# Freq for time features encoding, options: [s:secondly, t:minutely, h:hourly,
# d:daily, b:business days, w:weekly, m:monthly]; a more detailed freq such as
# 15min or 3h can also be used.
args.freq = 't'
args.checkpoints = './informer_checkpoints'  # location of model checkpoints

# --- Sequence lengths ---
# Informer decoder input: concat[start token series(label_len), zero padding series(pred_len)]
args.seq_len = 180  # input sequence length of Informer encoder
args.label_len = 48  # start token length of Informer decoder
args.pred_len = 5  # prediction sequence length

# --- Architecture ---
args.factor = 10  # probsparse attn factor
args.d_model = 512  # dimension of model
args.n_heads = 4  # num of heads
args.e_layers = 2  # num of encoder layers
args.d_layers = 1  # num of decoder layers
args.d_ff = 1024  # dimension of fcn in model
args.dropout = 0.05  # dropout
args.attn = 'prob'  # attention used in encoder, options: [prob, full]
args.embed = 'timeF'  # time features encoding, options: [timeF, fixed, learned]
args.activation = 'gelu'  # activation
args.distil = True  # whether to use distilling in encoder
args.output_attention = True  # whether to output attention in encoder
args.mix = True
args.padding = 0

# --- Optimisation ---
args.batch_size = 400
args.learning_rate = 0.0001
args.loss = 'mse'
args.lradj = 'type1'
args.use_amp = False  # whether to use automatic mixed precision training

# --- Run control ---
args.num_workers = 0
args.itr = 1
args.train_epochs = 30
args.patience = 3
args.des = 'exp'

# --- Hardware ---
# torch.cuda.is_available() already returns a bool, so it is stored directly.
args.use_gpu = torch.cuda.is_available()
args.gpu = 0

args.use_multi_gpu = False
args.devices = '0,1,2,3'

print(f"GPU in use: {args.use_gpu}")

GPU in use: True


In [4]:
# Set arguments by using the data name.
#
# Each data_parser entry, looked up by args.data, provides:
#   'data' - CSV file name, copied to args.data_path
#   'T'    - target feature in the S or MS task, copied to args.target
#   'M', 'S', 'MS' - [enc_in, dec_in, c_out] for that forecasting task, i.e.
#                    encoder input size, decoder input size and output size
data_parser = {
    'ETTh1': {'data': 'ETTh1.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTh2': {'data': 'ETTh2.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTm1': {'data': 'ETTm1.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'ETTm2': {'data': 'ETTm2.csv', 'T': 'OT', 'M': [7, 7, 7], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    # NOTE(review): 'MS' lists 7 input features here while 'M' lists 6 -
    # confirm which one matches the columns of NIFTY100_1m.csv.
    'NIFTY100_1m': {'data': 'NIFTY100_1m.csv', 'T': 'close', 'M': [6, 6, 6], 'S': [1, 1, 1], 'MS': [7, 7, 1]},
    'GAZP_combo': {'data': 'GAZP_combo.csv', 'T': 'gazp', 'M': [6, 6, 6], 'S': [1, 1, 1], 'MS': [6, 6, 1]},
    'GAZP_combo_2': {'data': 'GAZP_combo_2.csv', 'T': 'gazp', 'M': [6, 6, 6], 'S': [1, 1, 1], 'MS': [6, 6, 1]},
}

# Fail early, with a readable message, on an unknown dataset or task instead
# of surfacing a bare KeyError from the lookups below.
if args.data not in data_parser:
    raise ValueError(f"No {args.data} in data_parser.keys()")
if args.features not in ('M', 'S', 'MS'):
    raise ValueError(
        f"Unsupported args.features {args.features!r}; expected one of 'M', 'S', 'MS'"
    )

data_info = data_parser[args.data]
args.data_path = data_info['data']
args.target = data_info['T']  # target feature in S or MS task
# args.enc_in - encoder input size
# args.dec_in - decoder input size
# args.c_out  - output size
args.enc_in, args.dec_in, args.c_out = data_info[args.features]

In [5]:
# Keep the freq string exactly as configured in args.detail_freq, then cut
# args.freq down to its final character.
configured_freq = args.freq
args.detail_freq = configured_freq
args.freq = configured_freq[-1:]

In [6]:
# Clear PyTorch's CUDA cache before the experiment is built.
torch.cuda.empty_cache()

# `setting` is the experiment identifier: the hyper-parameters from `args`
# joined into one string, ending with the run index `ii`. It is passed to
# exp.train and exp.test below.
ii = 0
setting = (
    f'{args.model}_{args.data}_ft{args.features}_sl{args.seq_len}_ll{args.label_len}'
    f'_pl{args.pred_len}_dm{args.d_model}_nh{args.n_heads}_el{args.e_layers}_dl{args.d_layers}_df{args.d_ff}'
    f'_at{args.attn}_fc{args.factor}_eb{args.embed}_dt{args.distil}_mx{args.mix}_{args.des}_{ii}'
)

# Build the experiment from the configuration.
exp = Exp_Informer(args)

# Training run.
print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
exp.train(setting)

# Test run.
print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
exp.test(setting)

# Clear the CUDA cache again now that training and testing are finished.
torch.cuda.empty_cache()

Use GPU: cuda:0
>>>>>>>start training : informer_ETTm1_ftM_sl180_ll48_pl5_dm512_nh4_el2_dl1_df1024_atprob_fc10_ebtimeF_dtTrue_mxTrue_exp_0>>>>>>>>>>>>>>>>>>>>>>>>>>

train 55560

val 6964

test 6964
	iters: 100, epoch: 1 | loss: 0.1153699
	speed: 0.3425s/iter; left time: 1384.0073s
Epoch: 1 cost time: 45.939656496047974
Epoch: 1, Steps: 138 | Train Loss: 0.2081040 Vali Loss: 0.1303649 Test Loss: 0.1473155
Validation loss decreased (inf --> 0.130365).  Saving model ...
Updating learning rate to 0.0001
	iters: 100, epoch: 2 | loss: 0.0795015
	speed: 0.6156s/iter; left time: 2402.7018s
Epoch: 2 cost time: 44.66612100601196
Epoch: 2, Steps: 138 | Train Loss: 0.0901085 Vali Loss: 0.1123780 Test Loss: 0.1346495
Validation loss decreased (0.130365 --> 0.112378).  Saving model ...
Updating learning rate to 5e-05
	iters: 100, epoch: 3 | loss: 0.0762265
	speed: 0.6348s/iter; left time: 2390.1109s
Epoch: 3 cost time: 48.05846285820007
Epoch: 3, Steps: 138 | Train Loss: 0.0809722 Vali Loss: 0.1075

In [7]:
import os

# Build a fresh experiment object and restore the checkpoint stored under
# <args.checkpoints>/<setting>/checkpoint.pth.
ext = Exp_Informer(args)
path = os.path.join(ext.args.checkpoints, setting)
best_model_path = os.path.join(path, 'checkpoint.pth')
# map_location='cpu' lets the file load regardless of the device it was saved
# from; load_state_dict then copies the values into the model's own parameters.
ext.model.load_state_dict(torch.load(best_model_path, map_location='cpu'))

# Everything below runs on `ext`, the object whose weights were just restored.
# The previous version loaded the checkpoint into `ext` but then predicted with
# `exp`, so the restored model was never actually used.
pred_data, pred_loader = ext._get_data(flag='pred')

ext.model.eval()

preds = []

# Inference only: no_grad() skips autograd bookkeeping for the forward passes.
with torch.no_grad():
    for batch_x, batch_y, batch_x_mark, batch_y_mark in pred_loader:
        pred, _true = ext._process_one_batch(
            pred_data, batch_x, batch_y, batch_x_mark, batch_y_mark)
        preds.append(pred.detach().cpu().numpy())

# Stack the per-batch outputs and merge all leading axes into one, keeping the
# last two axes of the predictions as they are.
preds = np.array(preds)
preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])

# Displayed as the cell output: the predictions passed through the dataset's
# inverse_transform.
pred_data.inverse_transform(preds)

Use GPU: cuda:0
pred 1


array([[[14.34109736,  4.11343317,  9.62179671,  1.93871046,
          4.66827986,  1.44486514,  8.78766807],
        [14.39241874,  4.06452512,  9.48739623,  1.96010379,
          4.5791833 ,  1.43437699,  8.28596629],
        [14.42261884,  4.1347334 ,  9.68108617,  1.97527742,
          4.65006038,  1.43875528,  8.52632974],
        [14.51175294,  4.05762681,  9.58978683,  1.93271255,
          4.66752982,  1.42588657,  8.42428758],
        [14.43198136,  4.0985133 ,  9.5598187 ,  2.00983661,
          4.53544439,  1.42060802,  8.17950219]]])

In [8]:
# Run exp.vali over the 'pred' split with the experiment's own criterion; the
# returned value is shown as the cell output.
# NOTE(review): confirm that batches from the 'pred' split carry real target
# values before reading this number as a forecast error.
criterion = exp._select_criterion()
exp.vali(pred_data, pred_loader, criterion)

0.13300876

In [9]:
# Show the pred dataset's stored input array (data_x) after passing it through
# the dataset's inverse_transform.
stored_inputs = pred_data.data_x
pred_data.inverse_transform(stored_inputs)

array([[12.45800018,  5.4920001 ,  9.70100021, ...,  2.16300011,
         0.88300002,  8.51200008],
       [14.33399963,  5.69299984,  9.91399956, ...,  4.5079999 ,
         1.24899995,  8.58199978],
       [14.53499985,  5.55900002,  9.80799961, ...,  4.47700024,
         1.24899995,  8.72299957],
       ...,
       [10.7840004 ,  3.34899998,  7.        , ...,  3.74600005,
         1.43200004,  9.42599964],
       [11.65499973,  3.6170001 ,  7.53299999, ...,  4.17299986,
         1.523     ,  9.42599964],
       [12.99400043,  3.81800008,  8.24400043, ...,  4.72100019,
         1.523     ,  9.77799988]])