In [1]:
import time
import torch
import torch.nn as nn
import torch.optim as opt
import torch.utils.data as Data
import numpy as np
import pandas as pd
import os
import sys
import multiprocessing
from attnset import ProbabilityAttention, FullAttention
from pretreatment import PositionalEmbedding, ExpandEmbedding
from module import Distilling, CrossLayer
from moduledata import AceUnitData

In [2]:
class AllAceData:
    """Rolling train / verify / test windows cut from one frame.

    Every window is kept twice: as the raw ``DataFrame`` slice (index reset) and
    wrapped in ``AceUnitData``.  The train lists hold ``back_num + 1`` entries
    (one initial block followed by one window per backtest round); the verify
    and test lists hold ``back_num`` entries each.

    Args:
        source_data: frame that is sliced positionally with ``iloc``.
        verify_size: number of rows a verify window advances the split point by.
        test_size: number of rows a test window advances the split point by.
        unit_size: forwarded to ``AceUnitData``; also sets the look-back below.
        predict_size: forwarded to ``AceUnitData``; also sets the look-back below.
        emd_col: forwarded to ``AceUnitData`` as ``emd_col``.
        result_col: forwarded to ``AceUnitData`` as ``result_col``.
        back_num: number of backtest rounds to generate.
    """

    def __init__(self, source_data: pd.DataFrame, verify_size: int, test_size: int, unit_size: int, predict_size: int, emd_col: list, result_col: list, back_num: int):
        super().__init__()
        self.true_train_set, self.former_train_set = [], []
        self.true_verify_set, self.former_verify_set = [], []
        self.true_test_set, self.former_test_set = [], []

        def store(start, stop, raw_bucket, unit_bucket):
            # Slice once, then keep the very same frame both raw and wrapped.
            window = source_data.iloc[start:stop].reset_index(drop=True)
            raw_bucket.append(window)
            unit_bucket.append(
                AceUnitData(window, unit_size, predict_size, emd_col=emd_col, result_col=result_col)
            )

        # Extra leading rows that every rolling window carries in front of its own span.
        lookback = unit_size - predict_size

        # Negative offset from the end of the frame; everything before it is the
        # initial training block.
        cursor = -back_num * test_size - verify_size - 1
        store(None, cursor, self.true_train_set, self.former_train_set)

        for _ in range(back_num):
            # Rolling train window: ends test_size rows past the cursor, or at -1
            # once the cursor is within test_size rows of the end.
            if cursor < -test_size:
                train_stop = cursor + test_size
            else:
                train_stop = -1
            store(cursor - lookback, train_stop, self.true_train_set, self.former_train_set)

            # Verify window: the verify_size rows ending at the advanced cursor.
            cursor += verify_size
            store(cursor - verify_size - lookback, cursor, self.true_verify_set, self.former_verify_set)

            # Test window: the test_size rows ending at the advanced cursor.
            cursor += test_size
            store(cursor - test_size - lookback, cursor, self.true_test_set, self.former_test_set)

            # Net movement per round is test_size: undo the verify advance.
            cursor -= verify_size

    def get_data(self):
        """Return the ``AceUnitData`` lists as ``(train, verify, test)``."""
        return self.former_train_set, self.former_verify_set, self.former_test_set

    def get_not_normaliza_data(self):
        """Return the raw ``DataFrame`` slice lists as ``(train, verify, test)``."""
        return self.true_train_set, self.true_verify_set, self.true_test_set

In [3]:
class ACEFormer(nn.Module):
    """Embedding -> distillation blocks -> attention stack -> linear projection.

    Pipeline, as wired in ``forward``:

    1. ``ExpandEmbedding`` output plus ``PositionalEmbedding`` output, then dropout.
    2. ``dis_layer`` distillation blocks.  Block ``i`` works at width
       ``embed_dim // 2**i``: a ``CrossLayer`` around ``ProbabilityAttention``,
       then ``Distilling``, then a learned ``(unit_size, width // 2)`` parameter
       is added.
    3. ``attn_layer`` ``CrossLayer`` blocks around ``FullAttention``; each one
       receives the running output and the distillation output.
    4. ``nn.Linear(width, 1)`` over the last dimension.

    Args:
        data_dim: first argument of ``ExpandEmbedding``.
        embed_dim: embedding width; also the width of the first distillation block.
        forward_dim: ``forward_dim`` of the distillation ``CrossLayer`` blocks.
        unit_size: first dimension of each learned temporal parameter
            (presumably the sequence length -- confirm against ``Distilling``).
        dis_layer: number of distillation blocks; must be at least 1.
        attn_layer: number of attention blocks after distillation.
        factor: forwarded to both attention classes.
        dropout: dropout probability for the input dropout and the ``CrossLayer`` blocks.
        activation: forwarded to the ``CrossLayer`` blocks.
        n_heads: forwarded to both attention classes (previously fixed at 8).

    Raises:
        ValueError: if ``dis_layer`` is smaller than 1.
    """

    def __init__(self, data_dim: int, embed_dim: int, forward_dim: int, unit_size: int, dis_layer: int = 3, attn_layer: int = 2, factor: int = 5, dropout: float = 0.1, activation: str = "relu", n_heads: int = 8):
        # The attention stack and the projection are sized from the last
        # distillation block, so at least one block is required.  Previously
        # this surfaced as an UnboundLocalError on a leaked loop variable.
        if dis_layer < 1:
            raise ValueError(f"dis_layer must be >= 1, got {dis_layer}")

        super(ACEFormer, self).__init__()
        self.dis_layer = dis_layer
        self.attn_layer = attn_layer

        # pretreatment module
        ## data embedding
        self.ExpandConv = ExpandEmbedding(data_dim, embed_dim)
        ## local position
        self.position_emb = PositionalEmbedding(embed_dim)
        ## dropout
        self.dropout = nn.Dropout(p=dropout)

        # distillation module
        ## temporal perception mechanism
        self.temporal = nn.ParameterList()
        ## distillation mechanism
        self.dis_attn = nn.ModuleList()
        self.distill = nn.ModuleList()
        ## create distillation module: the width halves from one block to the next
        stage_dims = [embed_dim // pow(2, num) for num in range(dis_layer)]
        for stage_dim in stage_dims:
            self.dis_attn.append(
                CrossLayer(
                    ProbabilityAttention(stage_dim, n_heads=n_heads, factor=factor),
                    embed_dim=stage_dim, forward_dim=forward_dim,
                    dropout=dropout, activation=activation
                )
            )
            self.distill.append(Distilling(stage_dim))
            # assumes Distilling halves the feature width and keeps unit_size
            # positions, so this parameter broadcasts onto its output -- TODO confirm
            self.temporal.append(nn.Parameter(torch.rand(unit_size, stage_dim // 2)))

        # attention module, sized from the last distillation block
        last_stage_dim = stage_dims[-1]
        attn_dim = last_stage_dim // 2
        self.attn = nn.ModuleList([
            CrossLayer(
                FullAttention(attn_dim, n_heads=n_heads, factor=factor),
                embed_dim=attn_dim, forward_dim=last_stage_dim * 2,
                dropout=dropout, activation=activation
            ) for _ in range(attn_layer)
        ])

        # projection
        self.full_connect = nn.Linear(attn_dim, 1, bias=True)

    def forward(self, data: torch.Tensor):
        """Run the full pipeline on ``data`` and return the projected output.

        The last dimension of the result has size 1 (``nn.Linear(..., 1)``).
        """
        # data embedding; the positional module receives the raw input, not the embedding
        data_emb = self.ExpandConv(data)
        local_position = self.position_emb(data)
        dis_input = data_emb + local_position
        dis_output = self.dropout(dis_input)

        # distilling module: self-attention, distillation, then the learned temporal term
        for i in range(self.dis_layer):
            attn_res = self.dis_attn[i](dis_output, dis_output)
            dis_res = self.distill[i](attn_res)
            dis_output = dis_res + self.temporal[i]

        # attention stack: every layer gets the distillation output as second input
        attn_output = dis_output
        for layer in self.attn:
            attn_output = layer(attn_output, dis_output)

        # projection
        output = self.full_connect(attn_output)

        return output

In [4]:
def train(model, train_data: Data.Dataset, batch_size: int, device: str = "cpu", iteration: int = 2000, lr: float = 0.001, log_interval: int = 100):
    """Fit ``model`` on ``train_data`` with Adam and MSE loss.

    Args:
        model: module to optimise; it is moved to ``device`` in place.
        train_data: dataset whose items unpack to ``(data, _, true_data)``.
        batch_size: batch size handed to the ``DataLoader`` (no shuffling is
            requested, so batches follow dataset order).
        device: device the model and every batch are moved to.
        iteration: number of passes over ``train_data``.
        lr: Adam learning rate.
        log_interval: print a progress line every this many epochs; a value
            <= 0 disables logging.

    Returns:
        The same ``model`` object, trained and left in training mode.
    """
    # Batches are moved to `device` below, so the parameters must live there too.
    model = model.to(device)

    # data to DataLoader
    train_loader = Data.DataLoader(train_data, batch_size)
    # loss and optimizer
    criterion = nn.MSELoss()
    optimizer = opt.Adam(model.parameters(), lr=lr)

    # training model
    model.train()
    start = time.time()
    for epoch in range(iteration):
        ## running totals for the mean epoch loss
        batch_count, loss_count = 0, 0.0

        for data, _, true_data in train_loader:
            data = data.float().to(device)
            true_data = true_data.float().to(device)

            outputs = model(data)
            loss = criterion(outputs, true_data)
            # .item() yields a plain float, so the log shows a number rather
            # than a tensor repr and no tensor is kept alive across batches.
            loss_count += loss.item()
            batch_count += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if log_interval > 0 and epoch % log_interval == 0:
            end = time.time()
            elapsed = end - start
            # An empty loader produces no batches; report nan instead of dividing by zero.
            mean_loss = loss_count / batch_count if batch_count else float("nan")
            print(
                "epoch=" + str(epoch)
                + ", loss=" + str(mean_loss)
                + ", use time=" + str(int(elapsed))
                + "s, in " + time.strftime("%m-%d %H:%M:%S", time.localtime())
                + ", predict next epoch in "
                + time.strftime("%m-%d %H:%M:%S", time.localtime(time.time() + elapsed))
            )
            start = time.time()

    return model

def test(model, test_data, predict_size: int, device: str):
    model.eval()
    true, predict = [], []
    with torch.no_grad():
        for (data, stamp, true_data) in test_data:
            data = torch.tensor(data).unsqueeze(0).float().to(device)
            stamp = torch.tensor(stamp).unsqueeze(0).int().to(device)

            true.append(true_data[-predict_size])
            outputs = model(data)
            predict.append(outputs.reshape(-1)[-predict_size:].tolist())

    true, predict = test_data.anti_normalize_data(np.array(true), np.array(predict))
    return true, predict

In [5]:
# Run configuration for the cells below.
# Device string handed to train() and test(); inputs are moved to it there.
device = "cpu"
# Number of experiment repetitions per dataset (not referenced in the cells visible here).
model_time = 1
# CSV file with the source data, relative to the working directory.
data_path = "stockdata/NDX100.csv"
# Number of backtest rounds: passed to AllAceData as back_num and used as the
# bound of the train/evaluate loop.
backtest_num = 1
# Number of training epochs passed to train().
iteration = 1
# Load the source data once; AllAceData slices this frame positionally.
source_data = pd.read_csv(data_path)

In [6]:
# Hyper-parameters.
# Mini-batch size passed to train().
batch_size = 64
# Columns passed to AllAceData / AceUnitData as emd_col; their count is the model's data_dim.
emd_col = ['close', 'close_x', 'close_y', 'vol', 'vol_x', 'vol_y']
# Column(s) passed to AllAceData / AceUnitData as result_col.
result_col = ['close']

In [7]:
# Window sizes shared by the dataset builder and the model. unit_size must be
# the same in both calls, so it is defined exactly once here.
verify_size = 50
test_size = 100
unit_size = 30
predict_size = 5

# Build the rolling train / verify / test datasets for the model.
print("create dataset")
data_set = AllAceData(
    source_data=source_data,
    verify_size=verify_size,
    test_size=test_size,
    unit_size=unit_size,
    predict_size=predict_size,
    emd_col=emd_col,
    result_col=result_col,
    back_num=backtest_num,
)
former_train_set, former_verify_set, former_test_set = data_set.get_data()
true_train_set, true_verify_set, true_test_set = data_set.get_not_normaliza_data()

# Create the model; data_dim follows the number of input columns.
print("create model")
model = ACEFormer(
    data_dim=len(emd_col),
    embed_dim=64,
    forward_dim=256,
    unit_size=unit_size,
    dis_layer=1,
    attn_layer=1,
    dropout=0.1,
    factor=5,
)

create dataset
create model


In [8]:
# Train once per backtest round, then evaluate the same model on that round's
# train, verify and test datasets. Results are collected per split.
train_true_set, train_predict_set = [], []
verify_true_set, verify_predict_set = [], []
test_true_set, test_predict_set = [], []
for i in range(backtest_num):
    # fit on this round's training data (the model carries over between rounds)
    model = train(model, former_train_set[i], batch_size, device, iteration)
    # evaluate in the fixed order train -> verify -> test; `true` and `predict`
    # therefore end up holding the test-split results of the last round
    for split_sets, true_store, predict_store in (
        (former_train_set, train_true_set, train_predict_set),
        (former_verify_set, verify_true_set, verify_predict_set),
        (former_test_set, test_true_set, test_predict_set),
    ):
        true, predict = test(model, split_sets[i], predict_size=5, device=device)
        true_store.append(true)
        predict_store.append(predict)

epoch=0, loss=tensor(0.2278), use time=3s, in 06-16 02:24:14, predict next epoch in 06-16 02:24:17


In [9]:
# Shapes of the arrays returned by the most recent test() call (the test split of
# the last backtest round).
# NOTE(review): test() keeps one target per window (true_data[-predict_size]) but
# predict_size predictions per window, so the second dimensions differ -- confirm
# this is intended before comparing the two arrays element-wise.
true.shape, predict.shape

((96, 1), (96, 5))