In [1]:
import sys
sys.path.append('/root/persistence_data')
import _setup
# Config
from TechfinTorchAPI.config import *
from logger import get_logger
from TechfinDataAPI.utils.reader import pkl_reader
# Data
from TechfinTorchAPI.dataloader.pandas_dataloader import PandasDataset
from TechfinTorchAPI.dataloader.tensor_dataset import TensorListDataset
# Training Structure
from TechfinTorchAPI.engine.hooks import *
from TechfinTorchAPI.engine.trainer import SimpleTrainer
# Losses
from TechfinTorchAPI.models import *
from TechfinTorchAPI.components.loss import *
# eval
from TechfinTorchAPI.matrics.pandas_matrics import my_IC, my_IR
from TechfinTorchAPI.engine.BT_system import BTSystem
from TechfinDataAPI.utils.decorator import time_consumption

# 超参数

In [2]:
# ---------------------------------------------------------------------------
# Hyperparameters: every tunable value for this run is collected in this cell.
# ---------------------------------------------------------------------------

# Training budget and the cadence of evaluation / checkpointing hooks.
iter_num = 5000
eval_period = 300
checkpoint_freq = eval_period  # checkpoints are written on the evaluation cadence

# Batch sizes.
# NOTE(review): training_batch_size is not referenced by any visible cell; the
# DataLoader cell passes its own literal batch size — confirm which is intended.
training_batch_size = 100
test_batch_size = 1

# Model size and placement.
_hidden_size = 64
_num_layer = 1
_device = 'cuda'

# Optimisation settings.
_optimizer = 'adam'
learning_rate = 0.0001
_loss_func = '*'

# Directory handed to the checkpoint, best-model and iteration-writer hooks.
# NOTE(review): the directory name says "2layers" while _num_layer is 1 — confirm.
result_path = '/root/persistence_data/TechfinTorch/example/LSTM_result_along_2layers'

# 读取数据（Tensor List）[训练测试数据]

In [3]:
# Load the pre-built training and test sets. The type check in the following cell
# shows train_data is a Python list whose elements are torch.Tensor; test_data is
# presumably stored the same way.
# NOTE(review): `t` is never imported explicitly; presumably it is torch re-exported
# by the star import from TechfinTorchAPI.config — confirm.
# NOTE(review): the '*' paths look like redacted placeholders and must be replaced
# with real file paths. If `t.load` is torch.load it unpickles its input, so only
# load files from a trusted source.
train_data = t.load('*')
test_data = t.load('*')

In [4]:
# Sanity check: display the container type of train_data and the type of its first element.
type(train_data), type(train_data[0])

(list, torch.Tensor)

In [155]:
def _split_features_and_label(sample):
    """Split one sample tensor into a (features, label) pair, both cast to float32.

    Along the second axis, the features are every column except the last two and
    the label is the last column; the second-to-last column is not used.
    """
    features = sample[:, :-2].type(t.float32)
    label = sample[:, -1].type(t.float32)
    return features, label


# Wrap the raw tensor lists; the same transform is applied to both splits.
train_dataset = TensorListDataset(train_data, _split_features_and_label)
test_dataset = TensorListDataset(test_data, _split_features_and_label)

In [156]:
# Move both datasets to the device selected in the hyperparameter cell. The model is
# placed with `.to(_device)`, so the data must follow the same setting; a hard-coded
# 'cuda' here would leave data and model on different devices if `_device` changed.
# NOTE(review): the return value of the first call is discarded, which assumes
# TensorListDataset.to() moves the dataset in place — confirm against its implementation.
train_dataset.to(_device)
test_dataset.to(_device)

<TechfinTorchAPI.dataloader.tensor_dataset.TensorListDataset at 0x7f45f41b6ca0>

In [157]:
# Both loaders use identical settings: fixed batch size, original sample order.
# NOTE(review): the batch size is a literal here and does not use the
# training_batch_size / test_batch_size hyperparameters — confirm this is intended.
loader_options = dict(batch_size=1000, shuffle=False)
train_loader = DataLoader(train_dataset, **loader_options)
test_loader = DataLoader(test_dataset, **loader_options)

# 回测数据读取

In [8]:
def _factors_and_last_label(frame):
    """Return (all columns except the last two, the last column) of ``frame``."""
    return frame.iloc[:, :-2], frame.iloc[:, -1]


logger = get_logger(__name__)

# Read the frames used for back-testing.
# NOTE(review): pkl_reader presumably unpickles its input — only read trusted files.
# The '*' paths look like redacted placeholders.
train_data_ = pkl_reader('*')
test_data_ = pkl_reader('*')
logger.info('Data reading finishes')

# PandasDataset arguments, in order:
#   1. the source frame,
#   2. the columns to standardise cross-sectionally (None here),
#   3. the transform applied before output; it emits only the factors and label_2.
train_dataset_ = PandasDataset(train_data_, None, _factors_and_last_label)
test_dataset_ = PandasDataset(test_data_, None, _factors_and_last_label)

2021-10-20 14:22:57,955 - [<module>] - INFO: Data reading finishes


# 建模

In [65]:
# Input width: every column of the back-test frame except the last two
# (presumably the label_1 / label_2 columns used by the back-test metrics).
d_feat = len(train_data_.columns) - 2
model = Model(d_feat = d_feat,
              hidden_size = _hidden_size,
              num_layers = _num_layer,
              dropout = 0.3,
              only_end=False).to(_device)

# Build the optimizer over the parameters of `model`, the object that is handed to
# the trainer. The previous code referenced `model_lstm`, which this notebook never
# defines: on a fresh kernel that is a NameError, and with a stale kernel it would
# optimise a different model than the one being trained.
# The name is matched case-insensitively so 'adam'/'Adam' and 'sgd'/'SGD' all work.
_optimizer_name = _optimizer.lower()
if _optimizer_name == 'adam':
    optimizer = optim.Adam(model.parameters(), lr = learning_rate)
elif _optimizer_name == 'sgd':
    optimizer = optim.SGD(model.parameters(), lr = learning_rate)
else:
    # Fail loudly instead of leaving `optimizer` unbound (or stale from an earlier run).
    raise ValueError(
        "Unsupported optimizer {!r}; expected 'adam' or 'SGD'".format(_optimizer))


# 训练

In [159]:
    logger.info('Now the Training Process starts. {} max Iterations'.format(iter_num))
    trainer = SimpleTrainer(model, train_loader, optimizer, _loss_func)

    # Back-test system over the test frame: IR of the prediction against each label.
    back_testing_sys = BTSystem(test_dataset_)
    test_ir_metrics = {
        'label_1_IR': my_IR('prediction', 'label_1'),
        'label_2_IR': my_IR('prediction', 'label_2'),
    }
    back_testing_sys.register(test_ir_metrics)

    # Same metrics over the training frame, registered under "_Training" names.
    back_testing_sys2 = BTSystem(train_dataset_)
    train_ir_metrics = {
        'label_1_IR_Training': my_IR('prediction', 'label_1'),
        'label_2_IR_Training': my_IR('prediction', 'label_2'),
    }
    back_testing_sys2.register(train_ir_metrics)

    # Hooks are created and registered in this exact order.
    # NOTE(review): both evaluation hooks use the identical key 'IR_test,train', and the
    # recorded log at iteration 599 shows two identical result lines carrying the
    # test-set metric names — check that the training-set back-test is really
    # evaluated and reported under its own names.
    training_hooks = [
        # Progress bar.
        TqdmHookBase(period=2),
        # Test-set back-test every eval_period iterations.
        CustomEvalHook(period=eval_period,
                       eval_functions={'IR_test,train': back_testing_sys}),
        # Training-set back-test at half that frequency.
        CustomEvalHook(period=2 * eval_period,
                       eval_functions={'IR_test,train': back_testing_sys2}),
        # Loss on the test loader.
        BatchValLossHook(test_dataloader=test_loader,
                         period=eval_period,
                         test_batchsize=test_batch_size),
        # Persistence: periodic checkpoints, best model, per-iteration records.
        CheckpointHook(result_path=result_path, period=checkpoint_freq),
        BestModelHook(result_path=result_path),
        IterationWriterHook(result_path, eval_period),
    ]
    trainer.register_hooks(training_hooks)

    trainer.train(iter_num)

2021-10-20 15:12:58,719 - [<module>] - INFO: Now the Training Process starts. 5000 max Iterations
2021-10-20 15:12:58,724 - [train] - INFO: start training from the epoch 0
2021-10-20 15:12:58,728 - [before_train] - INFO: Model LSTMModel starts training at Wed Oct 20 15:12:58 2021
  0%|          | 1/5000 [00:00<09:54,  8.41it/s, loss=tensor(-0.0804), average_loss=tensor(-8.0445e-05)]

+------------------+------------+
|     Modules      | Parameters |
+------------------+------------+
| rnn.weight_ih_l0 |   22016    |
| rnn.weight_hh_l0 |   16384    |
|  rnn.bias_ih_l0  |    256     |
|  rnn.bias_hh_l0  |    256     |
|  fc_out.weight   |     64     |
|   fc_out.bias    |     1      |
+------------------+------------+
Total Trainable Params: 38977


2021-10-20 15:13:33,998 - [_do_eval] - INFO: Eval results at iterations 299: {'label_1_IR': 0.06099583704713495, 'label_2_IR': 0.0712840577306617}
2021-10-20 15:13:34,113 - [after_step] - INFO: Test Loss at iterations 299: -0.07414558529853821
2021-10-20 15:14:09,478 - [_do_eval] - INFO: Eval results at iterations 599: {'label_1_IR': -0.0013868054391489844, 'label_2_IR': 0.06884406135267125}
2021-10-20 15:14:32,210 - [_do_eval] - INFO: Eval results at iterations 599: {'label_1_IR': -0.0013868054391489844, 'label_2_IR': 0.06884406135267125}
