In [1]:
# 初始化配置
import qlib
from qlib.constant import REG_CN
data_uri = '~/.qlib/qlib_data/cn_data/'
qlib.init(provider_uri=data_uri, region=REG_CN)

# 使用"配置"进行实例化
from qlib.utils import init_instance_by_config
qdl_config = {
    "class": "QlibDataLoader",
    "module_path": "qlib.data.dataset.loader",
    "kwargs": {
        "config": {
            "feature": (['EMA($close, 10)', 'EMA($close, 30)'], ['EMA10', 'EMA30'] ),
            "label": (['Ref($close, -1)/$close - 1',],['RET_1',]),
        },
        "freq": 'day',
    },
}
qdl = init_instance_by_config(qdl_config)
market = 'csi300' # 沪深300股票池代码，在instruments文件夹下有对应的sh000300.txt
qdl.load(instruments=market, start_time='20200101', end_time='20200110')

[235289:MainThread](2024-11-13 21:35:54,387) INFO - qlib.Initialization - [config.py:416] - default_conf: client.
[235289:MainThread](2024-11-13 21:35:54,583) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[235289:MainThread](2024-11-13 21:35:54,584) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/home/hhh/.qlib/qlib_data/cn_data')}


Unnamed: 0_level_0,Unnamed: 1_level_0,feature,feature,label
Unnamed: 0_level_1,Unnamed: 1_level_1,EMA10,EMA30,RET_1
datetime,instrument,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2020-01-02,SH600000,9.171472,9.060854,0.010425
2020-01-02,SH600004,6.221383,6.198955,-0.007991
2020-01-02,SH600009,25.708969,25.645361,-0.000776
2020-01-02,SH600010,4.653900,4.625497,0.007519
2020-01-02,SH600011,2.117210,2.154742,0.000000
...,...,...,...,...
2020-01-10,SZ300347,17.478148,17.084019,-0.005612
2020-01-10,SZ300408,2.253388,2.172849,0.002635
2020-01-10,SZ300413,5.067583,4.742254,0.017136
2020-01-10,SZ300433,2.601543,2.478502,0.060436


In [2]:
# 实现一个自定义的特征集，MACD、RSI

from qlib.data.dataset.handler import DataHandlerLP

class MyFeatureSet(DataHandlerLP):
    def __init__(self,
                 instruments="csi300", 
                 start_time=None,
                 end_time=None,
                 freq="day",
                 infer_processors=[],
                 learn_processors=[],
                 fit_start_time=None,
                 fit_end_time=None,
                 process_type=DataHandlerLP.PTYPE_A,
                 filter_pipe=None,
                 **kwargs,
                ):
        data_loader = {
            "class": "QlibDataLoader",
            "kwargs": {
                "config": {
                    "feature": self.get_feature_config(),
                    "label": kwargs.get("label", self.get_label_config()), # label可以自定义，也可以使用初始化时候的设置
                },
                "filter_pipe": filter_pipe,
                "freq": freq,
                },
            }
        super().__init__(
            instruments=instruments,
            start_time=start_time,
            end_time=end_time,
            data_loader=data_loader,
            infer_processors=infer_processors,
            learn_processors=learn_processors,
            process_type=process_type,
        )
        
    def get_feature_config(self):
        
        MACD = '(EMA($close, 12) - EMA($close, 26))/$close - EMA((EMA($close, 12) - EMA($close, 26))/$close, 9)/$close'
        RSI = '100 - 100/(1+(Sum(Greater($close-Ref($close, 1),0), 14)/Count(($close-Ref($close, 1))>0, 14))/ (Sum(Abs(Greater(Ref($close, 1)-$close,0)), 14)/Count(($close-Ref($close, 1))<0, 14)))'
        
        return [MACD, RSI ], ['MACD', 'RSI']

    def get_label_config(self):
        return (["Ref($close, -1)/$close - 1"], ["LABEL"])

# 初始化的过程中已经完成的数据的load
my_feature = MyFeatureSet(instruments='csi300', start_time='2020-01-01', end_time='2020-06-30')

# my_feature.get_feature_config()
my_feature.fetch() # my_feature.fetch(col_set='feature') / my_feature.fetch(col_set='label')

[235289:MainThread](2024-11-13 21:35:56,399) INFO - qlib.timer - [log.py:127] - Time cost: 1.053s | Loading data Done
[235289:MainThread](2024-11-13 21:35:56,400) INFO - qlib.timer - [log.py:127] - Time cost: 0.000s | fit & process data Done
[235289:MainThread](2024-11-13 21:35:56,400) INFO - qlib.timer - [log.py:127] - Time cost: 1.054s | Init data Done


Unnamed: 0_level_0,Unnamed: 1_level_0,MACD,RSI,LABEL
datetime,instrument,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-02,SH600000,0.006354,76.146812,0.010425
2020-01-02,SH600004,0.002323,54.615391,-0.007991
2020-01-02,SH600009,0.002592,44.651718,-0.000776
2020-01-02,SH600010,0.003610,64.705887,0.007519
2020-01-02,SH600011,-0.006209,46.551727,0.000000
...,...,...,...,...
2020-06-30,SZ300413,0.054595,81.474556,-0.039496
2020-06-30,SZ300433,0.062837,88.799995,0.053560
2020-06-30,SZ300498,0.048884,46.736053,0.023864
2020-06-30,SZ300601,0.031392,87.047409,-0.009254


In [3]:
import qlib
from qlib.constant import REG_CN
data_uri = '~/.qlib/qlib_data/cn_data/'
qlib.init(provider_uri=data_uri, region=REG_CN)
from qlib.data.dataset.handler import DataHandlerLP

# 使用"配置"进行实例化
from qlib.utils import init_instance_by_config
from qlib.contrib.data.handler import Alpha158
from qlib.data.dataset import TSDatasetH
from qlib.contrib.model.pytorch_alstm_ts import ALSTM

# 配置数据
train_period = ("2017-01-01", "2018-12-31")
valid_period = ("2019-01-01", "2019-12-31")
test_period = ("2020-01-01", "2020-08-01")

dh = Alpha158(instruments='csi300', 
              start_time=train_period[0], 
              end_time=test_period[1],
             fit_start_time = "2018-01-01",
             fit_end_time = "2019-12-31",
              infer_processors= [
                        {"class": "RobustZScoreNorm", "kwargs": {"fields_group": "feature", "clip_outlier": "true"}},
                        {"class": "Fillna", "kwargs": {"fields_group": "feature"}},
                    ],
            learn_processors= [
                        "DropnaLabel",
                        {"class": "CSRankNorm", "kwargs": {"fields_group": "label"}},  # CSRankNorm
                    ],
             )
ds = TSDatasetH(handler=dh,
                step_len=20, # 时间步数
                segments={"train": train_period, 
                          "valid": valid_period, 
                          "test": test_period})


[235289:MainThread](2024-11-13 21:35:56,420) INFO - qlib.Initialization - [config.py:416] - default_conf: client.
[235289:MainThread](2024-11-13 21:35:56,422) INFO - qlib.Initialization - [__init__.py:74] - qlib successfully initialized based on client settings.
[235289:MainThread](2024-11-13 21:35:56,423) INFO - qlib.Initialization - [__init__.py:76] - data_path={'__DEFAULT_FREQ': PosixPath('/home/hhh/.qlib/qlib_data/cn_data')}


ModuleNotFoundError. CatBoostModel are skipped. (optional: maybe installing CatBoostModel can fix it.)
ModuleNotFoundError. XGBModel is skipped(optional: maybe installing xgboost can fix it).
-------------DropnaLabel: label


  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [None]:
ds.prepare("train")

<qlib.data.dataset.TSDataSampler at 0x7ebbf653b610>

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
#%%
from qlib.data.dataset.handler import DataHandlerLP
dl_train = ds.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)

In [None]:
dl_train.config(fillna_type="ffill+bfill")  # process nan brought by dataloader

In [None]:
train_loader = DataLoader(
dl_train, batch_size=30, shuffle=True, num_workers=0, drop_last=True
)

In [None]:
from transformerModule import train_transformer
train_transformer(train_loader)

ValueError: too many values to unpack (expected 2)

In [None]:
for batch_idx, inputs in enumerate(train_loader):  
    if batch_idx == 1:  
        one_batch_data = inputs
        break   


In [None]:
# 一个batch的数据
one_batch_data.shape

torch.Size([30, 20, 159])

In [None]:

# feature: (8192,20,158)
feature = one_batch_data[:, :, 0:-1]
# 8192
label = one_batch_data[:, -1, -1]
print(feature.shape)
print(label.shape)


torch.Size([30, 20, 158])
torch.Size([30])


In [None]:
feature.shape
linear1 = nn.Linear(feature.shape[2],512)
feature = linear1(feature.float())
feature.shape

torch.Size([30, 20, 512])

In [None]:
 
from transformerModule import PositionalEncoding, Attention, FeedForwardNetwork,AddNormLayer

num_features = feature.shape[2]
max_len = feature.shape[0]
instance = PositionalEncoding(num_features, max_len)
addNorm = AddNormLayer(num_features, dropout=0.1)
attn = Attention(num_features, dropout=0.1)

ImportError: cannot import name 'Attention' from 'transformerModule' (/home/hhh/proj/qlib-main/workspace/transformerModule.py)

In [None]:
posi_out=instance(feature.float())
addNorm_out = addNorm(posi_out, feature.float())
attn_out=attn(addNorm_out)


In [None]:
feedForward = FeedForwardNetwork(num_features)
feed_out = feedForward(attn_out)
# loss = nn.MSELoss()
# optimizer = optim.Adam(feedForward.parameters(), lr=0.001)

# for epoch in range(100):
#     optimizer.zero_grad()
#     pred = feedForward.forward(attn_out)
#     loss_val = loss(pred, target)
#     loss_val.backward()
#     optimizer.step()
#     if epoch % 10 == 0:
#         print(f"Epoch {epoch+1}: Loss {loss_val.item():.4f}")      

In [None]:
feed_out.shape

In [None]:
# 测试随机数种子
import torch
import numpy as np
seed = 32
np.random.seed(seed) 
torch.cuda.manual_seed_all(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():  
    print(torch.rand(3).cuda())  #

tensor([0.8757, 0.2721, 0.4141], device='cuda:0')


In [1]:
import numpy as np
import torch.nn as nn
import torch
import math
seed = 32
np.random.seed(seed) 
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.manual_seed(seed)
from transformerModule import PositionalEncoding, MultiHeadAttention2, MultiHeadAttention1, MultiHeadAttention
input = torch.randn(20,5,512)
# linear1 = nn.Linear(input.shape[2],512)
# linear2 = nn.Linear(input.shape[2],512)

# feature = linear1(input.float())
# re = linear2(input.float())
# print(feature)
# print(re)
mul_att1 = MultiHeadAttention(2,512,0)
mul_att_out1 = mul_att1.forward(input)
print(mul_att_out1)
mul_att2 = MultiHeadAttention1(512, 2)
mul_att_out2 = mul_att2.forward(input.float())
mul_att3 = MultiHeadAttention2(512, 2)
mul_att_out3 = mul_att3.forward(input.float())


tensor([[[ 0.3404, -0.0693, -0.0580,  ...,  0.1308,  0.0929,  0.1308],
         [ 0.1879, -0.1515, -0.1827,  ...,  0.1950,  0.0717,  0.1767],
         [ 0.3964, -0.1794, -0.1384,  ...,  0.2825,  0.1471,  0.2644],
         [ 0.2349, -0.1145, -0.2139,  ...,  0.2555,  0.0487,  0.1271],
         [ 0.4859, -0.1111, -0.1023,  ...,  0.1507,  0.0310,  0.1209]],

        [[-0.2961,  0.2756,  0.1975,  ..., -0.0398,  0.1809, -0.0574],
         [-0.2173,  0.1555,  0.2164,  ..., -0.1591,  0.1756,  0.0442],
         [-0.3517,  0.4233,  0.1924,  ..., -0.1060,  0.1913,  0.0360],
         [-0.3103,  0.5332,  0.1204,  ..., -0.1964,  0.1266,  0.0235],
         [-0.2907,  0.4850,  0.2990,  ..., -0.0789,  0.2371,  0.1415]],

        [[-0.1334,  0.4457,  0.0056,  ..., -0.2319,  0.3601,  0.0179],
         [ 0.1575,  0.4824,  0.1847,  ..., -0.1879,  0.2204,  0.1673],
         [-0.1338,  0.4777, -0.0485,  ..., -0.2391,  0.3283,  0.1268],
         [-0.1690,  0.5188,  0.1346,  ..., -0.1861,  0.3455,  0.1808],
  

In [2]:
mul_att_out1

tensor([[[ 0.3404, -0.0693, -0.0580,  ...,  0.1308,  0.0929,  0.1308],
         [ 0.1879, -0.1515, -0.1827,  ...,  0.1950,  0.0717,  0.1767],
         [ 0.3964, -0.1794, -0.1384,  ...,  0.2825,  0.1471,  0.2644],
         [ 0.2349, -0.1145, -0.2139,  ...,  0.2555,  0.0487,  0.1271],
         [ 0.4859, -0.1111, -0.1023,  ...,  0.1507,  0.0310,  0.1209]],

        [[-0.2961,  0.2756,  0.1975,  ..., -0.0398,  0.1809, -0.0574],
         [-0.2173,  0.1555,  0.2164,  ..., -0.1591,  0.1756,  0.0442],
         [-0.3517,  0.4233,  0.1924,  ..., -0.1060,  0.1913,  0.0360],
         [-0.3103,  0.5332,  0.1204,  ..., -0.1964,  0.1266,  0.0235],
         [-0.2907,  0.4850,  0.2990,  ..., -0.0789,  0.2371,  0.1415]],

        [[-0.1334,  0.4457,  0.0056,  ..., -0.2319,  0.3601,  0.0179],
         [ 0.1575,  0.4824,  0.1847,  ..., -0.1879,  0.2204,  0.1673],
         [-0.1338,  0.4777, -0.0485,  ..., -0.2391,  0.3283,  0.1268],
         [-0.1690,  0.5188,  0.1346,  ..., -0.1861,  0.3455,  0.1808],
  

In [3]:
mul_att_out2

tensor([[[-0.1202,  0.3292,  0.1323,  ..., -0.1233,  0.0701, -0.0498],
         [-0.1498,  0.3513,  0.1852,  ..., -0.1472,  0.0423, -0.0190],
         [-0.0857,  0.2320,  0.2141,  ..., -0.1329,  0.0818, -0.1206],
         [-0.1141,  0.4366,  0.1576,  ..., -0.1333,  0.0887, -0.0898],
         [-0.1479,  0.3208,  0.2210,  ..., -0.0845,  0.0514, -0.0484]],

        [[-0.0188, -0.0477, -0.0510,  ...,  0.1459, -0.0102,  0.0323],
         [-0.0213, -0.0317,  0.0122,  ...,  0.1948, -0.0639,  0.0419],
         [-0.0883, -0.0652,  0.0037,  ...,  0.1581, -0.0815,  0.1266],
         [-0.0895, -0.0191,  0.0118,  ...,  0.1126, -0.0149,  0.1010],
         [ 0.0312,  0.0019, -0.0158,  ...,  0.1715,  0.0554, -0.0089]],

        [[ 0.1493,  0.2193,  0.0422,  ...,  0.0860,  0.0684, -0.1021],
         [ 0.1401,  0.2497, -0.0285,  ...,  0.0481, -0.0394, -0.0598],
         [ 0.1341,  0.1841, -0.0650,  ...,  0.0503, -0.0361,  0.0206],
         [ 0.1200,  0.1772,  0.0207,  ...,  0.0520, -0.0047, -0.0378],
  

In [4]:
mul_att_out3

tensor([[[-0.0441,  0.4273, -0.3312,  ..., -0.6886,  0.4468, -0.5582],
         [ 0.2218,  0.7371, -0.4118,  ..., -0.7317,  0.3873, -0.6018],
         [ 0.1115,  0.5882, -0.3344,  ..., -0.6965,  0.4719, -0.5828],
         [ 0.0194,  0.5534, -0.3732,  ..., -0.6397,  0.4618, -0.6558],
         [-0.0019,  0.4656, -0.2999,  ..., -0.6250,  0.5249, -0.6105]],

        [[ 0.1214,  0.0786, -0.2339,  ..., -0.3898,  0.0762, -0.1128],
         [ 0.1330,  0.3012, -0.3015,  ..., -0.4583,  0.0755, -0.1020],
         [ 0.2969, -0.0069, -0.1047,  ..., -0.7321,  0.0875, -0.2396],
         [ 0.0493, -0.0197, -0.2020,  ..., -0.5070,  0.0664, -0.1928],
         [ 0.3583,  0.1236, -0.2715,  ..., -0.1624,  0.1628,  0.0235]],

        [[ 0.2954,  0.3353, -0.1986,  ...,  0.1587,  0.0823,  0.1010],
         [ 0.3411,  0.2549,  0.0070,  ...,  0.1322,  0.0889,  0.1851],
         [ 0.3220,  0.3814, -0.2431,  ...,  0.1742,  0.2077,  0.0083],
         [ 0.3170,  0.3355, -0.2013,  ...,  0.2339,  0.1440, -0.1071],
  

In [None]:
# 创建两个随机矩阵
A = torch.rand(2, 3, 4)
B = torch.rand(2,4, 3)
 
# 使用 torch.matmul 进行矩阵乘法
result_matmul = torch.matmul( B, A)
 
# 使用 @ 运算符进行矩阵乘法
result_at = A @ B
 
# 检查结果是否相等
print(result_matmul) 

tensor([[[0.2562, 0.8929, 0.5936, 0.4848],
         [0.2629, 0.3948, 0.3084, 0.2130],
         [0.5219, 1.4535, 0.9425, 0.7853],
         [0.5524, 0.8696, 0.2773, 0.4491]],

        [[0.4403, 0.9595, 0.1692, 0.9105],
         [0.6753, 0.7960, 0.1774, 0.6060],
         [0.2421, 0.4145, 0.3884, 0.2315],
         [0.7251, 1.0493, 0.1218, 0.9191]]])
