## 使用ML方法组合估计器

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import importlib
import sys
sys.path.append('C:/Users/Administrator/Desktop/Repositories/Low-Frequency-Spread-Estimator')
sys.path.append('C:/Users/Handsome Bad Guy/Desktop/Repositories/Low-Frequency-Spread-Estimator')

from SpreadEstimator.SpreadEstimator import SpreadEstimator
from mytools.AutoTester import AutoTester

from time import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim

from sklearn.linear_model import LinearRegression

In [2]:
# Instantiate the project's spread estimator; its loaded data is reached through `se.data`.
se = SpreadEstimator()
# Boolean mask that is True wherever the close price is not NaN.
univ = ~np.isnan(se.data.data_dic['close'])  # valid universe

#### 构造因子矩阵

In [3]:
# Evaluate two spread-estimator formulas and stack their outputs as the two
# feature channels of `signal`.
sig = dict()

# 'HL' factor: expression built from high/low prices, written in the formula
# syntax understood by SpreadEstimator.test_factor.
fml = (
    'minus{div{powv{tsmean{powv{minus{div{high,low},1},2},10},0.5},0.4142},'
    'powv{div{tsmean{powv{minus{div{tsmax{high,2},tsmin{low,2}},1},2},10},0.1716},0.5}}'
)
stats, signal = se.test_factor(fml, corr_type='linear', method='cs', spread_type='spread')
sig['HL'] = signal

# 'ROLL' factor: negated `tscov` of consecutive close-to-close changes.
# NOTE(review): presumably a 5-period rolling covariance -- confirm against
# the formula operators of SpreadEstimator.
fml = 'neg{tscov{tsdelta{close,1},tsdelay{tsdelta{close,1},1},5}}'
stats, signal = se.test_factor(fml, corr_type='linear', method='cs', spread_type='spread')
# Negative values have no real square root: zero them in place first, take
# the root, then floor the result at 0.01. NaN entries pass through untouched.
np.putmask(signal, signal < 0, 0)
signal = np.sqrt(signal)
np.putmask(signal, signal <= 0.01, 0.01)
sig['ROLL'] = signal

# Feature tensor: last axis holds (HL, ROLL), stored as float32.
signal = np.stack((sig['HL'], sig['ROLL']), axis=-1).astype(np.float32)

mean corr: -0.0895, positive_corr_ratio: 0.0558, corr_IR: -1.6103
mean corr: 0.3027, positive_corr_ratio: 0.8608, corr_IR: 1.1464


In [13]:
def get_xy(x, y, univ):
    """Flatten panel data into sample arrays, keeping only masked entries.

    Args:
        x: feature array indexed as ``x[i, j, k]``; the last axis is the
            feature axis.
        y: target array indexed as ``y[i, j]``.
        univ: boolean mask indexed as ``univ[i, j]``; True marks entries
            to keep.

    Returns:
        A tuple ``(features, targets)``: the selected rows of ``x`` stacked
        vertically in order of the first axis (shape ``(n_samples, n_features)``)
        and the matching entries of ``y`` concatenated (shape ``(n_samples,)``).
        When the mask selects nothing, empty arrays with those shapes are
        returned instead of raising from ``np.vstack``.
    """
    feature_rows = []
    target_rows = []
    for i in range(len(x)):
        mask = univ[i]
        # Skip slices of the first axis in which nothing is selected.
        if np.sum(mask) == 0:
            continue
        feature_rows.append(x[i, mask, :])
        target_rows.append(y[i, mask])

    if not feature_rows:
        # np.vstack / np.hstack reject empty lists, so build the empty
        # result explicitly with matching trailing shape and dtypes.
        x_arr = np.asarray(x)
        n_features = x_arr.shape[-1] if x_arr.ndim else 0
        return (
            np.empty((0, n_features), dtype=x_arr.dtype),
            np.empty((0,), dtype=np.asarray(y).dtype),
        )
    return np.vstack(feature_rows), np.hstack(target_rows)

In [14]:
# Replace missing factor values in place with the 0.01 floor value, then
# flatten the panel into sample arrays restricted to the valid universe.
np.putmask(signal, np.isnan(signal), 0.01)
x, y = get_xy(x=signal, y=se.data.spread, univ=univ)

#### 测试NN

In [15]:
sys.path.append('C:/Users/Administrator/Desktop/Daily-Frequency-Quant/QBG')
sys.path.append('C:/Users/HBG/Desktop/Repositories/Daily-Frequency-Quant/QBG')

from Model.MyDeepModel import *
from Model.Loss import *
from Model.tools.fitting_tools import *
from Model.tools.test_tools import *

In [17]:
# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Convert the sample arrays to float32 tensors on the chosen device.
x = torch.as_tensor(x, dtype=torch.float32, device=device)
# Add a trailing axis so the targets line up with the (n_samples, 1) model output.
y = torch.as_tensor(y, dtype=torch.float32, device=device).unsqueeze(-1)

#### Linear

In [29]:
# Baseline: a single affine layer mapping the two factor inputs to one output.
model = nn.Linear(2, 1).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-2, weight_decay=1e-2)
# NOTE(review): MSELoss presumably comes from the wildcard import of
# Model.Loss -- confirm it matches the intended squared-error loss.
loss_func = MSELoss()

# Not used in this cell; kept in case other cells rely on these names.
best_IC = 0
top = univ

# The first n_train samples are used for fitting, the remainder is held out.
n_train = 400000
x_train, y_train = x[:n_train], y[:n_train]
x_test, y_test = x[n_train:], y[n_train:]

t = time()
for i in range(2000):
    # Full-batch gradient step on the training slice.
    model.train()
    optimizer.zero_grad()
    loss = loss_func(model(x_train), y_train)
    loss.backward()
    optimizer.step()

    if (i + 1) % 200 == 0:
        model.eval()
        print('epoch {}'.format(i + 1))
        print('loss: {:.4f}, time used: {:.4f}s'.format(float(loss)*1e4, time()-t))
        # Evaluation needs no gradients; skipping autograd avoids building a
        # graph over the held-out samples.
        with torch.no_grad():
            test_loss = float(loss_func(model(x_test), y_test))
        print('test loss: {:.4f}, time used: {:.4f}s'.format(test_loss*1e4, time()-t))

epoch 200
loss: 4.7572, time used: 0.3460s
test loss: 3.8838, time used: 0.3470s
epoch 400
loss: 4.4164, time used: 0.4481s
test loss: 3.7790, time used: 0.4481s
epoch 600
loss: 4.4152, time used: 0.5512s
test loss: 3.7806, time used: 0.5522s
epoch 800
loss: 4.4152, time used: 0.6543s
test loss: 3.7806, time used: 0.6543s
epoch 1000
loss: 4.4152, time used: 0.7574s
test loss: 3.7806, time used: 0.7574s
epoch 1200
loss: 4.4152, time used: 0.8595s
test loss: 3.7806, time used: 0.8595s
epoch 1400
loss: 4.4152, time used: 0.9621s
test loss: 3.7806, time used: 0.9621s
epoch 1600
loss: 4.4152, time used: 1.0692s
test loss: 3.7806, time used: 1.0702s
epoch 1800
loss: 4.4152, time used: 1.1728s
test loss: 3.7806, time used: 1.1728s
epoch 2000
loss: 4.4152, time used: 1.2759s
test loss: 3.7806, time used: 1.2769s


#### GateNet

In [30]:
class GateNet(nn.Module):
    """Small gated feed-forward network with one residual block.

    Layer widths are ``input_dim -> 16 -> 8 -> 8 -> output_dim``. Each dense
    layer's input passes through dropout and a ``Gate`` module; the third
    dense layer is wrapped in a residual connection.

    NOTE(review): ``Gate`` is not defined in this notebook; presumably it is
    provided by the wildcard import of ``Model.MyDeepModel`` -- confirm its
    semantics there.

    Args:
        input_dim: number of input features.
        output_dim: number of outputs.
        dropout: dropout probability passed to ``nn.Dropout``.
        alpha: negative slope passed to ``nn.LeakyReLU``.
    """

    def __init__(self, input_dim: int, output_dim: int, dropout: float = 0.7, alpha: float = 0.2):
        super().__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Backward-compatible alias for the original mis-capitalised attribute.
        self.output_Dim = output_dim

        self.Dense1 = nn.Linear(input_dim, 16)
        self.Dense2 = nn.Linear(16, 8)
        self.Dense3 = nn.Linear(8, 8)
        self.Dense4 = nn.Linear(8, output_dim)

        # One gate per dense layer input (gate3 acts on the residual branch output).
        self.gate0 = Gate(input_dim)
        self.gate1 = Gate(16)
        self.gate2 = Gate(8)
        self.gate3 = Gate(8)

        self.act = nn.LeakyReLU(alpha)

        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Run the network on ``x`` and return the output of the final dense layer."""
        x = self.act(self.Dense1(self.gate0(self.dropout(x))))
        x = self.act(self.Dense2(self.gate1(self.dropout(x))))
        # Residual block: the gated, activated Dense3 branch is added back to its input.
        x = x + self.dropout(self.gate3(self.act(self.Dense3(self.gate2(self.dropout(x))))))
        x = self.Dense4(x)
        return x

In [34]:

# Gated network on the same two factor inputs.
model = GateNet(input_dim=2, output_dim=1, dropout=0.2, alpha=0.2).to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-3, weight_decay=1e-2)
# NOTE(review): MSELoss presumably comes from the wildcard import of
# Model.Loss -- confirm it matches the intended squared-error loss.
loss_func = MSELoss()

# The first n_train samples are used for fitting, the remainder is held out.
n_train = 400000
x_train, y_train = x[:n_train], y[:n_train]
x_test, y_test = x[n_train:], y[n_train:]

t = time()
for i in range(1000):
    # Full-batch gradient step on the training slice (dropout active).
    model.train()
    optimizer.zero_grad()
    loss = loss_func(model(x_train), y_train)
    loss.backward()
    optimizer.step()

    if (i + 1) % 100 == 0:
        model.eval()
        print('epoch {}'.format(i + 1))
        print('loss: {:.4f}, time used: {:.4f}s'.format(float(loss)*1e4, time()-t))
        # Evaluation needs no gradients; skipping autograd avoids building a
        # graph over the held-out samples.
        with torch.no_grad():
            test_loss = float(loss_func(model(x_test), y_test))
        print('test loss: {:.4f}, time used: {:.4f}s'.format(test_loss*1e4, time()-t))
        # NOTE(review): `test` presumably comes from Model.tools.test_tools;
        # the meaning of the arguments 20 and 240 should be confirmed there.
        corr = test(model, signal, se.data.spread, univ, 20, 240)
        print('vs IC: {:.4f}'.format(np.mean(corr)))

epoch 100
loss: 5.2283, time used: 11.1840s
test loss: 3.7970, time used: 11.1860s
vs IC: 0.4742
epoch 200
loss: 4.6191, time used: 22.6325s
test loss: 3.7757, time used: 22.6345s
vs IC: 0.4806
epoch 300
loss: 4.4495, time used: 33.7848s
test loss: 3.7678, time used: 33.7868s
vs IC: 0.4881
epoch 400
loss: 4.3889, time used: 44.9934s
test loss: 3.7552, time used: 44.9954s
vs IC: 0.4916
epoch 500
loss: 4.3446, time used: 56.2653s
test loss: 3.7400, time used: 56.2663s
vs IC: 0.4934
epoch 600
loss: 4.3077, time used: 67.4819s
test loss: 3.7406, time used: 67.4839s
vs IC: 0.4952
epoch 700
loss: 4.3132, time used: 78.8388s
test loss: 3.7393, time used: 78.8408s
vs IC: 0.4961
epoch 800
loss: 4.2777, time used: 90.1702s
test loss: 3.7437, time used: 90.1722s
vs IC: 0.4975
epoch 900
loss: 4.3034, time used: 101.4013s
test loss: 3.7408, time used: 101.4043s
vs IC: 0.4981
epoch 1000
loss: 4.2896, time used: 112.6566s
test loss: 3.7432, time used: 112.6586s
vs IC: 0.4990


In [35]:
# Release cached, currently unused GPU memory held by PyTorch's CUDA caching allocator.
torch.cuda.empty_cache()