In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
from matplotlib import pyplot as plt
# Input file locations, written relative to the current working directory.
# macro_data_path is read with np.load; policy_data_path is read with pd.read_csv.
macro_data_path='./Data/Macro/macro_embedding.npy'
policy_data_path='./Data/Closeindex/Count_Y.csv'

## 加载数据

In [2]:
# Load the previously trained macro-data embeddings.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, so this should only ever be pointed at a trusted file.
macro_data = np.load(file=macro_data_path, allow_pickle=True)
np.shape(macro_data)

(199, 128)

In [3]:
# Read the label table, using the first CSV column as the row index, then
# rewrite every column label as a 'YYYY-MM' string.
policy_df = pd.read_csv(policy_data_path, index_col=0)
policy_df = policy_df.set_axis(
    pd.to_datetime(policy_df.columns).strftime('%Y-%m'),
    axis=1,
)
policy_df

Unnamed: 0,2007-02,2007-03,2007-04,2007-05,2007-06,2007-07,2007-08,2007-09,2007-10,2007-11,...,2022-11,2022-12,2023-01,2023-02,2023-03,2023-04,2023-05,2023-06,2023-07,2023-08
000015_上证红利,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,0
000052_上证50基本,0,0,0,0,0,0,0,1,0,1,...,0,1,1,0,0,0,0,0,0,0
000053_上证180基本,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0
000128_上证380基本,0,1,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000129_上证180波动,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
000130_上证380波动,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000135_上证180高贝,0,0,0,0,0,0,1,0,0,0,...,0,0,0,1,0,0,0,0,0,1
000136_上证180低贝,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,1,0,0,0
000137_上证380高贝,1,0,0,0,0,0,0,0,0,0,...,1,0,0,1,0,0,0,1,1,0
000138_上证380低贝,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
# Split the dataset by column label: columns up to and including '2015-12'
# form the training targets, columns from '2016-01' onward the test targets.
# The table is transposed so that each row of Y is one column of policy_df.
train_Y = torch.FloatTensor(policy_df.loc[:, :'2015-12'].to_numpy().T)
test_Y = torch.FloatTensor(policy_df.loc[:, '2016-01':].to_numpy().T)

# The macro rows are cut at the same position: the first len(train_Y) rows
# go to training, the remainder to testing.
train_X = torch.FloatTensor(macro_data[:len(train_Y)])
test_X = torch.FloatTensor(macro_data[len(train_Y):])


## 模型

In [5]:
# Classification model
class MLPRegressor(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear -> Sigmoid.

    Every output unit passes through a sigmoid, so each value lies in (0, 1).

    Args:
        input_size: number of input features. Defaults to train_X.size(1),
            evaluated once, when this class definition is executed.
        hidden_size: width of the hidden layer.
        output_size: number of output units. Defaults to train_Y.size(1),
            evaluated once, when this class definition is executed.
    """

    def __init__(self, input_size=train_X.size(1), hidden_size=256,output_size=train_Y.size(1)):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        # One output unit per target column (output_size units in total).
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)
        self.sigmod = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid-activated outputs for the input batch `x`."""
        hidden = self.relu(self.fc1(x))
        return self.sigmod(self.fc2(hidden))

## 超参数定义

In [58]:
# Training hyperparameters.
batch_size = 64
num_epochs = 200
learning_rate = 1.5e-4
seed = 42
num_indicators = train_X.shape[1]

# Seed torch's global RNG before the model is built so that the random
# weight initialisation (and anything drawn from the global RNG afterwards)
# is repeatable when the notebook is run top to bottom.
torch.manual_seed(seed)

# Network, loss and optimizer. BCELoss expects probabilities, which the
# model's final sigmoid layer provides.
model = MLPRegressor(hidden_size=256).to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

## 数据加载器生成

In [11]:
# Pair each feature row with its target row.
train_dataset = TensorDataset(train_X, train_Y)
test_dataset = TensorDataset(test_X, test_Y)

# Only the training batches are shuffled; the test order is left unchanged.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Display (number of training batches, number of test samples).
(len(train_loader), len(test_loader.dataset))

(2, 92)

## 训练

In [59]:
# Per-epoch history of the batch-averaged training and test losses.
train_losses = []
test_losses = []
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    train_loss = 0.0
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # ---- evaluation pass ----
    model.eval()
    test_loss = 0.0
    test_total_correct = 0
    test_total_samples = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()

            # Find the five highest-scoring outputs of every row (on the CPU).
            cpu_outputs = outputs.to('cpu')
            _, top5_indices = torch.topk(cpu_outputs, k=5, dim=1)
            # Build a 0/1 tensor of the same shape with ones at those positions.
            model_output_index = torch.zeros_like(cpu_outputs).scatter_(1, top5_indices, 1).to(device)
            # "Correct" means the 0/1 mask equals the target element-wise.
            test_total_correct += (model_output_index == targets).sum().item()
            test_total_samples += targets.size(0) * targets.size(1)

    # Epoch summaries: each loss is the mean of the per-batch losses.
    average_train_loss = train_loss / len(train_loader)
    average_loss = test_loss / len(test_loader)
    train_losses.append(average_train_loss)
    test_losses.append(average_loss)
    test_accuracy = test_total_correct / test_total_samples

    # Report every 20th epoch.
    if (epoch + 1) % 20 == 0:
        print(f'Epoch {epoch+1}/{num_epochs}, Training Loss: {average_train_loss:.4f} Test Loss: {average_loss:.4f}, Test Accuracy: {test_accuracy:.4f}')

Epoch 20/200, Training Loss: 13.4563 Test Loss: 12.5765, Test Accuracy: 0.7511
Epoch 40/200, Training Loss: 12.8294 Test Loss: 12.1211, Test Accuracy: 0.7504
Epoch 60/200, Training Loss: 9.5936 Test Loss: 9.4399, Test Accuracy: 0.7461
Epoch 80/200, Training Loss: 9.7047 Test Loss: 9.6332, Test Accuracy: 0.7461
Epoch 100/200, Training Loss: 9.6465 Test Loss: 9.6570, Test Accuracy: 0.7496
Epoch 120/200, Training Loss: 9.6493 Test Loss: 9.6111, Test Accuracy: 0.7546
Epoch 140/200, Training Loss: 9.6436 Test Loss: 9.6233, Test Accuracy: 0.7532
Epoch 160/200, Training Loss: 9.7296 Test Loss: 9.5904, Test Accuracy: 0.7482
Epoch 180/200, Training Loss: 9.5490 Test Loss: 9.5968, Test Accuracy: 0.7518
Epoch 200/200, Training Loss: 9.7720 Test Loss: 9.5613, Test Accuracy: 0.7525


## 保存模型

In [60]:
# Persist only the model's state dict (its parameters), not the module object.
torch.save(obj=model.state_dict(), f='monthly_model_dict.pth')

## 加载模型

In [61]:
# Rebuild the network (it stays on the CPU here) and restore the saved weights.
model = MLPRegressor(hidden_size=256)
# map_location='cpu': the checkpoint was written from a model that had been
# moved to `device`, which may be CUDA. Without remapping, torch.load would
# try to restore the tensors onto that device and fail on a CPU-only machine.
model.load_state_dict(torch.load('monthly_model_dict.pth', map_location='cpu'))

<All keys matched successfully>

## 每个月选取该宏观情境下最适合的前五个子策略

导入模型，将宏观变量输入模型中

In [63]:
# Load the previously trained macro-data embeddings and convert them to a
# float32 tensor.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, so this should only ever be pointed at a trusted file.
macro_data = np.load(macro_data_path, allow_pickle=True)
macro_data = torch.tensor(macro_data, dtype=torch.float32)

# Pure inference: switch to eval mode and disable gradient tracking so no
# autograd graph is built for the forward pass.
model.eval()
with torch.no_grad():
    result = model(macro_data)

选取前五的标签

In [69]:
# For every row of `result`, take the column positions of its five largest values.
top5_indices = torch.topk(result, k=5, dim=1).indices
# Build a 0/1 int64 matrix with ones at those positions, then transpose it as
# a NumPy array so that rows and columns are swapped relative to `result`.
model_output_index = (
    torch.zeros_like(result)
    .scatter_(1, top5_indices, 1)
    .to(device)
    .to(torch.int64)
    .cpu()
    .numpy()
    .T
)

In [70]:
# Label the 0/1 selection matrix with the same row index and column labels
# as policy_df.
policy_result_df = pd.DataFrame(
    data=model_output_index,
    index=policy_df.index,
    columns=policy_df.columns,
)
policy_result_df

Unnamed: 0,2007-02,2007-03,2007-04,2007-05,2007-06,2007-07,2007-08,2007-09,2007-10,2007-11,...,2022-11,2022-12,2023-01,2023-02,2023-03,2023-04,2023-05,2023-06,2023-07,2023-08
000015_上证红利,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000052_上证50基本,0,0,0,0,0,0,0,1,1,1,...,0,0,0,1,0,0,0,1,1,1
000053_上证180基本,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000128_上证380基本,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000129_上证180波动,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000130_上证380波动,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000135_上证180高贝,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
000136_上证180低贝,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
000137_上证380高贝,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
000138_上证380低贝,0,0,0,0,0,0,1,1,0,1,...,0,0,0,0,0,0,1,0,0,0


保存在文件中

In [71]:
# Write the selection table to a CSV file in the current working directory.
policy_result_df.to_csv(path_or_buf='top_5_policy.csv')