In [1]:
# Known-good sample: this train/test pair is the case that passed testing.
import pandas as pd
from pathlib import Path

# Single place to change when the dataset is relocated.
DATA_DIR = Path('/data/home/lichengzhang/zhoujun/HaimianData/20250325_split')
# Suffix shared by the train and test directory names.
SPLIT_SUFFIX = '74_5_300750.SZ'
# File loaded from each directory (presumably one trading day, YYYYMMDD — confirm).
DAY_FILE = '20200427.ftr'

# Both paths are derived from the same parts so train and test cannot drift apart.
train = pd.read_feather(DATA_DIR / f'train{SPLIT_SUFFIX}' / DAY_FILE)
test = pd.read_feather(DATA_DIR / f'test{SPLIT_SUFFIX}' / DAY_FILE)


In [2]:
# Data preparation
cat_cols = []
num_cols = [f'factor_{i}' for i in range(1, 113)]

# Rows whose 'y60_duo' value is strictly above this threshold get label 1, all others 0.
LABEL_THRESHOLD = 0.0022

# Vectorised labelling applied identically to both frames (replaces a per-row
# Python lambda). The comparison runs in float64 so the threshold is not rounded
# to a narrower column dtype; NaN compares as False and therefore maps to 0.
for frame in (train, test):
    frame['y'] = (
        frame['y60_duo'].astype('float64').gt(LABEL_THRESHOLD).astype('int64')
    )

In [3]:
# First 1000 training rows, restricted to three factors plus both target columns.
preview_columns = ['factor_0', 'factor_5', 'factor_6', 'y', 'y60_duo']
a = train.head(1000)[preview_columns]

In [4]:
from typing import Dict, Optional

import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
from omegaconf import DictConfig
from torch.utils.data import DataLoader
# Simple baseline model definition
class StockModel(pl.LightningModule):
    """Small MLP classifier trained with cross-entropy on the ``"y"`` target.

    The step methods read ``batch["continuous"]`` as the network input and
    ``batch["targets"]["y"]`` as the class-index target.

    Args:
        input_dim: Number of input features of the first linear layer.
        hidden_dim: Width of the single hidden layer.
        output_dims: Optional mapping kept on ``self.output_dims``; the network
            built here does not read it.
        num_classes: Number of logits produced by the last layer. The default
            of 2 reproduces the previously hard-coded binary head.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int = 64,
        output_dims: Optional[Dict[str, int]] = None,
        num_classes: int = 2,
    ):
        super().__init__()
        # Stores the constructor arguments on self.hparams (and in checkpoints).
        self.save_hyperparameters()

        # Network: Linear -> ReLU -> Linear producing one logit per class.
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        )
        self.output_dims = output_dims
        # Classification loss; expects raw logits and integer class targets.
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, continuous, categorical=None):
        """Return class logits for ``continuous``; ``categorical`` is accepted but ignored."""
        return self.network(continuous)

    def _compute_loss(self, batch):
        """Run the forward pass on a batch and return the cross-entropy loss against ``"y"``."""
        logits = self(batch["continuous"])
        # The supervised target is the "y" entry of the targets dict.
        return self.loss_fn(logits, batch["targets"]["y"])

    def training_step(self, batch, batch_idx):
        """Compute, log (per step and per epoch) and return the training loss."""
        loss = self._compute_loss(batch)
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute the validation loss and log it per epoch; nothing is returned."""
        loss = self._compute_loss(batch)
        self.log("val_loss", loss, on_epoch=True, prog_bar=True)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with learning rate 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)


In [None]:
from utils.loss import MultiTaskLoss


In [None]:
from config import DataConfig
from data import StockDataModule
from omegaconf import OmegaConf 
from models.mlp import MLPModel, MLPConfig
# Data-side settings: three continuous inputs and two targets, one declared as
# classification ("y") and one as regression ("y60_duo").
# NOTE(review): category_col/target_category presumably restrict rows to
# factor_0 == 7 — the sample printed by this cell has factor_0 == 7.0, which is
# consistent with that; confirm in StockDataModule.
config = DataConfig(
        continuous_cols=["factor_0", "factor_5", "factor_6"],
        target_cols=["y", "y60_duo"],
        task_types={"y": "classification", "y60_duo": "regression"},
        categorical_cols=None,
        category_col="factor_0",
        target_category=7,
        window_len=1,
        padding_value=0.0,
        batch_size=256,
        split_ratio=0.1,
        split_type="time",
        split_start=0.9
    )

# Model-side settings for the MLP.
models_config = MLPConfig(
    layers = "32-32"
)
# Wrap both config objects as OmegaConf structured configs.
config = OmegaConf.structured(config)
models_config = OmegaConf.structured(models_config)

# Merge all parameters (data + model) into a single config; `config` is rebound here.
config = OmegaConf.merge(
    OmegaConf.to_container(config), 
    OmegaConf.to_container(models_config)
    )

# Data module
data_module = StockDataModule(
    train = train, 
    config = config, 
    validation = None, # when None, the validation set is split off from the training set
    train_sampler = True, # enabled
    verbose = True,
)
# Let the data module derive additional settings and fold them into `config`.
infer_config = data_module.infer_config(config)
infer_config = OmegaConf.structured(infer_config)
config = OmegaConf.merge(config, OmegaConf.to_container(infer_config))
data_module.setup() 
traindataset = data_module.train_dataset


# Print one sample (index 1) to inspect the structure of a dataset item.
print(traindataset[1])

# Equal weight for both tasks.
task_weights = {"y": 1.0, "y60_duo": 1.0}
# Custom loss function
# NOTE(review): loss_fn is built here but is not referenced again in this cell —
# neither MLPModel(config) nor trainer.fit receives it. The recorded run of this
# cell ends in NotImplementedError("多任务学习需要自定义损失函数"), i.e. "multi-task
# learning requires a custom loss function", which presumably results from that
# omission — confirm how MLPModel expects to be given a custom loss.
loss_fn = MultiTaskLoss(task_types=config.task_types, task_weights=task_weights)

# Model module
model = MLPModel(config)

# Train for two epochs on device index 0.
trainer = pl.Trainer(max_epochs=2, devices = [0])
trainer.fit(model, data_module)

初始化 StockDataModule: split_type=time, batch_size=256
训练集大小: 15606, 验证集大小: 1734
{'continuous': tensor([ 7.0000, -0.9503, -0.2595]), 'categorical': tensor([]), 'targets': {'y': tensor(0), 'y60_duo': tensor(-0.0044)}}


NotImplementedError: 多任务学习需要自定义损失函数