In [1]:
import pandas as pd
import numpy as np
import torch as th
from torch.utils.data import Dataset,DataLoader
from torch import nn
from torch.cuda.amp import autocast,GradScaler
import time
from dataset import NormalDataset
from model import SelfAttnModel,GinAttnModel,EMA

## get necessary data

In [2]:
def get_data(path="./qube/data/ashares_daily_stage1.csv"):
    """Load the stage-1 CSV and keep only the columns used downstream.

    Args:
        path: Location of the stage-1 CSV file. Defaults to the path the
            notebook has always read from.

    Returns:
        A tuple ``(df_stage1_lite, dict_colname_lite)`` where
        ``df_stage1_lite`` is the DataFrame restricted to the selected
        columns (in the order listed below) and ``dict_colname_lite`` maps
        each kept column name to its positional index in that frame.

    Raises:
        KeyError: if any of the selected columns is missing from the CSV.
    """
    lite_columns = [
        "norm_wma_open", "norm_wma_close", "norm_wma_high", "norm_wma_low",
        "diff_log_vol", "f1", "f2", "f3", "trade_date", "return", "stock_id",
    ]
    df_stage1 = pd.read_csv(path)
    df_stage1_lite = df_stage1[lite_columns]
    # Column name -> position, so array consumers can index columns by name.
    dict_colname_lite = {
        colname: i for i, colname in enumerate(df_stage1_lite.columns.tolist())
    }
    return df_stage1_lite, dict_colname_lite

## loss & eval

In [7]:
def pcc_loss(x,y):
    """Return ``1 - r`` where ``r`` is the Pearson correlation of ``x`` and ``y``.

    Both inputs are squeezed first, so ``(N,)`` and ``(N, 1)`` tensors are
    treated alike. Covariance and both variances use the same population
    (divide-by-N) estimator; mixing a mean covariance with the default
    sample ``th.std`` would scale ``r`` by ``(N-1)/N`` and keep the loss
    from reaching 0 on perfectly correlated inputs.

    Args:
        x: Tensor of predictions.
        y: Tensor of targets with the same number of elements as ``x``.

    Returns:
        A 0-d tensor in ``[0, 2]``; NaN if either input has zero variance.
    """
    x = x.squeeze()
    y = y.squeeze()
    x_centered = x - th.mean(x)
    y_centered = y - th.mean(y)
    cov_xy = th.mean(x_centered * y_centered)
    var_x = th.mean(x_centered ** 2)
    var_y = th.mean(y_centered ** 2)
    return 1 - cov_xy / (var_x * var_y) ** 0.5

def ccc_loss(x,y):
    """Return ``1 - CCC`` where CCC is the concordance correlation coefficient.

    ``CCC = 2*cov(x, y) / (var(x) + var(y) + (mean(x) - mean(y))**2)``.
    Covariance and variances all use the population (divide-by-N)
    estimator so that identical inputs give a loss of exactly 0; using the
    default sample ``th.std`` here would leave a residual loss of ``1/N``.

    Args:
        x: Tensor of predictions (squeezed before use).
        y: Tensor of targets with the same number of elements as ``x``.

    Returns:
        A 0-d tensor in ``[0, 2]``; NaN if the denominator is zero.
    """
    x = x.squeeze()
    y = y.squeeze()
    x_mean = th.mean(x)
    y_mean = th.mean(y)
    x_centered = x - x_mean
    y_centered = y - y_mean
    cov_xy = th.mean(x_centered * y_centered)
    var_x = th.mean(x_centered ** 2)
    var_y = th.mean(y_centered ** 2)
    return 1 - 2 * cov_xy / (var_x + var_y + (x_mean - y_mean) ** 2)

def extra_ic(x,y):
    """Print the correlation of every column of ``x`` with ``y``.

    For each index along the last dimension of ``x`` the column ``x[:, i]``
    is scored as ``1 - pcc_loss(column, y)``, formatted to four decimals,
    and the resulting list of strings is printed on one line.
    """
    n_columns = x.shape[-1]
    ic = []
    for col in range(n_columns):
        corr = 1 - pcc_loss(x[:, col], y).item()
        ic.append(format(corr, ".4f"))
    print(f"[TEST] extra ic {ic}")

## train

In [14]:
def train(np_stage1_lite=None,dict_colname_lite=None):
    """Train a ``SelfAttnModel`` with mixed precision and report test IC per epoch.

    Args:
        np_stage1_lite: Array form of the stage-1 table handed to
            ``NormalDataset``. When ``None`` it is built with
            ``np.array`` from the DataFrame returned by ``get_data()``.
        dict_colname_lite: Mapping from column name to column index in
            ``np_stage1_lite``. When ``None`` the mapping returned by
            ``get_data()`` is used.

    Side effects:
        Seeds torch and numpy, selects CUDA device 0, prints the model and
        per-epoch train/test summaries. Requires a CUDA device.
    """
    # Only hit the disk when the caller did not supply the data; a bare
    # ``train()`` call therefore works and explicit arguments are honoured.
    if np_stage1_lite is None or dict_colname_lite is None:
        df_stage1_lite, loaded_colname_lite = get_data()
        if np_stage1_lite is None:
            np_stage1_lite = np.array(df_stage1_lite)
        if dict_colname_lite is None:
            dict_colname_lite = loaded_colname_lite

    GPU_VIS = 0
    th.manual_seed(0)
    th.cuda.manual_seed(0)
    np.random.seed(0)
    th.cuda.set_device(GPU_VIS)

    seq_len = 5
    # Half-open looking (start, end) bounds passed to NormalDataset; judging by
    # the values they are YYYYMMDD integers -- TODO confirm against NormalDataset.
    train_ranges = (20100000, 20200000)
    test_ranges = (20200000, 20210000)
    train_bs, test_bs = int(1e5), int(1e5)

    train_set = NormalDataset(seq_len, np_stage1_lite, train_ranges, dict_colname_lite)
    test_set = NormalDataset(seq_len, np_stage1_lite, test_ranges, dict_colname_lite)
    train_dataset = DataLoader(train_set, batch_size=train_bs, num_workers=4,
                               pin_memory=True, shuffle=True)
    test_dataset = DataLoader(test_set, batch_size=test_bs, num_workers=4,
                              pin_memory=True, shuffle=False)

    EPOCH = 1
    LR = 1e-3
    DECAY = 0.999  # only used by the optional EMA hooks commented out below
    MSE_WEIGHT = 0  # MSE term is kept in the loss expression but weighted out

    args = {
        "dropout": [0.1, 0.3, 0.3],
        "in_dim": 8,
        "dim_emb_id": 3,
        "dim_lstm_hid": 32,
    }

    model = SelfAttnModel(**args).to(device="cuda")
    print(model)

    # ema=EMA(model,DECAY)
    # ema.register()

    optimizer = th.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-3)
    mse = nn.MSELoss()

    def loss_func(x, y):
        return MSE_WEIGHT * mse(x, y) + ccc_loss(x, y)

    scaler = GradScaler()
    for epoch in range(EPOCH):
        t1 = time.time()
        model.train()
        total_loss = 0
        for src, stock_id, label in train_dataset:
            src = src.cuda(non_blocking=True)
            stock_id = stock_id.cuda(non_blocking=True)
            label = label.cuda(non_blocking=True)
            optimizer.zero_grad()
            with autocast():
                output = model(src, stock_id).flatten()
                loss = loss_func(output, label)

            # Scale the loss so low-precision gradients do not underflow.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            # ema.update()
            total_loss += loss.item()

        t2 = time.time()
        print(f"[TRAIN] epoch {epoch+1} total loss {total_loss} elapsed time {t2-t1}s")
        # ema.apply_shadow()
        model.eval()
        # Collect per-batch results and concatenate afterwards, so the test
        # set size and the width of ``extra`` are not capped by a
        # pre-allocated buffer.
        pred_chunks, label_chunks, extra_chunks = [], [], []
        with th.no_grad():
            for src, stock_id, label in test_dataset:
                src = src.cuda(non_blocking=True)
                stock_id = stock_id.cuda(non_blocking=True)
                label = label.cuda(non_blocking=True)

                # The third positional argument makes the model return a
                # second output alongside the prediction; it is assumed to be
                # (batch, features) -- TODO confirm against SelfAttnModel.
                pred, extra = model(src, stock_id, True)
                pred_chunks.append(pred.flatten().float())
                label_chunks.append(label.flatten().float())
                extra_chunks.append(extra.float())

            if pred_chunks:
                all_pred = th.cat(pred_chunks)
                all_label = th.cat(label_chunks)
                all_extra = th.cat(extra_chunks)
                ic_test = 1 - pcc_loss(all_pred, all_label).item()
                extra_ic(all_extra, all_label)
            else:
                # Empty test loader: there is nothing to correlate.
                ic_test = float("nan")

        t3 = time.time()
        print(f"[TEST] epoch {epoch+1} ic {ic_test} elapsed time {t3-t2}s")
        print(f"{'-'*100}")
        # ema.restore()

SelfAttnModel(
  (emb_id): Embedding(1605, 3)
  (mlp1): Linear(in_features=8, out_features=32, bias=False)
  (mlp2): Linear(in_features=192, out_features=32, bias=False)
  (mlp3): Linear(in_features=32, out_features=10, bias=False)
  (fc): Linear(in_features=10, out_features=1, bias=False)
  (act1): Sequential(
    (0): PReLU(num_parameters=1)
    (1): Dropout(p=0.1, inplace=False)
  )
  (act2): Sequential(
    (0): PReLU(num_parameters=1)
    (1): Dropout(p=0.1, inplace=False)
  )
  (act3): Sequential(
    (0): PReLU(num_parameters=1)
    (1): Dropout(p=0.1, inplace=False)
  )
  (lstm): LSTM(32, 32, batch_first=True, dropout=0.3)
  (self_attn): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=32, out_features=32, bias=True)
  )
)
[TRAIN] epoch 1 total loss 34.79574555158615 elapsed time 35.51377558708191s
[TEST] extra ic ['0.0580', '-0.0175', '0.0575', '0.0461', '0.0314', '-0.0272', '0.0408', '0.0583', '0.0667', '-0.0712']
[TEST] epoch 1 ic 0.05187129974

In [None]:
# train() takes the stage-1 table as an array plus its column-index map,
# so load them here and pass both explicitly.
df_stage1_lite,dict_colname_lite=get_data()
train(np.array(df_stage1_lite),dict_colname_lite)