In [1]:
import pickle
import time

import numpy as np
import pandas as pd
import torch as th
from scipy import optimize
from torch import nn
from torch.cuda.amp import autocast,GradScaler
from torch.utils.data import Dataset,DataLoader

from dataset import PanelDataset
from model import SelfAttnModel,GinAttnModel,EMA,Stage2Model
from sam import SAM

## Load the required data

In [2]:
def get_data(csv_path="./qube/data/ashares_daily_stage1.csv"):
    """Load the stage-1 daily panel and build the lookup tables used downstream.

    Args:
        csv_path: Location of the stage-1 CSV file. Defaults to the path this
            notebook has always read, so a bare ``get_data()`` call behaves
            exactly as before.

    Returns:
        A 3-tuple ``(np_stage1_lite, dict_colname_lite, dict_trade_date)``:

        * ``np_stage1_lite`` -- NumPy array holding the selected columns, one
          row per CSV row, columns in the order listed in ``lite_columns``.
        * ``dict_colname_lite`` -- maps each selected column name to its
          column index inside ``np_stage1_lite``.
        * ``dict_trade_date`` -- maps each distinct ``trade_date`` value to
          its rank (0-based) in ascending order.

    Raises:
        KeyError: if the CSV lacks any of the selected columns (raised by
            pandas when indexing).
    """
    lite_columns=["norm_wma_open","norm_wma_close","norm_wma_high","norm_wma_low","diff_log_vol",
                  "f1","f2","f3","trade_date","return","stock_id"]

    df_stage1=pd.read_csv(csv_path)
    df_stage1_lite=df_stage1[lite_columns]
    np_stage1_lite=np.array(df_stage1_lite)

    # Column name -> positional index in the array returned above.
    dict_colname_lite={colname:i for i,colname in enumerate(df_stage1_lite.columns.values.tolist())}

    # Trade date -> chronological rank; sorting makes the index order-preserving.
    sorted_dates=np.sort(df_stage1_lite["trade_date"].unique()).tolist()
    dict_trade_date={trade_date:i for i,trade_date in enumerate(sorted_dates)}

    return np_stage1_lite,dict_colname_lite,dict_trade_date

## Loss functions and evaluation metrics

In [41]:
def mask_mse_loss(x,y):
    """Mean squared error restricted to entries whose label is not -1.

    Both tensors are squeezed first, so singleton dimensions are dropped
    before the mask is built from ``y``.

    Args:
        x: predictions.
        y: labels; positions equal to -1 are excluded from the average.

    Returns:
        Scalar tensor with the mean squared error over the kept entries.
    """
    pred=x.squeeze()
    target=y.squeeze()
    keep=target.ne(-1)  # -1 is the "ignore this entry" sentinel
    criterion=nn.MSELoss()
    return criterion(pred[keep],target[keep])

def mask_pcc_loss(x,y):
    """Return ``1 - Pearson correlation`` over entries whose label is not -1.

    The covariance is a population (1/n) mean, so the standard deviations
    must use the same estimator. The previous default (unbiased, n-1) scaled
    the correlation by (n-1)/n, meaning a perfectly correlated pair never
    reached a loss of 0.

    Args:
        x: predictions.
        y: labels; positions equal to -1 are excluded.

    Returns:
        Scalar tensor in [0, 2]: 0 for perfect positive correlation, 2 for
        perfect negative correlation.
    """
    pred=x.squeeze()
    target=y.squeeze()
    valid=(target!=-1)
    pred=pred[valid]
    target=target[valid]

    pred_centered=pred-th.mean(pred)
    target_centered=target-th.mean(target)
    cov=th.mean(pred_centered*target_centered)
    # unbiased=False keeps the denominator on the same 1/n footing as `cov`.
    denom=th.std(pred,unbiased=False)*th.std(target,unbiased=False)
    return 1-cov/denom

def mask_ccc_loss(x,y,only_long=False):
    """Return ``1 - concordance correlation coefficient`` over the kept entries.

    The coefficient is ``2*cov / (var_x + var_y + (mean_x - mean_y)**2)``.
    The covariance is a population (1/n) mean, so the variances use the same
    estimator. With the previous unbiased (n-1) variances, identical inputs
    produced a loss of 1/n instead of 0.

    Args:
        x: predictions.
        y: labels.
        only_long: when True keep only entries with ``y > 0``; otherwise keep
            every entry whose label is not -1.

    Returns:
        Scalar tensor; 0 when predictions and labels agree exactly.
    """
    pred=x.squeeze()
    target=y.squeeze()
    valid=(target>0) if only_long else (target!=-1)
    pred=pred[valid]
    target=target[valid]

    pred_mean=th.mean(pred)
    target_mean=th.mean(target)
    cov=th.mean((pred-pred_mean)*(target-target_mean))
    # Population variances, matching the 1/n covariance above.
    pred_var=th.std(pred,unbiased=False)**2
    target_var=th.std(target,unbiased=False)**2
    return 1-2*cov/(pred_var+target_var+(pred_mean-target_mean)**2)

def get_position(x,y):
    """Turn raw scores into long-only portfolio weights that sum to 1 per row.

    A score is kept only when it is positive and its label is not -1; every
    other entry gets weight 0. The kept scores are normalised by their row
    sum.

    Unlike the earlier version this does not modify ``x`` in place, so the
    caller's tensor (e.g. the model output or the stored predictions) is left
    intact for any later use. A row without any kept score yields an all-zero
    position instead of NaN from a 0/0 division.

    Args:
        x: 2-D tensor of scores (rows are normalised independently).
        y: tensor of labels with the same shape as ``x``.

    Returns:
        Tensor with the same shape as ``x`` holding the per-row weights.
    """
    valid=(x>0)&(y!=-1)
    # Out-of-place masking: gradients still flow only through the kept entries.
    weights=th.where(valid,x,th.zeros_like(x))
    total=th.sum(weights,dim=1,keepdim=True)
    # Weights are non-negative, so total==0 means the whole row is zero;
    # dividing by 1 there keeps the row at zero instead of producing NaN.
    safe_total=th.where(total>0,total,th.ones_like(total))
    return weights/safe_total
    
def mask_sharpe_loss(x,y,evaluation=False):
    """Sharpe-style score of the portfolio implied by the predictions.

    x num_day*1605
    y num_day*1605

    Positions come from ``get_position(x,y)``; the per-day portfolio return
    is the position-weighted sum of ``y`` along dim 1.

    Args:
        x: predictions.
        y: labels, used both as returns and for masking inside get_position.
        evaluation: selects the return convention, see below.

    Returns:
        ``evaluation=False``: the negated score, suitable for minimisation.
        ``evaluation=True``: a tuple ``(sr, nsr)`` where ``nsr`` is the same
        score recomputed after subtracting ``2*tf`` from every daily return.
    """
    rf=0.237    # presumably a risk-free-rate term -- TODO confirm units
    tf=0.0005   # presumably a per-trade fee; charged twice per day below -- TODO confirm
    num_td=252  # number of days used to annualise

    def _score(daily):
        # Annualised mean minus rf, over annualised population std, times 0.5.
        numerator=(num_td*th.mean(daily)-rf)*0.5
        return numerator/(th.std(daily,unbiased=False)*np.sqrt(num_td))

    daily_ret=th.sum(get_position(x,y)*y,dim=1) # (num_day,)
    gross=_score(daily_ret)
    if not evaluation:
        return -gross

    daily_ret-=2*tf
    return gross,_score(daily_ret)

In [16]:
def forward(model,stage1_model,optimizer,loss_func,src,label,seq_len,scaler,use_sam=False,num_stocks=1605):
    """Run one optimisation step of ``model`` on features from ``stage1_model``.

    The raw input is flattened to ``(-1, seq_len, feature_dim)``, passed
    through the stage-1 model, and the second element of its output is used
    as the stage-2 input after regrouping to ``(-1, num_stocks, dim)``.

    The stage-1 pass now runs under ``th.no_grad()``. Its output was already
    detached, so results are unchanged, but no autograd graph is built and
    thrown away for the stage-1 network.

    Args:
        model: stage-2 module being trained; put into train mode here.
        stage1_model: feature extractor called as
            ``stage1_model(src, stock_id=None, extra_output=True)``; must
            return an indexable whose item ``[1]`` is the feature tensor.
        optimizer: optimiser for ``model``. With ``use_sam=True`` it must
            provide ``first_step`` / ``second_step``.
        loss_func: callable ``(output, label) -> scalar tensor``.
        src: input tensor whose last dimension is the feature dimension.
        label: target tensor passed unchanged to ``loss_func``.
        seq_len: sequence length used to flatten ``src``.
        scaler: ``GradScaler`` used on the non-SAM path.
        use_sam: take the two-step SAM update instead of the scaled step.
        num_stocks: size of the cross-section; defaults to the previously
            hard-coded 1605.

    Returns:
        The loss tensor from the first forward pass.
    """
    model.train()
    src=src.view(-1,seq_len,src.shape[-1])
    # Stage-1 only supplies features here; skip graph construction entirely.
    with th.no_grad():
        src=stage1_model(src,stock_id=None,extra_output=True)[1].detach()
    src=src.view(-1,num_stocks,src.shape[-1])

    optimizer.zero_grad()
    with autocast():
        output=model(src)
        output=output.view(-1,num_stocks)
        loss=loss_func(output,label)

    if use_sam:
        # SAM: gradient at the current weights, perturb, then gradient at the
        # perturbed weights. Neither backward goes through the GradScaler.
        loss.backward()
        optimizer.first_step(zero_grad=True)
        loss_func(model(src).view(-1,num_stocks),label).backward()
        optimizer.second_step(zero_grad=True)
    else:
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
    return loss

%%time
def max_sr_optimization(pred,past_return):
    """Pick long-only weights that maximise predicted return over historical risk.

    Candidates are the assets whose prediction is strictly above
    ``max(median(pred), 0)``. For those, SLSQP minimises
    ``-(pred . w) / sqrt(w' C w)`` subject to ``sum(w) == 1`` and
    ``0 <= w <= 1``, where ``C`` is the sample covariance of the candidates'
    past returns (entries equal to -1 are replaced by 0 first).

    Edge cases that previously crashed are handled explicitly:
    no candidate -> all-zero position (was ZeroDivisionError);
    one candidate -> full weight on it (``np.cov`` of a single row is 0-d and
    cannot be used in the matrix products).

    Args:
        pred: 1-D tensor of per-asset predictions.
        past_return: 2-D tensor of historical returns, one row per day and
            one column per asset (it is transposed internally).

    Returns:
        1-D float64 CPU tensor with one weight per asset; assets that are not
        candidates get weight 0.
    """
    def neg_sharpe(weights,mu,cov):
        variance=np.matmul(np.matmul(weights,cov),weights.T)
        # Floor guards against 0 or tiny negative values from round-off.
        std=np.sqrt(max(float(variance),1e-16))
        return -(np.matmul(mu,weights.T)/std)

    def budget(weights):
        # Equality constraint: weights must sum to one.
        return np.sum(weights)-1

    pred=pred.detach().cpu().numpy()
    past_return=past_return.detach().cpu().numpy().T

    position=np.zeros(pred.shape[0])
    valid=(pred>max(np.quantile(pred,0.5),0))
    num_valid=int(valid.sum())
    if num_valid==0:
        return th.tensor(position)
    if num_valid==1:
        position[valid]=1.0
        return th.tensor(position)

    pred=pred[valid]
    # Boolean indexing copies, so the sentinel rewrite below never touches
    # the caller's tensor.
    past_return=past_return[valid]
    past_return[past_return==-1]=0
    cov=np.cov(past_return)

    # Start from equal weights, which already satisfy the constraints.
    xinit=np.repeat(1/num_valid,num_valid)
    cons=({"type":"eq","fun":budget})
    bnds=tuple((0,1) for _ in range(num_valid))

    opt=optimize.minimize(neg_sharpe,x0=xinit,args=(pred,cov),method="SLSQP",bounds=bnds,constraints=cons,tol=10**-3)
    position[valid]=opt['x']
    return th.tensor(position)

## Training

In [None]:
def train(analytic: bool = False) -> None:
    """Train the stage-2 model and evaluate it after every epoch.

    Each epoch has two phases:

    1. One pass over ``train_dataset`` via ``forward``, updating ``ema``
       after every batch.
    2. A walk through ``test_dataset`` one batch at a time with a fresh
       ``inc_model`` loaded from the just-saved checkpoint. Each test batch
       is first predicted, then appended to a rolling window on which one
       further training step is taken.

    Results are only printed (training loss, then IC / SR / NSR on the test
    set); nothing is returned. A CUDA device is required because tensors and
    models are moved with ``.cuda()`` / ``device="cuda"``.

    Args:
        analytic: when True, each day's raw prediction is replaced by the
            output of ``max_sr_optimization(pred, inc_label)`` before it is
            stored for evaluation.
    """
    np_stage1_lite,dict_colname_lite,dict_trade_date=get_data()
    
    # Seed every RNG in use and pin the CUDA device.
    GPU_VIS=0
    th.manual_seed(0)
    th.cuda.manual_seed(0)
    np.random.seed(0)
    th.cuda.set_device(GPU_VIS)

    seq_len=5
    train_bs,test_bs,inc_bs=100,1,100
    # Integer bounds handed to PanelDataset -- presumably yyyymmdd dates;
    # inclusive/exclusive semantics live in dataset.PanelDataset (TODO confirm).
    train_ranges=(20100000,20200000)
    test_ranges=(20200000,20210000)
    # Trade date whose index is inc_bs-1 positions before that of 20191231.
    # Raises KeyError if 20191231 is not a key of dict_trade_date.
    inc_start_day=[k for k,v in dict_trade_date.items() if v==dict_trade_date[20191231]-inc_bs+1][0]
    inc_ranges=(inc_start_day,20200000)

    train_set=PanelDataset(seq_len,np_stage1_lite,train_ranges,dict_colname_lite,dict_trade_date)
    test_set=PanelDataset(seq_len,np_stage1_lite,test_ranges,dict_colname_lite,dict_trade_date)
    inc_set=PanelDataset(seq_len,np_stage1_lite,inc_ranges,dict_colname_lite,dict_trade_date)

    # Only the training loader shuffles; test and incremental loaders keep dataset order.
    train_dataset=DataLoader(train_set,batch_size=train_bs,num_workers=4,pin_memory=False,shuffle=True)
    test_dataset=DataLoader(test_set,batch_size=test_bs,num_workers=4,pin_memory=False,shuffle=False)
    inc_dataset=DataLoader(inc_set,batch_size=inc_bs,num_workers=0,pin_memory=False,shuffle=False)

    EPOCH=10
    LR=1e-3
    DECAY=0.999

    # Keyword arguments for Stage2Model; reused below to build inc_model.
    args={
        "dropout":[0.1],
        "in_dim":32,
    }

    model=Stage2Model(**args).to(device="cuda")
    print(model)

    ema=EMA(model,DECAY)
    ema.register()

    # Stage-1 model: constructor kwargs come from the .pkl, weights from the .pt.
    # NOTE(review): pickle.load can execute arbitrary code -- only load trusted files.
    stage1_model_path="./qube/model/selfattn_st1_seq5_ic0.067_extra32"
    with open(f"{stage1_model_path}.pkl","rb") as f:
        stage1_model_args=pickle.load(f)

    stage1_model=SelfAttnModel(**stage1_model_args).to(device="cuda")
    stage1_model.load_state_dict(th.load(f"{stage1_model_path}.pt",map_location="cuda"))
    # Kept in eval mode; no optimizer is ever created for its parameters.
    stage1_model.eval()

    optimizer=th.optim.AdamW(model.parameters(),lr=LR,weight_decay=1e-3)
    # base_optimizer=th.optim.AdamW
    # optimizer=SAM(model.parameters(),base_optimizer,lr=LR,rho=0.1,weight_decay=1e-3)

    # Training objective: MSE + CCC loss + 0.01 * negative Sharpe-style score.
    def loss_func(x,y):
        return mask_mse_loss(x,y)+mask_ccc_loss(x,y)+0.01*mask_sharpe_loss(x,y)

    # One GradScaler is shared by the main and the incremental optimizer steps.
    scaler=GradScaler()
    for epoch in range(EPOCH):
        t1=time.time()
        model.train()
        total_loss=0
        for idx,(src,label) in enumerate(train_dataset):
            """
            src N*1605*seq_len*M
            label N*1605
            """
            src=src.cuda(non_blocking=True)
            label=label.cuda(non_blocking=True)
            loss=forward(model,stage1_model,optimizer,loss_func,src,label,seq_len,scaler,use_sam=False)
            ema.update() 
            total_loss+=loss.item()

        t2=time.time()
        print(f"[TRAIN] epoch {epoch+1} total loss {format(total_loss,'.4f')} elapsed time {int(t2-t1)}s")
        # Presumably swaps the EMA-averaged weights into `model` (see model.EMA --
        # TODO confirm); undone by ema.restore() at the end of the epoch.
        ema.apply_shadow()

        # Round-trip through disk so inc_model starts from a copy of the current state.
        th.save({
            "model_state_dict":model.state_dict(),
            "optimizer_state_dict":optimizer.state_dict(),
        },f"./qube/tmp/gpuvis{GPU_VIS}.pt")
        ckpt=th.load(f"./qube/tmp/gpuvis{GPU_VIS}.pt")

        inc_model=Stage2Model(**args).to(device="cuda")
        inc_model.load_state_dict(ckpt["model_state_dict"])
        # Fresh AdamW: the saved optimizer state is not loaded (the load is commented out).
        inc_optimizer=th.optim.AdamW(inc_model.parameters(),lr=LR,weight_decay=1e-3)
    #     inc_optimizer=SAM(inc_model.parameters(),base_optimizer,lr=LR,rho=0.1,weight_decay=1e-3)
    #     inc_optimizer.load_state_dict(ckpt["optimizer_state_dict"])
        inc_ema=EMA(inc_model,DECAY,ema.get_shadow())

        # Initial rolling window: the first batch (inc_bs items) of inc_dataset.
        inc_src,inc_label=next(iter(inc_dataset))
        inc_src=inc_src.to(device="cuda")
        inc_label=inc_label.to(device="cuda")

        # NOTE(review): 242 is hard-coded -- presumably the number of test
        # batches; confirm it equals len(test_dataset).
        all_pred=th.zeros(242,1605).cuda()
        all_label=th.zeros(242,1605).cuda()
        for idx,(src,label) in enumerate(test_dataset):
            inc_model.eval()
            inc_ema.apply_shadow()
            with th.no_grad():
                src=src.cuda(non_blocking=True)
                label=label.cuda(non_blocking=True)
                # Slide the input window: drop its first entry, append the current batch.
                inc_src=th.concat((inc_src[1:],src),dim=0)

                src=src.view(-1,seq_len,src.shape[-1])
                src=stage1_model(src,stock_id=None,extra_output=True)[1].detach()
                src=src.view(-1,1605,src.shape[-1])
                
                pred=inc_model(src)
                pred=pred.flatten()
                if analytic:
                    # inc_label here still excludes the current batch's label;
                    # it is appended only after the prediction is stored.
                    pred=max_sr_optimization(pred,inc_label).to("cuda")
                    
                all_pred[idx]=pred
                all_label[idx]=label
                inc_label=th.concat((inc_label[1:],label),dim=0)

            # Back to the raw weights, then one training step on the updated window.
            inc_ema.restore()
            forward(inc_model,stage1_model,inc_optimizer,loss_func,inc_src,inc_label,seq_len,scaler,use_sam=False)
            inc_ema.update()

        # IC is reported as 1 - PCC loss; SR / NSR come from the evaluation
        # branch of mask_sharpe_loss.
        ic_test=1-mask_pcc_loss(all_pred,all_label).item()
        sr_test,nsr_test=mask_sharpe_loss(all_pred,all_label,evaluation=True)
        sr_test,nsr_test=sr_test.item(),nsr_test.item()

        t3=time.time()
        print(f"[TEST] epoch {epoch+1} ic {format(ic_test,'.5f')} sr {format(sr_test,'.5f')} nsr {format(nsr_test,'.5f')} elapsed time {int(t3-t2)}s")
        print(f"{'-'*100}")
        ema.restore()

In [None]:
# Run training and per-epoch evaluation with the default analytic=False.
train()