In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
from tqdm.auto import tqdm
import argparse
from factorvae import FactorVAE, FeatureExtractor, FactorDecoder, FactorEncoder, FactorPredictor, AlphaLayer, BetaLayer
from dataset import StockDataset

#### **Set Parameters**

In [15]:
args = {
    'batch_size': 300,
    'seq_len': 20,
    'num_latent': 6,
    'hidden_size': 1,
    'num_factor': 1,
    'lr': 0.0005,
    'num_epochs': 15
}

#### **Load Datasets**

In [16]:
df_train = pd.read_pickle('data/train.pkl')
df_valid = pd.read_pickle('data/valid.pkl')
df_test = pd.read_pickle('data/test.pkl')

df_train.columns = df_train.columns.droplevel(level=0)
df_valid.columns = df_valid.columns.droplevel(level=0)
df_test.columns = df_test.columns.droplevel(level=0)


df_train = df_train[['ROC10', 'MA5', 'STD5','BETA5', 'QTLU5', 'VMA5', 'LABEL0']]
df_valid = df_valid[['ROC10', 'MA5', 'STD5','BETA5', 'QTLU5', 'VMA5', 'LABEL0']]
df_test = df_test[['ROC10', 'MA5', 'STD5','BETA5', 'QTLU5', 'VMA5', 'LABEL0']]

In [17]:
ds_train = StockDataset(df_train, args['batch_size'], args['seq_len'])
ds_valid = StockDataset(df_valid, args['batch_size'], args['seq_len'])
ds_test = StockDataset(df_test, args['batch_size'], args['seq_len'])

In [18]:
train_dataloader = DataLoader(ds_train, batch_size=300, shuffle=False)
valid_dataloader = DataLoader(ds_valid, batch_size=300, shuffle=False)
test_dataloader = DataLoader(ds_test, batch_size=300, shuffle=False)

In [19]:
check_dataloader = DataLoader(ds_valid, batch_size=1, shuffle=False)

In [20]:
for hist, futr in check_dataloader:
    print(hist)
    print(hist.shape)
    print(futr)
    print(futr.shape)
    break

tensor([[[-2.0111,  0.7564,  0.4204, -0.3068,  1.4478,  0.5099],
         [ 0.0000, -2.3075,  3.0000,  3.0000, -1.4645, -0.6722],
         [ 0.0000, -2.1091,  3.0000,  3.0000, -0.8140, -2.0823],
         [ 0.6692,  1.0286,  2.0219, -1.9777,  2.7433,  2.1081],
         [ 1.1554,  0.0453, -1.0847, -0.0212, -0.1030, -0.7383],
         [-0.7030, -0.7916,  0.4710, -0.1283, -0.2992, -0.1427],
         [-0.6141, -0.9453,  0.7078,  1.1968, -0.2500, -0.9272],
         [-0.5405, -1.3194,  0.3118,  1.2065, -0.9047, -0.7059],
         [ 1.6439,  1.0330,  0.2461, -1.0557,  1.8713,  0.1003],
         [ 0.9570,  0.1799,  1.7301, -1.2433,  0.3301, -0.0951],
         [-1.7880, -0.0241,  0.0000,  0.0000, -0.4029, -0.2048],
         [ 0.5559, -0.0079,  0.3854, -0.7906,  0.6636,  0.6196],
         [ 1.0845, -0.2072, -0.3215,  0.0201, -0.1224,  0.1625],
         [ 1.2533, -0.4495, -0.7070,  0.0742, -0.4836, -1.4857],
         [-1.4838,  0.4985, -0.7398, -0.4975,  0.6383,  1.5637],
         [ 0.2246,  0.116

#### **Build FactorVAE Model**

In [21]:
feature_extractor = FeatureExtractor(num_latent = args['num_latent'], hidden_size = args['hidden_size'])

factor_encoder = FactorEncoder(num_factors = args['num_factor'], num_portfolio = args['num_latent'], hidden_size = args['hidden_size'])

alpha_layer = AlphaLayer(args['hidden_size'])
beta_layer = BetaLayer(args['hidden_size'], args['num_factor'])
factor_decoder = FactorDecoder(alpha_layer, beta_layer)

factor_predictor = FactorPredictor(args['batch_size'], args['hidden_size'], args['num_factor'])

factorVAE = FactorVAE(feature_extractor, factor_encoder, factor_decoder, factor_predictor)

#### **Train the Model**

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [23]:
factorVAE.to(device)

best_val_loss = 10000.0
optimizer = torch.optim.Adam(factorVAE.parameters(), lr = args['lr'])
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr = args['lr'], \
    steps_per_epoch = len(train_dataloader), epochs=args['num_epochs'])

In [24]:
def train(factor_model, dataloader, optimizer, args):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    factor_model.to(device)
    factor_model.train()

    total_loss = 0

    with tqdm(total=len(dataloader)-args['seq_len']+1) as pbar:

        for char, returns in dataloader:
            if char.shape[1] != args['seq_len']:
                continue
            inputs = char.to(device)
            labels = returns[:,-1].reshape(-1,1).to(device)
            inputs = inputs.float()
            labels = labels.float()
            
            optimizer.zero_grad()
            # print(inputs.shape)
            # print(labels.shape)
            loss, reconstruction, factor_mu, factor_sigma, pred_mu, pred_sigma = factor_model(inputs, labels)
            total_loss += loss.item() * inputs.size(0)
            loss.backward()
            optimizer.step()
            pbar.update(1)
        # print(loss)
    avg_loss = total_loss / len(dataloader.dataset)
    return avg_loss


@torch.no_grad()
def validate(factor_model, dataloader, args):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    factor_model.to(device)
    factor_model.eval()
    total_loss = 0

    with tqdm(total=len(dataloader)-args['seq_len']+1) as pbar:
        for char, returns in dataloader:
            if char.shape[1] != args['seq_len']:
                continue
            inputs = char.to(device)
            labels = returns[:,-1].reshape(-1,1).to(device)
            inputs = inputs.float()
            labels = labels.float()
            
            loss, reconstruction, factor_mu, factor_sigma, pred_mu, pred_sigma = factor_model(inputs, labels)
            total_loss += loss.item() * inputs.size(0)
            pbar.update(1)
            
    avg_loss = total_loss / len(dataloader.dataset)
    return avg_loss

In [25]:
for epoch in tqdm(range(args['num_epochs'])):

    train_loss = train(factorVAE, train_dataloader, optimizer, args)
    val_loss = validate(factorVAE, valid_dataloader, args)

    scheduler.step()
    print(f"Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}") 

    if val_loss < best_val_loss:
        best_val_loss = val_loss
 
        torch.save(factorVAE.state_dict(), "model.pt")

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 1: Train Loss: 2.0955, Validation Loss: 1.7580


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 2: Train Loss: 1.6883, Validation Loss: 1.4464


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 3: Train Loss: 1.3956, Validation Loss: 1.2103


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 4: Train Loss: 1.1753, Validation Loss: 1.0248


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 5: Train Loss: 1.0088, Validation Loss: 0.8799


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 6: Train Loss: 0.8835, Validation Loss: 0.7805


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 7: Train Loss: 0.7916, Validation Loss: 0.7080


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 8: Train Loss: 0.7223, Validation Loss: 0.6520


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 9: Train Loss: 0.6601, Validation Loss: 0.5877


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 10: Train Loss: 0.5599, Validation Loss: 0.4699


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 11: Train Loss: 0.4413, Validation Loss: 0.3676


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 12: Train Loss: 0.3474, Validation Loss: 0.2901


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 13: Train Loss: 0.2722, Validation Loss: 0.2268


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 14: Train Loss: 0.2156, Validation Loss: 0.1804


  0%|          | 0/1671 [00:00<?, ?it/s]

  0%|          | 0/244 [00:00<?, ?it/s]

Epoch 15: Train Loss: 0.1751, Validation Loss: 0.1485
