In [1]:
# for correct relative imports
import sys; sys.path.append("../var_dgm")

In [2]:
import warnings
warnings.filterwarnings("ignore")

import torch
from torch.utils.data import DataLoader, TensorDataset

from diffusers import DDPMScheduler
import pandas as pd
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import pprint

from sklearn.preprocessing import StandardScaler

from var_dgm import TimeGrad
from var_dgm.basic_models import HistoricalSimulation, VarCov
from var_dgm.stat_tests import generate_report
from var_dgm.utils import seed_everything, compute_individual_returns, compute_portfolio_returns, estimate_var_es_torch_multivariate

In [3]:
# Global plot appearance: dark grid with a light-grey axes background,
# scaled for paper-sized figures.
sns.set_style(style="darkgrid", rc={"axes.facecolor": ".9"})
sns.set_context(context="paper")

In [4]:
# Folder holding the input CSV, relative to this notebook.
DATA_FOLDER = "../../data/"

# Read the stock table and convert its "Date" column to datetimes in one chain.
df = (
    pd.read_csv(DATA_FOLDER + "complete_stocks.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
)

In [5]:
# Number of distinct tickers in the data set (NaN, if present, counts as one value).
df["Ticker"].nunique(dropna=False)

89

In [6]:
# One report per backtest run is appended to each of these lists
# (TimeGrad, historical simulation, variance-covariance).
res_timeGrad, res_hist, res_varcov = [], [], []

# Tail probability handed to the VaR/ES estimators and to the report generator.
alpha = 0.01

In [7]:
# Base seed for the experiment. The backtest loop offsets it by the run index
# when drawing each portfolio and reuses it unchanged before window sampling,
# model training and VaR/ES estimation.
RANDOM_STATE = 12

In [8]:
# Pick the compute device once so that training and inference always agree.
# Apple's MPS backend is preferred (it is what this notebook was written for);
# other machines fall back to CUDA or CPU instead of crashing on a hard-coded "mps".
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

for run in range(5):
    # One complete cycle: draw a random portfolio, train TimeGrad on it, then
    # backtest TimeGrad and the two baseline models on the same test windows.
    seed_everything(RANDOM_STATE + run)
    n_stocks = 10
    tickers = np.random.choice(df["Ticker"].unique(), n_stocks, replace=False)
    # `weights` is only used for the printed description below.
    weights = 1/n_stocks
    print("Portfolio = {0}".format(" + ".join([f"{weights} * {ticker}" for ticker in tickers])))
    df_copy = df.loc[df["Ticker"].isin(tickers)].copy(deep=True)

    df_returns = compute_individual_returns(df_copy)
    df_returns = compute_portfolio_returns(df_returns)

    # Per-asset return columns are the model inputs; "Return_Target" is the
    # series the VaR/ES forecasts are evaluated against.
    return_cols = [col for col in df_returns.columns if (col.startswith("Return_") and col != "Return_Target")]
    multivariate_returns = df_returns[return_cols]
    multivariate_target = df_returns["Return_Target"]

    # The first row is dropped everywhere below (presumably it carries no valid
    # return - TODO confirm against compute_individual_returns).
    multivariate_target = multivariate_target.values[1:]
    # One past the index label of the last row dated on or before 2022-06-01.
    # NOTE(review): this is used as a position, so it assumes df_returns has a
    # default 0..N-1 index sorted by Date - confirm.
    train_size = df_returns[df_returns.Date <= "2022-06-01"].index[-1] + 1
    test_size = len(multivariate_target) - train_size
    train = multivariate_returns.values[1:train_size]

    # The scaler is fitted on the training rows only and reused for the test data.
    ss = StandardScaler()
    train_scaled = torch.tensor(ss.fit_transform(train), dtype=torch.float32)

    seed_everything(RANDOM_STATE)
    context_size = 90
    num_train_samples = 3000
    train_data = torch.zeros(num_train_samples, context_size, train_scaled.shape[1])
    train_target = torch.zeros(num_train_samples, 1, train_scaled.shape[1])
    # Sample distinct target positions; each needs `context_size` preceding rows.
    train_idx = np.random.choice(np.arange(context_size, train_scaled.shape[0]), num_train_samples, replace=False)

    for k in tqdm(range(num_train_samples)):
        idx = train_idx[k]
        train_context = train_scaled[idx-context_size:idx]
        target_obs = train_scaled[idx]
        train_data[k] = train_context
        train_target[k] = target_obs

    # Create DataLoader for ease of torch training
    train_loader = DataLoader(TensorDataset(train_data, train_target), batch_size=128, shuffle=False)

    # Scale the full (first-row-dropped) series with the training statistics and
    # build one context window plus one realised target value per test step.
    temp = torch.tensor(ss.transform(multivariate_returns.values[1:]))
    test_data_context = torch.zeros(test_size, context_size, temp.shape[1])
    test_data_real = torch.zeros(test_size, 1, 1)
    for j in range(test_size):
        idx = j + train_size
        test_data_context[j] = temp[idx-context_size:idx]
        test_data_real[j] = multivariate_target[idx]

    seed_everything(RANDOM_STATE)
    scheduler = DDPMScheduler(num_train_timesteps=47, beta_end=0.13159191527736805, clip_sample=False)
    model = TimeGrad(train.shape[-1], train.shape[-1], hidden_size=50, num_layers=2, scheduler=scheduler, num_inference_steps=47)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.003457518172345782)
    n_epochs = 15
    model.to(device)

    model.fit(train_loader, optimizer, n_epochs, device)

    # --- TimeGrad backtest ---
    seed_everything(RANDOM_STATE)
    VaR_TimeGrad = torch.zeros(test_data_real.shape[0])
    ES_TimeGrad = torch.zeros(test_data_real.shape[0])

    for t in tqdm(range(test_data_real.shape[0])):
        # List indexing keeps the leading batch dimension of size 1.
        test = test_data_context[[t]]

        # compute correlation matrix
        pho = torch.corrcoef(torch.squeeze(test).T).to(torch.double)

        # Use the same device the model was trained on.
        VaR_TimeGrad[t], ES_TimeGrad[t] = estimate_var_es_torch_multivariate(model, test, ss, pho, alpha=alpha, n_samples=500, device=device)

    res_timeGrad.append(generate_report(test_data_real.flatten(), VaR_TimeGrad, ES_TimeGrad, alpha=alpha))

    # --- Historical-simulation baseline ---
    hist_sim = HistoricalSimulation(alpha=alpha)

    VaR_histSim = torch.zeros(test_data_real.shape[0])
    ES_histSim = torch.zeros(test_data_real.shape[0])

    for t in tqdm(range(test_data_real.shape[0])):
        VaR_histSim[t], ES_histSim[t] = hist_sim.predict(test_data_context[[t]], scaler=ss)

    res_hist.append(generate_report(test_data_real.flatten(), VaR_histSim, ES_histSim, alpha=alpha))

    # --- Variance-covariance baseline ---
    var_cov = VarCov(alpha=alpha)

    VaR_varCov = torch.zeros(test_data_real.shape[0])
    ES_varCov = torch.zeros(test_data_real.shape[0])

    for t in tqdm(range(test_data_real.shape[0])):
        VaR_varCov[t], ES_varCov[t] = var_cov.predict(test_data_context[[t]], scaler=ss)

    res_varcov.append(generate_report(test_data_real.flatten(), VaR_varCov, ES_varCov, alpha=alpha))

    # Reports for this run, in the order TimeGrad, historical simulation, VarCov.
    print(res_timeGrad[-1], res_hist[-1], res_varcov[-1])

Portfolio = 0.1 * INTC + 0.1 * DHR + 0.1 * COP + 0.1 * MRK + 0.1 * HON + 0.1 * T + 0.1 * MSFT + 0.1 * AXP + 0.1 * PEP + 0.1 * SYK


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3000 [00:00<?, ?it/s]

Epochs:   0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

{'Kupicks POF': 0.9839089602677263, 'Haas TBF': 0.30628224668370924, 'Acerbi Szekely 1': 3.213366985321045, 'Acerbi Szekely 2': 0.00032458253554068506} {'Kupicks POF': 0.9839089602677263, 'Haas TBF': 0.6645764225397761, 'Acerbi Szekely 1': 2.8453407287597656, 'Acerbi Szekely 2': 0.0002874081546906382} {'Kupicks POF': 1.0170486293810412e-06, 'Haas TBF': 4.094967765465478e-06, 'Acerbi Szekely 1': 1.7914091348648071, 'Acerbi Szekely 2': 0.000769039208535105}
Portfolio = 0.1 * GE + 0.1 * ELV + 0.1 * DE + 0.1 * PG + 0.1 * BLK + 0.1 * CI + 0.1 * NVDA + 0.1 * MCD + 0.1 * NFLX + 0.1 * AAPL


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3000 [00:00<?, ?it/s]

Epochs:   0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

{'Kupicks POF': 0.9839089602677263, 'Haas TBF': 0.335129638526632, 'Acerbi Szekely 1': 3.211578369140625, 'Acerbi Szekely 2': 0.00032440185896120965} {'Kupicks POF': 0.0740988658366811, 'Haas TBF': nan, 'Acerbi Szekely 1': 3.1352040767669678, 'Acerbi Szekely 2': 7.917181210359558e-05} {'Kupicks POF': 2.0013209031303686e-05, 'Haas TBF': 0.0016641905134072883, 'Acerbi Szekely 1': 1.6814143657684326, 'Acerbi Szekely 2': 0.0006368993781507015}
Portfolio = 0.1 * BA + 0.1 * KO + 0.1 * ACN + 0.1 * AMD + 0.1 * GE + 0.1 * COP + 0.1 * TMO + 0.1 * MO + 0.1 * NEE + 0.1 * CVX


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3000 [00:00<?, ?it/s]

Epochs:   0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

{'Kupicks POF': 0.6124944699749673, 'Haas TBF': 0.630090793615534, 'Acerbi Szekely 1': 3.4687302112579346, 'Acerbi Szekely 2': 0.000262782588833943} {'Kupicks POF': 0.9839089602677263, 'Haas TBF': 0.7098705732649506, 'Acerbi Szekely 1': 2.6493921279907227, 'Acerbi Szekely 2': 0.000267615367192775} {'Kupicks POF': 4.111959361375068e-08, 'Haas TBF': 8.005523223073634e-05, 'Acerbi Szekely 1': 1.8036127090454102, 'Acerbi Szekely 2': 0.0008653697441332042}
Portfolio = 0.1 * T + 0.1 * TJX + 0.1 * MS + 0.1 * EOG + 0.1 * BAC + 0.1 * BLK + 0.1 * GS + 0.1 * GOOGL + 0.1 * JNJ + 0.1 * HD


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3000 [00:00<?, ?it/s]

Epochs:   0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

{'Kupicks POF': 0.33831157927013206, 'Haas TBF': 0.08264072877584465, 'Acerbi Szekely 1': 3.5919129848480225, 'Acerbi Szekely 2': 0.0005442292313091457} {'Kupicks POF': 0.0730582553226457, 'Haas TBF': 0.10968966807767856, 'Acerbi Szekely 1': 2.9774134159088135, 'Acerbi Szekely 2': 0.0006014976534061134} {'Kupicks POF': 1.0170486293810412e-06, 'Haas TBF': 7.94384350519109e-06, 'Acerbi Szekely 1': 1.8436431884765625, 'Acerbi Szekely 2': 0.0007914629532024264}
Portfolio = 0.1 * CB + 0.1 * RTX + 0.1 * MS + 0.1 * NFLX + 0.1 * PG + 0.1 * XOM + 0.1 * MSFT + 0.1 * MDT + 0.1 * C + 0.1 * VZ


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3000 [00:00<?, ?it/s]

Epochs:   0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

  0%|          | 0/396 [00:00<?, ?it/s]

{'Kupicks POF': 0.9839089602677263, 'Haas TBF': 0.44792183679657593, 'Acerbi Szekely 1': 3.311065912246704, 'Acerbi Szekely 2': 0.0003344510914757848} {'Kupicks POF': 0.33831157927013206, 'Haas TBF': 0.5618091006890653, 'Acerbi Szekely 1': 2.862550735473633, 'Acerbi Szekely 2': 0.0004337197751738131} {'Kupicks POF': 4.111959361375068e-08, 'Haas TBF': 3.0204523379147893e-05, 'Acerbi Szekely 1': 1.8316164016723633, 'Acerbi Szekely 2': 0.0008788058767095208}


In [9]:
# Average every report metric over the runs and print one line per model, in
# the order TimeGrad, historical simulation, variance-covariance. A NaN in any
# single run propagates into that metric's mean.
for reports in (res_timeGrad, res_hist, res_varcov):
    metric_rows = [list(report.values()) for report in reports]
    print(np.array(metric_rows).mean(axis=0))

[7.80506586e-01 3.60413049e-01 3.35933089e+00 3.58089461e-04]
[4.90657324e-01            nan 2.89398022e+00 3.33882553e-04]
[4.42590910e-06 3.57297816e-04 1.79033916e+00 7.88315432e-04]
