In [1]:
import torch
from tqdm import tqdm
import numpy as np
from torch.utils.data import DataLoader
from datasetv2 import StockData
from BiTransformer import BetaTransformer, FactorTransformer

In [2]:
train_dataset = StockData(return_file= "data/month_ret.pkl", data_file="data/datashare.pkl", split = "train")
trainloader = DataLoader(train_dataset, batch_size=256, shuffle= False)


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


In [3]:
device = "cuda" if torch.cuda.is_available() else "cpu"

beta_model = BetaTransformer(embed_size=128, inner_dim=32, output_dim=5, num_characteristics=94, heads = 8, repeats=5, dropout=0.1)
factor_model = FactorTransformer(embed_size=128, inner_dim= 32, output_dim=5, heads=8, num_characteristics=94, repeats=5, dropout=0.1)

beta_model.to(device)
factor_model.to(device)

epochs = 100

optimizer = torch.optim.Adam([ {
    "params" : beta_model.parameters(),
    "params" : factor_model.parameters()
}], lr=1e-5)

loss_fn = torch.nn.MSELoss()

In [4]:
train_loss = []

for epoch in range(epochs):
    train_batch_loss = 0 

    for batch in tqdm(trainloader, total=len(trainloader)):
        optimizer.zero_grad()
        x, att = batch[0]
        y = batch[1]

        x = x.cuda()
        att = att.cuda()
        y = y.cuda()

        b_in = x + att

        betas_out = beta_model(b_in.to(torch.float32), att.to(torch.float32))
        factors_out = factor_model(b_in.to(torch.float32), y.to(torch.float32).view(y.shape[0],1), att.to(torch.float32))

        return_estimate = torch.mul(betas_out, factors_out).sum(dim = 1)

        loss = loss_fn(return_estimate, y.to(torch.float32))
        loss.backward()
        optimizer.step()

        train_batch_loss += loss.item()

        print(f"Batch Loss = {loss.item()}")

    print(f"Epochs {epoch+1}/{epochs} ------------ train loss = {train_batch_loss/len(trainloader)} ")    



  0%|          | 1/1140 [00:48<15:17:58, 48.36s/it]

Batch Loss = 51.27640151977539


  0%|          | 2/1140 [01:36<15:13:58, 48.19s/it]

Batch Loss = 35.59489059448242


  0%|          | 3/1140 [02:25<15:20:11, 48.56s/it]

Batch Loss = 34.82820129394531


  0%|          | 4/1140 [03:15<15:30:19, 49.14s/it]

Batch Loss = 26.911029815673828


  0%|          | 5/1140 [04:03<15:22:05, 48.75s/it]

Batch Loss = 45.915565490722656


  1%|          | 6/1140 [04:49<15:06:39, 47.97s/it]

Batch Loss = 39.509220123291016


  1%|          | 7/1140 [05:38<15:06:34, 48.01s/it]

Batch Loss = 62.83960723876953


  1%|          | 7/1140 [06:22<17:11:32, 54.63s/it]


KeyboardInterrupt: 

In [5]:
with torch.no_grad():
    b_in = x + att
    betas_out = beta_model(b_in.to(torch.float32), att.to(torch.float32))
    factors_out = factor_model(b_in.to(torch.float32), y.to(torch.float32).view(y.shape[0],1), att.to(torch.float32))

In [9]:
torch.mul(factors_out, betas_out).sum(dim = 1)

tensor([-101.2903,  -83.5545,  -17.8044,  -75.0067,   63.5480,   66.2908,
         -33.3369,  -31.6131,  -16.1224,   13.3225, -102.6843,  -48.7030,
         -31.0768,  -28.3364,  -50.4019,  -11.7796,  -92.8685,   16.6493,
        -109.6773,   26.2689,  -43.2266, -107.6852,  -57.1064,   19.1339,
         -86.9978,  -37.3394,   46.9498,   -2.9617,  -48.1317,   78.5096,
          26.1454,  -75.6495,  -36.1237, -134.4268,  -57.3510,  -40.1741,
         -96.1820,   23.1225,   50.7180,  -27.5669,  -26.1173,  -84.9718,
          66.1404,    4.2162,   -9.6041,  -25.3653,   60.8849,  -70.9091,
         -89.9695, -236.9128,    2.7734,  -55.5641, -143.4900,  -88.2520,
         -15.9793, -113.6538,   41.9622, -171.4710,   62.5981,   -6.6074,
         -25.1827, -101.1404,    6.6869,  -46.5135])

In [5]:
factors_out

tensor([-1.5604,  3.0144, -1.3905,  0.3964, -0.2673], device='cuda:0',
       grad_fn=<ViewBackward0>)

In [6]:
betas_out

tensor([[-0.6039, -0.0195,  0.3782, -0.0153,  0.0196],
        [-0.1495, -0.1187,  0.3071,  0.1474,  0.1892],
        [-0.1886,  0.0703,  0.3584,  0.0600,  0.2457],
        ...,
        [-0.6620, -0.2956, -0.1072,  0.3109,  0.1399],
        [-0.3750, -0.0899,  0.1466, -0.0107,  0.3157],
        [-0.6398, -0.3189,  0.2864, -0.1460,  0.2826]], device='cuda:0',
       grad_fn=<AddmmBackward0>)

In [9]:
y

tensor([  4.2783,  -6.0796,  -7.5671,  -0.9493,  -0.6009,   8.0069,  -3.6057,
         52.8713,  10.8611,  -0.9257,   4.3477,  -7.5894,  -1.3437,   2.6071,
         -0.6516,   0.2848,   1.9722,   8.7976,  -3.3750,  -1.0951,   3.0833,
         -5.9760,   1.8406,  -1.2201,  -4.4167,  10.1747,  -3.8214,   2.5086,
         25.5232,   2.1310,  13.3093,   1.9972,  -2.7206,  -2.5938,   0.4079,
          3.1591,   7.1301,  -4.9559,   2.6740, -10.5948,  -0.0925,  -1.1429,
         -2.9951,   8.2406,  10.5608,  -1.8125,   2.1629,  21.1786,  -0.8598,
          8.2927,   5.3366,  -4.3224,   3.0287,   4.2767,  -0.0662,   3.8910,
         -1.9167,  -0.2500,   5.7159,  -2.1192,   0.7500,   2.4660,   2.1772,
          1.5682,   3.9167,  -9.4049,   6.2190,   7.4881,   3.9475,   1.4759,
          1.7500,   0.3711,  13.4486,  -3.3750,   0.2130,  12.8887,   7.1859,
          2.4674,   9.6879,   1.8777,  14.8443,  -8.8214,  -3.1071,   1.8777,
          0.9265,  -0.4722, -14.8703,   2.9758,   3.8618,  -1.06

In [8]:
return_estimate

tensor([-7.9852e-01,  1.6098e-01,  1.3774e+00,  6.6569e-01, -4.7709e-01,
        -1.0528e+00, -5.9948e-01, -4.8443e-01, -1.2229e-01, -1.1031e+00,
        -9.2964e-01, -1.2024e+00, -1.3105e+00, -1.7177e-01, -9.5886e-01,
         5.3306e-01, -1.1922e+00,  9.6256e-02, -8.1031e-02,  9.2450e-02,
         3.4750e-01, -6.2580e-01,  3.8872e-01,  7.3369e-01, -5.5297e-01,
        -5.7237e-01, -1.5223e-01, -1.1519e-01,  5.1266e-01,  6.0079e-02,
        -1.3270e+00, -1.5696e+00, -1.7195e+00, -1.8946e-01, -5.1987e-01,
        -3.6302e-01, -9.5490e-01, -1.7715e+00,  2.1624e-01,  5.4394e-01,
        -5.8300e-01, -9.6621e-01, -1.7643e+00, -6.3698e-02, -7.2091e-01,
         6.3877e-01, -1.9825e+00, -7.4308e-01,  2.3791e-01,  1.0523e+00,
        -1.1094e+00,  1.2581e-01, -9.1685e-01, -1.3387e+00,  2.2675e-02,
        -1.4606e-01, -1.0345e+00, -4.4858e-01, -8.8104e-01, -8.9449e-01,
        -2.8202e-01, -8.8648e-01, -7.0026e-01, -5.8243e-01, -4.6261e-01,
         1.3323e+00, -4.6530e-01, -6.1906e-01, -2.8

In [15]:
sum(betas_out[0][0])

tensor(-1.0967e-05)

In [30]:
L,V = torch.linalg.eig(A)

In [31]:
torch.real(L)

tensor([5460.0898, 1103.7900, 1069.0416,  989.6089, 1009.4409,  906.0813,
         896.9238,  884.9628,  877.9852,  824.7115,  800.4466,  781.9396,
         764.2097,  736.5269,  723.3835,  713.1860,  707.3641,  693.6501,
         677.9960,  669.5670,  659.3270,  648.9361,  629.7836,  619.5671,
         617.3815,  122.4133,  132.0341,  140.2777,  588.2957,  155.5132,
         159.6166,  580.1430,  573.9927,  576.4462,  562.1726,  168.6394,
         170.0196,  553.8890,  548.8576,  543.8217,  528.7883,  532.6273,
         175.7346,  179.9915,  495.3282,  514.6953,  505.4503,  477.2546,
         192.5651,  482.3644,  460.6175,  196.0766,  199.6541,  204.5398,
         448.8571,  441.8883,  216.1230,  218.0048,  224.7543,  430.9372,
         412.5706,  419.3622,  417.6948,  401.9385,  233.5191,  399.0354,
         388.3470,  239.8217,  249.6799,  378.3864,  243.3026,  280.9480,
         269.9452,  274.2514,  265.8348,  361.5014,  287.8109,  241.9338,
         358.5362,  257.7657,  381.806

In [20]:
A.shape

torch.Size([94, 94])

In [32]:
out[2]

tensor([[ 0.4672,  0.6865, -0.5330,  ...,  0.1016, -2.1300,  0.8769],
        [ 0.7225, -0.3270, -0.6206,  ...,  2.3793, -0.1092,  0.7760],
        [ 1.0716, -1.9624, -0.3132,  ..., -0.7111, -0.9161, -0.6825],
        ...,
        [ 1.9289, -1.3707, -0.4832,  ...,  1.9094,  1.5193,  0.9237],
        [-1.2710,  1.2234,  1.2892,  ..., -0.1634,  1.5330, -0.3174],
        [-0.8854, -0.8661, -0.7718,  ..., -1.1532, -1.0024,  0.8124]])