## AI Finance Project - Loda Enrico

In [75]:
# import
import pandas as pd
import yfinance as yf
import numpy as np
import torch
from torch.utils.data import Dataset
from sklearn.preprocessing import StandardScaler

We want to calculate the daily simple rate of return of our assets. To do so, we define:

---

**Simple rate of return**  
Given:  
- **P**: the price of the asset at the time of investment  
- **T**: maturity (number of periods)  
- **C**: the price of the asset at maturity **T**

The formula is:
$$
r = \frac{\frac{C}{P} - 1}{T}
$$

---

**Daily simple rate of return:**  
For daily returns, taking $P_t$ to be the close price of the asset at time $t$, the formula is:
$$
r_t = \frac{P_t - P_{t-1}}{P_{t-1}} = \frac{P_t}{P_{t-1}} - 1
$$

In [51]:
# Download daily price data for both tickers in a single call; the resulting
# frame has two column levels (price field, ticker).
df = yf.download(["SPY","NVDA"], start="2010-01-01", end="2025-01-01")

# Per-ticker cross-sections over the ticker level (all price fields of one asset).
spy = df.xs('SPY', axis=1, level=1)
nvda = df.xs('NVDA', axis=1, level=1)

# Daily simple return on the close price: r_t = P_t / P_{t-1} - 1.
# Rows containing a missing value (such as the first one, which has no
# previous price) are dropped.
prices = df["Close"]
previous_prices = prices.shift(1)
returns = (prices / previous_prices - 1).dropna()
SPY_r, NVDA_r = returns['SPY'], returns['NVDA']

[*********************100%***********************]  2 of 2 completed


In [52]:
# Inspect the raw download; its columns are indexed by (price field, ticker).
df

Price,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Ticker,NVDA,SPY,NVDA,SPY,NVDA,SPY,NVDA,SPY,NVDA,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
2010-01-04,0.423884,85.768425,0.426864,85.813831,0.415172,84.391045,0.424342,85.041895,800204000,118944600
2010-01-05,0.430073,85.995499,0.434658,86.033341,0.422279,85.405193,0.422279,85.715485,728648000,111579900
2010-01-06,0.432824,86.056046,0.433741,86.267949,0.425718,85.844142,0.429844,85.912251,649168000,116074400
2010-01-07,0.424342,86.419273,0.432366,86.525225,0.421133,85.654901,0.430532,85.897078,547792000,131091100
2010-01-08,0.425259,86.706863,0.428239,86.744706,0.418382,86.018176,0.420903,86.192237,478168000,126402800
...,...,...,...,...,...,...,...,...,...,...
2024-12-24,140.207108,599.496582,141.886946,599.536501,138.637245,593.684050,139.987127,594.272307,105157000,33160100
2024-12-26,139.917130,599.536499,140.837058,600.673034,137.717335,596.286267,139.687155,597.701991,116205600,41219100
2024-12-27,136.997391,593.225464,139.007216,595.987176,134.697615,588.988210,138.537258,595.747844,170582600,64969300
2024-12-30,137.477356,586.455811,140.257099,589.965273,134.007674,582.657240,134.817597,586.126844,167734700,56578800


In [53]:
# Display the daily simple returns (the full-range slice `[:]` selects every row).
returns[:]

Ticker,NVDA,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-05,0.014603,0.002648
2010-01-06,0.006396,0.000704
2010-01-07,-0.019598,0.004221
2010-01-08,0.002161,0.003328
2010-01-11,-0.014016,0.001397
...,...,...
2024-12-24,0.003938,0.011115
2024-12-26,-0.002068,0.000067
2024-12-27,-0.020868,-0.010527
2024-12-30,0.003503,-0.011412


# Dataset for training
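The features, for a total of 22, are the following:

*   $\mu_1$, $\mu_2$: the means of $r_1$ (NVDA) and $r_2$ (SPY), calculated on the last 10 lags
*   $[r_1^{t-1},\dots ,r_1^{t-10}]$, $[r_2^{t-1},\dots ,r_2^{t-10}]$

The targets are the entries of the covariance matrix (the two variances and the covariance), estimated on the 11 returns from $t-10$ to $t$:
* $\Sigma_{t}$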

In [54]:
# Assemble the lag matrix in one shot: today's returns first, then lags 1..10,
# interleaving NVDA and SPY so each lag's pair of columns sits side by side.
rNVDA_series, rSPY_series = returns['NVDA'], returns['SPY']

lag_columns = {'rNVDA_today': rNVDA_series, 'rSPY_today': rSPY_series}
for lag in range(1, 11):
    lag_columns[f'rNVDA_t-{lag}'] = rNVDA_series.shift(lag)
    lag_columns[f'rSPY_t-{lag}'] = rSPY_series.shift(lag)
df = pd.DataFrame(lag_columns)

# The first rows hold NaN wherever a lag reaches before the start of the series.
df.head(11)

Unnamed: 0_level_0,rNVDA_today,rSPY_today,rNVDA_t-1,rSPY_t-1,rNVDA_t-2,rSPY_t-2,rNVDA_t-3,rSPY_t-3,rNVDA_t-4,rSPY_t-4,...,rNVDA_t-6,rSPY_t-6,rNVDA_t-7,rSPY_t-7,rNVDA_t-8,rSPY_t-8,rNVDA_t-9,rSPY_t-9,rNVDA_t-10,rSPY_t-10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-05,0.014603,0.002648,,,,,,,,,...,,,,,,,,,,
2010-01-06,0.006396,0.000704,0.014603,0.002648,,,,,,,...,,,,,,,,,,
2010-01-07,-0.019598,0.004221,0.006396,0.000704,0.014603,0.002648,,,,,...,,,,,,,,,,
2010-01-08,0.002161,0.003328,-0.019598,0.004221,0.006396,0.000704,0.014603,0.002648,,,...,,,,,,,,,,
2010-01-11,-0.014016,0.001397,0.002161,0.003328,-0.019598,0.004221,0.006396,0.000704,0.014603,0.002648,...,,,,,,,,,,
2010-01-12,-0.033898,-0.009326,-0.014016,0.001397,0.002161,0.003328,-0.019598,0.004221,0.006396,0.000704,...,,,,,,,,,,
2010-01-13,0.013582,0.008446,-0.033898,-0.009326,-0.014016,0.001397,0.002161,0.003328,-0.019598,0.004221,...,0.014603,0.002648,,,,,,,,
2010-01-14,-0.015633,0.002705,0.013582,0.008446,-0.033898,-0.009326,-0.014016,0.001397,0.002161,0.003328,...,0.006396,0.000704,0.014603,0.002648,,,,,,
2010-01-15,-0.029495,-0.011224,-0.015633,0.002705,0.013582,0.008446,-0.033898,-0.009326,-0.014016,0.001397,...,-0.019598,0.004221,0.006396,0.000704,0.014603,0.002648,,,,
2010-01-19,0.018703,0.012496,-0.029495,-0.011224,-0.015633,0.002705,0.013582,0.008446,-0.033898,-0.009326,...,0.002161,0.003328,-0.019598,0.004221,0.006396,0.000704,0.014603,0.002648,,


In [55]:
# Row-wise statistics per asset, computed over the lag columns:
#   mean_<asset>: mean of the 10 lagged returns only (today's return excluded)
#   var_<asset> : variance (pandas default) of the 10 lags plus today's return
for ticker in ('NVDA', 'SPY'):
    cols = [f'r{ticker}_t-{i}' for i in range(1, 11)]
    df[f'mean_{ticker}'] = df[cols].mean(axis=1)
    cols = cols + [f'r{ticker}_today']
    df[f'var_{ticker}'] = df[cols].var(axis=1)

# Discard every row that still contains a missing value (incomplete lag window).
df = df.dropna()
df

Unnamed: 0_level_0,rNVDA_today,rSPY_today,rNVDA_t-1,rSPY_t-1,rNVDA_t-2,rSPY_t-2,rNVDA_t-3,rSPY_t-3,rNVDA_t-4,rSPY_t-4,...,rNVDA_t-8,rSPY_t-8,rNVDA_t-9,rSPY_t-9,rNVDA_t-10,rSPY_t-10,mean_NVDA,var_NVDA,mean_SPY,var_SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-20,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,-0.015633,0.002705,0.013582,0.008446,...,-0.019598,0.004221,0.006396,0.000704,0.014603,0.002648,-0.005720,0.000331,0.001539,0.000059
2010-01-21,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,-0.015633,0.002705,...,0.002161,0.003328,-0.019598,0.004221,0.006396,0.000704,-0.007581,0.000296,0.000258,0.000093
2010-01-22,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,...,-0.014016,0.001397,0.002161,0.003328,-0.019598,0.004221,-0.010007,0.000327,-0.001736,0.000131
2010-01-25,0.017011,0.005128,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,...,-0.033898,-0.009326,-0.014016,0.001397,0.002161,0.003328,-0.011507,0.000395,-0.004387,0.000132
2010-01-26,-0.031661,-0.004191,0.017011,0.005128,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010168,...,0.013582,0.008446,-0.033898,-0.009326,-0.014016,0.001397,-0.010022,0.000424,-0.004207,0.000127
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,-0.011351,-0.029803,...,-0.014141,-0.005153,0.031391,0.007731,-0.026943,-0.003109,0.000889,0.000547,-0.001269,0.000132
2024-12-26,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,...,-0.022499,-0.000199,-0.014141,-0.005153,0.031391,0.007731,0.003977,0.000464,0.000153,0.000131
2024-12-27,-0.020868,-0.010527,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,...,-0.016760,0.004270,-0.022499,-0.000199,-0.014141,-0.005153,0.000631,0.000420,-0.000613,0.000134
2024-12-30,0.003503,-0.011412,-0.020868,-0.010527,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,...,-0.012197,-0.004120,-0.016760,0.004270,-0.022499,-0.000199,-0.000041,0.000403,-0.001150,0.000142


In [56]:
def compute_covariance_row(row):
    """Return the covariance between NVDA and SPY returns for a single row.

    For each asset the 11 observations are the lagged returns ``t-1`` ...
    ``t-10`` followed by today's return, all read from ``row`` by label.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pandas.Series``)
        Must provide ``r<ASSET>_t-1`` ... ``r<ASSET>_t-10`` and
        ``r<ASSET>_today`` for both ``NVDA`` and ``SPY``.

    Returns
    -------
    float
        The off-diagonal entry ``[0, 1]`` of the 2x2 matrix from ``np.cov``.
    """
    # Same ordering for both assets so the observations stay paired.
    labels = [f't-{i}' for i in range(1, 11)] + ['today']
    nvda_returns = [row[f'rNVDA_{label}'] for label in labels]
    spy_returns = [row[f'rSPY_{label}'] for label in labels]

    covariance_matrix = np.cov(nvda_returns, spy_returns)
    return covariance_matrix[0, 1]

# Row-wise NVDA/SPY covariance, appended as a new 'cov' column.
row_covariances = df.apply(compute_covariance_row, axis=1)
df = df.assign(cov=row_covariances)
df

Unnamed: 0_level_0,rNVDA_today,rSPY_today,rNVDA_t-1,rSPY_t-1,rNVDA_t-2,rSPY_t-2,rNVDA_t-3,rSPY_t-3,rNVDA_t-4,rSPY_t-4,...,rSPY_t-8,rNVDA_t-9,rSPY_t-9,rNVDA_t-10,rSPY_t-10,mean_NVDA,var_NVDA,mean_SPY,var_SPY,cov
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-20,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,-0.015633,0.002705,0.013582,0.008446,...,0.004221,0.006396,0.000704,0.014603,0.002648,-0.005720,0.000331,0.001539,0.000059,0.000097
2010-01-21,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,-0.015633,0.002705,...,0.003328,-0.019598,0.004221,0.006396,0.000704,-0.007581,0.000296,0.000258,0.000093,0.000111
2010-01-22,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,...,0.001397,0.002161,0.003328,-0.019598,0.004221,-0.010007,0.000327,-0.001736,0.000131,0.000153
2010-01-25,0.017011,0.005128,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,...,-0.009326,-0.014016,0.001397,0.002161,0.003328,-0.011507,0.000395,-0.004387,0.000132,0.000184
2010-01-26,-0.031661,-0.004191,0.017011,0.005128,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010168,...,0.008446,-0.033898,-0.009326,-0.014016,0.001397,-0.010022,0.000424,-0.004207,0.000127,0.000175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,-0.011351,-0.029803,...,-0.005153,0.031391,0.007731,-0.026943,-0.003109,0.000889,0.000547,-0.001269,0.000132,0.000135
2024-12-26,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,...,-0.000199,-0.014141,-0.005153,0.031391,0.007731,0.003977,0.000464,0.000153,0.000131,0.000126
2024-12-27,-0.020868,-0.010527,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,...,0.004270,-0.022499,-0.000199,-0.014141,-0.005153,0.000631,0.000420,-0.000613,0.000134,0.000122
2024-12-30,0.003503,-0.011412,-0.020868,-0.010527,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,...,-0.004120,-0.016760,0.004270,-0.022499,-0.000199,-0.000041,0.000403,-0.001150,0.000142,0.000114


In [57]:
# Today's returns have already been folded into var_* and cov, so remove them
# and keep only lagged returns alongside the statistics.
# Rebinding (instead of inplace=True) together with errors='ignore' makes the
# cell idempotent: re-running it is a no-op rather than a KeyError on the
# already-removed columns.
df = df.drop(columns=['rNVDA_today', 'rSPY_today'], errors='ignore')
df

Unnamed: 0_level_0,rNVDA_t-1,rSPY_t-1,rNVDA_t-2,rSPY_t-2,rNVDA_t-3,rSPY_t-3,rNVDA_t-4,rSPY_t-4,rNVDA_t-5,rSPY_t-5,...,rSPY_t-8,rNVDA_t-9,rSPY_t-9,rNVDA_t-10,rSPY_t-10,mean_NVDA,var_NVDA,mean_SPY,var_SPY,cov
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-20,0.018703,0.012496,-0.029495,-0.011224,-0.015633,0.002705,0.013582,0.008446,-0.033898,-0.009326,...,0.004221,0.006396,0.000704,0.014603,0.002648,-0.005720,0.000331,0.001539,0.000059,0.000097
2010-01-21,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,-0.015633,0.002705,0.013582,0.008446,...,0.003328,-0.019598,0.004221,0.006396,0.000704,-0.007581,0.000296,0.000258,0.000093,0.000111
2010-01-22,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,-0.015633,0.002705,...,0.001397,0.002161,0.003328,-0.019598,0.004221,-0.010007,0.000327,-0.001736,0.000131,0.000153
2010-01-25,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,-0.029495,-0.011224,...,-0.009326,-0.014016,0.001397,0.002161,0.003328,-0.011507,0.000395,-0.004387,0.000132,0.000184
2010-01-26,0.017011,0.005128,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010168,0.018703,0.012496,...,0.008446,-0.033898,-0.009326,-0.014016,0.001397,-0.010022,0.000424,-0.004207,0.000127,0.000175
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,-0.011351,-0.029803,-0.012197,-0.004120,...,-0.005153,0.031391,0.007731,-0.026943,-0.003109,0.000889,0.000547,-0.001269,0.000132,0.000135
2024-12-26,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,-0.011351,-0.029803,...,-0.000199,-0.014141,-0.005153,0.031391,0.007731,0.003977,0.000464,0.000153,0.000131,0.000126
2024-12-27,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,...,0.004270,-0.022499,-0.000199,-0.014141,-0.005153,0.000631,0.000420,-0.000613,0.000134,0.000122
2024-12-30,-0.020868,-0.010527,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,...,-0.004120,-0.016760,0.004270,-0.022499,-0.000199,-0.000041,0.000403,-0.001150,0.000142,0.000114


### We are trying to scale the data for numerical stability; we are not sure this is correct, since we need the non-scaled version of the variance at inference time

In [76]:
# Chronological split: observations up to the end of 2023 form the training
# set, observations from 2024 onwards form the test set.
TRAIN_START, TRAIN_END, TEST_START = "2010-01-01", "2023-12-31", "2024-01-01"
train_df = df.loc[TRAIN_START:TRAIN_END]
test_df  = df.loc[TEST_START:]

# Sanity check: the two slices together must account for every row of df.
rows_in_split = len(train_df) + len(test_df)
print(rows_in_split == len(df))


##CARE FOR SCALER
# Disabled: standardisation fitted on the training split and applied to both splits.
#scaler = StandardScaler()
#scaler.fit(train_df)
#train_df = pd.DataFrame(scaler.transform(train_df), columns=train_df.columns, index=train_df.index)
#test_df = pd.DataFrame(scaler.transform(test_df), columns=test_df.columns, index=test_df.index)

True


Flip the columns in order to have a temporal sequence from past to present

In [77]:
# Reverse the column order so the lagged returns run from the oldest (t-10)
# to the most recent (t-1).
# The order is derived from `df`, whose columns are not reordered, rather than
# from the split's own current order: re-running this cell therefore yields
# the same layout instead of flipping the columns back to present -> past.
past_to_present = df.columns[::-1]
train_df = train_df[past_to_present]
test_df = test_df[past_to_present]

In [78]:
# Inspect the training split after the column reversal.
# NOTE(review): the stored output below looks standardised although the scaler
# code is currently commented out — presumably produced under an earlier
# kernel state; TODO confirm with Restart & Run All.
train_df

Unnamed: 0_level_0,cov,var_SPY,mean_SPY,var_NVDA,mean_NVDA,rSPY_t-10,rNVDA_t-10,rSPY_t-9,rNVDA_t-9,rSPY_t-8,...,rSPY_t-5,rNVDA_t-5,rSPY_t-4,rNVDA_t-4,rSPY_t-3,rNVDA_t-3,rSPY_t-2,rNVDA_t-2,rSPY_t-1,rNVDA_t-1
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-20,-0.232657,-0.217027,0.324991,-0.451866,-0.863260,0.192904,0.454284,0.015121,0.164346,0.336892,...,-0.902628,-1.259526,0.723371,0.417672,0.198092,-0.614659,-1.076637,-1.104754,1.094020,0.598300
2010-01-21,-0.201611,-0.100245,-0.091778,-0.485331,-1.078271,0.015009,0.164310,0.337030,-0.754183,0.255154,...,0.723584,0.417888,0.197970,-0.614694,-1.076656,-1.104510,1.094142,0.598514,-0.980450,-0.204699
2010-01-22,-0.103425,0.029827,-0.740012,-0.456245,-1.358350,0.336919,-0.754207,0.255290,0.014687,0.078396,...,0.198243,-0.614250,-1.076704,-1.104537,1.094114,0.598668,-0.979986,-0.204345,-1.809825,-0.693907
2010-01-25,-0.031520,0.034958,-1.602198,-0.391500,-1.531645,0.255179,0.014652,0.078527,-0.556967,-0.903135,...,-1.076286,-1.103984,1.093940,0.598615,-0.980006,-0.204148,-1.809224,-0.693468,-2.090128,-1.285831
2010-01-26,-0.050954,0.017232,-1.543670,-0.363815,-1.360158,0.078415,-0.556994,-0.903035,-1.259511,0.723651,...,1.094111,0.598790,-0.980059,-0.204189,-1.809240,-0.693245,-2.089481,-1.285289,0.419610,0.538513
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-22,-0.287612,-0.252154,0.976915,-0.466108,0.395218,0.344096,0.628377,0.306700,-0.715502,0.368601,...,-0.199904,0.332621,0.465207,0.795660,0.507063,-0.396004,-1.317593,-1.126062,0.818172,0.583020
2023-12-26,-0.289859,-0.252199,0.902457,-0.486621,0.131976,0.306588,-0.715527,0.368740,0.718901,1.212802,...,0.465450,0.795792,0.506923,-0.396042,-1.317612,-1.125817,0.818340,0.583237,0.134219,-0.178182
2023-12-27,-0.286169,-0.251550,0.913288,-0.536491,0.451830,0.368629,0.718857,1.212968,0.257893,0.244309,...,0.507161,-0.395646,-1.317645,-1.125844,0.818313,0.583392,0.134499,-0.177833,0.336768,0.262252
2023-12-28,-0.291036,-0.249980,0.823559,-0.565419,0.229072,1.212856,0.257854,0.244445,0.130840,-0.200163,...,-1.317200,-1.125286,0.818155,0.583339,0.134475,-0.177638,0.337015,0.262525,0.115765,0.036232


In [None]:
#target_cols = ['var_SPY', 'var_NVDA', 'cov']
#train_df = train_df[train_df.columns[::-1]]
#
#X_train = train_df.drop(columns=target_cols)
#y_train = train_df[target_cols]
#
#print(X_train.head())
#print("-"*10)
#y_train.head()

In [79]:
mean_cols = ["mean_SPY","mean_NVDA" ]
class LSTMdataset(Dataset):
    """Tensor-backed dataset that splits a DataFrame into features and targets.

    Every column listed in ``target_cols`` becomes part of the target tensor
    ``y`` (in the order given); all remaining columns become the feature
    tensor ``X`` (in the DataFrame's column order). Both are ``float32``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One sample per row; must contain every target column.
    target_cols : sequence of str, optional
        Columns to predict. Defaults to ``TARGET_COLS``
        (``'cov'``, ``'var_NVDA'``, ``'var_SPY'``).
    """

    # Default targets: the covariance and the two variances.
    TARGET_COLS = ('cov', 'var_NVDA', 'var_SPY')

    def __init__(self, dataframe, target_cols=None):
        super().__init__()
        # Single source of truth for the target list, so features and targets
        # can never disagree about which columns are excluded/selected.
        targets = list(self.TARGET_COLS if target_cols is None else target_cols)
        self.target_cols = targets
        self.X = torch.tensor(dataframe.drop(columns=targets).values, dtype=torch.float32)
        self.y = torch.tensor(dataframe[targets].values, dtype=torch.float32)

    def __len__(self):
        """Number of samples (rows of the source DataFrame)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, targets)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

In [70]:
# Build the training dataset and inspect the first (features, targets) pair.
dataset = LSTMdataset(train_df)
dataset[0]

(tensor([ 0.0015, -0.0057,  0.0026,  0.0146,  0.0007,  0.0064,  0.0042, -0.0196,
          0.0033,  0.0022,  0.0014, -0.0140, -0.0093, -0.0339,  0.0084,  0.0136,
          0.0027, -0.0156, -0.0112, -0.0295,  0.0125,  0.0187]),
 tensor([9.7133e-05, 3.3126e-04, 5.8702e-05]))

In [80]:
# Build the training dataset and inspect the first (features, targets) pair.
# NOTE(review): this repeats the cell executed as In [70]; the two stored
# outputs differ (raw vs. standardised-looking values), so they were
# presumably produced under different kernel state — TODO confirm with
# Restart & Run All.
dataset = LSTMdataset(train_df)
dataset[0]

(tensor([ 0.3250, -0.8633,  0.1929,  0.4543,  0.0151,  0.1643,  0.3369, -0.7543,
          0.2551,  0.0144,  0.0788, -0.5568, -0.9026, -1.2595,  0.7234,  0.4177,
          0.1981, -0.6147, -1.0766, -1.1048,  1.0940,  0.5983]),
 tensor([-0.2327, -0.4519, -0.2170]))

In [84]:
## Assuming you have a batch of data from your dataset, for example, just one item for demonstration
#X, y = dataset[0]
#
## The target values are in the 'y' tensor. We need to convert it to a NumPy array
## to use the scaler's inverse_transform method.
#y_np = y.numpy().reshape(1, -1) # Reshape to (1, n_features) as inverse_transform expects a 2D array
#
## Create a temporary DataFrame for the target variables, maintaining the correct column order
#target_cols = ['cov', 'var_NVDA', 'var_SPY']
#y_df = pd.DataFrame(y_np, columns=target_cols)
#
## Apply inverse_transform to the target DataFrame
#unscaled_y_df = scaler.inverse_transform(y_df)
#
## Now you can access the unscaled target values
#unscaled_cov = unscaled_y_df['cov'].iloc[0]
#unscaled_var_NVDA = unscaled_y_df['var_NVDA'].iloc[0]
#unscaled_var_SPY = unscaled_y_df['var_SPY'].iloc[0]
#
#print(f"Unscaled Covariance: {unscaled_cov}")
#print(f"Unscaled NVDA Variance: {unscaled_var_NVDA}")
#print(f"Unscaled SPY Variance: {unscaled_var_SPY}")

In [None]:
# batch first lets us have the tensor in the shape (batch, sequence, feature)
class LSTM(torch.nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm  = torch.nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, output_size=output_size, batch_first=True, droput=0.1 )
        # hidden size + 2 since the fc will take in input the mean of each asset 
        self.fc = torch.nn.Linear (hidden_size+2, output_size)

    def forward (self, x):
      mean_SPY = 