In [171]:
# import
import pandas as pd
import yfinance as yf
import numpy as np

In [172]:
data = yf.download(["SPY","NVDA"], start="2010-01-01", end="2025-01-01")
spy = data.xs('SPY', axis=1, level=1)
nvda = data.xs('NVDA', axis=1, level=1)

[*********************100%***********************]  2 of 2 completed


**Simple returns for every day:**
$$
R_t = \frac{P_t - P_{t-1}}{P_{t-1}} = \frac{P_t}{P_{t-1}} - 1
$$

In [173]:
prices = data["Close"]
returns = (prices / prices.shift(1)) - 1
returns = returns.fillna(0)
spy_r = returns['SPY']
nvda_r = returns['NVDA']

In [174]:
mu1 = returns['NVDA'].mean()
mu2 = returns['SPY'].mean()

print(mu1, mu2)

0.0019323756230389096 0.0005664353469725369


## BUILD DATASET

In [175]:
returns.head(5)

Ticker,NVDA,SPY
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-04,0.0,0.0
2010-01-05,0.014603,0.002647
2010-01-06,0.006397,0.000705
2010-01-07,-0.019597,0.004221
2010-01-08,0.002161,0.003328


In [176]:
r1_series = returns['NVDA']
r1_series.head(5)

Date
2010-01-04    0.000000
2010-01-05    0.014603
2010-01-06    0.006397
2010-01-07   -0.019597
2010-01-08    0.002161
Name: NVDA, dtype: float64

In [177]:
r2_series = returns['SPY']
r2_series.head(5)

Date
2010-01-04    0.000000
2010-01-05    0.002647
2010-01-06    0.000705
2010-01-07    0.004221
2010-01-08    0.003328
Name: SPY, dtype: float64

In [178]:
df = pd.DataFrame()
df['r1_today'] = r1_series
df['r2_today'] = r2_series

df.head(5)

Unnamed: 0_level_0,r1_today,r2_today
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-01-04,0.0,0.0
2010-01-05,0.014603,0.002647
2010-01-06,0.006397,0.000705
2010-01-07,-0.019597,0.004221
2010-01-08,0.002161,0.003328


In [179]:
for i in range(1, 11):
    df[f'r1_t-{i}'] = r1_series.shift(i)
    df[f'r2_t-{i}'] = r2_series.shift(i)

df.head(11)

Unnamed: 0_level_0,r1_today,r2_today,r1_t-1,r2_t-1,r1_t-2,r2_t-2,r1_t-3,r2_t-3,r1_t-4,r2_t-4,...,r1_t-6,r2_t-6,r1_t-7,r2_t-7,r1_t-8,r2_t-8,r1_t-9,r2_t-9,r1_t-10,r2_t-10
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-04,0.0,0.0,,,,,,,,,...,,,,,,,,,,
2010-01-05,0.014603,0.002647,0.0,0.0,,,,,,,...,,,,,,,,,,
2010-01-06,0.006397,0.000705,0.014603,0.002647,0.0,0.0,,,,,...,,,,,,,,,,
2010-01-07,-0.019597,0.004221,0.006397,0.000705,0.014603,0.002647,0.0,0.0,,,...,,,,,,,,,,
2010-01-08,0.002161,0.003328,-0.019597,0.004221,0.006397,0.000705,0.014603,0.002647,0.0,0.0,...,,,,,,,,,,
2010-01-11,-0.014016,0.001396,0.002161,0.003328,-0.019597,0.004221,0.006397,0.000705,0.014603,0.002647,...,,,,,,,,,,
2010-01-12,-0.033898,-0.009326,-0.014016,0.001396,0.002161,0.003328,-0.019597,0.004221,0.006397,0.000705,...,0.0,0.0,,,,,,,,
2010-01-13,0.013583,0.008446,-0.033898,-0.009326,-0.014016,0.001396,0.002161,0.003328,-0.019597,0.004221,...,0.014603,0.002647,0.0,0.0,,,,,,
2010-01-14,-0.015634,0.002705,0.013583,0.008446,-0.033898,-0.009326,-0.014016,0.001396,0.002161,0.003328,...,0.006397,0.000705,0.014603,0.002647,0.0,0.0,,,,
2010-01-15,-0.029495,-0.011224,-0.015634,0.002705,0.013583,0.008446,-0.033898,-0.009326,-0.014016,0.001396,...,-0.019597,0.004221,0.006397,0.000705,0.014603,0.002647,0.0,0.0,,


In [180]:
cols = [f'r1_t-{i}' for i in range(1, 11)]
df['mean1'] = df[cols].mean(axis=1)
cols.append('r1_today')
df['var1'] = df[cols].var(axis=1)

cols = [f'r2_t-{i}' for i in range(1, 11)]
df['mean2'] = df[cols].mean(axis=1)
cols.append('r2_today')
df['var2'] = df[cols].var(axis=1)

In [181]:
# remove nan => remove first ten days 
df = df.dropna()

In [182]:
df

Unnamed: 0_level_0,r1_today,r2_today,r1_t-1,r2_t-1,r1_t-2,r2_t-2,r1_t-3,r2_t-3,r1_t-4,r2_t-4,...,r1_t-8,r2_t-8,r1_t-9,r2_t-9,r1_t-10,r2_t-10,mean1,var1,mean2,var2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-19,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,0.013583,0.008446,-0.033898,-0.009326,...,0.006397,0.000705,0.014603,0.002647,0.000000,0.000000,-0.007590,0.000334,0.000290,0.000046
2010-01-20,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,0.013583,0.008446,...,-0.019597,0.004221,0.006397,0.000705,0.014603,0.002647,-0.005720,0.000331,0.001539,0.000059
2010-01-21,-0.017857,-0.019229,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,...,0.002161,0.003328,-0.019597,0.004221,0.006397,0.000705,-0.007581,0.000296,0.000258,0.000093
2010-01-22,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,...,-0.014016,0.001396,0.002161,0.003328,-0.019597,0.004221,-0.010007,0.000327,-0.001736,0.000131
2010-01-25,0.017011,0.005128,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010169,0.018703,0.012496,...,-0.033898,-0.009326,-0.014016,0.001396,0.002161,0.003328,-0.011507,0.000395,-0.004387,0.000132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,-0.011351,-0.029803,...,-0.014141,-0.005153,0.031391,0.007731,-0.026943,-0.003109,0.000889,0.000547,-0.001269,0.000132
2024-12-26,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,0.013730,-0.000307,...,-0.022499,-0.000199,-0.014141,-0.005153,0.031391,0.007731,0.003977,0.000464,0.000153,0.000131
2024-12-27,-0.020868,-0.010527,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,0.030762,0.012011,...,-0.016760,0.004270,-0.022499,-0.000199,-0.014141,-0.005153,0.000631,0.000420,-0.000613,0.000134
2024-12-30,0.003503,-0.011412,-0.020868,-0.010527,-0.002068,0.000067,0.003938,0.011115,0.036897,0.005988,...,-0.012197,-0.004120,-0.016760,0.004270,-0.022499,-0.000199,-0.000041,0.000403,-0.001150,0.000142


## SPLIT DATASET

In [183]:
train_df = df.loc["2010-01-01":"2023-12-31"]
test_df  = df.loc["2024-01-01":]

In [184]:
# check consistency of the split
print((len(train_df) + len(test_df)) == len(df))

True


In [185]:
# io penso che usando .cov non credo sia utile mettere mean e var nel calcolo della cov
# però va beh, volendo possiamo inserirli, però i risultati cambiano
def compute_covariance_row(row):
    r1 = [row[f'r1_t-{i}'] for i in range(1, 11)]
    r1.append(row['r1_today'])
    #r1.append(row['mean1'])
    #r1.append(row['var1'])
    r2 = [row[f'r2_t-{i}'] for i in range(1, 11)]
    r2.append(row['r2_today'])
    #r2.append(row['mean2'])
    #r2.append(row['var2'])
    return np.cov(r1, r2)[0, 1]

train_df['cov'] = train_df.apply(compute_covariance_row, axis=1)
train_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['cov'] = train_df.apply(compute_covariance_row, axis=1)


Unnamed: 0_level_0,r1_today,r2_today,r1_t-1,r2_t-1,r1_t-2,r2_t-2,r1_t-3,r2_t-3,r1_t-4,r2_t-4,...,r2_t-8,r1_t-9,r2_t-9,r1_t-10,r2_t-10,mean1,var1,mean2,var2,cov
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-19,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,0.013583,0.008446,-0.033898,-0.009326,...,0.000705,0.014603,0.002647,0.000000,0.000000,-0.007590,0.000334,0.000290,0.000046,0.000098
2010-01-20,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,0.013583,0.008446,...,0.004221,0.006397,0.000705,0.014603,0.002647,-0.005720,0.000331,0.001539,0.000059,0.000097
2010-01-21,-0.017857,-0.019229,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,...,0.003328,-0.019597,0.004221,0.006397,0.000705,-0.007581,0.000296,0.000258,0.000093,0.000111
2010-01-22,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,...,0.001396,0.002161,0.003328,-0.019597,0.004221,-0.010007,0.000327,-0.001736,0.000131,0.000153
2010-01-25,0.017011,0.005128,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010169,0.018703,0.012496,...,-0.009326,-0.014016,0.001396,0.002161,0.003328,-0.011507,0.000395,-0.004387,0.000132,0.000184
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-12-22,-0.003266,0.002010,0.018270,0.009482,-0.030098,-0.013857,-0.009445,0.006081,0.024279,0.005625,...,0.004567,-0.018503,0.003890,0.019530,0.004299,0.005178,0.000316,0.003544,0.000048,0.000073
2023-12-26,0.009195,0.004223,-0.003266,0.002010,0.018270,0.009482,-0.030098,-0.013857,-0.009445,0.006081,...,0.013790,0.022090,0.004567,-0.018503,0.003890,0.002899,0.000295,0.003315,0.000048,0.000072
2023-12-27,0.002800,0.001808,0.009195,0.004223,-0.003266,0.002010,0.018270,0.009482,-0.030098,-0.013857,...,0.003209,0.009044,0.013790,0.022090,0.004567,0.005669,0.000242,0.003348,0.000049,0.000074
2023-12-28,0.002125,0.000378,0.002800,0.001808,0.009195,0.004223,-0.003266,0.002010,0.018270,0.009482,...,-0.001647,0.005448,0.003209,0.009044,0.013790,0.003740,0.000212,0.003072,0.000049,0.000072


In [186]:
train_df.drop(['r1_today', 'r2_today'], axis=1, inplace=True)
test_df.drop(['r1_today', 'r2_today', 'var1', 'var2'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df.drop(['r1_today', 'r2_today'], axis=1, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df.drop(['r1_today', 'r2_today', 'var1', 'var2'], axis=1, inplace=True)


In [187]:
train_df.to_csv('train.csv', index=False)
test_df.to_csv('test.csv', index=False)

In [188]:
train_df.head(5)

Unnamed: 0_level_0,r1_t-1,r2_t-1,r1_t-2,r2_t-2,r1_t-3,r2_t-3,r1_t-4,r2_t-4,r1_t-5,r2_t-5,...,r2_t-8,r1_t-9,r2_t-9,r1_t-10,r2_t-10,mean1,var1,mean2,var2,cov
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-01-19,-0.029495,-0.011224,-0.015634,0.002705,0.013583,0.008446,-0.033898,-0.009326,-0.014016,0.001396,...,0.000705,0.014603,0.002647,0.0,0.0,-0.00759,0.000334,0.00029,4.6e-05,9.8e-05
2010-01-20,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,0.013583,0.008446,-0.033898,-0.009326,...,0.004221,0.006397,0.000705,0.014603,0.002647,-0.00572,0.000331,0.001539,5.9e-05,9.7e-05
2010-01-21,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,0.013583,0.008446,...,0.003328,-0.019597,0.004221,0.006397,0.000705,-0.007581,0.000296,0.000258,9.3e-05,0.000111
2010-01-22,-0.017857,-0.019229,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,-0.015634,0.002705,...,0.001396,0.002161,0.003328,-0.019597,0.004221,-0.010007,0.000327,-0.001736,0.000131,0.000153
2010-01-25,-0.034604,-0.022292,-0.017857,-0.019229,-0.004016,-0.010169,0.018703,0.012496,-0.029495,-0.011224,...,-0.009326,-0.014016,0.001396,0.002161,0.003328,-0.011507,0.000395,-0.004387,0.000132,0.000184


In [189]:
test_df.head(5)

Unnamed: 0_level_0,r1_t-1,r2_t-1,r1_t-2,r2_t-2,r1_t-3,r2_t-3,r1_t-4,r2_t-4,r1_t-5,r2_t-5,...,r1_t-7,r2_t-7,r1_t-8,r2_t-8,r1_t-9,r2_t-9,r1_t-10,r2_t-10,mean1,mean2
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-01-02,0.0,-0.002895,0.002125,0.000378,0.0028,0.001808,0.009195,0.004223,-0.003266,0.00201,...,-0.030098,-0.013857,-0.009445,0.006081,0.024279,0.005625,0.011169,-0.001647,0.002503,0.001121
2024-01-03,-0.027341,-0.005596,0.0,-0.002895,0.002125,0.000378,0.0028,0.001808,0.009195,0.004223,...,0.01827,0.009482,-0.030098,-0.013857,-0.009445,0.006081,0.024279,0.005625,-0.001348,0.000726
2024-01-04,-0.012436,-0.008167,-0.027341,-0.005596,0.0,-0.002895,0.002125,0.000378,0.0028,0.001808,...,-0.003266,0.00201,0.01827,0.009482,-0.030098,-0.013857,-0.009445,0.006081,-0.00502,-0.000653
2024-01-05,0.009019,-0.003221,-0.012436,-0.008167,-0.027341,-0.005596,0.0,-0.002895,0.002125,0.000378,...,0.009195,0.004223,-0.003266,0.00201,0.01827,0.009482,-0.030098,-0.013857,-0.003173,-0.001584
2024-01-08,0.022897,0.00137,0.009019,-0.003221,-0.012436,-0.008167,-0.027341,-0.005596,0.0,-0.002895,...,0.0028,0.001808,0.009195,0.004223,-0.003266,0.00201,0.01827,0.009482,0.002126,-6.1e-05
