In [61]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
from sklearn.linear_model import LinearRegression

In [62]:
# yfinance Ticker handle for the symbol 'SPY'; used below to download its price history.
SPY = yf.Ticker('SPY')

In [63]:
# Download price history for a 10-year period. The stored output shows one row per
# trading day with Open/High/Low/Close/Volume/Dividends/Stock Splits columns.
# NOTE(review): the period looks relative to the run date, so a later re-run will
# likely return a different window -- confirm, and pin explicit dates if
# reproducibility matters.
data = SPY.history(period='10y')

In [64]:
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2012-09-24,120.253037,120.940673,120.161905,120.667274,95682000,0.0,0
2012-09-25,120.924119,121.156090,119.350010,119.383156,133165200,0.0,0
2012-09-26,119.358311,119.391445,118.430411,118.712090,146502200,0.0,0
2012-09-27,119.209165,120.103919,118.894341,119.830521,111830300,0.0,0
2012-09-28,119.374872,119.764256,118.852942,119.275459,150696100,0.0,0
...,...,...,...,...,...,...,...
2022-09-19,382.260010,388.549988,382.179993,388.549988,73278500,0.0,0
2022-09-20,385.059998,386.119995,381.200012,384.089996,77274900,0.0,0
2022-09-21,386.109985,389.309998,377.380005,377.390015,106746600,0.0,0
2022-09-22,376.579987,378.299988,373.440002,374.220001,89472600,0.0,0


In [65]:
# Simple daily return: today's close relative to the previous close.
daily_return = data['Close'] / data['Close'].shift(1) - 1

# Target column followed by the lagged returns used as predictors
# (1, 2, 5 and 20 rows back). Dict order fixes the final column order.
new_columns = {'retorno diario': daily_return}
for lag in (1, 2, 5, 20):
    new_columns[f'-{lag} day'] = daily_return.shift(lag)

# Keep only the engineered columns and drop the leading rows whose lags are
# still undefined (NaN).
data = data.assign(**new_columns)[list(new_columns)].dropna(axis=0)

In [66]:
data

Unnamed: 0_level_0,retorno diario,-1 day,-2 day,-5 day,-20 day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2012-10-23,-0.013876,0.000140,-0.016665,0.010133,-0.010642
2012-10-24,-0.002829,-0.013876,0.000140,0.004535,-0.005621
2012-10-25,0.002907,-0.002829,-0.013876,-0.002598,0.009421
2012-10-26,-0.000566,0.002907,-0.002829,-0.016665,-0.004632
2012-10-31,0.000000,-0.000566,0.002907,0.000140,0.002639
...,...,...,...,...,...
2022-09-19,0.007755,-0.007629,-0.011353,0.010748,-0.013438
2022-09-20,-0.011479,0.007755,-0.007629,-0.043483,-0.020823
2022-09-21,-0.017444,-0.011479,0.007755,0.003816,-0.002419
2022-09-22,-0.008400,-0.017444,-0.011479,-0.011353,0.003201


In [67]:
# Predictors: the lagged daily returns. Target: the same-day return.
X = data.loc[:, ['-1 day', '-2 day', '-5 day', '-20 day']]
Y = data.loc[:, 'retorno diario']

In [68]:
# Time-ordered splitter with 2 splits: each test window comes after its training
# window, so the split never trains on rows later than the ones it is tested on.
tss = TimeSeriesSplit(n_splits=2)

In [69]:
# Only the final fold produced by the splitter is kept (earlier folds are
# discarded); in the stored outputs this trains through 2019-06 and tests on
# the remainder.
*_, (train_index, test_index) = tss.split(X)
X_train, y_train = X.iloc[train_index, :], Y.iloc[train_index]
X_test, y_test = X.iloc[test_index, :], Y.iloc[test_index]

In [70]:
X_train

Unnamed: 0_level_0,-1 day,-2 day,-5 day,-20 day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-10-23,0.000140,-0.016665,0.010133,-0.010642
2012-10-24,-0.013876,0.000140,0.004535,-0.005621
2012-10-25,-0.002829,-0.013876,-0.002598,0.009421
2012-10-26,0.002907,-0.002829,-0.016665,-0.004632
2012-10-31,-0.000566,0.002907,0.000140,0.002639
...,...,...,...,...
2019-05-31,0.002731,-0.006711,-0.012219,-0.002159
2019-06-03,-0.013475,0.002731,0.002269,0.009788
2019-06-04,-0.002543,-0.013475,-0.009301,-0.004115
2019-06-05,0.021707,-0.002543,-0.006711,-0.016700


In [71]:
X_test

Unnamed: 0_level_0,-1 day,-2 day,-5 day,-20 day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-06-07,0.006503,0.008662,-0.013475,-0.003026
2019-06-10,0.010007,0.006503,-0.002543,0.005023
2019-06-11,0.004589,0.010007,0.021707,-0.025130
2019-06-12,-0.000242,0.004589,0.008662,0.009043
2019-06-13,-0.001765,-0.000242,0.006503,0.005858
...,...,...,...,...
2022-09-19,-0.007629,-0.011353,0.010748,-0.013438
2022-09-20,0.007755,-0.007629,-0.043483,-0.020823
2022-09-21,-0.011479,0.007755,0.003816,-0.002419
2022-09-22,-0.017444,-0.011479,-0.011353,0.003201


In [72]:
# Ordinary least-squares regression of the daily return on its lagged values,
# fitted on the training window only.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

LinearRegression()

In [73]:
# In-sample R^2 of the fitted model (evaluated on the same rows it was trained on).
model.score(X=X_train, y=y_train)

0.002977203971266884

In [74]:
# Fitted (in-sample) predictions for every training row, in X_train row order.
y_train_pred = model.predict(X=X_train)

In [75]:
y_train

Date
2012-10-23   -0.013876
2012-10-24   -0.002829
2012-10-25    0.002907
2012-10-26   -0.000566
2012-10-31    0.000000
                ...   
2019-05-31   -0.013475
2019-06-03   -0.002543
2019-06-04    0.021707
2019-06-05    0.008662
2019-06-06    0.006503
Name: retorno diario, Length: 1664, dtype: float64

In [76]:
# Promote the target Series to a one-column DataFrame so it can be displayed and
# concatenated alongside the predictions. Guarded so that re-running this cell is
# a no-op instead of raising AttributeError (a DataFrame has no `to_frame`).
if isinstance(y_train, pd.Series):
    y_train = y_train.to_frame()

In [77]:
y_train

Unnamed: 0_level_0,retorno diario
Date,Unnamed: 1_level_1
2012-10-23,-0.013876
2012-10-24,-0.002829
2012-10-25,0.002907
2012-10-26,-0.000566
2012-10-31,0.000000
...,...
2019-05-31,-0.013475
2019-06-03,-0.002543
2019-06-04,0.021707
2019-06-05,0.008662


In [78]:
# Wrap the raw prediction array in a DataFrame with a single 'Prediccion' column
# (default integer index at this point).
y_train_pred = pd.DataFrame(data=y_train_pred, columns=['Prediccion'])

In [79]:
y_train_pred

Unnamed: 0,Prediccion
0,0.000867
1,0.000713
2,0.001257
3,0.001161
4,0.000469
...,...
1659,0.001169
1660,0.000690
1661,0.001446
1662,0.000407


In [80]:
 y_train_pred['Date'] = y_train.index

In [81]:
y_train

Unnamed: 0_level_0,retorno diario
Date,Unnamed: 1_level_1
2012-10-23,-0.013876
2012-10-24,-0.002829
2012-10-25,0.002907
2012-10-26,-0.000566
2012-10-31,0.000000
...,...
2019-05-31,-0.013475
2019-06-03,-0.002543
2019-06-04,0.021707
2019-06-05,0.008662


In [82]:
y_train_pred

Unnamed: 0,Prediccion,Date
0,0.000867,2012-10-23
1,0.000713,2012-10-24
2,0.001257,2012-10-25
3,0.001161,2012-10-26
4,0.000469,2012-10-31
...,...,...
1659,0.001169,2019-05-31
1660,0.000690,2019-06-03
1661,0.001446,2019-06-04
1662,0.000407,2019-06-05


In [83]:
# Use the dates as the index so the predictions align with y_train on concat.
y_train_pred = y_train_pred.set_index(keys='Date')

In [84]:
# Side-by-side table of realised vs. predicted daily return, aligned on the Date index.
retornos = pd.concat(objs=[y_train, y_train_pred], axis='columns')

In [85]:
retornos

Unnamed: 0_level_0,retorno diario,Prediccion
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-10-23,-0.013876,0.000867
2012-10-24,-0.002829,0.000713
2012-10-25,0.002907,0.001257
2012-10-26,-0.000566,0.001161
2012-10-31,0.000000,0.000469
...,...,...
2019-05-31,-0.013475,0.001169
2019-06-03,-0.002543,0.000690
2019-06-04,0.021707,0.001446
2019-06-05,0.008662,0.000407


In [86]:
def winloss(s):
    """Label one row as 'win' or 'loss' by comparing return and prediction signs.

    Parameters
    ----------
    s : mapping-like (e.g. a DataFrame row)
        Must provide 'retorno diario' (realised return) and, when the realised
        return is non-zero, 'Prediccion' (predicted return).

    Returns
    -------
    str
        'win' when both values are strictly positive or both strictly
        negative; 'loss' otherwise. A zero (or NaN) on either side therefore
        counts as 'loss'.
    """
    realised = s['retorno diario']
    if realised > 0:
        return 'win' if s['Prediccion'] > 0 else 'loss'
    if realised < 0:
        return 'win' if s['Prediccion'] < 0 else 'loss'
    # Flat (or non-comparable) realised return: never a win.
    return 'loss'

def longshort(s):
    """Map one row's predicted return to a position label.

    Parameters
    ----------
    s : mapping-like (e.g. a DataFrame row)
        Must provide 'Prediccion' (predicted return).

    Returns
    -------
    str
        'Long' when the prediction is strictly positive, otherwise 'Short'
        (zero and NaN predictions fall into 'Short').
    """
    return 'Long' if s['Prediccion'] > 0 else 'Short'

In [87]:
# Vectorised labelling (same rules as winloss / longshort, without a Python call
# per row, and well-defined on an empty frame).
realised = retornos['retorno diario']
predicted = retornos['Prediccion']

# 'win' only when realised and predicted returns are strictly on the same side
# of zero; zeros and NaNs compare False and therefore end up as 'loss'.
same_direction = ((realised > 0) & (predicted > 0)) | ((realised < 0) & (predicted < 0))
retornos['W/L'] = np.where(same_direction, 'win', 'loss')

# Position implied by the prediction: strictly positive -> 'Long', else 'Short'.
retornos['S/L'] = np.where(predicted > 0, 'Long', 'Short')

In [88]:
retornos

Unnamed: 0_level_0,retorno diario,Prediccion,W/L,S/L
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-10-23,-0.013876,0.000867,loss,Long
2012-10-24,-0.002829,0.000713,loss,Long
2012-10-25,0.002907,0.001257,win,Long
2012-10-26,-0.000566,0.001161,loss,Long
2012-10-31,0.000000,0.000469,loss,Long
...,...,...,...,...
2019-05-31,-0.013475,0.001169,loss,Long
2019-06-03,-0.002543,0.000690,loss,Long
2019-06-04,0.021707,0.001446,win,Long
2019-06-05,0.008662,0.000407,win,Long
