# Import Packages

In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import yfinance as yf 
import pandas_datareader.data as pdr
import datetime as dt
import seaborn as sns
sns.set()
%matplotlib inline

# Download S&P500 Prices Data

## First, Read S&P500 List

In [2]:
# Load the S&P 500 constituent list; ticker symbols are in the 'Symbol' column.
SP = pd.read_csv("./Data/SP500_List.csv")
# Preview the first three rows.
SP.head(3)

Unnamed: 0,Symbol
0,MMM
1,AOS
2,ABT


In [3]:
# Plain Python list of the ticker symbols held in the 'Symbol' column.
SP_List = SP['Symbol'].tolist()

## Download Panel Data From Yahoo Finance

In [4]:
# Let yfinance take over pandas_datareader's get_data_yahoo.
yf.pdr_override()

# Download window shared by every request below.
start = "2017-03-01"
end = "2020-04-01"  # Real End Date + 1 day

# One price DataFrame per ticker, keyed by the ticker symbol.
df = {
    ticker: pdr.get_data_yahoo(ticker, start, end, as_panel=False)
    for ticker in SP_List
}

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [5]:
# Spot-check the download: show the first three rows of the AAPL price frame.
df['AAPL'].head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-03-01,137.889999,140.149994,137.600006,139.789993,133.797699,36414600
2017-03-02,140.0,140.279999,138.759995,138.960007,133.003311,26211000
2017-03-03,138.779999,139.830002,138.589996,139.779999,133.788147,21108100


## Combine All Stocks' Close Prices, Returns, and Momentum

In [6]:
# Ticker symbols used as keys of df, in insertion order
keys = list(df)

In [12]:
# Look-back windows, in rows (trading days), for the price-momentum columns.
# Order matters: it fixes the column order of the resulting frame.
MOMENTUM_WINDOWS = (('PM_1W', 5), ('PM_1M', 22), ('PM_3M', 63), ('PM_6M', 125))


def _close_with_momentum(prices, ticker):
    """Build the per-ticker feature frame from one downloaded price frame.

    Parameters
    ----------
    prices : pandas.DataFrame
        Price frame for a single ticker; only its 'Close' column is read.
    ticker : str
        Symbol written to the 'Ticker' column of every row.

    Returns
    -------
    pandas.DataFrame
        Columns 'Close', 'Return', 'PM_1W', 'PM_1M', 'PM_3M', 'PM_6M',
        'Ticker', indexed like ``prices``. The leading rows of each
        look-back column are NaN because no earlier close exists for them.
    """
    frame = pd.DataFrame(prices['Close'].copy())
    close = frame['Close']
    # One-row simple return.
    frame['Return'] = close / close.shift(1) - 1
    # Calculate Momentum: simple return over each look-back window.
    for column, window in MOMENTUM_WINDOWS:
        frame[column] = close / close.shift(window) - 1
    frame['Ticker'] = ticker
    return frame


# Construct a Dataframe to Store All Data.
# Every per-ticker frame is built first and concatenated once; appending to
# df_Close inside the loop re-copied the accumulated rows on every iteration.
df_Close = pd.concat([_close_with_momentum(df[key], key) for key in keys])

In [13]:
# Inspect the combined long-format frame (rows of every ticker stacked, tagged by 'Ticker').
df_Close

Unnamed: 0_level_0,Close,Return,PM_1W,PM_1M,PM_3M,PM_6M,Ticker
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-03-01,189.860001,,,,,,MMM
2017-03-02,189.889999,0.000158,,,,,MMM
2017-03-03,189.309998,-0.003054,,,,,MMM
2017-03-06,188.899994,-0.002166,,,,,MMM
2017-03-07,189.089996,0.001006,,,,,MMM
...,...,...,...,...,...,...,...
2020-03-25,108.209999,0.042887,0.022586,-0.218079,-0.182519,-0.132794,ZTS
2020-03-26,117.910004,0.089641,0.152253,-0.125427,-0.112925,-0.064652,ZTS
2020-03-27,111.769997,-0.052074,0.107291,-0.179067,-0.159814,-0.097902,ZTS
2020-03-30,116.680000,0.043930,0.259227,-0.128929,-0.124353,-0.063488,ZTS


In [14]:
# Move the 'Date' index into a regular column so rows can be filtered by date.
# Guarded so that re-running this cell does not reset the index a second time,
# which would add a spurious 'index' column that ends up in the saved CSV.
if 'Date' not in df_Close.columns:
    df_Close = df_Close.reset_index()

# Split Train and Test Datasets

In [15]:
# Keep only complete rows; the leading rows of each ticker have NaN in the
# return/momentum columns because their look-back window is not yet filled.
df_Close = df_Close.dropna()

In [16]:
# Date-based split; both bounds of each window are inclusive.
Train = df_Close[df_Close['Date'].between('2019-03-01', '2020-02-28')]
Test = df_Close[df_Close['Date'].between('2020-03-01', '2020-03-31')]

In [17]:
# Train.to_csv("./Data/BackTest.csv", index=False)
# Test.to_csv("./Data/Trading.csv", index=False)

In [18]:
# Persist the full cleaned table (all dates, not just Train/Test) without the row index.
df_Close.to_csv("./Data/Price_Data.csv", index=False)

# Download SPY as Benchmark

In [27]:
# Fetch SPY over the same start/end window used for the individual stocks.
SPY = pdr.get_data_yahoo('SPY', start, end, as_panel=False)

[*********************100%***********************]  1 of 1 completed


In [28]:
# Preview the first rows of the SPY price frame.
SPY.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-03-01,238.389999,240.320007,238.369995,239.779999,225.208954,149158200
2017-03-02,239.559998,239.570007,238.210007,238.270004,223.790726,70246000
2017-03-03,238.169998,238.610001,237.729996,238.419998,223.931625,81974300
2017-03-06,237.5,238.119995,237.009995,237.710007,223.264771,55391500
2017-03-07,237.360001,237.770004,236.759995,237.0,222.597885,65103700


In [29]:
# Calculate Momentum: close-to-close change over each look-back window (in rows)
for label, lookback in (('SPY_1W', 5), ('SPY_1M', 22), ('SPY_3M', 63), ('SPY_6M', 125)):
    SPY[label] = SPY['Close'] / SPY['Close'].shift(lookback) - 1

In [30]:
# Momentum should lag 1 period: push all four momentum columns down one row.
# Note: running this cell again lags the columns by a further row.
momentum_cols = ['SPY_1W', 'SPY_1M', 'SPY_3M', 'SPY_6M']
SPY[momentum_cols] = SPY[momentum_cols].shift(1)

In [31]:
# Discard rows with any missing value, then preview what remains.
SPY = SPY.dropna()
SPY.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,SPY_1W,SPY_1M,SPY_3M,SPY_6M
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-08-29,243.059998,245.149994,242.929993,244.850006,232.097702,51135700,0.006875,-0.010639,0.012712,0.019977
2017-08-30,244.830002,246.320007,244.619995,246.009995,233.197266,62030800,-0.002404,-0.008343,0.014124,0.027616
2017-08-31,246.720001,247.770004,246.050003,247.490005,234.60022,103803900,0.005929,-0.00308,0.010889,0.031835
2017-09-01,247.919998,248.330002,247.669998,247.839996,234.931976,62007000,0.014345,0.000687,0.013597,0.041143
2017-09-05,247.259995,247.520004,244.949997,246.059998,233.244644,91398800,0.013412,0.001617,0.015779,0.045738


In [32]:
# One-row simple return of the SPY close. The first row of the frame has no
# preceding row to compare against, so its return is NaN.
SPY['SPY Return'] = SPY['Close'].div(SPY['Close'].shift(1)) - 1

In [33]:
# SPY.reset_index(inplace=True)
# SPY = SPY[(SPY['Date'] >= '2018-03-01') & (SPY['Date'] <= '2020-03-31')]

In [34]:
# Save the SPY benchmark table; the index is written as the first CSV column.
SPY.to_csv("./Data/SPY.csv")

In [36]:
# Scratch check: a bare ticker string has type str.
type('SPY')

str

In [37]:
# Scratch check: a one-element list of tickers has type list.
type(['SPY'])

list

In [38]:
# Scratch check that a list of tickers is recognised as a list.
# The previous condition, type(['SPY'] == list), had its closing parenthesis in
# the wrong place: it evaluated to the class `bool`, which is always truthy, so
# the branch ran no matter what was being tested.
if isinstance(['SPY'], list):
    print(1)

1


In [43]:
# Fetch SPY again at a monthly interval over the same window (result is only displayed).
# NOTE(review): the displayed result contains rows whose values are all NaN
# (e.g. 2017-03-17, 2017-06-16) — drop them before using this data.
pdr.get_data_yahoo('SPY', start, end, interval='1mo')

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-03-01,238.389999,240.320007,231.610001,235.740005,221.414474,1921474000.0
2017-03-17,,,,,,
2017-04-01,235.800003,239.529999,232.509995,238.080002,224.585098,1549614000.0
2017-05-01,238.679993,242.080002,235.429993,241.440002,227.754623,1492547000.0
2017-06-01,241.970001,245.009995,239.960007,241.800003,228.094208,1572753000.0
2017-06-16,,,,,,
2017-07-01,242.880005,248.0,240.339996,246.770004,233.917709,1055909000.0
2017-08-01,247.460007,248.910004,241.830002,247.490005,234.60022,1557032000.0
2017-09-01,247.919998,251.320007,244.949997,251.229996,238.145416,1286405000.0
2017-09-15,,,,,,
