# Import

In [1]:
import pandas as pd
import yfinance as yf

# Read S&P 500 Constituents Ticker List

In [2]:
# load the S&P 500 constituents snapshot and preview the first rows
constituents_csv = "sp500_Constituents_20190906.csv"
sp_ticker = pd.read_csv(constituents_csv)
sp_ticker.head(5)

Unnamed: 0,company_name,ticker
0,3M Company,MMM
1,A. O. Smith Corporation,AOS
2,Abbott Laboratories,ABT
3,AbbVie Inc.,ABBV
4,"Abiomed, Inc.",ABMD


# Quality Check

In [3]:
# tickers are unique when the row count printed here equals the distinct-ticker count shown below
print(sp_ticker.shape)
sp_ticker['ticker'].nunique()

(505, 2)


505

# Use `yfinance` to Download Price Data

In [4]:
# delete dot if it's included in ticker 
def del_dot(x):
    """Return ``x`` with every '.' character removed.

    A value that contains no dot is handed back unchanged.
    """
    return x.replace('.', '') if '.' in x else x

In [5]:
# strip dots from every ticker symbol, one element at a time
sp_ticker.ticker = sp_ticker.ticker.apply(del_dot)

In [6]:
# remove BFB because it has only one day of historical data
bfb_rows = sp_ticker.index[sp_ticker.ticker == 'BFB']
sp_ticker.drop(index=bfb_rows, inplace=True)

In [7]:
# rewrite the ticker BRKB as BRK-B
is_brkb = sp_ticker['ticker'].eq('BRKB')
sp_ticker.loc[is_brkb, 'ticker'] = 'BRK-B'

In [8]:
# notebook-wide settings

# tickers to request, taken from the cleaned constituents table
ticker_list = sp_ticker['ticker'].tolist()

# date window passed to the price download
start_date = '2005-01-01'
end_date = '2019-09-01'

# NOTE(review): the three settings below are not referenced by any cell shown here;
# confirm whether they are still needed
PRICE_MAX_RETRY = 5
PRICE_MAX_SLEEP = 5
YF_VERBOSE = False

In [9]:
# Download the price history for every ticker in a single call.
# auto_adjust=False is passed explicitly so the result keeps a separate
# 'Adj Close' field, which the transformation cells isolate afterwards.
# Newer yfinance releases default to auto_adjust=True, which omits that field.
stock_price = yf.download(
    ticker_list,
    start=start_date,
    end=end_date,
    auto_adjust=False,
)

[*********************100%***********************]  504 of 504 downloaded


# Transformation

In [10]:
# discard every price field except the adjusted close
unwanted_fields = ['Volume', 'Open', 'Low', 'High', 'Close']
stock_adj_close = stock_price.drop(columns=unwanted_fields)

In [11]:
# remove the outer column level ('Adj Close') so only the inner level remains
flat_columns = stock_adj_close.columns.droplevel(0)
stock_adj_close.columns = flat_columns

In [12]:
# preview the first rows of the wide table
stock_adj_close.head(5)

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABMD,ABT,ACN,ADBE,...,XEL,XLNX,XOM,XRAY,XRX,XYL,YUM,ZBH,ZION,ZTS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2005-01-03,14.95,,27.64,3.95,,11.54,15.01,15.43,19.99,30.84,...,10.0,21.21,33.26,25.4,38.39,,11.3,,55.63,
2005-01-04,14.56,,27.45,3.99,,11.45,14.98,15.27,19.52,30.02,...,9.85,20.61,33.04,25.2,37.83,,11.15,,54.87,
2005-01-05,14.55,,27.51,4.02,,11.43,14.69,15.1,19.44,29.86,...,9.73,19.96,32.87,24.9,37.08,,11.13,,54.67,
2005-01-06,14.23,,27.48,4.02,,11.52,14.52,15.44,19.27,29.36,...,9.72,19.64,33.28,25.02,36.92,,11.25,,54.98,
2005-01-07,14.22,,27.32,4.32,,11.49,14.29,15.7,20.17,29.38,...,9.66,19.81,33.06,24.83,36.72,,11.19,,53.65,


In [13]:
# pivot the columns into the row index, then turn that index back into ordinary columns
# NOTE(review): this cell overwrites its own input, so it is not safe to run twice in a row
adj_close_stacked = stock_adj_close.unstack()
stock_adj_close = adj_close_stacked.reset_index()

In [14]:
# give the three columns of the long table readable names
long_column_names = ['Ticker', 'Date', 'Adj_Close']
stock_adj_close.columns = long_column_names

In [15]:
# preview the first rows of the long table
stock_adj_close.head(5)

Unnamed: 0,Ticker,Date,Adj_Close
0,A,2005-01-03,14.95
1,A,2005-01-04,14.56
2,A,2005-01-05,14.55
3,A,2005-01-06,14.23
4,A,2005-01-07,14.22


In [16]:
# write the long-format table to CSV, leaving out the row index
output_csv = 'SP_Stock_Adj_Close.csv'
stock_adj_close.to_csv(output_csv, index=False)