In [1]:
#regular imports
import pandas as pd
import numpy as np

# for stock features import
import yahoo_historical
import yahoofinancials
from yahoofinancials import YahooFinancials

# 1. Get Data

- To get data from yahoo finance, we first get the ticker list we want
- Then retrieve fundamental and technical data using library `yahoofinancials`

In [2]:
# get ticker table

ticker_url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
ticker_table = pd.read_html(ticker_url)
ticker_table

[          0                                1            2  \
 0    Symbol                         Security  SEC filings   
 1       MMM                       3M Company      reports   
 2       ABT              Abbott Laboratories      reports   
 3      ABBV                      AbbVie Inc.      reports   
 4      ABMD                      ABIOMED Inc      reports   
 5       ACN                    Accenture plc      reports   
 6      ATVI              Activision Blizzard      reports   
 7      ADBE                Adobe Systems Inc      reports   
 8       AMD       Advanced Micro Devices Inc      reports   
 9       AAP               Advance Auto Parts      reports   
 10      AES                         AES Corp      reports   
 11      AMG    Affiliated Managers Group Inc      reports   
 12      AFL                        AFLAC Inc      reports   
 13        A         Agilent Technologies Inc      reports   
 14      APD     Air Products & Chemicals Inc      reports   
 15     

In [3]:
# generate tickers list
tickers = ticker_table[0][0][1:].tolist()

## 1.1 Get Technical Data

In [4]:
# get the historical price data
sp_data = YahooFinancials(tickers)
sp_prices = sp_data.get_historical_price_data('2018-06-01', '2019-06-01', 'daily')

In [5]:
result = []
for stock in tickers:
    
    # check whether we got the prices data for a pecific stock
    # if not, pass to the next
    if "prices" not in sp_prices[stock]:
        continue
        
    temp_list = sp_prices[stock]["prices"]
    
    # reformatting the list
    for daily_data in temp_list:
        result_temp = []
        result_temp.append(stock)
        result_temp.append(daily_data["formatted_date"])
        result_temp.append(daily_data["high"])
        result_temp.append(daily_data["low"])
        result_temp.append(daily_data["adjclose"])
        result_temp.append(daily_data["volume"])
        result.append(result_temp)

# store the data into pandas dataframe
sp_df = pd.DataFrame(np.array(result))
sp_df.columns = ["ticker","date","high","low","adjclose","volume"]

In [6]:
sp_df

Unnamed: 0,ticker,date,high,low,adjclose,volume
0,MMM,2018-06-01,200.4199981689453,198.27999877929688,193.90518188476562,1827000
1,MMM,2018-06-04,201.69000244140625,200.07000732421875,195.3332977294922,2408400
2,MMM,2018-06-05,202.10000610351562,200.88999938964844,195.4693145751953,2246100
3,MMM,2018-06-06,204.0500030517578,201.5500030517578,198.0535430908203,2297300
4,MMM,2018-06-07,205.88999938964844,203.75999450683594,199.22909545898438,2279200
5,MMM,2018-06-08,206.75,204.41000366210938,200.70579528808594,2170200
6,MMM,2018-06-11,206.7899932861328,205.0,199.27767944335938,2302900
7,MMM,2018-06-12,205.9499969482422,203.9499969482422,199.08335876464844,1806100
8,MMM,2018-06-13,205.66000366210938,203.72999572753906,199.05421447753906,2097900
9,MMM,2018-06-14,206.27000427246094,203.5,197.83010864257812,2221500


In [7]:
sp_df.to_csv(r'sp500.csv')

## 1.2 Get Fundamental Data

In [8]:
# get the key statistics of the stocks

sp_fundamental = sp_data.get_key_statistics_data()

In [9]:
df_fundamental = pd.DataFrame.from_dict(sp_fundamental, orient = 'index')

In [10]:
df_fundamental.shape

(501, 51)

In [11]:
df_fundamental

Unnamed: 0,annualHoldingsTurnover,enterpriseToRevenue,beta3Year,profitMargins,enterpriseToEbitda,52WeekChange,morningStarRiskRating,forwardEps,revenueQuarterlyGrowth,sharesOutstanding,...,dateShortInterest,pegRatio,ytdReturn,forwardPE,maxAge,lastCapGain,shortPercentOfFloat,sharesShortPriorMonth,category,fiveYearAverageReturn
A,,4.553,,0.22255,18.778,0.16847,,3.40,,315992992,...,1560470400,2.37,,21.579412,1,,0.0123,4636334.0,,
AAL,,0.972,,0.03215,7.474,-0.100111,,5.72,,460611008,...,1560470400,0.45,,5.758741,1,,0.0601,20867364.0,,
AAP,,1.429,,0.04448,14.405,0.118062,,9.45,,71717600,...,1560470400,1.14,,16.553438,1,,0.0524,2867611.0,,
AAPL,,3.708,,0.22117,12.392,0.0711092,,12.61,,4601079808,...,1560470400,1.46,,16.116573,1,,0.0099,49550348.0,,
ABBV,,4.227,,0.16418,9.896,-0.249789,,9.43,,1478329984,...,1560470400,1.38,,7.598091,1,,0.0150,21309313.0,,
ABC,,0.117,,0.00531,8.974,0.0125086,,7.36,,210176992,...,1560470400,1.81,,12.133152,1,,0.0509,7081809.0,,
ABMD,,14.451,,0.33663,46.715,-0.362788,,5.82,,45283200,...,1560470400,2.01,,43.953606,1,,0.0605,2689518.0,,
ABT,,5.447,,0.08534,22.967,0.362794,,3.60,,1764179968,...,1560470400,2.27,,23.716667,1,,0.0081,13613261.0,,
ACN,,2.847,,0.11343,17.303,0.148999,,7.99,,616668032,...,1560470400,2.86,,23.993744,1,,0.0092,6507155.0,,
ADBE,,14.685,,0.26249,44.045,0.22602,,9.75,,487951008,...,1560470400,1.68,,31.485128,1,,0.0073,4930215.0,,


In [12]:
df_fundamental.to_csv(r'sp500_fundamental.csv')

In [19]:
sp_eps = sp_data.get_earnings_per_share()
sp_mc = sp_data.get_market_cap()
sp_pe = sp_data.get_pe_ratio()
sp_ni = sp_data.get_net_income()
sp_ebit = sp_data.get_ebit()

df_eps = pd.DataFrame(sp_eps, index=['eps']).T
df_mc = pd.DataFrame(sp_mc, index=['Market Cap']).T
df_pe = pd.DataFrame(sp_pe, index=['PE']).T
df_ni = pd.DataFrame(sp_ni, index=['NI']).T
df_ebit = pd.DataFrame(sp_ebit, index=['EBIT']).T

In [20]:
df = df_fundamental.join(df_eps, how = 'outer')
df = df.join(df_mc, how = 'outer')
df = df.join(df_pe, how = 'outer')
df = df.join(df_ni, how = 'outer')
df = df.join(df_ebit, how = 'outer')

In [21]:
df.head()

Unnamed: 0,annualHoldingsTurnover,enterpriseToRevenue,beta3Year,profitMargins,enterpriseToEbitda,52WeekChange,morningStarRiskRating,forwardEps,revenueQuarterlyGrowth,sharesOutstanding,...,lastCapGain,shortPercentOfFloat,sharesShortPriorMonth,category,fiveYearAverageReturn,eps,Market Cap,PE,NI,EBIT
A,,4.553,,0.22255,18.778,0.16847,,3.4,,315993000.0,...,,0.0123,4636334.0,,,3.443,23184406528,21.3099,316000000,974000000
AAL,,0.972,,0.03215,7.474,-0.100111,,5.72,,460611000.0,...,,0.0601,20867364.0,,,3.118,14652864512,10.5645,1412000000,3757000000
AAP,,1.429,,0.04448,14.405,0.118062,,9.45,,71717600.0,...,,0.0524,2867611.0,,,5.856,11221801984,26.7128,423847000,712142000
AAPL,,3.708,,0.22117,12.392,0.0711092,,12.61,,4601080000.0,...,,0.0099,49550348.0,,,11.887,914994298880,17.0968,59531000000,70898000000
ABBV,,4.227,,0.16418,9.896,-0.249789,,9.43,,1478330000.0,...,,0.015,21309313.0,,,3.511,100639596544,20.4073,5687000000,11911000000


In [22]:
df.to_csv(r'sp500_fundamental_eps_mc.csv')