In [9]:
import pandas as pd
import numpy as np
import wikipedia as wiki
import yfinance as yfin
from ta.momentum import rsi as ta_rsi
from ta.trend import macd as ta_macd

In [4]:
# Download the rendered "S&P 100" Wikipedia article and parse every HTML table in it.
wiki_html = wiki.page("S&P 100").html().encode("UTF-8")
wiki_tables = pd.read_html(wiki_html)

# Pick the constituents table by its columns rather than by a hard-coded position,
# so a table added to / removed from the article cannot silently select the wrong one.
constituent_columns = {"Symbol", "Name", "Sector"}
sp100_df = next(
    (table for table in wiki_tables if constituent_columns.issubset(table.columns)),
    None,
)
if sp100_df is None:
    raise ValueError(
        "No table with Symbol/Name/Sector columns was found on the S&P 100 page"
    )
sp100_df = sp100_df.set_index("Symbol")

# Keep a single Alphabet row: drop GOOG and relabel the remaining GOOGL entry.
sp100_df = sp100_df.drop("GOOG")
sp100_df.loc["GOOGL", "Name"] = "Alphabet"

# Use the dash form of the Berkshire ticker
# (presumably the spelling yfinance expects, since this index is fed to it later -- confirm).
sp100_df = sp100_df.rename(index={"BRK.B": "BRK-B"})
sp100_df.head()

Unnamed: 0_level_0,Name,Sector
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,Apple Inc.,Information Technology
ABBV,AbbVie,Health Care
ABT,Abbott Laboratories,Health Care
ACN,Accenture,Information Technology
ADBE,Adobe Inc.,Information Technology


In [None]:
# Persist the constituents table (indexed by Symbol) as CSV.
sp100_df.to_csv(path_or_buf="datasets/stocks.csv")

In [5]:
# Metrics kept from each ticker's yfinance `info` dictionary.
FUNDAMENTAL_COLUMNS = [
    "marketCap", "trailingPE", "forwardPE", "priceToBook",
    "trailingEps", "forwardEps", "bookValue", "payoutRatio", "beta",
    "fiveYearAvgDividendYield", "52WeekChange", "averageVolume",
    "enterpriseToRevenue", "profitMargins",
]

# One `info` dictionary per constituent, in the order of sp100_df's index.
fundamentals_list = [
    yfin.Ticker(ticker).info for ticker in sp100_df.index
]

fundamentals_df = (
    pd.DataFrame(fundamentals_list)
    .set_index("symbol")
    .rename_axis("Symbol")  # Consistency with sp100_df's index name
    .loc[:, FUNDAMENTAL_COLUMNS]
    # Missing metrics become 0. NOTE(review): these zeros will take part in any
    # later statistics computed on this frame -- confirm that is intended.
    .fillna(0)
)
fundamentals_df.head(10)

            marketCap  trailingPE  forwardPE  priceToBook  trailingEps  \
Symbol                                                                   
AAPL    3172382277632   33.520794  25.412874    47.584720         6.30   
ABBV     344685772800   82.914900  16.063478   103.423580         2.35   
ABT      226276737024   17.077225  25.284885     4.740227         7.64   
ACN      185685557248   24.432453  21.081022     6.353296        12.14   
ADBE     158810636288   24.595380  18.132360    12.378168        15.15   
AIG       47869386752   20.147419  12.220566     1.168957         4.07   
AMD      157289037824   96.815300  18.983393     2.727806         1.00   
AMGN     154827358208   38.091270  13.931786    26.308240         7.56   
AMT      103355777024   31.952244  32.516937    30.512712         6.91   
AMZN    1985523744768   33.771606  30.421902     6.930460         5.54   

        forwardEps  bookValue  payoutRatio   beta  fiveYearAvgDividendYield  \
Symbol                          

In [6]:
# Z-score every fundamental column: subtract the column mean, then divide by
# the column standard deviation (pandas' default `std`).
norm_fundamentals = (
    fundamentals_df
    .sub(fundamentals_df.mean(), axis="columns")
    .div(fundamentals_df.std(), axis="columns")
)
norm_fundamentals.head()

Unnamed: 0_level_0,marketCap,trailingPE,forwardPE,priceToBook,trailingEps,forwardEps,bookValue,payoutRatio,beta,fiveYearAvgDividendYield,52WeekChange,averageVolume,enterpriseToRevenue,profitMargins
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
AAPL,4.979507,-0.005449,-0.024955,1.354977,-0.231396,-0.217946,-0.100819,-0.765857,0.627643,-0.972561,0.176036,1.20208,0.228341,0.517615
ABBV,-0.011882,0.786736,-0.235176,3.335136,-0.443169,-0.041718,-0.100875,5.350956,-1.01607,1.102268,0.070635,-0.268137,0.119515,-0.771738
ABT,-0.220895,-0.269172,-0.027833,-0.164375,-0.159554,-0.363264,-0.100307,-0.449491,-0.416491,-0.311285,0.152536,-0.283513,-0.03458,1.093959
ACN,-0.292545,-0.151209,-0.122357,-0.107173,0.081707,0.04778,-0.099883,-0.041768,0.75531,-0.493288,-0.358443,-0.382364,-0.31195,-0.459156
ADBE,-0.339985,-0.148595,-0.188657,0.106481,0.243084,0.346721,-0.10025,-1.145545,1.15883,-1.245565,-0.725713,-0.371953,0.1278,0.998978


In [None]:
# Persist the standardised fundamentals (indexed by Symbol) as CSV.
norm_fundamentals.to_csv(path_or_buf="datasets/fundamentals.csv")

In [10]:
# Rolling log-return features: column name -> (rolling window in rows, scale factor).
# NOTE(review): each column is divided by its own standard deviation afterwards, so the
# scale factor cancels out of the final value; it is kept only to reproduce the original
# arithmetic exactly. Confirm whether a different scaling was intended.
LOG_RETURN_WINDOWS = {
    "ALR1W": (5, 5),
    "ALR2W": (10, 5),
    "ALR1M": (21, 21),
    "ALR2M": (42, 21),
}


def build_price_features(history):
    """Derive the per-stock feature columns from one yfinance price history.

    Parameters
    ----------
    history : pandas.DataFrame
        Frame returned by ``Ticker.history``; must contain a ``Close`` column.
        The ``Open``, ``High``, ``Low`` and ``Volume`` columns are dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) holding ``Close`` plus:
        ``NormClose`` (z-scored close), ``DailyLogReturn`` and the rolling
        log-return sums listed in ``LOG_RETURN_WINDOWS`` (each divided by its own
        standard deviation), ``RSI`` (divided by 100) and ``MACD``, the last two
        computed by the ``ta`` library with its default settings.
    """
    # `drop` without inplace returns a new frame, so the downloaded data stays untouched.
    features = history.drop(columns=["Open", "High", "Low", "Volume"])
    close = features["Close"]

    features["NormClose"] = (close - close.mean()) / close.std()

    # Log returns; rolling sums are taken on the raw series, before it is rescaled.
    log_return = np.log(1 + close.pct_change())
    features["DailyLogReturn"] = log_return / log_return.std()
    for column, (window, scale) in LOG_RETURN_WINDOWS.items():
        rolled = log_return.rolling(window=window).sum() * scale
        # Normalizing the standard deviation
        features[column] = rolled / rolled.std()

    features["RSI"] = ta_rsi(close) / 100
    features["MACD"] = ta_macd(close)
    return features


# Price history for every constituent (5y period, corporate-action columns excluded).
price_histories = [
    yfin.Ticker(stock).history(period="5y", actions=False) for stock in sp100_df.index
]

# Stack the per-stock frames under a Symbol level and drop the warm-up rows in
# which any rolling window or indicator is still NaN.
values_df = pd.concat(
    [build_price_features(history) for history in price_histories],
    keys=sp100_df.index,
).dropna()
values_df.head(n=10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Close,NormClose,DailyLogReturn,ALR1W,ALR2W,ALR1M,ALR2M,RSI,MACD
Symbol,Date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
AAPL,2020-06-29 00:00:00-04:00,88.034103,-1.860825,1.21478,0.19868,0.948021,1.620094,2.123246,0.645419,2.907908
AAPL,2020-06-30 00:00:00-04:00,88.768936,-1.842347,0.443175,-0.116392,0.63084,1.571055,2.008116,0.659935,2.90382
AAPL,2020-07-01 00:00:00-04:00,88.601059,-1.846569,-0.100922,0.27516,0.621945,1.489382,2.139495,0.653355,2.854132
AAPL,2020-07-02 00:00:00-04:00,88.601059,-1.846569,0.0,-0.049269,0.614866,1.420527,2.010757,0.653355,2.782678
AAPL,2020-07-06 00:00:00-04:00,90.971153,-1.786969,1.407425,1.367858,1.185962,1.860098,2.116142,0.702009,2.884051
AAPL,2020-07-07 00:00:00-04:00,90.688858,-1.794068,-0.165698,0.730879,0.671646,1.468903,1.993613,0.689594,2.908088
AAPL,2020-07-08 00:00:00-04:00,92.801041,-1.740954,1.227473,1.092769,0.705475,1.683729,2.110272,0.72831,3.062273
AAPL,2020-07-09 00:00:00-04:00,93.200104,-1.730919,0.228771,1.244896,1.098306,1.347617,1.934048,0.735034,3.18001
AAPL,2020-07-10 00:00:00-04:00,93.363136,-1.72682,0.09318,1.287891,0.894957,1.050942,1.807001,0.737888,3.24902
AAPL,2020-07-13 00:00:00-04:00,92.932434,-1.73765,-0.246518,0.524729,1.367476,1.610034,1.869953,0.715947,3.231704


In [None]:
# Persist the per-stock price features (indexed by Symbol and Date) as CSV.
values_df.to_csv(path_or_buf="datasets/values.csv")