In [80]:
import pymysql 
import config as cfg
from sqlalchemy import create_engine
import mysql.connector
import pandas as pd
import requests 
import random
import warnings
warnings.filterwarnings("ignore")
import os
os.environ["TZ"] = "America/New_York" 
import time


In [81]:
# connect to mysql db
mydb = mysql.connector.connect(
    host=cfg.db_stock["host"],
    port=cfg.db_stock["port"],
    user=cfg.db_stock["user"],
    password=cfg.db_stock["password"],
    database=cfg.db_stock["database"],
)

In [82]:
# S&P 
sp500 = pd.read_html(cfg.sp500_wiki)[0]
#print(sp500)  # GICS Sector is a good sector to use 
#sp500.to_csv("sp500.csv")
sp500.head(1)

Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub-Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M,reports,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,66740,1902


In [83]:
# key generator
def gen_apikey(keys=None):
    idx = random.randint(0, len(keys)-1)
    return(keys[idx])

In [84]:
# stock price
# pull historical data for a symbol
def get_history_stock_price(symbol, max_num=1000, apikeys=None):
    # get adjusted daily time series value
    url = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&symbol={0}&outputsize=full&apikey={1}&datatype=csv".format(symbol, gen_apikey(keys=apikeys))
    df = pd.read_csv(url).loc[0:max_num+1, ]
      
    # approx logic - adjusted close not just calculated by dividen & split, stock buyback could affect
    df["adjusted_ratio"] = df["adjusted_close"] / df["close"]
    df["open"] = df["open"] * df["adjusted_ratio"]
    df["high"] = df["high"] * df["adjusted_ratio"]
    df["low"] = df["low"] * df["adjusted_ratio"]
    df["close"] = df["adjusted_close"]
    df["volume"] = df["volume"] / df["adjusted_ratio"]

    df["last_close"] = df["close"].shift(-1)
    df["change"] = (df["close"] - df["last_close"]) / df["last_close"]
    df["date"] = df["timestamp"]
    df = df.loc[0:max_num, ]
    time.sleep(12)

    # macd
    url = "https://www.alphavantage.co/query?function=MACD&symbol={0}&interval=daily&series_type=close&apikey={1}&datatype=csv".format(symbol, gen_apikey(keys=apikeys))
    df_macd = pd.read_csv(url).loc[0:max_num, ]
    df_macd["MACD_Hist"] = df_macd["MACD_Hist"] * 2
    df_macd.columns = ["date", "DIF", "MACD", "DEA"]
    time.sleep(12)

    # rsi
    url = "https://www.alphavantage.co/query?function=RSI&symbol={0}&interval=daily&time_period=6&series_type=close&apikey={1}&datatype=csv".format(symbol, gen_apikey(keys=apikeys))
    df_rsi = pd.read_csv(url).loc[0:max_num, ]
    df_rsi.columns = ["date", "RSI"]
    df_signal = pd.merge(df_macd, df_rsi, how="inner", on="date")
    time.sleep(12)

    # combine all
    df = pd.merge(df, df_signal, how="inner", on="date")
    df["symbol"] = symbol
    df = df[["date", "symbol", "open", "high", "low", "close", "volume", "change", "DIF", "DEA", "MACD", "RSI"]]
    return(df)

def get_sp500_history_stock_price(sp500_list, max_num=1000):
    df = pd.DataFrame()
    for symbol in sp500_list:
        df_symbol = get_history_stock_price(symbol, max_num=max_num)
        df = df.append([df_symbol])
    print(df)
    return(df)

In [85]:
## stock fundamentals 
# pull stock basic info for a symbol
def get_stock_fundamentals(symbol, apikeys):
    url = "https://www.alphavantage.co/query?function=OVERVIEW&symbol={0}&apikey={1}".format(symbol, gen_apikey(keys=apikeys))
    r = requests.get(url)
    data = r.json()
    return(data)

def get_sp500_funamentals(sp500_list, apikeys):
    info = []
    for symbol in sp500_list:
        print(symbol)
        while True:
            time.sleep(6)
            data = get_stock_fundamentals(symbol, apikeys)
            if "Note" not in data:
                info.append(data)
                break
    
    df = pd.DataFrame(info)
    print("done!")
    return(df)


In [86]:
df = get_sp500_funamentals(sp500_list=sp500["Symbol"], apikeys=cfg.api_keys)


MMM
ABT
ABBV
ABMD
ACN
ATVI
ADBE
AMD
AAP
AES
AFL
A
APD
AKAM
ALK
ALB
ARE
ALGN
ALLE
LNT
ALL
GOOGL
GOOG
MO
AMZN
AMCR
AEE
AAL
AEP
AXP
AIG
AMT
AWK
AMP
ABC
AME
AMGN
APH
ADI
ANSS
ANTM
AON
AOS
APA
AAPL
AMAT
APTV
ADM
ANET
AJG
AIZ
T
ATO
ADSK
ADP
AZO
AVB
AVY
BKR
BLL
BAC
BBWI
BAX
BDX
BRK.B
BBY
BIO
TECH
BIIB
BLK
BK
BA
BKNG
BWA
BXP
BSX
BMY
AVGO
BR
BRO
BF.B
CHRW
CDNS
CZR
CPB
COF
CAH
KMX
CCL
CARR
CTLT
CAT
CBOE
CBRE
CDW
CE
CNC
CNP
CDAY
CERN
CF
CRL
SCHW
CHTR
CVX
CMG
CB
CHD
CI
CINF
CTAS
CSCO
C
CFG
CTXS
CLX
CME
CMS
KO
CTSH
CL
CMCSA
CMA
CAG
COP
ED
STZ
COO
CPRT
GLW
CTVA
COST
CTRA
CCI
CSX
CMI
CVS
DHI
DHR
DRI
DVA
DE
DAL
XRAY
DVN
DXCM
FANG
DLR
DFS
DISCA
DISCK
DISH
DG
DLTR
D
DPZ
DOV
DOW
DTE
DUK
DRE
DD
DXC
EMN
ETN
EBAY
ECL
EIX
EW
EA
EMR
ENPH
ETR
EOG
EFX
EQIX
EQR
ESS
EL
ETSY
EVRG
ES
RE
EXC
EXPE
EXPD
EXR
XOM
FFIV
FAST
FRT
FDX
FIS
FITB
FE
FRC
FISV
FLT
FMC
F
FTNT
FTV
FBHS
FOXA
FOX
BEN
FCX
GPS
GRMN
IT
GNRC
GD
GE
GIS
GM
GPC
GILD
GL
GPN
GS
GWW
HAL
HBI
HIG
HAS
HCA
PEAK
HSIC
HSY
HES
HPE
HLT
HOLX
HD
HON
HRL
HST
HWM
HPQ
HUM

SSLError: HTTPSConnectionPool(host='www.alphavantage.co', port=443): Max retries exceeded with url: /query?function=OVERVIEW&symbol=ZTS&apikey=NPEE5WVEA7P7Y295 (Caused by SSLError(SSLError("bad handshake: SysCallError(54, 'ECONNRESET')",),))

In [100]:
df2

Unnamed: 0,Information
0,Thank you for using Alpha Vantage! Our standar...
1,Thank you for using Alpha Vantage! Our standar...
2,Thank you for using Alpha Vantage! Our standar...
3,Thank you for using Alpha Vantage! Our standar...
4,Thank you for using Alpha Vantage! Our standar...


In [94]:
df2 = get_sp500_funamentals(sp500_list={'AMAT', 'BF.B', 'BRK.B', 'HBI', 'PEAK'}, apikeys=cfg.api_keys)


PEAK
HBI
BRK.B
AMAT
BF.B
done!


In [46]:
l = set(sp500["Symbol"]) - set(df.loc[df["Sector"].notnull(), "Symbol"])
print(len(l))

316


In [70]:
df = df.append([df2])

In [96]:
df_copy = df.copy()


In [95]:
df = df.loc[df["Symbol"].notnull(), ]
df.shape

(500, 48)

In [97]:
df = df.append(df2)
df.shape

(505, 48)

In [93]:
set(sp500["Symbol"]) - set(df["Symbol"][df["Symbol"].notnull()])

{'AMAT', 'BF.B', 'BRK.B', 'HBI', 'PEAK'}

In [98]:
df.to_csv("sp500_vantage.csv")

In [68]:
df = df[df["Symbol"].notnull()]
df

Unnamed: 0,200DayMovingAverage,50DayMovingAverage,52WeekHigh,52WeekLow,Address,AnalystTargetPrice,AssetType,Beta,BookValue,CIK,...,QuarterlyEarningsGrowthYOY,QuarterlyRevenueGrowthYOY,ReturnOnAssetsTTM,ReturnOnEquityTTM,RevenuePerShareTTM,RevenueTTM,Sector,SharesOutstanding,Symbol,TrailingPE
0,193.01,179.95,205.85,159.64,"3M CENTER, BLDG. 220-13E-26A, ST PAUL, MN, US",190.33,Common Stock,0.948,25.09,66740,...,0,0.071,0.108,0.448,60.91,35326001000,LIFE SCIENCES,576253000,MMM,18.17
1,120.17,122.55,129.53,103.72,"100 ABBOTT PARK ROAD, D-322 AP6D, ABBOTT PARK,...",138.67,Common Stock,0.668,19.47,1800,...,0.696,0.234,0.0774,0.219,23.82,42308002000,LIFE SCIENCES,1768290000,ABT,31.75
2,113.67,110.85,120.07,93.83,"1 NORTH WAUKEGAN ROAD, NORTH CHICAGO, IL, US",127.78,Common Stock,0.799,7.67,1551152,...,0.38,0.112,0.0848,0.523,31.19,55168999000,LIFE SCIENCES,1767880000,ABBV,27.88
3,326.84,344.6,379.3,254.41,"22 CHERRY HILL DR, DANVERS, MA, US",387.33,Common Stock,1.263,30.43,815094,...,-0.088,0.183,0.115,0.116,21.49,973635000,LIFE SCIENCES,45497500,ABMD,110.8
4,317.2,344.4,372.12,235.78,"1 GRAND CANAL SQUARE, GRAND CANAL HARBOUR, DUB...",376.3,Common Stock,1.129,30.9,1467373,...,0.104,0.238,0.119,0.319,79.61,50533388000,TRADE & SERVICES,631641000,ACN,40.55
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
496,106.36,89.51,143.88,78.55,"3131 LAS VEGAS BOULEVARD SOUTH, LAS VEGAS, NV, US",105.83,Common Stock,2.412,0.895,1174922,...,0,1.685,-0.019,-5.61,30.49,3407434000,REAL ESTATE & CONSTRUCTION,115658000,WYNN,-
497,67.56,63.86,72.77,56.05,"414 NICOLLET MALL, MINNEAPOLIS, MN, US",71.14,Common Stock,0.352,28.18,72903,...,-0.009,0.09,0.0247,0.108,24.26,13023000000,ENERGY & TRANSPORTATION,538676000,XEL,21.59
498,146.12,172.03,216.63,111.63,"2100 LOGIC DR, SAN JOSE, CA, US",192,Common Stock,1.04,13.7,743988,...,0.19,0.221,0.0899,0.274,14.12,3468767000,MANUFACTURING,247880000,XLNX,66.42
499,124.1,127.07,138.78,92.92,"1133 WESTCHESTER AVENUE, WHITE PLAINS, NY, US",122.27,Common Stock,1.034,17.16,1524472,...,2.15,0.037,0.0473,0.155,29.1,5245000000,TECHNOLOGY,180325000,XYL,51.21


In [73]:
engine = create_engine("mysql+pymysql://{user}:{pw}@{host}:{port}/{db}".format(
    user=cfg.db_stock["user"],
    pw=cfg.db_stock["password"],
    host=cfg.db_stock["host"],
    port=cfg.db_stock["port"],
    db=cfg.db_stock["database"]))

In [74]:
df.to_sql("Fundamentals", con=engine, if_exists="replace")
engine.execute("SHOW TABLES").fetchall()


[('Fundamentals',), ('abcd',), ('test',)]

In [78]:
mycursor = mydb.cursor()
mycursor.execute(
    """
    SELECT *
    FROM Fundamentals
    WHERE Symbol='PYPL';
    """
)
print([i[0] for i in mycursor.description])
result = mycursor.fetchall()
for x in result:
    print(x)

['index', '200DayMovingAverage', '50DayMovingAverage', '52WeekHigh', '52WeekLow', 'Address', 'AnalystTargetPrice', 'AssetType', 'Beta', 'BookValue', 'CIK', 'Country', 'Currency', 'Description', 'DilutedEPSTTM', 'DividendDate', 'DividendPerShare', 'DividendYield', 'EBITDA', 'EPS', 'EVToEBITDA', 'EVToRevenue', 'ExDividendDate', 'Exchange', 'FiscalYearEnd', 'ForwardPE', 'GrossProfitTTM', 'Industry', 'Information', 'LatestQuarter', 'MarketCapitalization', 'Name', 'Note', 'OperatingMarginTTM', 'PEGRatio', 'PERatio', 'PriceToBookRatio', 'PriceToSalesRatioTTM', 'ProfitMargin', 'QuarterlyEarningsGrowthYOY', 'QuarterlyRevenueGrowthYOY', 'ReturnOnAssetsTTM', 'ReturnOnEquityTTM', 'RevenuePerShareTTM', 'RevenueTTM', 'Sector', 'SharesOutstanding', 'Symbol', 'TrailingPE']
(275, '268.89', '248.51', '310.16', '183.54', '2211 NORTH FIRST STREET, SAN JOSE, CA, US', '290.11', 'Common Stock', '1.104', '18.82', '1633917', 'USA', 'USD', 'PayPal Holdings, Inc. is an American company operating an online payme