In [80]:
# --- standard library ---
import os
import random
import time
import warnings
from datetime import datetime  # was misspelled "datatime", which raises ImportError

# --- third party ---
import mysql.connector
import pandas as pd
import pymysql
import requests
from sqlalchemy import create_engine

# --- local ---
import config as cfg

# Blanket-silence warnings for the whole notebook.
# NOTE(review): this also hides deprecation warnings from pandas/SQLAlchemy.
warnings.filterwarnings("ignore")

# Run the kernel in US Eastern time. Changing TZ after the `time` module has
# been loaded only takes effect once tzset() is called (Unix only).
os.environ["TZ"] = "America/New_York"
if hasattr(time, "tzset"):
    time.tzset()


In [81]:
# Open a raw mysql.connector connection; every connection setting is read
# from the cfg.db_stock mapping.
mydb = mysql.connector.connect(
    **{
        field: cfg.db_stock[field]
        for field in ("host", "port", "user", "password", "database")
    }
)

In [126]:
# S&P 500 constituents: parse the HTML page at cfg.sp500_wiki and keep the
# first table that pandas finds on it.
sp500 = pd.read_html(cfg.sp500_wiki)[0]
# Author's note: "GICS Sector" is a good column to use for sector grouping.
# Optional debugging / snapshot steps, left disabled:
#print(sp500)
#sp500.to_csv("sp500.csv")
# Sanity check that a DataFrame came back, then display its first row.
print(type(sp500))
sp500.head(1)

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Symbol,Security,SEC filings,GICS Sector,GICS Sub-Industry,Headquarters Location,Date first added,CIK,Founded
0,MMM,3M,reports,Industrials,Industrial Conglomerates,"Saint Paul, Minnesota",1976-08-09,66740,1902


In [83]:
# key generator
def gen_apikey(keys=None):
    """Return one API key picked uniformly at random from ``keys``.

    Args:
        keys: non-empty indexable sequence of API keys.

    Returns:
        One element of ``keys``.

    Raises:
        ValueError: if ``keys`` is ``None`` or empty. Previously this surfaced
            as an opaque ``TypeError``/``ValueError`` from ``len``/``randint``.
    """
    if keys is None or len(keys) == 0:
        raise ValueError("gen_apikey() needs a non-empty sequence of API keys")
    return random.choice(keys)

In [84]:
# stock price
# pull historical data for a symbol
def _read_alphavantage_csv(function, symbol, apikeys, required=(), **params):
    """Download one Alpha Vantage CSV endpoint into a DataFrame.

    Args:
        function: value of the API's ``function`` query parameter.
        symbol: ticker symbol to query.
        apikeys: sequence of API keys; one is drawn per request via gen_apikey.
        required: column names that must be present in the response.
        **params: extra query parameters (e.g. ``interval``, ``outputsize``).

    Raises:
        ValueError: if any ``required`` column is missing, which is what
            happens when the API answers with an error/limit message
            instead of CSV data.
    """
    from urllib.parse import urlencode  # local import keeps this cell self-contained

    query = {"function": function, "symbol": symbol}
    query.update(params)
    query.update({"apikey": gen_apikey(keys=apikeys), "datatype": "csv"})
    frame = pd.read_csv("https://www.alphavantage.co/query?" + urlencode(query))

    missing = [name for name in required if name not in frame.columns]
    if missing:
        # The URL is deliberately left out of the message: it contains the API key.
        raise ValueError(
            "Alpha Vantage {} response for {} is missing column(s) {}; got {}".format(
                function, symbol, missing, list(frame.columns)
            )
        )
    return frame


def get_history_stock_price(symbol, max_num=1000, apikeys=None, pause=12):
    """Fetch adjusted daily prices plus MACD and RSI(6) for one symbol.

    Args:
        symbol: ticker symbol.
        max_num: maximum number of most-recent rows to return.
        apikeys: sequence of Alpha Vantage API keys; falls back to
            ``cfg.api_keys`` when ``None``.
        pause: seconds to sleep after each of the three API requests
            (throttling; the original hard-coded 12).

    Returns:
        DataFrame with columns date, symbol, open, high, low, close, volume,
        change, DIF, DEA, MACD, RSI, restricted to dates present in all three
        API responses (inner joins), at most ``max_num`` rows.

    Raises:
        ValueError: if a response lacks the expected columns.
    """
    if apikeys is None:
        apikeys = cfg.api_keys

    # get adjusted daily time series value
    prices = _read_alphavantage_csv(
        "TIME_SERIES_DAILY_ADJUSTED", symbol, apikeys,
        required=("timestamp", "open", "high", "low", "close", "adjusted_close", "volume"),
        outputsize="full",
    )
    # Keep one extra row so that the oldest returned row still has a
    # previous close to compute `change` from. `.iloc` is end-exclusive; the
    # original label-based `.loc[0:max_num]` was inclusive and returned
    # max_num + 1 rows.
    prices = prices.iloc[: max_num + 1].copy()

    # approx logic - adjusted close is not only driven by dividends & splits,
    # stock buybacks could affect it too
    ratio = prices["adjusted_close"] / prices["close"]
    for column in ("open", "high", "low"):
        prices[column] = prices[column] * ratio
    prices["close"] = prices["adjusted_close"]
    prices["volume"] = prices["volume"] / ratio

    # The next row holds the previous session (the code treats row 0 as newest).
    last_close = prices["close"].shift(-1)
    prices["change"] = (prices["close"] - last_close) / last_close
    prices["date"] = prices["timestamp"]
    prices = prices.iloc[:max_num]
    time.sleep(pause)

    # macd
    df_macd = _read_alphavantage_csv(
        "MACD", symbol, apikeys, required=("MACD_Hist",),
        interval="daily", series_type="close",
    ).iloc[:max_num].copy()
    # Histogram is doubled, then the four columns are relabelled by position.
    # NOTE(review): assumes the API column order is time, MACD, MACD_Hist,
    # MACD_Signal - confirm against a live response.
    df_macd["MACD_Hist"] = df_macd["MACD_Hist"] * 2
    df_macd.columns = ["date", "DIF", "MACD", "DEA"]
    time.sleep(pause)

    # rsi
    df_rsi = _read_alphavantage_csv(
        "RSI", symbol, apikeys,
        interval="daily", time_period=6, series_type="close",
    ).iloc[:max_num].copy()
    df_rsi.columns = ["date", "RSI"]
    df_signal = pd.merge(df_macd, df_rsi, how="inner", on="date")
    time.sleep(pause)

    # combine all
    combined = pd.merge(prices, df_signal, how="inner", on="date")
    combined["symbol"] = symbol
    return combined[
        ["date", "symbol", "open", "high", "low", "close", "volume",
         "change", "DIF", "DEA", "MACD", "RSI"]
    ]

def get_sp500_history_stock_price(sp500_list, max_num=1000, apikeys=None):
    """Fetch price/indicator history for every symbol and stack the results.

    Args:
        sp500_list: iterable of ticker symbols.
        max_num: forwarded to get_history_stock_price for each symbol.
        apikeys: sequence of API keys forwarded to get_history_stock_price;
            falls back to ``cfg.api_keys`` when ``None``. (The original never
            passed any keys, so every call failed inside gen_apikey.)

    Returns:
        One DataFrame with the per-symbol frames concatenated in input order
        (per-symbol indexes are kept); an empty DataFrame for an empty list.
    """
    if apikeys is None:
        apikeys = cfg.api_keys

    # Collect first and concatenate once: DataFrame.append in a loop copies
    # the accumulated frame every iteration and no longer exists in pandas 2.
    frames = [
        get_history_stock_price(symbol, max_num=max_num, apikeys=apikeys)
        for symbol in sp500_list
    ]
    df = pd.concat(frames) if frames else pd.DataFrame()
    print(df)
    return df

In [101]:
## stock fundamentals
# pull stock basic info for a symbol
def get_stock_fundamentals(symbol, apikeys, timeout=30):
    """Request the Alpha Vantage OVERVIEW record for one symbol.

    Args:
        symbol: ticker symbol.
        apikeys: sequence of API keys; one is drawn at random via gen_apikey.
        timeout: seconds to wait for the HTTP request before requests raises
            a timeout error. Without it a stalled connection blocks forever.

    Returns:
        The decoded JSON body of the response. The HTTP status is not
        checked, so error payloads are returned to the caller as-is.
    """
    response = requests.get(
        "https://www.alphavantage.co/query",
        # Passing params lets requests URL-encode the values.
        params={
            "function": "OVERVIEW",
            "symbol": symbol,
            "apikey": gen_apikey(keys=apikeys),
        },
        timeout=timeout,
    )
    return response.json()

def get_sp500_funamentals(sp500_list, apikeys, pause=6, max_attempts=None):
    """Collect the OVERVIEW record of every symbol into one DataFrame.

    A response counts as valid when it contains the key "Symbol"; anything
    else is retried after another pause.

    Args:
        sp500_list: iterable of ticker symbols.
        apikeys: sequence of API keys forwarded to get_stock_fundamentals.
        pause: seconds to sleep before every request (original hard-coded 6).
        max_attempts: give up on a symbol after this many requests and move
            on. ``None`` (default) retries indefinitely, as the original did,
            which never terminates for a symbol the API cannot resolve.

    Returns:
        DataFrame with one row per symbol that produced a valid response.
    """
    info = []
    for symbol in sp500_list:
        print(symbol)
        attempts = 0
        while max_attempts is None or attempts < max_attempts:
            attempts += 1
            time.sleep(pause)
            data = get_stock_fundamentals(symbol, apikeys)
            if "Symbol" in data:
                info.append(data)
                break
        else:
            # Only reached when max_attempts is set and was exhausted.
            print("giving up on {} after {} attempts".format(symbol, attempts))

    df = pd.DataFrame(info)
    print("done!")
    return df


In [102]:
# Download the fundamentals of every S&P 500 symbol, then snapshot them to a
# CSV named after today's date (sp500_<year>_<month>_<day>.csv, no zero padding).
df = get_sp500_funamentals(sp500_list=sp500["Symbol"], apikeys=cfg.api_keys)
# A single datetime.now() call, so year/month/day cannot disagree when the
# download finishes right around midnight.
df.to_csv("sp500_{0.year}_{0.month}_{0.day}.csv".format(datetime.now()))


In [111]:
# Reload a previously saved fundamentals snapshot and re-export it under
# today's date (sp500_<year>_<month>_<day>.csv, no zero padding).
df = pd.read_csv("sp500_vantage.csv")
# A single datetime.now() call keeps year/month/day consistent with each other.
df.to_csv("sp500_{0.year}_{0.month}_{0.day}.csv".format(datetime.now()))


In [110]:
# The CSV-export cells call datetime.now(), so they need this name in scope.
# NOTE(review): this import belongs with the other imports in the first cell;
# cells that appear above this one depend on it (out-of-order execution).
from datetime import datetime
# Display the current local time as a quick check.
datetime.now()

datetime.datetime(2021, 11, 13, 14, 13, 40, 128695)

In [112]:
# SQLAlchemy engine (pymysql driver) built from the same cfg.db_stock settings
# as the raw mysql.connector connection.
from urllib.parse import quote

# User name and password are percent-encoded so that characters such as
# "@", ":" or "/" cannot break the URL; plain alphanumeric credentials are
# unchanged by the encoding.
engine = create_engine("mysql+pymysql://{user}:{pw}@{host}:{port}/{db}".format(
    user=quote(str(cfg.db_stock["user"]), safe=""),
    pw=quote(str(cfg.db_stock["password"]), safe=""),
    host=cfg.db_stock["host"],
    port=cfg.db_stock["port"],
    db=cfg.db_stock["database"]))

In [113]:
from sqlalchemy import text

# Replace the "Fundamentals" table with the current DataFrame contents.
df.to_sql("Fundamentals", con=engine, if_exists="replace")

# List the tables to confirm the write. Engine.execute() with a raw string is
# deprecated in SQLAlchemy 1.4 and removed in 2.0; an explicit connection
# with text() works on both.
with engine.connect() as conn:
    tables = conn.execute(text("SHOW TABLES")).fetchall()
tables


[('Fundamentals',), ('abcd',), ('test',)]

In [125]:
# Count the rows of the Fundamentals table per sector, using the raw
# mysql.connector connection opened earlier in the notebook.
mycursor = mydb.cursor()
mycursor.execute(
    """
    SELECT sector, count(*)
    FROM Fundamentals
    group by sector;
    """
)
# First element of each description entry is the column name of the result set.
print([i[0] for i in mycursor.description])
# Fetch every row and print one tuple per line.
# NOTE(review): the cursor is never closed in this cell.
result = mycursor.fetchall()
for x in result:
    print(x)

['sector', 'count(*)']
('LIFE SCIENCES', 80)
('TRADE & SERVICES', 77)
('FINANCE', 66)
('ENERGY & TRANSPORTATION', 74)
('MANUFACTURING', 93)
('TECHNOLOGY', 70)
('REAL ESTATE & CONSTRUCTION', 42)
