# Alpha-Vantage Data Engineering  

In [115]:
import os
import io
import json
import requests
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
from pymongo import MongoClient
import time
from tqdm.notebook import tqdm

In [89]:
secrets_path = os.path.join(os.path.expanduser('~'), "git/bakery/bakery/data/config.json")
with open(secrets_path, "rb") as f:
    secrets = json.loads(f.read().decode())
    api_key = secrets["alpha"]["api_key"]
    limit = secrets["alpha"]["limit"]
    endpoint = secrets["alpha"]["endpoint"]
    conn_str = secrets["mongo"]["conn_str"]

## Exploration

### [Fundamental Data](https://www.alphavantage.co/documentation/#dailyadj:~:text=json()%0A%0Aprint(data)-,Fundamental%20Data,-We%20offer%20the)

#### [Listing & Delisting Status](https://www.alphavantage.co/documentation/#dailyadj:~:text=json()%0A%0Aprint(data)-,Listing%20%26%20Delisting%20Status,-This%20API%20returns)

Active list, most recent

In [90]:
params = {
        "function": "LISTING_STATUS",
        "apikey": api_key
    }
r = requests.get(endpoint, params)
data = io.BytesIO(r.content)
active_ls_df = pd.read_csv(data)
active_ls_df.head()

Unnamed: 0,symbol,name,exchange,assetType,ipoDate,delistingDate,status
0,A,Agilent Technologies Inc,NYSE,Stock,1999-11-18,,Active
1,AA,Alcoa Corp,NYSE,Stock,2016-10-18,,Active
2,AAA,AXS First Priority CLO Bond ETF,NYSE ARCA,ETF,2020-09-09,,Active
3,AAAU,Goldman Sachs Physical Gold ETF,BATS,ETF,2018-08-15,,Active
4,AACG,ATA Creativity Global,NASDAQ,Stock,2008-01-29,,Active


Delisted list, most recent

In [91]:
params = {
        "function": "LISTING_STATUS",
        "state": "delisted",
        "apikey": api_key
    }
r = requests.get(endpoint, params)
data = io.BytesIO(r.content)
delisted_ls_df = pd.read_csv(data)
delisted_ls_df.head()

Unnamed: 0,symbol,name,exchange,assetType,ipoDate,delistingDate,status
0,AA-W,Alcoa Corporation When Issued,NYSE,Stock,2016-10-18,2016-11-08,Delisted
1,AAAP,Advanced Accelerator Applications SA,NASDAQ,Stock,2015-11-11,2018-02-20,Delisted
2,AABA,Altaba Inc,NASDAQ,Stock,1996-04-12,2019-11-06,Delisted
3,AAC,American Addiction Centers,NYSE,Stock,2014-10-02,2021-04-19,Delisted
4,AAC-U,Ares Acquisition Corporation - Units (1 Ord Sh...,NYSE,Stock,2021-02-02,2023-11-06,Delisted


Get an active list for a date range

In [92]:
path = os.path.join(os.path.expanduser('~'), "git/bakery/bakery/data/config.json")
with open(path, "rb") as f:
    config = json.loads(f.read().decode())
client = MongoClient(config["mongo"]["conn_str"])

db = client["bakery"]
coll = db["alpha_daily_active_ls"]

In [93]:
today = datetime.today()
start_dt = datetime(year=2010, month=1, day=2)
num_days = (today - start_dt).days + 1
date_ls = [(today - timedelta(days=x)) for x in range(num_days)]
date_ls_str = [dt.strftime("%Y-%m-%d") for dt in date_ls]

In [94]:
# total_requests = 0
# t0 = time.time()
# t = tqdm(date_ls_str, position=0, desc="Date", leave=False)
# for date_str in t:
#     # ETL
#     params = {
#             "function": "LISTING_STATUS",
#             "state": "active",
#             "date": date_str,
#             "apikey": api_key
#         }
#     r = requests.get(endpoint, params)
#     data = io.BytesIO(r.content)
#     active_ls_pdf = pd.read_csv(data)
#     active_ls_1d = active_ls_pdf["symbol"].tolist()

#     # Write to mongo
#     result = coll.insert_one({"dttm": datetime.strptime(date_str, "%Y-%m-%d"), "ls": active_ls_1d})

#     # Rate limiting
#     total_requests += 1
#     total_seconds = time.time() - t0
#     requests_per_minute = total_requests / (total_seconds / 60)
#     t.set_description(f"Dates (req / min: {requests_per_minute:.2f})")
#     t.refresh()
#     while (requests_per_minute > limit - 2):
#         time.sleep(1)
#         total_seconds = time.time() - t0
#         requests_per_minute = total_requests / (total_seconds / 60)

In [95]:
# active_pdf["dttm"] = pd.to_datetime(active_pdf["dt"])
# rows = active_pdf.to_dict("records")

In [75]:
# result = coll.insert_many(rows)

### [Company Overview](https://www.alphavantage.co/documentation/#dailyadj:~:text=fundamental%20data%20points.-,Company%20Overview,-This%20API%20returns)

In [116]:
params = {
        "function": "OVERVIEW",
        "symbol": "TSLA",
        "apikey": api_key
    }
r = requests.get(endpoint, params)
r_json = r.json()

In [117]:
r_json["CIK"] = int(r_json["CIK"])
r_json["LatestQuarter"] = datetime.strptime(r_json["LatestQuarter"], "%Y-%m-%d")
r_json["MarketCapitalization"] = int(r_json["MarketCapitalization"])
r_json["EBITDA"] = int(r_json["EBITDA"])
r_json["PERatio"] = float(r_json["PERatio"])
r_json["PEGRatio"] = float(r_json["PEGRatio"])
r_json["BookValue"] = float(r_json["BookValue"])
r_json["DividendPerShare"] = np.nan if r_json["DividendPerShare"] == "None" else float(r_json["DividendPerShare"])
r_json["DividendYield"] = np.nan if r_json["DividendYield"] == "None" else float(r_json["DividendYield"])
r_json["EPS"] = float(r_json["EPS"])
r_json["RevenuePerShareTTM"] = float(r_json["RevenuePerShareTTM"])
r_json["ProfitMargin"] = float(r_json["ProfitMargin"])
r_json["OperatingMarginTTM"] = float(r_json["OperatingMarginTTM"])
r_json["ReturnOnAssetsTTM"] = float(r_json["ReturnOnAssetsTTM"])
r_json["RevenueTTM"] = int(r_json["RevenueTTM"])
r_json["GrossProfitTTM"] = int(r_json["GrossProfitTTM"])
r_json["DilutedEPSTTM"] = float(r_json["DilutedEPSTTM"])
r_json["QuarterlyEarningsGrowthYOY"] = float(r_json["QuarterlyEarningsGrowthYOY"])
r_json["QuarterlyRevenueGrowthYOY"] = float(r_json["QuarterlyRevenueGrowthYOY"])
r_json["AnalystTargetPrice"] = float(r_json["AnalystTargetPrice"])
r_json["TrailingPE"] = float(r_json["TrailingPE"])
r_json["ForwardPE"] = float(r_json["ForwardPE"])
r_json["PriceToSalesRatioTTM"] = float(r_json["PriceToSalesRatioTTM"])
r_json["PriceToBookRatio"] = float(r_json["PriceToBookRatio"])
r_json["EVToRevenue"] = float(r_json["EVToRevenue"])
r_json["EVToEBITDA"] = float(r_json["EVToEBITDA"])
r_json["Beta"] = float(r_json["Beta"])
r_json["52WeekHigh"] = float(r_json["52WeekHigh"])
r_json["52WeekLow"] = float(r_json["52WeekLow"])
r_json["50DayMovingAverage"] = float(r_json["50DayMovingAverage"])
r_json["200DayMovingAverage"] = float(r_json["200DayMovingAverage"])
r_json["SharesOutstanding"] = int(r_json["SharesOutstanding"])
r_json["DividendDate"] = np.nan if r_json["DividendDate"] == "None" else datetime.strptime(r_json["DividendDate"], "%Y-%m-%d")
r_json["ExDividendDate"] = np.nan if r_json["ExDividendDate"] == "None" else datetime.strptime(r_json["ExDividendDate"], "%Y-%m-%d")

In [118]:
r_json

{'Symbol': 'TSLA',
 'AssetType': 'Common Stock',
 'Name': 'Tesla Inc',
 'Description': "Tesla, Inc. is an American electric vehicle and clean energy company based in Palo Alto, California. Tesla's current products include electric cars, battery energy storage from home to grid-scale, solar panels and solar roof tiles, as well as other related products and services. In 2020, Tesla had the highest sales in the plug-in and battery electric passenger car segments, capturing 16% of the plug-in market (which includes plug-in hybrids) and 23% of the battery-electric (purely electric) market. Through its subsidiary Tesla Energy, the company develops and is a major installer of solar photovoltaic energy generation systems in the United States. Tesla Energy is also one of the largest global suppliers of battery energy storage systems, with 3 GWh of battery storage supplied in 2020.",
 'CIK': 1318605,
 'Exchange': 'NASDAQ',
 'Currency': 'USD',
 'Country': 'USA',
 'Sector': 'MANUFACTURING',
 'Indu

In [119]:
active_symbols = active_ls_df["symbol"].to_list()
len(active_symbols)

11643

In [135]:
coll = db["alpha_company_overview"]

In [136]:
def clean_response(r_json):
    keys = r_json.keys()

    def coerce_int(value):
        return np.nan if value == "None" else int(value)
    def coerce_float(value):
        return np.nan if value in ["None", "-"] else float(value)
    def coerce_date(value):
        return np.nan if value in ["None", "0000-00-00"] else datetime.strptime(value, "%Y-%m-%d")
    type_map = {
        "CIK": "int",
        "LatestQuarter": "date",
        "MarketCapitalization": "int",
        "EBITDA": "int",
        "PERatio": "float",
        "PEGRatio": "float",
        "BookValue": "float",
        "DividendPerShare": "float",
        "DividendYield": "float",
        "EPS": "float",
        "RevenuePerShareTTM": "float",
        "ProfitMargin": "float",
        "OperatingMarginTTM": "float",
        "ReturnOnAssetsTTM": "float",
        "RevenueTTM": "int",
        "GrossProfitTTM": "int",
        "DilutedEPSTTM": "float",
        "QuarterlyEarningsGrowthYOY": "float",
        "QuarterlyRevenueGrowthYOY": "float",
        "AnalystTargetPrice": "float",
        "TrailingPE": "float",
        "ForwardPE": "float",
        "PriceToSalesRatioTTM": "float",
        "PriceToBookRatio": "float",
        "EVToRevenue": "float",
        "EVToEBITDA": "float",
        "Beta": "float",
        "52WeekHigh": "float",
        "52WeekLow": "float",
        "50DayMovingAverage": "float",
        "200DayMovingAverage": "float",
        "SharesOutstanding": "int",
        "DividendDate": "date",
        "ExDividendDate": "date",
        }
    
    for key in r_json.keys():
        dtype = type_map.get(key)
        if dtype:
            if dtype == "int":
                r_json[key] = coerce_int(r_json[key])
            if dtype == "float":
                r_json[key] = coerce_float(r_json[key])
            if dtype == "date":
                r_json[key] = coerce_date(r_json[key])
    r_json["UpdateDttm"] = datetime.now()

    return r_json

In [137]:
total_requests = 0
t0 = time.time()
t = tqdm(active_symbols, position=0, desc="Symbol", leave=False)
for symbol in t:
    # ETL
    params = {
        "function": "OVERVIEW",
        "symbol": symbol,
        "apikey": api_key
    }
    r_json = requests.get(endpoint, params).json()
    r_json_clean = clean_response(r_json)

    # Write to mongo
    result = coll.insert_one(r_json_clean)

    # Rate limiting
    total_requests += 1
    total_seconds = time.time() - t0
    requests_per_minute = total_requests / (total_seconds / 60)
    t.set_description(f"Symbols (req / min: {requests_per_minute:.2f})")
    t.refresh()
    while (requests_per_minute > limit - 2):
        time.sleep(1)
        total_seconds = time.time() - t0
        requests_per_minute = total_requests / (total_seconds / 60)

Symbol:   0%|          | 0/11643 [00:00<?, ?it/s]

ConnectionError: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))

### Quotes

In [17]:
params = {
        "function": "GLOBAL_QUOTE",
        "symbol": "TSLA",
        "apikey": api_key
    }
r = requests.get(endpoint, params)
r_json = r.json()
r_json

{'Global Quote': {'01. symbol': 'TSLA',
  '02. open': '202.0600',
  '03. high': '203.1700',
  '04. low': '197.4000',
  '05. price': '199.9500',
  '06. volume': '111346705',
  '07. latest trading day': '2024-02-16',
  '08. previous close': '200.4500',
  '09. change': '-0.5000',
  '10. change percent': '-0.2494%'}}

### News

In [18]:
params = {
        "function": "NEWS_SENTIMENT",
        "apikey": api_key,
        "topics": "economy_fiscal",
        "time_from": "20240120T1130"
    }
r = requests.get(endpoint, params)
r.json()

{'items': '50',
 'sentiment_score_definition': 'x <= -0.35: Bearish; -0.35 < x <= -0.15: Somewhat-Bearish; -0.15 < x < 0.15: Neutral; 0.15 <= x < 0.35: Somewhat_Bullish; x >= 0.35: Bullish',
 'relevance_score_definition': '0 < x <= 1, with a higher score indicating higher relevance.',
 'feed': [{'title': 'Geo Group  ( GEO )  Q4 2023 Earnings Call Transcript',
   'url': 'https://www.fool.com/earnings/call-transcripts/2024/02/15/geo-group-geo-q4-2023-earnings-call-transcript/',
   'time_published': '20240215T223017',
   'authors': ['Motley Fool Transcribing'],
   'summary': 'GEO earnings call for the period ending December 31, 2023.',
   'banner_image': 'https://g.foolcdn.com/misc-assets/fool-transcripts-logo.png',
   'source': 'Motley Fool',
   'category_within_source': 'n/a',
   'source_domain': 'www.fool.com',
   'topics': [{'topic': 'Life Sciences', 'relevance_score': '0.5'},
    {'topic': 'Economy - Fiscal', 'relevance_score': '0.890401'},
    {'topic': 'Financial Markets', 'relevan

### Income statements

In [19]:
params = {
        "function": "INCOME_STATEMENT",
        "symbol": "TSLA",
        "apikey": api_key
    }
r = requests.get(endpoint, params)
income_statements = r.json()

In [21]:
income_statements["quarterlyReports"][0]

{'fiscalDateEnding': '2023-12-31',
 'reportedCurrency': 'USD',
 'grossProfit': '4438000000',
 'totalRevenue': '25167000000',
 'costOfRevenue': '20729000000',
 'costofGoodsAndServicesSold': '102000000',
 'operatingIncome': '2064000000',
 'sellingGeneralAndAdministrative': '1280000000',
 'researchAndDevelopment': '1094000000',
 'operatingExpenses': '2374000000',
 'investmentIncomeNet': '333000000',
 'netInterestIncome': '-61000000',
 'interestIncome': '333000000',
 'interestExpense': '61000000',
 'nonInterestIncome': '24994000000',
 'otherNonOperatingIncome': '-145000000',
 'depreciation': '890000000',
 'depreciationAndAmortization': '1232000000',
 'incomeBeforeTax': '2176000000',
 'incomeTaxExpense': '-5752000000',
 'interestAndDebtExpense': '61000000',
 'netIncomeFromContinuingOperations': '7943000000',
 'comprehensiveIncomeNetOfTax': '8477000000',
 'ebit': '2237000000',
 'ebitda': '3296000000',
 'netIncome': '7928000000'}

### IPO calendar

In [22]:
params = {
        "function": "IPO_CALENDAR",
        "apikey": api_key
    }
r = requests.get(endpoint, params)
data = io.BytesIO(r.content)
df = pd.read_csv(data)
df.head()

Unnamed: 0,symbol,name,ipoDate,priceRangeLow,priceRangeHigh,currency,exchange
0,WETH,Wetouch Technology Inc. Common Stock,2024-02-21,0,0,USD,NASDAQ


In [23]:
params = {
        "function": "EARNINGS_CALENDAR",
        "symbol": "TSLA",
        "horizon": "3month",
        "apikey": api_key
    }
r = requests.get(endpoint, params)
data = io.BytesIO(r.content)
df = pd.read_csv(data)
df.head()

Unnamed: 0,symbol,name,reportDate,fiscalDateEnding,estimate,currency
0,TSLA,Tesla Inc,2024-04-17,2024-03-31,0.68,USD


In [24]:
params = {
        "function": "TREASURY_YIELD",
        "interval": "daily",
        "maturity": "5year",
        "apikey": api_key
    }
r = requests.get(endpoint, params)
r.json()

{'name': '5-Year Treasury Constant Maturity Rate',
 'interval': 'daily',
 'unit': 'percent',
 'data': [{'date': '2024-02-15', 'value': '4.22'},
  {'date': '2024-02-14', 'value': '4.25'},
  {'date': '2024-02-13', 'value': '4.31'},
  {'date': '2024-02-12', 'value': '4.13'},
  {'date': '2024-02-09', 'value': '4.14'},
  {'date': '2024-02-08', 'value': '4.12'},
  {'date': '2024-02-07', 'value': '4.06'},
  {'date': '2024-02-06', 'value': '4.03'},
  {'date': '2024-02-05', 'value': '4.13'},
  {'date': '2024-02-02', 'value': '3.99'},
  {'date': '2024-02-01', 'value': '3.80'},
  {'date': '2024-01-31', 'value': '3.91'},
  {'date': '2024-01-30', 'value': '4.00'},
  {'date': '2024-01-29', 'value': '3.97'},
  {'date': '2024-01-26', 'value': '4.04'},
  {'date': '2024-01-25', 'value': '4.01'},
  {'date': '2024-01-24', 'value': '4.06'},
  {'date': '2024-01-23', 'value': '4.06'},
  {'date': '2024-01-22', 'value': '4.03'},
  {'date': '2024-01-19', 'value': '4.08'},
  {'date': '2024-01-18', 'value': '4.04

### Time series

In [25]:
year_month = str(2024) + "-" + f"{1:02}"
interval = "Daily"
symbol = "TSLA"

params = {
    "function": "TIME_SERIES_DAILY_ADJUSTED",
    "symbol": symbol,
    # "interval": interval,
    # "extended_hours": "true",
    "outputsize": "compact",
    # "month": year_month,
    "apikey": api_key
}
r = requests.get(endpoint, params)
r_dict = r.json()

ts_meta = r_dict["Meta Data"]
ts = r_dict[f"Time Series ({interval})"]

In [26]:
ts_meta

{'1. Information': 'Daily Time Series with Splits and Dividend Events',
 '2. Symbol': 'TSLA',
 '3. Last Refreshed': '2024-02-16',
 '4. Output Size': 'Compact',
 '5. Time Zone': 'US/Eastern'}

In [27]:
ts

{'2024-02-16': {'1. open': '202.06',
  '2. high': '203.17',
  '3. low': '197.4',
  '4. close': '199.95',
  '5. adjusted close': '199.95',
  '6. volume': '111346705',
  '7. dividend amount': '0.0000',
  '8. split coefficient': '1.0'},
 '2024-02-15': {'1. open': '189.16',
  '2. high': '200.88',
  '3. low': '188.8595',
  '4. close': '200.45',
  '5. adjusted close': '200.45',
  '6. volume': '120831762',
  '7. dividend amount': '0.0000',
  '8. split coefficient': '1.0'},
 '2024-02-14': {'1. open': '185.3',
  '2. high': '188.89',
  '3. low': '183.35',
  '4. close': '188.71',
  '5. adjusted close': '188.71',
  '6. volume': '81202987',
  '7. dividend amount': '0.0000',
  '8. split coefficient': '1.0'},
 '2024-02-13': {'1. open': '183.99',
  '2. high': '187.26',
  '3. low': '182.1087',
  '4. close': '184.02',
  '5. adjusted close': '184.02',
  '6. volume': '86759478',
  '7. dividend amount': '0.0000',
  '8. split coefficient': '1.0'},
 '2024-02-12': {'1. open': '192.11',
  '2. high': '194.73',


In [28]:
df = pd.DataFrame(ts).T
df["dttm"] = df.index
df = df.reset_index(drop=True)
df["symbol"] = symbol

df = df.rename(columns={"1. open": "open", 
                        "2. high": "high", 
                        "3. low": "low", 
                        "4. close": "close", 
                        "5. adjusted close": "adj_close", 
                        "6. volume": "volume",
                        "7. dividend amount": "div_amt",
                        "8. split coefficient": "split_coef"})

float_cols = ["open", "high", "low", "close", "adj_close", "volume", "div_amt", "split_coef"]
df[float_cols] = df[float_cols].apply(pd.to_numeric, errors="coerce")
df["dttm"] = pd.to_datetime(df["dttm"])
df

Unnamed: 0,open,high,low,close,adj_close,volume,div_amt,split_coef,dttm,symbol
0,202.060,203.1700,197.4000,199.95,199.95,111346705,0.0,1.0,2024-02-16,TSLA
1,189.160,200.8800,188.8595,200.45,200.45,120831762,0.0,1.0,2024-02-15,TSLA
2,185.300,188.8900,183.3500,188.71,188.71,81202987,0.0,1.0,2024-02-14,TSLA
3,183.990,187.2600,182.1087,184.02,184.02,86759478,0.0,1.0,2024-02-13,TSLA
4,192.110,194.7300,187.2800,188.13,188.13,95498597,0.0,1.0,2024-02-12,TSLA
...,...,...,...,...,...,...,...,...,...,...
95,244.810,254.2799,242.6200,251.60,251.60,123810402,0.0,1.0,2023-10-02,TSLA
96,250.000,254.7700,246.3500,250.22,250.22,128522729,0.0,1.0,2023-09-29,TSLA
97,240.020,247.5500,238.6500,246.38,246.38,117058870,0.0,1.0,2023-09-28,TSLA
98,244.262,245.3300,234.5800,240.50,240.50,136597184,0.0,1.0,2023-09-27,TSLA


## ETL functions
### Active list  

In [91]:
def extract_active_list(endpoint, api_key):
    """

    """

    params = {
        "function": "LISTING_STATUS",
        "apikey": api_key
    }

    return requests.get(endpoint, params)

def transform_active_list(response):
    """

    """

    data = io.BytesIO(response.content)
    df = pd.read_csv(data)
    df.drop(columns="delistingDate", inplace=True)
    df.rename(columns={"assetType": "asset_type", "ipoDate": "ipo_dt"}, inplace=True)
    df["ipo_dt"] = pd.to_datetime(df["ipo_dt"])
    df["refresh_dttm"]= datetime.today()

    return df

def load_active_list(df, client):
    """ 
    
    """

    rows = df.to_dict("records")
    coll = client["bakery"]["alpha_active_list"]
    result = coll.insert_many(rows)
    
    return result

### Time Series  
#### Daily

In [110]:
def extract_alpha_daily(symbol, api_key, endpoint):
    """ 
    
    """

    params = {
        "function": "TIME_SERIES_DAILY_ADJUSTED",
        "symbol": symbol,
        "outputsize": "full",
        "datatype": "json",
        "apikey": api_key
    }

    try:
        r = requests.get(endpoint, params)
        r_dict = r.json()["Time Series (Daily)"]
    except requests.exceptions.RequestException as re:
        print(f"Request error for symbol {symbol}\n{re}")
    except KeyError as ke:
        print(f"Key error for symbol {symbol}\n{ke}\nAvailable keys: {r.json().keys()}, likely caused by rate limiting.")
    
    return r_dict

def transform_alpha_daily(symbol, data):
    """ 
    
    """

    df = pd.DataFrame(data).T
    df["dttm"] = df.index
    df = df.reset_index(drop=True)
    df["symbol"] = symbol

    df = df.rename(columns={"1. open": "open", 
                            "2. high": "high", 
                            "3. low": "low", 
                            "4. close": "close", 
                            "5. adjusted close": "adj_close", 
                            "6. volume": "volume",
                            "7. dividend amount": "div_amt",
                            "8. split coefficient": "split_coef"})

    float_cols = ["open", "high", "low", "close", "adj_close", "volume", "div_amt", "split_coef"]
    df[float_cols] = df[float_cols].apply(pd.to_numeric, errors="coerce")
    df["dttm"] = pd.to_datetime(df["dttm"])
    
    return df

def load_alpha_daily(df, client):
    """ 
    
    """

    rows = df.to_dict("records")
    coll = client["bakery"]["alpha_stock_daily"]
    result = coll.insert_many(rows)

    return result

In [2]:
def extract_alpha_hourly(symbol, year, month, api_key, endpoint):
    """ 
    
    """
    
    params = {
        "function": "TIME_SERIES_INTRADAY",
        "symbol": symbol,
        "interval": "60min",
        "extended_hours": "true",
        "outputsize": "full",
        "month": str(year) + "-" + f"{month:02}",
        "apikey": api_key
    }
    
    try:
        r = requests.get(endpoint, params)
        r_dict = r.json()["Time Series (60min)"]
    except requests.exceptions.RequestException as re:
        print(f"Request error for symbol {symbol} and year-month {year_month}\n{re}")
    except KeyError as ke:
        print(f"Key error for symbol {symbol} and year-month {year_month}\n{ke}\nAvailable keys: {r.json().keys()}, likely caused by rate limiting.")
    
    return r_dict

def transform_alpha_hourly(symbol, data):
    """ 
    
    """

    df = pd.DataFrame(data).T
    df["dttm"] = df.index
    df = df.reset_index(drop=True)
    df["symbol"] = symbol

    df = df.rename(columns={"1. open": "open", "2. high": "high", "3. low": "low", "4. close": "close", "5. volume": "volume"})

    float_cols = ["open", "high", "low", "close"]
    df[float_cols] = df[float_cols].apply(pd.to_numeric, errors="coerce")
    df["volume"] = pd.to_numeric(df["volume"])
    df["dttm"] = pd.to_datetime(df["dttm"])
    
    return df

def load_alpha_hourly(client, df):
    """ 
    
    """

    rows = df.to_dict("records")
    coll = client["bakery"]["alpha_stock_intraday_hourly"]
    result = coll.insert_many(rows)

    return result


## Tests

In [3]:
client = MongoClient(conn_str)

### Active list

In [92]:
r = extract_active_list(endpoint, api_key)
df = transform_active_list(r)
df.head()

Unnamed: 0,symbol,name,exchange,asset_type,ipo_dt,status,refresh_dttm
0,A,Agilent Technologies Inc,NYSE,Stock,1999-11-18,Active,2024-01-21 17:07:56.228186
1,AA,Alcoa Corp,NYSE,Stock,2016-10-18,Active,2024-01-21 17:07:56.228186
2,AAA,AXS First Priority CLO Bond ETF,NYSE ARCA,ETF,2020-09-09,Active,2024-01-21 17:07:56.228186
3,AAAU,Goldman Sachs Physical Gold ETF,BATS,ETF,2018-08-15,Active,2024-01-21 17:07:56.228186
4,AACG,ATA Creativity Global,NASDAQ,Stock,2008-01-29,Active,2024-01-21 17:07:56.228186


In [93]:
result = load_active_list(df, client)

### Time Series  
#### Daily

In [100]:
data = extract_alpha_daily(symbol, api_key, endpoint)

In [103]:
df = transform_alpha_daily(symbol, data)
df.head()

Unnamed: 0,open,high,low,close,adj_close,volume,div_amt,split_coef,dttm,symbol
0,209.99,213.19,207.56,212.19,212.19,102260343,0.0,1.0,2024-01-19,TSLA
1,216.88,217.45,208.74,211.88,211.88,108595431,0.0,1.0,2024-01-18,TSLA
2,214.86,215.67,212.01,215.55,215.55,103164400,0.0,1.0,2024-01-17,TSLA
3,215.1,223.49,212.18,219.91,219.91,115355046,0.0,1.0,2024-01-16,TSLA
4,220.08,225.34,217.1501,218.89,218.89,123043812,0.0,1.0,2024-01-12,TSLA


In [105]:
result = load_alpha_daily(df, client)

#### Hourly

In [106]:
year, month = 2024, 1

data = extract_alpha_hourly(symbol, year, month, api_key, endpoint)

In [109]:
df = transform_alpha_hourly(symbol, data)
df.head()

Unnamed: 0,open,high,low,close,volume,dttm,symbol
0,212.22,212.46,212.05,212.4,100174,2024-01-19 19:00:00,TSLA
1,212.32,212.42,211.98,212.21,74769,2024-01-19 18:00:00,TSLA
2,212.38,291.308,178.312,212.33,386282,2024-01-19 17:00:00,TSLA
3,212.13,240.45,210.276,212.35,4896408,2024-01-19 16:00:00,TSLA
4,211.96,212.82,210.691,212.17,13964735,2024-01-19 15:00:00,TSLA


In [108]:
result = load_alpha_hourly(client, df)

### Rate Limiting

In [113]:
symbols = ["TSLA", "MSFT", "NVDA", "AMZN", "AAPL"]
total_requests = 0
t0 = time.time()

for symbol in symbols:
    for year in range(2020, 2024):
        for month in range(1, 13):
            load_alpha_hourly(client, transform_alpha_hourly(symbol, extract_alpha_hourly(symbol, year, month, api_key, endpoint)))
            total_requests += 1
            total_seconds = time.time() - t0
            requests_per_minute = total_requests / (total_seconds / 60)
            while (requests_per_minute > limit):
                time.sleep(1)
                total_seconds = time.time() - t0
                requests_per_minute = total_requests / (total_seconds / 60)