In [78]:
import time
from tqdm.notebook import tqdm
import os
import json
import requests
import pandas as pd
from pymongo import MongoClient

In [79]:
def extract_alpha_hourly(symbol, year, month, api_key, endpoint, timeout=30):
    """Fetch one month of 60-minute intraday bars for ``symbol``.

    Issues a GET request to ``endpoint`` with ``function=TIME_SERIES_INTRADAY``
    and returns the ``"Time Series (60min)"`` section of the JSON response.

    Parameters
    ----------
    symbol : str
        Ticker sent as the ``symbol`` query parameter.
    year, month : int
        Calendar month to request; sent as ``month="YYYY-MM"``.
    api_key : str
        Sent as the ``apikey`` query parameter.
    endpoint : str
        URL that receives the request.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    dict
        The ``"Time Series (60min)"`` mapping, or an empty dict when the
        request fails or the response does not contain that key. A diagnostic
        is printed in both failure cases.
    """
    year_month = f"{year}-{month:02}"
    params = {
        "function": "TIME_SERIES_INTRADAY",
        "symbol": symbol,
        "interval": "60min",
        "extended_hours": "true",
        "outputsize": "full",
        "month": year_month,
        "apikey": api_key
    }

    # Bound up front so every failure path still has something to return.
    r_dict = {}
    payload = {}
    try:
        r = requests.get(endpoint, params=params, timeout=timeout)
        # Parse once; the KeyError handler reuses the parsed payload.
        payload = r.json()
        r_dict = payload["Time Series (60min)"]
    except requests.exceptions.RequestException as re:
        print(f"Request error for symbol {symbol} and year-month {year_month}\n{re}")
    except KeyError as ke:
        print(f"Key error for symbol {symbol} and year-month {year_month}\n{ke}\nAvailable keys: {payload.keys()}, likely caused by rate limiting.")

    return r_dict

def transform_alpha_hourly(symbol, data):
    """Turn a raw hourly time-series mapping into a typed DataFrame.

    Parameters
    ----------
    symbol : str
        Value written to the ``symbol`` column of every row.
    data : dict or None
        Mapping of timestamp string -> record with the keys ``"1. open"``,
        ``"2. high"``, ``"3. low"``, ``"4. close"`` and ``"5. volume"``.

    Returns
    -------
    pandas.DataFrame
        One row per timestamp with columns ``open``, ``high``, ``low``,
        ``close``, ``volume``, ``dttm``, ``symbol``. Empty (but with those
        columns) when ``data`` is ``None`` or has no entries.

    Notes
    -----
    Price columns are coerced, so unparseable values become NaN. ``volume``
    is converted strictly and raises on an unparseable value.
    """
    columns = ["open", "high", "low", "close", "volume", "dttm", "symbol"]

    # Without this guard an empty payload fails below with a KeyError on the
    # price columns, because there are no records to rename.
    if data is None or len(data) == 0:
        return pd.DataFrame(columns=columns)

    # Outer keys are timestamps; transpose so each timestamp becomes a row.
    df = pd.DataFrame(data).T
    df["dttm"] = df.index
    df = df.reset_index(drop=True)
    df["symbol"] = symbol

    df = df.rename(columns={"1. open": "open", "2. high": "high", "3. low": "low", "4. close": "close", "5. volume": "volume"})

    price_cols = ["open", "high", "low", "close"]
    df[price_cols] = df[price_cols].apply(pd.to_numeric, errors="coerce")
    df["volume"] = pd.to_numeric(df["volume"])
    df["dttm"] = pd.to_datetime(df["dttm"])

    return df

def load_alpha_hourly(client, df):
    """Insert every row of ``df`` into ``bakery.alpha_stock_intraday_hourly``.

    Parameters
    ----------
    client : pymongo.MongoClient
        Connected client; indexed as ``client["bakery"][...]``.
    df : pandas.DataFrame
        Rows to insert; each row becomes one document.

    Returns
    -------
    The value returned by ``insert_many``, or ``None`` when ``df`` has no
    rows and nothing was inserted.
    """
    rows = df.to_dict("records")
    if not rows:
        # insert_many rejects an empty document list, so skip the call.
        return None

    coll = client["bakery"]["alpha_stock_intraday_hourly"]
    return coll.insert_many(rows)


In [80]:
# Load API and database settings from the secrets file under the home directory.
secrets_path = os.path.join(os.path.expanduser('~'), 'git/bakery/bakery/data/secrets.json')
with open(secrets_path, encoding="utf-8") as f:
    secrets = json.load(f)

# API settings: key, endpoint URL, and the requests-per-minute limit used for throttling.
api_key = secrets["alpha"]["api_key"]
limit = secrets["alpha"]["limit"]
endpoint = secrets["alpha"]["endpoint"]

# Database connection.
conn_str = secrets["mongo"]["conn_str"]
client = MongoClient(conn_str)

# Parameters for the single-month trial run in the following cells.
symbol, year, month = "TSLA", 2024, 1

In [81]:
# Fetch one month of 60-minute bars for the symbol/year/month set above.
data = extract_alpha_hourly(symbol, year, month, api_key, endpoint)

In [82]:
# Convert the raw response into a typed DataFrame tagged with the symbol.
df = transform_alpha_hourly(symbol, data)

In [83]:
# Insert the transformed rows into the MongoDB collection.
result = load_alpha_hourly(client, df)

In [44]:
# Placeholders: replace each Ellipsis with the iterable of tickers / years to
# backfill. The loop below cannot run until both are real iterables.
symbols = ...
years = ...

t0 = time.time()
num_requests = 0

for symbol in symbols:
    for year in years:
        for month in range(1, 13):
            # NOTE(review): no extract/transform/load call is present in this
            # loop body; presumably it belongs here, before the throttle below.

            num_requests += 1

            # Throttle: wait in 3-second steps until the average request rate
            # since t0 is back at or under `limit` requests per minute.
            while True:
                elapsed = (time.time() - t0) / 60
                # A coarse or adjusted clock can report zero (or negative)
                # elapsed time; treat that as an unbounded rate so the division
                # cannot raise ZeroDivisionError.
                requests_per_minute = num_requests / elapsed if elapsed > 0 else float("inf")
                if requests_per_minute <= limit:
                    break
                time.sleep(3)

In [74]:
# Handle to the same collection that load_alpha_hourly writes to.
coll = client["bakery"]["alpha_stock_intraday_hourly"]

In [75]:
# WARNING: the empty filter matches every document, so this empties the whole collection.
resp = coll.delete_many({})

In [76]:
# Display the delete result, which reports how many documents were removed.
resp

DeleteResult({'n': 206, 'ok': 1.0}, acknowledged=True)