# Fetch stock price data from the Alpha Vantage API

In [5]:
import io
import os
import time
import requests
import pandas as pd
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# The key is read from the environment so it is never written into the notebook.
API_KEY = os.getenv("ALPHAVANTAGE_API_KEY")
if API_KEY in (None, ""):
    # Fail early: every request below needs the key.
    raise RuntimeError("Set ALPHAVANTAGE_API_KEY environment variable before running this cell")

In [21]:
# Rate limiting: 75 requests per minute (Alpha Vantage level 1 paid tier limit),
# i.e. about 0.8 seconds to wait between consecutive calls.
REQUESTS_PER_MIN = 75
SLEEP_PER_CALL = 60 / REQUESTS_PER_MIN

# NVDA is the main stock; AMD and INTC are its competitors.
stock_tickers = ["NVDA", "AMD", "INTC"]

# ETFs with differing NVDA exposure (figures as noted when this was written):
#   SPY - S&P 500 ETF, about 7.84% NVDA exposure
#   DIA - Dow Jones Industrial Average ETF, about 2% NVDA exposure
#   IWM - Russell 2000 ETF (small cap stocks), no NVDA exposure
etf_tickers = ["SPY", "DIA", "IWM"]

tickers = [*stock_tickers, *etf_tickers]

years = [str(year) for year in range(2020, 2026)]
months = [f"{month:02d}" for month in range(1, 13)]

# "YYYY-MM" labels from January 2020 up to June 2025 (66 months). The labels are
# zero-padded, so plain string comparison orders them chronologically.
years_and_months = [
    f"{year}-{month}"
    for year in years
    for month in months
    if f"{year}-{month}" <= "2025-06"
]
years_and_months[-5:]

['2025-02', '2025-03', '2025-04', '2025-05', '2025-06']

In [30]:
def fetch_alpha_vantage_csv(params: dict) -> pd.DataFrame:
    """Query the Alpha Vantage endpoint and parse the CSV body into a DataFrame.

    Args:
        params: Query-string parameters, forwarded unchanged to the API.

    Returns:
        The response body parsed with ``pd.read_csv``.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        RuntimeError: If the body is a JSON object instead of CSV.
    """
    url = "https://www.alphavantage.co/query"
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()

    body = resp.text
    # Alpha Vantage signals problems (invalid call, rate limit hit, ...) with a
    # JSON object and a successful HTTP status, even when datatype=csv was
    # requested. A real CSV payload never starts with "{", so refuse to parse
    # such a body instead of turning the error text into a bogus DataFrame.
    if body.lstrip().startswith("{"):
        raise RuntimeError(
            f"Alpha Vantage returned an error instead of CSV: {body.strip()[:500]}"
        )
    return pd.read_csv(io.StringIO(body))


def fetch_intraday_alpha(symbol: str, month: str, interval: str = "60min") -> pd.DataFrame:
    """Request one month of TIME_SERIES_INTRADAY data for ``symbol`` as CSV.

    Args:
        symbol: Ticker symbol sent to the API.
        month: Value of the API's ``month`` parameter.
        interval: Bar size sent as the API's ``interval`` parameter.

    Returns:
        Whatever ``fetch_alpha_vantage_csv`` returns for the built query.
    """
    query = dict(
        function="TIME_SERIES_INTRADAY",
        symbol=symbol,
        interval=interval,
        outputsize="full",
        apikey=API_KEY,
        datatype="csv",
        month=month,
    )
    return fetch_alpha_vantage_csv(query)


def fetch_daily_alpha(symbol: str) -> pd.DataFrame:
    """Request the full TIME_SERIES_DAILY history for ``symbol`` as CSV.

    Args:
        symbol: Ticker symbol sent to the API.

    Returns:
        Whatever ``fetch_alpha_vantage_csv`` returns for the built query.
    """
    return fetch_alpha_vantage_csv(
        {
            "function": "TIME_SERIES_DAILY",
            "symbol": symbol,
            "outputsize": "full",
            "apikey": API_KEY,
            "datatype": "csv",
        }
    )


def fetch_daily_alpha_adjusted(symbol: str) -> pd.DataFrame:
    """Request the full TIME_SERIES_DAILY_ADJUSTED history for ``symbol`` as CSV.

    Args:
        symbol: Ticker symbol sent to the API.

    Returns:
        Whatever ``fetch_alpha_vantage_csv`` returns for the built query.
    """
    query = dict(
        function="TIME_SERIES_DAILY_ADJUSTED",
        symbol=symbol,
        outputsize="full",
        apikey=API_KEY,
        datatype="csv",
    )
    return fetch_alpha_vantage_csv(query)


def fetch_weekly_alpha(symbol: str) -> pd.DataFrame:
    """Request the TIME_SERIES_WEEKLY history for ``symbol`` as CSV.

    Args:
        symbol: Ticker symbol sent to the API.

    Returns:
        Whatever ``fetch_alpha_vantage_csv`` returns for the built query.
    """
    return fetch_alpha_vantage_csv(
        {
            "function": "TIME_SERIES_WEEKLY",
            "symbol": symbol,
            "apikey": API_KEY,
            "datatype": "csv",
        }
    )


def fetch_weekly_alpha_adjusted(symbol: str) -> pd.DataFrame:
    """Request the TIME_SERIES_WEEKLY_ADJUSTED history for ``symbol`` as CSV.

    Args:
        symbol: Ticker symbol sent to the API.

    Returns:
        Whatever ``fetch_alpha_vantage_csv`` returns for the built query.
    """
    query = dict(
        function="TIME_SERIES_WEEKLY_ADJUSTED",
        symbol=symbol,
        apikey=API_KEY,
        datatype="csv",
    )
    return fetch_alpha_vantage_csv(query)


def save_parquet(df: pd.DataFrame, path: str):
    """Write ``df`` to ``path`` as parquet, creating parent directories as needed.

    Args:
        df: Frame to persist; its ``to_parquet`` method does the writing.
        path: Destination file. May be a bare file name, in which case the
            file is written relative to the current working directory.
    """
    parent = os.path.dirname(path)
    # dirname is "" for a bare file name, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when there is one to create.
    if parent:
        os.makedirs(parent, exist_ok=True)
    df.to_parquet(path)


def filter_from_2020(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``timestamp`` is on or after 2020-01-01.

    The input frame is left untouched; in the returned frame the ``timestamp``
    column has been parsed with ``pd.to_datetime``.
    """
    cutoff = pd.Timestamp("2020-01-01")
    parsed = df.assign(timestamp=pd.to_datetime(df["timestamp"]))
    return parsed[parsed["timestamp"] >= cutoff]


def filter_before_2025_07(df: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``df`` whose ``timestamp`` is strictly before 2025-07-01.

    The input frame is left untouched; in the returned frame the ``timestamp``
    column has been parsed with ``pd.to_datetime``.
    """
    cutoff = pd.Timestamp("2025-07-01")
    parsed = df.assign(timestamp=pd.to_datetime(df["timestamp"]))
    is_before = parsed["timestamp"] < cutoff
    return parsed[is_before]


def get_date_range(df: pd.DataFrame) -> tuple[str, str]:
    """Return the earliest and latest ``timestamp`` of ``df`` as ``YYYY-MM-DD`` strings.

    The ``timestamp`` column is parsed with ``pd.to_datetime`` on a separate
    Series, so the input frame is not modified.
    """
    stamps = pd.to_datetime(df["timestamp"])
    first, last = stamps.min(), stamps.max()
    return first.strftime("%Y-%m-%d"), last.strftime("%Y-%m-%d")


In [31]:
# Sub-directory, log label and fetcher for each daily/weekly series. All four
# go through the same steps: fetch -> clip to the date window -> index -> save.
SERIES_SPECS = [
    ("1d", "daily", fetch_daily_alpha),
    ("1d_adj", "daily adjusted", fetch_daily_alpha_adjusted),
    ("1w", "weekly", fetch_weekly_alpha),
    ("1w_adj", "weekly adjusted", fetch_weekly_alpha_adjusted),
]


def _index_by_timestamp(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with its ``timestamp`` column parsed and moved into a sorted index."""
    indexed = df.drop(columns=["timestamp"])
    indexed.index = pd.to_datetime(df["timestamp"])
    return indexed.sort_index()


def _fetch_intraday_history(ticker: str) -> pd.DataFrame:
    """Download 60min bars for every entry of ``years_and_months`` and stack them."""
    monthly_frames = []
    for year_month in years_and_months:
        print(f"Fetching month {year_month}")
        monthly_frames.append(fetch_intraday_alpha(ticker, month=year_month, interval="60min"))
        # Pause after every request to stay under the per-minute request limit.
        time.sleep(SLEEP_PER_CALL)
    return pd.concat(monthly_frames, ignore_index=True)


def _fetch_clipped_series(ticker: str, fetch) -> pd.DataFrame:
    """Fetch one daily/weekly series and keep rows from 2020-01-01 up to (not including) 2025-07-01."""
    return filter_before_2025_07(filter_from_2020(fetch(ticker)))


for subdir in ["1h"] + [spec[0] for spec in SERIES_SPECS]:
    os.makedirs(f"stock_prices/{subdir}", exist_ok=True)

for ticker in tickers:
    # 60min: one request per month, concatenated into a single file per ticker.
    file_60min = f"stock_prices/1h/{ticker}.parquet"
    if os.path.exists(file_60min):
        # Existing files act as a cache so a re-run only fetches what is missing.
        print(f"Skipping {file_60min}, already exists.")
    else:
        print(f"Fetching 60min {ticker} data from Alpha Vantage")
        save_parquet(_index_by_timestamp(_fetch_intraday_history(ticker)), file_60min)

    # 1d, 1d adjusted, 1w, 1w adjusted: one request per series.
    for subdir, label, fetch in SERIES_SPECS:
        series_file = f"stock_prices/{subdir}/{ticker}.parquet"
        if os.path.exists(series_file):
            print(f"Skipping {series_file}, already exists.")
            continue
        print(f"Fetching {label} {ticker} data from Alpha Vantage")
        save_parquet(_index_by_timestamp(_fetch_clipped_series(ticker, fetch)), series_file)
        time.sleep(SLEEP_PER_CALL)

Skipping stock_prices/1h/NVDA.parquet, already exists.
Fetching daily NVDA data from Alpha Vantage
Fetching daily adjusted NVDA data from Alpha Vantage
Fetching weekly NVDA data from Alpha Vantage
Fetching weekly adjusted NVDA data from Alpha Vantage
Fetching 60min AMD data from Alpha Vantage
Fetching month 2020-01
Fetching month 2020-02
Fetching month 2020-03
Fetching month 2020-04
Fetching month 2020-05
Fetching month 2020-06
Fetching month 2020-07
Fetching month 2020-08
Fetching month 2020-09
Fetching month 2020-10
Fetching month 2020-11
Fetching month 2020-12
Fetching month 2021-01
Fetching month 2021-02
Fetching month 2021-03
Fetching month 2021-04
Fetching month 2021-05
Fetching month 2021-06
Fetching month 2021-07
Fetching month 2021-08
Fetching month 2021-09
Fetching month 2021-10
Fetching month 2021-11
Fetching month 2021-12
Fetching month 2022-01
Fetching month 2022-02
Fetching month 2022-03
Fetching month 2022-04
Fetching month 2022-05
Fetching month 2022-06
Fetching month 2

In [37]:
# Last 20 hourly NVDA bars from the saved file.
pd.read_parquet("stock_prices/1h/NVDA.parquet").tail(20)

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-06-27 16:00:00,157.7311,179.7887,101.0144,157.4761,89412011
2025-06-27 17:00:00,157.5011,190.0828,100.189,157.3711,7256807
2025-06-27 18:00:00,157.3711,157.7411,157.3611,157.5511,4240402
2025-06-27 19:00:00,157.5611,157.6511,157.4811,157.5512,290486
2025-06-30 04:00:00,158.791,159.271,158.3811,158.721,469565
2025-06-30 05:00:00,158.721,159.091,158.711,158.811,311618
2025-06-30 06:00:00,158.801,158.991,158.661,158.661,218073
2025-06-30 07:00:00,158.651,158.841,158.3411,158.4411,532201
2025-06-30 08:00:00,158.4211,159.411,157.7323,158.4011,1942587
2025-06-30 09:00:00,158.4011,158.891,155.9512,156.6212,29056511


In [35]:
# Last 20 daily NVDA bars from the saved file.
pd.read_parquet("stock_prices/1d/NVDA.parquet").tail(20)

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-06-02,135.49,138.12,135.4,137.38,197663116
2025-06-03,138.78,142.0,137.95,141.22,225578783
2025-06-04,142.19,142.39,139.545,141.92,167120819
2025-06-05,142.17,144.0,138.83,139.99,232410759
2025-06-06,142.51,143.27,141.51,141.72,153986153
2025-06-09,143.19,145.0,141.94,142.63,185114494
2025-06-10,142.69,144.29,141.525,143.96,155881897
2025-06-11,144.61,144.99,141.87,142.83,167694044
2025-06-12,141.97,145.0,141.85,145.0,162364991
2025-06-13,142.48,143.58,140.855,141.97,180820565


In [38]:
# Last 20 hourly SPY bars from the saved file.
pd.read_parquet("stock_prices/1h/SPY.parquet").tail(20)

Unnamed: 0_level_0,open,high,low,close,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-06-27 18:00:00,612.5301,613.2182,611.483,611.5129,1480832
2025-06-27 19:00:00,611.5129,612.1611,611.1539,611.7522,109139
2025-06-27 20:00:00,613.2182,613.2182,613.2182,613.2182,1295757
2025-06-30 04:00:00,614.5445,616.18,614.5445,615.5418,47553
2025-06-30 05:00:00,615.5318,616.1501,615.3722,615.5617,81380
2025-06-30 06:00:00,615.5517,616.0603,615.5318,615.8808,78448
2025-06-30 07:00:00,615.8808,616.19,615.7113,615.8509,213440
2025-06-30 08:00:00,615.9008,616.2997,613.0,615.3922,547245
2025-06-30 09:00:00,615.4021,615.9309,613.0,614.737,7469519
2025-06-30 10:00:00,614.744,615.1229,613.994,614.3151,8153193
