# Downloading The Datasets

Prior to this step we manually downloaded a number of economic datasets from https://fred.stlouisfed.org/, including information on commodities, consumer sentiment, external trade, growth and activity, housing and construction, inflation, interest, labor, market sentiment, and monetary conditions. We will be combining these economic indicators with daily features representing the SPY index and foreign currency data.

In this step we will download the SPY index dataset for a 5-year historical period from the Polygon.io API, and historical data for the 8 major foreign currency pairs from Yahoo Finance via the yfinance library.

#### Step 1: Download SPY

In [4]:
import os, requests
from pathlib import Path
from datetime import date
import pandas as pd

# Download daily aggregate bars for each ticker from Polygon.io and write one
# CSV per ticker into OUT_DIR. Failed tickers are reported and skipped.

# The key is read from the environment so it is never stored in the notebook.
API_KEY = os.getenv("POLYGON_API_KEY")
if not API_KEY:
    raise SystemExit("Set POLYGON_API_KEY env var.")

OUT_DIR = Path("../../data/downloaded/polygon")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Requested date window (ISO dates); the API may return fewer rows than asked.
start = "2000-01-01"
end   = date.today().isoformat()

tickers = [
    "SPY",   # S&P 500 ETF (equity)
]

# Upper bound (seconds) on each HTTP call so a stalled connection cannot hang
# the notebook forever.
REQUEST_TIMEOUT = 30

# Polygon's short field names -> descriptive column names used in the CSV.
# Only the keys listed here are kept, in this order.
keep = {
    "date": "date",
    "o": "open",
    "h": "high",
    "l": "low",
    "c": "close",
    "v": "volume",
    "vw": "vwap",
    "n": "transactions",
    "ticker": "ticker",
}


def _fetch_daily_aggs(tkr):
    """Return the raw daily aggregate records for *tkr*, or None on failure.

    Requests the ``start``..``end`` window and keeps following the
    ``next_url`` link in the response (when present) so that a paged result
    is not silently truncated. Any transport error or non-200 status is
    printed and reported as ``None`` so the caller can skip the ticker.
    """
    url = f"https://api.polygon.io/v2/aggs/ticker/{tkr}/range/1/day/{start}/{end}"
    # Passing the query as ``params`` keeps the key out of hand-built strings
    # and lets requests handle the URL encoding.
    params = {"adjusted": "true", "sort": "asc", "limit": 50000, "apiKey": API_KEY}

    records = []
    while url:
        try:
            r = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Print only the exception type: the message can contain the full
            # URL, which includes the API key.
            print("  -> request failed:", type(exc).__name__)
            return None
        if r.status_code != 200:
            print("  -> HTTP", r.status_code, r.text[:200])
            return None

        js = r.json()
        records.extend(js.get("results") or [])

        # Continuation links already carry the query; only the key is re-sent.
        url = js.get("next_url")
        params = {"apiKey": API_KEY}
    return records


all_rows = []

for tkr in tickers:
    print("GET", tkr)
    res = _fetch_daily_aggs(tkr)
    if res is None:
        # The failure was already reported by the helper.
        continue
    if not res:
        print("  -> no results")
        continue

    df = pd.DataFrame(res)
    if "t" not in df.columns:
        print("  -> no 't' column in results; skipping")
        continue

    # "t" is parsed as epoch milliseconds in UTC, then reduced to a naive
    # calendar date (midnight) so it can be joined with other daily series.
    df["date"] = pd.to_datetime(df["t"], unit="ms", utc=True).dt.tz_localize(None).dt.normalize()
    df["ticker"] = tkr

    df = df[[k for k in keep if k in df.columns]].rename(columns=keep).sort_values("date")

    # ":" is replaced so the ticker is usable as a file name.
    safe_name = tkr.replace(":", "_")
    out_file = OUT_DIR / f"{safe_name}.csv"
    df.to_csv(out_file, index=False)
    print(f"  -> {len(df):,} rows → {out_file}")

    all_rows.append(df)


GET SPY
  -> 1,255 rows → ../../data/downloaded/polygon/SPY.csv


#### Step 2: Download Foreign Currencies

In [5]:
from pathlib import Path
from datetime import date
import pandas as pd
import yfinance as yf

# Download daily history for the major FX pairs from Yahoo Finance (via
# yfinance), write one CSV per pair into OUT_DIR, then one merged CSV.

OUT_DIR = Path("../../data/downloaded/yfinance")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Requested date window (ISO dates).
start = "2000-01-01"
end   = date.today().isoformat()

# 8 majors (Yahoo Finance symbols use '=X')
fx_map = {
    "EURUSD": "EURUSD=X",
    "USDJPY": "USDJPY=X",
    "GBPUSD": "GBPUSD=X",
    "USDCHF": "USDCHF=X",
    "USDCAD": "USDCAD=X",
    "AUDUSD": "AUDUSD=X",
    "NZDUSD": "NZDUSD=X",
    "USDSEK": "USDSEK=X",
}

all_rows = []

for pair, yf_sym in fx_map.items():
    print("GET", pair, "->", yf_sym)
    df = yf.download(yf_sym, start=start, end=end, interval="1d",
                     auto_adjust=False, progress=False, threads=True)
    if df is None or df.empty:
        print("  -> no data")
        continue

    # Newer yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol. Collapse to the field level so the rename and the
    # column selection below see plain names and the CSV gets one header row.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df = df.rename(columns={
        "Open": "open", "High": "high", "Low": "low", "Close": "close",
        "Adj Close": "adj_close", "Volume": "volume"
    })
    # The index holds the bar timestamps; store them as a naive calendar
    # date (midnight) in a regular column.
    df["date"] = pd.to_datetime(df.index).tz_localize(None).normalize()
    # Prefix the pair so FX rows are distinguishable once merged with others.
    df["ticker"] = f"FX:{pair}"
    df = df[["date", "open", "high", "low", "close", "adj_close", "volume", "ticker"]]
    df = df.reset_index(drop=True).sort_values("date")

    out_file = OUT_DIR / f"{pair}.csv"
    df.to_csv(out_file, index=False)
    print(f"  -> {len(df):,} rows → {out_file}")
    all_rows.append(df)

# Stack every pair into one long table, ordered by pair and then by date.
if all_rows:
    merged = pd.concat(all_rows, ignore_index=True).sort_values(["ticker", "date"])
    merged.to_csv(OUT_DIR / "fx_all_daily.csv", index=False)
    print(f"Merged → {OUT_DIR/'fx_all_daily.csv'}  ({len(merged):,} rows)")
else:
    print("No FX data fetched.")


GET EURUSD -> EURUSD=X
  -> 5,650 rows → ../../data/downloaded/yfinance/EURUSD.csv
GET USDJPY -> USDJPY=X
  -> 6,670 rows → ../../data/downloaded/yfinance/USDJPY.csv
GET GBPUSD -> GBPUSD=X
  -> 5,662 rows → ../../data/downloaded/yfinance/GBPUSD.csv
GET USDCHF -> USDCHF=X
  -> 5,716 rows → ../../data/downloaded/yfinance/USDCHF.csv
GET USDCAD -> USDCAD=X
  -> 5,718 rows → ../../data/downloaded/yfinance/USDCAD.csv
GET AUDUSD -> AUDUSD=X
  -> 5,026 rows → ../../data/downloaded/yfinance/AUDUSD.csv
GET NZDUSD -> NZDUSD=X
  -> 5,651 rows → ../../data/downloaded/yfinance/NZDUSD.csv
GET USDSEK -> USDSEK=X
  -> 6,109 rows → ../../data/downloaded/yfinance/USDSEK.csv
Merged → ../../data/downloaded/yfinance/fx_all_daily.csv  (46,202 rows)
