# **DATA IMPORTATION NB** 
## Thomas de Portzamparc 27 october 2025

In [27]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import os 
import gc
from pandas.plotting import scatter_matrix


import statsmodels.api as sm
import pandas_datareader.data as web
from alpaca_trade_api.rest import REST


gc.collect()



# API keys import
from passwords import ALPACA_API_KEY, ALPACA_API_SECRET
from utils import * 
from data_loader import *

In [29]:
# =========================
# 1. Global Parameters
# =========================
API_KEY = ALPACA_API_KEY
API_SECRET = ALPACA_API_SECRET
BASE_URL = "https://data.alpaca.markets"
API = REST(API_KEY, API_SECRET, base_url=BASE_URL)

UDL = "AAPL"
TICKERS = [
    UDL, "SPY", "QQQ", "XLK",  # Market and Tech
    "TLT", "VIXY", "GLD", "UUP"  # Macro Proxies
]

FRED_CODES = {
    "CPI": "CPIAUCSL",
    "INDPPI": "PPIACO",
    "M1SUPPLY": "M1SL",
    "CCREDIT": "TOTALSL",
    "BMINUSA": "BAA10Y",
    "AAA10Y": "AAA10Y",
    "TB3MS": "TB3MS"
}


START_DATE = "2023-01-01"
END_DATE = "2025-10-27"
TF = "1Day"

if TF == "1Day":
    RESAMPLE_RULE = "D"
elif TF == "1Month":
    RESAMPLE_RULE = "ME"

### Data Extraction : 

> I am using the IEX data feed instead of the SIP feed because the IEX feed is freely accessible through Alpaca’s API and provides high-quality market data suitable for research and backtesting. The SIP feed aggregates prices from all U.S. exchanges and represents the official consolidated tape, but access requires a paid professional subscription. For quantitative research and non-execution purposes, IEX data offers a reliable and cost-effective alternative.

In [30]:
# =========================
# 2. Load Market Data
# =========================

retn, px, prices = get_market_data(API, TICKERS, START_DATE, end=END_DATE, timeframe=TF, feed="iex")
# Plot returns
# retn.plot(figsize=(12, 6), title="Daily Returns (%)")
# scatter_matrix(retn, alpha=0.2, figsize=(15, 15), diagonal="kde")
retn.to_csv(f"../data/market_data_returns_{RESAMPLE_RULE}.csv")

Market data loaded:
Frequency: 1Day
Period: 2023-01-04 → 2025-10-27
706 observations, 8 tickers


In [34]:
prices['AAPL'].to_csv(f"../data/AAPL_{RESAMPLE_RULE}.csv")

In [28]:
# =========================
# 3. Load Macro Data (FRED)
# =========================

fred_daily = get_macro_data(FRED_CODES, START_DATE, resample_rule = RESAMPLE_RULE, end=END_DATE, verbose=True)
macro = build_macro_variables(fred_daily, resample_rule = RESAMPLE_RULE)
macro.to_csv(f"../data/market_data_macro_{RESAMPLE_RULE}.csv")


TypeError: get_macro_data() got an unexpected keyword argument 'resample_rule'

In [25]:
retn.index = retn.index.tz_localize(None).date
macro.index = macro.index.tz_localize(None).date

# Jointure propre
data = retn.join(macro, how="inner").sort_index().drop_duplicates()
data.to_csv(f"../data/market_data_full_{RESAMPLE_RULE}.csv")