# 📊 ETF & Macroeconomic Data EDA
This notebook performs exploratory data analysis on ETF price data and macroeconomic indicators.

In [None]:

import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import ta

# Apply a dark-grid theme to every figure drawn below.
# NOTE(review): the "seaborn-v0_8-*" style names are tied to the installed
# matplotlib version — confirm the environment provides this style.
plt.style.use("seaborn-v0_8-darkgrid")


## 1. Settings and Parameters

In [None]:

# Symbols passed to yf.download in section 2; each becomes one column of `adj_close`.
etf_tickers = ["SPY", "QQQ", "VTI", "DIA", "IWM", "EFA", "EEM", "VNQ", "LQD", "BND"]
# Date window handed to yf.download as start/end.
start_date = "2018-01-01"
end_date = "2025-05-20"
# CSV read in section 7; it must contain a "Date" column plus the selected indicators.
# NOTE(review): the filename suggests the data stops in 2024 while end_date is in
# 2025 — if so, the forward-fill in section 7 repeats the last row for later dates. Confirm.
macro_file = "macroeconomic_data_2010_2024.csv"


## 2. Download ETF Data

In [None]:

# Pull price bars for every ticker in one request. group_by='ticker' makes the
# result indexable as data[<ticker>][<field>]; auto_adjust=True is requested so
# the 'Close' field is the adjusted close.
print("Downloading ETF data...")
data = yf.download(
    etf_tickers,
    start=start_date,
    end=end_date,
    group_by='ticker',
    auto_adjust=True,
)

# Collect each ticker's 'Close' series, then assemble a frame with one column per ETF.
close_by_ticker = {}
for symbol in etf_tickers:
    close_by_ticker[symbol] = data[symbol]['Close']
adj_close = pd.DataFrame(close_by_ticker)


## 3. Plot ETF Price Trends

In [None]:

# One line per ETF on a shared price axis; keep the Axes handle so the labels
# are set on this specific plot rather than on pyplot's "current" axes.
price_ax = adj_close.plot(figsize=(14, 6), title="ETF Price Trends (Adj Close)")
price_ax.set_xlabel("Date")
price_ax.set_ylabel("Price")
price_ax.figure.tight_layout()
plt.show()


## 4. ETF Correlation Heatmap

In [None]:

# Pairwise correlation of the ETF closing-price columns, drawn as an annotated heatmap.
#
# Only the ticker columns are correlated: section 5 adds a string "Regime"
# column to `adj_close`, and DataFrame.corr() on recent pandas versions raises
# on non-numeric columns, so without this selection the cell cannot be re-run
# after section 5 has executed.
# NOTE(review): this correlates price levels, not returns — confirm that is intended.
fig, ax = plt.subplots(figsize=(10, 8))
corr = adj_close[etf_tickers].corr()
sns.heatmap(corr, annot=True, cmap="coolwarm", square=True, ax=ax)
ax.set_title("Correlation Matrix of ETFs")
fig.tight_layout()
plt.show()


## 5. Regime Classification (Based on SPY)

In [None]:

# SPY close series; reused by the regime plot and the technical-indicator cell.
spy = adj_close["SPY"]

# Min-max scale the prices over the whole sample. The scaler expects a 2-D
# (n_samples, 1) array, so reshape to a column first and flatten back to 1-D after.
scaler = MinMaxScaler()
spy_column = spy.to_numpy().reshape(-1, 1)
spy_norm = scaler.fit_transform(spy_column).flatten()

def classify_regime(spy_norm, low=0.33, high=0.66):
    """Label each normalized value as "Bull", "Neutral" or "Bear".

    Args:
        spy_norm: Iterable of numeric values (here, min-max scaled SPY prices).
        low: Inclusive upper bound of the "Bull" band. Defaults to 0.33.
        high: Inclusive upper bound of the "Neutral" band. Defaults to 0.66.

    Returns:
        list[str]: One label per input value, in input order. Values
        ``<= low`` map to "Bull", values ``<= high`` map to "Neutral", and
        everything else maps to "Bear". NaN fails both comparisons and is
        therefore labelled "Bear".

    Raises:
        ValueError: If ``low`` is greater than ``high``.

    NOTE(review): the lowest band is labelled "Bull" and the highest "Bear",
    which reads as inverted for a price-level signal — confirm this is intended.
    """
    if low > high:
        raise ValueError(f"low ({low}) must not exceed high ({high})")

    def _label(val):
        # Bands are checked from the bottom up; both bounds are inclusive.
        if val <= low:
            return "Bull"
        if val <= high:
            return "Neutral"
        return "Bear"

    return [_label(val) for val in spy_norm]

# One label per row of `adj_close`, in the same order as `spy_norm`.
regimes = classify_regime(spy_norm)
# Adds a string column to `adj_close` in place; from here on the frame is no
# longer purely numeric, which affects numeric-only operations in earlier cells
# if they are re-run.
adj_close["Regime"] = regimes


## 6. Regime Timeline Plot

In [None]:

# Colour assigned to each regime label, and the per-observation colour list
# (same order as `regimes` / the rows of `adj_close`).
color_map = {"Bull": "green", "Neutral": "orange", "Bear": "red"}
colors = [color_map[r] for r in regimes]

fig, ax = plt.subplots(figsize=(14, 4))
# Regime-coloured markers on top of a faint line of the same SPY series.
ax.scatter(adj_close.index, spy, c=colors, s=10)
ax.plot(adj_close.index, spy, alpha=0.4, label="SPY Price")

# The original passed label="SPY Price" to the scatter but never drew a legend,
# so the colours were unexplained. Empty, labelled scatters serve as legend
# entries for each regime without adding any points to the chart.
for regime_name, regime_color in color_map.items():
    ax.scatter([], [], c=regime_color, s=10, label=regime_name)
ax.legend(title="Regime")

ax.set_title("Market Regime Classification (SPY)")
ax.set_xlabel("Date")
ax.set_ylabel("SPY Price")
fig.tight_layout()
plt.show()


## 7. Load and Align Macroeconomic Data

In [None]:

print("Loading macroeconomic data...")

# Indicator columns kept for the rest of the analysis; a missing column raises KeyError.
selected_macro_columns = [
    "VIX Market Volatility",
    "Federal Funds Rate",
    "10-Year Treasury Yield",
    "Unemployment Rate",
    "CPI All Items",
    "Recession Indicator"
]

# Read the CSV and index it by date. The index is sorted explicitly because
# reindex(method='ffill') requires a monotonic index and raises otherwise.
macro_raw = (
    pd.read_csv(macro_file, parse_dates=["Date"])
    .set_index("Date")
    .sort_index()
)

# Keep only the selected indicators, then align them to the ETF trading calendar.
# Each trading day takes the most recent macro observation at or before it;
# days before the first macro row stay NaN.
# NOTE(review): days after the last macro row silently repeat that last row —
# confirm the CSV covers the full price window.
macro_df = macro_raw[selected_macro_columns].reindex(adj_close.index, method='ffill')


## 8. Plot Macroeconomic Trends

In [None]:

# One stacked panel per macro indicator, drawn by a single pandas plot call.
macro_df.plot(
    subplots=True,
    figsize=(14, 10),
    title="Macroeconomic Indicators Over Time",
)
# The figure pandas just created is pyplot's current figure; tidy its spacing and render.
macro_fig = plt.gcf()
macro_fig.tight_layout()
plt.show()


## 9. Compute & Visualize Technical Indicators for SPY

In [None]:

# Technical indicators for SPY, all computed with the `ta` library's default parameters.
spy_df = pd.DataFrame({'Close': spy})

# SPY High/Low from the downloaded bars (section 2), aligned to the close series.
# The stochastic oscillator compares the close with the recent high/low range;
# the original passed Close for both, which replaces that range with the
# closing-price range and distorts %K.
spy_bars = data["SPY"]
spy_high = spy_bars['High'].reindex(spy_df.index)
spy_low = spy_bars['Low'].reindex(spy_df.index)

spy_df['RSI'] = ta.momentum.RSIIndicator(close=spy_df['Close']).rsi()
spy_df['MACD_diff'] = ta.trend.MACD(close=spy_df['Close']).macd_diff()
spy_df['BB_width'] = ta.volatility.BollingerBands(close=spy_df['Close']).bollinger_wband()
spy_df['Stoch_K'] = ta.momentum.StochasticOscillator(
    high=spy_high, low=spy_low, close=spy_df['Close']).stoch()

# One panel per indicator; the leading rows are NaN until each indicator has
# enough history to produce a value.
spy_df[['RSI', 'MACD_diff', 'BB_width', 'Stoch_K']].plot(
    subplots=True, figsize=(14, 8), title="Technical Indicators for SPY")
plt.tight_layout()
plt.show()


## ✅ EDA Complete