# 00C — Global Market Data Loader

**Purpose**: Download macro-relevant global variables (equity, FX, commodities, rates, liquidity)

**Outputs**:
- `../data_processed/global_equity_weekly.parquet`
- `../data_processed/global_fx_weekly.parquet`
- `../data_processed/global_commodities_weekly.parquet`
- `../data_processed/global_rates_monthly.parquet`
- `../data_processed/global_liquidity_weekly.parquet`

---

## Global Variable Universe (LOCKED)

| Category | Variables | Economic Role |
|----------|-----------|---------------|
| Equity | S&P 500, NASDAQ 100, CSI 300, MSCI EM | Risk sentiment, EM flows |
| FX | DXY, USD/INR, USD/CNY | Global liquidity, import costs |
| Commodities | Brent, Gold, Copper | India import bill, growth proxy |
| Rates | US 10Y, US 2Y, Yield Curve | Global discount rate |
| Liquidity | VIX, India VIX, HY Spreads | Quantity of money, risk appetite |

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Install yfinance if not available
try:
    import yfinance as yf
except ImportError:
    %pip install yfinance -q
    import yfinance as yf

# Paths
# Robust path logic to handle execution from root or notebooks dir
if (Path.cwd() / 'data_raw').exists():
    BASE_PATH = Path.cwd()
elif (Path.cwd().parent / 'data_raw').exists():
    BASE_PATH = Path.cwd().parent
else:
    BASE_PATH = Path('..') # Fallback

DATA_RAW = BASE_PATH / 'data_raw'
PROCESSED_PATH = BASE_PATH / 'data_processed'
PROCESSED_PATH.mkdir(exist_ok=True)
RAW_GLOBAL_PATH = DATA_RAW / 'global'
RAW_GLOBAL_PATH.mkdir(exist_ok=True)

# Date range (10+ years of history)
START_DATE = '2010-01-01'
END_DATE = datetime.now().strftime('%Y-%m-%d')

print(f"Data range: {START_DATE} to {END_DATE}")

Data range: 2010-01-01 to 2026-02-01


## 1. Define Ticker Universe (LOCKED)

In [2]:
GLOBAL_EQUITY = {
    'SP500': '^GSPC',
    'NASDAQ100': '^NDX',
    'CSI300': '000300.SS',
    'MSCI_EM': 'EEM',
    'EUROSTOXX50': '^STOXX50E',
    'NIKKEI225': '^N225'
}

GLOBAL_FX = {
    'USDINR': 'INR=X',
    'EURINR': 'EURINR=X',
    'JPYINR': 'JPYINR=X',
    'DXY': 'DX-Y.NYB',
    'USDCNY': 'CNY=X',
    'USDJPY': 'JPY=X'
}

GLOBAL_COMMODITIES = {
    'BRENT': 'BZ=F',
    'WTI': 'CL=F',
    'GOLD': 'GC=F',
    'SILVER': 'SI=F',
    'COPPER': 'HG=F'
}

GLOBAL_RATES = {
    'US10Y': '^TNX',
    'US2Y': '^IRX',
    'US30Y': '^TYX'
}

GLOBAL_LIQUIDITY = {
    'VIX': '^VIX',
    'INDIAVIX': 'INDIAVIX.NS',
    'HYG': 'HYG',
    'LQD': 'LQD',
}

print("Ticker universe defined")

Ticker universe defined


## 2. Download Functions

In [3]:
def download_tickers(ticker_dict: dict, start: str, end: str) -> pd.DataFrame:
    all_data = {}
    for name, ticker in ticker_dict.items():
        try:
            # Use progress=False and handle MultiIndex
            # yfinance >= 0.2.0 returns MultiIndex by default
            data = yf.download(ticker, start=start, end=end, progress=False)
            
            if not data.empty:
                # Flatten MultiIndex if present
                if isinstance(data.columns, pd.MultiIndex):
                    data.columns = data.columns.get_level_values(0)
                
                col = 'Adj Close' if 'Adj Close' in data.columns else 'Close'
                
                # Ensure we get a Series, not a DataFrame (in case of duplicate columns)
                s = data[col]
                if isinstance(s, pd.DataFrame):
                    s = s.iloc[:, 0]
                
                all_data[name] = s
                print(f"✓ {name}")
            else:
                print(f"✗ {name}: No data")
        except Exception as e:
            print(f"✗ {name}: {e}")
    
    if all_data:
        # Use pd.concat for safer construction from dict of Series
        df = pd.concat(all_data, axis=1)
        df.index = pd.to_datetime(df.index)
        return df
    return pd.DataFrame()

def resample_to_weekly(df: pd.DataFrame) -> pd.DataFrame:
    if df.empty: return df
    return df.resample('W-FRI').last().dropna(how='all')

def resample_to_monthly(df: pd.DataFrame) -> pd.DataFrame:
    if df.empty: return df
    return df.resample('ME').last().dropna(how='all')

## 3. Data Collection

In [4]:
print("Downloading Equities...")
equity_daily = download_tickers(GLOBAL_EQUITY, START_DATE, END_DATE)

print("\nDownloading FX...")
fx_daily = download_tickers(GLOBAL_FX, START_DATE, END_DATE)

print("\nDownloading Commodities...")
commodities_daily = download_tickers(GLOBAL_COMMODITIES, START_DATE, END_DATE)

print("\nDownloading Rates...")
rates_daily = download_tickers(GLOBAL_RATES, START_DATE, END_DATE)

print("\nDownloading Liquidity...")
liquidity_daily = download_tickers(GLOBAL_LIQUIDITY, START_DATE, END_DATE)

Downloading Equities...


✓ SP500


✓ NASDAQ100


✓ CSI300


✓ MSCI_EM


✓ EUROSTOXX50


✓ NIKKEI225

Downloading FX...


✓ USDINR


✓ EURINR


✓ JPYINR


✓ DXY


✓ USDCNY


✓ USDJPY

Downloading Commodities...


✓ BRENT


✓ WTI


✓ GOLD


✓ SILVER


✓ COPPER

Downloading Rates...


✓ US10Y


✓ US2Y


✓ US30Y

Downloading Liquidity...


✓ VIX



1 Failed download:


['INDIAVIX.NS']: YFPricesMissingError('possibly delisted; no price data found  (1d 2010-01-01 -> 2026-02-01)')


✗ INDIAVIX: No data


✓ HYG


✓ LQD


## 4. Transformations

In [5]:
equity_weekly = resample_to_weekly(equity_daily)
fx_weekly = resample_to_weekly(fx_daily)
commodities_weekly = resample_to_weekly(commodities_daily)
rates_monthly = resample_to_monthly(rates_daily)
liquidity_weekly = resample_to_weekly(liquidity_daily)

if not rates_monthly.empty and 'US10Y' in rates_monthly.columns and 'US2Y' in rates_monthly.columns:
    rates_monthly['US_YIELD_CURVE'] = rates_monthly['US10Y'] - rates_monthly['US2Y']

if not liquidity_weekly.empty and 'HYG' in liquidity_weekly.columns and 'LQD' in liquidity_weekly.columns:
    # Credit Risk Sentiment (Price Ratio: Higher = Risk On / Lower Spreads)
    # Note: This is NOT a yield spread. It tracks the relative performance of Junk vs IG.
    liquidity_weekly['CREDIT_RISK_SENTIMENT'] = (liquidity_weekly['HYG'] / liquidity_weekly['LQD']) * 100

In [6]:
def calculate_returns(df: pd.DataFrame, periods: list = [1, 4, 12]) -> pd.DataFrame:
    if df.empty: return pd.DataFrame()
    returns_list = []
    for col in df.columns:
        for p in periods:
            suffix = f"{p}w" if p > 1 else "1w"
            s = (df[col].pct_change(p) * 100).rename(f"{col}_ret_{suffix}")
            returns_list.append(s)
    
    if not returns_list: return pd.DataFrame(index=df.index)
    return pd.concat(returns_list, axis=1)

equity_returns = calculate_returns(equity_weekly)
fx_returns = calculate_returns(fx_weekly)
commodities_returns = calculate_returns(commodities_weekly)

if not liquidity_weekly.empty:
    # Process HYG/LQD returns
    cols_to_use = [c for c in ['HYG', 'LQD'] if c in liquidity_weekly.columns]
    if cols_to_use:
        l_ret = calculate_returns(liquidity_weekly[cols_to_use], [1, 4])
    else:
        l_ret = pd.DataFrame(index=liquidity_weekly.index)
    
    # Add VIX level and India VIX
    for vix in ['VIX', 'INDIAVIX']:
        if vix in liquidity_weekly.columns:
            l_ret[f'{vix}_level'] = liquidity_weekly[vix]
            l_ret[f'{vix}_delta_1w'] = liquidity_weekly[vix].diff(1)
            
    liquidity_returns = l_ret
else:
    liquidity_returns = pd.DataFrame()

## 5. Export

In [7]:
def save_parquet(df: pd.DataFrame, name: str):
    if not df.empty:
        path = PROCESSED_PATH / name
        df.to_parquet(path)
        print(f"✓ Saved {name}")

save_parquet(equity_weekly, 'global_equity_weekly.parquet')
save_parquet(fx_weekly, 'global_fx_weekly.parquet')
save_parquet(commodities_weekly, 'global_commodities_weekly.parquet')
save_parquet(rates_monthly, 'global_rates_monthly.parquet')
save_parquet(liquidity_weekly, 'global_liquidity_weekly.parquet')
save_parquet(equity_returns, 'global_equity_returns_weekly.parquet')
save_parquet(fx_returns, 'global_fx_returns_weekly.parquet')
save_parquet(commodities_returns, 'global_commodities_returns_weekly.parquet')
save_parquet(liquidity_returns, 'global_liquidity_returns_weekly.parquet')

print("\nAll available global data saved")

✓ Saved global_equity_weekly.parquet
✓ Saved global_fx_weekly.parquet
✓ Saved global_commodities_weekly.parquet
✓ Saved global_rates_monthly.parquet
✓ Saved global_liquidity_weekly.parquet
✓ Saved global_equity_returns_weekly.parquet
✓ Saved global_fx_returns_weekly.parquet
✓ Saved global_commodities_returns_weekly.parquet
✓ Saved global_liquidity_returns_weekly.parquet

All available global data saved
