# 03A — Macro Regime Definition

**Purpose**: Define macro regimes from CPI, IIP, credit, rates, FX reserves

**Regimes** (each dimension is scored high / neutral / low for every month):
- Growth ↑ / ↓
- Inflation ↑ / ↓
- Liquidity tight / loose
- Rates high / low

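**Output**: Monthly regime label time series — one state column per dimension plus a composite growth × inflation label (Goldilocks / Reflation / Stagflation / Deflation)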

---

## Why Regimes Matter

Correlations **flip** in different regimes. A sector that outperforms in growth periods may underperform in tightening cycles. Understanding regimes lets us:
1. Explain why backtests fail out-of-sample
2. Build conditional (regime-aware) strategies
3. Avoid regime mismatch in positioning

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
import warnings
# Silence library warnings so the cell output stays readable.
# NOTE(review): this blanket filter also hides pandas deprecation notices — consider narrowing it.
warnings.filterwarnings('ignore')

# Plot defaults applied to every figure created after this cell.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams.update({'figure.figsize': (14, 8)})

# Directory the inputs are read from and the regime outputs are written to.
PROCESSED_PATH = Path('../data_processed')
PROCESSED_PATH.mkdir(exist_ok=True)

# Load data: the combined monthly matrix and the long-format RBI table
# (later cells read its 'series_name', 'Date' and 'value' columns).
combined = pd.read_parquet(PROCESSED_PATH.joinpath('macro_sector_monthly_matrix.parquet'))
rbi_long = pd.read_parquet(PROCESSED_PATH.joinpath('rbi_macro_all_long.parquet'))

print(f"Combined matrix: {combined.shape}")
print(f"Date range: {combined.index.min().date()} to {combined.index.max().date()}")

Combined matrix: (132, 93)
Date range: 2015-02-28 to 2026-01-31


## 1. Define Regime Indicators

In [2]:
# One entry per regime dimension:
#   series_keywords - substrings searched for (case-insensitively) in series names
#   high_label / low_label - names for the two extreme states of the dimension
#   description - short human-readable summary
REGIME_INDICATORS = dict(
    GROWTH=dict(
        series_keywords=['IIP', 'GDP', 'production', 'output', 'PMI'],
        high_label='Growth_High',
        low_label='Growth_Low',
        description='Industrial/economic growth regime',
    ),
    INFLATION=dict(
        series_keywords=['CPI', 'WPI', 'inflation', 'price'],
        high_label='Inflation_High',
        low_label='Inflation_Low',
        description='Price inflation regime',
    ),
    LIQUIDITY=dict(
        series_keywords=['M3', 'credit', 'money supply', 'deposit'],
        high_label='Liquidity_Loose',
        low_label='Liquidity_Tight',
        description='Money and credit conditions',
    ),
    RATES=dict(
        series_keywords=['repo', 'policy rate', 'yield'],
        high_label='Rates_High',
        low_label='Rates_Low',
        description='Interest rate environment',
    ),
)

print(f"Defined {len(REGIME_INDICATORS)} regime dimensions")

Defined 4 regime dimensions


## 2. Extract Regime Variables from RBI Data

In [3]:
def find_series_by_keyword(df: pd.DataFrame, keywords: list) -> list:
    """Return the distinct series names that mention at least one keyword.

    Matching is a case-insensitive substring test, so a short keyword can
    also match names that merely contain it as part of a longer word.

    Args:
        df: Frame with a 'series_name' column.
        keywords: Substrings to look for.

    Returns:
        Matching names, each listed once, in order of first appearance in
        ``df['series_name']``.
    """
    # Lower-case the keywords once instead of once per (series, keyword) pair.
    lowered_keywords = [kw.lower() for kw in keywords]
    matches = []
    for series in df['series_name'].unique():
        # Null / non-string names (e.g. None or NaN rows) cannot match a keyword
        # and would otherwise raise on .lower().
        if not isinstance(series, str):
            continue
        lowered_name = series.lower()
        if any(kw in lowered_name for kw in lowered_keywords):
            matches.append(series)
    return matches

# For each regime dimension, collect the RBI series whose names mention one of its keywords.
regime_series = {}
for regime in REGIME_INDICATORS:
    config = REGIME_INDICATORS[regime]
    matches = find_series_by_keyword(rbi_long, config['series_keywords'])
    regime_series.update({regime: matches})
    print(f"{regime}: {len(matches)} series found")

GROWTH: 1 series found
INFLATION: 10 series found
LIQUIDITY: 12 series found
RATES: 8 series found


In [4]:
def extract_regime_proxy(df: pd.DataFrame, series_list: list) -> "tuple[pd.Series, str | None]":
    """Pick the candidate series with the most rows and return it at monthly frequency.

    Args:
        df: Long-format frame with 'series_name', 'Date' and 'value' columns.
        series_list: Candidate series names to choose from.

    Returns:
        ``(monthly, name)`` where ``monthly`` holds the last observation of each
        month (month-end index) for the chosen series and ``name`` is that
        series' name. For an empty ``series_list`` the result is
        ``(empty float Series, None)`` so callers can always unpack two values.
    """
    if not series_list:
        # Keep the same (series, name) shape as the normal path; returning a bare
        # Series here made `proxy, name = extract_regime_proxy(...)` fail.
        return pd.Series(dtype=float), None

    # Coverage = number of rows recorded for each candidate; ties go to the
    # candidate listed first.
    coverage = {s: int((df['series_name'] == s).sum()) for s in series_list}
    best_series = max(coverage, key=coverage.get)

    series_data = df.loc[df['series_name'] == best_series, ['Date', 'value']].copy()
    series_data['Date'] = pd.to_datetime(series_data['Date'])
    series_data = series_data.set_index('Date').sort_index()

    # Month-end buckets, keeping the last observation in each month.
    monthly = series_data.resample('ME').last()['value']
    monthly.name = best_series
    return monthly, best_series

# Choose one representative monthly series for every dimension that has candidates.
regime_proxies = {}
for regime in regime_series:
    series_list = regime_series[regime]
    if not series_list:
        # Nothing matched this dimension's keywords; leave it out of the proxies.
        continue
    proxy, name = extract_regime_proxy(rbi_long, series_list)
    regime_proxies[regime] = proxy
    print(f"{regime}: using '{name}'")

GROWTH: using 'Index of Industrial Production'
INFLATION: using 'WPI-Monthly-ALL COMMODITY'


LIQUIDITY: using 'M3'
RATES: using 'REPO RATE (OVERNIGHT)'


## 3. Calculate Regime States

In [5]:
def calculate_regime_state(series: pd.Series, method: str = 'zscore', lookback: int = 60) -> pd.Series:
    """Classify each observation as high (1), neutral (0) or low (-1).

    Args:
        series: Values to classify.
        method: 'zscore' compares the rolling z-score of the level against
            +/-0.5; 'yoy' compares the 12-period percentage change against its
            own rolling 66% / 33% quantiles; any other value returns the sign
            of the deviation from the rolling median.
        lookback: Rolling window length; at least 12 observations are required
            before a rolling statistic is produced.

    Returns:
        A Series on ``series.index``. For 'zscore' and 'yoy', points whose
        rolling statistic is not yet available stay at 0. The median fallback
        returns the raw ``np.sign`` result, which is NaN at such points.
    """
    def _window(values):
        # Every rolling statistic in this function shares the same window settings.
        return values.rolling(lookback, min_periods=12)

    def _three_state(signal, upper, lower):
        # Start neutral, then mark points above the upper / below the lower bound.
        labels = pd.Series(0, index=series.index)
        labels[signal > upper] = 1
        labels[signal < lower] = -1
        return labels

    if method == 'zscore':
        level_window = _window(series)
        standardized = (series - level_window.mean()) / level_window.std()
        return _three_state(standardized, 0.5, -0.5)

    if method == 'yoy':
        annual_change = series.pct_change(12)
        # Thresholds are rolling quantiles of the year-over-year change itself.
        change_window = _window(annual_change)
        return _three_state(annual_change, change_window.quantile(0.66), change_window.quantile(0.33))

    return np.sign(series - _window(series).median())

# Score every proxy on the monthly index of the combined matrix.
regime_states = pd.DataFrame(index=combined.index)
for regime in regime_proxies:
    proxy = regime_proxies[regime]
    aligned = proxy.reindex(combined.index)
    # Growth and inflation are judged on year-over-year change; the other
    # dimensions on a rolling z-score of the level.
    scoring_method = 'yoy' if regime in ('INFLATION', 'GROWTH') else 'zscore'
    state = calculate_regime_state(aligned, method=scoring_method)
    regime_states[regime] = state

# Carry the last known state forward and treat anything still missing as neutral (0).
regime_states = regime_states.ffill().fillna(0)
print(f"Regime states calculated")

Regime states calculated


In [6]:
def create_composite_regime(states_df: pd.DataFrame) -> pd.Series:
    """Combine the growth and inflation states into a single quadrant label.

    A state of 0 or above counts as "up" and a state below 0 as "down":

        growth up,   inflation down -> 'Goldilocks'
        growth up,   inflation up   -> 'Reflation'
        growth down, inflation up   -> 'Stagflation'
        growth down, inflation down -> 'Deflation'

    Rows matching none of the four cases (for example a NaN state) keep the
    label 'Unknown', as does every row when either the 'GROWTH' or the
    'INFLATION' column is absent.

    Args:
        states_df: Frame of regime states, normally with 'GROWTH' and
            'INFLATION' columns.

    Returns:
        A Series of labels on ``states_df.index``.
    """
    labels = pd.Series('Unknown', index=states_df.index)
    if 'GROWTH' not in states_df.columns or 'INFLATION' not in states_df.columns:
        return labels

    # Build "up" and "down" separately: a NaN state is neither, so the two
    # masks are not simple complements of each other.
    growth_up = states_df['GROWTH'] >= 0
    growth_down = states_df['GROWTH'] < 0
    inflation_up = states_df['INFLATION'] >= 0
    inflation_down = states_df['INFLATION'] < 0

    quadrants = (
        ('Goldilocks', growth_up & inflation_down),
        ('Reflation', growth_up & inflation_up),
        ('Stagflation', growth_down & inflation_up),
        ('Deflation', growth_down & inflation_down),
    )
    for label, mask in quadrants:
        labels[mask] = label
    return labels

# Attach the growth/inflation quadrant label to each month, then report how often each label occurs.
regime_states['COMPOSITE'] = create_composite_regime(regime_states)
composite_counts = regime_states['COMPOSITE'].value_counts()
print("Regime Distribution:")
print(composite_counts)

Regime Distribution:
COMPOSITE
Reflation      92
Goldilocks     23
Deflation      11
Stagflation     6
Name: count, dtype: int64


## 4. Export & Regime Transitions

In [7]:
# Persist the full state table, plus a single-column frame holding only the composite label.
regime_states.to_parquet(PROCESSED_PATH / 'macro_regime_states.parquet')
regime_states['COMPOSITE'].to_frame('regime').to_parquet(PROCESSED_PATH / 'composite_regime.parquet')

try:
    # Use a dedicated name: rebinding `regime_series` here would overwrite the
    # regime -> series-names dict built earlier and break re-running those cells.
    composite_labels = regime_states['COMPOSITE']
    # Rows are the previous month's regime, columns the current month's.
    # normalize='index' makes each row sum to 1, so * 100 gives row percentages.
    # No margins are requested, so there is no 'All' row/column to strip afterwards.
    transitions = pd.crosstab(composite_labels.shift(1), composite_labels, normalize='index') * 100
    print("\nTransition Matrix (%):")
    print(transitions.round(1))
except Exception as e:
    # Best-effort diagnostic only: the exports above have already been written.
    print(f"Could not calculate transitions: {e}")

print("\n✓ Saved: macro_regime_states.parquet")


Transition Matrix (%):
COMPOSITE    Deflation  Goldilocks  Reflation  Stagflation
COMPOSITE                                                 
Deflation         63.6        27.3        9.1          0.0
Goldilocks        13.0        73.9       13.0          0.0
Reflation          1.1         3.3       89.0          6.6
Stagflation        0.0         0.0      100.0          0.0

✓ Saved: macro_regime_states.parquet
