# 4. Regime Detection with Hidden Markov Model
## Classify Market into 3 Regimes: Uptrend, Downtrend, Sideways

In [None]:
# Extend the module search path with ../src (relative to the working
# directory) before importing the project-local modules further down.
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Project-local helpers; presumably these live in ../src (see path tweak above).
from data_utils import load_data
from regime import RegimeDetector, RegimeVisualizer, detect_regimes
import warnings
# Suppress all warnings for the remainder of the session. Note that this also
# hides any warnings raised by the libraries used while fitting the model.
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")

In [None]:
# Read the feature CSV through the project's load_data helper and report
# the shape of the resulting frame.
features_csv = '../data/nifty_features_5min.csv'
df = load_data(features_csv)
print(f"Data shape: {df.shape}")

## 4.1 Prepare Features for HMM

In [None]:
# Candidate inputs for the HMM. Judging by the column names, most are
# option-derived (implied volatility, put/call ratio, call greeks), while
# 'futures_basis' and 'spot_returns' are not -- so this is not an
# options-only feature set.
candidate_features = [
    'avg_iv', 'iv_spread', 'pcr_oi',
    'call_delta', 'call_gamma', 'call_vega',
    'futures_basis', 'spot_returns',
]

# Keep only the candidates present in the loaded frame, and say explicitly
# which ones were dropped so a silently shrunken feature set is noticed.
hmm_features = [f for f in candidate_features if f in df.columns]
missing_features = [f for f in candidate_features if f not in df.columns]
if missing_features:
    print(f"Skipping {len(missing_features)} feature(s) not found in data: {missing_features}")

print(f"Using {len(hmm_features)} features for HMM:")
print(hmm_features)

## 4.2 Train HMM Model

In [None]:
# Run the project's detect_regimes helper on the selected feature columns.
# It returns the frame with a 'regime' column plus the detector object that
# the later cells query. train_size=0.7 is presumably the training fraction
# -- confirm against regime.detect_regimes.
detection_options = {
    'train_size': 0.7,
    'feature_cols': hmm_features,
}
df_with_regimes, detector = detect_regimes(df, **detection_options)

print("\nRegime detection completed!")
print("Regime distribution:")
regime_distribution = df_with_regimes['regime'].value_counts()
print(regime_distribution)

## 4.3 Analyze Regimes

In [None]:
# Ask the detector for per-regime statistics over the HMM feature columns,
# passing the regime labels as a plain array.
regime_sequence = df_with_regimes['regime'].values
regime_stats = detector.get_regime_statistics(
    df_with_regimes, regime_sequence, hmm_features
)

print("\nRegime Statistics:")
# Bare expression on the last line so the notebook renders the result.
regime_stats

In [None]:
# Duration statistics per regime, as returned by the detector: a mapping
# from regime code to a dict with 'mean', 'median', 'min' and 'max' entries.
durations = detector.calculate_regime_durations(df_with_regimes['regime'].values)

# Human-readable names for the regime codes; unknown codes print as-is.
REGIME_NAMES = {-1: 'Downtrend', 0: 'Sideways', 1: 'Uptrend'}

print("\nRegime Durations:")
for regime, stats in durations.items():
    regime_name = REGIME_NAMES.get(regime, regime)
    report_lines = [
        f"\n{regime_name}:",
        f"  Mean: {stats['mean']:.2f} periods",
        f"  Median: {stats['median']:.2f} periods",
        f"  Min: {stats['min']} periods",
        f"  Max: {stats['max']} periods",
    ]
    print("\n".join(report_lines))

## 4.4 Visualize Regimes

In [None]:
# Draw the regime labels over the spot close series and write the figure
# to the plots directory.
overlay_options = {
    'price_col': 'close_spot',
    'timestamp_col': 'timestamp',
    'save_path': '../plots/regime_price_overlay.png',
}
RegimeVisualizer.plot_regimes_on_price(
    df_with_regimes,
    df_with_regimes['regime'].values,
    **overlay_options,
)
print("Regime price overlay plot saved")

In [None]:
# Fetch the transition matrix from the detector, plot it to disk, then
# echo the raw matrix below the figure.
transition_matrix = detector.get_transition_matrix()
transition_plot_path = '../plots/regime_transition_matrix.png'
RegimeVisualizer.plot_transition_matrix(transition_matrix, save_path=transition_plot_path)
print("Transition matrix plot saved")

print("\nTransition Matrix:")
print(transition_matrix)

In [None]:
# Plot per-regime statistics for a short list of headline features.
# Only features that survived the column filter (and were therefore passed
# to get_regime_statistics) are requested, so a column missing from the
# data cannot be asked for here.
preferred_plot_features = ['avg_iv', 'iv_spread', 'pcr_oi', 'spot_returns']
stats_plot_features = [f for f in preferred_plot_features if f in hmm_features]

if stats_plot_features:
    RegimeVisualizer.plot_regime_statistics(
        regime_stats,
        feature_cols=stats_plot_features,
        save_path='../plots/regime_statistics.png'
    )
    print("Regime statistics plot saved")
else:
    # Nothing to draw: none of the headline features were used by the HMM.
    print("Regime statistics plot skipped: none of the plotted features are available")

In [None]:
# Render the regime-duration histogram from the duration statistics
# computed earlier and write it to the plots directory.
duration_plot_path = '../plots/regime_durations.png'
RegimeVisualizer.plot_duration_histogram(durations, save_path=duration_plot_path)
print("Duration histogram saved")

## 4.5 Save Results

In [None]:
# Persist the regime-labelled frame as CSV (without the index column).
regimes_csv_path = '../data/nifty_with_regimes.csv'
df_with_regimes.to_csv(regimes_csv_path, index=False)
print("Data with regimes saved: data/nifty_with_regimes.csv")

# Persist the fitted detector via its own save_model method.
model_path = '../models/hmm_regime_model.pkl'
detector.save_model(model_path)
print("HMM model saved: models/hmm_regime_model.pkl")

## Summary

In [None]:
# Final recap: number and percentage of periods assigned to each regime.
banner = "=" * 80
print(banner)
print("REGIME DETECTION SUMMARY")
print(banner)

regime_counts = df_with_regimes['regime'].value_counts()
total_periods = len(df_with_regimes)

# (label, regime code) in display order; the first label carries the blank
# line that separates the table from the banner.
summary_rows = [
    ("\nUptrend (+1)", 1),
    ("Sideways (0)", 0),
    ("Downtrend (-1)", -1),
]
for label, code in summary_rows:
    n_periods = regime_counts.get(code, 0)  # 0 when a regime never occurs
    print(f"{label}: {n_periods} periods ({n_periods / total_periods * 100:.1f}%)")

print("\nNext Step: Proceed to 05_baseline_strategy.ipynb")