# 4. Regime Detection with Hidden Markov Model
## Classify Market into 3 Regimes: Uptrend, Downtrend, Sideways

In [1]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from data_utils import load_data
from regime import RegimeDetector, RegimeVisualizer, detect_regimes
import warnings
warnings.filterwarnings('ignore')

print("Libraries imported successfully!")

Libraries imported successfully!


In [9]:
# Load merged data
df = load_data('../data/nifty_merged_5min.csv')
print(f"Data shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

INFO:data_utils:Loading data from ../data/nifty_merged_5min.csv


Data shape: (245, 16)
Columns: ['timestamp', 'open_spot', 'high_spot', 'low_spot', 'close_spot', 'volume_spot', 'open_futures', 'high_futures', 'low_futures', 'close_futures', 'volume_futures', 'open', 'high', 'low', 'close', 'volume']


## 4.1 Prepare Features for HMM

In [13]:
# Features for HMM - using available columns
hmm_features = [col for col in df.columns if col not in ['timestamp']]

print(f"Using {len(hmm_features)} features for HMM:")
print(hmm_features)

Using 15 features for HMM:
['open_spot', 'high_spot', 'low_spot', 'close_spot', 'volume_spot', 'open_futures', 'high_futures', 'low_futures', 'close_futures', 'volume_futures', 'open', 'high', 'low', 'close', 'volume']


## 4.2 Train HMM Model

In [17]:
# Calculate simple returns for regime detection
df['returns'] = df['close_spot'].pct_change()

# Detect regimes using HMM with returns
from regime import RegimeDetector
detector = RegimeDetector(n_regimes=3)

# Fit on training data
train_size = int(0.7 * len(df))
train_df = df.iloc[:train_size].copy()
test_df = df.iloc[train_size:].copy()

# Fit detector
detector.fit(train_df, feature_cols=['returns'])

# Predict regimes
train_regimes = detector.predict(train_df)
test_regimes = detector.predict(test_df)

# Create result dataframe
df_with_regimes = df.copy()
df_with_regimes['regime'] = -1
df_with_regimes.loc[:train_size-1, 'regime'] = train_regimes
df_with_regimes.loc[train_size:, 'regime'] = test_regimes

print("\nRegime detection completed!")
print(f"Regime distribution:")
print(df_with_regimes['regime'].value_counts())

INFO:regime:Initialized RegimeDetector with 3 regimes
INFO:regime:Fitting HMM to training data
INFO:regime:Preparing 1 features for HMM
INFO:regime:Features prepared and normalized


INFO:regime:HMM fitted successfully. Log-likelihood: -210.82
INFO:regime:Predicting regimes
INFO:regime:Preparing 1 features for HMM
INFO:regime:Features prepared and normalized
INFO:regime:Predicting regimes
INFO:regime:Preparing 1 features for HMM
INFO:regime:Features prepared and normalized



Regime detection completed!
Regime distribution:
regime
2    230
1     12
0      3
Name: count, dtype: int64


## 4.3 Analyze Regimes

In [None]:
# Get regime statistics
regime_stats = detector.get_regime_statistics(
    df_with_regimes, 
    df_with_regimes['regime'].values, 
    hmm_features
)

print("\nRegime Statistics:")
regime_stats

In [None]:
# Calculate regime durations
durations = detector.calculate_regime_durations(df_with_regimes['regime'].values)

print("\nRegime Durations:")
for regime, stats in durations.items():
    regime_name = {-1: 'Downtrend', 0: 'Sideways', 1: 'Uptrend'}.get(regime, regime)
    print(f"\n{regime_name}:")
    print(f"  Mean: {stats['mean']:.2f} periods")
    print(f"  Median: {stats['median']:.2f} periods")
    print(f"  Min: {stats['min']} periods")
    print(f"  Max: {stats['max']} periods")

## 4.4 Visualize Regimes

In [None]:
# Plot regimes on price chart
RegimeVisualizer.plot_regimes_on_price(
    df_with_regimes,
    df_with_regimes['regime'].values,
    price_col='close_spot',
    timestamp_col='timestamp',
    save_path='../plots/regime_price_overlay.png'
)
print("Regime price overlay plot saved")

In [None]:
# Plot transition matrix
transition_matrix = detector.get_transition_matrix()
RegimeVisualizer.plot_transition_matrix(
    transition_matrix,
    save_path='../plots/regime_transition_matrix.png'
)
print("Transition matrix plot saved")

print("\nTransition Matrix:")
print(transition_matrix)

In [None]:
# Plot regime statistics
RegimeVisualizer.plot_regime_statistics(
    regime_stats,
    feature_cols=['avg_iv', 'iv_spread', 'pcr_oi', 'spot_returns'],
    save_path='../plots/regime_statistics.png'
)
print("Regime statistics plot saved")

In [None]:
# Plot duration histogram
RegimeVisualizer.plot_duration_histogram(
    durations,
    save_path='../plots/regime_durations.png'
)
print("Duration histogram saved")

## 4.5 Save Results

In [None]:
# Save data with regimes
df_with_regimes.to_csv('../data/nifty_with_regimes.csv', index=False)
print("Data with regimes saved: data/nifty_with_regimes.csv")

# Save HMM model
detector.save_model('../models/hmm_regime_model.pkl')
print("HMM model saved: models/hmm_regime_model.pkl")

## Summary

In [18]:
print("=" * 80)
print("REGIME DETECTION SUMMARY")
print("=" * 80)
regime_counts = df_with_regimes['regime'].value_counts()
print(f"\nUptrend (+1): {regime_counts.get(1, 0)} periods ({regime_counts.get(1, 0)/len(df_with_regimes)*100:.1f}%)")
print(f"Sideways (0): {regime_counts.get(0, 0)} periods ({regime_counts.get(0, 0)/len(df_with_regimes)*100:.1f}%)")
print(f"Downtrend (-1): {regime_counts.get(-1, 0)} periods ({regime_counts.get(-1, 0)/len(df_with_regimes)*100:.1f}%)")
print("\nNext Step: Proceed to 05_baseline_strategy.ipynb")

REGIME DETECTION SUMMARY

Uptrend (+1): 12 periods (4.9%)
Sideways (0): 3 periods (1.2%)
Downtrend (-1): 0 periods (0.0%)

Next Step: Proceed to 05_baseline_strategy.ipynb
