In [75]:
import os

import pandas as pd
import requests
import yfinance as yf

# Download the full daily VOO history from Alpha Vantage.
# The API key is a credential, so it is read from the environment rather than
# being stored in the notebook.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
url = "https://www.alphavantage.co/query"
api_key = os.environ.get("ALPHAVANTAGE_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the ALPHAVANTAGE_API_KEY environment variable before running this cell."
    )

params = {
    "function": "TIME_SERIES_DAILY",
    "symbol": "VOO",
    "apikey": api_key,
    "outputsize": "full",
}
# A timeout keeps the kernel from hanging on a stalled connection, and
# raise_for_status surfaces HTTP errors here instead of in a later cell.
response = requests.get(url, params=params, timeout=30)
response.raise_for_status()
data = response.json()

In [123]:
# Extract the daily closing prices from the Alpha Vantage payload, oldest first.
ts_data = data.get("Time Series (Daily)", {})
if not ts_data:
    # Without this check an empty `prices` list only fails later, inside the
    # model fit, with an unrelated-looking error. The payload's keys usually
    # name the real problem (e.g. a rate-limit or error message from the API).
    raise ValueError(
        "Alpha Vantage response has no 'Time Series (Daily)' data; "
        f"response keys: {list(data)}"
    )

# Sort by date key instead of relying on the payload arriving newest-first.
# Assumes the keys are ISO 'YYYY-MM-DD' strings, whose lexical order is
# chronological - TODO confirm against the API response.
prices = [float(ts_data[day]['4. close']) for day in sorted(ts_data)]

In [157]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
import matplotlib.pyplot as plt

def _volatility_labels(n_states):
    """Return one label per regime, ordered from lowest to highest volatility."""
    named = {
        1: ['Low'],
        2: ['Low', 'High'],
        3: ['Low', 'Mid', 'High'],
    }
    if n_states in named:
        return named[n_states]
    # No natural names beyond three regimes: fall back to a 1-based volatility rank.
    return [f'Vol-{rank}' for rank in range(1, n_states + 1)]


def regime_segmentation(prices, n_states=2, random_state=42):
    """Segment a price series into volatility regimes with a Gaussian HMM.

    Parameters
    ----------
    prices : sequence or array of float
        Prices in chronological order. Must contain at least two values, all
        finite and strictly positive (log returns are taken).
    n_states : int, default 2
        Number of hidden states fitted by the HMM.
    random_state : int or None, default 42
        Seed forwarded to ``GaussianHMM`` so that re-running the notebook is
        repeatable. Pass ``None`` to leave the fit unseeded.

    Returns
    -------
    regimes_sorted : list of dict
        One dict per state, ordered by ascending ``std_daily_return``. Keys:
        ``state`` (HMM state index), ``mean_daily_return``,
        ``std_daily_return``, ``count`` and ``regime`` (volatility label:
        'Low'/'High' for two states, 'Low'/'Mid'/'High' for three, 'Vol-k'
        otherwise). States that received no observations have NaN statistics
        and are placed last.
    hidden_states : numpy.ndarray
        State index predicted for each log return (``len(prices) - 1`` values).
        These are raw HMM state indices; match them to a regime through the
        ``state`` key.

    Raises
    ------
    ValueError
        If fewer than two prices are given or any price is not finite and
        strictly positive.
    """
    prices = np.asarray(prices, dtype=float)
    if prices.size < 2:
        raise ValueError("prices must contain at least two values")
    if not np.all(np.isfinite(prices) & (prices > 0)):
        raise ValueError("prices must be finite and strictly positive")

    # Step 1: log returns as a column vector (n_samples, 1), the 2-D shape the
    # model is fitted on.
    log_returns = np.diff(np.log(prices)).reshape(-1, 1)

    # Step 2: fit the Gaussian HMM.
    model = GaussianHMM(
        n_components=n_states,
        covariance_type="full",
        n_iter=1000,
        random_state=random_state,
    )
    model.fit(log_returns)

    # Step 3: most likely hidden state for every observation.
    hidden_states = model.predict(log_returns)

    # Step 4: per-state return statistics.
    regimes = []
    for i in range(n_states):
        state_returns = log_returns[hidden_states == i]
        if len(state_returns):
            mean_return = np.mean(state_returns)
            std_return = np.std(state_returns)
        else:
            # A state may end up with no observations; report NaN explicitly
            # rather than triggering NumPy's empty-slice warnings.
            mean_return = std_return = float('nan')
        regimes.append({
            'state': i,
            'mean_daily_return': mean_return,
            'std_daily_return': std_return,
            'count': len(state_returns)
        })

    # Step 5: order by volatility. NaN never compares as smaller or larger, so
    # empty states are pushed to the end explicitly to keep the order defined.
    regimes_sorted = sorted(
        regimes,
        key=lambda r: (bool(np.isnan(r['std_daily_return'])), r['std_daily_return']),
    )
    for r, label in zip(regimes_sorted, _volatility_labels(n_states)):
        r['regime'] = label

    return regimes_sorted, hidden_states

# Fit the default two-state model on the downloaded closes, then report each
# regime in the order returned (sorted by volatility, lowest first).
regimes, state_sequence = regime_segmentation(prices)

for regime in regimes:
    # One print call per regime; the double newline reproduces the blank
    # separator line between regimes.
    print(
        f"Regime: {regime['regime']}",
        f"  Mean Daily Return: {regime['mean_daily_return']:.5f}",
        f"  Std Dev (Volatility): {regime['std_daily_return']:.5f}",
        f"  Days in Regime: {regime['count']}",
        sep="\n",
        end="\n\n",
    )


Regime: Low
  Mean Daily Return: 0.00066
  Std Dev (Volatility): 0.00946
  Days in Regime: 3667

Regime: High
  Mean Daily Return: -0.00134
  Std Dev (Volatility): 0.12725
  Days in Regime: 37



In [115]:
!pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp312-cp312-win_amd64.whl.metadata (3.1 kB)
Downloading hmmlearn-0.3.3-cp312-cp312-win_amd64.whl (127 kB)
   ---------------------------------------- 0.0/127.3 kB ? eta -:--:--
   ------ -------------------------------- 20.5/127.3 kB 640.0 kB/s eta 0:00:01
   -------------------------------------- - 122.9/127.3 kB 1.8 MB/s eta 0:00:01
   ---------------------------------------- 127.3/127.3 kB 1.9 MB/s eta 0:00:00
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.3.3


In [165]:

def segment_by_volatility(prices, window=5):
    """Bucket daily log returns into rolling-volatility terciles.

    Each log return is paired with the sample standard deviation of the
    ``window`` log returns ending on that same day. Returns are then ranked by
    that rolling volatility and split into three equal-sized buckets; when the
    count is not divisible by three the remainder goes to 'High'.

    Parameters
    ----------
    prices : sequence or array of float
        Prices in chronological order, all finite and strictly positive.
    window : int, default 5
        Number of log returns in each rolling window. Must be at least 2,
        because the rolling standard deviation of a single value is NaN.

    Returns
    -------
    dict
        Maps 'Low', 'Mid' and 'High' to a dict with ``mean_daily_return``,
        ``std_daily_return`` (population std of the bucket's returns) and
        ``count``.

    Raises
    ------
    ValueError
        If ``window`` is below 2, any price is not finite and strictly
        positive, or there are too few prices to give every tercile at least
        one observation (``len(prices) >= window + 3`` is required).
    """
    if window < 2:
        raise ValueError("window must be at least 2 to compute a rolling std")

    prices = np.asarray(prices, dtype=float)
    if not np.all(np.isfinite(prices) & (prices > 0)):
        raise ValueError("prices must be finite and strictly positive")

    log_returns = np.diff(np.log(prices))
    # Each rolling window consumes `window` returns, so this many volatility
    # values are usable; fewer than three leaves a tercile empty.
    n = len(log_returns) - window + 1
    if n < 3:
        raise ValueError(
            f"need at least {window + 3} prices for window={window}, got {prices.size}"
        )

    volatility = pd.Series(log_returns).rolling(window).std().to_numpy()

    # Align lengths: the first window-1 rolling values are NaN, so drop them
    # from both arrays. NOTE: the window ends on the return being bucketed, so
    # a day's own return contributes to the volatility it is ranked by.
    log_returns = log_returns[window - 1:]
    volatility = volatility[window - 1:]

    # Rank by volatility; a stable sort makes bucket membership deterministic
    # when several days share the same volatility.
    sorted_indices = np.argsort(volatility, kind="stable")
    tercile_size = n // 3

    buckets = {
        'Low': sorted_indices[:tercile_size],
        'Mid': sorted_indices[tercile_size:2*tercile_size],
        'High': sorted_indices[2*tercile_size:]
    }

    regime_stats = {}
    for regime, indices in buckets.items():
        rets = log_returns[indices]
        regime_stats[regime] = {
            'mean_daily_return': np.mean(rets),
            'std_daily_return': np.std(rets),
            'count': len(rets)
        }

    return regime_stats
# Print the per-tercile return statistics, using the default window of 5.
print(segment_by_volatility(prices))


{'Low': {'mean_daily_return': 0.0007015040171695818, 'std_daily_return': 0.004093662304506883, 'count': 1233}, 'Mid': {'mean_daily_return': 0.0003544907479842397, 'std_daily_return': 0.007379604031489026, 'count': 1233}, 'High': {'mean_daily_return': 0.0008542049452281508, 'std_daily_return': 0.02608454782305211, 'count': 1234}}
