In [None]:
pip install hmmlearn pytorch

In [None]:
"""
HMM-based market regime detection
"""

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from warnings import filterwarnings
import matplotlib.pyplot as plt
import hmmlearn.hmm as hmm
import yfinance as yf
import pandas as pd
import numpy as np
import pickle
import os

# ------------------- Configuration -------------------
SYMBOL = 'TSLA'
folder = "TSLA"
START_DATE, END_DATE = '2020-01-01', '2025-06-27'
PERIOD = "1d"
WINDOW_SIZE = 20
TRADING_FEES = 0.002
LOOKBACKS = [1, 5, 10, 21]

# Output directory for this symbol; created up front if it does not exist.
os.makedirs(folder, exist_ok=True)

# -----------------------------------------------------

# Download the price history and discard the corporate-action columns.
data = (
    yf.Ticker(SYMBOL)
    .history(start=START_DATE, end=END_DATE)
    .drop(columns=["Dividends", "Stock Splits"])
)

# Chronological split: 70% train, then a gap of at least 24 rows, then test.
total_size = len(data)
train_size = int(total_size * 0.7)
gap_size = max(int(total_size * 0.1), 24)
test_size = total_size - train_size - gap_size

if test_size <= 30:
    raise ValueError("Insufficient data for meaningful backtesting")

# The guard above guarantees test_size is positive, so tail() selects exactly
# the rows that follow the train + gap segment.
train_data = data.head(train_size).copy()
test_data = data.tail(test_size).copy()

def create_features(data, lookbacks=None, window_size=None):
    """Append return, volatility, momentum and z-score features to a price frame.

    Args:
        data: DataFrame with at least ``Close`` and ``Volume`` columns. The
            input is copied and never modified.
        lookbacks: Horizons (in rows) for the per-horizon features. Defaults
            to the module-level ``LOOKBACKS``.
        window_size: Rolling window used for the price-vs-moving-average
            z-score. Defaults to the module-level ``WINDOW_SIZE``.

    Returns:
        A new DataFrame holding the original columns plus the feature columns,
        with every row that contains a NaN removed.
    """
    if lookbacks is None:
        lookbacks = LOOKBACKS
    if window_size is None:
        window_size = WINDOW_SIZE

    data = data.copy()
    close = data["Close"]

    # enumerate() supplies the 1-based position directly, instead of searching
    # the list with .index() on every iteration (which is also wrong when a
    # horizon appears twice).
    for position, days in enumerate(lookbacks, start=1):
        log_return = np.log(close / close.shift(days))
        data[f"Log_Return_{days}"] = log_return
        # Volatility is only produced for every second horizon in the list.
        if position % 2 == 0:
            data[f"Volatility_{days}"] = log_return.rolling(days, min_periods=1).std()
        # Momentum is only produced for horizons of 10 rows or more; it is built
        # from closes lagged by one row.
        if days >= 10:
            data[f"Momentum_{days}"] = close.shift(1) - close.shift(days + 1)

    # Z-score of the previous row's log volume against its own 5-row window;
    # the small constants guard against log(0) and division by zero.
    log_volume = np.log(data["Volume"].shift(1) + 1e-6)
    rolling_mean = log_volume.rolling(window=5, min_periods=5).mean()
    rolling_std = log_volume.rolling(window=5, min_periods=5).std()
    data["Z_Log_Volume"] = (log_volume - rolling_mean) / (rolling_std + 1e-6)

    # Distance of the close from its rolling mean, in rolling standard deviations.
    moving_avg = close.rolling(window_size).mean()
    moving_std = close.rolling(window_size).std()
    data["Z_Price_vs_MA"] = (close - moving_avg) / (moving_std + 1e-6)

    return data.dropna()

# Engineer features on each split separately so no rolling window spans the
# train/test boundary.
train_data = create_features(train_data)
test_data = create_features(test_data)
feature_cols = [col for col in train_data.columns if any(key in col for key in ['Log_Return', 'Volatility', 'Momentum', 'Z_'])]
X_train = train_data[feature_cols]
# The test matrix must come from the held-out frame, not from train_data.
X_test = test_data[feature_cols]

# Fit the scaler on training data only, then reuse that fit for the test data.
# Re-fitting on the test set would leak its statistics and overwrite the
# scaler that later cells depend on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Same rule for PCA: fit on train, apply to test.
pca = PCA(n_components=5)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Fit a 5-state Gaussian HMM with full covariance matrices on the PCA-reduced
# training features.
hmm_model = hmm.GaussianHMM(
    n_components=5,
    covariance_type="full",
    n_iter=400,
    tol=1e-4,
    # Fixed seed: without it the initialisation differs on every run, so the
    # regime numbering and the fitted parameters are not reproducible.
    random_state=42,
    verbose=True,
    init_params="stmc"
)

hmm_model.fit(X_train)
# Most likely state sequence for the training rows.
hidden_states = hmm_model.predict(X_train)

# hidden_states holds one label per row of the feature-engineered training
# frame. Dates and closes must therefore come from train_data: taking the head
# of the raw `data` frame would shift every label by the rows that dropna()
# removed during feature creation.
df_regimes = pd.DataFrame({
    "date": train_data.index,
    "close": train_data["Close"].values,
    "regime": hidden_states
})

fig, ax = plt.subplots(figsize=(15, 6))

# One scatter series per regime so each gets its own colour and legend entry.
for regime in np.unique(df_regimes["regime"]):
    mask = df_regimes["regime"] == regime
    ax.plot(df_regimes.loc[mask, "date"], df_regimes.loc[mask, "close"], '.', label=f"Regime {regime}")

ax.set_title("Market Regimes Detected by HMM")
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
ax.legend()
plt.tight_layout()
plt.show()

# Print, for every fitted state, its mean vector and the square root of the
# covariance diagonal (a per-component spread).
for state, state_mean in enumerate(hmm_model.means_):
    state_cov = hmm_model.covars_[state]
    component_std = np.sqrt(np.diag(state_cov))
    print(f"Regime {state}")
    print("Mean vector:", state_mean)
    print("Volatility proxy (diag(cov)):", component_std)
    print()

In [None]:
"""
    Regime implementation: apply the fitted HMM to the full dataset
"""
# Rebuild the features over the whole price history and keep the same columns
# the training pipeline used.
x_data = create_features(data)
feature_cols = [col for col in train_data.columns if any(key in col for key in ['Log_Return', 'Volatility', 'Momentum', 'Z_'])]
# .copy() gives an independent frame, so the regime columns added below do not
# trigger pandas' SettingWithCopyWarning.
x_data = x_data[feature_cols].copy()

# Apply the already-fitted scaler and PCA; nothing is re-fitted here.
x = scaler.transform(x_data)

x = pca.transform(x)
# Posterior probability of each state for every row.
probs = hmm_model.predict_proba(x)

regime_confidence = probs.max(axis=1)
most_likely_regime = probs.argmax(axis=1)

x_data["Regime_Prob"] = regime_confidence
x_data["Most_Likely_Regime"] = most_likely_regime
# Displayed as the cell output: number of rows assigned to each regime.
x_data["Most_Likely_Regime"].value_counts()


In [None]:
"""
    General ML Regime Detection Model (General_Model)
"""

from torch.utils.data import DataLoader, Dataset, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import yfinance as yf
import pandas as pd
import numpy as np
import pickle
import torch
import os

class General_Model(nn.Module):
    """Placeholder for the general regime-detection network.

    No layers and no ``forward`` are defined yet, so calling an instance falls
    through to ``nn.Module``'s default ``forward`` and raises
    ``NotImplementedError``. TODO: add the architecture.
    """

    def __init__(self):
        # nn.Module keeps its parameter, buffer, submodule and hook registries
        # on the instance; they only exist after the base initialiser has run.
        super().__init__()


gen_model = General_Model()

In [None]:
"""
    Asset-Specific (Residual_Model_A) ML Regime Detection Model
"""

from torch.utils.data import DataLoader, Dataset, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler
import torch.nn.functional as F
import torch.optim as optim
import torch.nn as nn
import yfinance as yf
import pandas as pd
import numpy as np
import pickle
import torch
import os


class Residual_Model_A(nn.Module):
    """Placeholder for the asset-specific residual network.

    No layers and no ``forward`` are defined yet, so calling an instance falls
    through to ``nn.Module``'s default ``forward`` and raises
    ``NotImplementedError``. TODO: add the architecture.
    """

    def __init__(self):
        # nn.Module keeps its parameter, buffer, submodule and hook registries
        # on the instance; they only exist after the base initialiser has run.
        super().__init__()


res_model = Residual_Model_A()

In [None]:
"""
     final output General_Model(x) + Residual_Model_A(x)
"""

# `x` is the NumPy array returned by pca.transform, whereas torch modules
# operate on tensors, so convert it once and feed the same tensor to both models.
x_tensor = torch.as_tensor(x, dtype=torch.float32)
out = gen_model(x_tensor) + res_model(x_tensor)

In [None]:
"""
    Monte Carlo simulation stress test
"""

                     +-------------------+
                     | Raw Market Data   |
                     | (multi-asset)     |
                     +--------+----------+
                              |
                     +--------v----------+
                     | Feature Generator |
                     | - Rolling stats   |
                     | - Regime probs    |
                     +--------+----------+
                              |
               +--------------v-------------+
               | General Market State Model |
               | (Trained on pooled data)   |
               +--------------+-------------+
                              |
        +---------------------v---------------------+
        | For each asset A:                         |
        | - Compute residuals                       |
        | - Train Residual_Model_A                  |
        |   (on asset-specific patterns)            |
        +---------------------+---------------------+
                              |
          +-------------------v--------------------+
          | Final Prediction for asset A:          |
          | General_Model(x) + Residual_Model_A(x) |
          +-------------------+--------------------+

                          ↓
          +------------------------------------+
          | Monte Carlo Simulation / Scenarios |
          | (for stress-testing and robustness)|
          +------------------------------------+

                          ↓
          +----------------+------------------+
          | Evaluation & Metrics               |
          | - Asset-level & portfolio-level    |
          +------------------------------------+
