## Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from datetime import datetime, date, timedelta, timezone
import random
import warnings
warnings.simplefilter('ignore')


import math
from dataclasses import dataclass
from typing import Iterable, Tuple

import pickle
from hmmlearn import hmm
from hmmlearn.hmm import GaussianHMM


## Initial - Sign model

This model counts the signs of the day-by-day returns computed from OHLCV data. \
It is a crude (simple) model, but it has been quite useful elsewhere.

It is my belief that expanding on this model can be useful in detecting "overweights" of negative or positive returns, and their reversion towards normal. \
I suspect that there will be a slight overweight of positive over negative signs, say 52% positive and 48% negative, and that a slightly larger overweight on either side indicates a market regime that is trending either upwards or downwards.

In [None]:
# Sign model: fit a 3-state Gaussian HMM on two features per trading day,
#   * sign_count   - volume-weighted balance of up/down days over the
#                    preceding SIGN_WINDOW rows (current row excluded),
#   * volume-state - rolling mean volume relative to the full-sample mean.
# The fitted model is pickled to "sign.pkl".

SIGN_WINDOW = 2000    # look-back (rows) for the volume-weighted sign balance
VOLUME_WINDOW = 1000  # look-back (rows) for the rolling mean volume
WARMUP_ROWS = 1000    # leading rows dropped from every ticker's features

lengths = []          # per-ticker sequence lengths, passed to the HMM fit
feature_blocks = []   # per-ticker feature matrices, stacked into Y below

for t in ["BAC"]:

    data = yf.Ticker(t)
    data = data.history(start = "1996-01-01", end = "2025-08-15")
    data = data.reset_index(drop = True)

    # Explicit copy so the column assignments below write to this frame and
    # not to a view of the downloaded one.
    data = data[["Open", "High", "Low", "Close", "Volume"]].copy()

    # returns: simple one-day return; the first row has no predecessor and is
    # set to 0. Assigning the filled Series back avoids chained assignment,
    # which does not modify `data` under pandas Copy-on-Write.
    data['returns'] = (data['Close'].diff() / data['Close'].shift(1)).fillna(0)

    # pn-counter: +1 for a non-negative return, -1 for a negative one.
    # NOTE(review): a return of exactly 0 (including the filled first row)
    # counts as +1, as before - confirm this is intended and not meant
    # to be 0.
    data['PN_counter'] = np.where(data['returns'] >= 0, 1, -1)

    # Volume-weighted sign balance over the previous SIGN_WINDOW rows. The
    # rolling sums are shifted by one row so that row i only sees rows
    # [max(0, i - SIGN_WINDOW), i); min_periods=1 reproduces the expanding
    # window used while fewer than SIGN_WINDOW rows are available. Row 0 has
    # no history and is set to 0.
    signed_volume = data['PN_counter'] * data['Volume']
    signed_sum = signed_volume.rolling(window=SIGN_WINDOW, min_periods=1).sum().shift(1)
    volume_sum = data['Volume'].rolling(window=SIGN_WINDOW, min_periods=1).sum().shift(1)
    data["sign_count"] = (signed_sum / volume_sum).fillna(0)

    data = data.fillna(0)
    # Rolling mean volume relative to the mean volume of the whole sample.
    # NOTE(review): the denominator uses the full history, i.e. it includes
    # rows that lie in the future of each observation.
    data["volume-state"] = data["Volume"].rolling(window=VOLUME_WINDOW).mean()/np.mean(data["Volume"]) - 1
    data = data.fillna(0)

    #features needed
    features = data[["sign_count","volume-state"]].iloc[WARMUP_ROWS:]
    features = features.reset_index(drop = True)

    X = np.array(features)
    feature_blocks.append(X)

    lengths.append(len(features))

# Stack all tickers; `lengths` tells the HMM where each sequence ends.
Y = np.concatenate(feature_blocks) if feature_blocks else np.empty((0, 2))
sign_model = GaussianHMM(n_components=3,covariance_type="full",n_iter=1000,random_state=2)
sign_model.fit(Y, lengths = lengths)

# Persist the fitted model (previously referenced the undefined name
# `imb_model`, which raised NameError after the fit had completed).
with open("sign.pkl", "wb") as f:
    pickle.dump(sign_model, f)

## Initial - Volatility model

In the above script, replace the features with volatility, volatility-adjusted momentum and ITR. \
These have worked well in our initial tests, but they need more accuracy as well as an additional structural-break component. \
The code below can be used as the substitute.


In [None]:
# Volatility feature set. Expects `data` to already hold the OHLCV columns
# and the 'returns' column created in the sign-model cell.

# Rolling 20-row standard deviation of the daily returns.
data['volatility'] = data['returns'].rolling(window=20).std()

# Daily return scaled by recent volatility. A zero volatility makes the ratio
# NaN (0/0) or +/-inf, so both are set to 0 - the same clean-up that is
# applied to ITR below.
data['VolAdjMomentum'] = (
    (data['returns'] / data['volatility'])
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

# ITR - intraday true range: the day's high-low span relative to the open.
# NaN and +/-inf (e.g. from a zero open) are set to 0. The cleaned Series is
# assigned back in one step instead of via chained assignment, which does
# not modify `data` under pandas Copy-on-Write.
data['ITR'] = (
    ((data['High'] - data['Low']) / (data['Open']))
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)


#features needed
# The first 1000 rows are dropped, matching the sign-model features.
features = data[["returns","volatility", "VolAdjMomentum", "ITR"]].iloc[1000:]
features = features.reset_index(drop = True)

## Initial - returns model
As with the trends in return signs, we have worked on a way to represent changes in the returns distribution, in order to find significant changes in that distribution. \
This has produced some results, but not satisfactory ones.

We use multiple returns, shifted on different timeframes, along with the sign of the distance of the moving averages from the current price. \
The features below are the ones to substitute for our current best.

In [None]:
# log transform of returns
data['log returns'] = log_ret(data['Close'])

# Long-term returns
data['returns1'] = data['Close'].diff()/data['Close'].shift(21)
data['returns1'].loc[data['returns1'].isna()] = 0

data['returns3'] = data['Close'].diff()/data['Close'].shift(63)
data['returns3'].loc[data['returns3'].isna()] = 0

data['returns6'] = data['Close'].diff()/data['Close'].shift(126)
data['returns6'].loc[data['returns6'].isna()] = 0

data['ma21'] = data['Close'].rolling(window=21).mean()
data['ma63'] = data['Close'].rolling(window=63).mean()
data['ma126']= data['Close'].rolling(window=126).mean()

data["sma-s"] = np.sign(data["Close"] - data["ma21"])
data["mma-s"] = np.sign(data["Close"] - data["ma63"])
data["lma-s"] = np.sign(data["Close"] - data["ma126"])

#features needed 
features = data[["log returns", "returns1","returns3","returns6", "sma-s","mma-s","lma-s"]][1000:]
features = features.reset_index(drop = True)

## Initial - Price Model
We are not trying to predict the price; we want to assess whether the price has moved outside of an area of confidence. \
This is an extremely simple version, but it has nonetheless been useful in estimating the rough area for the price at the time.

We have substituted these features into the HMM model.

In [None]:
# Price feature set: the sign of the close's distance from three simple and
# three exponential moving averages (20 / 40 / 120 rows).

# Log returns are refreshed here but are not part of this feature set.
data['log returns'] = log_ret(data['Close'])

close = data['Close']

# Column name -> window/span length, listed in creation order.
sma_windows = {'ma20': 20, 'ma40': 40, 'ma120': 120}
ema_spans = {'EMA20': 20, 'EMA40': 40, 'EMA120': 120}

# Simple moving averages of the close.
for ma_col, window in sma_windows.items():
    data[ma_col] = close.rolling(window=window).mean()

# Exponential moving averages of the close (recursive form, adjust=False).
for ema_col, span in ema_spans.items():
    data[ema_col] = close.ewm(span=span, adjust=False).mean()

# Sign column -> the average it is measured against.
sign_sources = {
    "sma-s": "ma20",
    "mma-s": "ma40",
    "lma-s": "ma120",
    "sEma-s": 'EMA20',
    "mEma-s": "EMA40",
    "lEma-s": "EMA120",
}

# +1 when the close is above the average, -1 when below, 0 when equal.
for sign_col, average_col in sign_sources.items():
    data[sign_col] = np.sign(close - data[average_col])

#features needed
# Keep only the sign columns and drop the first 1000 rows.
features = data[list(sign_sources)].iloc[1000:].reset_index(drop = True)