In [1]:
import numpy as np
import pandas as pd

import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
from statsmodels.regression.rolling import RollingOLS

import yfinance as yf
import seaborn
import matplotlib.pyplot as plt

import scipy.stats as stats

In [None]:
# Helper functions that compute rolling-window statistics: z-scores, volatility, moving averages and p-values.
def zscore(series):
    """Standardise ``series`` by subtracting its mean and dividing by its std.

    The mean is taken from ``series.mean()`` and the scale from
    ``np.std(series)``, which uses NumPy's default ``ddof=0`` (population
    standard deviation).

    Args:
        series: Numeric data exposing ``.mean()`` and supporting element-wise
            arithmetic, e.g. a pandas Series or a NumPy array.

    Returns:
        The centred values divided by the standard deviation.
    """
    centred = series - series.mean()
    scale = np.std(series)
    return centred / scale
def rollingVolatility(data: list, window: int = 20):
    """Rolling standard deviation of the percentage changes of ``data``.

    Args:
        data: Sequence of numeric observations; it is wrapped in a
            ``pd.Series`` before any computation.
        window: Number of consecutive percentage changes per rolling window.

    Returns:
        pd.Series holding the rolling standard deviation of the percentage
        changes, with NaN entries removed.
    """
    # The first percentage change is undefined (NaN) and is dropped up front.
    returns = pd.Series(data).pct_change().dropna()
    # Windows that are not yet full produce NaN; those are dropped as well.
    volatility = returns.rolling(window=window).std().dropna()
    return volatility

def movingAverage(data: list, window: int = 20):
    """Simple moving average of ``data`` over ``window`` observations.

    Args:
        data: Sequence of numeric observations; wrapped in a ``pd.Series``.
        window: Number of observations averaged in each rolling window.

    Returns:
        pd.Series of rolling means with NaN entries (incomplete windows)
        removed.
    """
    rolling_mean = pd.Series(data).rolling(window=window).mean()
    return rolling_mean.dropna()

def standardiseMovingAverage(data: list, window: int = 20):
    """Z-score the ``window``-period moving average of ``data``.

    Args:
        data: Sequence of numeric observations passed to ``movingAverage``.
        window: Moving-average window length.

    Returns:
        The result of ``zscore`` applied to ``movingAverage(data, window)``.
    """
    smoothed = movingAverage(data, window)
    return zscore(smoothed)

def p_value_marginal(data: list, window: int = 20):
    """Two-sided normal p-value of every observation relative to its moving average.

    Each value in ``data`` is standardised with the mean and sample standard
    deviation of the ``window``-period moving average of ``data``. The
    resulting z-score is turned into a two-sided p-value under a standard
    normal distribution, ``2 * (1 - Phi(|z|))``.

    The previous implementation subtracted a scalar from a pandas ``Rolling``
    object (which raises) and evaluated ``1 - 2 * Phi(|z|)``, which lies in
    [-1, 0] and therefore is not a p-value.

    Args:
        data: Numeric observations (list, NumPy array or pd.Series).
        window: Length of the moving-average window.

    Returns:
        pd.Series of p-values in [0, 1], indexed like ``pd.Series(data)``.
        Entries are NaN when the moving-average mean or standard deviation
        is undefined (too few observations for ``window``) or when the
        z-score is 0/0 (constant input).
    """
    prices = pd.Series(data)

    # Same quantity movingAverage() produces; the NaN warm-up rows need not be
    # dropped because mean() and std() skip NaN by default.
    moving_avg = prices.rolling(window=window).mean()
    ma_mean = moving_avg.mean()
    ma_std = moving_avg.std()

    z_score = (prices - ma_mean) / ma_std

    # sf(x) == 1 - cdf(x); doubling the upper tail gives the two-sided p-value.
    p_values = 2 * stats.norm.sf(z_score.abs().to_numpy())

    # Keep the original index so callers can align the result with other
    # index-carrying series (e.g. rolling volatility).
    return pd.Series(p_values, index=prices.index)

def p_spread_reverts_based_on_marginal_probability_and_volatility(data_stock_A, data_stock_B, window: int = 20, marginal_volatility_sensitivity: float = 0.5):
    """Combine both stocks' marginal p-values, weighted by rolling volatility.

    For each stock the value returned by ``p_value_marginal`` is multiplied by
    ``marginal_volatility_sensitivity * rollingVolatility(...)``. The two
    products are summed and passed through the logistic sigmoid.

    Args:
        data_stock_A: Observations for the first stock.
        data_stock_B: Observations for the second stock.
        window: Window length forwarded to ``p_value_marginal`` and
            ``rollingVolatility``.
        marginal_volatility_sensitivity: Scalar multiplier applied to each
            rolling-volatility series.

    Returns:
        The sigmoid-transformed weighted sum with NaN entries dropped.
    """
    series_pair = (data_stock_A, data_stock_B)

    marginal_a, marginal_b = [
        p_value_marginal(prices, window) for prices in series_pair
    ]
    weight_a, weight_b = [
        marginal_volatility_sensitivity * rollingVolatility(prices, window)
        for prices in series_pair
    ]

    blended = weight_a * marginal_a + weight_b * marginal_b

    # Logistic sigmoid: squashes any real value into the open interval (0, 1).
    squashed = 1 / (1 + np.exp(-blended))
    return squashed.dropna()

def p_spread_reverts(normalised_spread_data):
    """Evaluate ``1 - 2 * Phi(|x|)`` on a normalised spread (Phi = standard normal CDF).

    NOTE(review): as written the result lies in [-1, 0] (0 at x == 0, tending
    to -1 as |x| grows), so it is not a probability despite the name. Confirm
    whether ``2 * (1 - Phi(|x|))`` or ``2 * Phi(|x|) - 1`` was intended.

    Args:
        normalised_spread_data: Scalar or array-like of normalised spread
            values.

    Returns:
        ``1 - 2 * stats.norm.cdf(abs(normalised_spread_data))``.
    """
    magnitude = abs(normalised_spread_data)
    cumulative = stats.norm.cdf(magnitude)
    return 1 - 2 * cumulative



In [None]:
# NOTE(review): S1 and S2 are not defined in the cells visible here; they must
# be assigned (presumably the two stocks' price series) before this cell runs.
config_df = {
    'stock A timeseries': S1,
    'stock B timeseries': S2,
    'Joint marginal probability': p_spread_reverts_based_on_marginal_probability_and_volatility(S1, S2),
    # This entry previously had no value, which made the whole cell a SyntaxError.
    # The spread is assumed to be the plain difference S1 - S2 — TODO confirm
    # against the intended definition (e.g. hedge-ratio-weighted difference or ratio).
    'spread reverts probability': p_spread_reverts(zscore(S1 - S2)),
}
resulting_stats = pd.DataFrame()