In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

import statsmodels.api as sm
from statsmodels.stats.diagnostic import acorr_ljungbox
from scipy.stats import binomtest as binom_test

from datetime import datetime, timedelta
from pathlib import Path
from scipy.stats import t

In [18]:
# Part one of the Project - S&P 500 data retrieval and processing
#
# Downloads the index history with yfinance and reduces it to a single-column
# frame named "Price" (adjusted close when available, otherwise close,
# otherwise the first column of whatever was returned).

sp500_ticker = "^GSPC"
start_date = '2000-01-01'
end_date = None

sp500_raw = yf.download(sp500_ticker, start=start_date, end=end_date, progress=False)

# Fail fast with a clear message instead of letting an empty frame propagate
# into the statistical tests further down the notebook.
if sp500_raw.empty:
    raise ValueError(
        f"No price data returned for {sp500_ticker!r}; "
        "check the ticker symbol, the date range and the network connection."
    )

# The MultiIndex case must be tested FIRST: `'Adj Close' in columns` is also
# True for a (field, ticker) MultiIndex (partial-key match), and selecting /
# renaming with the flat key would leave the columns as a MultiIndex instead
# of producing a flat "Price" column.
if isinstance(sp500_raw.columns, pd.MultiIndex):
    price_candidates = [('Adj Close', sp500_ticker), ('Close', sp500_ticker)]
else:
    price_candidates = ['Adj Close', 'Close']

# First candidate that actually exists, in order of preference.
price_key = next((key for key in price_candidates if key in sp500_raw.columns), None)

if price_key is None:
    # Unknown layout: fall back to the first column, as a last resort.
    sp500_data = sp500_raw.iloc[:, 0:1].copy()
else:
    sp500_data = sp500_raw[[price_key]].copy()

# Assigning the column list flattens a MultiIndex into a single plain label.
sp500_data.columns = ['Price']
sp500_data = sp500_data.dropna()



  sp500_raw = yf.download(sp500_ticker, start=start_date, end=end_date, progress=False)


In [19]:
# Daily log returns.
# NOTE: the "log_return" column holds the log *price level*; the actual
# one-day log return is its first difference, stored in "ret_1d".
sp500_data["log_return"] = sp500_data["Price"].pipe(np.log)
sp500_data["ret_1d"] = sp500_data["log_return"] - sp500_data["log_return"].shift(1)
# The first row has no predecessor, so its return is NaN and the row is removed.
sp500_data.dropna(axis=0, how="any", inplace=True)


In [20]:
# Build k-day horizon sums of the log returns, to set up the martingale and
# supermartingale tests.

def make_k_day_horizon(df, ret_col="ret_1d", ks=(1, 5, 20, 60)):
    """
    Add one column of k-period summed returns per horizon in `ks`.

    For every k, the column ``ret_{k}d`` at row i holds the sum of `ret_col`
    over rows i .. i+k-1 (a trailing rolling sum moved k-1 rows up), so the
    last k-1 rows have no complete window and come out as NaN.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing `ret_col`. It is modified IN PLACE: the new columns
        are added and every row containing a NaN in ANY column is dropped.
    ret_col : str
        Name of the column holding the one-period returns to aggregate.
    ks : iterable of int
        Horizons (window lengths in rows); each must be a positive integer.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in as `df`.

    Raises
    ------
    ValueError
        If any horizon is not a positive integer. Validation happens before
        `df` is touched, so a bad horizon never leaves it half-modified.
    """
    horizons = list(ks)
    for k in horizons:
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"Horizon k must be a positive integer, got {k!r}.")

    # Snapshot the source series once. Re-reading df[ret_col] inside the loop
    # would pick up an already-aggregated column whenever `ret_col` coincides
    # with one of the generated "ret_{k}d" names.
    base = df[ret_col].copy()

    for k in horizons:
        df[f"ret_{k}d"] = base.rolling(window=k).sum().shift(-(k - 1))

    df.dropna(inplace=True)
    return df

# Attach the 1-, 5-, 20- and 60-day summed-return columns to the S&P 500 frame.
sp500_data = make_k_day_horizon(
    sp500_data,
    ret_col="ret_1d",
    ks=(1, 5, 20, 60),
)


In [21]:
#Mean Return Super and Sub Martingale Tests 
# Null Hypothesis: The return series is a martingale (i.e., the mean return is zero)
# Alternative Hypothesis: The return series is a supermartingale (i.e., the mean return is negative) or submartingale (i.e., the mean return is positive)
# When we have mean > 0 (and a respectively significant p-value) then we submaringale with positive drift
# When we have mean < 0 (and a respectively significant p-value) then we supermartingale with negative drift
def test_mean_return(returns):
    """
    Compute one-sided t-test statistics for mean return.
    Returns (mean_return, std_return, se_return, t_stat, p_value_super, p_value_sub, p_value_two)
    where:
      - p_value_super is the one-sided p-value for the alternative mean < 0 (supermartingale)
      - p_value_sub   is the one-sided p-value for the alternative mean > 0 (submartingale)
      - p_value_two   is the two-sided p-value for the alternative mean != 0
    """
    r = returns.values if hasattr(returns, "values") else np.array(returns)
    n = len(r)
    if n < 2:
        raise ValueError("Not enough data points to perform the test.")
    mean_return = np.mean(r)
    std_return = np.std(r, ddof=1)
    se_return = std_return / np.sqrt(n)
   # need to handel the zero variance problem
    if se_return == 0:
        t_stat = 0.0
    else:
        t_stat = mean_return / se_return

    # look at the one sided p-values
    p_value_super = t.cdf(t_stat, df=n-1)  
    p_value_sub = 1 - p_value_super       
    
    p_value_two = 2 * min(p_value_super, p_value_sub)

    return mean_return, std_return, se_return, t_stat, p_value_super, p_value_sub, p_value_two

# Printing the results table

def print_mean_return_results(returns, alpha=0.05, title="TEST 0 – Mean Daily Return"):
    """
    Run `test_mean_return` on `returns` and print a formatted summary.

    Parameters
    ----------
    returns : array-like
        Return observations, passed unchanged to `test_mean_return`.
    alpha : float, default 0.05
        Significance level applied to the two-sided p-value when choosing the
        interpretation line. Must lie strictly between 0 and 1.
    title : str, default "TEST 0 – Mean Daily Return"
        Heading printed above the table; override it when the series is not
        the daily return (e.g. a multi-day horizon).

    Returns
    -------
    None
        The summary is written to standard output.

    Raises
    ------
    ValueError
        If `alpha` is not strictly between 0 and 1, or (from
        `test_mean_return`) if there are fewer than two observations.
    """
    if not 0 < alpha < 1:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}.")

    (mean, std, se, t_stat,
     p_super, p_sub, p_two) = test_mean_return(returns)

    n = len(returns)

    print("\n==============================")
    print(title)
    print("==============================")
    print(f"Number of obs:                    {n}")
    print(f"Mean return:                      {mean:.6e}")
    print(f"Std dev:                          {std:.6e}")
    print(f"Std error of mean:                {se:.6e}")
    print(f"t-statistic:                      {t_stat:.3f}")
    print(f"\nTwo-sided p-value (H1: mean ≠ 0): {p_two:.4g}")
    print(f"One-sided p-value (H1: mean < 0 → supermartingale): {p_super:.4g}")
    print(f"One-sided p-value (H1: mean > 0 → submartingale):   {p_sub:.4g}")

    # Interpretation: significance is judged on the two-sided p-value and the
    # direction of the drift is read off the sign of the sample mean.
    print("\nInterpretation:")
    if p_two < alpha:
        if mean > 0:
            print("→ Significant *positive* drift: SUBMARTINGALE behaviour.")
        else:
            print("→ Significant *negative* drift: SUPERMARTINGALE behaviour.")
    else:
        print("→ Mean return not significantly different from zero: MARTINGALE-compatible.")

# Report the drift test for the one-day log returns.
print_mean_return_results(returns=sp500_data["ret_1d"])



TEST 0 – Mean Daily Return
Number of obs:                    6448
Mean return:                      2.313184e-04
Std dev:                          1.227132e-02
Std error of mean:                1.528195e-04
t-statistic:                      1.514

Two-sided p-value (H1: mean ≠ 0): 0.1302
One-sided p-value (H1: mean < 0 → supermartingale): 0.9349
One-sided p-value (H1: mean > 0 → submartingale):   0.06508

Interpretation:
→ Mean return not significantly different from zero: MARTINGALE-compatible.
