In [1]:
# SUPERSMOOTHER MEAN REVERTING STRATEGY

import yfinance as yf
import pandas as pd 
import numpy as np
from scipy.signal import filtfilt, freqz, butter
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
import plotly.express as px
import matplotlib.pyplot as plt
from scipy.stats import t

In [1]:
# INDICATORS 

# SuperSmoother
def SM(close,T):
    """Smooth `close` with a two-pole SuperSmoother of critical period `T`.

    The filter coefficients are derived from `T` and applied with
    ``filtfilt``, i.e. once forwards and once backwards over the whole
    input, so the returned array has the same length as `close`.

    NOTE(review): because ``filtfilt`` also runs backwards over the series,
    each output sample depends on later input samples - confirm this is
    acceptable wherever the result feeds a trading signal.
    """
    angle = np.sqrt(2)*np.pi/T
    a = np.exp(-angle)

    # Feedback terms of the recurrence y[k] = c1*(x[k]+x[k-1])/2 + c2*y[k-1] + c3*y[k-2]
    c2 = 2*a*np.cos(angle)
    c3 = -a*a
    # c1 is chosen so that the coefficients sum to one (unity gain at DC).
    c1 = 1-c2-c3

    numerator = np.full(2, 0.5*c1)
    denominator = np.array([1,-c2,-c3])

    return filtfilt(numerator,denominator,close)

# Simple Moving Average
def SMA(close,T):
    """Return the rolling mean of `close` over a window of `T` samples.

    The result is a pandas Series; the first ``T - 1`` entries are NaN
    because the window is not yet full.
    """
    prices = pd.Series(close)
    window = prices.rolling(T)
    return window.mean()

# Bollinger Bands 
def Vol_L(close,T,smooth,num_std = 2):
    """Lower band: `smooth` minus `num_std` rolling standard deviations.

    The standard deviation is taken over a rolling window of `T` samples of
    `close`; `smooth` is the centre line the band is offset from.
    """
    rolling_std = pd.Series(close).rolling(T).std()
    band_width = num_std*rolling_std
    return smooth - band_width

def Vol_U(close,T,smooth,num_std = 2):
    """Upper band: `smooth` plus `num_std` rolling standard deviations.

    The standard deviation is taken over a rolling window of `T` samples of
    `close`; `smooth` is the centre line the band is offset from.
    """
    rolling_std = pd.Series(close).rolling(T).std()
    band_width = num_std*rolling_std
    return smooth + band_width

# Butterworth 
def BUTT(close,wc, N = 1):
    """Smooth `close` with an order-`N` Butterworth filter of cutoff `wc`.

    `wc` is passed straight to ``scipy.signal.butter`` and the resulting
    coefficients are applied with ``filtfilt`` (forwards and backwards over
    the whole input), so the output has the same length as `close`.
    """
    numerator, denominator = butter(N,wc)
    smoothed = filtfilt(numerator,denominator,close)
    return smoothed

In [6]:
# BB STRATEGY

class strat_BB(Strategy):
    """Mean-reversion strategy trading crossings of volatility bands.

    The bands are centred on a Butterworth-smoothed close (``BUTT``) and lie
    ``N`` rolling standard deviations (window ``T``) above and below it.
    A long is opened when ``crossover(lower_band, Close)`` fires and a short
    when ``crossover(Close, upper_band)`` fires; any open position is closed
    first and every entry carries a stop-loss ``sl_pct`` away from the
    current close.
    """

    T = 10 # cutoff period for SM or SMA
    Wc = 2/T # cutoff frequency for the Butterworth filter
    N = 2 # std dev from mean
    sl_pct = 0.015

    # A Butterworth with Wc = 2/T has the same Wc as an SM of T periods, but the SMA differs.
    # Although one could argue that the SM has the same Wc as the SMA and therefore
    # the SMA has the same Wc as the BUTT.
    # NOTE(review): Wc is evaluated once, when the class body runs, so overriding
    # T afterwards does not update Wc - confirm this is intended.

    def init(self):
        """Build the upper and lower band indicators from the close series."""

        # Centre line
        # NOTE(review): BUTT applies filtfilt over the full close series, which
        # also filters backwards in time - check for look-ahead in the bands.
        prices = self.data.Close
        centre = BUTT(prices,self.Wc)

        # Indicators
        self.upper_band = self.I(Vol_U, prices, self.T, centre, self.N)
        self.lower_band = self.I(Vol_L, prices, self.T, centre, self.N)

    def next(self):
        """Open a long or a short when a band crossing fires on this bar."""

        last_close = self.data.Close[-1]

        # Buy: lower band has just crossed over the close
        if crossover(self.lower_band,self.data.Close):
            stop = last_close*(1-self.sl_pct)
            self.position.close()
            self.buy(sl = stop)
            return

        # Sell: close has just crossed over the upper band
        if crossover(self.data.Close,self.upper_band):
            stop = last_close*(1+self.sl_pct)
            self.position.close()
            self.sell(sl = stop)

In [8]:
# DATA IMPORT AND BACKTESTING
# Runs strat_BB on the first 100 symbols of the S&P 500 spreadsheet and
# collects annualized return, annualized volatility and trade count per ticker.

xls = r'C:\Users\Usuario\Desktop\TFG\S&P 500 Companies (Standard and Poor 500).xlsx'
file_df = pd.read_excel(xls)
all_tickers = file_df['Symbol']
tickers = all_tickers[0:100]

# Backtest settings are identical for every ticker, so they are set once.
iCash = 10000
com = 0.00

mu = []; sigma = []; rets = []; ntrades = []
bt = None  # last Backtest that ran successfully; plotted after the loop

# Iterate over the values directly: label lookups such as tickers[i] only work
# while the slice above happens to start at label 0.
for ticker in tickers:
    try:
        df = yf.download(ticker, start = '2000-01-01', end = '2023-01-01', progress=False)

        # BACKTESTING
        ticker_bt = Backtest(df, strat_BB, cash = iCash, commission = com, exclusive_orders = True)

        # OUTPUTS
        output = ticker_bt.run()
        # rets holds the total return; it previously received '# Trades' by mistake.
        stats = (output['Return [%]'], output['Return (Ann.) [%]'],
                 output['Volatility (Ann.) [%]'], output['# Trades'])

    except Exception as exc:
        # Keep going after a failed download/backtest, but report which ticker
        # was skipped and why. The None placeholders are removed by dropna() below.
        print(f'Skipping {ticker}: {exc!r}')
        stats = (None, None, None, None)
    else:
        bt = ticker_bt

    # Append to all four lists together so they stay aligned with `tickers`.
    rets.append(stats[0]); mu.append(stats[1]); sigma.append(stats[2]); ntrades.append(stats[3])

# RESULTS
res = pd.DataFrame({'Ticker':tickers, 'Annualized Returns (%)':mu, 'Annualized Volatility (%)':sigma, 'Trades':ntrades}).dropna()
res = res[res['Trades'] != 0]
res = res[res['Annualized Volatility (%)'] < 200]
# numeric_only=True skips the string 'Ticker' column explicitly instead of
# relying on pandas dropping it with a warning (or raising on newer versions).
mean_values = res.mean(numeric_only=True)
mean_row = pd.DataFrame(mean_values).T
res = pd.concat([res, mean_row])

#res.to_excel(r'C:\Users\Usuario\Desktop\TFG\Resultados_BT_comparacion.xlsx', sheet_name='Butter', index = False)
# Only plot when at least one backtest actually ran.
if bt is not None:
    bt.plot(plot_volume = False)
res

  mean_values = res.mean()
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],
  formatter=DatetimeTickFormatter(days=['%d %b', '%a %d'],


Unnamed: 0,Ticker,Annualized Returns (%),Annualized Volatility (%),Trades
0,MMM,8.525793,17.776156,47.0
1,ACE,-20.535103,20.567218,7.0
2,ABT,8.765398,16.300109,51.0
3,ANF,18.750417,37.965193,58.0
4,ACN,2.601184,18.337394,53.0
0,,3.621538,22.189214,43.2


$$T\text{-test}:\quad t = \frac{\bar{x}-\mu}{s/\sqrt{n}}$$

In [3]:
#------------------------------------------------------------------------------------------
# T-TEST 
#------------------------------------------------------------------------------------------
# Like a Z-test but with unknown sigma (sample standard deviation needs to be calculated)
# For small sample size (<30)
# As n approaches infinity, the T-Distribution approaches the Z-Distribution
# Data should be normally distributed
# Degrees of freedom (df) = sample size - 1
#------------------------------------------------------------------------------------------
def Ttest(s, mean, mu, n, alpha = 0.05, alternative = 'two-sided'):
    """Run a one-sample t-test and plot the rejection region.

    Computes ``t = (mean - mu) * sqrt(n) / s`` with ``n - 1`` degrees of
    freedom, draws the t-distribution density on [-5, 5) with the rejection
    region shaded and the statistic marked, puts the decision in the plot
    title and prints the critical value(s) together with the statistic.

    Parameters
    ----------
    s : float
        Standard deviation of the sample (must be positive).
    mean : float
        Sample mean.
    mu : float
        Mean under the null hypothesis.
    n : int
        Sample size (at least 2).
    alpha : float
        Significance level.
    alternative : {'two-sided', 'greater', 'smaller'}
        Direction of the alternative hypothesis.

    Raises
    ------
    ValueError
        If `alternative` is not one of the supported values, `n` is below 2,
        or `s` is not positive. The checks run before anything is drawn.
    """
    # Validate first: an unknown alternative used to leave the critical value
    # undefined and fail with a NameError after half the figure was drawn.
    if alternative not in ('smaller', 'greater', 'two-sided'):
        raise ValueError(f"alternative must be 'smaller', 'greater' or 'two-sided', got {alternative!r}")
    if n < 2:
        raise ValueError("n must be at least 2 so that n - 1 degrees of freedom is positive")
    if not s > 0:
        raise ValueError("s must be a positive standard deviation")

    df = n - 1
    tval = (mean-mu)*np.sqrt(n)/s

    x = np.arange(-5,5,0.1)
    plt.plot(x,t.pdf(x,df)); plt.xlabel('Standard Deviations From Mean'); plt.ylabel('Probability')

    if alternative == 'smaller':
        tcritic = t.ppf(alpha,df)
        reject = tval < tcritic
        area = np.arange(-5,tcritic,0.1)
        plt.fill_between(area,t.pdf(area,df), label = 'Rejection Region')

    elif alternative == 'greater':
        tcritic = t.ppf(1-alpha,df)
        reject = tval > tcritic
        area = np.arange(tcritic,5,0.1)
        plt.fill_between(area,t.pdf(area,df), label = 'Rejection Region')

    else:
        # Two-sided: tcritic is the lower (negative) critical value and the
        # upper one is its mirror image, each tail holding alpha/2.
        tcritic = t.ppf(alpha/2,df)
        upper = t.ppf(1-alpha/2,df)
        reject = tval < tcritic or tval > upper
        area1 = np.arange(upper,5,0.1)
        area2 = np.arange(-5,tcritic,0.1)
        plt.fill_between(area1,t.pdf(area1,df), color = 'b')
        plt.fill_between(area2,t.pdf(area2,df), color = 'b', label = 'Rejection Region')

    plt.scatter(tval,0, label= f'T-statistics: {round(tval,2)}'); plt.legend()

    if reject:
        plt.title("Reject Null Hypothesis (H1 is true)")
    else:
        plt.title("Fail to Reject Null Hypothesis (uncertainty about Ho)")

    if alternative == 'two-sided':
        print(f'Critical values: {round(tcritic,3)} and {round(-tcritic,3)}. T-statistics is t = {round(tval,3)}')
    else:
        print(f'Critical value: {round(tcritic,3)}. T-statistics is t = {round(tval,3)}')

In [None]:
# HYPOTHESIS TEST

# Is the strategy better than the market (mean annualized return > mu)?
# H0: mean <= mu (no better than the market)
# H1: mean >  mu (better than the market)
# NOTE(review): this cell used to describe the benchmark as mu = 10 while the
# code below uses mu = 9.5 - confirm which value is intended.

data = pd.read_excel(r'C:\Users\Usuario\Desktop\TFG\Resultados_BT_comparacion.xlsx', sheet_name='SuperSmoother')

# The results table gets a mean row (with no ticker) appended before export;
# that row is a summary, not an observation, so keep it out of the sample.
if 'Ticker' in data.columns:
    data = data.dropna(subset=['Ticker'])

x = data['Annualized Returns (%)'].dropna()
# ddof=1 gives the sample standard deviation that the t statistic (n - 1
# degrees of freedom) requires; np.std defaults to the population formula.
mean = np.mean(x); s = np.std(x, ddof=1); n = len(x); mu = 9.5
Ttest(s, mean, mu, n, alpha = 0.05, alternative = 'greater')


In [22]:
# OPTIMIZATION
# Prints the sum of the denominator (A) and numerator (B) coefficients of a
# first-order Butterworth filter, one per line, so they can be compared.
N, wc = 1, 0.8
B, A = butter(N, wc)
print(sum(A), sum(B), sep='\n')

1.5095254494944288
1.5095254494944288
