In [None]:
import re
import time
import math
import random
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib as mpl

import pandas_datareader as web
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

from datetime import datetime
from datetime import timedelta
from matplotlib.lines import Line2D
from matplotlib.collections import LineCollection
from sklearn.linear_model import LinearRegression

In [None]:
def load_spy_extended():
    """Rebuild the extended daily S&P 500 series together with a risk-free rate.

    Data sources read by this function:
      * 'data/dividend_data_shiller.csv' ('Year', 'Month', 'dividend' columns),
      * ``Backtest.load_additional_dfs`` for '^GSPC' and 'SPY',
      * 'data/extended_spy_data.csv' ('Date' as YYYYMMDD, 'Daily Return'),
      * 'data/ie_data.xls' (sheet 'Data', 'Date' and 'Rate GS10' columns),
      * FRED series 'TB3MS' via pandas_datareader.

    The return series are spliced with ``bfill(axis=1).iloc[:, 0]``, i.e. the first listed
    source wins wherever it has a value and the next one fills the gaps.

    Returns:
        DataFrame with 'Close' (cumulative growth, first row set to 1) and 'Risk Free Rate'.
    """
    # This method is how I generated the daily data from 1885 to present. I'm leaving it in here
    # so people can check my work. If you want any of the datasets used here let me know.

    df_shiller_dividend = pd.read_csv('data/dividend_data_shiller.csv').dropna()

    df_gspc = Backtest.load_additional_dfs('^GSPC')
    df_gspc['Close (Sim)'] = df_gspc['Close'].copy()
    # Last available trading day of every month; the final (latest) month is dropped.
    df_gspc_monthly = df_gspc.loc[df_gspc.groupby(df_gspc.index.to_period('M')) \
                                            .apply(lambda x: x.index.max())][:-1]
    df_gspc = df_gspc.loc[df_gspc.index <= datetime(2022, 11, 1)]

    BASE_PRICE = 16.66
    previous_close = BASE_PRICE
    previous_tr = BASE_PRICE

    for index, row in df_gspc.loc[datetime(1950, 1, 4):df_gspc_monthly.index[-3]].iterrows():
        dividend = 0
        if index in df_gspc_monthly.index:
            dividend = df_shiller_dividend.loc[(df_shiller_dividend['Year'] == index.year)
                                               & (df_shiller_dividend['Month'] == index.month), 'dividend'].iloc[0]

        # Plain parentheses: the previous `[previous_close - dividend]` was a one-element list, which
        # silently turned every simulated value into a 1-element ndarray instead of a scalar.
        new_tr = previous_tr * (row['Close'] / (previous_close - dividend))
        df_gspc.loc[index, 'Close (Sim)'] = new_tr

        previous_close = df_gspc.loc[index, 'Close']
        previous_tr = new_tr

    df_gspc = df_gspc[['Close (Sim)']]

    # Download SPY once; its index is reused further down to tell real SPY dates from simulated ones.
    df_spy_raw = Backtest.load_additional_dfs('SPY')
    spy_dates = df_spy_raw.index

    df_vfinx_gspc = df_spy_raw.pct_change().merge(df_gspc.pct_change(),
                                                  how='outer',
                                                  left_index=True,
                                                  right_index=True)

    # Prefer the SPY return, fall back to the simulated ^GSPC return.
    df_vfinx_gspc['Close'] = df_vfinx_gspc[['Close', 'Close (Sim)']].bfill(axis=1).iloc[:, 0]
    df_vfinx_gspc['Close'] = (df_vfinx_gspc['Close']+1).cumprod()
    df_vfinx_gspc.loc[df_vfinx_gspc.index[0], 'Close'] = 1
    df_vfinx_gspc = df_vfinx_gspc[['Close']]

    df_spy_extended = pd.read_csv('data/extended_spy_data.csv', index_col = False)
    df_spy_extended['Date'] = pd.to_datetime(df_spy_extended['Date'], format='%Y%m%d')
    df_spy_extended = df_spy_extended.set_index('Date')
    df_spy_extended['Close'] = (df_spy_extended['Daily Return'] + 1).cumprod()

    df_spy_extended = df_spy_extended.pct_change().merge(df_vfinx_gspc.pct_change(),
                                                         how='outer',
                                                         left_index=True,
                                                         right_index=True)
    # Close_x comes from the extended CSV, Close_y from the SPY/^GSPC splice above.
    df_spy_extended['Close'] = df_spy_extended[['Close_x', 'Close_y']].bfill(axis=1).iloc[:, 0]
    df_spy_extended['Close'] = (df_spy_extended['Close']+1).cumprod()
    df_spy_extended.loc[df_spy_extended.index[0], 'Close'] = 1

    df_shiller = pd.read_excel('data/ie_data.xls', sheet_name='Data', header=7, nrows=1831, dtype={'Date': 'string'})
    df_shiller['Date'] = df_shiller['Date'].apply(lambda x: x.replace('.', '-'))
    # A date string ending in '-1' is rewritten to '-10' before parsing
    # (presumably October lost its trailing zero in the sheet - TODO confirm).
    df_shiller.index = df_shiller['Date'].apply(lambda x: re.sub('-1$', '-10', x))
    df_shiller.index = pd.to_datetime(df_shiller.index)
    df_shiller = df_shiller.loc[:, ['Rate GS10']]
    df_shiller['Rate GS10'] -= 1  # Adjust to make closer to risk free rate. This does have a pretty big impact on results

    df_tbill = web.DataReader('TB3MS', 'fred', datetime(1934, 1, 1))
    df_shiller = df_shiller.merge(df_tbill, how='outer', left_index=True, right_index=True)
    # T-bill rate where available, otherwise the adjusted GS10 rate.
    df_shiller['RFR'] = df_shiller[['TB3MS', 'Rate GS10']].bfill(axis=1).iloc[:, 0]

    df_spy_extended = df_spy_extended.merge(df_shiller, how='left', left_index=True, right_index=True)
    df_spy_extended['Risk Free Rate'] = df_spy_extended['RFR'].interpolate()
    df_spy_extended = df_spy_extended[['Close', 'Risk Free Rate']].dropna()

    # A daily cost is subtracted only on dates that are not in the SPY index.
    total_costs = 0.000945 / 252
    df_spy_extended.loc[spy_dates,
                        'Pct Change'] = df_spy_extended['Close'].pct_change()
    df_spy_extended.loc[~df_spy_extended.index.isin(spy_dates),
                        'Pct Change'] = df_spy_extended['Close'].pct_change() - total_costs

    df_spy_extended['Close'] = (df_spy_extended['Pct Change'] + 1).cumprod()
    df_spy_extended.loc[df_spy_extended.index[0], 'Close'] = 1
    df_spy_extended = df_spy_extended[['Close', 'Risk Free Rate']].dropna()

    return df_spy_extended

In [None]:
def load_additional_dfs(ticker):
    """Download the full available history for ``ticker`` and keep only the closing prices.

    Args:
        ticker: Symbol handed to ``yf.Ticker``.

    Returns:
        DataFrame holding just the 'Close' column of ``history(period='max')``.
    """
    full_history = yf.Ticker(ticker).history(period='max')
    return full_history[['Close']]

In [None]:
def load_us_market_data(data_path):
    """Splice a local US market CSV with SPY prices and FRED T-bill rates.

    Args:
        data_path: CSV path; the code reads its 'Date', 'Close' and 'Risk Free Rate' columns.

    Returns:
        DataFrame with 'Close' (cumulative growth, first row set to 1) and an
        interpolated 'Risk Free Rate'.
    """
    csv_data = pd.read_csv(data_path)
    csv_data['Date'] = pd.to_datetime(csv_data['Date'])
    csv_data = csv_data.set_index('Date')

    # Daily returns of both sources side by side: Close_x is SPY, Close_y is the CSV.
    returns = load_additional_dfs('SPY').pct_change().merge(
        csv_data.pct_change(), how='outer', left_index=True, right_index=True)
    # CSV return where present, SPY return otherwise.
    spliced_return = returns[['Close_y', 'Close_x']].bfill(axis=1).iloc[:, 0]
    returns['Close'] = (spliced_return + 1).cumprod()
    returns.loc[returns.index[0], 'Close'] = 1
    prices = returns[['Close']]

    tbill = web.DataReader('TB3MS', 'fred', datetime(1934, 1, 1))
    rates = tbill.merge(csv_data, how='outer', left_index=True, right_index=True)
    # CSV rate where present, FRED TB3MS otherwise.
    rates['RFR'] = rates[['Risk Free Rate', 'TB3MS']].bfill(axis=1).iloc[:, 0]

    combined = prices.merge(rates[['RFR']], how='left', left_index=True, right_index=True)
    combined['Risk Free Rate'] = combined['RFR'].interpolate()

    return combined[['Close', 'Risk Free Rate']]

In [None]:
# Source: https://gist.github.com/tclements/7452c0fac3e66e08b886300b4e24e687
def rolling_window(a, window):
    """Return a strided view of ``a`` whose new last axis holds every length-``window`` slice.

    Args:
        a: NumPy array; windows slide along its last axis.
        window: Number of consecutive elements per window.

    Returns:
        Array of shape ``a.shape[:-1] + (a.shape[-1] - window + 1, window)`` built with
        ``as_strided``, so it shares memory with ``a`` rather than copying it.
    """
    n_windows = a.shape[-1] - window + 1
    view_shape = (*a.shape[:-1], n_windows, window)
    # Stepping to the next window advances by one element along the last axis.
    view_strides = (*a.strides, a.strides[-1])
    return np.lib.stride_tricks.as_strided(a, shape=view_shape, strides=view_strides)

In [None]:
def find_middle_month(dates):
    """Pick the entry of ``dates`` closest to the 15th of the month of ``dates[0]``.

    Args:
        dates: Indexable, iterable collection of datetime-like values.

    Returns:
        The element with the smallest absolute distance to the 15th (midnight);
        on a tie the earliest element in iteration order wins.
    """
    def distance_to_mid_month(day):
        # The anchor is looked up inside the key, so an empty input still fails in min().
        mid_month = datetime(dates[0].year, dates[0].month, 15)
        return abs(day - mid_month)

    return min(dates, key=distance_to_mid_month)

In [None]:
def _trailing_ratio(close, lookback, use_sma):
    """Ratio of each window's last close to its mean (use_sma) or first close, NaN-padded at the front."""
    windows = rolling_window(close, lookback)
    reference = np.mean(windows, axis=1) if use_sma else windows[:, 0]
    ratios = windows[:, -1] / reference
    # The first (lookback - 1) rows have no complete window; None becomes NaN in the float array.
    return np.insert(ratios, 0, [None] * (lookback - 1))


def load_df_data(df_spy, 
                 vol_lookback,
                 mom_lookback, 
                 short_mom_lookback,
                 tbill_lookback, 
                 use_lin_reg,
                 use_sma, 
                 monthly, 
                 use_tbill_data):
    """Derive the indicator columns used by the signal logic from a price frame.

    Works on a copy of ``df_spy`` and adds 'Pct Change', 'Volatility', 'Skew', 'Momentum',
    'Short Momentum' and 'Treasury Change'; rows containing any NaN are dropped at the end.

    Args:
        df_spy: DataFrame with a 'Close' column. If ``use_tbill_data`` is false it must
            also contain a 'Risk Free Rate' column.
        vol_lookback: Window length for the annualised (x sqrt(252)) sample standard
            deviation of daily returns.
        mom_lookback: Window length for 'Momentum' and for the rolling skew of 'Close'.
        short_mom_lookback: Window length for 'Short Momentum'.
        tbill_lookback: Window length for 'Treasury Change'.
        use_lin_reg: If true, 'Treasury Change' is the ``linear_regression`` slope over the
            window; otherwise it is last-minus-first of the window.
        use_sma: If true, momentum is last close / window mean; otherwise last / first close.
        monthly: If true, rows before the first mid-month date (see ``find_middle_month``)
            are discarded.
        use_tbill_data: If true, 'Risk Free Rate' is loaded from FRED series 'DTB3' and
            forward-filled onto the price dates.

    Returns:
        New DataFrame with the indicator columns and no NaN rows.
    """
    # If use_tbill_data is not set, there must be a Risk Free Rate column in df_spy
    df_spy = df_spy.copy()

    df_spy['Pct Change'] = df_spy['Close'].pct_change()

    stds = np.std(rolling_window(df_spy['Pct Change'].dropna().to_numpy(),
                                 vol_lookback), axis=1, ddof=1) * (252 ** 0.5)
    # Pad with vol_lookback leading NaNs: the returns are first shortened by dropna() above.
    stds = np.insert(stds, 0, [None] * vol_lookback)
    df_spy['Volatility'] = stds

    df_spy['Skew'] = df_spy['Close'].rolling(mom_lookback).skew()

    close = df_spy['Close'].to_numpy()
    df_spy['Momentum'] = _trailing_ratio(close, mom_lookback, use_sma)
    df_spy['Short Momentum'] = _trailing_ratio(close, short_mom_lookback, use_sma)

    if use_tbill_data:
        df_tbill = web.DataReader('DTB3', 'fred', df_spy.index[0]).dropna()
        df_tbill['Risk Free Rate'] = df_tbill['DTB3']
        df_tbill = df_tbill[['Risk Free Rate']]
        df_spy = df_spy.merge(df_tbill, how='left', left_index=True, right_index=True)
        df_spy['Risk Free Rate'] = df_spy['Risk Free Rate'].ffill()

    if use_lin_reg:
        df_spy['Treasury Change'] = df_spy['Risk Free Rate'] \
                                        .rolling(tbill_lookback) \
                                        .apply(lambda x: linear_regression(x))
    else:
        x = rolling_window(df_spy['Risk Free Rate'].to_numpy(), tbill_lookback)
        treasury_changes = x[:, -1] - x[:, 0]
        treasury_changes = np.insert(treasury_changes, 0, [None] * (tbill_lookback - 1))
        df_spy['Treasury Change'] = treasury_changes

    df_spy = df_spy.dropna()

    if monthly:
        mid_month_dates = df_spy.groupby(df_spy.index.to_period('M')).apply(lambda x: find_middle_month(x.index))
        # .iloc[0]: the Series is indexed by Period, so a bare [0] relied on the deprecated
        # positional fallback for integer keys.
        first_mid_month = mid_month_dates.iloc[0]
        df_spy = df_spy.loc[df_spy.index >= first_mid_month].copy()

    return df_spy

In [None]:
def linear_regression(data):
    """Fit an ordinary least-squares line through ``data`` and return its slope.

    Args:
        data: pandas Series; its index values (cast to float) are the regressor and its
            values are the response.

    Returns:
        The single fitted coefficient, i.e. change in value per unit of the float-cast index.
    """
    regressor = data.index.values.reshape(-1, 1).astype(float)
    fitted = LinearRegression().fit(regressor, data.values)
    return fitted.coef_[0]

In [None]:
def calculate_signals(df, lower_vol_cutoff, upper_vol_cutoff, strict_boundaries, exclude_vol, exclude_mom, monthly_calc):
    """Return a copy of ``df`` with a 'State' column produced by ``get_states``.

    Args:
        df: DataFrame with 'Volatility', 'Momentum', 'Short Momentum' and 'Treasury Change'
            columns and a datetime index.
        lower_vol_cutoff, upper_vol_cutoff, strict_boundaries, exclude_vol, exclude_mom:
            Passed through unchanged to ``get_states``.
        monthly_calc: If true, the state is only re-evaluated on each month's mid-month
            row (see ``find_middle_month``), excluding the last month in ``df``; every
            other row repeats the previous state.

    Returns:
        New DataFrame; the input is not modified.
    """
    df = df.copy()
    vol = df['Volatility'].to_numpy()
    mom = df['Momentum'].to_numpy()
    short_mom = df['Short Momentum'].to_numpy()
    treasury = df['Treasury Change'].to_numpy()

    if monthly_calc:
        mid_month_dates = df.groupby(df.index.to_period('M')) \
                            .apply(lambda x: find_middle_month(x.index))
        # [:-1] drops the mid-month row of the final month.
        df_monthly = df.loc[mid_month_dates][:-1]
        # Row positions are taken straight from the index, so the index no longer has to be
        # named 'Date' (the former reset_index()/tmp['Date'] lookup required that).
        monthly_indices = np.flatnonzero(df.index.isin(df_monthly.index)).tolist()
        states = get_states(vol, mom, short_mom, treasury, lower_vol_cutoff, upper_vol_cutoff,
                            strict_boundaries, exclude_vol, exclude_mom, monthly_indices)
    else:
        states = get_states(vol, mom, short_mom, treasury, lower_vol_cutoff, upper_vol_cutoff,
                            strict_boundaries, exclude_vol, exclude_mom)

    df['State'] = states
    return df

In [None]:
def get_states(vol_arr, 
               mom_arr,
               short_mom_arr,
               treasury_arr, 
               lower_vol_cutoff, 
               upper_vol_cutoff, 
               strict_boundaries,
               exclude_vol,
               exclude_mom,
               month_end_indices = None,
               use_lin_reg = None):
    """Classify every row as 'Risk On', 'Risk Mid', 'Risk Off' or 'Risk Alt'.

    Rules per evaluated row:
      * low volatility and high momentum            -> 'Risk On'
      * otherwise medium volatility, high momentum  -> 'Risk Mid'
      * otherwise treasury change above its cutoff  -> 'Risk Off'
      * otherwise                                   -> 'Risk Alt'

    Unless ``strict_boundaries`` is set, the cutoffs are shifted depending on the previous
    state (hysteresis), so a value sitting right at a boundary does not flip the state.

    Args:
        vol_arr, mom_arr, treasury_arr: Equal-length sequences of the indicators.
        short_mom_arr: Accepted for interface compatibility; not used by the rules.
        lower_vol_cutoff, upper_vol_cutoff: Volatility boundaries.
        strict_boundaries: If true, no hysteresis adjustment is applied.
        exclude_vol: If true, the volatility tests always pass.
        exclude_mom: If true, the momentum test always passes.
        month_end_indices: Optional row positions on which the state is re-evaluated; all
            other rows repeat the previous state ('' before the first evaluation).
            ``None`` or an empty collection means every row is evaluated.
        use_lin_reg: Whether the treasury change is a regression slope (then its cutoff
            gets no hysteresis shift). ``None`` falls back to the module-level
            ``USE_LINEAR_REGRESSION``, which is only read when a shift is actually needed.

    Returns:
        List of state strings, one per row.
    """
    # A set gives constant-time membership tests; the truthiness check mirrors the original.
    evaluation_rows = set(month_end_indices) if month_end_indices else None

    def treasury_is_regression_slope():
        return USE_LINEAR_REGRESSION if use_lin_reg is None else use_lin_reg

    states = []
    previous_state = ''
    for idx in range(len(vol_arr)):
        if evaluation_rows is not None and idx not in evaluation_rows:
            states.append(previous_state)
            continue

        vol = vol_arr[idx]
        mom = mom_arr[idx]
        treasury = treasury_arr[idx]

        adjusted_lower_vol_cutoff = lower_vol_cutoff
        adjusted_upper_vol_cutoff = upper_vol_cutoff
        adjusted_mom_cutoff = 1
        adjusted_treasury_cutoff = 0

        if not strict_boundaries:
            if previous_state == 'Risk On':
                adjusted_lower_vol_cutoff += 0.01
                adjusted_mom_cutoff -= 0.02
            elif previous_state == 'Risk Mid':
                adjusted_lower_vol_cutoff -= 0.01
                adjusted_upper_vol_cutoff += 0.01
                adjusted_mom_cutoff -= 0.02
            elif previous_state == 'Risk Off':
                adjusted_lower_vol_cutoff -= 0.01
                adjusted_upper_vol_cutoff -= 0.01
                if not treasury_is_regression_slope():
                    adjusted_treasury_cutoff -= 0.1
                adjusted_mom_cutoff += 0.02
            elif previous_state == 'Risk Alt':
                adjusted_lower_vol_cutoff -= 0.01
                adjusted_upper_vol_cutoff -= 0.01
                if not treasury_is_regression_slope():
                    adjusted_treasury_cutoff += 0.1
                adjusted_mom_cutoff += 0.02

        low_vol = exclude_vol or vol <= adjusted_lower_vol_cutoff
        med_vol = exclude_vol or ((vol >= adjusted_lower_vol_cutoff) and (vol <= adjusted_upper_vol_cutoff))

        high_mom = exclude_mom or mom >= adjusted_mom_cutoff

        if low_vol and high_mom:
            new_state = 'Risk On'
        elif (low_vol or med_vol) and high_mom:
            new_state = 'Risk Mid'
        else:
            if (treasury > adjusted_treasury_cutoff):
                new_state = 'Risk Off'
            else:
                new_state = 'Risk Alt'

        states.append(new_state)
        previous_state = new_state
    return states

In [None]:
def plot_signal(df, plot_width, plot_height, 
                show_risk_on, show_risk_mid, show_risk_alt, show_risk_off, 
                low_vol_cutoff, upper_vol_cutoff):
    """Plot the price (log scale) against volatility, optionally shading each risk state.

    Args:
        df: DataFrame with 'Close', 'Volatility' and 'State' columns on a datetime index.
        plot_width, plot_height: Figure size handed to ``plt.subplots``.
        show_risk_on, show_risk_mid, show_risk_alt, show_risk_off: Whether to shade the
            periods spent in the respective state.
        low_vol_cutoff, upper_vol_cutoff: Volatility cutoffs (fractions) drawn as
            horizontal lines after scaling by 100.

    Returns:
        None; the figure is shown with ``plt.show()``.
    """
    fig, ax1 = plt.subplots(figsize=(plot_width, plot_height))

    # Left axis: price.
    ax1.plot(df.index, df['Close'], label='S&P 500', c='black', linewidth=2)
    ax1.margins(x=0)
    ax1.set_yscale('log')
    ax1.set_xlabel('Date', fontsize=14)
    ax1.set_ylabel('S&P 500', fontsize=14)

    # Right axis: volatility in percent plus the two cutoffs.
    ax2 = ax1.twinx()
    ax2.plot(df.index, df['Volatility'] * 100, label='Volatility', c='blue', alpha=0.7)
    ax2.axhline(y=low_vol_cutoff * 100, label='Lower Vol Cutoff', c='green')
    ax2.axhline(y=upper_vol_cutoff * 100, label='Higher Vol Cutoff', c='red')

    # (enabled, state label, shade colour) - drawn in this order, which is also the legend order.
    shaded_states = (
        (show_risk_on, 'Risk On', 'green'),
        (show_risk_mid, 'Risk Mid', 'blue'),
        (show_risk_alt, 'Risk Alt', 'yellow'),
        (show_risk_off, 'Risk Off', 'red'),
    )
    for enabled, state, shade in shaded_states:
        if not enabled:
            continue
        # x in data coordinates, y spanning the full axes height (0..1).
        ax2.fill_between(df.index, 0, 1, where=(df['State'] == state), linewidth=0,
                         transform=ax1.get_xaxis_transform(), color=shade, alpha=0.1, label=state)

    ax2.margins(x=0)
    ax2.set_ylabel('21 Day Volatility (Percent)', fontsize=14)

    first_label = df.dropna().index[0].strftime('%b %Y')
    last_label = df.dropna().index[-1].strftime('%b %Y')
    plt.title(f'Volatility + Momentum Indicator to S&P 500, ' + first_label + ' - ' + last_label,
              fontsize=14)

    fig.legend(loc='upper left', bbox_to_anchor=(0, 1), bbox_transform=ax1.transAxes)
    fig.patch.set_facecolor('white')

    plt.show()

In [None]:
class TickerLoader:
    """Builds the simulated price series (leveraged funds, treasuries, cash, KMLM) for a backtest.

    All series are aligned to the dates of ``df_spy``. FRED data is fetched through
    pandas_datareader (``web``) and ETF data through ``load_additional_dfs``.
    """

    def __init__(self, df_spy, method):
        """Store the inputs.

        Args:
            df_spy: DataFrame on a datetime index with a 'Close' column; an optional
                'Risk Free Rate' column switches some methods away from FRED data.
            method: Strategy name; decides which series ``load_precalculated_dfs`` builds.
        """
        self.df_spy = df_spy
        self.method = method

    # Credits to Leminspector for this function
    def underlying_to_letf(self,
                           underlying,             # Underlying ETF
                           s,                      # Swap exposure (usually around 1.1)
                           E,                      # Expense ratio
                           beta,                   # Leverage
                           start=1,               # Starting price of LETF stock
                           spread=0.004,           # Spread
                           alt_risk_free_rate=None # Alternate risk free rate to use
                           ):
        """Simulate a daily-reset leveraged fund on top of ``underlying``.

        Each day's return is ``beta`` times the underlying's daily change minus a daily cost
        of ``(s * (beta - 1) * I + E) / 252``, where ``I`` is the borrowing rate as a fraction
        plus ``spread``.

        Args:
            underlying: DataFrame with a 'Close' column.
            s: Swap exposure.
            E: Annual expense ratio (fraction).
            beta: Leverage factor.
            start: Price assigned to the first date.
            spread: Added to the borrowing rate.
            alt_risk_free_rate: Optional one-column DataFrame of rates in percent; when
                ``None`` the FRED series 'DFF' is downloaded instead.

        Returns:
            DataFrame with a single 'Close' column whose first row is ``start`` on the
            underlying's first non-missing date.
        """
        daily_prices = underlying['Close']
        daily_change = daily_prices.pct_change().dropna().rename('Daily Change')

        if alt_risk_free_rate is None:  # Earliest date for FFR
            I = web.DataReader('DFF', 'fred', daily_prices.index[0])/100 + spread  # LIBOR + spread
            if I.index[-1] < daily_prices.index[-1]:
                # The rate series ends before the prices: repeat its last value on the final
                # price date. .iloc[-1] is explicit positional access (a bare [-1] on a
                # date-indexed Series relied on a deprecated fallback).
                I.loc[daily_prices.index[-1]] = I['DFF'].iloc[-1]
        else:
            I = alt_risk_free_rate / 100 + spread

        total_costs = (s * (beta - 1) * I + E) / 252

        # Inner join: only dates having both a cost and a return are simulated.
        df = total_costs.join(daily_change, how='inner')

        df['LETF Change'] = df['Daily Change'] * beta - df.iloc[:, 0]
        df['LETF Price'] = start * (1 + df['LETF Change']).cumprod()

        new_df = df['LETF Price'].to_frame(name='Close')

        # The first date has no return, so it is appended as a new last row and then rotated
        # to the front.
        new_df.loc[underlying['Close'].dropna().index[0], 'Close'] = start
        new_df = new_df.reindex(np.roll(new_df.index, shift=1))

        return new_df

    def _align_close_to_spy(self, df):
        """Put ``df['Close']`` on the SPY dates between df's first and last date, forward-filling gaps."""
        # Columbus/Indigenous People's Day and Veteran's Day recognized by Fed, not by stock exchange
        df_spy_subset = self.df_spy.loc[(self.df_spy.index >= df.index[0]) & (self.df_spy.index <= df.index[-1])]
        df = df_spy_subset.merge(df, how = 'left', left_index = True, right_index = True, suffixes = ['_SPY', None])
        df = df[['Close']].ffill().dropna()

        return df.loc[:, ['Close']]

    def calculate_daily_returns_coupon_bond(self, df, n):
        """Add 'Return' and 'Close' columns to ``df`` in place for a coupon bond.

        Args:
            df: DataFrame whose first column is a yield in percent; 'Return' must end up as
                the second column (it is addressed as ``iloc[0, 1]``). The code assumes the
                first row's yield is non-zero.
            n: Bond parameter used as exponent/multiplier; callers in this class pass 30 and
                20 (presumably the maturity in years).

        Returns:
            None; ``df`` is modified.
        """
        # Float from the start: the column is filled with float returns below, and writing
        # floats into an integer column is deprecated in recent pandas.
        df['Return'] = 0.0
        mask_zero = df.iloc[1:, 0] == 0
        mask_not_zero = df.iloc[1:, 0] != 0

        # Row i is selected when row i + 1 matches, i.e. these pick the *previous* day's yield.
        mask_zero_shifted = (df.iloc[:, 0] == 0).shift(-1)
        mask_zero_shifted.iloc[-1] = False
        mask_not_zero_shifted = (df.iloc[:, 0] != 0).shift(-1)
        mask_not_zero_shifted.iloc[-1] = False

        values_zero_previous = df.iloc[:, 0][mask_zero_shifted].values / 100
        returns_zero = values_zero_previous * ((1 / 504) + n)
        df.loc[df.iloc[:, 0] == 0, 'Return'] = returns_zero

        val = df.iloc[1:, 0][mask_not_zero].values / 100
        val_pre = df.iloc[:, 0][mask_not_zero_shifted].values / 100
        returns_non_zero = ((val - val_pre) / val) * (((1 / (1 + val)) ** n) - 1) + (((val + val_pre) / 2) / 252)
        # Placeholder for the first row, which has no previous day.
        returns_non_zero = np.insert(returns_non_zero, 0, 0)
        df.loc[df.iloc[:, 0] != 0, 'Return'] = returns_non_zero

        df.iloc[0, 1] = 0
        df['Close'] = (df['Return'] + 1).cumprod()

    def yield_to_prices(self, df, n):
        """Turn a yield series into a coupon-bond price series aligned to the SPY dates.

        Args:
            df: One-column DataFrame of yields in percent; it is copied, not modified.
            n: Passed to ``calculate_daily_returns_coupon_bond``.

        Returns:
            DataFrame with a single 'Close' column.
        """
        df = df.copy().dropna()
        self.calculate_daily_returns_coupon_bond(df, n)

        return self._align_close_to_spy(df)

    def calculate_daily_returns_no_coupon_bond_yield(self, df, n):
        """Add 'Return' (a growth factor) and 'Close' columns to ``df`` in place for a zero-coupon bond.

        Args:
            df: DataFrame whose first column is a yield in percent; 'Return' must end up as
                the second column (it is addressed as ``iloc[0, 1]``). The code assumes the
                first row's yield is non-zero.
            n: Exponent applied to the yield ratio; callers in this class pass 30 and 20
                (presumably the maturity in years).

        Returns:
            None; ``df`` is modified.
        """
        # Float from the start, see calculate_daily_returns_coupon_bond.
        df['Return'] = 0.0
        mask_not_zero = df.iloc[1:, 0] != 0

        # Row i is selected when row i + 1 matches, i.e. these pick the *previous* day's yield.
        mask_zero_shifted = (df.iloc[:, 0] == 0).shift(-1)
        mask_zero_shifted.iloc[-1] = False
        mask_not_zero_shifted = (df.iloc[:, 0] != 0).shift(-1)
        mask_not_zero_shifted.iloc[-1] = False

        values_zero_previous = df.iloc[:, 0][mask_zero_shifted].values / 100
        returns_zero = (1 + values_zero_previous) ** n
        df.loc[df.iloc[:, 0] == 0, 'Return'] = returns_zero

        values_not_zero = df.iloc[1:, 0][mask_not_zero].values / 100
        values_not_zero_previous = df.iloc[:, 0][mask_not_zero_shifted].values / 100
        returns_non_zero = (((1 + values_not_zero_previous) / (1 + values_not_zero)) ** n) \
                                        * ((1 + values_not_zero) ** (1 / 252))
        # Placeholder for the first row, which has no previous day.
        returns_non_zero = np.insert(returns_non_zero, 0, 0)
        df.loc[df.iloc[:, 0] != 0, 'Return'] = returns_non_zero

        # Growth factor 1 on the first day so that the cumulative product starts at 1.
        df.iloc[0, 1] = 1
        df['Close'] = df['Return'].cumprod()

    def yield_to_prices_zroz(self, df, n):
        """Turn a yield series into a zero-coupon-bond price series aligned to the SPY dates.

        Args:
            df: One-column DataFrame of yields in percent; it is copied, not modified.
            n: Passed to ``calculate_daily_returns_no_coupon_bond_yield``.

        Returns:
            DataFrame with a single 'Close' column.
        """
        df = df.copy().dropna()
        self.calculate_daily_returns_no_coupon_bond_yield(df, n)

        return self._align_close_to_spy(df)

    def load_zroz(self):
        """Simulate ZROZ from FRED 'DGS30' yields, falling back to 'DGS20' where DGS30 is missing.

        Returns:
            DataFrame with a 'Close' column (unlevered, expense ratio 0.0015).
        """
        df_30_yield = web.DataReader('DGS30', 'fred', datetime(1977, 2, 15)).dropna()
        if df_30_yield.index[-1] < self.df_spy.index[-1]:
            # Carry the last published yield forward to the last SPY date.
            df_30_yield.loc[self.df_spy.index[-1], 'DGS30'] = df_30_yield['DGS30'].iloc[-1]
        df_20_yield = web.DataReader('DGS20', 'fred', datetime(1962, 1, 2)).dropna()

        df_zroz_30 = self.yield_to_prices_zroz(df_30_yield, 30)
        df_zroz_20 = self.yield_to_prices_zroz(df_20_yield, 20)

        df_zroz = df_zroz_30.pct_change().merge(df_zroz_20.pct_change(), how='outer',
                                                left_index=True, right_index=True)
        # Close_x is the 30-year return, Close_y the 20-year return.
        df_zroz['Close'] = df_zroz[['Close_x', 'Close_y']].bfill(axis=1).iloc[:, 0]
        df_zroz['Close'] = (df_zroz['Close']+1).cumprod()
        df_zroz.loc[df_zroz.index[0], 'Close'] = 1
        df_zroz = df_zroz[['Close']]

        df_zroz = df_zroz[df_zroz.index.isin(self.df_spy.index)]
        df_zroz = self.underlying_to_letf(df_zroz, s=0, E=0.0015, beta=1, spread=0)

        return df_zroz

    def load_risk_free_return(self):
        """Compound the FRED 'DFF' rate daily (rate / 100 / 252) on the SPY dates.

        Returns:
            DataFrame with a 'Close' column.
        """
        risk_free_return = web.DataReader('DFF', 'fred', datetime(1954, 7, 1))
        risk_free_return = risk_free_return.loc[risk_free_return.index.isin(self.df_spy.index)]
        if risk_free_return.index[-1] < self.df_spy.index[-1]:
            # Carry the last published rate forward to the last SPY date.
            risk_free_return.loc[self.df_spy.index[-1], 'DFF'] = risk_free_return['DFF'].iloc[-1]
        risk_free_return['Close'] = (risk_free_return['DFF'] / 100 / 252 + 1).cumprod()
        risk_free_return = risk_free_return.loc[:, ['Close']]

        return self._align_close_to_spy(risk_free_return)

    def load_kmlm(self):
        """Splice the index history read from ``KMLM_DATA_PATH`` with KMLM ETF prices.

        The Excel sheet 'MLM Index EV (15V) Daily' supplies 'Date' and 'TotalReturnDtd'; it is
        used on SPY dates up to 2020-12-02, and the ETF's own returns fill the remaining dates.
        ``KMLM_DATA_PATH`` is a module-level name that is not defined in this class.

        Returns:
            DataFrame with a 'Close' column (first row set to 1).
        """
        df = pd.read_excel(KMLM_DATA_PATH, sheet_name = 'MLM Index EV (15V) Daily', header = 2)
        df.index = pd.to_datetime(df['Date'])
        df['Close'] = (df['TotalReturnDtd'] + 1).cumprod()
        df = df[['Close']]

        df_spy_subset = self.df_spy.loc[(self.df_spy.index >= df.index[0]) \
                               & (self.df_spy.index <= datetime(2020, 12, 2))]

        df = df_spy_subset.merge(df, how = 'left', left_index = True, right_index = True, suffixes = ['_SPY', None])

        df_kmlm = load_additional_dfs('KMLM')
        df_kmlm = df_kmlm.pct_change().merge(df[['Close']].pct_change(), how='outer', left_index=True, right_index=True)
        # Close_y is the index return (preferred), Close_x the ETF return.
        df_kmlm['Close'] = df_kmlm[['Close_y', 'Close_x']].bfill(axis=1).iloc[:, 0]
        df_kmlm['Close'] = (df_kmlm['Close'] + 1).cumprod()
        df_kmlm.loc[df_kmlm.index[0], 'Close'] = 1
        df_kmlm = df_kmlm[['Close']]

        return df_kmlm

    def load_tmf(self):
        """Simulate TMF (3x) from FRED 'DGS30' yields, falling back to 'DGS20' where DGS30 is missing.

        Returns:
            DataFrame with a 'Close' column.
        """
        df_30_yield = web.DataReader('DGS30', 'fred', datetime(1977, 2, 15)).dropna()
        if df_30_yield.index[-1] < self.df_spy.index[-1]:
            # Carry the last published yield forward to the last SPY date.
            df_30_yield.loc[self.df_spy.index[-1], 'DGS30'] = df_30_yield['DGS30'].iloc[-1]
        df_20_yield = web.DataReader('DGS20', 'fred', datetime(1962, 1, 2)).dropna()

        df_tmf_30 = self.yield_to_prices(df_30_yield, 30)
        df_tmf_20 = self.yield_to_prices(df_20_yield, 20)

        df_tmf = df_tmf_30.pct_change().merge(df_tmf_20.pct_change(), how='outer',
                                              left_index=True, right_index=True)
        # Close_x is the 30-year return, Close_y the 20-year return.
        df_tmf['Close'] = df_tmf[['Close_x', 'Close_y']].bfill(axis=1).iloc[:, 0]
        df_tmf['Close'] = (df_tmf['Close']+1).cumprod()
        df_tmf.loc[df_tmf.index[0], 'Close'] = 1
        df_tmf = df_tmf[['Close']]

        df_tmf = df_tmf[df_tmf.index.isin(self.df_spy.index)]
        df_tmf = self.underlying_to_letf(df_tmf, s=1.1, E=0.0106, beta=3, spread=0)

        return df_tmf

    def _extended_rfr_dfs(self):
        """SSO, UPRO and cash series driven by the 'Risk Free Rate' column of ``df_spy`` instead of FRED."""
        alt_rate = self.df_spy[['Risk Free Rate']]
        df_sso = self.underlying_to_letf(self.df_spy, s=1.1, E=0.0088, beta=2,
                                         spread=0.005, alt_risk_free_rate=alt_rate)
        df_upro = self.underlying_to_letf(self.df_spy, s=1.1, E=0.0091, beta=3,
                                          spread=0.005, alt_risk_free_rate=alt_rate)
        risk_free_return = pd.DataFrame()
        risk_free_return['Close'] = (self.df_spy['Risk Free Rate'] / 100 / 252 + 1).cumprod()
        return {'SSO': df_sso, 'UPRO': df_upro, 'Risk Free Return': risk_free_return}

    def load_precalculated_dfs(self):
        """Build the dictionary of simulated series required by ``self.method``.

        'SSO', 'UPRO' and 'Risk Free Return' are always present. Depending on the method,
        'ZROZ', 'TMF', 'KMLM' or '1.5x' are added. For the risk-free-rate based methods the
        series are derived from ``df_spy['Risk Free Rate']`` when that column exists.

        Returns:
            dict mapping a series name to a DataFrame with a 'Close' column.

        Raises:
            Exception: If ``self.method`` is not one of the known method names.
        """
        df_sso = self.underlying_to_letf(self.df_spy, s=1.1, E=0.0088, beta=2, spread=0.005)
        df_upro = self.underlying_to_letf(self.df_spy, s=1.1, E=0.0091, beta=3, spread=0.005)
        risk_free_return = self.load_risk_free_return()

        # All backtests need these for plotting
        precalculated_dfs = {'SSO': df_sso,
                             'UPRO': df_upro,
                             'Risk Free Return': risk_free_return}

        if self.method in ['standard', 'no risk alt', 'rfr instead of spy', 'upro zroz']:
            precalculated_dfs['ZROZ'] = self.load_zroz()
        elif self.method in ['rfr instead of zroz and spy', 'rfr instead of zroz', 'dynamic', 'simple', 'sso',
                             'upro']:
            if 'Risk Free Rate' in self.df_spy.columns:
                # This is the special case where we are using extended data with rfr data included in df_spy
                # No else statement is required because SSO, UPRO, and RFR are already added to precalculated_dfs
                return self._extended_rfr_dfs()
        elif self.method in ['1.5x']:
            if 'Risk Free Rate' in self.df_spy.columns:
                df_15x = self.underlying_to_letf(self.df_spy, s=1.1, E=0.003, beta=1.5,
                                                 spread=0.005, alt_risk_free_rate=self.df_spy[['Risk Free Rate']])
                extended_dfs = self._extended_rfr_dfs()
                extended_dfs['1.5x'] = df_15x
                return extended_dfs
        elif self.method in ['hfea']:
            precalculated_dfs['TMF'] = self.load_tmf()
        elif self.method in ['kmlm', 'static kmlm']:
            precalculated_dfs['ZROZ'] = self.load_zroz()
            precalculated_dfs['TMF'] = self.load_tmf()
            precalculated_dfs['KMLM'] = self.load_kmlm()
        else:
            raise Exception('Invalid method provided to load precalculated dfs. ' +
                            'Did you forget to add the method to TickerLoader?')

        return precalculated_dfs

In [None]:
class Backtest:
    
    def __init__(self, 
                 method,
                 method_name,
                 df_500,
                 precalculated_dfs,
                 monthly_calculation,
                 rebalance,
                 transaction_cost,
                 start_date,
                 plot_width,
                 plot_height,
                 line_width):
        self.method = method
        self.method_name = method_name
        self.df_500 = df_500
        self.precalculated_dfs = precalculated_dfs
        self.monthly_calculation = monthly_calculation
        self.rebalance = rebalance
        self.transaction_cost = transaction_cost
        self.start_date = start_date
        self.plot_width = plot_width
        self.plot_height = plot_height
        self.line_width = line_width

    def simulate(self):
        self.dates, self.port_vals, self.assets, self.port_vals_SPY, self.current_allocation = \
        self.simulate_backtest(df_500 = self.df_500,
                               method = self.method,
                               start_date = self.start_date,
                               precalculated_dfs = self.precalculated_dfs,
                               monthly_calculation = self.monthly_calculation,
                               rebalance = self.rebalance,
                               transaction_cost = self.transaction_cost)
        return self.dates, self.port_vals, self.assets, self.port_vals_SPY, self.current_allocation
        
    def plot(self, dates, port_vals, assets, port_vals_SPY):
        self.plot_value(dates,
                        port_vals, 
                        assets,
                        port_vals_SPY,
                        self.method, 
                        self.method_name, 
                        self.plot_width,
                        self.plot_height,
                        self.line_width)
        
    # Credits to Leminspector for this function
    @staticmethod
    def underlying_to_letf(underlying,             # Underlying ETF
                           s,                      # Swap exposure (usually around 1.1)
                           E,                      # Expense ratio
                           beta,                   # Leverage
                           start=1,               # Starting price of LETF stock
                           spread=0.004,           # Spread
                           alt_risk_free_rate=None # Alternate risk free rate to use
                           ):                      # Returns pandas Series object of LETF daily prices
        daily_prices = underlying['Close']
        daily_change = daily_prices.pct_change().dropna().rename('Daily Change')
        
        I = 0
        if alt_risk_free_rate is None:  # Earliest date for FFR
            I = web.DataReader('DFF', 'fred', daily_prices.index[0])/100 + spread  # LIBOR + spread
            if I.index[-1] < daily_prices.index[-1]:
                I.loc[daily_prices.index[-1]] = I['DFF'][-1]
        else:
            I = alt_risk_free_rate / 100 + spread
            
        total_costs = (s * (beta - 1) * I + E) / 252
        
        df = total_costs.join(daily_change, how='inner')
        
        
        df['LETF Change'] = df['Daily Change'] * beta - df.iloc[:, 0]
        df['LETF Price'] = start * (1 + df['LETF Change']).cumprod()

        new_df = df['LETF Price'].to_frame(name='Close')

        new_df.loc[underlying['Close'].dropna().index[0], 'Close'] = start
        new_df = new_df.reindex(np.roll(new_df.index, shift=1))

        return new_df
    
    @staticmethod
    def closest_row(df, date, column, method):
        if column == 'index':
            return df.index.get_indexer([date], method=method)[0]
        else:
            return df.iloc[df.index.get_indexer([date], method=method)[0], df.columns.get_indexer([column])[0]]

    def calculate_statistics(self, dates, portfolio_values):
        t = (dates[-1] - dates[0]).total_seconds() / 31536000  # Get years between dates
        cagr = (((portfolio_values[-1]/portfolio_values[0])**(1/t)) - 1) * 100

        df = pd.DataFrame(portfolio_values, columns=['Close'])
        daily_returns = df.pct_change()
        log_returns = np.log(1 + daily_returns)

        tbill_sub = self.precalculated_dfs['Risk Free Return'].loc[(self.precalculated_dfs['Risk Free Return'].index \
                                                                    >= dates[0]) \
                                                                   & (self.precalculated_dfs['Risk Free Return'].index \
                                                                                    <= dates[-1])].copy()
        tbill_sub['Pct Return'] = tbill_sub['Close'].pct_change()
        log_risk_free = np.log(1 + tbill_sub['Pct Return'])

        sharpe = ((log_returns.mean()[0] - log_risk_free.mean())/log_returns.std()[0])*(252**0.5)
        if sharpe == 0:
            # Calcualting for risk free rate
            return portfolio_values[-1], cagr, log_returns.std()[0], 0, 0, 0, 0, 0
        std_neg = log_returns[log_returns<0].std()
        sortino = ((log_returns.mean()[0] - log_risk_free.mean())/std_neg[0])*(252**0.5)

        max_drawdown = (df / df.cummax() - 1.0).min()[0] * 100

        sum_square = 0
        max_value = 0
        for idx, value in enumerate(portfolio_values):
            if value > max_value:
                max_value = value
            else:
                sum_square += (100 * ((value / max_value)-1)) ** 2
        ulcer_index = (sum_square / len(portfolio_values)) ** 0.5

        cagr_tbill = (((self.closest_row(tbill_sub, dates[-1], 'Close', 'nearest') / \
                        self.closest_row(tbill_sub, dates[0], 'Close', 'nearest'))**(1/t)) - 1) * 100
        upi = (cagr-cagr_tbill)/ulcer_index

        std_non_log = df['Close'].pct_change().std() * (252 ** 0.5)
        return portfolio_values[-1], cagr, std_non_log, sharpe, sortino, max_drawdown, ulcer_index, upi
    
    def simulate_backtest(self, 
                          df_500,
                          method,
                          start_date, 
                          precalculated_dfs, 
                          monthly_calculation, 
                          rebalance, 
                          transaction_cost):
        assets = []
        current_allocation = []
        previous_allocation = []
        current_val = 1
        current_val_SPY = 1
        close = 'Close'
        transaction_was_made = False

        limiting_asset = df_500
        for df in precalculated_dfs.values():
            if df.dropna().index[0] > limiting_asset.dropna().index[0]:
                limiting_asset = df
        for key, df in precalculated_dfs.items():
            precalculated_dfs[key] = df.loc[df.index >= limiting_asset.dropna().index[0]]
        df_500 = df_500.loc[df_500.index >= limiting_asset.dropna().index[0]]

        limiting_asset_end = df_500
        for df in precalculated_dfs.values():
            if df.dropna().index[-1] < limiting_asset_end.dropna().index[-1]:
                limiting_asset_end = df
        for key, df in precalculated_dfs.items():
            precalculated_dfs[key] = df.loc[df.index <= limiting_asset_end.dropna().index[-1]]
        df_500 = df_500.loc[df_500.index <= limiting_asset_end.dropna().index[-1]]
        
        for df in precalculated_dfs.values():
            if len(df) != len(df_500):
                raise Exception('Precalculated dfs must be over the same time period as the S&P 500 data')
        
        if monthly_calculation or rebalance == 'monthly':
            df_500_monthly = df_500.loc[df_500.groupby(df_500.index.to_period('M')) \
                                        .apply(lambda x: find_middle_month(x.index))][:-1]
            
        if start_date:
            start_time = self.start_date
        else:
            start_time = df_500.index[0]
            
        start_index = df_500.index.get_indexer([start_time], method='backfill')[0]
        previous_date = df_500.index[start_index]

        dates = [df_500.index[start_index]]
        port_vals = [current_val]
        port_vals_SPY = [current_val_SPY]
        
        np_signals = df_500['State'].to_numpy()

        np_spy = df_500[close].to_numpy()
        precalculated_df_values = {'SPY': np_spy}
        for key, value in precalculated_dfs.items():
            precalculated_df_values[key] = value[close].to_numpy()
        
        allocation_dict = {}
        dynamic_method = False
        if method in ['dynamic']:
            dynamic_method = True
            current_allocation = self.get_dynamic_allocation(df_500, 
                                                             df_500.index[0],
                                                             df_500['Close'][0],
                                                             np_signals[0],
                                                             None,
                                                             precalculated_dfs)
        else:
            allocation_dict = self.state_to_allocation_dict(df_500, precalculated_dfs)
            current_allocation = allocation_dict[np_signals[0]]
        
        for idx, (date, _) in enumerate(df_500[start_index+1:].iterrows()):
                            
            new_asset_vals = []

            for curr_asset in current_allocation[1:]:
                np_close_values = precalculated_df_values[curr_asset[0]]
                asset_return = np_close_values[idx + start_index + 1] / np_close_values[idx + start_index]
                curr_asset_val = current_val * curr_asset[1]
                
                asset_transaction_cost = 0
                if transaction_was_made:
                    for prev_asset in previous_allocation[1:]:
                        if curr_asset[0] == prev_asset[0]:
                            percent_bought = curr_asset[1] - prev_asset[1]
                            if percent_bought > 0:
                                asset_transaction_cost = curr_asset_val * percent_bought * transaction_cost
                            
                new_asset_vals.append(curr_asset_val * asset_return - asset_transaction_cost)

            current_val = sum(new_asset_vals)

            # So we don't have to create a deep copy of allocation_dict
            new_allocation = [current_allocation[0]]
            for i, allocation in enumerate(current_allocation[1:]):
                new_asset = [allocation[0], new_asset_vals[i] / current_val, allocation[2]]
                new_allocation.append(new_asset)

            current_allocation = new_allocation
            
            current_val_SPY *= np_spy[idx + start_index + 1] / np_spy[idx + start_index]
            
            port_vals_SPY.append(current_val_SPY)
            port_vals.append(current_val)
            dates.append(date)
            assets.append(current_allocation[0])

            transaction_was_made = False
            previous_allocation = current_allocation
            
            if monthly_calculation and date in df_500_monthly.index:
                if dynamic_method:
                    new_allocation = self.get_dynamic_allocation(df_500, 
                                                                 date,
                                                                 np_spy[idx + start_index + 1],
                                                                 np_signals[idx + start_index + 1],
                                                                 current_allocation,
                                                                 precalculated_dfs)
                else:
                    new_allocation = allocation_dict[np_signals[idx + start_index + 1]]
                    
                if rebalance == 'never':
                    for i in range(1, len(current_allocation)):
                        if current_allocation[i][0] != new_allocation[i][0] \
                        or current_allocation[i][1] != new_allocation[i][1]:
                            transaction_was_made = True
                            current_allocation = new_allocation
                            break
                else:
                    current_allocation = new_allocation       
            else:
                if dynamic_method:
                    new_allocation = self.get_dynamic_allocation(df_500, 
                                                                 date,
                                                                 np_spy[idx + start_index + 1],
                                                                 np_signals[idx + start_index + 1],
                                                                 current_allocation,
                                                                 precalculated_dfs)
                else:
                    new_allocation = allocation_dict[np_signals[idx + start_index + 1]]
                if rebalance == 'never':
                    for i in range(len(current_allocation)):
                        if current_allocation[i][0] != new_allocation[i][0]:
                            transaction_was_made = True
                            current_allocation = new_allocation
                            break
                elif rebalance == 'monthly':
                    if date in df_500_monthly.index:
                        transaction_was_made = True
                        current_allocation = new_allocation
                    else:
                        for i in range(len(current_allocation)):
                            if current_allocation[i][0] != new_allocation[i][0]:
                                transaction_was_made = True
                                current_allocation = new_allocation
                                break
                elif rebalance == 'daily':
                    transaction_was_made = True
                    current_allocation = new_allocation
                else:
                    print('This rebalancing frequency is not currently supported.')
                    
            previous_date = date
    
        return dates, port_vals, assets, port_vals_SPY, current_allocation

    def state_to_allocation_dict(self, df_500, precalculated_dfs):
        allocation_dict = {}
        
        allocation_risk_mid = []
        if USE_SSO:
            allocation_risk_mid = ['SSO', ['SSO', 1, precalculated_dfs['SSO']]]
        else:
            allocation_risk_mid = ['SSO', ['UPRO', 0.5, precalculated_dfs['UPRO']], ['SPY', 0.5, df_500]]
            
        if self.method == 'rfr instead of zroz and spy':
            allocation_dict['Risk On'] = ['UPRO', 
                                          ['UPRO', 1, precalculated_dfs['UPRO']]]
            allocation_dict['Risk Mid'] = allocation_risk_mid
            allocation_dict['Risk Alt'] = ['Mixed', 
                                           ['UPRO', 0.25, precalculated_dfs['UPRO']], 
                                           ['Risk Free Return', 0.75, precalculated_dfs['Risk Free Return']]]
            allocation_dict['Risk Off'] = ['Risk Free Return', 
                                            ['Risk Free Return', 1, precalculated_dfs['Risk Free Return']]]
            
        elif self.method == 'standard':
            allocation_dict['Risk On'] = ['UPRO', 
                                          ['UPRO', 1, precalculated_dfs['UPRO']]]
            allocation_dict['Risk Mid'] = allocation_risk_mid
            allocation_dict['Risk Alt'] = ['Mixed', 
                                           ['UPRO', 0.25, precalculated_dfs['UPRO']], 
                                           ['ZROZ', 0.75, precalculated_dfs['ZROZ']]]
            allocation_dict['Risk Off'] = ['SPY', 
                                            ['SPY', 1, df_500]]
            
        elif self.method == 'no risk alt':
            allocation_dict['Risk On'] = ['UPRO', 
                                          ['UPRO', 1, precalculated_dfs['UPRO']]]
            allocation_dict['Risk Mid'] = allocation_risk_mid
            allocation_dict['Risk Alt'] = ['SPY', 
                                            ['SPY', 1, df_500]]
            allocation_dict['Risk Off'] = ['SPY', 
                                            ['SPY', 1, df_500]]
            
        elif self.method == 'rfr instead of zroz':
            allocation_dict['Risk On'] = ['UPRO', 
                                          ['UPRO', 1, precalculated_dfs['UPRO']]]
            allocation_dict['Risk Mid'] = allocation_risk_mid
            allocation_dict['Risk Alt'] = ['Mixed', 
                                           ['UPRO', 0.25, precalculated_dfs['UPRO']], 
                                           ['Risk Free Return', 0.75, precalculated_dfs['Risk Free Return']]]
            allocation_dict['Risk Off'] = ['SPY', 
                                            ['SPY', 1, df_500]]
            
        elif self.method == 'rfr instead of spy':
            allocation_dict['Risk On'] = ['UPRO', 
                                          ['UPRO', 1, precalculated_dfs['UPRO']]]
            allocation_dict['Risk Mid'] = allocation_risk_mid
            allocation_dict['Risk Alt'] = ['Mixed', 
                                           ['UPRO', 0.25, precalculated_dfs['UPRO']], 
                                           ['ZROZ', 0.75, precalculated_dfs['ZROZ']]]
            allocation_dict['Risk Off'] = ['Risk Free Return', 
                                            ['Risk Free Return', 1, precalculated_dfs['Risk Free Return']]]
            
        elif self.method == 'hfea':
            for state in ['Risk On', 'Risk Mid', 'Risk Alt', 'Risk Off']:
                allocation_dict[state] = ['Mixed', 
                               ['UPRO', 0.55, precalculated_dfs['UPRO']], 
                               ['TMF', 0.45, precalculated_dfs['TMF']]]
                
        elif self.method == 'upro zroz':
            for state in ['Risk On', 'Risk Mid', 'Risk Alt', 'Risk Off']:
                allocation_dict[state] = ['Mixed', 
                                          ['UPRO', 0.40, precalculated_dfs['UPRO']],
                                          ['ZROZ', 0.60, precalculated_dfs['ZROZ']]]
                
        elif self.method == 'kmlm':
#             allocation_dict['Risk On'] = ['40/30/30',
#                                           ['UPRO', 0.4, precalculated_dfs['UPRO']],
#                                           ['ZROZ', 0.3, precalculated_dfs['ZROZ']],
#                                           ['KMLM', 0.3, precalculated_dfs['KMLM']]]
#             allocation_dict['Risk Mid'] = ['30/35/35',
#                                           ['SSO', 0.3, precalculated_dfs['SSO']],
#                                           ['ZROZ', 0.35, precalculated_dfs['ZROZ']],
#                                           ['KMLM', 0.35, precalculated_dfs['KMLM']]]
#             allocation_dict['Risk Off'] = ['10/30/60',
#                                           ['UPRO', 0.1, df_500],
#                                           ['Risk Free Return', 0.3, precalculated_dfs['Risk Free Return']],
#                                           ['KMLM', 0.6, precalculated_dfs['KMLM']]]
#             allocation_dict['Risk Alt'] = ['10/60/30',
#                                           ['UPRO', 0.1, df_500],
#                                           ['ZROZ', 0.6, precalculated_dfs['ZROZ']],
#                                           ['KMLM', 0.3, precalculated_dfs['KMLM']]]
            
            allocation_dict['Risk On'] = ['40/30/30',
                                          ['UPRO', 0.4, precalculated_dfs['UPRO']],
                                          ['ZROZ', 0.3, precalculated_dfs['ZROZ']],
                                          ['KMLM', 0.3, precalculated_dfs['KMLM']]]
            allocation_dict['Risk Mid'] = ['30/35/35',
                                          ['SSO', 0.4, precalculated_dfs['SSO']],
                                          ['ZROZ', 0.3, precalculated_dfs['ZROZ']],
                                          ['KMLM', 0.3, precalculated_dfs['KMLM']]]
            allocation_dict['Risk Off'] = ['10/30/60',
                                          ['SPY', 0.4, df_500],
                                          ['Risk Free Return', 0.3, precalculated_dfs['Risk Free Return']],
                                          ['KMLM', 0.3, precalculated_dfs['KMLM']]]
            allocation_dict['Risk Alt'] = ['10/60/30',
                                          ['SPY', 0.4, df_500],
                                          ['ZROZ', 0.3, precalculated_dfs['ZROZ']],
                                          ['KMLM', 0.3, precalculated_dfs['KMLM']]]
            
        elif self.method == 'simple':
            allocation_dict['Risk On'] = ['SPY',
                                          ['SPY', 1, df_500]]
                                          
            allocation_dict['Risk Mid'] = ['Mixed',
                                           ['Risk Free Return', 0.5, precalculated_dfs['Risk Free Return']],
                                           ['SPY', 0.5, df_500]]
                                           
            allocation_dict['Risk Off'] = ['Risk Free Return',
                                           ['Risk Free Return', 1, precalculated_dfs['Risk Free Return']]]
                                           
            allocation_dict['Risk Alt'] = ['Risk Free Return',
                                           ['Risk Free Return', 1, precalculated_dfs['Risk Free Return']]]

        elif self.method == 'static kmlm':
            for state in ['Risk On', 'Risk Mid', 'Risk Alt', 'Risk Off']:
                allocation_dict[state] = ['Mixed',
                                          ['UPRO', 0.4, precalculated_dfs['UPRO']],
                                          ['ZROZ', 0.3, precalculated_dfs['ZROZ']],
                                          ['KMLM', 0.3, precalculated_dfs['KMLM']]]
                
        elif self.method == 'sso':
            for state in ['Risk On', 'Risk Mid', 'Risk Alt', 'Risk Off']:
                allocation_dict[state] = allocation_risk_mid
                
        elif self.method == 'upro':
            for state in ['Risk On', 'Risk Mid', 'Risk Alt', 'Risk Off']:
                allocation_dict[state] = ['UPRO',
                                         ['UPRO', 1, precalculated_dfs['UPRO']]]
                
        elif self.method == '1.5x':
            for state in ['Risk On', 'Risk Mid', 'Risk Alt', 'Risk Off']:
                allocation_dict[state] = ['1.5x',
                                         ['1.5x', 1, precalculated_dfs['1.5x']]]           
                
        else:
            raise Exception('Invalid method')
            
        return allocation_dict
    
    def get_dynamic_allocation(self, df_500, date, current_close, signal, current_allocation, precalculated_dfs):
        if USE_SSO:
            allocation_risk_mid = ['SSO', ['SSO', 1, precalculated_dfs['SSO']]]
        else:
            allocation_risk_mid = ['SSO', ['UPRO', 0.5, precalculated_dfs['UPRO']], ['SPY', 0.5, df_500]]
            
        if self.method == 'dynamic':
            if state == 'Risk On':
                return ['UPRO', ['UPRO', 1, precalculated_dfs['UPRO']]]
            elif state == 'Risk Mid':
                return allocation_risk_mid
            elif state == 'Risk Alt':
                return ['Mixed', ['UPRO', 0.25, precalculated_dfs['UPRO']], 
                        ['Risk Free Return', 0.75, precalculated_dfs['Risk Free Return']]]
            else:
                return ['Risk Free Return', ['Risk Free Return', 1, precalculated_dfs['Risk Free Return']]]
    
    def plot_value(self, 
                   dates, 
                   port_vals, 
                   assets, 
                   port_vals_SPY, 
                   method, 
                   method_name,
                   plot_width,
                   plot_height,
                   line_width):
        """Plot the strategy's value on a log scale and display a stats table.

        The strategy line is drawn segment by segment, coloured by the label
        of the allocation held over that step (``assets[i]`` colours the
        segment from ``dates[i]`` to ``dates[i + 1]``). Faint reference lines
        show the S&P 500, 'SSO', 'UPRO' and 'Risk Free Return' series over the
        same dates. Afterwards a table produced with ``calculate_statistics``
        is displayed for the strategy and each reference series.

        ``method`` is accepted for interface compatibility and is not used.
        """
        np_dates = np.array(dates, dtype="datetime64[D]")
        y = np.array(port_vals)

        # Allocation label -> segment colour. Labels missing from the table
        # fall back to black so there is always exactly one colour per
        # segment; skipping them would shift the colours of later segments.
        label_colors = {
            'SPY': 'red',
            'SSO': 'green',
            'UPRO': 'blue',
            'Risk Free Return': 'gray',
            'Mixed': 'purple',
            '40/30/30': 'red',
            '30/35/35': 'green',
            '10/30/60': 'blue',
            '10/60/30': 'purple',
            '1.5x': 'purple',
        }
        color_list = [label_colors.get(asset, 'black') for asset in assets]

        # Consecutive (date, value) points become one line segment each.
        inxval = mdates.date2num(np_dates)
        points = np.array([inxval, y]).T.reshape(-1,1,2)
        segments = np.concatenate([points[:-1],points[1:]], axis=1)

        lc = LineCollection(segments, colors=color_list, linewidths=line_width)

        fig, ax = plt.subplots(figsize=(plot_width, plot_height))
        ax.add_collection(lc)
        fig.autofmt_xdate()
        fig.patch.set_facecolor('white')

        for label in ax.get_xticklabels():
            label.set_ha('center')
            label.set_rotation(0)

        plt.yscale('log')
        plt.xlim(dates[0], dates[-1])
        ax.autoscale()

        # The segment collection carries no legend entries, so add proxies.
        handles, _ = plt.gca().get_legend_handles_labels()
        line1 = Line2D([0], [0], label='1x Leveraged S&P 500', color='r')
        line2 = Line2D([0], [0], label='2x Leveraged S&P 500', color='green')
        line3 = Line2D([0], [0], label='3x Leveraged S&P 500', color='blue')
        line4 = Line2D([0], [0], label='Mixed', color='purple')
        line5 = Line2D([0], [0], label='Risk Free Return', color='gray')
        handles.extend([line1, line2, line3, line4, line5])

        plt.legend(handles=handles, loc = 'upper left')

        plt.xlabel('Date', fontsize=14)
        plt.ylabel('Portfolio Value (Starting at $1)', fontsize=14)
        plt.title(f'{method_name}, ' + dates[0].strftime('%b %Y') + ' - ' + \
                  dates[-1].strftime('%b %Y'), fontsize=14)

        def get_comparison_port_vals(ticker):
            # Value of $1 held in `ticker` over the plotted date range.
            df_internal = self.precalculated_dfs[ticker]
            df_internal_sub = df_internal.loc[(df_internal.index >= dates[0]) & (df_internal.index <= dates[-1])]
            port_vals_internal = list((df_internal_sub['Close'].pct_change()+1).cumprod().dropna())
            port_vals_internal.insert(0, 1)
            return np.array(port_vals_internal)

        # Computed once and shared by the reference lines and the stats table.
        comparison = {ticker: get_comparison_port_vals(ticker)
                      for ticker in ('SSO', 'UPRO', 'Risk Free Return')}

        plt.plot(np_dates, np.array(port_vals_SPY), color='r', alpha=0.2)
        plt.plot(np_dates, comparison['SSO'], color='green', alpha=0.2)
        plt.plot(np_dates, comparison['UPRO'], color='blue', alpha=0.2)
        plt.plot(np_dates, comparison['Risk Free Return'], color='gray', alpha=0.2)

        df_stats = pd.DataFrame(columns=['Strategy', 'Ending Value', 'CAGR', 'Standard Deviation', 'Sharpe', 
                                         'Sortino', 'Max Drawdown', 'Ulcer Index', 'Ulcer Performance Index'])
        stat_rows = [(f'{method_name}', port_vals),
                     ('S&P 500 Large', port_vals_SPY),
                     ('2x S&P 500 Large', comparison['SSO']),
                     ('3x S&P 500 Large', comparison['UPRO']),
                     ('Risk Free Return', comparison['Risk Free Return'])]
        for strategy_label, values in stat_rows:
            df_stats.loc[len(df_stats)] = [strategy_label] + \
                list(self.calculate_statistics(dates, values))

        plt.show()
        display(df_stats)
        
    def get_strategy_results(self):
        zipped = list(zip(self.dates, self.port_vals, self.assets))
        df = pd.DataFrame(zipped, columns=['Date', 'Close', 'Asset Held'])
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.set_index('Date')
        return df
    
    def get_spy_results(self):
        zipped = list(zip(self.dates, self.port_vals_SPY))
        df_SPY = pd.DataFrame(zipped, columns=['Date', 'Close'])
        df_SPY['Date'] = pd.to_datetime(df_SPY['Date'])
        df_SPY = df_SPY.set_index('Date')
        return df_SPY

In [None]:
def drawdown_plot(df_results, df_spy, plot_width, plot_height, method_name):
    """Plot the strategy's drawdown against the S&P 500's drawdown.

    Drawdown is the percentage distance of 'Close' below its running maximum
    (0 at a new high, negative otherwise). The strategy is drawn as a line,
    the S&P 500 as a shaded area.

    Returns the strategy's drawdown Series (in percent).
    """
    def percent_below_peak(close):
        # Distance from the running high, as a percentage of that high.
        peak = close.cummax()
        return (close - peak) / peak * 100

    fig = plt.figure(figsize=(plot_width, plot_height))
    fig.patch.set_facecolor('white')
    plt.margins(x=0, y=0)

    plt.xlabel('Date', fontsize=14)
    plt.ylabel('Drawdown Percentage', fontsize=14)
    first_month = df_results.index[0].strftime('%b %Y')
    last_month = df_results.index[-1].strftime('%b %Y')
    plt.title(f'{method_name} Drawdown Chart, ' + first_month + ' - ' + last_month,
              fontsize=14)

    drawdown_result = percent_below_peak(df_results['Close'])
    drawdown_spy = percent_below_peak(df_spy['Close'])

    plt.plot(drawdown_result, color='navy', label='Strategy')
    plt.fill_between(drawdown_spy.index, 0, drawdown_spy, alpha=0.3, label='S&P 500')

    plt.legend(loc='lower left')
    plt.show()

    return drawdown_result

In [None]:
def get_rolling_returns(df_results, df_spy, years, plot_width, plot_height, method_name):
    """Plot rolling ``years``-year annualised returns, strategy vs S&P 500.

    A window spans ``252 * years`` rows of 'Close'; the return over a window
    is annualised as ``(last / first) ** (1 / years) - 1`` and expressed in
    percent. The curves are left-padded with NaN so they line up with the
    frames' indexes.

    Returns ``(strategy_returns, spy_returns)`` with the NaN padding removed.
    """
    window = 252 * years

    def padded_rolling_cagr(close):
        # One annualised return per full window, NaN where no full window
        # is available yet.
        spans = rolling_window(close.to_numpy(), window)
        annualised = (((spans[:, -1] / spans[:, 0]) ** (1 / years)) - 1) * 100
        return np.insert(annualised, 0, [None] * (window - 1))

    rolling_returns = padded_rolling_cagr(df_results['Close'])
    rolling_spy = padded_rolling_cagr(df_spy['Close'])

    fig = plt.figure(figsize=(plot_width, plot_height))
    fig.patch.set_facecolor('white')

    plt.xlabel('Date', fontsize=14)
    plt.ylabel('Rolling CAGR', fontsize=14)
    first_month = df_results.index[0].strftime('%b %Y')
    last_month = df_results.index[-1].strftime('%b %Y')
    plt.title(f'{method_name} Rolling Returns ({years} years), ' + first_month + ' - ' + last_month,
              fontsize=14)

    plt.plot(df_results.index, rolling_returns, color='blue', linewidth=1, label=method_name)
    plt.plot(df_spy.index, rolling_spy, color='red', alpha=0.5, linewidth=1, label='S&P 500')
    plt.axhline(y=0, color='black', linestyle='--')

    plt.legend(loc='upper left')
    plt.show()

    strategy_valid = rolling_returns[~np.isnan(rolling_returns)]
    spy_valid = rolling_spy[~np.isnan(rolling_spy)]
    return strategy_valid, spy_valid

In [None]:
def rolling_correlation(df1, df2, years, plot_width, plot_height):
    """Plot the rolling correlation between two 'Close' return series.

    Daily percentage changes of both frames are cut into windows of
    ``252 * years`` observations; the Pearson correlation of each pair of
    windows is plotted against the dates of ``df1`` from row ``252 * years``
    onwards.

    Returns the list of correlation coefficients.
    """
    window = 252 * years

    returns_a = df1['Close'].pct_change().dropna().to_numpy()
    windows_a = rolling_window(returns_a, window)
    returns_b = df2['Close'].pct_change().dropna().to_numpy()
    windows_b = rolling_window(returns_b, window)

    # Entry [0][1] of the 2x2 correlation matrix is the cross-correlation.
    results = [np.corrcoef(chunk, windows_b[pos])[0][1]
               for pos, chunk in enumerate(windows_a)]

    dates = df1.iloc[window:].index

    fig = plt.figure(figsize=(plot_width, plot_height))
    fig.patch.set_facecolor('white')

    plt.xlabel('Date', fontsize=14)
    plt.ylabel(f'Rolling {years} Year Correlation', fontsize=14)
    first_month = dates[0].strftime('%b %Y')
    last_month = dates[-1].strftime('%b %Y')
    plt.title(f'Rolling {years} Year Correlation, ' + first_month + ' - ' + last_month,
              fontsize=14)

    plt.plot(dates, results, color='blue', linewidth=1)
    plt.axhline(y=0, color='black', linestyle='--')

    plt.show()

    return results

In [None]:
def new_cagr_vol(df, new_cagr, new_vol):
    """Return a copy of `df` whose 'Close' series is rescaled to a target CAGR and volatility.

    Each daily return r is mapped to beta * r + alpha, where beta rescales the
    annualised volatility to `new_vol` and alpha is a constant daily shift derived
    from a series expansion (terms up to the 5th power of the returns) so that the
    compounded growth approximates `new_cagr`.

    Args:
        df: frame with a 'Close' column; it is not modified.
        new_cagr: target CAGR as a percentage (7 means 7%).
        new_vol: target annualised volatility as a decimal (0.2 means 20%).

    Returns:
        A copy of `df` with 'Close' replaced by the cumulative product of the
        adjusted returns; the first row is set to 1.
    """
    target_cagr = new_cagr / 100  # CAGR is given as a percent, vol is not
    out = df.copy()

    prices = out['Close'].tolist()
    rplus1 = out['Close'].pct_change() + 1
    daily = rplus1 - 1

    # Annualise using 252 rows per year.
    old_cagr = (prices[-1] / prices[0]) ** (252 / len(out)) - 1
    old_vol = rplus1.std() * (252 ** 0.5)

    beta = new_vol / old_vol

    # Leading term plus alternating-sign corrections of order 2..5.
    alpha = math.log(1 + target_cagr) - (beta * math.log(1 + old_cagr))
    for order in range(2, 6):
        correction = (1 / order) * (beta ** order - beta) * 252 * (daily ** order).mean()
        if order % 2 == 0:
            alpha = alpha + correction
        else:
            alpha = alpha - correction

    # Convert the annual shift into a per-row shift.
    alpha = alpha / 252

    adjusted = 1 + (beta * daily + alpha)

    out['Close'] = adjusted.cumprod()
    # The first return is NaN, so the series is anchored at 1 explicitly.
    out.loc[out.index[0], 'Close'] = 1

    return out

In [None]:
def _draw_random_parameters(rng):
    """Draw one random strategy configuration from `rng` (the `random` module or a `random.Random`)."""
    # The draw order is fixed so that seeded runs are reproducible.
    vol_lookback_days = rng.randrange(10, 30)
    mom_lookback_days = rng.randrange(126, 252)
    short_mom_lookback_days = rng.randrange(63, 126)
    tbill_lookback_days = rng.randrange(10, 30)
    monthly = rng.choice([False])
    low_vol_cutoff = rng.uniform(0.1, 0.2)
    # The upper cutoff always sits 0.05 to 0.15 above the lower one.
    high_vol_cutoff = low_vol_cutoff + rng.uniform(0.05, 0.15)
    strict_boundaries = rng.choice([True, False])
    use_sma = rng.choice([True, False])
    rebalance = rng.choice(['daily', 'monthly', 'never'])

    return {
        'vol_lookback_days': vol_lookback_days,
        'mom_lookback_days': mom_lookback_days,
        'short_mom_lookback_days': short_mom_lookback_days,
        'tbill_lookback_days': tbill_lookback_days,
        'monthly': monthly,
        'low_vol_cutoff': low_vol_cutoff,
        'high_vol_cutoff': high_vol_cutoff,
        'strict_boundaries': strict_boundaries,
        'use_sma': use_sma,
        'rebalance': rebalance,
    }


def _run_scenario(df, method, precalculated_dfs, params, start_date):
    """Compute indicators and signals for one parameter set and run its backtest."""
    # Pass use_tbill_data = True only when the frame has no 'Risk Free Rate' column yet.
    use_tbill_data = 'Risk Free Rate' not in df.columns

    # Linear regression (the literal False below) is not used because it is slower.
    df = load_df_data(df, params['vol_lookback_days'], params['mom_lookback_days'],
                      params['short_mom_lookback_days'], params['tbill_lookback_days'],
                      False, params['use_sma'], params['monthly'], use_tbill_data = use_tbill_data)

    df = calculate_signals(df, params['low_vol_cutoff'], params['high_vol_cutoff'],
                           params['strict_boundaries'], False, False, params['monthly'])

    backtest = Backtest(method = method,
                        method_name = 'this does not matter',
                        df_500 = df,
                        precalculated_dfs = precalculated_dfs,
                        monthly_calculation = params['monthly'],
                        rebalance = params['rebalance'],
                        transaction_cost = 0.001,
                        start_date = start_date,
                        plot_width = 10,
                        plot_height = 10,
                        line_width = 10)

    dates, port_vals, _, port_vals_SPY, _ = backtest.simulate()
    return df, backtest, dates, port_vals, port_vals_SPY


def _print_strategy_report(header, params, cagr, vol, sharpe, upi, max_drawdown):
    """Print the parameters and result statistics of one strategy run."""
    low_vol_cutoff = params['low_vol_cutoff']
    high_vol_cutoff = params['high_vol_cutoff']
    vol_lookback_days = params['vol_lookback_days']
    mom_lookback_days = params['mom_lookback_days']
    tbill_lookback_days = params['tbill_lookback_days']
    monthly = params['monthly']
    strict_boundaries = params['strict_boundaries']
    use_sma = params['use_sma']
    rebalance = params['rebalance']

    print(header)
    print(f'{"Lower volatility cutoff":<25} {low_vol_cutoff:>10.3f}')
    print(f'{"Upper volatility cutoff":<25} {high_vol_cutoff:>10.3f}')
    print(f'{"Volatility lookback days":<25} {vol_lookback_days:>10}')
    print(f'{"Momentum lookback days":<25} {mom_lookback_days:>10}')
    print(f'{"T-bill lookback days":<25} {tbill_lookback_days:>10}')
    print(f'{"Monthly calculation":<25} {str(monthly):>10}')
    print(f'{"Strict boundaries":<25} {str(strict_boundaries):>10}')
    print(f'{"Use SMA":<25} {str(use_sma):>10}')
    print(f'{"Rebalance frequency":<25} {rebalance:>10}')
    print(f'           --- Results ---')
    print(f'{"CAGR":<25} {cagr:>10.3f}')
    print(f'{"Volatility":<25} {vol:>10.3f}')
    print(f'{"Sharpe":<25} {sharpe:>10.3f}')
    print(f'{"UPI":<25} {upi:>10.3f}')
    print(f'{"Max Drawdown":<25} {max_drawdown:>10.3f}\n')


def simulate_random_scenarios(df, method, num_simulations, print_results, modified_cagr = None, modified_vol = None,
                              seed = None):
    """Backtest the baseline strategy plus `num_simulations` randomly parameterised variants.

    Args:
        df: S&P 500 price frame with a 'Close' column (optionally a 'Risk Free Rate' column).
        method: backtest method name, forwarded to TickerLoader and Backtest.
        num_simulations: number of random parameter sets to evaluate.
        print_results: when True, print the statistics of every run.
        modified_cagr: optional target CAGR in percent. The price data is rescaled with
            new_cagr_vol only when both modified_cagr and modified_vol are given.
        modified_vol: optional target annualised volatility as a decimal.
        seed: optional seed for the random parameter draws. None (the default) uses the
            global `random` state.

    Returns:
        (df_results, dates_base): a frame indexed by 'Simulation' ('S&P 500', 'Baseline',
        then 0..num_simulations-1) with columns 'CAGR', 'Sharpe', 'Max Drawdown' and 'UPI',
        and the dates of the baseline backtest.
    """
    cagrs = []
    sharpes = []
    upis = []
    max_dds = []

    # Compare against None explicitly: a target of 0 (e.g. a 0% CAGR) is a valid request
    # and must not be treated as "not provided".
    if modified_cagr is not None and modified_vol is not None:
        df = new_cagr_vol(df, modified_cagr, modified_vol)

    rng = random if seed is None else random.Random(seed)

    baseline = {
        'vol_lookback_days': 21,
        'mom_lookback_days': 252,
        'short_mom_lookback_days': 126,
        'tbill_lookback_days': 21,
        'monthly': False,
        'low_vol_cutoff': 0.14,
        'high_vol_cutoff': 0.24,
        'strict_boundaries': False,
        'use_sma': True,
        'rebalance': 'daily',
    }

    # Keyword arguments (as used in main) avoid any dependence on the parameter order.
    ticker_loader = TickerLoader(method = method, df_spy = df)
    precalculated_dfs = ticker_loader.load_precalculated_dfs()

    # The baseline run starts as early as possible; its first date anchors every later run.
    df, backtest, dates_base, port_vals, port_vals_SPY = _run_scenario(df, method, precalculated_dfs,
                                                                       baseline, None)

    _, cagr_base, vol_base, sharpe_base, _, dd_base, _, upi_base = backtest.calculate_statistics(dates_base, port_vals)
    _, cagr_spy, vol_spy, sharpe_spy, _, dd_spy, _, upi_spy = backtest.calculate_statistics(dates_base, port_vals_SPY)

    cagrs.extend([cagr_spy, cagr_base])
    sharpes.extend([sharpe_spy, sharpe_base])
    max_dds.extend([dd_spy, dd_base])
    upis.extend([upi_spy, upi_base])

    if print_results:
        print(f'\nDates: {dates_base[0].strftime("%B %d, %Y")} to {dates_base[-1].strftime("%B %d, %Y")}\n')
        print(f'=========== S&P 500 Statistics ===========')
        print(f'{"CAGR":<25} {cagr_spy:>10.3f}')
        print(f'{"Volatility":<25} {vol_spy:>10.3f}')
        print(f'{"Sharpe":<25} {sharpe_spy:>10.3f}')
        print(f'{"UPI":<25} {upi_spy:>10.3f}')
        print(f'{"Max Drawdown":<25} {dd_spy:>10.3f}\n')

        _print_strategy_report(f'=========== Baseline Strategy ===========', baseline,
                               cagr_base, vol_base, sharpe_base, upi_base, dd_base)

    for i in range(num_simulations):
        params = _draw_random_parameters(rng)

        # NOTE(review): the frame returned by the previous run is fed back into load_df_data;
        # confirm that helper recomputes its derived columns rather than compounding them.
        df, backtest, dates, port_vals, _ = _run_scenario(df, method, precalculated_dfs,
                                                          params, dates_base[0])
        _, cagr, vol, sharpe, _, max_drawdown, _, upi = backtest.calculate_statistics(dates, port_vals)
        cagrs.append(cagr)
        sharpes.append(sharpe)
        upis.append(upi)
        max_dds.append(max_drawdown)

        if print_results:
            _print_strategy_report(f'=========== Simulation {i + 1} ===========', params,
                                   cagr, vol, sharpe, upi, max_drawdown)

    simulation = ['S&P 500', 'Baseline'] + list(range(num_simulations))
    zipped = list(zip(simulation, cagrs, sharpes, max_dds, upis))
    df_results = pd.DataFrame(zipped, columns=['Simulation', 'CAGR', 'Sharpe', 'Max Drawdown', 'UPI'])
    df_results = df_results.set_index('Simulation')

    return df_results, dates_base

In [None]:
def _ask_yes_no(prompt):
    """Ask a yes/no question on stdin and return the answer as a bool.

    Raises:
        ValueError: if the reply is not Y, YES, N or NO (case-insensitive).
    """
    answer = input(prompt).upper()
    if answer in ('Y', 'YES'):
        return True
    if answer in ('N', 'NO'):
        return False
    raise ValueError('Invalid input')


def simulate_prompt():
    """Interactively collect simulation settings, run the random scenarios and plot them.

    Returns:
        (df_results, dates) as returned by simulate_random_scenarios. When the requested
        number of simulations is not positive, nothing is run and an empty DataFrame and
        an empty list are returned, so callers can always unpack two values.

    Raises:
        ValueError: if a yes/no question is answered with anything other than Y/YES/N/NO,
            or if a numeric answer cannot be parsed.
    """
    num_sims = int(input('Number of simulations? '))
    if num_sims <= 0:
        # Keep the (results, dates) shape of the normal path instead of returning None.
        return pd.DataFrame(), []
    method = input('Method? ')

    print_results = _ask_yes_no('Print individual simulation results? [Y/N] ')

    if _ask_yes_no('Use simulated extended S&P 500 data [Y] or use ticker data [N]? '):
        df_sim = load_us_market_data(EXTENDED_DATA_PATH)
    else:
        ticker = input('Ticker data to use? ')
        df_sim = load_additional_dfs(ticker.upper())

    if _ask_yes_no('Use modified S&P 500 data? [Y/N] '):
        print("\nNote that the inputted CAGR will not line up with the S&P 500 statistics. This is because the CAGR given "
            + "is applied over the ENTIRE time period of S&P 500 data available. \nHowever, when the backtest is done, a chunk of the "
            + "start is cut off because momentum needs to be calculated before making an initial allocation.\n")
        modified_cagr = float(input('Modified S&P 500 CAGR? [Input as percentage without % symbol] '))
        modified_vol = float(input('Modified S&P 500 Vol? [Input as decimal] '))
        df_results, dates = simulate_random_scenarios(df_sim, method, num_sims, print_results, modified_cagr, modified_vol)
    else:
        df_results, dates = simulate_random_scenarios(df_sim, method, num_sims, print_results)

    plot_sim_results(df_results, dates, num_sims, 15, 10)

    return df_results, dates

In [None]:
def plot_sim_results(df_sim_results, dates, num_simulations, plot_width, plot_height):
    """Draw a 2x2 grid of histograms summarising the random-scenario results.

    Row 0 of `df_sim_results` is read as the S&P 500 reference and row 1 as the baseline
    strategy; the remaining rows are the simulations that go into the histograms.

    Args:
        df_sim_results: frame with 'CAGR', 'Sharpe', 'UPI' and 'Max Drawdown' columns.
        dates: sequence of dates; the first and last entries appear in the figure title.
        num_simulations: number shown in the figure title.
        plot_width: figure width passed to matplotlib.
        plot_height: figure height passed to matplotlib.
    """
    fig, axs = plt.subplots(2, 2, figsize = (plot_width, plot_height), tight_layout = True)
    fig.patch.set_facecolor('white')

    # (grid row, grid column, data column, legend suffix, panel title, x-axis label)
    panels = (
        (0, 0, 'CAGR', 'CAGR', 'CAGRs', 'CAGR'),
        (1, 0, 'Sharpe', 'Sharpe', 'Sharpes', 'Sharpe'),
        (0, 1, 'UPI', 'UPI', 'Ulcer Performance Indices', 'Ulcer Performance Index'),
        (1, 1, 'Max Drawdown', 'DD', 'Max Drawdowns', 'Max Drawdown'),
    )

    # Read every reference value up front, before anything is drawn.
    spy_values = {column: df_sim_results[column].iloc[0] for _, _, column, _, _, _ in panels}
    base_values = {column: df_sim_results[column].iloc[1] for _, _, column, _, _, _ in panels}

    for grid_row, grid_col, column, suffix, title, x_label in panels:
        ax = axs[grid_row][grid_col]
        ax.hist(x = df_sim_results.iloc[2:][column].values, bins = 30, color = 'blue')
        ax.axvline(x = base_values[column], color = 'green', label = f'Base Strategy {suffix}')
        ax.axvline(x = spy_values[column], color = 'red', label = f'S&P 500 {suffix}')
        ax.title.set_text(title)
        ax.set_xlabel(x_label)
        ax.set_ylabel('Count')
        ax.legend(loc = 'upper left')

    date_span = f'{dates[0].strftime("%B %d, %Y")} to {dates[-1].strftime("%B %d, %Y")}'
    fig.suptitle(f'Results of {num_simulations} simulations, ' + date_span, fontsize=20)

    plt.show()

In [None]:
def main():
    time1 = time.time()
    df = pd.DataFrame()
    
    if USE_EXTENDED_DATA:
        df = load_us_market_data(EXTENDED_DATA_PATH)
        if USE_MODIFIED_SP_500:
            df = new_cagr_vol(df, MODIFIED_SP_500_CAGR, MODIFIED_SP_500_VOL)
        time2 = time.time()
        if USE_DAILY_TBILL:
            df = df[['Close']]
            df = load_df_data(df, VOLATILITY_LOOKBACK_DAYS, MOMENTUM_LOOKBACK_DAYS, SHORT_MOMENTUM_LOOKBACK_DAYS, 
                              TBILL_LOOKBACK_DAYS, USE_LINEAR_REGRESSION, USE_SMA, MONTHLY_CALCULATION, 
                              use_tbill_data = True)
        else:
            df = load_df_data(df, VOLATILITY_LOOKBACK_DAYS, MOMENTUM_LOOKBACK_DAYS, SHORT_MOMENTUM_LOOKBACK_DAYS,
                              TBILL_LOOKBACK_DAYS, USE_LINEAR_REGRESSION, USE_SMA, MONTHLY_CALCULATION, 
                              use_tbill_data = False)
    else:
        df = load_additional_dfs(SP_500_TICKER)
        if USE_MODIFIED_SP_500:
            df = new_cagr_vol(df, MODIFIED_SP_500_CAGR, MODIFIED_SP_500_VOL)
        time2 = time.time()
        df = load_df_data(df, VOLATILITY_LOOKBACK_DAYS, MOMENTUM_LOOKBACK_DAYS, SHORT_MOMENTUM_LOOKBACK_DAYS, 
                          TBILL_LOOKBACK_DAYS, USE_LINEAR_REGRESSION, USE_SMA, MONTHLY_CALCULATION, 
                          use_tbill_data = True)
    
    time3 = time.time()
    df = calculate_signals(df, LOWER_VOL_CUTOFF, HIGHER_VOL_CUTOFF, 
                           USE_STRICT_BOUNDARIES, EXCLUDE_VOL, EXCLUDE_MOM, MONTHLY_CALCULATION)
    time4 = time.time()
    
    if USE_MODIFIED_SP_500:
        df = new_cagr_vol(df, MODIFIED_SP_500_CAGR, MODIFIED_SP_500_VOL)
    
    %matplotlib inline
    mpl.rcParams['figure.dpi'] = PLOT_DPI
    mpl.rcParams['font.family'] = PLOT_FONT

    plot_signal(df, PLOT_WIDTH, PLOT_HEIGHT, 
                SHOW_RISK_ON, SHOW_RISK_MID, SHOW_RISK_ALT, SHOW_RISK_OFF, 
                LOWER_VOL_CUTOFF, HIGHER_VOL_CUTOFF)
    time5 = time.time()

    ticker_loader = TickerLoader(method = METHOD, df_spy = df)
    precalculated_dfs = ticker_loader.load_precalculated_dfs()
    
    backtest = Backtest(method = METHOD,
                        method_name = METHOD_NAME,
                        df_500 = df,
                        precalculated_dfs = precalculated_dfs,
                        monthly_calculation = MONTHLY_CALCULATION,
                        rebalance = REBALANCE,
                        transaction_cost = TRANSACTION_COST,
                        start_date = BACKTEST_START_DATE,
                        plot_width = PLOT_WIDTH, 
                        plot_height = PLOT_HEIGHT, 
                        line_width = BACKTEST_LINE_WIDTH)

    time6 = time.time()
    dates, port_vals, assets, port_vals_SPY, current_allocation = backtest.simulate()
    time7 = time.time()

    backtest.plot(dates, port_vals, assets, port_vals_SPY)
    time8 = time.time()

    df_results = backtest.get_strategy_results()
    df_spy_results = backtest.get_spy_results()
    
    drawdowns = drawdown_plot(df_results, df_spy_results, PLOT_WIDTH, PLOT_HEIGHT, METHOD_NAME)
    time9 = time.time()
    rolling_returns, rolling_spy = get_rolling_returns(df_results, df_spy_results, YEARS_ROLLING_RETURNS, 
                                                       PLOT_WIDTH, PLOT_HEIGHT, METHOD_NAME)
    time10 = time.time()

    print(f'Dates: {dates[0].strftime("%B %d, %Y")} to {dates[-1].strftime("%B %d, %Y")}\n')
    
    df = df.loc[(df.index >= dates[0]) & (df.index <= dates[-1])]

    current_signal = df.iloc[-1]['State']
    print(f'Current signal: {current_signal}')
    
    allocation_str = ''
    for allocation in current_allocation[1:]:
        allocation_str += f'\n{allocation[1] * 100:.2f}% {allocation[0]}'
        
    print(f'Current allocation name: {current_allocation[0]}\n')
    print(f'{current_allocation[0]} allocation consists of: {allocation_str}\n')

    years = (dates[-1] - dates[0]).total_seconds() / 31536000
    swaps = df['State'].shift().bfill().ne(df['State']).sum() / years
    print(f'Average number of swaps per year: {swaps:.3f}\n')

    for state in ['Risk On', 'Risk Mid', 'Risk Alt', 'Risk Off']:
        risk_state_pct = ((df['State'].values == state).sum() / len(df)) * 100
        lowercase_state = state.lower()
        print(f'{"Percent of time " + lowercase_state + ":":<25} {risk_state_pct:>10.3f}%')
    
    corr = np.corrcoef(pd.Series(port_vals).pct_change().dropna(), df['Close'].pct_change().dropna())[0][1]
    print(f'\nCorrelation of strategy returns to S&P 500: {corr:.3f}\n')

    print(f'Average {YEARS_ROLLING_RETURNS} year rolling return - Strategy: {rolling_returns.mean():8.3f}%' \
          + f'   SPY: {rolling_spy.mean():8.3f}%')
    print(f'Minimum {YEARS_ROLLING_RETURNS} year rolling return - Strategy: {rolling_returns.min():8.3f}%' \
          + f'   SPY: {rolling_spy.min():8.3f}%')
    print(f'Maximum {YEARS_ROLLING_RETURNS} year rolling return - Strategy: {rolling_returns.max():8.3f}%' \
          + f'   SPY: {rolling_spy.max():8.3f}%')
    time11 = time.time()

    if PRINT_TIMES:
        print(f'\n{"Operation":<28} {"Time (seconds)":>10}')
        print(f'{"Load S&P 500 data":<28} {time2 - time1:>10.3f}')
        print(f'{"Load strategy data":<28} {time3 - time2:>10.3f}')
        print(f'{"Calculate signals":<28} {time4 - time3:>10.3f}')
        print(f'{"Plot signals":<28} {time5 - time4:>10.3f}')
        print(f'{"Load precalculated dfs":<28} {time6 - time5:>10.3f}')
        print(f'{"Simulate backtest":<28} {time7 - time6:>10.3f}')
        print(f'{"Plot backtest":<28} {time8 - time7:>10.3f}')
        print(f'{"Plot drawdowns":<28} {time9 - time8:>10.3f}')
        print(f'{"Plot rolling returns":<28} {time10 - time9:>10.3f}')
        print(f'{"Print statistics":<28} {time11 - time10:>10.3f}')

    return df, df_results, precalculated_dfs, drawdowns, rolling_returns, rolling_spy

In [None]:
# Paths to the data files used by this notebook.
# If you did not get them via git clone, download daily_us_market_data.csv and
# MLM_Index_Returns.xlsx and point the two paths below at your copies.
EXTENDED_DATA_PATH = r'data/daily_us_market_data.csv'
KMLM_DATA_PATH = r'data/MLM_Index_Returns.xlsx'

In [None]:
# Strategy parameters
SP_500_TICKER = 'SPY'                # Used if not using extended data
USE_EXTENDED_DATA = True             # If true, use data located at EXTENDED_DATA_PATH, else use the above ticker
USE_DAILY_TBILL = False              # Only read when USE_EXTENDED_DATA is true. If true, use daily t-bill data. If false, use monthly interpolated to daily.

LOWER_VOL_CUTOFF = 0.14              # Lower cutoff for volatility
HIGHER_VOL_CUTOFF = 0.24             # Higher cutoff for volatility

TBILL_LOOKBACK_DAYS = 21             # Lookback to determine if t-bill rate is up or down
MOMENTUM_LOOKBACK_DAYS = 252         # Lookback to determine if S&P 500 momentum is positive or negative
SHORT_MOMENTUM_LOOKBACK_DAYS = 126   # Lookback to use for the shorter momentum (unused currently)
VOLATILITY_LOOKBACK_DAYS = 21        # Lookback to determine volatility

USE_SMA = True                       # Whether to use an SMA or pure momentum signal
USE_LINEAR_REGRESSION = False        # Whether to use lin. reg. or momentum to determine if t-bill rate is up or down
USE_STRICT_BOUNDARIES = False        # Whether to use strict boundaries for deciding the allocation

EXCLUDE_VOL = False                  # Whether to exclude the use of volatility in deciding allocation
EXCLUDE_MOM = False                  # Whether to exclude the use of momentum in deciding allocation

# Plot parameters
PLOT_DPI = 300                  # DPI to use for the plot (too high will cause lag)
PLOT_WIDTH = 15                 # Width to use for the plot
PLOT_HEIGHT = 5                 # Height to use for the plot
PLOT_FONT = 'Source Serif Pro'  # Font to use for the plot. Font must be installed on your device

SHOW_RISK_ON = True             # Whether to show the shaded region for risk on
SHOW_RISK_MID = True            # Whether to show the shaded region for risk mid
SHOW_RISK_ALT = True            # Whether to show the shaded region for risk alt
SHOW_RISK_OFF = True            # Whether to show the shaded region for risk off

# Backtest parameters
BACKTEST_START_DATE = None               # Start date for the backtest. None goes as far back as possible

METHOD = 'standard'   # Method to use for the backtest
METHOD_NAME = 'MFEA'                     # Name to use for the backtest plot

USE_SSO = False                          # Whether to use SSO or SPY + UPRO for 2x leverage

MONTHLY_CALCULATION = False              # False does a daily calculation, true does a monthly calculation
REBALANCE = 'daily'                      # Frequency to rebalance. Currently can be daily, monthly, or never

TRANSACTION_COST = 0.001                 # Transaction cost as a decimal. Applied only on buying

BACKTEST_LINE_WIDTH = 3                  # Line width to use for the backtest plot

# Other
YEARS_ROLLING_RETURNS = 3     # Number of years to use for rolling return plot

USE_MODIFIED_SP_500 = False   # Use modified S&P 500. Will apply over all of the data, not just what appears in backtest
MODIFIED_SP_500_CAGR = 7      # CAGR for the modified S&P 500 to have, as a percentage (7 means 7%)
MODIFIED_SP_500_VOL = 0.2     # Annualised volatility for the modified S&P 500 to have, as a decimal

PRINT_TIMES = False           # Prints times for diagnostic purposes

In [None]:
# Run the full pipeline with the parameters set above and keep its outputs for inspection.
df, df_results, precalculated_dfs, drawdowns, rolling_returns, rolling_returns_spy = main()

In [None]:
# Interactive cell: asks for the simulation settings via input(), then runs and plots the random scenarios.
df_sim_results, dates = simulate_prompt()