In [1]:
import pandas as pd
import zipfile
import numpy as np
import yfinance as yf
import random

  # Empty Series placeholder; nothing in the cells visible here reads it.
  # NOTE(review): the two-space indent looks like an export artifact, and some
  # pandas versions warn when Series() is built without an explicit dtype --
  # confirm whether this line is still needed.
  _empty_series = pd.Series()


## Create Stocks DataFrame

In [2]:
def create_stocks_df(start_date, end_date, num_stocks):
    !!kaggle datasets download -d andrewmvd/sp-500-stocks --force
    
    file = 'sp-500-stocks.zip'

    print('Unzipping:',file)
    with zipfile.ZipFile(file, 'r') as zip_ref:
        zip_ref.extractall()
    sp500_df = pd.read_csv('sp500_companies.csv')
    stocks_list = list(set(sp500_df['Symbol'])) 
    stocks_df = pd.DataFrame()
    random.shuffle(stocks_list)
    for stock in stocks_list[:num_stocks]:
        df = yf.download(stock, start = start_date, end = end_date)
        df['company'] = stock
        stocks_df = pd.concat([stocks_df,df])
    return sp500_df, stocks_df
    

### Russell 3000 Data

In [3]:
#!!kaggle datasets download -d williecosta/russell-3000-stock-history

# file = 'russell-3000-stock-history.zip'
# print('Unzipping:',file)

# with zipfile.ZipFile(file, 'r') as zip_ref:
#     zip_ref.extractall()

## Simple Moving Average Backtester Function

In [4]:
def test_strategy(stocks_df, stock, SMA):
    df = stocks_df[stocks_df['company'] == stock]
    RTX_df= df.Close.to_frame()
    RTX_df['SMA_S'] = RTX_df[['Close']].rolling(window=int(SMA[0])).mean()
    RTX_df['SMA_L'] = RTX_df[['Close']].rolling(window=int(SMA[1])).mean()
    RTX_df['returnsb&h'] = np.log(RTX_df[['Close']].div(RTX_df[['Close']].shift(1)))
    RTX_df.dropna(inplace=True)
    # Short bias
    RTX_df['position_short'] = np.where(RTX_df['SMA_S'] < RTX_df['SMA_L'],1,-1)

    # Long bias
    RTX_df['position_long'] = np.where(RTX_df['SMA_S'] > RTX_df['SMA_L'],1,-1)
    
    RTX_df['strategy_long'] = RTX_df['returnsb&h'] * RTX_df['position_long'].shift(1)
    RTX_df['strategy_short'] = RTX_df['returnsb&h'] * RTX_df['position_short'].shift(1)
    
    long_returns = np.exp(RTX_df['strategy_long'].sum())
    long_std = RTX_df['strategy_long'].std() * np.sqrt(252)
    
    short_returns = np.exp(RTX_df['strategy_short'].sum())
    short_std = RTX_df['strategy_short'].std() * np.sqrt(252)
    
    return (long_returns, long_std), (short_returns,short_std)

## SMA Backtester Class

In [5]:
class SMA_Backtester():
    """Backtest a two-moving-average crossover strategy on a single ticker.

    Attributes
    ----------
    stock
        Value matched against ``stocks_df['company']``.
    stocks_df : DataFrame
        Source prices; must expose ``Close`` and ``company`` columns.
    SMA_S, SMA_L
        Short and long rolling-window lengths (cast with ``int`` when used).
    data2 : DataFrame
        Prepared frame with ``Close``, ``SMA_S``, ``SMA_L`` and ``returns``
        (log returns), built by ``get_data``. The attribute name is kept
        as-is because callers may already read it.
    results : DataFrame or None
        Per-bar backtest output; ``None`` until ``test_results`` has run.
    """

    def __init__(self, stocks_df, stock, SMA_S, SMA_L):
        """Store the parameters and immediately prepare ``data2``."""
        self.stock = stock
        self.stocks_df = stocks_df
        self.SMA_S = SMA_S
        self.SMA_L = SMA_L
        self.results = None
        self.get_data()

    def get_data(self):
        """Build, store in ``self.data2`` and return the indicator frame.

        Rows for which either moving average or the log return is undefined
        are dropped.
        """
        selected = self.stocks_df[self.stocks_df['company'] == self.stock]
        close = selected.Close
        prepared = close.to_frame().assign(
            SMA_S=close.rolling(window=int(self.SMA_S)).mean(),
            SMA_L=close.rolling(window=int(self.SMA_L)).mean(),
            returns=np.log(close.div(close.shift(1))),
        ).dropna()
        self.data2 = prepared
        return prepared

    def test_results(self):
        """Run the backtest and return final performance figures.

        Two position rules are evaluated: the "short" rule holds +1 while the
        short average is below the long one (else -1); the "long" rule holds
        +1 while it is above (else -1). Positions are lagged by one bar before
        being applied to the log returns.

        Returns
        -------
        tuple
            ``((short_perf, short_outperf), (long_perf, long_outperf))``, each
            rounded to 4 decimals. ``*_perf`` is the final cumulative gross
            return of the rule and ``*_outperf`` its difference to buy-and-hold
            over the same bars.

        Side effects
        ------------
        Stores the per-bar frame in ``self.results``.
        """
        data = self.data2.copy().dropna()

        # Short bias
        data['position_short'] = np.where(data['SMA_S'] < data['SMA_L'],1,-1)

        # Long bias
        data['position_long'] = np.where(data['SMA_S'] > data['SMA_L'],1,-1)

        # Lag the position by one bar so a bar's return is earned with the
        # position decided on the previous bar.
        lagged_long = data['position_long'].shift(1)
        lagged_short = data['position_short'].shift(1)
        data['strategy_long'] = data['returns'] * lagged_long
        data['strategy_short'] = data['returns'] * lagged_short

        # The lag leaves the first bar without a position, so the strategies
        # earn nothing on it. Exclude that bar from the buy-and-hold benchmark
        # as well; otherwise the benchmark covers one more bar than the
        # strategies and the out-performance figures are biased.
        has_position = lagged_long.notna()
        data['returnsbh'] = data['returns'].where(has_position).cumsum().apply(np.exp)
        data['return_strategy_long'] = data['strategy_long'].cumsum().apply(np.exp)
        data['return_strategy_short'] = data['strategy_short'].cumsum().apply(np.exp)

        short_perf = data['return_strategy_short'].iloc[-1]
        long_perf = data['return_strategy_long'].iloc[-1]
        benchmark_perf = data['returnsbh'].iloc[-1]

        short_outperf = short_perf - benchmark_perf
        long_outperf = long_perf - benchmark_perf
        self.results = data

        return (round(short_perf,4),round(short_outperf,4)), (round(long_perf,4),round(long_outperf,4))

    def plot_results(self):
        """Plot cumulative buy-and-hold and strategy returns from ``self.results``.

        Prints a reminder instead when ``test_results`` has not been run yet.
        """
        if self.results is None:
            print("Run the Test")
        else:
            title = "{}| SMA_S={} | SMA_L={}".format(self.stock, self.SMA_S, self.SMA_L)
            self.results[['returnsbh','return_strategy_short','return_strategy_long']].plot(title = title, figsize = (10,8))