# Backtesting

In [3]:
# Notebook setup: reload edited modules automatically, render plots inline,
# and extend the import path.
# `%autoreload 2` re-imports all modules before each cell executes, so edits to
# imported source files are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import sys
# Add the parent directory to the module search path (index 1, i.e. right after
# the first entry). The path is relative, so it resolves against the kernel's
# working directory. Presumably this is what makes the `quantML` package
# importable -- confirm against the repository layout.
sys.path.insert(1, './..')

In [4]:
from quantML import scrape
from quantML import preprocess
from quantML import ml
from quantML import production
from quantML import backtesting

import pandas as pd
import numpy as np
# Silence every NumPy floating-point warning (divide, over, under, invalid) for
# the whole session; the per-row division by the buy count in the Testing cell
# can hit 0/0. The previous settings are kept so they can be restored with
# np.seterr(**old_settings).
old_settings = np.seterr(all='ignore')
from matplotlib import pyplot as plt
import pickle
import datetime as dt

## Data

In [5]:
# Input files, both relative to the notebook's working directory.
historical_prices_path = 'stock-data/historical/latest-scrape-2020.snappy.parquet'
feature_matrix_path = 'stock-data/features/featureDF-2020.snappy.parquet'

# price_df is later merged with the model predictions; dfml is the frame handed
# to backtesting.generate_predictions.
price_df = pd.read_parquet(historical_prices_path)
dfml = pd.read_parquet(feature_matrix_path)

## Preprocess

In [6]:
# Probe the loaded feature matrix for a row labelled 0. If that lookup fails,
# rebuild the features from the raw prices instead of using the cached file.
# NOTE(review): what "label 0 is present" is meant to guarantee about dfml is
# not visible here -- confirm the intent of this probe.
try:
    dfml.loc[0]
except (KeyError, TypeError):
    # Only lookup failures trigger the rebuild: KeyError when the label is
    # missing, TypeError when the index type rejects an integer label. A bare
    # `except:` would also swallow KeyboardInterrupt/SystemExit and hide
    # unrelated bugs.
    # NOTE(review): feat_days and pred_days are not defined in any cell above
    # this one; this branch raises NameError unless they are set beforehand.
    full_df = preprocess.create_feats_and_preds(price_df, feat_days, pred_days)
    dfml = preprocess.generate_testing_matrix(full_df, feat_days)

## Model Loading, Predictions, and Prices

In [351]:
# Location passed to backtesting.generate_predictions as its model file path.
file_path = 'modelfiles/tabularML'

# Universe of tickers to backtest; order matters because the derived column
# lists are built by iterating over this list.
tickers = [
    'HUM', 'JBHT', 'JNJ', 'LB', 'GPC', 'LH',
    'LEG', 'FOXA', 'PVH', 'WMT', 'KO', 'PNW',
    'FB', 'F', 'CME', 'WMB', 'AVB', 'BR',
    'GOOGL', 'ORLY', 'SYY', 'INFO', 'KMI', 'GE',
    'SCHW', 'MSFT', 'MRK', 'IP', 'RTX', 'AVY',
]

# Thresholds forwarded to generate_predictions.
# NOTE(review): presumably bounds on the predicted change that gate a buy
# signal -- confirm in quantML.backtesting.
low_threshold = 0.02
high_threshold = 0.1

In [352]:
# Per-ticker column names, one list per column family, each in the same order
# as `tickers` so the lists line up position by position.
buy_cols = [f'{sym}_buy' for sym in tickers]
pred_cols = [f'{sym}_pred' for sym in tickers]
price_cols = [f'{sym}_close' for sym in tickers]
fut_cols = [f'{sym}_close_fut' for sym in tickers]
result_cols = [f'{sym}_percent_change' for sym in tickers]

# Date columns that are not ticker-specific.
date_cols = ['current_date', 'prediction_date']

In [353]:
# Produce per-ticker predictions from the feature matrix using the models under
# file_path, then attach prices from price_df.
pred_df = backtesting.generate_predictions(
    tickers,
    dfml,
    file_path,
    low_threshold=low_threshold,
    high_threshold=high_threshold,
)

# Keep every 5th row of the merged frame.
# NOTE(review): the step of 5 presumably matches the prediction horizon so that
# holding periods do not overlap -- confirm.
# .copy() detaches the result from the merged frame: later cells add columns to
# pred_df, and assigning into an .iloc slice triggers SettingWithCopyWarning.
pred_df = (
    backtesting.merge_prices_and_predictions(price_df, pred_df, tickers)
    .iloc[::5, :]
    .copy()
)

In [354]:
# Add one `<ticker>_percent_change` column per ticker: the fractional change
# from the `_close` column to the `_close_fut` column on the same row.
for ticker in tickers:
    col = f'{ticker}_percent_change'
    entry_price = pred_df[f'{ticker}_close']
    exit_price = pred_df[f'{ticker}_close_fut']
    pred_df[col] = (exit_price - entry_price) / entry_price

## Testing

In [355]:
# --- Strategy return -------------------------------------------------------
# Multiply buy flags by realised changes element-wise; both matrices share the
# ticker order of buy_cols/result_cols. Summing across each row gives the total
# change of the positions taken on that row.
buy_matrix = pred_df[buy_cols].to_numpy()
change_matrix = pred_df[result_cols].to_numpy()
returns = pd.DataFrame(buy_matrix * change_matrix).sum(axis=1)

# Row-wise sum of the buy columns, used as the divisor to average the changes.
totalbuys = pred_df[buy_cols].sum(axis=1)

# Average change per row; a NaN quotient (e.g. 0/0 when the buy sum is zero)
# becomes a 0 change, i.e. a growth factor of 1.
mean_change = pd.Series(returns.to_numpy() / totalbuys.to_numpy()).fillna(0)
totalreturn = (mean_change + 1).to_numpy()
pred_df['totalreturn'] = totalreturn

# Compound the per-row growth factors and express the final value in percent.
strategy_growth = pred_df['totalreturn'].cumprod()
finalreturn = (strategy_growth.iloc[-1] - 1) *100
print(f'{finalreturn:.2f}% Algorithmic Return over 2020')

# --- Baseline return -------------------------------------------------------
# Equal-weight benchmark: each row's changes summed over all result columns and
# divided by the number of those columns, then compounded the same way.
n_result_cols = pred_df[result_cols].shape[1]
baseline_growth = (pred_df[result_cols].sum(axis=1) / n_result_cols + 1).cumprod()
basereturn = (baseline_growth.iloc[-1] - 1) *100
print(f'{basereturn:.2f}% Baseline Return over 2020')

14.81% Algorithmic Return over 2020
6.66% Baseline Return over 2020
