# Vectorized Backtest

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
from pathlib import Path
from time import time
import datetime

import numpy as np
import pandas as pd
import pandas_datareader.data as web

from scipy.stats import spearmanr

import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
import seaborn as sns

In [3]:
# Notebook-wide configuration: seaborn grid styling and a fixed NumPy seed.
sns.set_style(style='whitegrid')
np.random.seed(seed=42)

## Load Data

### Return Predictions

In [4]:
# Relative location of the shared data folder (one level above the notebook).
DATA_DIR = Path('..') / 'data'

In [6]:
# Read the 'data' table from the HDF5 store, then print its schema summary
# (index range, per-column non-null counts, dtypes, memory usage).
data = pd.read_hdf('00_data/backtest.h5', key='data')
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 190960 entries, ('AAPL', Timestamp('2014-12-09 00:00:00')) to ('HTZ', Timestamp('2017-11-30 00:00:00'))
Data columns (total 6 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   predicted  74038 non-null   float64
 1   open       190960 non-null  float64
 2   high       190960 non-null  float64
 3   low        190960 non-null  float64
 4   close      190960 non-null  float64
 5   volume     190960 non-null  float64
dtypes: float64(6)
memory usage: 10.2+ MB


### S&P 500 Benchmark

In [None]:
# Download the 'SP500' series from FRED for 2014-2018 and convert the
# levels into period-over-period percentage changes.
sp500 = web.DataReader(
    name='SP500',
    data_source='fred',
    start='2014',
    end='2018',
).pct_change()

In [None]:
# Print a summary of the benchmark return frame (index range, non-null counts, dtypes).
sp500.info()

## Compute Forward Returns

In [None]:
# Pivot the open prices to a wide frame (one column per ticker), order the
# rows by index, and take the row-over-row percentage change.
daily_returns = (
    data['open']
    .unstack(level='ticker')
    .sort_index()
    .pct_change()
)
daily_returns.info()

In [None]:
# Move every return up by one row, so the row for a given date holds the
# return that `daily_returns` records on the following row.
fwd_returns = daily_returns.shift(periods=-1)

## Generate Signals

In [None]:
# Pivot the model output to a wide frame with one column per ticker.
predictions = data['predicted'].unstack(level='ticker')
predictions.info()

In [None]:
# Size of the long book and of the short book (number of tickers per side).
N_LONG = 15
N_SHORT = 15

In [None]:
# Long book: per row, flag the N_LONG tickers with the largest positive
# predictions. `.where` masks non-positive predictions to NaN, and
# `rank(ascending=False)` assigns rank 1 to the largest remaining value, so
# the top names are those with `rank <= N_LONG`. (The previous `> N_LONG`
# comparison selected every candidate *except* the top N.) NaN ranks compare
# False and therefore become 0, i.e. no position.
long_signals = ((predictions
                 .where(predictions > 0)
                 .rank(axis=1, ascending=False) <= N_LONG)
                .astype(int))
# Short book: the default ascending rank assigns rank 1 to the most negative
# prediction, so `rank <= N_SHORT` flags the N_SHORT lowest negative values.
short_signals = ((predictions
                  .where(predictions < 0)
                  .rank(axis=1) <= N_SHORT)
                 .astype(int))

## Compute Portfolio Returns

In [None]:
# Per-row portfolio return for each side: signal (0/1) times the forward
# return, averaged across the ticker columns. The short side earns the
# negated forward return.
# NOTE(review): `.mean(axis=1)` averages over every ticker column with a
# non-missing product, not only over the flagged positions -- confirm this
# weighting is intended.
long_returns = (long_signals * fwd_returns).mean(axis=1)
short_returns = (short_signals * -fwd_returns).mean(axis=1)
strategy = (long_returns + short_returns).to_frame('Strategy')

## Plot Results

In [None]:
# Left panel: compounded return of the strategy next to the benchmark.
# Right panel: kernel density estimate of each series' daily returns.
fig, axes = plt.subplots(ncols=2, figsize=(14, 5))
strategy.join(sp500).add(1).cumprod().sub(1).plot(ax=axes[0], title='Cumulative Return')
# `sns.distplot` is deprecated and scheduled for removal from seaborn;
# `sns.kdeplot` is the direct replacement for the `hist=False` usage.
# `squeeze(axis='columns')` turns a single-column frame into the 1-D Series
# kdeplot expects (assumes `sp500` has exactly one column -- TODO confirm).
sns.kdeplot(strategy.squeeze(axis='columns').dropna(), ax=axes[1], label='Strategy')
sns.kdeplot(sp500.squeeze(axis='columns').dropna(), ax=axes[1], label='SP500')
# kdeplot does not draw a legend for plain `label=` arguments; add it explicitly.
axes[1].legend()
axes[1].set_title('Daily Standard Deviation')
# Show return values as whole percentages on both panels.
axes[0].yaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
axes[1].xaxis.set_major_formatter(FuncFormatter(lambda y, _: '{:.0%}'.format(y)))
sns.despine()
fig.tight_layout();

In [None]:
# Align strategy and benchmark returns on the strategy's index and keep only
# the rows where both are available.
res = strategy.join(sp500, how='left').dropna(axis='index')

In [None]:
# Sample standard deviation (ddof=1) of each return series.
res.std(ddof=1)

In [None]:
# Pearson correlation between the strategy and the benchmark returns.
res.corr(method='pearson')