<img src="http://certificate.tpq.io/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# EPAT Session 1

**Executive Program in Algorithmic Trading**

**_Vectorized Backtesting_**

Prof. Dr. Yves J. Hilpisch | The Python Quants GmbH | http://tpq.io

<a href="https://home.tpq.io/certificates/pyalgo" target="_blank"><img src="https://hilpisch.com/pyalgo_cover_color.png" width="300px" align="left" border="1px"></a>

## Basic Imports

In [None]:
import numpy as np
import pandas as pd
from pylab import plt
import cufflinks
# Matplotlib 3.6 renamed the bundled 'seaborn' style sheet to 'seaborn-v0_8'
# (the old name was removed in 3.8), so use whichever name this install offers.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')
# Turn off pandas' chained-assignment check for the whole session.
pd.set_option('mode.chained_assignment', None)
# Configure cufflinks with offline=True.
cufflinks.set_config_file(offline=True)

## Reading Financial Data

In [None]:
url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'  # EOD data
# url = 'http://hilpisch.com/aiif_eikon_id_data.csv'  # intraday data

In [None]:
raw = pd.read_csv(url, index_col=0, parse_dates=True).dropna()

In [None]:
raw.info()

In [None]:
raw.tail()

In [None]:
raw.plot(figsize=(10, 6));

In [None]:
(raw / raw.iloc[0]).plot(figsize=(10, 6));

In [None]:
# raw.normalize().iplot()

In [None]:
# raw.corr().iplot(kind='heatmap', colorscale="Blues")

## Efficient Markets

In [None]:
sym = 'AAPL.O'
sym = 'EUR='

In [None]:
data = pd.DataFrame(raw[sym])

In [None]:
data.plot(figsize=(10, 6));

In [None]:
# Build the lag column names first, then fill each column with the
# price series shifted by the matching number of rows.
lags = 5
cols = [f'lag_{k}' for k in range(1, lags + 1)]
for lag, col in enumerate(cols, start=1):
    data[col] = data[sym].shift(lag)

In [None]:
data.head(7)

In [None]:
data.dropna(inplace=True)

In [None]:
data.head()

In [None]:
reg = np.linalg.lstsq(data[cols], data[sym], rcond=-1)[0]

In [None]:
reg

In [None]:
data['OLS_PRED'] = np.dot(data[cols], reg)

In [None]:
data[[sym, 'OLS_PRED']].iloc[-50:].plot()

In [None]:
# data[[sym, 'OLS_PRED']].iplot()

## Simple Trading Strategy

In [None]:
sym = 'AAPL.O'
sym = 'EUR='

In [None]:
data = pd.DataFrame(raw[sym])

In [None]:
data['SMA1'] = data[sym].rolling(42).mean()

In [None]:
data['SMA2'] = data[sym].rolling(252).mean()

In [None]:
data.plot(figsize=(10, 6));

In [None]:
data.dropna(inplace=True)

In [None]:
data.head()

In [None]:
# data['position'] = np.where(data['SMA1'] > data['SMA2'], 'long', 'short')

In [None]:
data['position'] = np.where(data['SMA1'] > data['SMA2'], 1, -1)

In [None]:
data.head()

In [None]:
data.plot(figsize=(10, 6), secondary_y='position');

## Vectorized Backtesting

In [None]:
# data['returns'] = data[sym] / data[sym].shift() - 1  # simple returns

In [None]:
data['returns'] = np.log(data[sym] / data[sym].shift()) # log returns

In [None]:
data.head()

In [None]:
# .shift(1) required to avoid foresight bias
data['strategy'] = data['position'].shift(1) * data['returns']  

In [None]:
data.head()

In [None]:
data[['returns', 'strategy']].sum()  # sum of log returns

In [None]:
data[['returns', 'strategy']].sum().apply(np.exp)  # gross performance

In [None]:
np.exp(data[['returns', 'strategy']].sum()) - 1 # net performance

In [None]:
a = np.arange(10)
a

In [None]:
a.cumsum()

In [None]:
data[['returns', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6));  # gross performance over time

In [None]:
ax = data[['returns', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6))  # gross performance over time
data['position'].plot(ax=ax, secondary_y='position');

## Optimizing the Parameters (OOP Way)

In [None]:
class SMABacktester:
    """Vectorized backtester for a two-SMA crossover strategy on one symbol.

    The position is +1 while the 'SMA1' rolling mean is above the 'SMA2'
    rolling mean and -1 otherwise.  Performance numbers are gross, i.e. the
    exponential of the summed log returns.
    """

    url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'

    def __init__(self, symbol):
        """Store *symbol*, read the data set and build the working frame."""
        self.symbol = symbol
        self.retrieve_data()
        self.prepare_data()

    def retrieve_data(self):
        """Read the CSV at ``self.url`` into ``self.raw``, dropping rows with NaN."""
        self.raw = pd.read_csv(self.url, index_col=0, parse_dates=True).dropna()

    def prepare_data(self):
        """(Re)build ``self.data`` from ``self.raw`` and add a 'returns' column."""
        # Explicit copy: columns added below must never alias self.raw.
        self.data = self.raw[[self.symbol]].copy()
        prices = self.data[self.symbol]
        self.data['returns'] = np.log(prices / prices.shift())  # log returns

    def calculate_statistics(self, SMA1, SMA2):
        """Add rolling means over SMA1 and SMA2 rows as columns 'SMA1'/'SMA2'."""
        prices = self.data[self.symbol]
        self.data['SMA1'] = prices.rolling(SMA1).mean()
        self.data['SMA2'] = prices.rolling(SMA2).mean()

    def backtest_strategy(self, SMA1, SMA2):
        """Backtest one (SMA1, SMA2) pair and return the gross performance.

        Returns a Series indexed by 'returns' (benchmark) and 'strategy'.
        ``self.data`` is left holding the frame used for this backtest.
        """
        # Always start from the full data set: the dropna() calls below trim
        # self.data in place, so reusing a previously backtested frame would
        # silently shorten the sample on every repeated call.
        self.prepare_data()
        self.calculate_statistics(SMA1, SMA2)
        self.data['position'] = np.where(self.data['SMA1'] > self.data['SMA2'], 1, -1)
        self.data.dropna(inplace=True)
        # .shift(1): today's return is earned with yesterday's position
        self.data['strategy'] = self.data['position'].shift(1) * self.data['returns']
        self.data.dropna(inplace=True)
        return self.data[['returns', 'strategy']].sum().apply(np.exp)

    def backtest_program(self, SMA1_, SMA2_, verbose=False):
        """Backtest every combination of SMA1_ x SMA2_ into ``self.results``.

        ``self.results`` gets one row per combination with the columns
        symbol, SMA1, SMA2, benchmark, strategy and outperf.
        """
        # Local import so the class does not depend on a later notebook cell.
        from itertools import product

        columns = ['symbol', 'SMA1', 'SMA2', 'benchmark', 'strategy', 'outperf']
        rows = []
        for SMA1, SMA2 in product(SMA1_, SMA2_):
            perf = self.backtest_strategy(SMA1, SMA2)
            if verbose:
                print(SMA1, SMA2)
                print(perf, '\n')
            rows.append({'symbol': self.symbol, 'SMA1': SMA1, 'SMA2': SMA2,
                         'benchmark': perf['returns'], 'strategy': perf['strategy'],
                         'outperf': perf['strategy'] - perf['returns']})
        # DataFrame.append was removed in pandas 2.0; building the frame once
        # from the collected rows also avoids re-copying it on every iteration.
        self.results = pd.DataFrame(rows, columns=columns)

In [None]:
symbol = 'EUR='

In [None]:
sma = SMABacktester(symbol)

In [None]:
sma.symbol

In [None]:
sma.raw.head()

In [None]:
sma.prepare_data()

In [None]:
sma.data.head()

From **interactive** vectorized backtesting:

    returns     0.838006
    strategy    1.288039

Results from **OOP-based** vectorized backtesting:

In [None]:
sma.backtest_strategy(42, 252)

In [None]:
# sma.data.head()

In [None]:
# sma.data.tail()

In [None]:
# Run the same 42/252 SMA backtest for the first two columns of the data set.
for symbol in raw.columns[:2]:
    print(symbol)
    # a new instance per symbol; its __init__ reads the CSV again
    sma = SMABacktester(symbol)
    perf = sma.backtest_strategy(42, 252)
    print(perf, '\n')

In [None]:
SMA1_ = [30, 40, 50]
SMA2_ = [180, 220, 260]
list(zip(SMA1_, SMA2_))

In [None]:
# One instance is enough: the CSV is read once in __init__, and
# prepare_data() rebuilds the working frame before each parameter pair
# instead of downloading the data again on every iteration.
sma = SMABacktester('EUR=')
for SMA1, SMA2 in zip(SMA1_, SMA2_):
    print(SMA1, SMA2)
    sma.prepare_data()
    perf = sma.backtest_strategy(SMA1, SMA2)
    print(perf, '\n')

In [None]:
from itertools import product

In [None]:
list(product(SMA1_, SMA2_))

In [None]:
# Download the data once and reset the working frame per combination;
# constructing SMABacktester inside the loop would re-read the CSV for
# every one of the len(SMA1_) * len(SMA2_) parameter pairs.
sma = SMABacktester('EUR=')
for SMA1, SMA2 in product(SMA1_, SMA2_):
    print(SMA1, SMA2)
    sma.prepare_data()
    perf = sma.backtest_strategy(SMA1, SMA2)
    print(perf, '\n')

In [None]:
sma = SMABacktester('EUR=')

In [None]:
sma.backtest_program(SMA1_, SMA2_, verbose=False)

In [None]:
sma.results.sort_values('strategy', ascending=False)

In [None]:
sma.results.groupby('SMA1').mean()

In [None]:
sma.results.groupby('SMA2').mean()

In [None]:
SMA1_ = range(20, 121, 5)
SMA2_ = range(150, 301, 10)

In [None]:
%time sma.backtest_program(SMA1_, SMA2_, verbose=False)

In [None]:
sma.results.info()

In [None]:
sma.results.sort_values('strategy', ascending=False).head(7)

In [None]:
sma.results.sort_values('outperf', ascending=False).head(7)

In [None]:
sma = SMABacktester('GLD')

In [None]:
%time sma.backtest_program(SMA1_, SMA2_, verbose=False)

In [None]:
sma.results.sort_values('strategy', ascending=False).head(7)

In [None]:
sma.results.sort_values('outperf', ascending=False).head(7)

## EWMA Strategies

In [None]:
class EWMABacktester:
    """Vectorized backtester for a two-EWMA crossover strategy on one symbol.

    The position is +1 while 'EWMA1' is above 'EWMA2' and -1 otherwise.
    Performance numbers are gross, i.e. the exponential of the summed log
    returns.
    """

    url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'

    def __init__(self, symbol):
        """Store *symbol*, read the data set and build the working frame."""
        self.symbol = symbol
        self.retrieve_data()
        self.prepare_data()

    def retrieve_data(self):
        """Read the CSV at ``self.url`` into ``self.raw``, dropping rows with NaN."""
        self.raw = pd.read_csv(self.url, index_col=0, parse_dates=True).dropna()

    def prepare_data(self):
        """(Re)build ``self.data`` from ``self.raw`` and add a 'returns' column."""
        # Explicit copy: columns added below must never alias self.raw.
        self.data = self.raw[[self.symbol]].copy()
        prices = self.data[self.symbol]
        self.data['returns'] = np.log(prices / prices.shift())  # log returns

    def calculate_statistics(self, alpha1, alpha2):
        """Add exponentially weighted means as columns 'EWMA1' and 'EWMA2'.

        alpha1 and alpha2 are smoothing factors (pandas requires
        0 < alpha <= 1); a larger alpha reacts faster to new prices.
        """
        prices = self.data[self.symbol]
        # Pass alpha by keyword: the first positional parameter of ewm() is
        # `com` (centre of mass), which would reinterpret the value entirely.
        self.data['EWMA1'] = prices.ewm(alpha=alpha1).mean()
        self.data['EWMA2'] = prices.ewm(alpha=alpha2).mean()

    def backtest_strategy(self, alpha1, alpha2):
        """Backtest one (alpha1, alpha2) pair and return the gross performance.

        Returns a Series indexed by 'returns' (benchmark) and 'strategy'.
        ``self.data`` is left holding the frame used for this backtest.
        """
        # Always start from the full data set: the dropna() calls below trim
        # self.data in place, so repeated calls would otherwise lose rows.
        self.prepare_data()
        self.calculate_statistics(alpha1, alpha2)
        self.data['position'] = np.where(self.data['EWMA1'] > self.data['EWMA2'], 1, -1)
        self.data.dropna(inplace=True)
        # .shift(1): today's return is earned with yesterday's position
        self.data['strategy'] = self.data['position'].shift(1) * self.data['returns']
        self.data.dropna(inplace=True)
        return self.data[['returns', 'strategy']].sum().apply(np.exp)

    def backtest_program(self, alpha1_, alpha2_, verbose=False):
        """Backtest every combination of alpha1_ x alpha2_ into ``self.results``.

        ``self.results`` gets one row per combination with the columns
        symbol, alpha1, alpha2, benchmark, strategy and outperf.
        """
        # Local import so the class does not depend on an earlier notebook cell.
        from itertools import product

        columns = ['symbol', 'alpha1', 'alpha2', 'benchmark', 'strategy', 'outperf']
        rows = []
        for alpha1, alpha2 in product(alpha1_, alpha2_):
            perf = self.backtest_strategy(alpha1, alpha2)
            if verbose:
                print(alpha1, alpha2)
                print(perf, '\n')
            rows.append({'symbol': self.symbol, 'alpha1': alpha1, 'alpha2': alpha2,
                         'benchmark': perf['returns'], 'strategy': perf['strategy'],
                         'outperf': perf['strategy'] - perf['returns']})
        # DataFrame.append was removed in pandas 2.0; building the frame once
        # from the collected rows also avoids re-copying it on every iteration.
        self.results = pd.DataFrame(rows, columns=columns)

In [None]:
ewma = EWMABacktester('EUR=')

In [None]:
ewma.backtest_strategy(0.5, 0.001)

In [None]:
ewma.data[[ewma.symbol, 'EWMA1', 'EWMA2']].iloc[-50:].plot(figsize=(10, 6));

In [None]:
ewma.data[['returns', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6));

In [None]:
alpha1_ = np.linspace(0.001, 0.1, 15)
alpha2_ = np.linspace(0.01, 0.5, 15)

In [None]:
%time ewma.backtest_program(alpha1_, alpha2_)

In [None]:
ewma.results.sort_values('outperf', ascending=False).head(7)

<img src="http://certificate.tpq.io/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>