<img src="http://certificate.tpq.io/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>

# EPAT Session 2

**Executive Program in Algorithmic Trading**

**_Vectorized Backtesting_**

Prof. Dr. Yves J. Hilpisch | The Python Quants GmbH | http://tpq.io

<a href="https://home.tpq.io/certificates/pyalgo" target="_blank"><img src="https://hilpisch.com/pyalgo_cover_color.png" width="300px" align="left" border="1px"></a>

## Basic Imports

In [None]:
import numpy as np
import pandas as pd
from pylab import plt
# The bundled seaborn style sheets were renamed to 'seaborn-v0_8*' in
# matplotlib 3.6 and the old 'seaborn' name was subsequently removed.
# Prefer the new name and fall back to the old one on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')
# Turn off pandas' chained-assignment warning for this notebook.
pd.set_option('mode.chained_assignment', None)

## Reading Financial Data

In [None]:
url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'  # EOD data
# url = 'http://hilpisch.com/aiif_eikon_id_data.csv'  # intraday data

In [None]:
# Read the CSV behind `url`, using its first column as a date-parsed index,
# then discard every row that contains a missing value.
raw = pd.read_csv(url, index_col=0, parse_dates=True)
raw = raw.dropna()

In [None]:
raw.columns

## Regime Detection

In [None]:
sym = 'EUR='

In [None]:
data = pd.DataFrame(raw[sym])

In [None]:
data.plot(figsize=(10, 6));

### Features

In [None]:
data['returns'] = np.log(data[sym] / data[sym].shift(1))

In [None]:
window = 20

In [None]:
data['trend'] = data['returns'].rolling(window).mean()  # rolling time series momentum

In [None]:
data['risk'] = data['returns'].rolling(window).std()  # rolling volatility

In [None]:
cols = ['trend', 'risk']

In [None]:
# data[cols] = (data[cols] - data[cols].mean()) / data[cols].std()  # Gaussian Normalization

In [None]:
data.dropna(inplace=True)

In [None]:
data.plot(kind='scatter', x='risk', y='trend', figsize=(10, 6));

### Clustering

In [None]:
from sklearn.cluster import KMeans

In [None]:
# Unsupervised learning: partition the observations into four clusters.
# random_state is fixed so the cluster labels (and the plots colored by them)
# are reproducible between runs; n_init is pinned to 10, the default before
# scikit-learn 1.4 changed it to 'auto', so results agree across versions.
model = KMeans(n_clusters=4, n_init=10, random_state=100)

In [None]:
model.fit(data[cols])  # unsupervised learning

In [None]:
# Label every row with its cluster, using the same feature list (`cols`)
# the model was fitted on so that fit and predict cannot drift apart.
data['regime'] = model.predict(data[cols])

In [None]:
data['regime'].value_counts()

In [None]:
# Risk vs. trend, with each point colored by its assigned regime.
plt.figure(figsize=(10, 6))
plt.scatter(data['risk'], data['trend'], c=data['regime'], cmap='coolwarm')
plt.xlabel('risk')
plt.ylabel('trend');

In [None]:
# Price series over time, with each observation colored by its regime.
plt.figure(figsize=(10, 6))
plt.scatter(data.index, data[sym], c=data['regime'], cmap='coolwarm',
            marker='.');

## Advanced Trading Strategy

### Data Preprocessing

In [None]:
sym = 'EUR='

In [None]:
data = pd.DataFrame(raw[sym])

In [None]:
# data.plot(figsize=(10, 6));

### Features

In [None]:
data['returns'] = np.log(data[sym] / data[sym].shift(1))

In [None]:
# Classification target: np.sign maps each log return to -1.0, 0.0 or +1.0,
# so a day with an exactly zero return gets its own label 0 alongside
# down (-1) and up (+1).
data['direction'] = np.sign(data['returns'])

In [None]:
# data['direction']

In [None]:
window = 20

In [None]:
data['mom'] = data['returns'].rolling(window).mean()  # rolling time series momentum

In [None]:
data['vol'] = data['returns'].rolling(window).std()  # rolling volatility

In [None]:
features = ['returns']  # currently unused alternatives: 'mom', 'vol'

In [None]:
lags = 5
cols = []
for f in features:
    # Build every lagged copy of feature `f`, keyed by its new column name
    # ('<feature>_lag_<k>' for k = 1..lags), then attach them in that order.
    lagged = {f'{f}_lag_{lag}': data[f].shift(lag)
              for lag in range(1, lags + 1)}
    for col, series in lagged.items():
        data[col] = series
    cols.extend(lagged)

In [None]:
cols

In [None]:
data.dropna(inplace=True)

In [None]:
# data[cols] = (data[cols] - data[cols].mean()) / data[cols].std()  # Gaussian Normalization

In [None]:
# data.head()

### Model Fitting (In-Sample)

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [None]:
model = GaussianNB()  # supervised learning

In [None]:
model = LogisticRegression(C=100)  # supervised learning

In [None]:
model = MLPClassifier(max_iter=500)  # supervised learning

In [None]:
%time model.fit(data[cols], data['direction'])  # supervised learning

In [None]:
data['prediction'] = model.predict(data[cols])

In [None]:
data['prediction'].value_counts()

In [None]:
# accuracy_score takes (y_true, y_pred): ground truth first, predictions second.
accuracy_score(data['direction'], data['prediction'])

### Vectorized Backtesting (In-Sample)

In [None]:
data['strategy'] = data['returns'] * data['prediction']

In [None]:
data[['returns', 'strategy']].sum()  # sum of log returns

In [None]:
data[['returns', 'strategy']].sum().apply(np.exp)  # gross performance

In [None]:
data[['returns', 'strategy']].sum().apply(np.exp) - 1  # net performance

In [None]:
data[['returns', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6));  # gross performance over time

### Train-Test Split

In [None]:
len(data)

In [None]:
split = int(0.9 * len(data))
split

In [None]:
train = data.iloc[:split].copy()
test = data.iloc[split:].copy()

In [None]:
# Standardize the lagged feature columns using the mean and standard
# deviation of the training set only; the same training statistics are
# then applied to the test set.
mu, std = train[cols].mean(), train[cols].std()
train[cols] = (train[cols] - mu) / std
test[cols] = (test[cols] - mu) / std

### Training (In-Sample)

In [None]:
# Supervised learning: MLP classifier with hidden_layer_sizes=[100], at most
# 500 training iterations, and sample shuffling switched off.
# No random_state is set (it is left commented out below), so the fitted
# model, and the results derived from it, can differ between runs.
model = MLPClassifier(hidden_layer_sizes=[100], max_iter=500,
                      # random_state=500,  # uncomment for reproducible runs
                      shuffle=False)  # supervised learning

In [None]:
# MLPClassifier?

In [None]:
model.fit(train[cols], train['direction'])

In [None]:
train['prediction'] = model.predict(train[cols])

In [None]:
train['strategy'] = train['prediction'] * train['returns']

In [None]:
# accuracy_score takes (y_true, y_pred): ground truth first, predictions second.
accuracy_score(train['direction'], train['prediction'])

In [None]:
train[['returns', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6));  # gross performance over time

### Testing (Out-of-Sample)

In [None]:
# Histogram of the out-of-sample log returns.
test['returns'].hist(bins=35);

In [None]:
test['prediction'] = model.predict(test[cols])

In [None]:
test['strategy'] = test['prediction'] * test['returns']

In [None]:
# accuracy_score takes (y_true, y_pred): ground truth first, predictions second.
accuracy_score(test['direction'], test['prediction'])

In [None]:
test[['returns', 'strategy']].cumsum().apply(np.exp).plot(figsize=(10, 6));  # gross performance over time

<img src="http://certificate.tpq.io/tpq_logo.png" alt="The Python Quants" width="35%" align="right" border="0"><br>