# Test Project — Algorithmic Trading

## Initialization

### Import

In [1]:
import numpy as np
import pandas as pd
from pylab import plt, mpl

### Configuration

In [2]:
plt.style.use('seaborn-v0_8')
mpl.rcParams['savefig.dpi'] = 300
mpl.rcParams['font.family'] = 'serif'

## Data Retrieval

In [3]:
url = 'http://hilpisch.com/ref_eikon_eod_data.csv'

In [4]:
raw = pd.read_csv(url, index_col=0, parse_dates=True)

In [5]:
data = pd.DataFrame(raw['GDX'])

In [6]:
data.dropna(inplace=True)

In [7]:
data.head()

Unnamed: 0_level_0,GDX
Date,Unnamed: 1_level_1
2010-01-04,47.71
2010-01-05,48.17
2010-01-06,49.34
2010-01-07,49.1
2010-01-08,49.84


## Predictions
- log return
- direction (up or down)
- log return as 5 categories
- two SMAs (short and long window)
- difference between the SMAs
- two EWMAs (short and long window)
- difference between the EWMAs
- two rolling volatilities (short and long window)

In [8]:
short_window = 5
long_window = 42

log return

In [9]:
data['r'] = np.log(data['GDX'] / data['GDX'].shift(1))

direction (up or down)

In [10]:
data['d'] = np.where(data['r'] > 0, 1, -1)

-----
Nicht sicher, ob der Code richtig ist

-----

two SMAs (short and long window)

In [11]:
data['sma_short'] =  data['GDX'].rolling(short_window).mean()

In [12]:
data['sma_long'] = data['GDX'].rolling(long_window).mean()

difference between the SMAs

In [13]:
data['mom_sma'] = np.where(data['sma_short'] > data['sma_long'], 1, -1)

two EWMAs (short and long window)

difference between the EWMAs

two rolling volatilities (short and long window)

In [14]:
data['vol_short'] =  data['r'].rolling(short_window).std()

In [15]:
data['vol_long'] = data['r'].rolling(long_window).std()

In [16]:
features = ['r', 'd', 'sma_short', 'sma_long',  'mom_sma', 'vol_short', 'vol_long']

In [17]:
data.dropna(inplace=True)

In [18]:
data.head()

Unnamed: 0_level_0,GDX,r,d,sma_short,sma_long,mom_sma,vol_short,vol_long
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-03-05,46.43,0.018477,1,45.7048,44.798429,1,0.015847,0.025611
2010-03-08,45.95,-0.010392,-1,45.9668,44.745571,1,0.017966,0.025601
2010-03-09,45.58,-0.008085,-1,45.9808,44.656048,1,0.016961,0.02531
2010-03-10,44.96,-0.013696,-1,45.7,44.557476,1,0.014178,0.025372
2010-03-11,45.32,0.007975,1,45.648,44.449857,1,0.013778,0.025281


## Create lagged features data for 5 lags

In [19]:
lags = 5
cols = list()
for f in features:
    for lag in range(1, lags + 1):
            col = f'{f}_lag_{lag}'
            data[col] = data['r'].shift(lag)
            cols.append('Hallo')

In [20]:
data.dropna(inplace=True)

## Split & Normalize the Data

* Split the data set into training (70%) and testing data.
* Normalize the training features data to have
    * zero mean and
    * standard deviation of one.
* Normalize the test features data by the same moment values as the training data.

In [21]:
split = int(len(data) * 0.7)

In [22]:
train = data.iloc[:split].copy()

In [23]:
mu, std = train.mean(), train.std()

In [24]:
train_norm = (train - mu) / std

In [25]:
test = data.iloc[split:].copy()

In [26]:
test_norm = (test - mu) / std

In [27]:
train_norm.head()

Unnamed: 0_level_0,GDX,r,d,sma_short,sma_long,mom_sma,vol_short,vol_long,r_lag_1,r_lag_2,...,vol_short_lag_1,vol_short_lag_2,vol_short_lag_3,vol_short_lag_4,vol_short_lag_5,vol_long_lag_1,vol_long_lag_2,vol_long_lag_3,vol_long_lag_4,vol_long_lag_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-03-12,0.548041,-0.278755,-0.990587,0.57032,0.498346,1.069855,-1.343973,0.187615,0.331543,-0.534818,...,0.331543,-0.534818,-0.311839,-0.404665,0.751894,0.331543,-0.534818,-0.311839,-0.404665,0.751894
2010-03-15,0.544923,-0.030967,-0.990587,0.557708,0.493242,1.069855,-1.353755,0.106062,-0.280024,0.332517,...,-0.280024,0.332517,-0.536727,-0.312179,-0.405189,-0.280024,0.332517,-0.536727,-0.312179,-0.405189
2010-03-16,0.618513,1.049986,1.008886,0.564451,0.48914,1.069855,-0.69217,0.147441,-0.032082,-0.279168,...,-0.032082,-0.279168,0.331848,-0.537108,-0.312718,-0.032082,-0.279168,0.331848,-0.537108,-0.312718
2010-03-17,0.627867,0.143354,1.008886,0.580809,0.485653,1.069855,-0.951384,0.147705,1.049542,-0.031178,...,1.049542,-0.031178,-0.280711,0.331624,-0.537611,1.049542,-0.031178,-0.280711,0.331624,-0.537611
2010-03-18,0.609158,-0.246627,-0.990587,0.588925,0.483483,1.069855,-0.860414,0.11195,0.142347,1.050653,...,0.142347,1.050653,-0.032367,-0.281046,0.330983,0.142347,1.050653,-0.032367,-0.281046,0.330983


## Train and (back-)test the following algorithms for directional (long/short) trading (from `scikit-learn`):
* `GaussianNB()`
* `LogisticRegression()`
* `DecisionTreeClassifier()`
* `SVC()`
* `MLPClassifier()`
