In [1]:
import numpy as np
import pandas as pd
from pylab import plt, mpl 
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and newer releases no longer ship the old names, so use whichever exists.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')
mpl.rcParams['savefig.dpi'] = 300  # resolution used when figures are saved
mpl.rcParams['font.family'] = 'serif'
pd.set_option("display.precision", 4)  # decimals shown for pandas output
np.set_printoptions(suppress=True, precision=4)  # no scientific notation

In [2]:
# Location of the remote CSV file that is read into the `data` frame.
url = 'http://hilpisch.com/aiif_eikon_eod_data.csv'

In [3]:
# Use the first CSV column as the (date-parsed) index and discard every row
# that contains a missing value.
data = (
    pd.read_csv(url, index_col=0, parse_dates=True)
    .dropna()
)

In [4]:
def add_lags(data, ric, lags, window=50):
    """Build a frame of rolling features and their lags for one column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the series to process in column ``ric``.
    ric : str
        Name of the column to derive the features from.
    lags : int
        Number of lags (1 .. ``lags``) created for every feature.
    window : int, default 50
        Length of the rolling window used for the statistics.

    Returns
    -------
    tuple
        ``(frame, cols)`` where ``frame`` holds the original column, the
        derived features ``r`` (log return), ``sma``, ``min``, ``max``,
        ``mom`` (rolling mean of ``r``), ``vol`` (rolling std of ``r``),
        ``d`` (1 where ``r > 0``, else 0) and all lag columns, with
        incomplete rows removed; ``cols`` lists the lag column names.
    """
    frame = pd.DataFrame(data[ric]).dropna()
    prices = frame[ric]

    # Log return relative to the previous observation.
    frame['r'] = np.log(prices / prices.shift())

    # Rolling statistics of the level ...
    price_window = prices.rolling(window)
    frame['sma'] = price_window.mean()
    frame['min'] = price_window.min()
    frame['max'] = price_window.max()

    # ... and of the log returns.
    return_window = frame['r'].rolling(window)
    frame['mom'] = return_window.mean()
    frame['vol'] = return_window.std()

    # Rows lacking a full window are removed before the direction is derived.
    frame = frame.dropna()
    frame['d'] = np.where(frame['r'] > 0, 1, 0)

    base_features = [ric, 'r', 'd', 'sma', 'min', 'max', 'mom', 'vol']
    steps = range(1, lags + 1)
    cols = [f'{name}_lag_{step}' for name in base_features for step in steps]
    for name in base_features:
        for step in steps:
            frame[f'{name}_lag_{step}'] = frame[name].shift(step)

    # Shifting leaves NaNs in the first `lags` rows; drop them.
    return frame.dropna(), cols

In [5]:
# Number of lags passed to add_lags(); each feature gets lag columns 1..lags.
lags = 5

In [6]:
# Cache one (feature frame, lag column names) pair per column of `data`.
dfs = dict()
for ric in data.columns:
    df, cols = add_lags(data, ric, lags)
    dfs[ric] = (df.dropna(), cols)

In [13]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

In [14]:
%%time 
# In-sample evaluation: for every instrument a fresh MLP is fitted on the
# standardized lag features and scored on the very same rows.
for ric in data:
    model = MLPClassifier(hidden_layer_sizes=[512],
                          random_state=100,
                          max_iter=1000,
                          early_stopping=True,
                          validation_fraction=0.15,
                          shuffle=False)
    df, cols = dfs[ric]
    # Standardize into a separate frame instead of writing back into `df`:
    # `df` is the object cached in `dfs`, so an in-place update would change
    # the data every later cell sees and make this cell non-idempotent.
    features = (df[cols] - df[cols].mean()) / df[cols].std()
    model.fit(features.values, df['d'])
    pred = model.predict(features.values)
    acc = accuracy_score(df['d'], pred)
    print(f'Z PRÓBKI | {ric:7s} | acc={acc:.4f}')

Z PRÓBKI | AAPL.O  | acc=0.5510
Z PRÓBKI | MSFT.O  | acc=0.5376
Z PRÓBKI | INTC.O  | acc=0.5607
Z PRÓBKI | AMZN.O  | acc=0.5559
Z PRÓBKI | GS.N    | acc=0.5794
Z PRÓBKI | SPY     | acc=0.5729
Z PRÓBKI | .SPX    | acc=0.5941
Z PRÓBKI | .VIX    | acc=0.6940
Z PRÓBKI | EUR=    | acc=0.5766
Z PRÓBKI | XAU=    | acc=0.5672
Z PRÓBKI | GDX     | acc=0.5847
Z PRÓBKI | GLD     | acc=0.5567
CPU times: total: 30.6 s
Wall time: 5.12 s


In [16]:
def create_model(problem='regression', input_dim=None):
    """Create and compile a Keras model with one hidden layer of 512 ReLU units.

    Parameters
    ----------
    problem : str, default 'regression'
        'regression' adds a single linear output unit and compiles with MSE
        loss; any other value adds a single sigmoid output unit and compiles
        with binary cross-entropy loss. Both use the Adam optimizer.
    input_dim : int, optional
        Number of input features. When omitted, ``len(cols)`` of the
        notebook-level ``cols`` list is used, which is the value the function
        relied on before this parameter existed.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    if input_dim is None:
        # Backward-compatible fallback to the notebook-global feature list.
        input_dim = len(cols)
    model = Sequential()
    model.add(Dense(512, input_dim=input_dim, activation='relu'))
    if problem == 'regression':
        model.add(Dense(1, activation='linear'))
        model.compile(loss='mse', optimizer='adam')
    else:
        model.add(Dense(1, activation='sigmoid'))
        model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

In [18]:
import tensorflow as tf
from keras.layers import Dense
from keras.models import Sequential

In [19]:
# Seed the global NumPy and TensorFlow random number generators.
np.random.seed(100)
tf.random.set_seed(100)

In [20]:
%%time 
# In-sample evaluation of the Keras classifier: fit and score on the same rows.
for ric in data:
    # Look up the features first so that create_model() sees the `cols` of
    # the current instrument rather than a value left over from another cell.
    df, cols = dfs[ric]
    model = create_model('classification')
    # Standardize into a separate frame; writing back into `df` would alter
    # the frames cached in `dfs` for every later cell.
    features = (df[cols] - df[cols].mean()) / df[cols].std()
    model.fit(features, df['d'], epochs=50, verbose=False)
    # predict() returns one probability per row as an (n, 1) array; threshold
    # at 0.5 and flatten to the 1-D label vector accuracy_score expects.
    pred = np.where(model.predict(features) > 0.5, 1, 0).flatten()
    acc = accuracy_score(df['d'], pred)
    print(f'Z PRÓBKI | {ric:7s} | acc={acc:.4f}')

Z PRÓBKI | AAPL.O  | acc=0.7087
Z PRÓBKI | MSFT.O  | acc=0.6896
Z PRÓBKI | INTC.O  | acc=0.7030
Z PRÓBKI | AMZN.O  | acc=0.6721
Z PRÓBKI | GS.N    | acc=0.6932
Z PRÓBKI | SPY     | acc=0.6875
Z PRÓBKI | .SPX    | acc=0.6875
Z PRÓBKI | .VIX    | acc=0.7489
Z PRÓBKI | EUR=    | acc=0.6839
Z PRÓBKI | XAU=    | acc=0.6912
Z PRÓBKI | GDX     | acc=0.6790
Z PRÓBKI | GLD     | acc=0.7030
CPU times: total: 2min 35s
Wall time: 1min 49s


In [25]:
def train_test_model(model, train_frac=0.85):
    """Fit `model` per instrument on the leading rows and print test accuracy.

    For every column of the notebook-level ``data`` frame the cached
    ``(frame, cols)`` pair is taken from ``dfs``. The first ``train_frac``
    share of the rows is used for fitting and the remaining rows for scoring
    (a sequential split, no shuffling). Features are standardized with the
    mean and standard deviation of the training rows only.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``; it is re-fitted for
        each instrument.
    train_frac : float, default 0.85
        Share of rows used for training; must lie strictly between 0 and 1.

    Raises
    ------
    ValueError
        If ``train_frac`` is not strictly between 0 and 1.
    """
    if not 0 < train_frac < 1:
        raise ValueError('train_frac must lie strictly between 0 and 1')
    for ric in data:
        df, cols = dfs[ric]
        split = int(len(df) * train_frac)
        # Work on copies so the frames cached in `dfs` stay untouched.
        train = df.iloc[:split].copy()
        test = df.iloc[split:].copy()
        # Statistics come from the training rows only and are reused for the
        # test rows, so no test information enters the normalization.
        mu, std = train[cols].mean(), train[cols].std()
        train[cols] = (train[cols] - mu) / std
        test[cols] = (test[cols] - mu) / std
        model.fit(train[cols].values, train['d'])
        pred = model.predict(test[cols].values)
        acc = accuracy_score(test['d'], pred)
        print(f'SPOZA PRÓBKI | {ric:7s} | acc={acc:.4f}')

In [26]:
# Single-hidden-layer MLP (512 units); early stopping holds out 15% of the
# training rows for validation.
model_mlp = MLPClassifier(
    hidden_layer_sizes=[512],
    random_state=100,
    max_iter=1000,
    early_stopping=True,
    validation_fraction=0.15,
    shuffle=False,
)

In [27]:
%time 
train_test_model(model_mlp) 

CPU times: total: 0 ns
Wall time: 0 ns
SPOZA PRÓBKI | AAPL.O  | acc=0.4432
SPOZA PRÓBKI | MSFT.O  | acc=0.4595
SPOZA PRÓBKI | INTC.O  | acc=0.5000
SPOZA PRÓBKI | AMZN.O  | acc=0.5270
SPOZA PRÓBKI | GS.N    | acc=0.4838
SPOZA PRÓBKI | SPY     | acc=0.4811
SPOZA PRÓBKI | .SPX    | acc=0.5027
SPOZA PRÓBKI | .VIX    | acc=0.5676
SPOZA PRÓBKI | EUR=    | acc=0.4649
SPOZA PRÓBKI | XAU=    | acc=0.5514
SPOZA PRÓBKI | GDX     | acc=0.5162
SPOZA PRÓBKI | GLD     | acc=0.4946


In [28]:
from sklearn.ensemble import BaggingClassifier

In [29]:
# MLP with a single hidden layer of 256 units, used as the member model of
# the bagging ensemble.
base_estimator = MLPClassifier(
    hidden_layer_sizes=[256],
    random_state=100,
    max_iter=1000,
    early_stopping=True,
    validation_fraction=0.15,
    shuffle=False,
)

In [30]:
# The member estimator is passed positionally: scikit-learn 1.2 renamed the
# `base_estimator` keyword to `estimator` and later releases removed the old
# name, while the first positional argument is the estimator in both cases.
model_bag = BaggingClassifier(
    base_estimator,
    n_estimators=35,          # number of MLPs in the ensemble
    max_samples=0.25,         # share of training rows drawn per member
    max_features=0.5,         # share of features drawn per member
    bootstrap=False,          # rows are drawn without replacement
    bootstrap_features=True,  # features are drawn with replacement
    n_jobs=6,
    random_state=100,
)

In [33]:
train_test_model(model_bag) 
%time 

SPOZA PRÓBKI | AAPL.O  | acc=0.5000
SPOZA PRÓBKI | MSFT.O  | acc=0.5703
SPOZA PRÓBKI | INTC.O  | acc=0.5054
SPOZA PRÓBKI | AMZN.O  | acc=0.5270
SPOZA PRÓBKI | GS.N    | acc=0.5135
SPOZA PRÓBKI | SPY     | acc=0.5568
SPOZA PRÓBKI | .SPX    | acc=0.5514
SPOZA PRÓBKI | .VIX    | acc=0.5432
SPOZA PRÓBKI | EUR=    | acc=0.5054
SPOZA PRÓBKI | XAU=    | acc=0.5351
SPOZA PRÓBKI | GDX     | acc=0.5054
SPOZA PRÓBKI | GLD     | acc=0.5189
CPU times: total: 0 ns
Wall time: 0 ns
