In [12]:
import numpy as np
import pandas as pd
from pylab import plt, mpl 
# Matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# (the bare 'seaborn' name was later removed), so use whichever one exists.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available
              else 'seaborn')
mpl.rcParams['savefig.dpi'] = 300  # resolution used when figures are saved
mpl.rcParams['font.family'] = 'serif'
pd.set_option("display.precision", 4)  # pandas: display 4 decimal places
np.set_printoptions(suppress=True, precision=4)  # numpy: no sci-notation, 4 digits

In [13]:
# Remote location of the CSV file that is read into `data` below.
url = 'http://hilpisch.com/aiif_eikon_id_data.csv'

In [14]:
# Use the first CSV column as the index and parse it as dates.
data = pd.read_csv(url, index_col=0, parse_dates=True)

In [15]:
# Print the index range, column names, non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 5529 entries, 2019-03-01 00:00:00 to 2020-01-01 00:00:00
Data columns (total 12 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AAPL.O  3384 non-null   float64
 1   MSFT.O  3378 non-null   float64
 2   INTC.O  3275 non-null   float64
 3   AMZN.O  3381 non-null   float64
 4   GS.N    1686 non-null   float64
 5   SPY     3388 non-null   float64
 6   .SPX    1802 non-null   float64
 7   .VIX    2959 non-null   float64
 8   EUR=    5429 non-null   float64
 9   XAU=    5149 non-null   float64
 10  GDX     3173 non-null   float64
 11  GLD     3351 non-null   float64
dtypes: float64(12)
memory usage: 561.5 KB


In [23]:
def train_test_model(model):
    """Fit `model` per instrument and print its out-of-sample accuracy.

    For every column name in the module-level `data` frame, the matching
    `(frame, feature_columns)` pair is looked up in the module-level `dfs`
    dict. The first 85% of the rows (by position) form the training set,
    the remainder the test set. Feature columns of both sets are
    standardised with the mean and standard deviation of the training set
    only. The model is (re)fitted on the training features against the
    label column 'd', and the accuracy of its predictions on the test set
    is printed.

    Parameters
    ----------
    model : object
        Estimator exposing `fit(X, y)` and `predict(X)`. The same instance
        is fitted again for each instrument.

    Returns
    -------
    None
        Results are printed, one line per instrument.
    """
    for ric in data:
        frame, features = dfs[ric]
        cut = int(len(frame) * 0.85)
        train = frame.iloc[:cut].copy()
        test = frame.iloc[cut:].copy()

        # Normalisation statistics come from the training rows only and
        # are then applied unchanged to the test rows.
        mu = train[features].mean()
        std = train[features].std()
        train[features] = (train[features] - mu) / std
        test[features] = (test[features] - mu) / std

        model.fit(train[features].values, train['d'])
        acc = accuracy_score(test['d'], model.predict(test[features].values))
        print(f'SPOZA PRÓBKI | {ric:7s} | acc={acc:.4f}')

In [24]:
def add_lags(data, ric, lags, window=50):
    """Build a feature frame with lagged columns for one instrument.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding a column named `ric`.
    ric : str
        Name of the column to process.
    lags : int
        Number of lags (1..lags) created for every base feature.
    window : int, default 50
        Length of the rolling window used for the rolling statistics.

    Returns
    -------
    (pandas.DataFrame, list of str)
        The frame with base features and lagged columns (rows containing
        NaN removed), and the names of the lagged columns in creation order.

    Base features: the series itself, its log return 'r', rolling mean
    'sma', rolling 'min' and 'max' of the series, rolling mean 'mom' and
    rolling standard deviation 'vol' of the returns, and the binary
    direction 'd' (1 where the return is positive, else 0).
    """
    df = pd.DataFrame(data[ric])
    df.dropna(inplace=True)

    # Log return relative to the previous row.
    df['r'] = np.log(df[ric] / df[ric].shift())

    # Rolling statistics of the series itself.
    level_win = df[ric].rolling(window)
    df['sma'] = level_win.mean()
    df['min'] = level_win.min()
    df['max'] = level_win.max()

    # Rolling statistics of the log returns.
    return_win = df['r'].rolling(window)
    df['mom'] = return_win.mean()
    df['vol'] = return_win.std()

    df.dropna(inplace=True)
    df['d'] = np.where(df['r'] > 0, 1, 0)

    base = [ric, 'r', 'd', 'sma', 'min', 'max', 'mom', 'vol']
    pairs = [(name, k) for name in base for k in range(1, lags + 1)]
    cols = [f'{name}_lag_{k}' for name, k in pairs]
    for col, (name, k) in zip(cols, pairs):
        df[col] = df[name].shift(k)

    # Shifting introduces NaN in the first `lags` rows.
    df.dropna(inplace=True)
    return df, cols

In [25]:
# Number of lagged copies created per base feature by add_lags().
lags = 5

In [26]:
# Map each column name of `data` to its (feature frame, lagged column names)
# pair as returned by add_lags(); train_test_model() reads from this dict.
dfs = {}
for ric in data:
    df, cols = add_lags(data, ric, lags)
    dfs[ric] = df, cols

In [31]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier

In [32]:
# MLP classifier with one hidden layer of 512 units and a fixed random_state.
# Early stopping is enabled with a validation fraction of 0.15.
model_mlp = MLPClassifier(hidden_layer_sizes=[512], random_state=100, 
                            max_iter=1000, early_stopping=True, 
                            validation_fraction=0.15, shuffle=False) 

In [33]:
%time 
train_test_model(model_mlp)

CPU times: total: 0 ns
Wall time: 0 ns
SPOZA PRÓBKI | AAPL.O  | acc=0.5420
SPOZA PRÓBKI | MSFT.O  | acc=0.4930
SPOZA PRÓBKI | INTC.O  | acc=0.5549
SPOZA PRÓBKI | AMZN.O  | acc=0.4709
SPOZA PRÓBKI | GS.N    | acc=0.5184
SPOZA PRÓBKI | SPY     | acc=0.4860
SPOZA PRÓBKI | .SPX    | acc=0.5019
SPOZA PRÓBKI | .VIX    | acc=0.4885
SPOZA PRÓBKI | EUR=    | acc=0.5130
SPOZA PRÓBKI | XAU=    | acc=0.4824
SPOZA PRÓBKI | GDX     | acc=0.4765
SPOZA PRÓBKI | GLD     | acc=0.5455


In [35]:
# MLP classifier with one hidden layer of 256 units and a fixed random_state;
# used as the estimator wrapped by the bagging ensemble defined below.
base_estimator = MLPClassifier(hidden_layer_sizes=[256], random_state=100, 
                                 max_iter=1000, early_stopping=True, 
                                 validation_fraction=0.15, shuffle=False)

In [36]:
# Bagging ensemble of 35 MLPs; each member is trained on 25% of the samples
# (drawn without replacement) and 50% of the features (drawn with replacement).
# The wrapped estimator is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional slot is the same in both.
model_bag = BaggingClassifier(base_estimator, n_estimators=35,
                              max_samples=0.25, max_features=0.5,
                              bootstrap=False, bootstrap_features=True,
                              n_jobs=6, random_state=100)

In [37]:
%time 
train_test_model(model_bag) 

CPU times: total: 0 ns
Wall time: 0 ns
SPOZA PRÓBKI | AAPL.O  | acc=0.5660
SPOZA PRÓBKI | MSFT.O  | acc=0.5551
SPOZA PRÓBKI | INTC.O  | acc=0.5072
SPOZA PRÓBKI | AMZN.O  | acc=0.4830
SPOZA PRÓBKI | GS.N    | acc=0.5020
SPOZA PRÓBKI | SPY     | acc=0.4680
SPOZA PRÓBKI | .SPX    | acc=0.4677
SPOZA PRÓBKI | .VIX    | acc=0.5161
SPOZA PRÓBKI | EUR=    | acc=0.5242
SPOZA PRÓBKI | XAU=    | acc=0.5229
SPOZA PRÓBKI | GDX     | acc=0.5107
SPOZA PRÓBKI | GLD     | acc=0.5475
