In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import OrderedDict
sns.set_context('poster')

from sklearn.preprocessing import  StandardScaler, RobustScaler
from sklearn.manifold import TSNE

# Importing from my own modules
import sys
sys.path.append('../financial_forecasting/')
from utils import load_data, wMSE

In [2]:
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Dropout, Embedding, Input, Reshape, Concatenate
from keras.optimizers import Adam
from keras.layers.normalization import BatchNormalization
from keras.utils import to_categorical

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Load preprocessed data

In [3]:
# Feature matrices for the train / validation / test splits.
X_train = pd.read_csv('../data/preprocessed/train.csv')
X_val = pd.read_csv('../data/preprocessed/validation.csv')
X_test = pd.read_csv('../data/preprocessed/test.csv')

# Sample weights and targets are expected to be single-column files and are
# collapsed to a Series. `read_csv(..., squeeze=True)` was deprecated in
# pandas 1.4 and removed in 2.0, so the column axis is squeezed explicitly.
weights_train = pd.read_csv('../data/preprocessed/train_weights.csv').squeeze('columns')
weights_val = pd.read_csv('../data/preprocessed/validation_weights.csv').squeeze('columns')

y_train = pd.read_csv('../data/preprocessed/train_target.csv').squeeze('columns')
y_val = pd.read_csv('../data/preprocessed/validation_target.csv').squeeze('columns')

# Encode categorical features and re-scale the data

In [4]:
# One hot encoding for Market variable, drop Day variable.
# The set of market levels is taken from the training split and shared by all
# three splits, so every split gets the same dummy columns and the same dropped
# baseline level even when a market is absent from validation or test.
market_levels = sorted(X_train['Market'].dropna().unique())


def encode_market_drop_day(frame):
    """Return a copy of `frame` with Market one-hot encoded and Day removed.

    Market values that do not occur in the training split become NaN in the
    categorical and therefore produce an all-zero dummy row.
    """
    frame = frame.assign(Market=pd.Categorical(frame['Market'], categories=market_levels))
    frame = pd.get_dummies(frame, drop_first=True, columns=['Market'], prefix='Market')
    return frame.drop(labels=['Day'], axis=1)


X_train = encode_market_drop_day(X_train)
X_val = encode_market_drop_day(X_val)
X_test = encode_market_drop_day(X_test)

In [5]:
# Continuous features that are passed through the scaler, in the order in
# which they will appear in the scaled frames.
feats_to_scale = [
    # raw features
    'x0', 'x1', 'x2', 'x3A', 'x3B', 'x3C', 'x3D', 'x3E', 'x4', 'x5', 'x6',
    # log10-transformed features
    'x0_log10', 'x1_log10', 'x2_log10', 'x3A_log10', 'x3B_log10', 'x3C_log10',
    'x3D_log10', 'x3E_log10', 'x4_log10', 'x5_log10', 'x6_log10',
    # mean-encoded features
    'Market_mean_encoded', 'Day_mean_encoded', 'Stock_mean_encoded',
    # log10 difference features
    'x0_log10_diff', 'x1_log10_diff', 'x2_log10_diff', 'x3A_log10_diff',
    'x3B_log10_diff', 'x3C_log10_diff', 'x3D_log10_diff', 'x3E_log10_diff',
    'x4_log10_diff', 'x5_log10_diff', 'x6_log10_diff',
]

# Every other column is left unscaled. The list keeps the column order of
# X_train: a set difference would give an order that depends on string hash
# randomisation, so the feature layout could change between kernel restarts.
_scaled_lookup = set(feats_to_scale)
feats_remaining = [col for col in X_train.columns if col not in _scaled_lookup]

In [6]:
# Scaler for the columns listed in feats_to_scale, created with default settings.
# scikit-learn documents RobustScaler as scaling with statistics that are robust
# to outliers. It is fitted on the training split in the following cell.
scaler = RobustScaler()

In [7]:
# Fit the scaler on the training split only; the same fitted transformation is
# then applied to the training, validation and test splits.
scaler.fit(X_train.loc[:, feats_to_scale])


def _apply_scaler(frame):
    """Scale the `feats_to_scale` columns of `frame` and append the remaining columns unchanged."""
    scaled_part = pd.DataFrame(
        scaler.transform(frame.loc[:, feats_to_scale]),
        columns=feats_to_scale,
        index=frame.index,
    )
    return pd.concat([scaled_part, frame[feats_remaining]], axis=1)


X_train = _apply_scaler(X_train)
X_val = _apply_scaler(X_val)
X_test = _apply_scaler(X_test)

# Train NN with Keras

In [7]:
# Width of the network's input layer: one unit per feature column of X_train.
input_dim = len(X_train.columns)

## Architecture 1

In [27]:
# Architecture 1: three hidden blocks (120 -> 80 -> 80 units), each made of
# Dense -> BatchNormalization -> ReLU -> Dropout(0.2), followed by a single
# linear output unit for regression.
model = Sequential()

hidden_units = (120, 80, 80)
dropout_rate = 0.2

for layer_idx, units in enumerate(hidden_units):
    if layer_idx == 0:
        # Only the first layer declares the input size; later layers take
        # their input width from the output of the previous layer.
        model.add(Dense(units, input_dim=input_dim, kernel_initializer='glorot_uniform'))
    else:
        model.add(Dense(units, kernel_initializer='glorot_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(dropout_rate))

model.add(Dense(1))

# Adam optimiser with its default hyper-parameters, minimising mean squared error.
adam = Adam()
model.compile(loss='mean_squared_error', optimizer=adam)

In [28]:
# Train for 10 epochs in mini-batches of 128 rows, weighting each row's loss
# by its entry in weights_train.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=10,
    sample_weight=weights_train,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1a1c8d37b8>

In [29]:
# Weighted MSE of the fitted model on the training split.
preds = model.predict(X_train)
train_preds_1d = preds.flatten()
wMSE(train_preds_1d, y_train, weights_train)

1.7565806846109713e-06

In [30]:
# Weighted MSE of the fitted model on the validation split.
preds = model.predict(X_val)
val_preds_1d = preds.flatten()
wMSE(val_preds_1d, y_val, weights_val)

1.6988073742324752e-06

## Architecture 2

In [17]:
# Architecture 2: a smaller network with three hidden blocks (64 -> 64 -> 32
# units), each made of Dense -> BatchNormalization -> ReLU with no dropout,
# followed by a single linear output unit for regression.
model = Sequential()

hidden_units = (64, 64, 32)

for layer_idx, units in enumerate(hidden_units):
    if layer_idx == 0:
        # Only the first layer declares the input size; later layers take
        # their input width from the output of the previous layer.
        model.add(Dense(units, input_dim=input_dim, kernel_initializer='glorot_uniform'))
    else:
        model.add(Dense(units, kernel_initializer='glorot_uniform'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

model.add(Dense(1))

# Adam optimiser with its default hyper-parameters, minimising mean squared error.
adam = Adam()
model.compile(loss='mean_squared_error', optimizer=adam)

In [18]:
# Train for a single epoch in mini-batches of 128 rows, weighting each row's
# loss by its entry in weights_train.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=128,
    epochs=1,
    sample_weight=weights_train,
)

Epoch 1/1


<keras.callbacks.History at 0x1a1dc01438>

In [19]:
# Weighted MSE of the fitted model on the training split.
preds = model.predict(X_train)
train_preds_1d = preds.flatten()
wMSE(train_preds_1d, y_train, weights_train)

0.0049049173028181065

In [20]:
# Weighted MSE of the fitted model on the validation split.
preds = model.predict(X_val)
val_preds_1d = preds.flatten()
wMSE(val_preds_1d, y_val, weights_val)

0.00498437638836153

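As expected, a vanilla multilayer perceptron, even with some regularisation (dropout, batch normalisation) and other tricks, does not perform well on structured (tabular) data. Note that Architecture 2 was trained for a single epoch whereas Architecture 1 was trained for ten, so the two sets of errors above are not directly comparable.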