Keras - Autoencoder

In [None]:
import pandas as pd
from pandas_datareader import data as pdr

# Dow Jones 30 constituents, read from the components table of the Wikipedia page.
# NOTE(review): `[2]` selects the third table on the page — this depends on the page
# layout; confirm it still yields a table with a "Symbol" column.
symbols_table = pd.read_html("https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average#Components",
 header=0)[2]
symbols = list(symbols_table.loc[:, "Symbol"])
index_symbol = ['^DJI']


# Dates
start_date = '2008-01-01'
end_date = '2017-12-31'

# Download the index once. The request does not depend on the constituent being
# processed, so it is issued outside the per-symbol loop instead of once per symbol.
data_index = pdr.DataReader(index_symbol, 'yahoo', start_date, end_date)['Adj Close']

# Download the constituents: one 'Adj Close' column per symbol
data = pd.DataFrame()
for i, symbol in enumerate(symbols):
    print('Downloading.... ', i, symbol)

    # Use pandas_datareader's DataReader to load the adjusted close for this symbol.
    data[symbol] = pdr.DataReader(symbol, 'yahoo', start_date, end_date)['Adj Close']

# Remove the rows with missing data from both dataframes
data = data.dropna()
data_index = data_index.dropna()

# Save the data
data.to_csv('dj30_10y.csv', sep=',', encoding='utf-8')
data_index.to_csv('dj30_index_10y.csv', sep=',', encoding='utf-8')

print(data.head())

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler

from keras.layers import Input, Dense
from keras.models import Model

# Load the constituent prices. The first CSV column is skipped both when collecting
# the asset names and when selecting the price columns.
# NOTE(review): presumably that first column is the date index written by `to_csv`
# in the download cell — confirm.
data = pd.read_csv('dj30_10y.csv', sep=',', engine='python')
assets = data.columns.values[1:].tolist()
data = data.iloc[:, 1:]

# Load the index, drop its first column and keep only the last len(data) rows so both
# frames have the same number of rows.
# NOTE(review): this trims by position, not by date — verify the rows line up.
index = pd.read_csv('dj30_index_10y.csv', sep=',', engine='python')
index = index.iloc[-data.values.shape[0]:, 1:]

# Normalize data to [0.1, 0.9].
# `feature_range` is passed as a tuple keyword: recent scikit-learn versions validate
# this parameter as a tuple and reject a list when the scaler is fitted.
scaler = MinMaxScaler(feature_range=(0.1, 0.9))
data_X = scaler.fit_transform(data)
scaler_index = MinMaxScaler(feature_range=(0.1, 0.9))
index = scaler_index.fit_transform(index)

# Width of the bottleneck layer
N_COMPONENTS = 3

## Autoencoder - Keras
# Layer sizes: the network reconstructs its own input, so the output layer is as
# wide as the input layer and the hidden layer has N_COMPONENTS units.
n_inputs = len(assets)
n_outputs = n_inputs
n_core = N_COMPONENTS

# Architecture: input -> sigmoid bottleneck (encoder) -> sigmoid reconstruction (decoder).
# Note: `input` shadows the Python builtin of the same name; the name is left as is.
input = Input(shape=(n_inputs,))
encoded = Dense(units=n_core, activation='sigmoid')(input)
decoded = Dense(units=n_outputs, activation='sigmoid')(encoded)

# Assemble and compile the model (Adam optimizer, mean squared error loss)
autoencoder = Model(inputs=input, outputs=decoded)
autoencoder.compile(optimizer='adam', loss='mse')

# In-sample test: the same scaled data is used for training and for prediction
X_train = X_test = data_X

# Number of passes over the training data
epochs = 20

# Train the network to reproduce its input, one sample per gradient step
history = autoencoder.fit(
    x=X_train,
    y=X_train,
    epochs=epochs,
    batch_size=1,
    shuffle=True,
    verbose=1,
)

# Reconstructions produced by the trained autoencoder
y_pred_AE_keras = autoencoder.predict(X_test)



TensorFlow - Autoencoder

In [None]:
import tensorflow as tf
from sklearn.utils import shuffle

# Layer sizes: one unit per asset on the input and output side, N_COMPONENTS in the middle
n_inputs = n_outputs = len(assets)
n_core = N_COMPONENTS

# Encoder weights are Glorot-normal initialised. The decoder weight matrix is the
# transpose of the encoder's, so w1 is the only weight matrix held in a variable.
initializer = tf.initializers.glorot_normal()
w1 = tf.Variable(initializer(shape=[n_inputs, n_core]))
w2 = tf.transpose(w1)
# Biases start at zero
b1 = tf.Variable(tf.zeros(shape=[n_core]))
b2 = tf.Variable(tf.zeros(shape=[n_outputs]))

In [None]:
# Encoder: input layer -> bottleneck
def encoder(x):
    """Return sigmoid(x @ w1 + b1) for the tensor `x`.

    Reads the module-level `w1` and `b1`.
    """
    pre_activation = tf.add(tf.matmul(x, w1), b1)
    return tf.nn.sigmoid(pre_activation)

# Decoder: bottleneck -> reconstruction
def decoder(x):
    """Return sigmoid(x @ w2 + b2) for the tensor `x`.

    Reads the module-level `w2` and `b2`.
    """
    pre_activation = tf.add(tf.matmul(x, w2), b2)
    return tf.nn.sigmoid(pre_activation)

In [None]:
# Graph inputs. Only X is consumed by the ops below: the reconstruction target is the
# input itself (y_true = X). Y is still defined so the name stays available, but it
# does not need to be fed.
X = tf.placeholder("float", [None, n_inputs])
Y = tf.placeholder("float", [None, n_inputs])
# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
# Prediction
y_pred = decoder_op
y_true = X


# Training parameters
lr = 0.01
epochs = 20
batch_size = 1


# Reconstruction loss and the op that performs one Adam step on it
mse = tf.losses.mean_squared_error(y_true, y_pred)
optimizer = tf.train.AdamOptimizer(lr).minimize(mse)

# Start Training
# Start a new TF session
with tf.Session() as sess:
    # Initialize the network
    sess.run(tf.global_variables_initializer())

    n_samples = X_train.shape[0]

    # Training
    for i in range(epochs):
        # Visit the samples in a fresh random order every epoch
        X_train1 = shuffle(X_train)
        epoch_loss = 0.0
        # Step through the data in strides of batch_size; the last slice may be
        # shorter, so a trailing partial batch is trained on instead of dropped.
        for start in range(0, n_samples, batch_size):
            batch_x = X_train1[start:start + batch_size, :]
            _, loss_value = sess.run([optimizer, mse], feed_dict={X: batch_x})
            # Weight each batch by its size so the epoch figure is a per-sample mean
            epoch_loss += loss_value * batch_x.shape[0]

        # Display the mean loss over the epoch (not just the last mini-batch's loss);
        # max(..., 1) avoids a division by zero on an empty training set.
        print('Epoch: %i -> Loss: %f' % (i, epoch_loss / max(n_samples, 1)))

    # Make predictions and evaluate the reconstruction error in a single run, reusing
    # the existing `mse` op rather than adding a new loss op to the graph.
    y_pred_AE_tf, test_error = sess.run([decoder_op, mse], feed_dict={X: X_train})
    print('Test Error: %f' % test_error)
