# 1. Libraries and settings <a class="anchor" id="1-bullet"></a> 

In [None]:
# !pip install numpy==1.13.3 Keras==2.0.5 wrapt~=1.12.1 lxml==4.0.0 
# !pip install --upgrade pandas colorama 


In [None]:
# NOTE(review): this installs whatever TensorFlow release is newest, while a later cell of
# this notebook calls tf.reset_default_graph, tf.placeholder and tf.contrib.rnn. Confirm
# that the installed version still provides those names, or pin the version here.
!pip install --upgrade tensorflow
import tensorflow as tf
# Report the version the kernel actually imported.
print('Tensorflow version: {}'.format(tf.__version__))

In [None]:
import numpy as np
import pandas as pd
import math
import sklearn
import sklearn.preprocessing
import datetime
import os
import matplotlib.pyplot as plt


# Display settings: pandas shows floats with five decimals, numpy avoids scientific notation.
def _five_decimals(value):
    """Format a number with exactly five digits after the decimal point."""
    return '%.5f' % value

pd.set_option('display.float_format', _five_decimals)
np.set_printoptions(suppress=True)

# Hold-out shares in percent; whatever remains (80%) is used for training.
valid_set_size_percentage, test_set_size_percentage = 10, 10

# List the contents of the parent directory first, then of the working directory.
_cwd = os.getcwd()
for _folder in (os.path.dirname(_cwd), _cwd):
    print(_folder + ':', os.listdir(_folder))


# 2. Analyze data <a class="anchor" id="2-bullet"></a> 
- load stock prices from prices-split-adjusted.csv
- analyze data

In [None]:
# Read the split-adjusted daily prices; the first CSV column becomes the index.
df = pd.read_csv("../input/prices-split-adjusted.csv", index_col=0)
df.info()
df.head()

# Distinct ticker symbols: how many there are, plus ten of them as a sample.
_distinct_symbols = list(set(df.symbol))
print('\nnumber of different stocks: ', len(_distinct_symbols))
print(_distinct_symbols[:10])

In [None]:
# Last rows of the raw price table.
df.tail()

In [None]:
# Summary statistics of the raw price table.
df.describe()

In [None]:
# Column overview of the raw price table (the same report is already printed right after loading).
df.info()

In [None]:
# Price and volume history of one stock (EQIX), drawn side by side.
_eqix = df[df.symbol == 'EQIX']

plt.figure(figsize=(15, 5))

# Left panel: the four daily price series, one colour each.
plt.subplot(1, 2, 1)
for _column, _colour in (('open', 'red'), ('close', 'green'),
                         ('low', 'blue'), ('high', 'black')):
    plt.plot(_eqix[_column].values, color=_colour, label=_column)
plt.title('stock price')
plt.xlabel('time [days]')
plt.ylabel('price')
plt.legend(loc='best')

# Right panel: traded volume over the same period.
plt.subplot(1, 2, 2)
plt.plot(_eqix['volume'].values, color='black', label='volume')
plt.title('stock volume')
plt.xlabel('time [days]')
plt.ylabel('volume')
plt.legend(loc='best');

# 3. Subset data <a class="anchor" id="3-bullet"></a>

In [None]:
# Exclusive upper (UL) and lower (LL) bounds on a symbol's mean opening price; they are
# applied when the working subset of stocks is selected further down.
UL=45
LL=44

A better subsetting approach would be to group stocks by beta, i.e. to keep only stocks with similar beta values.

In [None]:
# Keep only the stocks whose mean opening price lies strictly between LL and UL.
# The per-symbol mean is computed once and only for 'open'; the original expression
# aggregated every column three times to obtain the same series.
mean_open_by_symbol = df.groupby('symbol')['open'].mean()
selected_symbols = mean_open_by_symbol[(mean_open_by_symbol < UL) & (mean_open_by_symbol > LL)].index
df_subset = df[df.symbol.isin(selected_symbols)]

# Shared scaler instance: fitted inside normalize_data and reused later to map
# normalized values back to the original scale.
min_max_scaler = sklearn.preprocessing.MinMaxScaler()

# Number of stocks that passed the filter.
df_subset.symbol.nunique()

# 3. Manipulate data <a class="anchor" id="3-bullet"></a> 
- One-hot encode symbol.
- normalize stock data
- create train, validation and test data sets

In [None]:
# function for min-max normalization of stock
def normalize_data(df, col_minmax):
    """Min-max scale the numeric columns and one-hot encode the symbol.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns listed in ``col_minmax`` and a ``symbol`` column.
    col_minmax : list of str
        Columns to scale with the module-level ``min_max_scaler``. The scaler is
        (re)fitted on this call.

    Returns
    -------
    pandas.DataFrame
        The scaled columns followed by one ``symbol_<ticker>`` indicator column per
        stock, on a fresh 0..n-1 index in the original row order.
    """
    scaled = pd.DataFrame(min_max_scaler.fit_transform(df[col_minmax]), columns=col_minmax)
    # Discard the old index directly. The previous version turned it into a 'date'
    # column and removed it with drop(['date'], 1, ...), which fails on pandas >= 2.0
    # (axis is keyword-only there) and whenever the index is not named 'date'.
    indicators = pd.get_dummies(df['symbol'], prefix="symbol").reset_index(drop=True)
    return pd.concat([scaled, indicators], axis=1)

# Normalize a copy of the selected subset: scale prices and volume, one-hot encode the symbol.
df_stock_norm = normalize_data(df_subset.copy(), ['open', 'close', 'low', 'high', 'volume'])
print('df_stock.columns.values = ', list(df_stock_norm.columns.values))

# Number of consecutive rows that make up one input sequence.
seq_len = 20



In [None]:
# function to create train, validation, test data given stock data and sequence length
def load_data(stock, seq_len, valid_pct=None, test_pct=None):
    """Cut a table into overlapping windows and split them in order.

    Parameters
    ----------
    stock : pandas.DataFrame or 2-D array-like
        One row per time step, one column per feature; needs at least four columns.
    seq_len : int
        Number of consecutive rows per window.
    valid_pct, test_pct : float, optional
        Share of windows (in percent) for validation and test. When omitted, the
        module-level ``valid_set_size_percentage`` / ``test_set_size_percentage``
        are used.

    Returns
    -------
    list
        ``[x_train, y_train, x_valid, y_valid, x_test, y_test]``. Each ``x`` has
        shape (windows, seq_len, features); in its last time step every column
        except column 0 is zeroed. Each ``y`` holds columns 1..3 of that last
        time step. Input with fewer than ``seq_len`` rows yields empty arrays.
    """
    if valid_pct is None:
        valid_pct = valid_set_size_percentage
    if test_pct is None:
        test_pct = test_set_size_percentage

    # DataFrame.as_matrix() no longer exists (removed in pandas 1.0). np.asarray accepts
    # frames and plain arrays alike, and the float dtype keeps indicator columns numeric.
    data_raw = np.asarray(stock, dtype=np.float64)

    # All windows of length seq_len. The "+ 1" includes the final window, which the
    # previous range(len - seq_len) silently left out.
    windows = [data_raw[start:start + seq_len]
               for start in range(len(data_raw) - seq_len + 1)]
    if windows:
        data = np.array(windows)
    else:
        # Too few rows for a single window: keep the rank so slicing below still works.
        data = np.empty((0, seq_len, data_raw.shape[1]))

    n_windows = data.shape[0]
    valid_set_size = int(np.round(valid_pct / 100 * n_windows))
    test_set_size = int(np.round(test_pct / 100 * n_windows))
    train_set_size = n_windows - (valid_set_size + test_set_size)

    def _inputs_and_targets(part):
        """Return (masked inputs, targets) for one contiguous slice of windows."""
        inputs = part.copy()
        # Hide everything but column 0 in the step that is to be predicted.
        inputs[:, -1, 1:] = 0
        targets = part[:, -1, 1:4]
        return inputs, targets

    valid_end = train_set_size + valid_set_size
    x_train, y_train = _inputs_and_targets(data[:train_set_size])
    x_valid, y_valid = _inputs_and_targets(data[train_set_size:valid_end])
    x_test, y_test = _inputs_and_targets(data[valid_end:])
    return [x_train, y_train, x_valid, y_valid, x_test, y_test]

def training_split_by_symbol(df_stock_norm_by_symbol):
    """Split the rows of one stock with load_data, using the notebook-wide seq_len.

    Returns the six arrays as a tuple in the order
    (x_train, y_train, x_valid, y_valid, x_test, y_test).
    """
    x_tr, y_tr, x_va, y_va, x_te, y_te = load_data(df_stock_norm_by_symbol, seq_len)
    splits = (x_tr, y_tr, x_va, y_va, x_te, y_te)
    return splits

In [None]:
# One-hot indicator columns, one per stock (their names contain "symbol").
symbol_list = [column for column in df_stock_norm.columns if "symbol" in column]

# Window every stock on its own so that no sequence mixes rows of two stocks.
splits_by_symbol = []
for symbol_column in symbol_list:
    print(symbol_column)
    symbol_rows = df_stock_norm[df_stock_norm[symbol_column] == 1].sort_index()
    splits_by_symbol.append(training_split_by_symbol(symbol_rows))

# Pool the per-stock pieces with one concatenate per array instead of re-copying the
# growing arrays on every loop iteration.
x_train_o, y_train_o, x_valid_o, y_valid_o, x_test_o, y_test_o = (
    np.concatenate(parts, axis=0) for parts in zip(*splits_by_symbol))
        

In [None]:
# Shape of the pooled training inputs.
x_train_o.shape

In [None]:
# row_num=5
# print("Train Input First row Values")
# print(x_train_o[row_num])
# print("Train Output First row Values - Normalized")
# print(y_train_o[row_num])
# print("Train Output First row Values - Orignal")
# print(min_max_scaler.inverse_transform([np.concatenate([[0],y_train_o[row_num],[0]])]))
# print("Train Output Dataframe - for checking the Logic")
# df[df.symbol=='ADSK'].iloc[row_num+(seq_len-1),:]

# 4. Model and validate data <a class="anchor" id="4-bullet"></a> 
- RNNs with basic, LSTM, GRU cells


In [None]:
# Number of target columns per sample.
y_train_o.shape[1]

In [None]:
## Basic Cell RNN in tensorflow

# Position of the next batch within the shuffled sample order; advanced by get_next_batch.
index_in_epoch = 0;
# Random visiting order over the training samples. No seed is set, so it differs per run.
perm_array  = np.arange(x_train_o.shape[0])
np.random.shuffle(perm_array)

# function to get the next batch
def get_next_batch(batch_size):
    """Return the next mini-batch (inputs, targets) of the pooled training set.

    Batches follow the order stored in ``perm_array``. When fewer than
    ``batch_size`` unseen samples remain, the order is reshuffled in place and the
    batch is taken from the start of the new order; the leftover samples of the
    old order are skipped.
    """
    global index_in_epoch

    first = index_in_epoch
    last = first + batch_size
    if last > x_train_o.shape[0]:
        np.random.shuffle(perm_array)  # new epoch, new visiting order
        first, last = 0, batch_size
    index_in_epoch = last

    picked = perm_array[first:last]
    return x_train_o[picked], y_train_o[picked]

# Builds a stacked-GRU regression graph with the TensorFlow 1.x graph API, trains it
# with Adam on mean squared error and stores the predictions for all three splits.

# parameters
# time steps per input sequence
n_steps = seq_len
# features per time step
n_inputs = x_train_o.shape[2]
n_neurons = 50 
# target values per sample
n_outputs = y_train_o.shape[1]
n_layers = 3
learning_rate = 0.001
# NOTE(review): batch_size is reassigned to 400 in a later cell of this notebook.
batch_size = 25
n_epochs = 50 
train_set_size = x_train_o.shape[0]
test_set_size = x_test_o.shape[0]

# Start from an empty graph so that re-running the cell does not pile up nodes.
tf.reset_default_graph()

# Inputs (batch, n_steps, n_inputs) and targets (batch, n_outputs).
X = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_outputs])

# use GRU cell
layers = [tf.contrib.rnn.GRUCell(num_units=n_neurons, activation=tf.nn.leaky_relu)
         for layer in range(n_layers)]

multi_layer_cell = tf.contrib.rnn.MultiRNNCell(layers)
rnn_outputs, states = tf.nn.dynamic_rnn(multi_layer_cell, X, dtype=tf.float32)

# Flatten to one row per (sample, step) so a single dense layer maps every step to
# n_outputs values, then restore the step axis.
stacked_rnn_outputs = tf.reshape(rnn_outputs, [-1, n_neurons]) 
stacked_outputs = tf.layers.dense(stacked_rnn_outputs, n_outputs)
outputs = tf.reshape(stacked_outputs, [-1, n_steps, n_outputs])
outputs = outputs[:,n_steps-1,:] # keep only last output of sequence

loss = tf.reduce_mean(tf.square(outputs - y)) # loss function = mean squared error 
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 
training_op = optimizer.minimize(loss)

# run graph
with tf.Session() as sess: 
    sess.run(tf.global_variables_initializer())
    # Total number of batches for n_epochs passes over the training set.
    for iteration in range(int(n_epochs*train_set_size/batch_size)):
        x_batch, y_batch = get_next_batch(batch_size) # fetch the next training batch 
        sess.run(training_op, feed_dict={X: x_batch, y: y_batch}) 
        # Report every five epochs' worth of batches; each report feeds the complete
        # training and validation sets in one pass.
        if iteration % int(5*train_set_size/batch_size) == 0:
            mse_train = loss.eval(feed_dict={X: x_train_o, y: y_train_o}) 
            mse_valid = loss.eval(feed_dict={X: x_valid_o, y: y_valid_o}) 
            print('%.2f epochs: MSE train/valid = %.6f/%.6f'%(
                iteration*batch_size/train_set_size, mse_train, mse_valid))

    # Predictions must be computed before the session closes at the end of this block.
    y_train_pred = sess.run(outputs, feed_dict={X: x_train_o})
    y_valid_pred = sess.run(outputs, feed_dict={X: x_valid_o})
    y_test_pred = sess.run(outputs, feed_dict={X: x_test_o})
    

In [None]:
import random


def _to_original_scale(normalized_targets):
    """Map one row of normalized targets back to the scale the scaler was fitted on.

    The scaler was fitted on five columns (open, close, low, high, volume) and the
    targets are the middle three, so the row is padded with a dummy first and last
    value for inverse_transform and those two entries are stripped again.
    """
    padded = np.concatenate([[0], normalized_targets, [0]])
    return min_max_scaler.inverse_transform([padded])[0][1:-1]


# Pick any row of the test set. The previous randint(1, 100) never chose row 0 and
# could index past the end of a test set with fewer than 101 rows.
rn = random.randrange(len(y_test_o))
print("Orignal")
print(_to_original_scale(y_test_o[rn]))
print("Predicted")
print(_to_original_scale(y_test_pred[rn]))
# y_test_o[0]

In [None]:

# y_test_pred[0]

In [None]:
import torch
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn

def _as_tensor_dataset(inputs, targets):
    """Pair two numpy arrays as a TensorDataset; from_numpy shares their memory."""
    return TensorDataset(torch.from_numpy(inputs), torch.from_numpy(targets))


train_data = _as_tensor_dataset(x_train_o, y_train_o)
val_data = _as_tensor_dataset(x_valid_o, y_valid_o)
test_data = _as_tensor_dataset(x_test_o, y_test_o)

batch_size = 400

# One shuffling loader per split, all with the same batch size.
train_loader, val_loader, test_loader = (
    DataLoader(dataset, shuffle=True, batch_size=batch_size)
    for dataset in (train_data, val_data, test_data))

# 5. Predictions <a class="anchor" id="5-bullet"></a> 

In [None]:
# Shape of the pooled training targets.
y_train_o.shape

In [None]:
# Target column to plot. The targets are columns 1..3 of the normalized frame,
# so: 0 = close, 1 = low, 2 = high.
ft = 0

# Shared x-axes: train, validation and test samples are laid out one after another.
# The test axis previously ended at train + test + test, which only matched the data
# because the validation and test sets happen to have the same size.
n_train, n_valid, n_test = len(y_train_o), len(y_valid_o), len(y_test_o)
train_axis = np.arange(n_train)
valid_axis = np.arange(n_train, n_train + n_valid)
test_axis = np.arange(n_train + n_valid, n_train + n_valid + n_test)

## show predictions
plt.figure(figsize=(15, 5))

# Left panel: targets and predictions of all three splits.
plt.subplot(1, 2, 1)
plt.plot(train_axis, y_train_o[:, ft], color='blue', label='train target')
plt.plot(valid_axis, y_valid_o[:, ft], color='gray', label='valid target')
plt.plot(test_axis, y_test_o[:, ft], color='black', label='test target')
plt.plot(train_axis, y_train_pred[:, ft], color='red', label='train prediction')
plt.plot(valid_axis, y_valid_pred[:, ft], color='orange', label='valid prediction')
plt.plot(test_axis, y_test_pred[:, ft], color='green', label='test prediction')
plt.title('past and future stock prices')
plt.xlabel('time [days]')
plt.ylabel('normalized price')
plt.legend(loc='best')

# Right panel: test split only, on the same x positions as in the left panel.
plt.subplot(1, 2, 2)
plt.plot(test_axis, y_test_o[:, ft], color='black', label='test target')
plt.plot(test_axis, y_test_pred[:, ft], color='green', label='test prediction')
plt.title('future stock prices')
plt.xlabel('time [days]')
plt.ylabel('normalized price')
plt.legend(loc='best');


def _direction_accuracy(inputs, targets, predictions):
    """Share of samples whose predicted sign of (close - open) matches the actual sign.

    The open value is column 0 of the last input step (the only column left unmasked
    there); close is target column 0. The previous version subtracted target column 0
    from target column 1, i.e. it compared low - close.
    NOTE(review): open and close are min-max scaled per column, so the sign is taken
    on normalized values, not on prices — confirm this is acceptable.
    """
    day_open = inputs[:, -1, 0]
    actual = np.sign(targets[:, 0] - day_open)
    predicted = np.sign(predictions[:, 0] - day_open)
    return np.mean(actual == predicted)


corr_price_development_train = _direction_accuracy(x_train_o, y_train_o, y_train_pred)
corr_price_development_valid = _direction_accuracy(x_valid_o, y_valid_o, y_valid_pred)
corr_price_development_test = _direction_accuracy(x_test_o, y_test_o, y_test_pred)

print('correct sign prediction for close - open price for train/valid/test: %.2f/%.2f/%.2f'%(
    corr_price_development_train, corr_price_development_valid, corr_price_development_test))


In [None]:
# NOTE(review): repeats the install from the first code cell. tf was already imported
# there, so the version printed below is presumably still the one loaded at that point;
# a kernel restart would be needed to pick up a newly installed release — confirm.
!pip install --upgrade tensorflow
print('Tensorflow version: {}'.format(tf.__version__))

## Time2VEC and model classes

In [None]:
import tensorflow as tf
from tensorflow.keras.models import *
from tensorflow.keras.layers import *

In [None]:
class Time2Vector(Layer):
    """Learned time embedding with one linear and one periodic (sine) component.

    ``call`` averages the first four features of every time step and feeds that
    per-step scalar through a trainable linear map and a trainable sine map.
    Input is (batch, seq_len, features) with at least four features; output is
    (batch, seq_len, 2).
    """

    def __init__(self, seq_len, **kwargs):
        # Forward the standard layer arguments (name, dtype, trainable). They were
        # previously dropped, so a reloaded layer lost the name it was saved with.
        super(Time2Vector, self).__init__(**kwargs)
        self.seq_len = seq_len

    def build(self, input_shape):
        '''Create the four trainable vectors, each of shape (seq_len,)'''
        vector_shape = (int(self.seq_len),)

        def new_vector(name):
            return self.add_weight(name=name,
                                   shape=vector_shape,
                                   initializer='uniform',
                                   trainable=True)

        self.weights_linear = new_vector('weight_linear')
        self.bias_linear = new_vector('bias_linear')
        self.weights_periodic = new_vector('weight_periodic')
        self.bias_periodic = new_vector('bias_periodic')

    def call(self, x):
        '''Calculate linear and periodic time features'''
        # Collapse the first four features into one value per time step: (batch, seq_len)
        x = tf.math.reduce_mean(x[:, :, :4], axis=-1)
        time_linear = self.weights_linear * x + self.bias_linear  # Linear time feature
        time_linear = tf.expand_dims(time_linear, axis=-1)  # (batch, seq_len, 1)

        time_periodic = tf.math.sin(tf.multiply(x, self.weights_periodic) + self.bias_periodic)
        time_periodic = tf.expand_dims(time_periodic, axis=-1)  # (batch, seq_len, 1)
        return tf.concat([time_linear, time_periodic], axis=-1)  # (batch, seq_len, 2)

    def get_config(self):  # Needed for saving and loading model with custom layer
        config = super().get_config().copy()
        config.update({'seq_len': self.seq_len})
        return config

In [None]:
class SingleAttention(Layer):
    """One scaled dot-product attention head.

    ``call`` takes a sequence of three tensors (query source, key source, value
    source), projects them with separate dense layers to d_k / d_k / d_v units and
    returns softmax(Q K^T / sqrt(d_k)) V.
    """

    def __init__(self, d_k, d_v, **kwargs):
        # **kwargs carries the standard layer arguments when rebuilt from a config.
        super(SingleAttention, self).__init__(**kwargs)
        self.d_k = d_k
        self.d_v = d_v

    def build(self, input_shape):
        def projection(units):
            return Dense(units,
                         input_shape=input_shape,
                         kernel_initializer='glorot_uniform',
                         bias_initializer='glorot_uniform')

        self.query = projection(self.d_k)
        self.key = projection(self.d_k)
        self.value = projection(self.d_v)

    def call(self, inputs):  # inputs = (in_seq, in_seq, in_seq)
        q = self.query(inputs[0])
        k = self.key(inputs[1])

        # Scale all scores with a single broadcast division. The previous tf.map_fn
        # applied the same division sample by sample, which gives the same values
        # but adds a loop over the batch.
        attn_weights = tf.matmul(q, k, transpose_b=True) / (float(self.d_k) ** 0.5)
        attn_weights = tf.nn.softmax(attn_weights, axis=-1)

        v = self.value(inputs[2])
        attn_out = tf.matmul(attn_weights, v)
        return attn_out

    def get_config(self):  # Needed for saving and loading model with custom layer
        config = super().get_config().copy()
        config.update({'d_k': self.d_k, 'd_v': self.d_v})
        return config

#############################################################################

class MultiAttention(Layer):
    """Several SingleAttention heads whose outputs are concatenated and projected.

    ``call`` takes the same three-tensor sequence as SingleAttention. The final
    dense layer maps the concatenated heads back to the feature size of the first
    input, so the output has the same last dimension as that input.
    """

    def __init__(self, d_k, d_v, n_heads, **kwargs):
        # **kwargs carries the standard layer arguments when rebuilt from a config.
        super(MultiAttention, self).__init__(**kwargs)
        self.d_k = d_k
        self.d_v = d_v
        self.n_heads = n_heads
        self.attn_heads = list()

    def build(self, input_shape):
        # Rebind instead of appending, so that building twice cannot double the heads.
        self.attn_heads = [SingleAttention(self.d_k, self.d_v)
                           for _ in range(self.n_heads)]

        # input_shape[0] is the shape of the first of the three inputs; its last
        # entry is the feature size to project back to.
        self.linear = Dense(input_shape[0][-1],
                            input_shape=input_shape,
                            kernel_initializer='glorot_uniform',
                            bias_initializer='glorot_uniform')

    def call(self, inputs):
        attn = [head(inputs) for head in self.attn_heads]
        concat_attn = tf.concat(attn, axis=-1)
        multi_linear = self.linear(concat_attn)
        return multi_linear

    def get_config(self):  # Needed for saving and loading model with custom layer
        config = super().get_config().copy()
        config.update({'d_k': self.d_k, 'd_v': self.d_v, 'n_heads': self.n_heads})
        return config

#############################################################################

class TransformerEncoder(Layer):
    """Encoder block: multi-head attention followed by a two-layer 1x1-convolution
    feed-forward part, each with dropout, a residual connection and layer norm.

    ``call`` takes a sequence of three tensors (query, key and value source) and
    returns a tensor with the same last dimension as the first of them.
    """

    def __init__(self, d_k, d_v, n_heads, ff_dim, dropout=0.1, **kwargs):
        # Configs written by the earlier get_config stored the rate as 'dropout_rate'
        # and carried an 'attn_heads' entry. Accept both so old checkpoints still
        # load, and so the saved rate is restored instead of falling back to 0.1.
        dropout = kwargs.pop('dropout_rate', dropout)
        kwargs.pop('attn_heads', None)
        # Forward the standard layer arguments (name, dtype, trainable).
        super(TransformerEncoder, self).__init__(**kwargs)
        self.d_k = d_k
        self.d_v = d_v
        self.n_heads = n_heads
        self.ff_dim = ff_dim
        self.attn_heads = list()  # not used by this class; kept for attribute compatibility
        self.dropout_rate = dropout

    def build(self, input_shape):
        self.attn_multi = MultiAttention(self.d_k, self.d_v, self.n_heads)
        self.attn_dropout = Dropout(self.dropout_rate)
        self.attn_normalize = LayerNormalization(input_shape=input_shape, epsilon=1e-6)

        self.ff_conv1D_1 = Conv1D(filters=self.ff_dim, kernel_size=1, activation='relu')
        # input_shape[0] is the shape of the first of the three inputs; project back
        # to its feature size so the residual addition below is shape-compatible.
        self.ff_conv1D_2 = Conv1D(filters=input_shape[0][-1], kernel_size=1)
        self.ff_dropout = Dropout(self.dropout_rate)
        self.ff_normalize = LayerNormalization(input_shape=input_shape, epsilon=1e-6)

    def call(self, inputs):  # inputs = (in_seq, in_seq, in_seq)
        attn_layer = self.attn_multi(inputs)
        attn_layer = self.attn_dropout(attn_layer)
        attn_layer = self.attn_normalize(inputs[0] + attn_layer)

        ff_layer = self.ff_conv1D_1(attn_layer)
        ff_layer = self.ff_conv1D_2(ff_layer)
        ff_layer = self.ff_dropout(ff_layer)
        # NOTE(review): this residual adds the block input, not the attention output
        # (attn_layer) — confirm this is intended before changing it, since it
        # affects trained weights.
        ff_layer = self.ff_normalize(inputs[0] + ff_layer)
        return ff_layer

    def get_config(self):  # Needed for saving and loading model with custom layer
        config = super().get_config().copy()
        # Keys match the __init__ parameter names so that from_config can pass them back.
        config.update({'d_k': self.d_k,
                       'd_v': self.d_v,
                       'n_heads': self.n_heads,
                       'ff_dim': self.ff_dim,
                       'dropout': self.dropout_rate})
        return config

In [None]:
def create_model(n_features=5, n_outputs=1):
    """Build and compile the Time2Vector + three-encoder regression model.

    Parameters
    ----------
    n_features : int, optional
        Features per time step of the input sequences (default 5, as before).
    n_outputs : int, optional
        Size of the final linear layer (default 1, as before).

    Returns
    -------
    A compiled Keras model (loss 'mse', optimizer 'adam', metrics 'mae' and 'mape').

    Reads ``seq_len``, ``d_k``, ``d_v``, ``n_heads`` and ``ff_dim`` from module scope.
    NOTE(review): the last four are not defined in the visible part of this notebook.
    """
    # Initialize time and transformer layers
    time_embedding = Time2Vector(seq_len)
    encoders = [TransformerEncoder(d_k, d_v, n_heads, ff_dim) for _ in range(3)]

    # Construct model: append the two time features to the raw features, then encode.
    in_seq = Input(shape=(seq_len, n_features))
    x = Concatenate(axis=-1)([in_seq, time_embedding(in_seq)])
    for encoder in encoders:
        x = encoder((x, x, x))
    x = GlobalAveragePooling1D(data_format='channels_first')(x)
    x = Dropout(0.1)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.1)(x)
    out = Dense(n_outputs, activation='linear')(x)

    model = Model(inputs=in_seq, outputs=out)
    model.compile(loss='mse', optimizer='adam', metrics=['mae', 'mape'])
    return model


# The statements below were indented by four spaces at module level, which does not
# match any enclosing block and stops the cell with an IndentationError.
# NOTE(review): X_train, y_train, X_val and y_val are not defined in the visible part
# of this notebook (the arrays built above are named x_train_o, y_train_o, ...).

# One path for saving and reloading. The checkpoint was written to the working
# directory but read back from '/content/', which only matches when the working
# directory is /content.
CHECKPOINT_PATH = 'Transformer+TimeEmbedding.hdf5'

model = create_model()
model.summary()

# Keep only the weights with the best validation loss.
callback = tf.keras.callbacks.ModelCheckpoint(CHECKPOINT_PATH,
                                              monitor='val_loss',
                                              save_best_only=True, verbose=1)

history = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    epochs=35,
                    callbacks=[callback],
                    validation_data=(X_val, y_val))

# Reload the best checkpoint; the custom layers have to be named explicitly.
model = tf.keras.models.load_model(CHECKPOINT_PATH,
                                   custom_objects={'Time2Vector': Time2Vector,
                                                   'SingleAttention': SingleAttention,
                                                   'MultiAttention': MultiAttention,
                                                   'TransformerEncoder': TransformerEncoder})


###############################################################################
# Calculate predictions and metrics

# Predictions for the training, validation and test inputs, in that order.
train_pred, val_pred, test_pred = (
    model.predict(split_inputs) for split_inputs in (X_train, X_val, X_test))

# Loss and compiled metrics per split, computed silently.
train_eval = model.evaluate(X_train, y_train, verbose=0)
val_eval = model.evaluate(X_val, y_val, verbose=0)
test_eval = model.evaluate(X_test, y_test, verbose=0)

# One report line per split: loss first, then the two metrics.
print(' ')
print('Evaluation metrics')
_report_lines = (
    ('Training Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}', train_eval),
    ('Validation Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}', val_eval),
    ('Test Data - Loss: {:.4f}, MAE: {:.4f}, MAPE: {:.4f}', test_eval),
)
for _template, _scores in _report_lines:
    print(_template.format(_scores[0], _scores[1], _scores[2]))

###############################################################################
# Display results


def _plot_returns_panel(fig, position, actual, predicted, title):
    """Draw one panel: column 3 of ``actual`` plus the predictions shifted by seq_len.

    The predictions start at x = seq_len. Returns the created axes.
    """
    ax = fig.add_subplot(position)
    ax.plot(actual[:, 3], label='IBM Closing Returns')
    ax.plot(np.arange(seq_len, predicted.shape[0] + seq_len), predicted,
            linewidth=3, label='Predicted IBM Closing Returns')
    ax.set_title(title, fontsize=18)
    ax.set_xlabel('Date')
    ax.set_ylabel('IBM Closing Returns')
    ax.legend(loc="best", fontsize=12)
    return ax


fig = plt.figure(figsize=(15, 20))
st = fig.suptitle("Transformer + TimeEmbedding Model", fontsize=22)
st.set_y(0.92)

# NOTE(review): train_data, val_data and test_data were last bound to torch
# TensorDataset objects earlier in this notebook, while the [:, 3] slice presumably
# expects a 2-D array — confirm which data is meant. The 'IBM' labels also look
# carried over from another data set.
# The three panels were three copies of the same code; they now share one helper.
ax11 = _plot_returns_panel(fig, 311, train_data, train_pred, "Training Data")
ax21 = _plot_returns_panel(fig, 312, val_data, val_pred, "Validation Data")
ax31 = _plot_returns_panel(fig, 313, test_data, test_pred, "Test Data")

This notebook is not a finished product: the Transformer training and the model optimization still need to be implemented.