In [83]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, Dense, SimpleRNN
from tensorflow.keras.layers import Layer
from tensorflow.keras.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
import numpy as np
import tensorflow.keras.backend as K

In [84]:
def get_data(path):
    data = []
    for i in ["train", "val", "test"]:
        data.append(pd.read_csv(path + i + ".csv", index_col="Datetime"))
    return data[0], data[1], data[2]
# Load data
#path = '../data/clean_data/univariate_Q_Kallveit/'
data = pd.read_csv("../data/clean_data/univariate_Q_Kallveit/univariate_Q_Kallveit.csv", index_col="Datetime")

In [85]:
def format_data(time_steps, train_percent, scale_data=True):
    dat = data.values
    Y_ind = np.arange(time_steps, len(dat), 1)
    Y = dat[Y_ind]
    rows_x = len(Y)
    X = dat[0:rows_x]
    for i in range(time_steps-1):
        temp = dat[i+1:rows_x+i+1]
        X = np.column_stack((X, temp))
    # random permutation with fixed seed
    rand = np.random.RandomState(seed=13)
    idx = rand.permutation(rows_x)
    split = int(train_percent*rows_x)
    train_ind = idx[0:split]
    test_ind = idx[split:]
    trainX = X[train_ind]
    trainY = Y[train_ind]
    testX = X[test_ind]
    testY = Y[test_ind]
    trainX = np.reshape(trainX, (len(trainX), time_steps, 1))
    testX = np.reshape(testX, (len(testX), time_steps, 1))
    return trainX, trainY, testX, testY

In [86]:
# Set up parameters
time_steps = 6
hidden_units = 4
epochs = 30
batch_size = 32 # 32, 64, 128, 256


# Create a traditional RNN network
def create_RNN(hidden_units, dense_units, input_shape, activation):
    model = Sequential()
    model.add(SimpleRNN(hidden_units, input_shape=input_shape, activation=activation[0]))
    model.add(Dense(units=dense_units, activation=activation[1]))
    model.compile(loss='mse', optimizer='adam')
    return model

model_RNN = create_RNN(hidden_units=hidden_units, dense_units=1,
                       input_shape=(time_steps,1), activation=['tanh', 'tanh'])

In [87]:
# Generate the dataset for the network
trainX, trainY, testX, testY  = format_data(time_steps, 0.7)
# Train the network
model_RNN.fit(trainX, trainY, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1)

# Evalute model
train_mse = model_RNN.evaluate(trainX, trainY)
test_mse = model_RNN.evaluate(testX, testY)

Epoch 1/30
1246/1246 - 2s - loss: 88.7397 - val_loss: 86.1185 - 2s/epoch - 1ms/step
Epoch 2/30
1246/1246 - 1s - loss: 87.4491 - val_loss: 86.0802 - 1s/epoch - 856us/step
Epoch 3/30
1246/1246 - 1s - loss: 87.4221 - val_loss: 86.0622 - 1s/epoch - 853us/step
Epoch 4/30
1246/1246 - 1s - loss: 87.4094 - val_loss: 86.0535 - 1s/epoch - 874us/step
Epoch 5/30
1246/1246 - 1s - loss: 87.4031 - val_loss: 86.0490 - 1s/epoch - 871us/step
Epoch 6/30
1246/1246 - 1s - loss: 87.3998 - val_loss: 86.0468 - 1s/epoch - 859us/step
Epoch 7/30
1246/1246 - 1s - loss: 87.3982 - val_loss: 86.0455 - 1s/epoch - 903us/step
Epoch 8/30
1246/1246 - 1s - loss: 87.3973 - val_loss: 86.0449 - 1s/epoch - 878us/step
Epoch 9/30
1246/1246 - 1s - loss: 87.3968 - val_loss: 86.0446 - 1s/epoch - 890us/step
Epoch 10/30
1246/1246 - 1s - loss: 87.3965 - val_loss: 86.0444 - 1s/epoch - 872us/step
Epoch 11/30
1246/1246 - 1s - loss: 87.3964 - val_loss: 86.0443 - 1s/epoch - 891us/step
Epoch 12/30
1246/1246 - 1s - loss: 87.3963 - val_loss:

In [88]:
# Print error
print("Train set MSE = ", train_mse)
print("Test set MSE = ", test_mse)

Train set MSE =  87.26095581054688
Test set MSE =  88.37645721435547


In [89]:
# Add attention layer to the deep learning network
class attention(Layer):
    def __init__(self,**kwargs):
        super().__init__(**kwargs)

    def build(self,input_shape):
        self.W=self.add_weight(name='attention_weight', shape=(input_shape[-1],1),
                               initializer='random_normal', trainable=True)
        self.b=self.add_weight(name='attention_bias', shape=(input_shape[1],1),
                               initializer='zeros', trainable=True)
        super().build(input_shape)

    def call(self,x):
        # Alignment scores. Pass them through tanh function
        e = K.tanh(K.dot(x,self.W)+self.b)
        # Remove dimension of size 1
        e = K.squeeze(e, axis=-1)
        # Compute the weights
        alpha = K.softmax(e)
        # Reshape to tensorFlow format
        alpha = K.expand_dims(alpha, axis=-1)
        # Compute the context vector
        context = x * alpha
        context = K.sum(context, axis=1)
        return context

In [90]:
def create_RNN_with_attention(hidden_units, dense_units, input_shape, activation):
    x = Input(shape=input_shape)
    RNN_layer = SimpleRNN(hidden_units, return_sequences=True, activation=activation)(x)
    attention_layer = attention()(RNN_layer)
    outputs = Dense(dense_units, trainable=True, activation=activation)(attention_layer)
    model = Model(x,outputs)
    model.compile(loss='mse', optimizer='adam')
    return model

In [91]:
# Create the model with attention, train and evaluate
model_attention = create_RNN_with_attention(hidden_units=hidden_units, dense_units=1,
                                  input_shape=(time_steps,1), activation='tanh')
model_attention.summary()
model_attention.fit(trainX, trainY, epochs=epochs, batch_size=batch_size, verbose=2, validation_split=0.1)

Model: "model_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 6, 1)]            0         
                                                                 
 simple_rnn_18 (SimpleRNN)   (None, 6, 4)              24        
                                                                 
 attention_9 (attention)     (None, 4)                 10        
                                                                 
 dense_18 (Dense)            (None, 1)                 5         
                                                                 
Total params: 39
Trainable params: 39
Non-trainable params: 0
_________________________________________________________________
Epoch 1/30
1246/1246 - 2s - loss: 91.4410 - val_loss: 86.2011 - 2s/epoch - 2ms/step
Epoch 2/30
1246/1246 - 1s - loss: 87.4849 - val_loss: 86.0930 - 1s/epoch - 976us/step
Epoch 3/30
1246/1246 - 1s

<keras.callbacks.History at 0x1e85df17708>

In [92]:
# Evalute model
train_mse_attn = model_attention.evaluate(trainX, trainY)
test_mse_attn = model_attention.evaluate(testX, testY)

# Print error
print("Train set MSE with attention = ", train_mse_attn)
print("Test set MSE with attention = ", test_mse_attn)

Train set MSE with attention =  87.26095581054688
Test set MSE with attention =  88.37645721435547
