In [2]:
import matplotlib.pyplot as plt
import numpy as np
import time

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from keras.layers import Flatten, Dense, Conv1D, MaxPooling2D
from sklearn.model_selection import train_test_split
import random
from tensorflow.keras.layers import Input, LSTM,Flatten, Dense, Conv2D, BatchNormalization, LeakyReLU, Dropout, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import tensorflow.keras.backend as K
import pandas as pd

In [3]:
# The stylesheet lives at a machine-specific absolute path. It only affects
# how figures look, so fall back to matplotlib's defaults when the file is
# missing instead of aborting the whole notebook run.
try:
    plt.style.use('/home/luuk/Documents/MNRAS_stylesheet')
except OSError:
    print("Stylesheet '/home/luuk/Documents/MNRAS_stylesheet' not found; "
          "using the default matplotlib style.")

In [4]:
def tomatrix(vectorSeries, sequence_length):
    """Cut a sequence into all overlapping windows of a fixed length.

    Window ``k`` is ``vectorSeries[k:k + sequence_length]``; consecutive
    windows are offset by one element.

    Args:
        vectorSeries: Any sliceable sequence supporting ``len()``.
        sequence_length: Number of elements in each window.

    Returns:
        A list of ``len(vectorSeries) - sequence_length + 1`` windows, or an
        empty list when that count is not positive.
    """
    window_count = len(vectorSeries) - sequence_length + 1
    return [
        vectorSeries[start:start + sequence_length]
        for start in range(window_count)
    ]


In [5]:
from tensorflow.compat.v1 import ConfigProto, InteractiveSession

# Query the visible GPUs once and reuse the list for both the count and the
# cell's displayed value.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

# Open a TF1-compat interactive session with allow_growth enabled
# (per the TensorFlow docs, GPU memory is then allocated on demand).
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

gpu_devices

Num GPUs Available:  1


[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [10]:
def get_model(units1, units2):
    """Build an uncompiled two-layer stacked LSTM regressor.

    Architecture: Input -> LSTM(units1) -> Dropout(0.2) -> LSTM(units2)
    -> Dropout(0.2) -> Dense(1, linear).

    Args:
        units1: Number of units in the first LSTM layer.
        units2: Number of units in the second LSTM layer.

    Returns:
        A ``Sequential`` model that maps inputs of shape
        (samples, timesteps, 1) to one linear output per sample. The caller
        is responsible for compiling it.
    """
    model = Sequential()
    # Declare the input explicitly instead of passing the legacy
    # ``input_dim=1`` keyword to the LSTM (removed in Keras 3): sequences of
    # any length with a single feature per time step.
    model.add(Input(shape=(None, 1)))
    # layer 1: LSTM that returns the full sequence so it can feed layer 2
    model.add(LSTM(units=units1, return_sequences=True))
    model.add(Dropout(0.2))
    # layer 2: LSTM that returns only its final output
    model.add(LSTM(units=units2, return_sequences=False))
    model.add(Dropout(0.2))
    # layer 3: dense read-out with linear activation, a(x) = x
    model.add(Dense(units=1, activation='linear'))
    return model


def get_score(hyperparameters, data_path='asml_RK.pickle'):
    """Train the stacked LSTM for one hyperparameter pair and score it.

    The pickled series is loaded, transformed as ``sqrt(252 * x)``, cut into
    overlapping windows of ``sequence_length`` values and centred on the mean
    of the window matrix. The first 90% of the windows (shuffled) are used for
    training and the remaining 10% for testing. In each window the last value
    is the target and the preceding values are the input sequence.

    Args:
        hyperparameters: Pair ``(sequence_length, units)``; both are cast to
            int. ``units`` is used for both LSTM layers.
        data_path: Path of the pickled pandas object to load. Defaults to the
            file name originally hard-coded here.

    Returns:
        Tuple ``(model, MAE, RMSE, X_train, y_train, X_test, y_test)`` where
        MAE and RMSE are computed on the test windows (in mean-shifted units)
        and the X arrays have shape (samples, sequence_length - 1, 1).

    Raises:
        ValueError: If ``sequence_length`` is smaller than 2 or the series is
            too short to produce a single window.
    """
    print(hyperparameters)
    hyperparameters = np.array(hyperparameters, dtype=int)
    # Plain Python ints keep range(), slicing and Keras layer arguments happy.
    sequence_length, units = (int(value) for value in hyperparameters)
    if sequence_length < 2:
        raise ValueError("sequence_length must be at least 2 "
                         "(one or more inputs plus one target)")

    # Seed NumPy (used for the row shuffle) and TensorFlow (weight
    # initialisation, dropout) so repeated calls start from the same state.
    # NOTE(review): some GPU kernels may still be non-deterministic.
    np.random.seed(1234)
    tf.random.set_seed(1234)

    # load the data
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    RVOL = np.sqrt(252 * pd.read_pickle(data_path))

    # convert the vector to a 2D matrix of overlapping windows
    matrix_RVOL = np.array(tomatrix(RVOL.values.flatten(), sequence_length))
    if matrix_RVOL.ndim != 2 or matrix_RVOL.shape[0] == 0:
        raise ValueError("series is too short for the requested sequence_length")

    # shift all data by mean
    # NOTE(review): the mean is taken over every window, test rows included.
    shifted_value = matrix_RVOL.mean()
    matrix_RVOL = matrix_RVOL - shifted_value
    print ("Data  shape: ", matrix_RVOL.shape)

    # split dataset: first 90% of the windows for training, the rest for testing
    train_row = int(round(0.9 * matrix_RVOL.shape[0]))
    train_set = matrix_RVOL[:train_row, :]

    # Shuffle the training rows in place; rows from train_row onwards
    # (the test windows) are not touched.
    np.random.shuffle(train_set)
    # the training set
    X_train = train_set[:, :-1]
    # the last column is the true value to compute the loss
    y_train = train_set[:, -1]
    # the test set
    X_test = matrix_RVOL[train_row:, :-1]
    y_test = matrix_RVOL[train_row:, -1]

    # the LSTM layer expects input of shape (samples, timesteps, features)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    model = get_model(units, units)

    # compile the model
    model.compile(loss="mae", optimizer="rmsprop")

    # train the model
    model.fit(X_train, y_train, batch_size=128, epochs=100, validation_split=0.05, verbose=0)

    # Run the test set through the network once and reuse the residuals.
    residuals = model(X_test).numpy().flatten() - y_test
    MAE = np.mean(np.abs(residuals))
    MSE = np.mean(residuals ** 2)
    RMSE = np.sqrt(MSE)
    print(MAE)

    return model, MAE, RMSE, X_train, y_train, X_test, y_test

In [7]:
# Hyperparameter grid for the search.
# Window lengths: 6, 8, ..., 28 (the stop value 30 is exclusive).
window_range = np.arange(6, 30, step=2)
# LSTM unit counts: 30, 40, ..., 110 (the stop value 120 is exclusive).
unitrange = np.arange(30, 120, step=10)

In [8]:
from scipy.optimize import brute

In [8]:
# Two independent, empty result tables indexed by 'w'; the grid-search loop
# adds rows (window length) and columns (unit count) as scores come in.
MAEdf, RMSEdf = (pd.DataFrame({'w': []}).set_index('w') for _ in range(2))

In [227]:
# Grid search: train one model per (window length, unit count) pair and
# record its test-set errors in the result tables.
for w in window_range:
    for u in unitrange:
        # get_score returns (model, MAE, RMSE, X_train, y_train, X_test, y_test);
        # only the two error metrics are needed here.
        _model, MAE, RMSE, *_unused = get_score((w, u))
        MAEdf.loc[w, u] = MAE
        RMSEdf.loc[w, u] = RMSE

(6, 30)
Data  shape:  (1506, 6)
3.5452250081800774
(6, 40)
Data  shape:  (1506, 6)
3.543363241526873
(6, 50)
Data  shape:  (1506, 6)
3.566505750459531
(6, 60)
Data  shape:  (1506, 6)
3.564337019172244
(6, 70)
Data  shape:  (1506, 6)
3.6806800581563492
(6, 80)
Data  shape:  (1506, 6)
3.523028380128464
(6, 90)
Data  shape:  (1506, 6)
3.7310511053295805
(6, 100)
Data  shape:  (1506, 6)
3.5571806748380324
(6, 110)
Data  shape:  (1506, 6)
3.6401097459407454
(8, 30)
Data  shape:  (1504, 8)
3.4800229117369637
(8, 40)
Data  shape:  (1504, 8)
3.3981835834401832
(8, 50)
Data  shape:  (1504, 8)
3.515732623952873
(8, 60)
Data  shape:  (1504, 8)
3.4851134628599545
(8, 70)
Data  shape:  (1504, 8)
3.472688351095589
(8, 80)
Data  shape:  (1504, 8)
3.3993460047855106
(8, 90)
Data  shape:  (1504, 8)
3.491475178976542
(8, 100)
Data  shape:  (1504, 8)
3.470270085230223
(8, 110)
Data  shape:  (1504, 8)
3.4452109567345315
(10, 30)
Data  shape:  (1502, 10)
3.469067800303263
(10, 40)
Data  shape:  (1502, 10)


KeyboardInterrupt: 

In [9]:
# Train and evaluate a single configuration: window length 10, 100 LSTM units.
(model, MAE, RMSE,
 X_train, y_train,
 X_test, y_test) = get_score((10, 100))

(10, 100)


FileNotFoundError: [Errno 2] No such file or directory: '../asml_RK.pickle'

In [235]:
# get the predicted values


In [14]:
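# TODO: save the results to a text file.
# The draft below is disabled: `predicted_values` and `shifted_value` are not
# defined in the visible notebook cells (`shifted_value` is local to
# get_score), and a zip object cannot be added to a scalar.
#test_result = zip(predicted_values, y_test) + shifted_value
#np.savetxt('output_result.txt', test_result)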