# 1 Import necessary libraries

In [55]:
from numpy import array
%matplotlib inline
from matplotlib import pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.activations import softsign, relu
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.preprocessing as skpp

import importlib
import foresight.data_functions as fx_df
import foresight.util as fxu
import foresight.model
import foresight.backtesting
# Re-execute the local foresight modules so edits made on disk are picked up
# without restarting the kernel. The original reload order is preserved.
for stale_module in (fx_df, fxu, foresight.model):
    importlib.reload(stale_module)
# Kept as the cell's final expression so the reloaded module is still echoed.
importlib.reload(foresight.backtesting)

  assert (forecast_horizon == 1,


<module 'foresight.backtesting' from '/var/local/foresight/pymodules/foresight/backtesting.py'>

## 2.1 Specify the path of the source data

In [2]:
# Location of the input file that the load cell passes to pd.read_csv.
# Going by the file name this is xz-compressed EUR/GBP data for January 2016.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
src = '/var/local/foresight/timeseries/EURGBP-2016-01.pp1.xz'

### 2.1.1 Setup program parameters

In [3]:
# Run configuration read by the cells below.
# scaler_type and Transform are handed to fx_df.Data_Transformer; seq_len and
# n_vars form the LSTM input_shape; sample_time drives both the resampling in
# clean_data and the data_freq given to foresight.model.Model.
scaler_type = 'MinMaxScaler' # [MinMaxScaler, None]
seq_len = 60 # samples per input sequence; at sample_time = '5T' that spans 5 hours (60 x 5 min), not 1 hour
n_vars = 1 # number of input variables per time step (second entry of the LSTM input_shape)
num_outs = 1 # number of future outputs to forecast; NOTE(review): not referenced by any cell visible in this notebook
sample_time = '5T' # T=minutes
Transform = 'LogDiff' # [Diff, LogDiff, None]

# 3 Prepare Data

## 3.1 Import the data

1. Load the data into a dataframe
2. Specify the column names
3. Convert the date field into the correct datetime64 object

In [4]:
# Load the file at `src` and label its three columns.
# NOTE(review): read_csv consumes the first line as a header before the names
# are overwritten here; if the file has no header row, that first record is
# lost — confirm against the source file.
data_raw = pd.read_csv(src)
data_raw.columns = ['date', 'bid', 'ask']

# Parse the space-separated timestamp fields into datetime64 values and make
# them the index, rebinding the name instead of mutating the frame in place.
data_raw = data_raw.assign(
    date=pd.to_datetime(data_raw['date'], format="%Y %m %d %H %M %S %f")
).set_index('date')

## 3.2 Clean up the data

In [5]:
# Refuse to continue without a sampling interval.
# NOTE(review): `sample_time` is evaluated before VarExists is called, so a
# genuinely undefined name raises NameError rather than reaching this guard;
# what VarExists itself tests is not visible here — confirm.
if not fxu.VarExists(sample_time):
    raise RuntimeError("'sample_time' must be defined")

# Hand the raw frame to the project's cleaning routine, asking it to remove
# duplicates and weekends and to resample at `sample_time` using 'nearest'.
data = fx_df.clean_data(
    data_raw,
    remove_duplicates=True,
    sample_frequency=sample_time,
    sample_type='nearest',
    remove_weekends=True,
)

## 3.3 Create transformer object

In [6]:
# Build the transformer object that is later passed to foresight.model.Model
# as `data_transform`. Arguments: the transform name, the literal 2, and the
# scaler name.
# NOTE(review): the meaning of the positional 2 is not visible in this
# notebook — confirm against fx_df.Data_Transformer and consider a keyword.
data_txr = fx_df.Data_Transformer(Transform, 2, scaler_type)

## 3.4 Extract data as an np.ndarray

In [7]:
# Preview the first ten cleaned rows.
print(data.iloc[:10])
# Show what a single positional row lookup returns (a pandas Series in the
# recorded output).
print(type(data.iloc[3]))
# Only the bid column is carried forward, as a NumPy array.
data_arr = data.loc[:, 'bid'].to_numpy()

                         bid      ask
date                                 
2016-01-04 00:00:00  0.73643  0.73651
2016-01-04 00:05:00  0.73653  0.73657
2016-01-04 00:10:00  0.73651  0.73657
2016-01-04 00:15:00  0.73648  0.73654
2016-01-04 00:20:00  0.73641  0.73649
2016-01-04 00:25:00  0.73640  0.73646
2016-01-04 00:30:00  0.73623  0.73632
2016-01-04 00:35:00  0.73650  0.73661
2016-01-04 00:40:00  0.73646  0.73654
2016-01-04 00:45:00  0.73636  0.73644
<class 'pandas.core.series.Series'>


# 4 Prepare Keras model

In [8]:
# seq_len and n_vars size the network input, so check them before building.
# NOTE(review): each name is evaluated before VarExists is called, so an
# undefined name raises NameError instead of reaching these guards — confirm
# what VarExists is meant to test.
if not fxu.VarExists(seq_len):
    raise RuntimeError("'seq_len' must be defined")

if not fxu.VarExists(n_vars):
    raise RuntimeError("'n_vars' must be defined")

# TODO: fix this somehow
# (output_columns is hard-coded; in this cell only its length is used, as the
# width of the final Dense layer.)
output_columns = [0]

Activation_Function = 'tanh' # ['tanh', 'softsign', 'relu', 'elu', 'LeakyReLU']

# Five stacked 160-unit LSTM layers followed by a Dense output layer. Every
# LSTM except the last returns its full sequence so the next one receives a
# 3-D input; the first layer also declares the (seq_len, n_vars) input shape.
#
# Earlier experiments, previously kept here as commented-out code, stacked
# Bidirectional(LSTM(60, ...), merge_mode='concat') layers and compiled with
# loss='mae', optimizer='nadam'.
#
# The model is not compiled in this cell.
# NOTE(review): presumably foresight.model.Model compiles it — confirm.
LSTM_model = Sequential([
    LSTM(160, activation=Activation_Function, return_sequences=True, input_shape=(seq_len, n_vars)),
    LSTM(160, activation=Activation_Function, return_sequences=True),
    LSTM(160, activation=Activation_Function, return_sequences=True),
    LSTM(160, activation=Activation_Function, return_sequences=True),
    LSTM(160, activation=Activation_Function),
    Dense(len(output_columns)),
])

# 5 Create foresight.Model object

In [56]:
# Wrap the Keras network and the bid series in the project's Model object.
#
# stationary_transform now reads the `Transform` setting from the parameter
# cell instead of repeating the 'LogDiff' literal: `data_txr` was built from
# the same setting, so a repeated literal could silently drift from it when
# the configuration is changed.
#
# NOTE(review): scaler is None although scaler_type is 'MinMaxScaler';
# presumably scaling is handled through data_txr — confirm.
# NOTE(review): forecast_horizon stays at the literal 1; the warning recorded
# under the import cell quotes an `assert (forecast_horizon == 1, ...` in the
# library, which suggests 1 is the only supported value — confirm before
# tying it to `num_outs`.
fx_model = foresight.model.Model(
    model=LSTM_model,
    data=data_arr,
    data_freq=pd.Timedelta(sample_time),
    seq_len=seq_len,
    scaler=None,
    forecast_horizon=1,
    data_transform=data_txr,
    stationary_transform=Transform,
)

# 6 Fit timeseries Model

In [10]:
# Train the wrapped network for two epochs in batches of 128.
# The recorded split is 4529 training / 757 test / 377 validation samples,
# which is consistent with train_frac applying to the whole series and
# valid_frac to the non-training remainder — NOTE(review): confirm.
# The saved output for this cell ends in KeyboardInterrupt during epoch 1,
# i.e. that recorded run was stopped before training finished.
fx_model.Fit(
    epochs=2,
    batch_size=128,
    train_frac=0.8,
    valid_frac=1 / 3,
    validate_model=True,
    verbose=True,
    print_test_stat=False,
)

Number of training samples:  4529
Number of test samples:  757
Number of validation samples:  377
Epoch 1/2


KeyboardInterrupt: 

# 7 Backtest the model

In [177]:
# Pick up on-disk edits to the backtesting and data-function modules before
# constructing the backtester (same reload order as before).
importlib.reload(foresight.backtesting)
importlib.reload(fx_df)
# Debug aid: confirms which class the fitted wrapper is an instance of.
print(type(fx_model))
# Retrain once per week of data; the recorded output reports this as
# 2016 datapoints, i.e. 7 days of 5-minute samples.
backtester = foresight.backtesting.Backtester(
    model=fx_model,
    retraining_freq=pd.Timedelta(weeks=1),
)

<class 'foresight.model.Model'>
Restarting every 2016 datapoints
New backtester created!!


In [178]:
# Keep the final 20% of the rows (count truncated toward zero) for backtesting.
# NOTE(review): this slices `data_raw` (unresampled bid/ask rows), whereas the
# model was fitted on the cleaned, resampled bid series — presumably the
# Backtester cleans its input itself; confirm.
forecast_data = data_raw.iloc[-int(0.2 * len(data_raw)):]

In [179]:
# Run the backtest over the held-out slice selected in the previous cell.
# NOTE(review): the recorded output ends with an unlabeled number
# (-742339.99...); what it measures is not visible in this notebook —
# confirm against Backtester.Backtest before interpreting it.
backtester.Backtest(forecast_data)

Retraining every 2016 points
y_hat.shape (644,)
(644,)
-742339.9999999962
