## Step 1 -- import necessary libraries

In [1]:
import importlib

import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.preprocessing as skpp

import dask.dataframe as dd

import os # os.path

%matplotlib inline
from matplotlib import pyplot as plt
from numpy import array
from tensorflow.keras.activations import relu, softsign
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, TimeDistributed
from tensorflow.keras.models import Sequential
import tensorflow as tf

%load_ext tensorboard

import foresight.backtesting
import foresight.data_functions as fx_df
import foresight.model
import foresight.util as fxu

# Re-execute the project modules so that edits made to them on disk are picked
# up without restarting the kernel.
# The last reload is the final expression of the cell, so the reloaded
# foresight.backtesting module object is echoed as the cell output.
importlib.reload(fx_df)
importlib.reload(fxu)
importlib.reload(foresight.model)
importlib.reload(foresight.backtesting)



<module 'foresight.backtesting' from '/var/local/foresight/pymodules/foresight/backtesting.py'>

## 2.1 Specify the path of the source data

In [2]:
# Tick-data file used for training; it is read by fx_df.GetTickdataDataframe in section 3.1.
# Alternative sources are kept commented out so they can be switched in quickly.
# src = '/var/local/foresight/timeseries/EURGBP-2016-01.pp1.xz'
src = "/var/local/foresight/timeseries/EURGBP-2017-1-6.csv"
#src = "/var/local/foresight_local/timeseries/EURGBP-2017-1-6.csv" # for use on HPC04

### 2.1.1 Set up model parameters

In [3]:
# Model / data-preparation settings consumed by the cells below.
scaler_type = "MinMaxScaler"  # options: [MinMaxScaler, None]
sample_time = "5T"  # resampling frequency; T = minutes, i.e. one sample every 5 minutes
seq_len = 36  # input window length in samples: 36 x 5 min = 3 hours at the current sample_time
n_vars = 1  # number of input variables per time step (second dimension of the LSTM input_shape)
num_outs = 1  # number of future outputs to forecast
Transform = "LogDiff"  # options: [Diff, LogDiff, None]
model_name = '0000_testingModel'  # appended to models_path when loading / saving the Keras model
models_path = '/var/local/foresight/models/Keras_Models/'  # keep the trailing '/': the full path is built by string concatenation

### 2.1.2 Set up training parameters

In [4]:
# Training hyper-parameters passed to fx_model.Fit in the fitting cell.
training_batch_size = 16  # used as the batch_size argument
training_epochs = 2500  # epochs used when no saved model exists (a loaded model is fitted with epochs=0)

# 3 Prepare Data

## 3.1 Import the data

1. Load the data into a dataframe
2. Specify the column names
3. Convert the date field into the correct datetime64 object

In [5]:
# Load the raw tick data through the project helper. An earlier inline loader
# (read the CSV / .xz file with pandas, parse the "date" column, cache the
# frame as a pickle next to the source) used to live here as commented-out
# code; it has been superseded by the helper call below.
data_raw = fx_df.GetTickdataDataframe(
    src,
    date_format_string="%m/%d/%Y %H:%M:%S.%f",
)

# Show the index to confirm the timestamps were parsed into a DatetimeIndex.
print(data_raw.index)


DatetimeIndex(['2017-01-03 00:00:00.308000', '2017-01-03 00:00:00.340000',
               '2017-01-03 00:00:00.368000', '2017-01-03 00:00:00.378000',
               '2017-01-03 00:00:00.438000', '2017-01-03 00:00:00.445000',
               '2017-01-03 00:00:00.510000', '2017-01-03 00:00:00.522000',
               '2017-01-03 00:00:00.641000', '2017-01-03 00:00:00.671000',
               ...
               '2017-02-10 21:57:58.415000', '2017-02-10 21:57:58.435000',
               '2017-02-10 21:57:59.157000', '2017-02-10 21:57:59.266000',
               '2017-02-10 21:57:59.359000', '2017-02-10 21:57:59.455000',
               '2017-02-10 21:57:59.485000', '2017-02-10 21:57:59.505000',
               '2017-02-10 21:57:59.532000', '2017-02-10 21:58:00.008000'],
              dtype='datetime64[ns]', name='date', length=9343308, freq=None)


## 3.2 Clean up the data

In [6]:
# Guard: the resampling frequency has to be configured before cleaning.
if not fxu.VarExists(sample_time):
    raise RuntimeError("'sample_time' must be defined")

# Options forwarded to the project's cleaning helper; the data is resampled
# at `sample_time` using the "nearest" sample type.
_clean_options = {
    "remove_duplicates": True,
    "sample_frequency": sample_time,
    "sample_type": "nearest",
    "remove_weekends": True,
}
data = fx_df.clean_data(data_raw, **_clean_options)

## 3.3 Create transformer object

In [7]:
# Build the data transformer from the `Transform` and `scaler_type` settings.
# NOTE(review): the meaning of the second positional argument (2) is not
# visible in this notebook — TODO confirm and give it a named constant.
data_txr = fx_df.Data_Transformer(Transform, 2, scaler_type)

## 3.4 Extract data as an np.ndarray

In [8]:
# Quick look at the cleaned, resampled frame before it is handed to the model.
_preview = data.head(10)
print(_preview)
print(type(data.iloc[3]))  # a single row is returned as a pandas Series

# Only the "bid" column is used as model input; extract it as a NumPy array.
data_arr = data.loc[:, "bid"].to_numpy()

                         bid      ask
date                                 
2017-01-03 00:00:00  0.85147  0.85159
2017-01-03 00:05:00  0.85167  0.85176
2017-01-03 00:10:00  0.85194  0.85203
2017-01-03 00:15:00  0.85173  0.85182
2017-01-03 00:20:00  0.85183  0.85193
2017-01-03 00:25:00  0.85187  0.85195
2017-01-03 00:30:00  0.85183  0.85193
2017-01-03 00:35:00  0.85186  0.85193
2017-01-03 00:40:00  0.85190  0.85196
2017-01-03 00:45:00  0.85183  0.85194
<class 'pandas.core.series.Series'>


# 4 Prepare Keras model

In [9]:
# Guards: the network input shape is (seq_len, n_vars), so both must be set.
if not (fxu.VarExists(seq_len)):
    raise RuntimeError("'seq_len' must be defined")

if not (fxu.VarExists(n_vars)):
    raise RuntimeError("'n_vars' must be defined")

# Location of the saved model. Built by plain concatenation so that it matches
# the path used when the model is saved after fitting.
model_location = models_path + model_name

# Reuse a previously saved model when one exists; otherwise build a new one.
# `existing_model` is read by the fitting cell to decide whether to train.
existing_model = os.path.exists(model_location)

if existing_model:
    LSTM_model = tf.keras.models.load_model(model_location)

else:
    # TODO: fix this somehow
    output_columns = [0]

    Activation_Function = "tanh"  # ['tanh', 'softsign', 'relu', 'elu', 'LeakyReLU']

    # Architecture: a stack of identical LSTM layers followed by a Dense head.
    # (Earlier experiments with Bidirectional wrappers and wider layers of
    # 260-560 / 500 units were previously kept here as commented-out code.)
    n_lstm_layers = 6
    lstm_units = 150

    LSTM_model = Sequential()
    for layer_index in range(n_lstm_layers):
        is_first_layer = layer_index == 0
        is_last_layer = layer_index == n_lstm_layers - 1

        # Only the first layer declares the input shape.
        shape_kwargs = {"input_shape": (seq_len, n_vars)} if is_first_layer else {}

        LSTM_model.add(
            LSTM(
                lstm_units,
                activation=Activation_Function,
                # Every layer but the last passes the full sequence on to the
                # next LSTM; the last one returns only its final output.
                return_sequences=not is_last_layer,
                **shape_kwargs,
            )
        )

    # One output unit per forecast column.
    LSTM_model.add(Dense(len(output_columns)))
    LSTM_model.compile(loss="mae", optimizer="nadam")  # 'adam'

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    LSTM_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 36, 150)           91200     
_________________________________________________________________
lstm_1 (LSTM)                (None, 36, 150)           180600    
_________________________________________________________________
lstm_2 (LSTM)                (None, 36, 150)           180600    
_________________________________________________________________
lstm_3 (LSTM)                (None, 36, 150)           180600    
_________________________________________________________________
lstm_4 (LSTM)                (None, 36, 150)           180600    
_________________________________________________________________
lstm_5 (LSTM)                (None, 150)               180600    
_________________________________________________________________
dense (Dense)                (None, 1)                 1

# 5 Create foresight.Model object

In [10]:
# Wrap the Keras network and the bid-price array in the project's Model object,
# which is later used for fitting and by the backtester.
fx_model = foresight.model.Model(
    model=LSTM_model,
    data=data_arr,
    data_freq=pd.Timedelta(sample_time),  # sampling interval expressed as a Timedelta
    seq_len=seq_len,
    scaler=None,  # NOTE(review): scaler_type is given to data_txr instead; presumably scaling is handled there — confirm
    forecast_horizon=1,  # NOTE(review): hard-coded; looks like it should track num_outs — confirm
    data_transform=data_txr,
    stationary_transform="LogDiff",  # NOTE(review): hard-coded duplicate of Transform; changing Transform alone would leave the two out of sync
    max_training_data_factor=2.5,  # NOTE(review): semantics not visible in this notebook
)

# 6 Fit timeseries Model

In [None]:
# A freshly built network is trained for the configured number of epochs.
# A model loaded from disk still goes through Fit, but with zero epochs
# (presumably so the wrapper prepares its internal data without further
# training — TODO confirm).
_epochs_to_run = 0 if existing_model else training_epochs

_ = fx_model.Fit(
    batch_size=training_batch_size,
    epochs=_epochs_to_run,
    train_frac=1.0,  # 5 / 5: use all of the data for training
    valid_frac=0.0,  # 0 / 3: no separate validation fraction
    verbose=True,
    validate_model=False,
    print_test_stat=False,
)

# Persist only a newly trained model; a loaded one is already on disk.
if not existing_model:
    fx_model._model.save(models_path + model_name)

Number of training samples:  8290
Number of test samples:  0
Number of validation samples:  8290
Epoch 1/2500
Epoch 2/2500
Epoch 3/2500
Epoch 4/2500
Epoch 5/2500
Epoch 6/2500
Epoch 7/2500
Epoch 8/2500
Epoch 9/2500
Epoch 10/2500
Epoch 11/2500
Epoch 12/2500
Epoch 13/2500
Epoch 14/2500
Epoch 15/2500
Epoch 16/2500
Epoch 17/2500
Epoch 18/2500
Epoch 19/2500
Epoch 20/2500
Epoch 21/2500
Epoch 22/2500
Epoch 23/2500
Epoch 24/2500
Epoch 25/2500
Epoch 26/2500
Epoch 27/2500
Epoch 28/2500
Epoch 29/2500
Epoch 30/2500
Epoch 31/2500
Epoch 32/2500
Epoch 33/2500
Epoch 34/2500
Epoch 35/2500
Epoch 36/2500
Epoch 37/2500
Epoch 38/2500
Epoch 39/2500
Epoch 40/2500
Epoch 41/2500
Epoch 42/2500
Epoch 43/2500
Epoch 44/2500
Epoch 45/2500
Epoch 46/2500
Epoch 47/2500
Epoch 48/2500
Epoch 49/2500
Epoch 50/2500
Epoch 51/2500
Epoch 52/2500
Epoch 53/2500
Epoch 54/2500
Epoch 55/2500
Epoch 56/2500
Epoch 57/2500
Epoch 58/2500
Epoch 59/2500
Epoch 60/2500
Epoch 61/2500
Epoch 62/2500
Epoch 63/2500
Epoch 64/2500
Epoch 65/2500
Ep

In [None]:
#fx_model._model.save(models_path + model_name)

# 7 Backtest the model

In [None]:
# Re-execute the backtesting and data helper modules so on-disk edits are
# picked up before the backtester is constructed (same order as before).
for _module in (foresight.backtesting, fx_df):
    importlib.reload(_module)

print(type(fx_model))

# Trading rules handed to the backtester.
_trading_rules = {
    "trade_size": 1_000,
    "stop_loss": 0.00025,
    "take_profit": 0.00025,
    "min_change": 0.00005,
    "leverage": 1,
}

backtester = foresight.backtesting.Backtester(
    model=fx_model,
    retraining_freq=pd.Timedelta("1D"),
    trading_rules=_trading_rules,
    initial_money=1_000,
)

In [None]:
# Tick data used for the backtest: a different file from the training source,
# loaded with the same helper and timestamp format.
bt_src = "/var/local/foresight/timeseries/EURGBP-2017-7-13.csv"
forecast_data = fx_df.GetTickdataDataframe(
    bt_src,
    date_format_string="%m/%d/%Y %H:%M:%S.%f",
)

In [None]:
# Run the backtest over the held-out tick data. The retraining arguments are
# forwarded unchanged; their exact semantics are defined by Backtester.Backtest.
backtester.Backtest(
    forecast_data,
    initial_retraining=0.15,
    retrain_epochs=25,
    retrain_verbose=True,
)