## Step 1 -- import necessary libraries

In [None]:
import importlib

import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.preprocessing as skpp

import dask.dataframe as dd

import os # os.path
import datetime

%matplotlib inline
from matplotlib import pyplot as plt
from numpy import array
from tensorflow.keras.activations import relu, softsign
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, TimeDistributed, Dropout
from tensorflow.keras.models import Sequential
import tensorflow as tf

%load_ext tensorboard

import foresight.backtesting
import foresight.data_functions as fx_df
import foresight.model
import foresight.util as fxu

importlib.reload(fx_df)
importlib.reload(fxu)
importlib.reload(foresight.model)
importlib.reload(foresight.backtesting)

# Enable mixed-precision training for every layer created after this point.
# The `experimental` namespace was deprecated in TensorFlow 2.4 and is absent
# from later releases, so use the stable API when it is available and only fall
# back to the experimental one on older installations.
if hasattr(tf.keras.mixed_precision, "set_global_policy"):
    mixed_precision = tf.keras.mixed_precision
    policy = mixed_precision.Policy("mixed_float16")
    mixed_precision.set_global_policy(policy)
else:
    from tensorflow.keras.mixed_precision import experimental as mixed_precision
    policy = mixed_precision.Policy("mixed_float16")
    mixed_precision.set_policy(policy)

## 2.1 Specify the path of the source data

In [None]:
# Tick-data file the model is trained on (read by the import cell in section 3.1).
# Alternative source, kept for reference:
# src = '/var/local/foresight/timeseries/EURGBP-2016-01.pp1.xz'
src = "/var/local/foresight/timeseries/EURGBP-2017-1-6.csv"

### 2.1.1 Set up model parameters

In [None]:
# --- Data preparation ---------------------------------------------------------
sample_time = "5T"            # resampling interval passed to clean_data (T = minutes)
Transform = "LogDiff"         # one of: Diff, LogDiff, None
scaler_type = "MinMaxScaler"  # one of: MinMaxScaler, None

# --- Model input / output sizes -----------------------------------------------
seq_len = 12   # time steps per input sequence: 12 x 5 min = 1 hour
n_vars = 1     # second dimension of the LSTM input shape
num_outs = 1   # number of future outputs to forecast

# --- Location of the model artefacts ------------------------------------------
models_base_path = '/var/local/foresight/models/Keras_Models/'
model_name = '0001_6-LSTM_5minData_1h_dropoff'
model_path = f"{models_base_path}{model_name}"

### 2.1.2 Set up training parameters

In [None]:
training_epochs = 5000     # passed to fx_model.Fit as `epochs`
training_batch_size = 64   # passed to fx_model.Fit as `batch_size`
training_dropoff = 0.01    # dropout rate used by the layers in BuildModel

# 3 Prepare Data

## 3.1 Import the data

1. Load the data into a dataframe
2. Specify the column names
3. Convert the date field into the correct datetime64 object

In [None]:
# Load the raw tick data from `src`. The format string describes how timestamps
# are written in the file: month/day/year, then time with fractional seconds.
data_raw = fx_df.GetTickdataDataframe(src, date_format_string='%m/%d/%Y %H:%M:%S.%f')

## 3.2 Clean up the data

In [None]:
# NOTE(review): if `sample_time` had never been assigned, evaluating the argument
# below would raise NameError before VarExists runs -- confirm what VarExists
# actually checks.
if fxu.VarExists(sample_time):
    # Clean the raw ticks and resample them onto the `sample_time` grid.
    data = fx_df.clean_data(
        data_raw,
        sample_frequency=sample_time,
        sample_type="nearest",
        remove_duplicates=True,
        remove_weekends=True,
    )
else:
    raise RuntimeError("'sample_time' must be defined")

## 3.3 Create transformer object

In [None]:
# Transformer object configured from the `Transform` and `scaler_type` settings;
# it is handed to foresight.model.Model as `data_transform` further down.
# NOTE(review): the meaning of the positional `2` is not visible in this
# notebook -- confirm against fx_df.Data_Transformer and consider naming it.
data_txr = fx_df.Data_Transformer(Transform, 2, scaler_type)

## 3.4 Extract data as an np.ndarray

In [None]:
# Only the "bid" column is modelled; pull it out of the cleaned frame as an array.
data_arr = data["bid"].to_numpy()

# 4 Prepare Keras model

In [None]:
def BuildModel():
    """Build and compile the stacked-LSTM forecasting network.

    The network consists of six LSTM layers of 100 units each, a Dropout layer
    and a Dense output layer with one unit per output column. It is compiled
    with the MAE loss and the Adam optimizer, and reports MAE, MAPE and MSE.

    Reads the notebook-level settings ``seq_len``, ``n_vars`` and
    ``training_dropoff``.

    Returns:
        The compiled Keras ``Sequential`` model.

    Raises:
        RuntimeError: if ``fxu.VarExists`` returns a falsy value for
            ``seq_len`` or ``n_vars``.
    """
    if not fxu.VarExists(seq_len):
        raise RuntimeError("'seq_len' must be defined")

    if not fxu.VarExists(n_vars):
        raise RuntimeError("'n_vars' must be defined")

    # TODO: fix this somehow
    output_columns = [0]

    n_lstm_layers = 6
    lstm_units = 100
    activation_function = "tanh"  # ['tanh', 'softsign', 'relu', 'elu', 'LeakyReLU']

    # Other candidates that were tried: RootMeanSquaredError, LogCoshError,
    # MeanSquaredLogarithmicError.
    metrics = [
        tf.keras.metrics.MeanAbsoluteError(name='mae'),
        tf.keras.metrics.MeanAbsolutePercentageError(name='mape'),
        tf.keras.metrics.MeanSquaredError(name='mse'),
    ]

    lstm_model = Sequential()

    # First layer declares the input shape and applies no dropout to the raw inputs.
    lstm_model.add(
        LSTM(
            lstm_units,
            activation=activation_function,
            return_sequences=True,
            input_shape=(seq_len, n_vars),
            dropout=0,
        )
    )

    # Remaining layers share one configuration; only the last one collapses the
    # sequence dimension (return_sequences=False) so Dense sees a single vector.
    for layer_index in range(1, n_lstm_layers):
        is_last_lstm = layer_index == n_lstm_layers - 1
        lstm_model.add(
            LSTM(
                lstm_units,
                activation=activation_function,
                return_sequences=not is_last_lstm,
                dropout=training_dropoff,
            )
        )

    lstm_model.add(Dropout(training_dropoff))

    # The notebook enables the global 'mixed_float16' policy; keep the output
    # layer in float32 so predictions and the loss are not computed in float16.
    lstm_model.add(Dense(len(output_columns), dtype="float32"))

    lstm_model.compile(loss='mae', optimizer='adam', metrics=metrics)

    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (which would emit an extra "None" line).
    lstm_model.summary()

    return lstm_model

# 5 Create foresight.Model object

In [None]:
# Wrap the Keras network and the prepared series in a foresight Model.
# NOTE(review): `forecast_horizon` is fixed at 1 and the network's output width
# is fixed in BuildModel; the `num_outs` setting is not used here -- confirm
# whether it is meant to drive either of them.
# NOTE(review): `scaler` is None while `data_txr` was built with `scaler_type`;
# presumably scaling is handled by the transformer -- confirm.
fx_model = foresight.model.Model(
    model=BuildModel(),
    data=data_arr,
    data_freq=pd.Timedelta(sample_time),
    seq_len=seq_len,
    scaler=None,
    forecast_horizon=1,
    data_transform=data_txr,
    # Use the same setting that configured `data_txr` so the two cannot diverge
    # (this was a hard-coded "LogDiff", which is the current value of `Transform`).
    stationary_transform=Transform,
    max_training_data_factor=1.5,
)

# 6 Fit timeseries Model

## 6.1 Set up callbacks

Callbacks are used here for:

- saving checkpoint info to disk to allow resuming the training of a model if it becomes interrupted
- saving tensorboard log information for visualization

### 6.1.1 Parameters

In [None]:
# Checkpoint parameters: weights go to <model_path>/checkpoints/cp.ckpt
cp_freq_in_epochs = 10  # desired number of epochs between checkpoints
checkpoint_file = f"{model_path}/checkpoints/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_file)

# Tensorboard parameters: each run logs into its own timestamped sub-directory
tb_path = f"{model_path}/logs"
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = os.path.join(tb_path, run_stamp)

### 6.1.2 Create Checkpoint Callback

In [None]:
# An integer `save_freq` is counted in batches, so convert the desired epoch
# interval into an approximate batch count first.
cp_approx_batches_per_epoch = (len(data_arr) - seq_len) / training_batch_size
cp_save_freq = int(cp_freq_in_epochs * cp_approx_batches_per_epoch)

# Periodically write the model weights (weights only) to `checkpoint_file`.
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_file,
    save_freq=cp_save_freq,
    save_weights_only=True,
)

### 6.1.3 Create Tensorboard Callback

In [None]:
# Write TensorBoard logs for this run under `logdir`; histogram_freq=10 asks for
# weight histograms every 10 epochs.
tb_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir, histogram_freq=10)

## 6.2 Set up the model directory and load checkpoint data, if available

In [None]:
# Make sure the checkpoint directory exists. exist_ok=True makes this a no-op
# when the directory is already there, so no separate existence check (or
# extra import) is needed.
os.makedirs(checkpoint_dir, exist_ok=True)

# Resume from an earlier run when its checkpoint is present (detected through
# the accompanying ".index" file); otherwise write the freshly initialised
# weights so that a checkpoint exists from the start.
if os.path.isfile(checkpoint_file + '.index'):
    fx_model._model.load_weights(checkpoint_file)
    print('Loaded saved weights from checkpoint file')
else:
    fx_model._model.save_weights(checkpoint_file)
    print('Initialized new checkpoint file')


## 6.3 Fit the model

### 6.3.1 Start TensorBoard

In [None]:
# Launch TensorBoard inside the notebook, reading every run logged under `tb_path`.
# NOTE: --bind_all makes TensorBoard listen on all network interfaces, not only
# on localhost.
# Earlier hard-coded invocation, kept for reference:
#%tensorboard --logdir var/local/foresight/models/Keras_Models/0001_6-LSTM_5minData_3h/logs
%tensorboard --logdir $tb_path --bind_all
# Alternative way of attaching to an already running instance:
#from tensorboard import notebook
#notebook.list()
#notebook.display(port=6006)

In [None]:
# Fit with train_frac = 1 and valid_frac = 0, checkpointing and logging to
# TensorBoard through the two callbacks. The return value is not used.
_ = fx_model.Fit(
    epochs=training_epochs,
    batch_size=training_batch_size,
    train_frac=1.0,  # previously written as 5 / 5
    valid_frac=0.0,  # previously written as 0 / 3
    validate_model=False,
    print_test_stat=False,
    verbose=1,
    callbacks=[cp_callback, tb_callback],
#    callbacks=[cp_callback],
)

In [None]:
# Disabled: save the full model.
# NOTE(review): `models_path` is not defined anywhere in this notebook; the
# combined base path + model name is already available as `model_path`.
#fx_model._model.save(models_path + model_name)

# 7 Backtest the model

In [None]:
# Re-import the foresight modules so that edits made on disk are picked up.
# NOTE(review): objects created earlier (e.g. `data_txr`) still belong to the
# pre-reload classes -- confirm the backtester does not rely on isinstance checks.
for _module in (foresight.backtesting, fx_df):
    importlib.reload(_module)

print(type(fx_model))

# Rules handed to the backtester as `trading_rules`.
bt_trading_rules = {
    "trade_size": 1_000,
    "stop_loss": 0.00025,
    "take_profit": 0.00025,
    "min_change": 0.00005,
    "leverage": 1,
}

backtester = foresight.backtesting.Backtester(
    model=fx_model,
    retraining_freq=pd.Timedelta("1W"),
    trading_rules=bt_trading_rules,
    initial_money=1_000,
)

In [None]:
# Separate tick-data file used for backtesting, loaded with the same timestamp
# format as the training data.
bt_src = "/var/local/foresight/timeseries/EURGBP-2017-7-13.csv"
forecast_data = fx_df.GetTickdataDataframe(bt_src, date_format_string='%m/%d/%Y %H:%M:%S.%f')

In [None]:
# Run the backtest over `forecast_data`.
# NOTE(review): the exact semantics of initial_retraining / retrain_epochs are
# defined in foresight.backtesting and are not visible here -- presumably
# retraining runs use 25 epochs; confirm.
backtester.Backtest(forecast_data, initial_retraining = 0, retrain_epochs = 25, retrain_verbose = True)