In [None]:
%cd ../..
%run cryptolytic/notebooks/init.ipynb
# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"
import pandas as pd
import cryptolytic.util as util
import cryptolytic.start as start
import cryptolytic.viz.plot as plot
import cryptolytic.data.sql as sql
import cryptolytic.data as d
from cryptolytic.util import *
import cryptolytic.data.historical as h
import cryptolytic.model as m
import cryptolytic.model.lstm_framework as lstm

from matplotlib.pylab import rcParams
from IPython.core.display import HTML
from pandas.plotting import register_matplotlib_converters # to stop a warning message


# Candlestick column names (not referenced again in the visible cells).
ohclv = ['open', 'high', 'close', 'low', 'volume']
# NOTE(review): `plt` is not imported in this cell; presumably it is provided by
# init.ipynb or by the star import from cryptolytic.util — confirm.
plt.style.use('ggplot')
# Default figure size as (width, height) for the plots below.
rcParams['figure.figsize'] = 20,7
# Project initialisation; what it does is defined in cryptolytic.start (not shown here).
start.init()
# Per the import comment above, this is called to silence a pandas/matplotlib warning.
register_matplotlib_converters()


# Make math readable: enlarge MathJax output. The HTML object is the last
# expression of the cell, so the notebook renders (and thereby applies) the style.
HTML("""
<style>
.MathJax {
    font-size: 2rem;
}
</style>""")

Source of the quoted passages below: <https://machinelearningmastery.com/time-series-prediction-lstm-recurrent-neural-networks-python-keras/>

### LSTM with Memory Between Batches

>We can gain finer control over when the internal state of the LSTM network is cleared in Keras by making the LSTM layer “stateful”. This means that it can build state over the entire training sequence and even maintain that state if needed to make predictions.

>It requires that the training data not be shuffled when fitting the network. It also requires explicit resetting of the network state after each exposure to the training data (epoch) by calls to model.reset_states(). This means that we must create our own outer loop of epochs and within each epoch call model.fit() and model.reset_states(). For example:

```python
for i in range(100):
	model.fit(trainX, trainY, epochs=1, batch_size=batch_size, verbose=2, shuffle=False)
	model.reset_states()
```

>Finally, when the LSTM layer is constructed, the stateful parameter must be set True and instead of specifying the input dimensions, we must hard code the number of samples in a batch, number of time steps in a sample and number of features in a time step by setting the batch_input_shape parameter. For example:
```python
model.add(LSTM(4, batch_input_shape=(batch_size, time_steps, features), stateful=True))
```

In [None]:
# Hyper-parameters for windowing, feature engineering and training, followed by
# the data load.
# TRAIN_SPLIT = 5000
# Number of past rows spanned by each input window (passed to windowed()).
history_size = 1000
# Number of rows requested from the data source, before the padding added below.
input_len = 8500
# NOTE(review): not read by any later visible cell (windowed() computes its own
# local train_size), and 5000 does not match input_len — confirm it can be removed.
train_size = int(5000*0.8)
# Number of future target values predicted per window (also the Dense output width).
lahead = 10
# Stride between the rows sampled inside one input window.
step = 2
# Window length for the rolling std / skew / kurtosis features.
rolling_size = 6
# Forwarded to d.get_df as 'period'; presumably the candle length in seconds — confirm.
period = 300
# Batch size used for the model's InputLayer and for model.fit.
batch_size = 100
# Extra rows requested on top of input_len; presumably to offset rows affected by
# the rolling-window warm-up / look-ahead — confirm.
to_drop = max(rolling_size - 1, lahead - 1)
input_len = input_len + to_drop
df = d.get_df ({'start':'06-01-2019', 'period':period, 'trading_pair':'btc_usd', 'exchange_id':'bitfinex'},
               n=input_len)

In [None]:
from sklearn.preprocessing import MinMaxScaler
epsilon = 1e-9

# Version 2 
def normalize(df):
    """Return a copy of ``df`` with every column log-transformed and standardised.

    For each column the natural log is taken and the result is shifted by the
    mean of the logs and divided by their population standard deviation,
    i.e. ``(log(x) - mu) / sigma``.  The input frame is left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame of numeric columns; the log is applied to every value.

    Returns
    -------
    pandas.DataFrame
        New frame with the same columns and index holding the z-scored logs.
    """
    result = df.copy()

    for name in result.columns:
        log_values = np.log(result[name])
        mean_log = np.mean(log_values)
        # Population std via E[x^2] - E[x]^2.
        std_log = np.sqrt(np.mean(log_values ** 2) - mean_log ** 2)
        result[name] = (log_values - mean_log) / std_log
    return result
   
def denormalize(values, df, col=None):
    """Invert ``normalize``: map z-scored log values back to the original scale.

    ``normalize`` computes ``(log(x) - mu) / sigma`` per column, so the inverse
    applied here is ``exp(z * sigma + mu)``.  ``mu`` and ``sigma`` are
    recomputed from ``df`` exactly as ``normalize`` computes them.

    Parameters
    ----------
    values : 1-D or 2-D array-like, pandas.Series or pandas.DataFrame
        Normalised values.  The input is never modified.
    df : pandas.DataFrame
        Reference frame the statistics are taken from.  Because the statistics
        are computed on ``log(df[...])``, it must hold the raw (positive,
        un-normalised) values that were given to ``normalize``.
    col : label, optional
        Column of ``df`` that a 1-D ``values`` corresponds to.  Required for
        1-D input, ignored for 2-D input.

    Returns
    -------
    Same container kind as ``values`` (ndarray, Series or DataFrame) holding
    the de-normalised values.  For 2-D input, column ``i`` of ``values`` is
    paired with column ``i`` of ``df`` by position.

    Raises
    ------
    ValueError
        If ``values`` is 1-D and ``col`` is not given (there is no way to know
        which column's statistics to use).
    """
    def _log_stats(column):
        # Mean and population std of the logs, matching normalize().
        logs = np.log(column)
        mu = np.mean(logs)
        sigma = np.sqrt(np.mean(logs ** 2) - mu ** 2)
        return mu, sigma

    if np.ndim(values) == 1:
        if col is None:
            raise ValueError("denormalize: `col` is required when `values` is 1-D")
        mu, sigma = _log_stats(df[col])
        if not isinstance(values, pd.Series):
            values = np.asarray(values, dtype=float)
        # Builds a new object, so the caller's data is untouched.
        return np.exp(values * sigma + mu)

    is_frame = isinstance(values, pd.DataFrame)
    # Work on a float copy so the caller's data is never mutated and integer
    # inputs are not truncated on assignment.
    restored = values.astype(float) if is_frame else np.array(values, dtype=float)
    for i in range(restored.shape[1]):
        mu, sigma = _log_stats(df.iloc[:, i])
        if is_frame:
            restored.iloc[:, i] = np.exp(restored.iloc[:, i] * sigma + mu)
        else:
            restored[:, i] = np.exp(restored[:, i] * sigma + mu)
    return restored

In [None]:
# Build the model's feature matrix from the raw frame.
# Sort by index before computing rolling / exponentially-weighted statistics.
df = df.sort_index()
# Log-standardise every numeric column.
# NOTE(review): _get_numeric_data is a private pandas method — confirm whether
# select_dtypes('number') is an acceptable replacement.
normed = normalize(df._get_numeric_data())
# Base signals that every derived feature below is computed from.
c = normed[['close', 'volume', 'diff', 'arb_signal', 'timestamp']]
# Derived features; NaNs produced at the start of each window are back-filled.
a_df = c.ewm(alpha=0.9).mean().bfill().rename(columns=lambda x: x+'_mean')
b_df = c.rolling(rolling_size).std().bfill().rename(columns=lambda x: x+'_std')
c_df = c.ewm(alpha=0.9).std().bfill().rename(columns=lambda x: x+'_ewmstd')
d_df = c.rolling(rolling_size).skew().bfill().rename(columns=lambda x: x+'_skew')
e_df = c.rolling(rolling_size).kurt().bfill().rename(columns=lambda x: x+'_kurt')
# NOTE(review): same computation as a_df (ewm(alpha=0.9).mean()); only the
# column suffix differs, so these columns duplicate the '_mean' ones.
f_df = c.ewm(alpha=0.9).mean().bfill().rename(columns=lambda x: x+'_ewmean') 
# Join everything column-wise and drop any column that still contains NaN.
# NOTE(review): this rebinds `df` from the raw frame to the normalised feature
# frame; later cells that pass `df` to denormalize() therefore take logs of
# normalised values — confirm that is intended.
df = pd.concat([c, a_df, b_df, c_df, d_df, e_df, f_df], axis=1).dropna(axis=1)
# df_sub = df.drop(['timestamp', 'period', 'open', 'high', 'low', 'api', 'exchange', 'trading_pair'], axis=1)
# Not the last expression of the cell, so this preview is not displayed.
df.head()
dataset = df
# Positional index of the prediction target column.
target = dataset.columns.get_loc('close') 
# From here on `dataset` is a plain NumPy array (rows x features).
dataset = dataset.values
y = dataset[:, target]

In [None]:
def windowed(df, target, batch_size, history_size, step, lahead=1, ratio=0.8):
    """Cut a 2-D feature matrix into (history window, future targets) samples.

    For every row index ``i`` from ``history_size`` up to (but excluding)
    ``n_rows - lahead`` one sample is produced:

    * input  — rows ``i - history_size`` .. ``i - 1`` taken every ``step`` rows
    * target — the ``lahead`` values of column ``target`` starting at row ``i``

    The samples are split in order (no shuffling) into a training part and a
    validation part, and both parts are trimmed to a multiple of
    ``batch_size`` (needed for a stateful LSTM).

    Parameters
    ----------
    df : 2-D array-like or pandas.DataFrame
        Feature matrix, one row per time step.  The data is read from this
        argument; no module-level variable is consulted.
    target : int
        Positional index of the column to predict.
    batch_size : int
        Both returned splits have a sample count divisible by this value.
    history_size : int
        Number of past rows spanned by one input window.
    step : int
        Stride between the rows sampled inside a window.
    lahead : int, default 1
        Number of future target values per sample.
    ratio : float, default 0.8
        Fraction of the samples assigned to the training split (before trimming).

    Returns
    -------
    tuple of numpy.ndarray
        ``(x_train, y_train, x_val, y_val)``.
    """
    data = np.asarray(df)
    labels = data[:, target]

    first = history_size
    last = data.shape[0] - lahead

    xs = []
    ys = []
    for i in range(first, last):
        # Rows i-history_size .. i-1, every `step`-th one.
        xs.append(data[i - history_size:i:step])
        ys.append(labels[i:i + lahead])

    xs = np.array(xs)
    ys = np.array(ys)

    nrows = xs.shape[0]
    train_size = int(nrows * ratio)
    # Make sure the sizes are multiples of the batch size (needed for stateful lstm).
    train_size -= train_size % batch_size
    val_size = nrows - train_size
    val_size -= val_size % batch_size
    total_size = train_size + val_size
    xs = xs[:total_size]
    ys = ys[:total_size]

    return xs[:train_size], ys[:train_size], xs[train_size:], ys[train_size:]

In [None]:
# Build the train / validation windows from the feature matrix using the
# hyper-parameters defined earlier in the notebook.
x_train, y_train, x_val, y_val = windowed(dataset, target, batch_size, history_size, step, lahead)
# Show the shape of each split.  `mapl` is not defined in the visible cells —
# presumably a list-returning map from cryptolytic.util; confirm.
mapl(lambda x: x.shape, [x_train, y_train, x_val, y_val])

In [None]:
# Shapes of one input window and of its multi-step target.  The target is the
# 'close' column (see `target`), so the label no longer says "temperature",
# which was left over from a weather-forecasting example.
print ('Single window of past history : {}'.format(x_train[0].shape))
print ('\n Target close values to predict : {}'.format(y_train[0].shape))

In [None]:
import tensorflow as tf
# tf.data pipelines over the windowed arrays.
# NOTE(review): train_data / val_data are not consumed by any later visible
# cell (training calls model.fit on x_train / y_train directly), and
# BATCH_SIZE (250) differs from the batch_size (100) used for the model —
# confirm whether this cell is still needed.
BUFFER_SIZE = 10_000
BATCH_SIZE = 250
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# Cache, shuffle within a BUFFER_SIZE buffer, batch, and repeat indefinitely.
train_data = train_data.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()

val_data = tf.data.Dataset.from_tensor_slices((x_val, y_val))
# Validation data is batched and repeated but not shuffled.
val_data = val_data.batch(BATCH_SIZE).repeat()

In [None]:
import tensorflow as tf
import tensorflow.keras.layers as layers

def lstm_model(train, stateful):
    """Build and compile a small two-layer LSTM regressor.

    Parameters
    ----------
    train : array with at least two dimensions
        Only ``train.shape[-2:]`` is read; it becomes the input shape of the
        first LSTM layer.
    stateful : any
        Accepted but not used by this function.

    Returns
    -------
    tf.keras.models.Sequential
        Compiled with RMSprop (learning rate 0.00005) and MSE loss.  The output
        width comes from the module-level ``lahead``.
    """
    window_shape = train.shape[-2:]
    network = tf.keras.models.Sequential([
        layers.LSTM(5, return_sequences=True, input_shape=window_shape),
        layers.LSTM(5, activation='relu'),
        # Output width is read from the global `lahead`.
        layers.Dense(lahead),
    ])
    optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.00005)
    network.compile(optimizer=optimizer, loss='mse')
    return network

In [None]:
import tensorflow.keras.models as models
import tensorflow.keras.regularizers as regularizers
def create_model(df, stateful):
    """Build and compile the stacked LSTM used for multi-step prediction.

    Architecture: an InputLayer with a fixed batch size, three
    sequence-returning LSTM(32) layers with L2 kernel regularisation, a final
    LSTM(32), and a Dense layer with ``lahead`` outputs.  Compiled with MAE
    loss and RMSprop (learning rate 0.0001).

    Parameters
    ----------
    df : any
        Accepted for interface compatibility; not read.  The input shape is
        taken from the module-level ``x_train`` (``x_train.shape[-2:]``).
    stateful : any
        Accepted for interface compatibility.
        NOTE(review): the flag is not forwarded to the LSTM layers, so the
        layers are built with Keras' default statefulness — confirm whether it
        should be wired through (prediction calls would then also need a
        matching batch size).

    Returns
    -------
    tensorflow.keras.models.Sequential
        The compiled model.  Also depends on the module-level ``batch_size``
        and ``lahead``.
    """
    l2_penalty = 0.01

    model = models.Sequential()
    model.add(layers.InputLayer(batch_size=batch_size, input_shape=x_train.shape[-2:]))
    # The regulariser belongs to the LSTM layer, not to model.add().
    for _ in range(3):
        model.add(layers.LSTM(32, return_sequences=True,
                              kernel_regularizer=regularizers.l2(l2_penalty)))
    model.add(layers.LSTM(32))

    model.add(layers.Dense(lahead))
    #model.compile(loss='mse', optimizer='adam')
    model.compile(loss='mae', optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001))
    return model


In [None]:
# Build the network used by the training loop below.
model = create_model(df, stateful=True)
# Loss curves accumulated across epochs: filled in by run_model() and read by
# plot_train_history().
history = {'loss':[],
          'val_loss':[]}

In [None]:
def run_model(epochs=10):
    """Train the global ``model`` one epoch at a time, resetting LSTM state between epochs.

    Each iteration fits for a single epoch on ``x_train`` / ``y_train`` without
    shuffling, validates on ``x_val`` / ``y_val``, records the epoch's losses
    in the global ``history`` dict and then calls ``model.reset_states()``.

    Parameters
    ----------
    epochs : int, default 10
        Number of single-epoch fit / reset cycles to run.

    Returns
    -------
    The trained global ``model``.

    Notes
    -----
    * ``history['loss']`` and ``history['val_loss']`` receive one float per
      epoch (a flat list).
    * ``workers`` / ``use_multiprocessing`` are not passed to ``fit``: they only
      apply to generator / ``Sequence`` inputs, not to in-memory arrays, and
      newer Keras releases no longer accept them.
    * Original author's note, kept for context: "batch size higher than 1
      causes to fail, not sure".
    """
    for epoch in range(epochs):
        print(f'Epoch {epoch}')
        fit_result = model.fit(x_train, y_train,
                               batch_size=batch_size,
                               epochs=1,
                               verbose=1,
                               validation_data=(x_val, y_val),
                               shuffle=False)
        # fit() ran exactly one epoch, so each list holds a single value.
        history['loss'].extend(fit_result.history['loss'])
        history['val_loss'].extend(fit_result.history['val_loss'])
        model.reset_states()

    return model

model = run_model()

In [None]:
!ls

In [None]:
model.summary()

In [None]:
# BUFFER_SIZE = 10_000
# BATCH_SIZE = 100
# train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# train_data = train_data.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()
# 
# val_data = tf.data.Dataset.from_tensor_slices((x_val, y_val))
# val_data = val_data.batch(BATCH_SIZE).repeat()
# 
# history = model.fit(train_data,
#                 steps_per_epoch=38,
#                 shuffle=False,     
#                 epochs=1,
#                 verbose=1,
#                 use_multiprocessing=True,
#                 workers=4,
#                 validation_data=val_data,
#                 validation_steps=5) 

In [None]:
def multi_step_plot(history, true_future, prediction):
    """Plot a history window together with the true and predicted future points.

    Parameters
    ----------
    history : 2-D array
        Past window; column 1 (``history[:, 1]``) is the series drawn.
    true_future : sequence
        Actual future values, drawn as blue dots.
    prediction : array
        Predicted future values, drawn as red dots only when
        ``prediction.any()`` is truthy.

    NOTE(review): ``create_time_steps`` and ``STEP`` are not defined in the
    visible cells; presumably they come from init.ipynb or a star import —
    confirm before relying on this helper.
    """
    plt.figure(figsize=(12, 6))
    past_axis = create_time_steps(len(history))
    n_future = len(true_future)

    plt.plot(past_axis, np.array(history[:, 1]), label='History')
    # x positions of the future points, shared by both scatter series.
    future_axis = np.arange(n_future)/STEP
    plt.plot(future_axis, np.array(true_future), 'bo', label='True Future')
    if prediction.any():
        plt.plot(future_axis, np.array(prediction), 'ro', label='Predicted Future')
    plt.legend(loc='upper left')
    plt.show()
    
def plot_train_history(history, title):
    """Draw training and validation loss against the epoch index.

    Parameters
    ----------
    history : mapping
        Must provide ``'loss'`` and ``'val_loss'`` sequences; the x axis has
        one position per entry of ``history['loss']``.
    title : str
        Title placed on the figure.
    """
    train_curve = history['loss']
    val_curve = history['val_loss']
    epoch_axis = range(len(train_curve))

    plt.figure()
    for series, fmt, label in ((train_curve, 'b', 'Training loss'),
                               (val_curve, 'r', 'Validation loss')):
        plt.plot(epoch_axis, series, fmt, label=label)
    plt.title(title)
    plt.legend()

    plt.show()


plot_train_history(history, 'Multi Step Training and validation loss')

# Predictions on the train set

In [None]:
# First look-ahead step of every prediction on the training windows, kept in
# normalised units.  A preceding denormalize(...) assignment to `preds` was
# removed: it was overwritten immediately by this line, so its only effect was
# a second, redundant model.predict pass.
preds = model.predict(x_train)[:, 0]

In [None]:
n = len(preds)
# Plot n rows of the (normalised) 'close' column, ending len(x_val) rows before
# the end of the frame — presumably the span covered by the training targets;
# confirm the alignment.  If x_val were empty the slice would be empty.
plt.plot(range(n), df['close'][-n-len(x_val):-len(x_val)])
#plt.plot(range(n), preds)

In [None]:
plt.plot(preds)

In [None]:
# Scratch arithmetic comparing the number of training predictions with the
# number of frame rows left after subtracting validation, history and remainder rows.
n = len(preds)
yo= len(x_val)+history_size
z = yo
# Rows of df left over after filling whole batches.
modman = len(df)% batch_size
# NOTE(review): raises ZeroDivisionError whenever len(df) is an exact multiple
# of batch_size (modman == 0).
w = z + len(df)%modman
len(preds), len(df)-w

In [None]:
# Scratch check of slicing with a negative stop: [1:-4] keeps index 1 up to,
# but excluding, the fourth element from the end -> [2, 3, 4, 5, 6].
np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])[1:-4]

In [None]:
help(denormalize)

In [None]:
# NOTE(review): y_train[:, 0] is 1-D and no `col` is given, but denormalize()
# needs a column name to handle 1-D input, so this call fails as written.
# Also, `df` is the normalised feature frame at this point, while denormalize()
# takes logs of the frame it is given.  Confirm the intended call
# (e.g. col='close' together with the raw, un-normalised frame).
plt.plot(denormalize(y_train[:, 0], df))
#plt.plot(preds)

In [None]:
# Compare denoised actual vs. predicted close values on the training span.
rcParams['figure.figsize'] = 20,3
# The first training target sits at row `history_size`.  The offset is named
# `train_start` rather than `h` so that it does not overwrite the module alias
# `h` (cryptolytic.data.historical) imported at the top of the notebook.
train_start = history_size
hn = n+history_size

plt.plot(range(n), d.denoise(df['close'][train_start:hn], 5), label='actual')
plt.plot(range(n), d.denoise(preds[:n], 5), label='predicted');
plt.plot(range(n), df.close_mean.values[train_start:hn], label='Mean')
plt.legend();

In [None]:
# plt.title('All predictions')
# plt.plot(range(2000), d.denoise(preds[:2000], 5));

# Predictions on validation set

In [None]:
# NOTE(review): `val_n` is first assigned in a later cell
# (val_n = len(val_preds)), so this raises NameError on a fresh
# top-to-bottom run.
len(df['close'][hn:]),val_n

In [None]:
len(x_val)

In [None]:
# NOTE(review): `val_n` is only assigned in a later cell, so this raises
# NameError on a fresh top-to-bottom run.
val_n

In [None]:
y_val[:, 0]

In [None]:
# First look-ahead step of every prediction on the validation windows.
val_preds = model.predict(x_val)[:, 0]
val_n = len(val_preds)
hn = n+history_size
# Both series have exactly val_n points.  The predictions are sliced with
# val_n (not the training length n) so x and y always match in length.
plt.plot(range(val_n), d.denoise(y_val[:, 0], 5), label='actual')
plt.plot(range(val_n), d.denoise(val_preds[:val_n], 5), label='predicted');
plt.legend();

In [None]:
# val_preds = denormalize_results(model.predict(x_val))
# # val_actual = df.close.iloc[TRAIN_SPLIT:]
# val_actual = denormalize_results(y_val[:, 0])
# plt.plot(np.arange(2000), d.denoise(val_actual[past_history:2000+past_history], 20), label='actual')
# plt.plot(range(2000), d.denoise(val_preds[:, 0][:2000], 20), label='predicted');
# plt.legend();

In [None]:
len(x_val)

In [None]:
yo

In [None]:
preds[0:5]

In [None]:
history_size