In [96]:
import numpy as np
import pandas as pd
import math
from sklearn import preprocessing
from sklearn.model_selection import cross_validate
# from sklearn.linear_model import LinearRegression
# from sklearn.preprocessing import MinMaxScaler
# from sklearn.linear_model import SGDRegressor
from collections import deque
import random
import time

main_df =pd.read_excel('ML_DF.xlsx')
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint

In [95]:
SEQ_LEN =2  # how long of a preceeding sequence to collect for RNN
FUTURE_PERIOD_PREDICT = 1  # how far into the future are we trying to predict?
RATIO_TO_PREDICT = "USD_RUB"
EPOCHS = 2  # how many passes through our data
BATCH_SIZE = 64  # how many batches? Try smaller batch if you're getting OOM (out of memory) errors.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"  # a unique name for the model

In [73]:
def classify(current, future):
    if float(future) > float(current):
        return 1
    else:
        return 0

In [74]:
main_df['future'] = main_df[RATIO_TO_PREDICT].shift(-FUTURE_PERIOD_PREDICT)
# main_df.drop(["Date",'Swap_3M','Swap_12M','MOSPRIME_ON','MOSPRIME_3M','Copper','USD_Repo','Treas_4M'], 1,inplace=True)
main_df.tail(3)

Unnamed: 0,USD_RUB,RGBITR_YIELD,Russia_ETF,future
9,65.4,8.1,21.23,65.59
10,65.59,8.12,21.16,65.87
11,65.87,8.09,21.02,


In [76]:
main_df['target'] = list(map(classify, main_df[RATIO_TO_PREDICT], main_df['future']))
main_df.dropna(inplace=True)
main_df

Unnamed: 0,USD_RUB,RGBITR_YIELD,Russia_ETF,future,target
0,66.3675,8.2,20.219999,66.37,1
1,66.37,8.23,20.34,66.275,0
2,66.275,8.22,20.51,66.535,1
3,66.535,8.24,20.27,66.0725,0
4,66.0725,8.24,20.620001,65.7775,0
5,65.7775,8.21,20.66,66.0,1
6,66.0,8.22,20.74,66.05,1
7,66.05,8.27,20.690001,65.505,0
8,65.505,8.23,21.15,65.4,0
9,65.4,8.1,21.23,65.59,1


In [78]:
length = sorted(main_df.index.values)
last_15pct = sorted(main_df.index.values)[-int(0.2*len(length))]  # get the last 15% 
last_15pct

9

In [79]:
validation_df = main_df[(main_df.index >= last_15pct)]  # make the validation data where the index is in the last 5%
validation_df


Unnamed: 0,USD_RUB,RGBITR_YIELD,Russia_ETF,future,target
9,65.4,8.1,21.23,65.59,1
10,65.59,8.12,21.16,65.87,1


In [90]:
train_df = main_df[(main_df.index < last_15pct)]  # now the main_df is all the data up to the last 5%
train_df

Unnamed: 0,USD_RUB,RGBITR_YIELD,Russia_ETF,future,target
0,66.3675,8.2,20.219999,66.37,1
1,66.37,8.23,20.34,66.275,0
2,66.275,8.22,20.51,66.535,1
3,66.535,8.24,20.27,66.0725,0
4,66.0725,8.24,20.620001,65.7775,0
5,65.7775,8.21,20.66,66.0,1
6,66.0,8.22,20.74,66.05,1
7,66.05,8.27,20.690001,65.505,0
8,65.505,8.23,21.15,65.4,0


In [91]:
def preprocess_df(df):
    df = df.drop("future", 1)  # don't need this anymore.

    for col in df.columns:  # go through all of the columns
        if col != "target":  # normalize all ... except for the target itself!
#             df[col] = df[col].pct_change()  # pct change "normalizes" the different currencies (each crypto coin has vastly diff values, we're really more interested in the other coin's movements)
            df.dropna(inplace=True)  # remove the nas created by pct_change
#             df[col] = preprocessing.scale(df[col].values)  # scale between 0 and 1.

    df.dropna(inplace=True)  # cleanup again... jic.


    sequential_data = []  # this is a list that will CONTAIN the sequences
    prev_days = deque(maxlen=SEQ_LEN)  # These will be our actual sequences. They are made with deque, which keeps the maximum length by popping out older values as new ones come in

    for i in df.values:  # iterate over the values
        prev_days.append([n for n in i[:-1]])  # store all but the target
        if len(prev_days) == SEQ_LEN:  # make sure we have 60 sequences!
            sequential_data.append([np.array(prev_days), i[-1]])  # append those bad boys!

    random.shuffle(sequential_data)  # shuffle for good measure.

    buys = []  # list that will store our buy sequences and targets
    sells = []  # list that will store our sell sequences and targets

    for seq, target in sequential_data:  # iterate over the sequential data
        if target == 0:  # if it's a "not buy"
            sells.append([seq, target])  # append to sells list
        elif target == 1:  # otherwise if the target is a 1...
            buys.append([seq, target])  # it's a buy!

    random.shuffle(buys)  # shuffle the buys
    random.shuffle(sells)  # shuffle the sells!
    
    lower = min(len(buys), len(sells))  # what's the shorter length?

    buys = buys[:lower]  # make sure both lists are only up to the shortest length.
    sells = sells[:lower]  # make sure both lists are only up to the shortest length.

    sequential_data = buys+sells  # add them together
    random.shuffle(sequential_data)  # another shuffle, so the model doesn't get confused with all 1 class then the other.

    X = []
    y = []

    for seq, target in sequential_data:  # going over our new sequential data
        X.append(seq)  # X is the sequences
        y.append(target)  # y is the targets/labels (buys vs sell/notbuy)

    return np.array(X), y # return X and y...and make X a numpy array!

  

In [92]:
n=preprocess_df(train_df)


In [93]:
n

(array([[[66.        ,  8.22      , 20.73999977],
         [66.05      ,  8.27      , 20.69000053]],
 
        [[66.37      ,  8.23      , 20.34000015],
         [66.275     ,  8.22      , 20.51000023]],
 
        [[66.3675    ,  8.2       , 20.21999931],
         [66.37      ,  8.23      , 20.34000015]],
 
        [[65.7775    ,  8.21      , 20.65999985],
         [66.        ,  8.22      , 20.73999977]],
 
        [[66.0725    ,  8.24      , 20.62000084],
         [65.7775    ,  8.21      , 20.65999985]],
 
        [[66.05      ,  8.27      , 20.69000053],
         [65.505     ,  8.23      , 21.14999962]]]),
 [0.0, 1.0, 0.0, 1.0, 1.0, 0.0])

In [67]:
train_x, train_y = preprocess_df(train_df)
validation_x, validation_y = preprocess_df(validation_df)

In [68]:
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

train data: 70 validation: 4
Dont buys: 35, buys: 35
VALIDATION Dont buys: 2, buys: 2


In [90]:
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

train data: 132 validation: 4
Dont buys: 66, buys: 66
VALIDATION Dont buys: 2, buys: 2


In [97]:
model = Sequential()
model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
model.add(Dropout(0.2))
model.add(BatchNormalization())  #normalizes activation outputs, same reason you want to normalize your input data.

model.add(CuDNNLSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(CuDNNLSTM(128))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(2, activation='softmax'))

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [98]:
opt = tf.keras.optimizers.Adam(lr=0.001, decay=1e-6)

# Compile model
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

In [100]:
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"  # unique file name that will include the epoch and the validation acc for that epoch
checkpoint = ModelCheckpoint("models/{}.model".format(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')) # saves only the best ones

In [101]:
history = model.fit(
    train_x, train_y,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, validation_y),
    callbacks=[tensorboard, checkpoint],
)

Train on 70 samples, validate on 4 samples
Instructions for updating:
Use tf.cast instead.


InvalidArgumentError: No OpKernel was registered to support Op 'CudnnRNN' used by node cu_dnnlstm/CudnnRNN (defined at <ipython-input-97-a8733ead8067>:2) with these attrs: [is_training=true, seed2=0, input_mode="linear_input", T=DT_FLOAT, dropout=0, rnn_mode="lstm", direction="unidirectional", seed=0]
Registered devices: [CPU]
Registered kernels:
  <no registered kernels>

	 [[node cu_dnnlstm/CudnnRNN (defined at <ipython-input-97-a8733ead8067>:2) ]]

Caused by op 'cu_dnnlstm/CudnnRNN', defined at:
  File "C:\Users\pc\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\pc\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\pc\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\pc\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\pc\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "C:\Users\pc\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\pc\Anaconda3\lib\asyncio\base_events.py", line 528, in run_forever
    self._run_once()
  File "C:\Users\pc\Anaconda3\lib\asyncio\base_events.py", line 1764, in _run_once
    handle._run()
  File "C:\Users\pc\Anaconda3\lib\asyncio\events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "C:\Users\pc\Anaconda3\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "C:\Users\pc\Anaconda3\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "C:\Users\pc\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "C:\Users\pc\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\pc\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "C:\Users\pc\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\pc\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "C:\Users\pc\Anaconda3\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "C:\Users\pc\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\pc\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\pc\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\pc\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "C:\Users\pc\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "C:\Users\pc\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\pc\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "C:\Users\pc\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-97-a8733ead8067>", line 2, in <module>
    model.add(CuDNNLSTM(128, input_shape=(train_x.shape[1:]), return_sequences=True))
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\training\checkpointable\base.py", line 442, in _method_wrapper
    method(self, *args, **kwargs)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\sequential.py", line 164, in add
    layer(x)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\keras\layers\recurrent.py", line 701, in __call__
    return super(RNN, self).__call__(inputs, **kwargs)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 554, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\keras\layers\cudnn_recurrent.py", line 111, in call
    output, states = self._process_batch(inputs, initial_state)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\keras\layers\cudnn_recurrent.py", line 501, in _process_batch
    is_training=True)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_cudnn_rnn_ops.py", line 141, in cudnn_rnn
    seed2=seed2, is_training=is_training, name=name)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3300, in create_op
    op_def=op_def)
  File "C:\Users\pc\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): No OpKernel was registered to support Op 'CudnnRNN' used by node cu_dnnlstm/CudnnRNN (defined at <ipython-input-97-a8733ead8067>:2) with these attrs: [is_training=true, seed2=0, input_mode="linear_input", T=DT_FLOAT, dropout=0, rnn_mode="lstm", direction="unidirectional", seed=0]
Registered devices: [CPU]
Registered kernels:
  <no registered kernels>

	 [[node cu_dnnlstm/CudnnRNN (defined at <ipython-input-97-a8733ead8067>:2) ]]
