# Predicting Cryptocurrency - RNN and KERAS
Credit: sentdex YouTube Channel


Step 1: Read in Data and Name Header Values

In [None]:
import pandas as pd
import os
from sklearn import preprocessing
from collections import deque
import random
import numpy as np
import time
#!pip install tensorflow
#!pip install --upgrade tensorflow
#!pip install keras
#!pip install tf-nightly
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint

# --- Experiment configuration ---
SEQ_LEN = 60  # number of consecutive rows that make up one input sequence
FUTURE_PERIOD_PREDICT = 3  # how many rows ahead the close price is looked up
RATIO_TO_PREDICT = "LTC-USD"  # currency pair whose future close is the target
EPOCHS = 10
BATCH_SIZE = 64
# Unique run name (timestamped); used for the TensorBoard log dir and the saved model
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

# The CSV files have no header row, so the column names are supplied on read.
# A single file is loaded here only to get a feel for what the data contains.
df = pd.read_csv(
    "Documents/LTC-USD.csv",
    names=["time", "low", "high", "open", "close", "volume"],
)

print(df.head())

Read in all 4 files, one per cryptocurrency pair (BTC, LTC, ETH and BCH against USD), and join their close and volume columns on the time index.

In [None]:
ratios = ["BTC-USD", "LTC-USD", "ETH-USD", "BCH-USD"]

# Combined frame: one "<ratio>_close" and one "<ratio>_volume" column per pair
main_df = pd.DataFrame()

for ratio in ratios:
    dataset = f"Documents/{ratio}.csv"
    # Read the headerless file, prefix close/volume with the pair name,
    # index by time and keep only the two columns of interest.
    df = (
        pd.read_csv(dataset, names=["time", "low", "high", "open", "close", "volume"])
        .rename(columns={"close": f"{ratio}_close", "volume": f"{ratio}_volume"})
        .set_index("time")
    )[[f"{ratio}_close", f"{ratio}_volume"]]

    # The first pair seeds the combined frame; later pairs are joined on the index.
    main_df = df if len(main_df) == 0 else main_df.join(df)

print(main_df.head())

for c in main_df.columns:
    print(c)


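For supervised ML we need sequences (the inputs) and targets (the labels). What are the targets? Here the target is 1 ("buy") when the close price of `RATIO_TO_PREDICT` is higher `FUTURE_PERIOD_PREDICT` rows in the future than it is now, and 0 otherwise. The settings come from the first code cell:

- `SEQ_LEN = 60` (rows per input sequence)
- `FUTURE_PERIOD_PREDICT = 3` (rows ahead to predict)
- `RATIO_TO_PREDICT = "LTC-USD"`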

In [None]:
# Recommendation label: 1 = buy (the price goes up), 0 = do not buy
def classify(current, future):
    """Return 1 if ``future`` is strictly greater than ``current``, otherwise 0.

    Both arguments are converted with ``float`` before the comparison, so
    numeric strings are accepted. Equal values yield 0.
    """
    return 1 if float(future) > float(current) else 0
    
# "future" holds the target pair's close price FUTURE_PERIOD_PREDICT rows later:
# a negative shift moves later values up onto the current row.
main_df["future"] = main_df[f"{RATIO_TO_PREDICT}_close"].shift(periods=-FUTURE_PERIOD_PREDICT)
print(main_df.loc[:, [f"{RATIO_TO_PREDICT}_close", "future"]].head())

In [None]:
# Label every row by comparing its current close with its "future" close,
# then print a few rows to check that classify behaves as intended.
main_df["target"] = [
    classify(current, future)
    for current, future in zip(main_df[f"{RATIO_TO_PREDICT}_close"], main_df["future"])
]
print(main_df.loc[:, [f"{RATIO_TO_PREDICT}_close", "future", "target"]].head())

Step 2: Normalizing, creating sequences and balancing:

In [None]:
# Chronological hold-out: the most recent 5% of timestamps form the validation set.
# The index should already be ordered; sorting makes sure of it.
times = sorted(main_df.index.values)
last_5pct = times[-int(0.05 * len(times))]  # first timestamp of the validation range

# Rows at or after the threshold go to validation ...
validation_main_df = main_df.loc[main_df.index >= last_5pct]
# ... and everything strictly before it stays in the training frame.
main_df = main_df.loc[main_df.index < last_5pct]

# Turn a labelled price/volume frame into balanced, shuffled (sequence, label) data
def preprocess_df(df):
    """Normalize ``df``, cut it into fixed-length sequences and balance the classes.

    Steps:
      1. Drop the helper column ``future``.
      2. Replace every feature column by its percent change and standardize it
         (zero mean, unit variance) with ``sklearn.preprocessing.scale``.
      3. Slide a window of ``SEQ_LEN`` rows over the frame; every full window
         becomes one sample, labelled with the ``target`` of its last row.
      4. Truncate the larger class so both classes have the same number of
         samples, then shuffle.

    Args:
        df: DataFrame with a ``future`` column, the feature columns, and
            ``target`` as the LAST column (the code takes the label from the
            last position of each row). The caller's frame is not modified.

    Returns:
        tuple ``(X, y)``: ``X`` is a NumPy array built from the sequences
        (one ``SEQ_LEN`` x n_features window per sample) and ``y`` is a plain
        Python list with the matching labels.
    """
    # Keyword form: passing ``axis`` positionally (``drop("future", 1)``) is
    # rejected by pandas 2.0+. This also returns a new frame, so the
    # in-place operations below never touch the caller's data.
    df = df.drop(columns="future")

    for col in df.columns:
        if col == "target":  # the label itself must not be normalized
            continue
        # Percent change puts currencies with very different price levels on a
        # comparable footing. A zero in the previous row yields +/-inf, which
        # scale() refuses, so those entries are treated as missing.
        df[col] = df[col].pct_change().replace([np.inf, -np.inf], np.nan)
        df.dropna(inplace=True)  # removes the NaN rows created by pct_change
        # Standardize to zero mean / unit variance (NOT a 0..1 range).
        df[col] = preprocessing.scale(df[col].values)

    df.dropna(inplace=True)  # final cleanup, just in case

    # Build the sliding windows. The deque keeps at most SEQ_LEN rows and
    # silently discards the oldest one when a new row is appended.
    sequential_data = []
    window = deque(maxlen=SEQ_LEN)
    for row in df.values:
        window.append(list(row[:-1]))  # all features, without the label
        if len(window) == SEQ_LEN:  # only emit complete windows
            sequential_data.append([np.array(window), row[-1]])

    random.shuffle(sequential_data)

    # Split by class so that both can be cut to the same size.
    buys = [[seq, target] for seq, target in sequential_data if target == 1]
    sells = [[seq, target] for seq, target in sequential_data if target == 0]

    random.shuffle(buys)
    random.shuffle(sells)

    lower = min(len(buys), len(sells))  # size of the smaller class
    sequential_data = buys[:lower] + sells[:lower]
    # Shuffle again so the samples are not grouped by class.
    random.shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]

    # y stays a list on purpose: callers use list methods such as ``.count``.
    return np.array(X), y
            
        
# Run the same preprocessing on the training and on the validation frame.
train_x, train_y = preprocess_df(df=main_df)
validation_x, validation_y = preprocess_df(df=validation_main_df)

# Sanity check: number of samples and class balance of both sets.
# The labels are Python lists, hence ``.count``.
print(f"train data: {len(train_x)} validation: {len(validation_x)}")
print(f"Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}")
print(f"VALIDATION Dont buys: {validation_y.count(0)}, buys: {validation_y.count(1)}")

Step 3: Training Model

In [None]:
# --- Model: three stacked LSTM blocks followed by a small dense classifier ---
model = Sequential()
# Explicit Input instead of ``input_shape=`` on the first layer (the form
# recommended for Sequential models). Shape = (sequence length, n_features).
model.add(tf.keras.Input(shape=train_x.shape[1:]))
model.add(LSTM(128, return_sequences=True))  # emits the full sequence for the next LSTM
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(LSTM(128, return_sequences=True))
model.add(Dropout(0.1))
model.add(BatchNormalization())

model.add(LSTM(128))  # last recurrent layer: only the final output is passed on
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))

# Two output units (class 0 = don't buy, class 1 = buy) with softmax;
# paired with sparse_categorical_crossentropy because labels are class ids.
model.add(Dense(2, activation='softmax'))

opt = tf.keras.optimizers.Adam(learning_rate = 0.001, weight_decay=1e-6)

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy']
)

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

filepath = "RNN_Final-{epoch:02d}-{val_accuracy:.3f}"  # Keras fills in the epoch and that epoch's validation accuracy
# The options must be arguments of ModelCheckpoint. Previously the closing
# parenthesis was misplaced, so they were passed to str.format() and silently
# ignored, and a checkpoint was written after every epoch.
checkpoint = ModelCheckpoint(
    "models/{}.keras".format(filepath),
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,  # keep a checkpoint only when val_accuracy improves
    mode='max',
)

print(BATCH_SIZE)
# Train model
# The labels come back from preprocess_df as Python lists; convert them to
# arrays at the call site so the list variables stay usable elsewhere.
history = model.fit(
    train_x, np.asarray(train_y),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(validation_x, np.asarray(validation_y)),
    callbacks=[tensorboard, checkpoint],
)

# Score model
# NOTE: this evaluates on the validation set; no separate test set exists here.
score = model.evaluate(validation_x, np.asarray(validation_y), verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save model
# Keras 3 requires a ".keras" (or ".h5") extension on the save path; make
# sure the target directory exists before writing.
os.makedirs("models", exist_ok=True)
model.save("models/{}.keras".format(NAME))


64
Epoch 1/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 473s 440ms/step - accuracy: 0.5135 - loss: 0.7601 - val_accuracy: 0.5451 - val_loss: 0.6866
Epoch 2/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 487s 472ms/step - accuracy: 0.5374 - loss: 0.6897 - val_accuracy: 0.5696 - val_loss: 0.6820
Epoch 3/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 489s 474ms/step - accuracy: 0.5527 - loss: 0.6858 - val_accuracy: 0.5699 - val_loss: 0.6796
Epoch 4/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 528s 512ms/step - accuracy: 0.5635 - loss: 0.6813 - val_accuracy: 0.5636 - val_loss: 0.6782
Epoch 5/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 718s 696ms/step - accuracy: 0.5683 - loss: 0.6789 - val_accuracy: 0.5835 - val_loss: 0.6747
Epoch 6/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 695s 674ms/step - accuracy: 0.5769 - loss: 0.6765 - val_accuracy: 0.5778 - val_loss: 0.6745
Epoch 7/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 399s 386ms/step - accuracy: 0.5783 - loss: 0.6755 - val_accuracy: 0.5807 - val_loss: 0.6743
Epoch 8/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 360s 349ms/step - accuracy: 0.5796 - loss: 0.6728 - val_accuracy: 0.5627 - val_loss: 0.6797
Epoch 9/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 369s 357ms/step - accuracy: 0.5951 - loss: 0.6662 - val_accuracy: 0.5731 - val_loss: 0.6826
Epoch 10/10
1031/1031 ━━━━━━━━━━━━━━━━━━━━ 363s 352ms/step - accuracy: 0.6095 - loss: 0.6574 - val_accuracy: 0.5570 - val_loss: 0.6861
Test loss: 0.6859279870986938
Test accuracy: 0.5570258498191833