In [1]:
# Example: using an RNN (stacked LSTM layers) to predict whether a cryptocurrency's price will rise over the next few minutes

In [2]:
import os
import random
import time
from collections import deque

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import preprocessing
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint
from tensorflow.keras.layers import Dense, Dropout, LSTM, CuDNNLSTM, BatchNormalization
from tensorflow.keras.models import Sequential


In [3]:
# --- Problem definition -------------------------------------------------
# Number of consecutive rows fed to the network as one sequence.
SEQ_LEN = 60
# How many rows ahead the label looks.
FUTURE_PERIOD_PREDICT = 3
# The pair whose close price is being predicted.
RATIO_TO_PREDICT = 'LTC-USD'

# --- Data locations -----------------------------------------------------
# Both directories are used as plain string prefixes by the cells below.
DATA_DIR = ""
SAVE_DIR = ''
LITE_COIN = "LTC-USD.csv"
# Column names applied to the CSV files when they are read.
col_titles = [
    'time',
    'low',
    'high',
    'open',
    'close',
    'volume',
]

# --- Training settings --------------------------------------------------
EPOCHS = 1
BATCH_SIZE = 64

# Run identifier: sequence length, prediction horizon and a Unix timestamp.
NAME = f"{SEQ_LEN}-SEQ-{FUTURE_PERIOD_PREDICT}-PRED-{int(time.time())}"

In [4]:
# Read the Litecoin CSV on its own, naming its columns from col_titles,
# and preview the first rows.
ltc_csv_path = DATA_DIR + LITE_COIN
df = pd.read_csv(ltc_csv_path, names=col_titles)
df.head()

FileNotFoundError: [Errno 2] File b'C:\\Users\\stevl\\Downloads\\crypto_data\\crypto_data\\LTC-USD.csv' does not exist: b'C:\\Users\\stevl\\Downloads\\crypto_data\\crypto_data\\LTC-USD.csv'

In [None]:
# Collect the close price and volume of every pair into one frame indexed by time.
ratios = ['BTC-USD','LTC-USD', 'ETH-USD','BCH-USD']
main_df = pd.DataFrame()

for ratio in ratios:
    dataset = DATA_DIR + f"{ratio}.csv"
    close_col = f"{ratio}_close"
    volume_col = f"{ratio}_volume"

    # Prefix the two columns we keep with the pair name so they stay distinct
    # once several pairs sit side by side.
    renamed = (
        pd.read_csv(dataset, names=col_titles)
        .rename(columns={"close": close_col, "volume": volume_col})
        .set_index("time")
    )
    df = renamed[[close_col, volume_col]]

    # The first pair seeds the frame; each later pair is joined onto it by index.
    main_df = df if len(main_df) == 0 else main_df.join(df)
        
    

In [None]:
def classify(current, future):
    '''
    Return 1 when the future value is strictly greater than the current one, else 0.

    Both arguments are converted with float() before being compared.
    '''
    price_now = float(current)
    price_later = float(future)
    return int(price_later > price_now)

In [None]:
def preprocess_df(df, seq_len=None):
    '''
    Turn a price/volume frame into balanced, shuffled sequences with labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'future' column (dropped here) and a 'target' column.
        'target' is read as the LAST column of each row once 'future' is
        removed; every other column is treated as a feature.
    seq_len : int, optional
        Number of consecutive rows per sequence. Defaults to the module-level
        SEQ_LEN.

    Returns
    -------
    (X, y) : tuple
        X is a numpy array stacking the feature windows (each window holds
        seq_len rows of features); y is a list with the matching target values.
        Both classes appear equally often in the result.
    '''
    if seq_len is None:
        seq_len = SEQ_LEN

    # We don't want 'future' as a feature, otherwise the RNN could just learn the label.
    # (Keyword form: the positional axis argument was removed in pandas 2.0.)
    df = df.drop(columns='future')

    for col in df.columns:
        if col != 'target':
            df[col] = df[col].pct_change()  # Normalize magnitudes of data trend
            # A previous value of 0 makes pct_change produce +/-inf, which
            # preprocessing.scale rejects; treat those rows as missing.
            df[col] = df[col].replace([np.inf, -np.inf], np.nan)
            df.dropna(inplace = True)
            df[col] = preprocessing.scale(df[col].values)
    df.dropna(inplace = True)  # Just in case

    sequential_data = []
    # A deque with maxlen discards its oldest row once it is full,
    # so it always holds the most recent seq_len rows.
    prev_days = deque(maxlen = seq_len)

    for row in df.values:
        # Append the feature values only (everything except the trailing target).
        prev_days.append(list(row[:-1]))
        if len(prev_days) == seq_len:
            # Pair the full window of features with the target of its last row.
            sequential_data.append([np.array(prev_days), row[-1]])

    random.shuffle(sequential_data)

    # Now that the data has been shuffled, balance the two classes.
    buys = []
    sells = []

    for seq, target in sequential_data:
        if target == 0:
            sells.append([seq, target])
        elif target == 1:
            buys.append([seq, target])

    random.shuffle(buys)
    random.shuffle(sells)

    # Truncate the larger class to the size of the smaller one.
    lower = min(len(buys), len(sells))
    buys = buys[:lower]
    sells = sells[:lower]

    sequential_data = buys + sells
    # Shuffle again so the classes are interleaved rather than in two runs.
    random.shuffle(sequential_data)

    # Split the pairs into predictors and labels.
    X = [seq for seq, _ in sequential_data]
    y = [target for _, target in sequential_data]
    return np.array(X), y
    

In [None]:
# 'future' holds the target pair's close price FUTURE_PERIOD_PREDICT rows ahead.
target_close = main_df[f"{RATIO_TO_PREDICT}_close"]
main_df['future'] = target_close.shift(-FUTURE_PERIOD_PREDICT)

In [None]:
# Label each row 1 when its 'future' price is higher than its current close, else 0.
main_df['target'] = list(map(classify, main_df[f"{RATIO_TO_PREDICT}_close"],main_df['future']))
# Rows without a 'future' value (e.g. the tail left empty by the shift) were labelled 0
# only because a comparison with NaN is False; drop them so they are not used as real labels.
main_df = main_df.dropna(subset = ['future'])

In [None]:
# Hold out the most recent 5% of timestamps as out-of-sample validation data.
# last_5pct is the first timestamp that belongs to the validation set.

times = sorted(main_df.index.values)
# Always hold out at least one timestamp: with fewer than 20 rows int(0.05 * n) is 0,
# and times[-0] is times[0], which would send every row to validation.
holdout_count = max(1, int(0.05 * len(times)))
last_5pct = times[-holdout_count]

In [None]:
# Rows at or after the cut-off go to validation; earlier rows stay for training.
# main_df is reassigned to the training subset, so re-running this cell works on
# the already-reduced frame.
in_validation = main_df.index >= last_5pct
in_training = main_df.index < last_5pct
validation_main_df = main_df[in_validation]
main_df = main_df[in_training]

In [None]:
# Build the sequences and labels for each split independently.
train_x, train_y = preprocess_df(df = main_df)
validation_x, validation_y = preprocess_df(df = validation_main_df)

In [None]:
# Make the model: three stacked LSTM layers followed by a small dense head.
# NOTE(review): activation='relu' overrides the LSTM default activation - confirm this is intentional.
model = Sequential()
model.add(LSTM(128, input_shape = (train_x.shape[1:]), return_sequences = True, activation = 'relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# Only the first layer of a Sequential model needs input_shape; later layers
# take their input from the layer before them.
model.add(LSTM(128, return_sequences = True, activation = 'relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

# Last recurrent layer returns only its final output for the dense head.
model.add(LSTM(128, activation = 'relu'))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation = 'relu'))
model.add(Dropout(0.2))

# Two output units, one per class (0 or 1).
model.add(Dense(2, activation = 'softmax'))

opt = tf.keras.optimizers.Adam(lr = 0.001, decay = 1e-6)

# Sparse loss: labels are class indices rather than one-hot vectors.
model.compile(loss = 'sparse_categorical_crossentropy',
             optimizer = opt,
             metrics = ['accuracy'])

# Build output paths with os.path.join so they work on any OS and stay
# relative to SAVE_DIR even when SAVE_DIR is empty.
log_dir = os.path.join(SAVE_DIR, 'logs', NAME)
models_dir = os.path.join(SAVE_DIR, 'models')
os.makedirs(models_dir, exist_ok = True)

tensorboard = TensorBoard(log_dir = log_dir)

# Keras fills in {epoch} and {val_acc} each time a checkpoint is written.
# NOTE(review): 'val_acc' is the metric key under TF 1.x Keras; newer versions
# report 'val_accuracy' - confirm against the installed version.
filepath = "RNN_Final-{epoch:02d}-{val_acc:.3f}"
# The monitoring options belong to ModelCheckpoint, not to str.format().
checkpoint = ModelCheckpoint(os.path.join(models_dir, "{}.model".format(filepath)),
                             monitor = 'val_acc',
                             verbose = 1,
                             save_best_only = True,
                             mode = 'max')

# Labels are converted to numpy arrays: fit() does not accept plain Python
# lists of labels in every Keras version.
history = model.fit(
                    train_x, np.asarray(train_y),
                    batch_size = BATCH_SIZE,
                    epochs = EPOCHS,
                    validation_data = (validation_x, np.asarray(validation_y)),
                    callbacks = [tensorboard, checkpoint])

In [None]:
# Presumably meant to show the kernel's current working directory via IPython
# automagic (`pwd` resolving to `%pwd`) - TODO confirm automagic is enabled.
pwd