In [37]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM, RNN, SimpleRNNCell, StackedRNNCells,  BatchNormalization
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint


import pandas as pd
import numpy as np
from sklearn import preprocessing
from collections import deque
import random
import time

In [2]:
# Download Dataset
!wget https://pythonprogramming.net/static/downloads/machine-learning-data/crypto_data.zip
!unzip crypto_data.zip 

--2021-10-21 22:07:58--  https://pythonprogramming.net/static/downloads/machine-learning-data/crypto_data.zip
Resolving pythonprogramming.net (pythonprogramming.net)... 104.237.143.20, 2600:3c00::f03c:91ff:fe84:176d
Connecting to pythonprogramming.net (pythonprogramming.net)|104.237.143.20|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5998694 (5.7M) [application/zip]
Saving to: ‘crypto_data.zip.1’


2021-10-21 22:07:59 (23.8 MB/s) - ‘crypto_data.zip.1’ saved [5998694/5998694]

Archive:  crypto_data.zip
replace crypto_data/BCH-USD.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace crypto_data/BTC-USD.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace crypto_data/ETH-USD.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace crypto_data/LTC-USD.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n


In [3]:
# Read Data:
# Build one frame holding the close price and volume of every ticker,
# aligned on the 'time' column of the first ticker that is loaded.
df = pd.DataFrame()
vals = ['BTC-USD', 'LTC-USD', 'ETH-USD', 'BCH-USD']
for crypto in vals:
  path = f'crypto_data/{crypto}.csv'
  # Column names are supplied explicitly via `names`.
  mini_df = pd.read_csv(path, names=['time', 'low', 'high', 'open', 'close', 'volume'])
  # Prefix the two columns we keep with the ticker so they stay unique after joining.
  mini_df = mini_df.rename(columns={'close': f'{crypto}_close',
                                    'volume': f'{crypto}_volume'})
  mini_df = mini_df.set_index('time')
  mini_df = mini_df[[f'{crypto}_close', f'{crypto}_volume']]
  if len(df) == 0:
    df = mini_df
  else:
    # Default (left) join: keep the timestamps already present in df.
    df = df.join(mini_df)

# Forward-fill gaps left by the join, then drop rows that still have no value
# (e.g. leading rows with nothing to fill from).
df.ffill(inplace=True)
df.dropna(inplace=True)
print(df.head())

            BTC-USD_close  BTC-USD_volume  ...  BCH-USD_close  BCH-USD_volume
time                                       ...                               
1528968720    6487.379883        7.706374  ...     870.859985       26.856577
1528968780    6479.410156        3.088252  ...     870.099976        1.124300
1528968840    6479.410156        1.404100  ...     870.789978        1.749862
1528968900    6479.979980        0.753000  ...     870.000000        1.680500
1528968960    6480.000000        1.490900  ...     869.989990        1.669014

[5 rows x 8 columns]


In [4]:
# Create Target:
# Number of consecutive rows that make up one input sequence.
SEQ_LEN = 60
# How many rows ahead the close price is looked up when building the label.
FUTURE_PERIOD_PREDICTION = 3
# Ticker whose '<ticker>_close' column the label is derived from.
RATIO_TO_PREDICT = 'LTC-USD'

def classify(current, future):
  """Return 1 when `future` is strictly greater than `current`, otherwise 0.

  Both arguments are converted with float() before being compared, so
  numeric strings are accepted and a NaN on either side yields 0.
  """
  went_up = float(future) > float(current)
  return 1 if went_up else 0

# 'future' is the close price FUTURE_PERIOD_PREDICTION rows later.
df['future'] = df[f'{RATIO_TO_PREDICT}_close'].shift(-FUTURE_PERIOD_PREDICTION)
# 'target' is 1 when that later price is above the current one, else 0.
df['target'] = list(map(classify, df[f'{RATIO_TO_PREDICT}_close'], df['future']))
# The shift leaves NaN in the last FUTURE_PERIOD_PREDICTION rows; classify()
# maps a NaN comparison to 0, so those rows carry a label that is not backed
# by any price. Remove them instead of keeping the bogus 0 labels.
df.dropna(subset=['future'], inplace=True)

print(df[[f'{RATIO_TO_PREDICT}_close', 'future', 'target']].head(10))

            LTC-USD_close     future  target
time                                        
1528968720      96.660004  96.389999       0
1528968780      96.570000  96.519997       0
1528968840      96.500000  96.440002       0
1528968900      96.389999  96.470001       1
1528968960      96.519997  96.400002       0
1528969020      96.440002  96.400002       0
1528969080      96.470001  96.400002       0
1528969140      96.400002  96.400002       0
1528969200      96.400002  96.400002       0
1528969260      96.400002  96.449997       1


In [5]:
# Split Data:

# Find the index value sitting 5% from the end of the sorted index; it is the
# cutoff between the two splits.
times = sorted(df.index.values)
holdout_size = int(0.05 * len(times))
last_5pc = times[-holdout_size]

# Rows strictly before the cutoff are used for training; rows at or after it
# are held out.
train = df[df.index < last_5pc]
valid = df[df.index >= last_5pc]

# Preprocessing:
def preprocess_df(df):
  """Convert a frame of prices/volumes into balanced, shuffled sequences.

  Steps:
    1. Drop the 'future' column so it cannot be used as an input feature.
    2. Replace every column except 'target' by its percentage change and
       standardize it with sklearn's `preprocessing.scale`.
    3. Slide a window of SEQ_LEN rows over the frame; each full window becomes
       one sample labelled with the 'target' of its last row.
    4. Keep the same number of target-1 and target-0 samples and shuffle.

  Args:
    df: DataFrame containing a 'future' column, feature columns, and a
      'target' column. 'target' must be the last column once 'future' is
      removed, because the label is read from the last position of each row.

  Returns:
    A tuple (X, y): X is a NumPy array stacking the sequences, y is a plain
    Python list with the matching labels.
  """
  # Drop future col (keyword form; the axis can no longer be passed positionally):
  df = df.drop(columns='future')
  # Normalize in percentage of change and scale:
  for col in df.columns:
    if col != 'target':
      df[col] = df[col].pct_change()
      # pct_change leaves NaN in the first row.
      df.dropna(inplace=True)
      df[col] = preprocessing.scale(df[col].values)
      df.dropna(inplace=True)
  # Create sequences: the deque keeps only the most recent SEQ_LEN feature rows.
  sequential_data = []
  prev_days = deque(maxlen=SEQ_LEN)
  for row in df.values:
    prev_days.append(list(row[:-1]))
    if len(prev_days) == SEQ_LEN:
      sequential_data.append([np.array(prev_days), row[-1]])

  # Balancing: split by label, shuffle each class, and trim both to the size
  # of the smaller one.
  buys = [[seq, target] for seq, target in sequential_data if target == 1]
  sells = [[seq, target] for seq, target in sequential_data if target == 0]
  random.shuffle(buys)
  random.shuffle(sells)

  lower = min(len(buys), len(sells))
  sequential_data = buys[:lower] + sells[:lower]
  # Mix the two classes so they are not grouped one after the other.
  random.shuffle(sequential_data)

  # Split data into inputs and labels:
  X = [seq for seq, _ in sequential_data]
  y = [target for _, target in sequential_data]

  return np.array(X), y

# Build the model inputs for both splits. Each call scales, balances and
# shuffles its own frame independently.
train_x, train_y = preprocess_df(train)
valid_x, valid_y = preprocess_df(valid)

# Report the number of samples and the per-class counts; preprocess_df
# balances the classes, so the two counts of a split should match.
print(f'train_data: {len(train_x)}, validation: {len(valid_x)}')
print(f'Train Dont buys: {train_y.count(0)}, buys: {train_y.count(1)}')
print(f'Validation Dont Buys: {valid_y.count(0)}, buys: {valid_y.count(1)}')

train_data: 77922, validation: 3860
Train Dont buys: 38961, buys: 38961
Validation Dont Buys: 1930, buys: 1930


In [53]:
# Build the model:

BATCH_SIZE = 64
EPOCHS = 2
# Run name used for the TensorBoard log directory and the final saved model;
# the timestamp keeps separate runs apart.
NAME = f'{RATIO_TO_PREDICT}-SEQ-{FUTURE_PERIOD_PREDICTION}-PRED-{int(time.time())}'
# Three SimpleRNN cells of 128 units each, stacked into a single recurrent cell.
rnn_cells = [SimpleRNNCell(units=128, recurrent_dropout=0.2) for _ in range(3)]
stacked_rnn = StackedRNNCells(rnn_cells)

model = Sequential()

# The input shape is taken from one training sample (train_x without its batch axis).
model.add(RNN(stacked_rnn, input_shape=(train_x.shape[1:])))
model.add(Dropout(0.2))
model.add(BatchNormalization())

model.add(Dense(32, activation='relu'))
model.add(Dropout(0.2))
# Two output units with softmax: one probability per class (0 / 1).
model.add(Dense(2, activation='softmax'))

# NOTE(review): the `decay` keyword is not accepted by every Keras optimizer
# implementation - confirm against the installed TensorFlow version.
opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-6)
# Sparse loss: the labels are class indices, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', 
              optimizer=opt, 
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

Model: "sequential_28"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
rnn_22 (RNN)                 (None, 128)               83328     
_________________________________________________________________
dropout_42 (Dropout)         (None, 128)               0         
_________________________________________________________________
batch_normalization_27 (Batc (None, 128)               512       
_________________________________________________________________
dense_30 (Dense)             (None, 32)                4128      
_________________________________________________________________
dropout_43 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_31 (Dense)             (None, 2)                 66        
Total params: 88,034
Trainable params: 87,778
Non-trainable params: 256
_______________________________________________

In [54]:
# Training:
tensorboard = TensorBoard(log_dir=f'logs/{NAME}')
# The {epoch} and {val_accuracy} placeholders are left in the path on purpose:
# ModelCheckpoint fills them in each time it saves.
filepath = 'RNN_Final-{epoch:02d}-{val_accuracy:.3f}'
# monitor/verbose/save_best_only/mode must be arguments of ModelCheckpoint
# (not of str.format) for the callback to keep only the checkpoints that
# improve the validation accuracy.
checkpoint = ModelCheckpoint('models/{}.model'.format(filepath),
                             monitor='val_accuracy',
                             verbose=1,
                             save_best_only=True,
                             mode='max')
# validation_split holds out the last 30% of train_x/train_y for validation
# during training; the separate valid_x/valid_y split is not used here.
history = model.fit(train_x, np.array(train_y), batch_size=BATCH_SIZE, epochs=EPOCHS,
                    validation_split=0.3,
                    callbacks=[tensorboard, checkpoint],
                    )

Epoch 1/2




INFO:tensorflow:Assets written to: models/RNN_Final-01-0.502.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-01-0.502.model/assets


Epoch 2/2




INFO:tensorflow:Assets written to: models/RNN_Final-02-0.510.model/assets


INFO:tensorflow:Assets written to: models/RNN_Final-02-0.510.model/assets


In [56]:
# Evaluate:
# Score the trained model on the held-out split (valid_x / valid_y), which was
# not passed to model.fit. The labels are converted from a list to an array.
score = model.evaluate(valid_x, np.array(valid_y), verbose=1)
# score[0] is the loss and score[1] the accuracy metric requested in compile().
print('Test loss:', score[0])
print('Test accuracy:', score[1])
# Save the final model under the run name, in addition to the per-epoch checkpoints.
model.save("models/{}".format(NAME))

Test loss: 0.6941196918487549
Test accuracy: 0.5007771849632263




INFO:tensorflow:Assets written to: models/LTC-USD-SEQ-3-PRED-1634859091/assets


INFO:tensorflow:Assets written to: models/LTC-USD-SEQ-3-PRED-1634859091/assets
