<a href="https://colab.research.google.com/github/ArretVice/crypto_prediction_with_RNN/blob/master/cryprocurrency_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


**Data preparation step**

In [0]:
from google.colab import files
import pandas as pd
import os
import numpy as np
from sklearn.preprocessing import MaxAbsScaler
import pickle
from collections import deque
from keras.layers import Dense, LSTM, Dropout, CuDNNLSTM
from keras.models import Sequential
import keras
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint
from keras.models import load_model

In [0]:
# upload data files to work with (opens the Colab file-upload dialog)
# data files are stored in a dictionary called "uploaded", where keys are file names
# the keys are later passed to pd.read_csv by dataframe_for_rnn, which also takes
# the currency symbol from characters [-12:-6] of each file name
uploaded = files.upload()

In [0]:
# preprocessing data for training

def dataframe_for_rnn(uploaded,days_to_predict=3,training_mode=True,last_n_days=None,target_symbol='XRPUSD'):
  '''
  Function for preparing dataframe for generating sequences
  ----
  uploaded - dictionary that contains keys with file names; every key is read with pd.read_csv
  days_to_predict - number of days to predict prices (e.g. 3 means we predict prices for next 3 days). Default 3
  training_mode - if True this mode creates target columns for days_to_predict days. Default True
  last_n_days - if None, create dataframe from all available data, else only takes last n days. Default None
  target_symbol - symbol whose close price is used to build target columns (column close_<target_symbol>),
                  only used in training mode. Default 'XRPUSD'
  ----
  Returns dataframe indexed by date with close_<symbol> and volume_<symbol> columns converted to
  percentage changes and scaled with MaxAbsScaler; in training mode it also contains unscaled
  target_1..target_<days_to_predict> columns holding future close prices of target_symbol.
  Raises ValueError if uploaded is empty or (in training mode) there is no close_<target_symbol> column.
  '''
  if len(uploaded)==0:
    raise ValueError('uploaded contains no data files')

  main_df=pd.DataFrame()
  for datafile in uploaded.keys():
    df=pd.read_csv(datafile,
                   names=['date','symbol','open','high','low','close','volume_from','volume_to'])
    # rows 0 and 1 are not price rows and are dropped
    # NOTE(review): presumably row 0 is a download note and row 1 the file's own header - verify against the data files
    df=df.iloc[2:,:].copy()
    df['volume']=df.volume_to.astype(dtype='float64')
    df=df.set_index('date')[['close','volume']]
    # renaming columns according to their corresponding currency
    # symbol is taken from fixed positions of the file name, so names must end with '<6 chars>' + 6 more characters
    symbol=datafile[-12:-6]
    df=df.rename(columns={'close':f'close_{symbol}','volume':f'volume_{symbol}'})
    if len(main_df)==0:
      main_df=df
    else:
      main_df=main_df.join(df,how='outer')
  df=main_df

  # NOTE(review): shift and pct_change below assume rows are ordered oldest-first;
  # with a single data file rows stay in file order - verify the order of the source files

  # scaler is created and fitted on every call, so training and prediction frames are scaled independently
  transformer=MaxAbsScaler()

  # if training mode enabled, we create target columns
  if training_mode:
    target_source=f'close_{target_symbol}'
    if target_source not in df.columns:
      raise ValueError(f'column {target_source} not found, available columns: {list(df.columns)}')
    for day_forward in range(1,days_to_predict+1):
      df[f'target_{day_forward}']=df[target_source].shift(-day_forward)
    # dropna also removes the last days_to_predict rows, which have no future price yet
    df=df.dropna().astype(dtype='float32')

    # separating target and feature columns
    target_cols=[col for col in df.columns if col.startswith('target')]
    feature_cols=[col for col in df.columns if col not in target_cols]

    # only features become percentage changes, targets stay as prices
    df[feature_cols]=df[feature_cols].pct_change()

  else:
    df=df.dropna().astype(dtype='float32').pct_change()
    feature_cols=df.columns

  # pct_change gives NaN in the first row and inf after zero values, both are removed
  df=df.replace([np.inf, -np.inf], np.nan).dropna()

  df[feature_cols]=transformer.fit_transform(df[feature_cols])

  # picking up only last n days
  if last_n_days is not None:
    df=df[-last_n_days:]
  return df

In [0]:
# preparing sequences for training

def get_sequences(df,window=30,training_mode=True):
  '''
  Function for generating sequences with given dataframe df, containing volumes and close prices
  ----
  df - dataframe with feature columns and, in training mode, columns whose names start with 'target'
  window - length of one sequence in days, default is 30 days
  training_mode - if True, returns tuple of sequences (X,y), else if False, returns single sequence
  ----
  Training mode returns float32 arrays X of shape (n_sequences, window, n_features) and
  y of shape (n_sequences, n_targets); y holds target values of the last row of each window.
  Sequences are shuffled with numpy global random state, seed it for reproducible order.
  If df has fewer rows than window, empty arrays with the same trailing dimensions are returned.
  Prediction mode returns float32 array of shape (1, rows, columns) built from the last window rows of df.
  Raises ValueError if window is less than 1 or if training mode is used without target columns.
  '''
  if window<1:
    raise ValueError('window must be at least 1')

  if training_mode:
    # targets are picked by name, so their position in the dataframe does not matter
    target_cols=[col for col in df.columns if col.startswith('target')]
    if not target_cols:
      raise ValueError('training mode requires at least one column starting with "target"')
    feature_cols=[col for col in df.columns if col not in target_cols]

    features=np.asarray(df[feature_cols].values,dtype=np.float32)
    targets=np.asarray(df[target_cols].values,dtype=np.float32)

    n_sequences=len(df)-window+1
    if n_sequences<=0:
      return (np.empty((0,window,len(feature_cols)),dtype=np.float32),
              np.empty((0,len(target_cols)),dtype=np.float32))

    # sequence i covers rows i..i+window-1, its targets come from row i+window-1
    X=np.stack([features[start:start+window] for start in range(n_sequences)])
    y=targets[window-1:]

    # same permutation for X and y keeps every sequence paired with its targets
    order=np.random.permutation(n_sequences)
    return X[order],y[order]

  else:
    X=df[-window:].values
    X=np.asarray(X,dtype=np.float32).reshape(1,X.shape[0],X.shape[1])
    return X

**Training model on all available data**

In [0]:
# data is for 10-22-2018, so we predict the next 3 days
# training frame with 3 target columns, limited to the last 270 rows
df=dataframe_for_rnn(uploaded,days_to_predict=3,training_mode=True,last_n_days=270)
# length of one input sequence in days
window_len=14
# shuffled training sequences X and their targets y
X_train_all,y_train_all=get_sequences(df,window=window_len,training_mode=True)

In [0]:
# model
# three stacked CuDNNLSTM layers followed by a dense head with one output per predicted day
model=Sequential()

# regularizer used for recurrent weights and biases, kernel weights are left without regularization
reg=keras.regularizers.l1_l2(l1=0.00001, l2=0.005)

# input shape is (window length, number of features) and is taken from the training sequences
# prepared above (previously referenced an undefined name X_train)
model.add(CuDNNLSTM(input_shape=X_train_all.shape[1:],units=64,return_sequences=True,
               kernel_regularizer=None, recurrent_regularizer=reg, bias_regularizer=reg))
model.add(CuDNNLSTM(128,return_sequences=True,
               kernel_regularizer=None, recurrent_regularizer=reg, bias_regularizer=reg))
# last recurrent layer returns only its final output
model.add(CuDNNLSTM(units=128,
               kernel_regularizer=None, recurrent_regularizer=reg, bias_regularizer=reg))

model.add(Dense(128,activation='relu'))
model.add(Dense(64,activation='relu'))

# one output unit per target column of y_train_all
# (previously used days_to_predict, which is not defined at notebook level)
model.add(Dense(y_train_all.shape[1]))
model.compile(optimizer=Adam(lr=0.007,decay=1e-9), loss='mean_squared_error', metrics=['mse'])
# sequences are already shuffled by get_sequences, so keras shuffling is turned off
model.fit(X_train_all,y_train_all,
          epochs=100,
          shuffle=False,batch_size=window_len)

In [0]:
# create data for predicting
# with training_mode=False no target columns are created, so days_to_predict has no effect here
df_predict=dataframe_for_rnn(uploaded,days_to_predict=3,training_mode=False,last_n_days=270)
# same sequence length as used for training
window_len=14
# single sequence built from the last window_len rows of df_predict, shaped (1, rows, columns)
X_predict_all=get_sequences(df_predict,window=window_len,training_mode=False)


In [23]:
# predicting for october 23, 24 and 25
# actual close prices are 0.4593, 0.4543, 0.4553
predictions=model.predict(X_predict_all)

print(predictions)

[[0.45503038 0.45542666 0.46703637]]
