[View in Colaboratory](https://colab.research.google.com/github/KumarNavish/Personal-Projects/blob/master/CryptoCurrency_Prediction__RNN.ipynb)

## Importing all dependencies

In [46]:
%pylab inline
import pandas as pd
from sklearn.preprocessing import scale
from collections import deque ## ideal for storing time series data 
from random import shuffle
import tensorflow as tf
from tensorflow.keras.models import  Sequential
from tensorflow.keras.layers import Dense,CuDNNLSTM,BatchNormalization,Dropout

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [18]:
## Preview of the raw data for a single cryptocurrency (LTC-USD here).
## Column names are supplied explicitly through `names`.
pd.read_csv(
    'LTC-USD.csv',
    names=['time', 'Open', 'High', 'Low', 'Close', 'Volume'],
).head(5)

Unnamed: 0,time,Open,High,Low,Close,Volume
0,1528968660,96.580002,96.589996,96.589996,96.580002,9.6472
1,1528968720,96.449997,96.669998,96.589996,96.660004,314.387024
2,1528968780,96.470001,96.57,96.57,96.57,77.129799
3,1528968840,96.449997,96.57,96.57,96.5,7.216067
4,1528968900,96.279999,96.540001,96.5,96.389999,524.539978


### Getting the required data into the right format

In [65]:
## Only the Close and Volume columns of each cryptocurrency are needed.
currencies = ['BCH-USD', 'BTC-USD', 'ETH-USD', 'LTC-USD']


def dataframe(currency):
    """Load ``<currency>.csv`` and keep its close/volume columns, indexed by time.

    Parameters
    ----------
    currency : str
        Currency pair name; used both as the CSV file stem and as the prefix
        of the returned column names.

    Returns
    -------
    pandas.DataFrame
        Columns ``<currency>_Close`` and ``<currency>_Volume`` with ``time``
        as the index.
    """
    close_col = f'{currency}_Close'
    volume_col = f'{currency}_Volume'
    raw = pd.read_csv(
        f'{currency}.csv',
        names=['time', 'Open', 'High', 'Low', close_col, volume_col],
    )
    # Prefixing the columns with the currency keeps them distinct once several
    # currencies are joined side by side.
    return raw[['time', close_col, volume_col]].set_index('time')
#------------------------------------------------------------------------------
# Build one wide frame: start from the first currency and left-join every
# other currency on the shared `time` index. All loading goes through
# `dataframe` so the CSV-reading logic lives in exactly one place.
df = dataframe(currencies[0])
for currency in currencies[1:]:
    df = df.join(dataframe(currency))
df.head()

Unnamed: 0_level_0,BCH-USD_Close,BCH-USD_Volume,BTC-USD_Close,BTC-USD_Volume,ETH-USD_Close,ETH-USD_Volume,LTC-USD_Close,LTC-USD_Volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1528968660,871.719971,5.675361,6489.549805,0.5871,,,96.580002,9.6472
1528968720,870.859985,26.856577,6487.379883,7.706374,486.01001,26.019083,96.660004,314.387024
1528968780,870.099976,1.1243,6479.410156,3.088252,486.0,8.4494,96.57,77.129799
1528968840,870.789978,1.749862,6479.410156,1.4041,485.75,26.994646,96.5,7.216067
1528968900,870.0,1.6805,6479.97998,0.753,486.0,77.355759,96.389999,524.539978


In [0]:
SEQ_LEN = 60                  ## number of consecutive rows fed to the model as one sequence
FUT_PERIOD_PREDICT = 3        ## how many rows ahead the price is compared against (rise/drop)
RATIO_TO_PREDICT = "LTC-USD"  ## currency whose future close price defines the target

In [0]:
def classify(current, future):
    """Return 1 (buy) if `future` is strictly greater than `current`, else 0.

    Both arguments are converted with ``float`` before comparing, so numeric
    strings are accepted. A NaN on either side compares as False and yields 0.
    """
    return int(float(future) > float(current))

In [0]:
## Pair every row with the target currency's close price FUT_PERIOD_PREDICT rows
## later. The final FUT_PERIOD_PREDICT rows have no later value and become NaN.
df['future'] = df[f'{RATIO_TO_PREDICT}_Close'].shift(periods=-FUT_PERIOD_PREDICT)

In [0]:
## Label every row 1/0 according to whether the close price is higher
## FUT_PERIOD_PREDICT rows later.
# Rows whose future price is unknown (the end of the series, or a gap in the
# close data) cannot be labelled: `classify` would compare against NaN and
# silently mark them 0. Drop them before labelling.
df.dropna(subset=['future'], inplace=True)
df['target'] = list(map(classify, df[f'{RATIO_TO_PREDICT}_Close'], df['future']))
df.drop(['future'], axis=1, inplace=True)  ## future was only needed to derive the target
df.dropna(inplace=True)
# Taken after cleaning so the array lines up row-for-row with `df`.
targets = df.target.values  ##### Targets

In [0]:
## Validation split: hold out the most recent 5% of the timeline.
#------------------------------------------------------------
# Index value that marks the start of the last 5% of the sorted index.
last_5pct = sorted(df.index.values)[-int(0.05 * len(df.index))]
validation_df = df.loc[df.index >= last_5pct]
train_df = df.loc[df.index < last_5pct]
#------------------------------------------------------------
def preprocess_seq(col_df, seq_len=None):
    """Turn a price/volume frame into balanced, shuffled (sequence, label) arrays.

    Every column except ``'target'`` is converted to percent change and then
    standardised with ``scale``. A sliding window of ``seq_len`` consecutive
    rows forms one sample, labelled with the ``target`` value of the window's
    last row. Both classes are truncated to the size of the smaller one and
    the result is shuffled.

    The caller's frame is left untouched: all work happens on a copy, so the
    function can be re-run on the same input.

    Parameters
    ----------
    col_df : pandas.DataFrame
        Feature columns plus a ``'target'`` column. ``'target'`` must be the
        last column, because each row is split positionally into
        features (all but last) and label (last).
    seq_len : int, optional
        Window length. Defaults to the notebook-level ``SEQ_LEN``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)`` where ``X`` stacks the windows and ``Y`` holds the
        matching labels.
    """
    if seq_len is None:
        seq_len = SEQ_LEN

    # Private copy: the inputs are slices of the main frame, and editing them
    # in place would both warn and corrupt the data on a second run.
    col_df = col_df.copy()

    for column in col_df.columns:
        if column != 'target':
            # A zero followed by a non-zero value makes pct_change emit +/-inf.
            # Treat those as missing so they are dropped before scaling
            # (`scale` is expected to reject non-finite input).
            pct = col_df[column].pct_change().replace([np.inf, -np.inf], np.nan)
            col_df[column] = pct
            col_df.dropna(inplace=True)
            col_df[column] = scale(col_df[column].values)
    col_df.dropna(inplace=True)

    # Slide a fixed-length window over the rows; the deque discards the oldest
    # row automatically once it holds `seq_len` entries.
    sequential_data = []
    prev_data = deque(maxlen=seq_len)
    for obs in col_df.values:
        prev_data.append(list(obs[:-1]))
        if len(prev_data) == seq_len:
            sequential_data.append([np.array(prev_data), obs[-1]])

    # Shuffle before balancing so the truncation below keeps a random subset
    # of the majority class rather than its earliest windows.
    shuffle(sequential_data)

    buy = [sample for sample in sequential_data if sample[1] == 1]   # price rises
    sell = [sample for sample in sequential_data if sample[1] != 1]  # price does not rise

    lower = min(len(buy), len(sell))  # size of the smaller class
    sequential_data = buy[:lower] + sell[:lower]
    # One shuffle of the combined list is enough to interleave both classes.
    shuffle(sequential_data)

    X = [seq for seq, _ in sequential_data]
    Y = [target for _, target in sequential_data]
    return np.array(X), np.array(Y)
#----------------------------------------------------------
# Build model-ready arrays for each split. `preprocess_seq` shuffles with the
# global `random` state, so the order of these two calls affects the exact
# sample ordering each one produces.
val_x,val_y = preprocess_seq(validation_df)
train_x,train_y = preprocess_seq(train_df)

## Applying CuDNNLSTM for sequence prediction

In [77]:
BATCH_SIZE = 64  ## number of samples per gradient update

# Three stacked CuDNNLSTM layers (the first two return full sequences so the
# next recurrent layer has a sequence to consume), followed by a small dense
# head and a 2-way softmax over {0: do not buy, 1: buy}.
# NOTE(review): CuDNNLSTM presumably needs a GPU runtime; confirm before
# running on CPU.
model = Sequential([
    CuDNNLSTM(128, input_shape=train_x.shape[1:], return_sequences=True),
    Dropout(0.2),
    BatchNormalization(),

    CuDNNLSTM(128, return_sequences=True),
    Dropout(0.1),
    BatchNormalization(),

    CuDNNLSTM(128),
    Dropout(0.2),

    Dense(64, activation='relu'),
    Dropout(0.2),

    Dense(2, activation='softmax'),
])

opt = tf.keras.optimizers.Adam(lr=1e-3, decay=1e-5)

# Labels are plain class indices (0/1), hence the sparse variant of the loss.
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(
    train_x,
    train_y,
    epochs=10,
    batch_size=BATCH_SIZE,
    validation_data=(val_x, val_y),
)

Train on 25630 samples, validate on 1094 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f1b44d20358>