In [1]:
import gc
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from numpy.random import seed
from pylab import rcParams
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm_notebook

import keras
from keras.models import Sequential
from keras.layers import Activation, Dense
from keras.layers import LSTM
from keras.layers import Dropout

Using TensorFlow backend.


In [2]:
def get_market_data(market, tag=True):
    """
    market: the full name of the cryptocurrency as spelled on coinmarketcap.com. eg.: 'bitcoin'
    tag: eg.: 'btc', if provided it will add a tag to the name of every column.
    returns: panda DataFrame
    This function will use the coinmarketcap.com url for provided coin/token page. 
    Reads the OHLCV and Market Cap.
    Converts the date format to be readable. 
    Makes sure that the data is consistant by converting non_numeric values to a number very close to 0.
    And finally tags each columns if provided.
    """
    now = datetime.now()
    market_data = pd.read_html("https://coinmarketcap.com/currencies/" + market + 
                             "/historical-data/?start=20130428&end="+now.strftime("%Y%m%d"), flavor='html5lib')[0]
    market_data = market_data.assign(Date=pd.to_datetime(market_data['Date']))  
    market_data['Volume'] = (pd.to_numeric(market_data['Volume'], errors='coerce').fillna(0))
    if tag:
        market_data.columns = [market_data.columns[0]] + [tag + '_' + i for i in market_data.columns[1:]]
    return market_data

In [11]:

def merge_data(a, b, from_date):
    """
    a: first DataFrame
    b: second DataFrame
    from_date: includes the data from the provided date and drops the any data before that date.
    returns merged data as Pandas DataFrame
    """
    merged_data = pd.merge(a, b, on=['Date'])
    merged_data = merged_data[merged_data['Date'] >= from_date]
    return merged_data


def add_volatility(data, coins=['BTC_', 'ETH_']):
    """
    data: input data, pandas DataFrame
    coins: default is for 'btc and 'eth'. It could be changed as needed
    This function calculates the volatility and close_off_high of each given coin in 24 hours, 
    and adds the result as new columns to the DataFrame.
    Return: DataFrame with added columns
    """
    for coin in coins:
    # calculate the daily change
        kwargs = {coin + '_change': lambda x: (x[coin + '_Close'] - x[coin + '_Open']) / x[coin + '_Open'],
             coin + '_close_off_high': lambda x: 2*(x[coin + '_High'] - x[coin + '_Close']) / (x[coin + '_High'] - x[coin + '_Low']) - 1,
             coin + '_volatility': lambda x: (x[coin + '_High'] - x[coin + '_Low']) / (x[coin + '_Open'])}
    data = data.assign(**kwargs)
    return data


def create_model_data(data):
    """
    data: pandas DataFrame
    This function drops unnecessary columns and reverses the order of DataFrame based on decending dates.
    Return: pandas DataFrame
    """
    #data = data[['Date']+[coin+metric for coin in ['btc_', 'eth_'] for metric in ['Close','Volume','close_off_high','volatility']]]
    data = data[['Date']+[coin+metric for coin in ['BTC_', 'ETH_'] for metric in ['Close','Volume']]]
    data = data.sort_values(by='Date')
    return data


def split_data(data, training_size=0.8):
    """
    data: Pandas Dataframe
    training_size: proportion of the data to be used for training
    This function splits the data into training_set and test_set based on the given training_size
    Return: train_set and test_set as pandas DataFrame
    """
    return data[:int(training_size*len(data))], data[int(training_size*len(data)):]


def create_inputs(data, coins, window_len):
    """
    data: pandas DataFrame, this could be either training_set or test_set
    coins: coin datas which will be used as the input. Default is 'btc', 'eth'
    window_len: is an intiger to be used as the look back window for creating a single input sample.
    This function will create input array X from the given dataset and will normalize 'Close' and 'Volume' between 0 and 1
    Return: X, the input for our model as a python list which later needs to be converted to numpy array.
    """
    norm_cols = [coin + metric for coin in coins for metric in ['_Close', '_Volume']]
    inputs = []
    for i in range(len(data) - window_len):
        temp_set = data[i:(i + window_len)].copy()
        inputs.append(temp_set)
        for col in norm_cols:
            inputs[i].loc[:, col] = inputs[i].loc[:, col] / inputs[i].loc[:, col].iloc[0] - 1  
    return inputs


def create_outputs(data, coin, window_len):
    """
    data: pandas DataFrame, this could be either training_set or test_set
    coin: the target coin in which we need to create the output labels for
    window_len: is an intiger to be used as the look back window for creating a single input sample.
    This function will create the labels array for our training and validation and normalize it between 0 and 1
    Return: Normalized numpy array for 'Close' prices of the given coin
    """
    return (data[coin + '_Close'][window_len:].values / data[coin + '_Close'][:-window_len].values) - 1


def to_array(data):
    """
    data: DataFrame
    This function will convert list of inputs to a numpy array
    Return: numpy array
    """
    x = [np.array(data[i]) for i in range (len(data))]
    return np.array(x)

In [12]:
def show_plot(data, tag):
    fig, (ax1, ax2) = plt.subplots(2,1, gridspec_kw = {'height_ratios':[3, 1]})
    ax1.set_ylabel('Closing Price ($)',fontsize=12)
    ax2.set_ylabel('Volume ($ bn)',fontsize=12)
    ax2.set_yticks([int('%d000000000'%i) for i in range(10)])
    ax2.set_yticklabels(range(10))
    ax1.set_xticks([datetime.date(i,j,1) for i in range(2013,2019) for j in [1,7]])
    ax1.set_xticklabels('')
    ax2.set_xticks([datetime.date(i,j,1) for i in range(2013,2019) for j in [1,7]])
    ax2.set_xticklabels([datetime.date(i,j,1).strftime('%b %Y')  for i in range(2013,2019) for j in [1,7]])
    ax1.plot(data['Date'].astype(datetime.datetime),data[tag +'_Open'])
    ax2.bar(data['Date'].astype(datetime.datetime).values, data[tag +'_Volume'].values)
    fig.tight_layout()
    plt.show()
  

def date_labels():
    last_date = market_data.iloc[0, 0]
    date_list = [last_date - datetime.timedelta(days=x) for x in range(len(X_test))]
    return[date.strftime('%m/%d/%Y') for date in date_list][::-1]


def plot_results(history, model, Y_target, coin):
    plt.figure(figsize=(25, 20))
    plt.subplot(311)
    plt.plot(history.epoch, history.history['loss'], )
    plt.plot(history.epoch, history.history['val_loss'])
    plt.xlabel('Number of Epochs')
    plt.ylabel('Loss')
    plt.title(coin + ' Model Loss')
    plt.legend(['Training', 'Test'])

    plt.subplot(312)
    plt.plot(Y_target)
    plt.plot(model.predict(X_train))
    plt.xlabel('Dates')
    plt.ylabel('Price')
    plt.title(coin + ' Single Point Price Prediction on Training Set')
    plt.legend(['Actual','Predicted'])

    ax1 = plt.subplot(313)
    plt.plot(test_set[coin + '_Close'][window_len:].values.tolist())
    plt.plot(((np.transpose(model.predict(X_test)) + 1) * test_set[coin + '_Close'].values[:-window_len])[0])
    plt.xlabel('Dates')
    plt.ylabel('Price')
    plt.title(coin + ' Single Point Price Prediction on Test Set')
    plt.legend(['Actual','Predicted'])

    date_list = date_labels()
    ax1.set_xticks([x for x in range(len(date_list))])
    for label in ax1.set_xticklabels([date for date in date_list], rotation='vertical')[::2]:
        label.set_visible(False)

    plt.show()

In [13]:
btc_data = get_market_data('bitcoin', tag='BTC')
eth_data = get_market_data('ethereum', tag='ETH')
btc_data.head()

Unnamed: 0,Date,BTC_Open*,BTC_High,BTC_Low,BTC_Close**,BTC_Volume,BTC_Market Cap
0,2019-10-07,7989.12,8308.45,7905.77,8245.62,18009740000.0,148252589805
1,2019-10-06,8149.88,8161.41,7958.85,7988.16,13160830000.0,143607672862
2,2019-10-05,8210.15,8215.53,8071.12,8151.5,12200500000.0,146529229668
3,2019-10-04,8259.49,8260.06,8151.24,8205.94,13139460000.0,147491804056
4,2019-10-03,8390.77,8414.23,8146.44,8259.99,13668820000.0,148448162840


In [14]:
merged_data = merge_data(btc_data, eth_data, '2015-01-01')

In [7]:
# train_set = train_set.drop('Date', 1)
# test_set = test_set.drop('Date', 1)
# X_train = create_inputs(train_set)
# Y_train_btc = create_outputs(train_set, coin='BTC')
# X_test = create_inputs(test_set)
# Y_test_btc = create_outputs(test_set, coin='BTC')
# Y_train_eth = create_outputs(train_set, coin='ETH')
# Y_test_eth = create_outputs(test_set, coin='ETH')
# X_train, X_test = to_array(X_train), to_array(X_test)

In [15]:
model_data = create_model_data(merged_data)

KeyError: "['ETH_Close', 'BTC_Close'] not in index"