## Forecasting and trading cryptocurrencies with machine learning under changing market conditions
https://jfin-swufe.springeropen.com/articles/10.1186/s40854-020-00217-x

#### - coins
Bitcoin, ethereum, litecoin

#### - regressors
Closing, high, and low prices; daily trading volume; market capitalization (from CoinMarketCap); and 12 blockchain indicators (from https://coinmetrics.io/)

Dependent variable: daily log return (closing price)

In [234]:
import pandas as pd
import re
import numpy as np
import plotly.express as px
from helper_funcs import get_data, convert_unix_to_datetime, separate_symbols

In [145]:
# Fetch price records for the three coins listed above through the project
# helper `get_data`.
tickers = ['BTC', 'ETH', 'LTC']
data = get_data(tickers)

# Label the raw records and discard the 'id' column in one expression.
df = pd.DataFrame(
    data,
    columns=['id', 'symbol', 'date', 'high', 'low', 'open', 'close', 'volumeto', 'volumefor'],
).drop(columns='id')
# Presumably 'date' arrives as Unix timestamps (going by the helper's name) -- TODO confirm.
df['date'] = convert_unix_to_datetime(df['date'])

Finding data for: 'BTC', 'ETH', 'LTC'


In [227]:
def get_coin_cols(coin):
    """Return the 'Time' column (renamed to 'date') joined with one coin's columns.

    Reads the notebook-level ``coinmetric_df``. A column is selected when
    ``re.match(coin, column_name)`` succeeds, so ``coin`` is interpreted as a
    regular expression anchored at the start of the column name.

    Parameters
    ----------
    coin : str
        Pattern matched against the beginning of each column name, e.g. 'BTC'.

    Returns
    -------
    pandas.DataFrame
        A 'date' column followed by every matching column, on the same index
        as ``coinmetric_df``.
    """
    matching = [name for name in coinmetric_df.columns if re.match(coin, name)]
    dates = coinmetric_df[['Time']].rename(columns={"Time": "date"})
    return dates.join(coinmetric_df[matching])

def take_diff(column_list, df=None):
    """Replace each listed column with its first difference.

    Parameters
    ----------
    column_list : iterable of str
        Names of the columns to difference.
    df : pandas.DataFrame, optional
        Frame to transform. When omitted, the notebook-level ``btc_df`` is
        looked up at call time. A ``df=btc_df`` default would instead be
        evaluated once, when the ``def`` statement runs: that raises
        ``NameError`` on a fresh kernel where ``btc_df`` does not exist yet,
        and otherwise pins whatever object ``btc_df`` named at that moment.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, modified in place. The first row of every
        differenced column becomes NaN, so calling this twice on the same
        frame differences the columns twice.
    """
    if df is None:
        # Resolve the notebook global lazily rather than at definition time.
        df = btc_df
    for col in column_list:
        df[col] = df[col].diff()
    return df

In [228]:
btc, eth, ltc = separate_symbols(df)
coinmetric_df = pd.read_csv('coin_metrics_btc_data.csv', encoding='utf-16')

# From here on 'close' holds the natural log of the closing price.
btc['close'] = np.log(btc['close'])
# Daily high-low range relative to the midpoint of high and low.
btc['rel_price_change'] = 2 * (btc['high'] - btc['low']) / (btc['high'] + btc['low'])
# Parkinson range volatility: sqrt( ln(H/L)^2 / (4 * ln 2) ).
# The divisor needs its own parentheses: `x / 4 * np.log(2)` evaluates as
# (x / 4) * ln 2, which multiplies by ln 2 instead of dividing by it.
btc['parkinson_vol'] = np.sqrt(np.log(btc['high'] / btc['low']) ** 2 / (4 * np.log(2)))
# Take an explicit copy so the lag columns added in a later cell are written
# to an independent frame rather than to a selection of the original.
btc = btc[['date', 'close', 'volumeto', 'volumefor', 'rel_price_change', 'parkinson_vol']].copy()

#### Dataframe processing list

- First 7 lags of the closing price and of the Parkinson volatility
- First difference of market cap, number of transactions, active addresses, average difficulty, number of blocks, block size, and number of payments


In [229]:
# Add shifted copies of the log close and the Parkinson volatility as
# 'close_lag<k>' / 'parkinson_lag<k>' for k = 0..7.
# NOTE(review): k = 0 is an unshifted duplicate of the source column, while the
# processing list above asks for the first 7 lags -- confirm whether the
# lag-0 columns are intended before using them as regressors.
for lag in range(8):
    btc[f'close_lag{lag}'] = btc['close'].shift(lag)
    btc[f'parkinson_lag{lag}'] = btc['parkinson_vol'].shift(lag)

In [230]:
# Merge the BTC price features with the BTC columns of the Coin Metrics file
# on 'date' (pandas' default inner join), index the result by date, and strip
# the 'BTC / ' prefix from the column names.
btc_df = (
    btc.merge(get_coin_cols('BTC'), on='date')
    .set_index('date')
    .rename(columns=lambda name: re.sub('BTC / ', '', name))
)

In [231]:
# Blockchain series that are replaced by their first difference.
column_list = ['Market Cap (USD)', 'Tx Cnt', 'Active Addr Cnt',
               'Mean Difficulty', 'Block Cnt', 'Xfer Cnt']
# Pass the frame explicitly: take_diff's default for `df` is bound when the
# function is defined, so relying on it can transform a stale object instead
# of the btc_df built in the previous cell.
btc_df = take_diff(column_list, df=btc_df)

#### Model building
- Rolling window:
    - Training sample: the first 50% of the observations, used to train the models.
    - Validation sample: the next 25%, in which each close is forecasted; used to choose variables/hyperparameters.
    - Test sample: the remaining observations, forecasted with the models that showed the best performance in the validation sample.
    

In [301]:
# Positional row offsets where the training sample (first 50%) and the
# validation sample (next 25%) end; int() truncates toward zero.
p50, p75 = (int(len(btc_df['close']) * share) for share in (0.5, 0.75))

In [None]:
def plot_graph_sets():
    """Scatter-plot ``btc_df['close']`` with the three sample periods shaded.

    Reads the notebook-level ``btc_df``, ``p50`` and ``p75``. Three vertical
    bands are drawn over the index: blue from the first index value to the
    one at ``p50`` (training), green from ``p50`` to ``p75`` (validation) and
    orange from ``p75`` to the last index value (test).

    Returns
    -------
    The plotly figure, not yet shown.
    """
    fig = px.scatter(data_frame=btc_df['close'], range_color=(0,1000))

    train_end = btc_df.index[p50]
    valid_end = btc_df.index[p75]
    bands = [
        (btc_df.index[0], train_end, "Training_set", "blue"),
        (train_end, valid_end, "Validation_set", "green"),
        (valid_end, btc_df.index[-1], "Test_set", "orange"),
    ]
    for start, stop, label, colour in bands:
        fig.add_vrect(x0=start, x1=stop, annotation_text=label,
                      annotation_position="top right", fillcolor=colour,
                      opacity=0.25, line_width=0)
    return fig

In [325]:
# Build the shaded train/validation/test figure with the helper defined in the
# previous cell instead of repeating its body here, then render it.
fig = plot_graph_sets()
fig.show()