## Forecasting and trading cryptocurrencies with machine learning under changing market conditions
https://jfin-swufe.springeropen.com/articles/10.1186/s40854-020-00217-x

#### - coins
Bitcoin, ethereum, litecoin

#### - regressors
closing prices, high, low, daily trading volume, market capi (from coin market cap), 12 indicators of blockchain information (from https://coinmetrics.io/)

Dependent variable: daily log return (closing price)

In [62]:
from helper_funcs import get_data, convert_unix_to_datetime, separate_symbols
import pandas as pd
import re

In [2]:
tickers = ['BTC', 'ETH', 'LTC']
data = get_data(tickers)

df = pd.DataFrame(data, columns = ['id', 'symbol', 'date', 'high', 'low', 'open', 'close', 'volumeto', 'volumefor'])
df = df.drop('id', axis=1)
df['date'] = convert_unix_to_datetime(df['date'])

Finding data for: 'BTC', 'ETH', 'LTC'


In [3]:
btc, eth, ltc = separate_symbols(df)
coinmetric_df = pd.read_csv('coin_metrics_btc_data.csv', encoding='utf-16')

In [89]:
coinmetric_df

Unnamed: 0,Time,BTC / USD Denominated Closing Price,BTC / Median Tx Size (USD),BTC / Tx Cnt,BTC / Total Fees (native units),BTC / Median Tx Fee (native units),BTC / Active Addr Cnt,BTC / Mean Difficulty,BTC / Block Cnt,BTC / Xfer Cnt,...,LTC / USD Denominated Closing Price,LTC / Median Tx Size (USD),LTC / Tx Cnt,LTC / Total Fees (native units),LTC / Median Tx Fee (native units),LTC / Active Addr Cnt,LTC / Mean Difficulty,LTC / Block Cnt,LTC / Xfer Cnt,LTC / Market Cap (USD)
0,2010-07-18,0.085840,0.158804,248.0,0.790000,0.000000,860.0,1.815433e+02,172.0,396.0,...,,,,,,,,,,
1,2010-07-19,0.080800,0.212100,334.0,0.050000,0.000000,929.0,1.815433e+02,174.0,512.0,...,,,,,,,,,,
2,2010-07-20,0.074736,0.102762,423.0,0.000000,0.000000,936.0,1.815433e+02,181.0,738.0,...,,,,,,,,,,
3,2010-07-21,0.079193,1.465068,247.0,0.000000,0.000000,784.0,1.815433e+02,207.0,377.0,...,,,,,,,,,,
4,2010-07-22,0.058470,0.059054,221.0,0.000000,0.000000,594.0,1.815433e+02,176.0,344.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3960,2021-05-21,37174.331101,205.550631,225921.0,131.843012,0.000263,942736.0,2.504649e+13,110.0,813884.0,...,178.762577,2.484862,114892.0,21.449502,0.000005,361114.0,1.197931e+07,544.0,388833.0,1.210273e+10
3961,2021-05-22,37643.419798,156.084111,221219.0,98.818843,0.000204,918673.0,2.504649e+13,116.0,694860.0,...,171.300046,0.383672,101274.0,16.798795,0.000005,339342.0,1.152752e+07,530.0,374729.0,1.159863e+10
3962,2021-05-23,34774.207222,156.535398,195260.0,81.123794,0.000177,957018.0,2.504649e+13,119.0,650899.0,...,143.299482,1.429354,99546.0,16.976802,0.000005,337614.0,1.152752e+07,577.0,359088.0,9.703765e+09
3963,2021-05-24,38630.409119,205.557043,236897.0,98.207281,0.000189,975836.0,2.504649e+13,126.0,768556.0,...,182.954550,0.668044,102859.0,19.010659,0.000006,364314.0,1.152752e+07,578.0,387843.0,1.239040e+10


In [116]:
def get_coin_cols(coin):
    cols = []
    for col in coinmetric_df.columns:
        if re.match(coin, col):
            cols.append(col)
    time_df = pd.DataFrame(coinmetric_df['Time'])
    time_df.rename(columns={"Time": "date"})
    return time_df.join(coinmetric_df[cols])

In [117]:
get_coin_cols('BTC')

Unnamed: 0,Time,BTC / USD Denominated Closing Price,BTC / Median Tx Size (USD),BTC / Tx Cnt,BTC / Total Fees (native units),BTC / Median Tx Fee (native units),BTC / Active Addr Cnt,BTC / Mean Difficulty,BTC / Block Cnt,BTC / Xfer Cnt,BTC / Market Cap (USD)
0,2010-07-18,0.085840,0.158804,248.0,0.790000,0.000000,860.0,1.815433e+02,172.0,396.0,2.959592e+05
1,2010-07-19,0.080800,0.212100,334.0,0.050000,0.000000,929.0,1.815433e+02,174.0,512.0,2.792852e+05
2,2010-07-20,0.074736,0.102762,423.0,0.000000,0.000000,936.0,1.815433e+02,181.0,738.0,2.590004e+05
3,2010-07-21,0.079193,1.465068,247.0,0.000000,0.000000,784.0,1.815433e+02,207.0,377.0,2.752665e+05
4,2010-07-22,0.058470,0.059054,221.0,0.000000,0.000000,594.0,1.815433e+02,176.0,344.0,2.037496e+05
...,...,...,...,...,...,...,...,...,...,...,...
3960,2021-05-21,37174.331101,205.550631,225921.0,131.843012,0.000263,942736.0,2.504649e+13,110.0,813884.0,6.957288e+11
3961,2021-05-22,37643.419798,156.084111,221219.0,98.818843,0.000204,918673.0,2.504649e+13,116.0,694860.0,7.045352e+11
3962,2021-05-23,34774.207222,156.535398,195260.0,81.123794,0.000177,957018.0,2.504649e+13,119.0,650899.0,6.508608e+11
3963,2021-05-24,38630.409119,205.557043,236897.0,98.207281,0.000189,975836.0,2.504649e+13,126.0,768556.0,7.230669e+11


In [88]:
btc

Unnamed: 0,symbol,date,high,low,open,close,volumeto,volumefor
2001,BTC,2015-12-03,371.37,355.47,359.98,360.31,3.167328e+07,87225.21
3976,BTC,2015-12-04,363.95,354.31,360.31,361.67,2.182061e+07,60548.44
3977,BTC,2015-12-05,390.28,361.50,361.67,386.69,4.343810e+07,115198.52
3978,BTC,2015-12-06,402.19,382.62,386.69,393.38,5.206858e+07,131856.64
3979,BTC,2015-12-07,398.74,381.69,393.38,394.28,3.810288e+07,96750.51
...,...,...,...,...,...,...,...,...
3323,BTC,2021-05-21,42252.07,33552.80,40597.40,37350.39,4.573693e+09,120589.87
3324,BTC,2021-05-22,38841.92,35287.71,37350.39,37491.50,2.808008e+09,74964.98
3325,BTC,2021-05-23,38304.55,31157.18,37491.50,34716.84,4.494414e+09,131584.71
3326,BTC,2021-05-24,39945.35,34467.32,34716.84,38837.76,3.981461e+09,106765.14
