In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression
from math import sqrt

### Load data:

In [24]:
cleaned = pd.read_csv('data/cleaned.csv')
cleaned.drop(columns = ['Unnamed: 0'], axis=1, inplace=True)

### fit LR model:

In [3]:
start_test_date = '2018-03-01'

In [5]:
numeric_columns = [col for col in cleaned.columns if col not in ['time', 'target', 'refID_coin']]

In [6]:
def mape_error(actual, forecasted):
    actual = pd.Series(actual)
    forecasted = pd.Series(forecasted)
    return 100 * sum(abs((actual - forecasted) / actual)) / actual.size

def ds_error(actual, forecasted):
    actual = pd.Series(actual)
    forecasted = pd.Series(forecasted)
    return 100 * sum((actual - actual.shift(1)) * (forecasted - forecasted.shift(1)) > 0) / (actual.size - 1)

def mape_var_coeff(mape_errors):
    mape_errors = pd.Series(mape_errors)
    mean_mape_error = mape_errors.mean()
    return sqrt(sum((mape_errors - mean_mape_error)**2) / (mape_errors.size - 1)) / mean_mape_error

In [29]:
regex = "price|marketCap|CirculatingSupply|Volume24h|market_entropy|total_market_cap|target"
log_cols = cleaned.filter(regex = regex).columns

In [30]:
cleaned.loc[:, log_cols] = np.log(cleaned.loc[:, log_cols].values+1)

In [35]:
for coin in cleaned.refID_coin.unique():
    print(coin)
    train = cleaned[(cleaned.time <= start_test_date) & 
                             (cleaned.refID_coin == coin)]
    test  = cleaned[(cleaned.time > start_test_date) & 
                             (cleaned.refID_coin == coin)].head(-1)
    X_train = train[numeric_columns].values
    X_test = test[numeric_columns].values
    y_train = train.target.values
    y_test = test.target.values
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    

    lr = LinearRegression()
    lr.fit(X_train, y_train)
    y_pred = np.exp(lr.predict(X_test))-1
    y_pred_random_walk = np.exp(test.price.values)
    y_test = np.exp(y_test)-1
    
    print(mape_error(y_pred, y_test), mape_error(y_pred_random_walk, y_test))
    print(ds_error(y_pred, y_test), 50)

Bitcoin
(12247, 29) (12247,) (6504, 29) (6504,)
0.13751347929618718 0.12282054800128257
39.59710902660311 50
Ethereum
(12247, 29) (12247,) (6504, 29) (6504,)
0.38284069713747015 0.20657005931644828
38.274642472704905 50
Ripple
(12247, 29) (12247,) (6504, 29) (6504,)
0.3974707640320253 55.8475361674998
36.552360449023524 50
Bitcoin Cash
(12247, 29) (12247,) (6504, 29) (6504,)
0.2632186874457408 0.21173898787885811
36.06027987082885 50
Cardano
(12247, 29) (12247,) (6504, 29) (6504,)
0.311998188603823 81.52431120454588
32.43118560664309 50
Litecoin
(12247, 29) (12247,) (6504, 29) (6504,)
0.23101619168151363 0.5765462676347342
36.72151314777795 50
NEMcoin
(12247, 29) (12247,) (6504, 29) (6504,)
0.4954002752292457 74.25997157336774
34.86083346147932 50
Neo
(12247, 29) (12247,) (6504, 29) (6504,)
0.9492046421078465 1.1858256882613987
35.56819929263417 50
Stellar
(12247, 29) (12247,) (6504, 29) (6504,)
0.556063836192398 77.80207868214117
35.337536521605415 50
Iota
(12247, 29) (12247,) (6504, 