In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression
from math import sqrt

### Load data:

In [2]:
cleaned = pd.read_csv('data/cleaned.csv')
cleaned.drop(columns = ['Unnamed: 0'], axis=1, inplace=True)

### fit LR model:

In [3]:
start_test_date = '2018-03-01'

In [4]:
train = cleaned[(cleaned.time <= start_test_date) & (cleaned.refID_coin == 'Bitcoin')]
# selects data from 03-01 00:05 onward
test  = cleaned[(cleaned.time > start_test_date) & (cleaned.refID_coin == 'Bitcoin')] 

In [5]:
numeric_columns = [col for col in cleaned.columns if col not in ['time', 'target', 'refID_coin']]

In [6]:
def mape_error(actual, forecasted):
    actual = pd.Series(actual)
    forecasted = pd.Series(forecasted)
    return 100 * sum(abs((actual - forecasted) / actual)) / actual.size

def ds_error(actual, forecasted):
    actual = pd.Series(actual)
    forecasted = pd.Series(forecasted)
    return 100 * sum((actual - actual.shift(1)) * (forecasted - forecasted.shift(1)) > 0) / (actual.size - 1)

def mape_var_coeff(mape_errors):
    mape_errors = pd.Series(mape_errors)
    mean_mape_error = mape_errors.mean()
    return sqrt(sum((mape_errors - mean_mape_error)**2) / (mape_errors.size - 1)) / mean_mape_error

In [7]:
for coin in cleaned.refID_coin.unique():
    print(coin)
    train = cleaned[(cleaned.time <= start_test_date) & 
                             (cleaned.refID_coin == coin)]
    test  = cleaned[(cleaned.time > start_test_date) & 
                             (cleaned.refID_coin == coin)].head(-1)
    X_train = train[numeric_columns].values
    X_test = test[numeric_columns].values
    y_train = train.target.values
    y_test = test.target.values
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
    

    lr = LinearRegression()
    lr.fit(X_train, y_train)
    y_pred = lr.predict(X_test)
    y_pred_random_walk = test.price.values
    
    print(mape_error(y_pred, y_test), mape_error(y_pred_random_walk, y_test))
    print(ds_error(y_pred, y_test), 50)

Bitcoin
(12247, 29) (12247,) (6504, 29) (6504,)
0.26263159360671967 0.11923706467679046
39.81239427956328 50
Ethereum
(12247, 29) (12247,) (6504, 29) (6504,)
0.5182637551435982 0.12302945207501462
38.22850991849916 50
Ripple
(12247, 29) (12247,) (6504, 29) (6504,)
0.43535699127971317 0.17303620852282306
36.33707519606335 50
Bitcoin Cash
(12247, 29) (12247,) (6504, 29) (6504,)
0.7142844734508446 0.17186864885318204
36.18330001537752 50
Cardano
(12247, 29) (12247,) (6504, 29) (6504,)
0.27464307189906734 0.24670836839102944
32.523450715054594 50
Litecoin
(12247, 29) (12247,) (6504, 29) (6504,)
0.3897604406360418 0.14963372453147397
36.99830847301246 50
NEMcoin
(12247, 29) (12247,) (6504, 29) (6504,)
0.6609427137998642 0.2614518096238588
36.52160541288636 50
Neo
(12247, 29) (12247,) (6504, 29) (6504,)
0.4541091245777046 0.22419521777245427
35.70659695525142 50
Stellar
(12247, 29) (12247,) (6504, 29) (6504,)
0.3282199906572679 0.22715859683139022
35.41442411194833 50
Iota
(12247, 29) (12247