In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.linear_model import LinearRegression, ElasticNetCV, ElasticNet
from xgboost import XGBRegressor
from math import sqrt
from ggplot import *

### Load data:

In [2]:
# Read the pre-cleaned dataset (parsing `time` as datetimes) and discard the
# 'Unnamed: 0' column -- presumably a stray index column written by an earlier
# `to_csv` call; confirm against the script that produced the file.
cleaned = (
    pd.read_csv('data/cleaned.csv', parse_dates=['time'])
      .drop(columns=['Unnamed: 0'])
)

### Fit and evaluate a linear (ElasticNet) model per coin:

In [3]:
# Train/test boundary: rows whose `time` is <= this value are used for training,
# rows with a later `time` for testing (see the per-coin loop below).
start_test_date = '2018-03-01'

In [4]:
# Model inputs: every column of `cleaned` except the timestamp, the prediction
# target and the coin identifier. Column order follows `cleaned.columns`.
non_feature_columns = ('time', 'target', 'refID_coin')
numeric_columns = [
    name for name in cleaned.columns
    if name not in non_feature_columns
]

In [5]:
def mape_error(actual, forecasted):
    """Mean absolute percentage error between two sequences, in percent.

    Both inputs are wrapped in ``pd.Series``; the per-element relative error
    ``|actual - forecasted| / |actual|`` is summed and divided by the number of
    elements in ``actual``. Entries of ``actual`` equal to zero, or NaNs in
    either input, propagate into the result (inf / NaN).
    """
    actual_series = pd.Series(actual)
    forecast_series = pd.Series(forecasted)
    relative_errors = ((actual_series - forecast_series) / actual_series).abs()
    # Builtin sum is kept on purpose: unlike Series.sum() it does not skip NaNs.
    total = sum(relative_errors)
    return 100 * total / actual_series.size

def ds_error(actual, forecasted):
    """Directional symmetry: share of steps where both series move the same way.

    For every pair of consecutive observations the one-step change of ``actual``
    is multiplied by the one-step change of ``forecasted``; a strictly positive
    product means both moved in the same direction (both up or both down).
    Steps where either series is unchanged count as a miss.

    Parameters
    ----------
    actual, forecasted : sequence of numbers
        Converted to ``pd.Series``. They are expected to have the same length.

    Returns
    -------
    float
        Percentage (0-100) of the ``len(actual) - 1`` steps with matching
        direction, or NaN when fewer than two observations are given (there is
        no step to evaluate).
    """
    actual = pd.Series(actual)
    forecasted = pd.Series(forecasted)
    n_steps = actual.size - 1
    if n_steps < 1:
        # No consecutive pair exists; avoid dividing by zero (or by -1).
        return float('nan')
    # The first element of each diff is NaN, so its product compares False and
    # is correctly excluded from the count.
    same_direction = (actual.diff() * forecasted.diff()) > 0
    return 100 * same_direction.sum() / n_steps

def mape_var_coeff(mape_errors):
    """Coefficient of variation of a collection of MAPE values.

    Computes the sample standard deviation (denominator ``n - 1``) of
    ``mape_errors`` and divides it by their mean.
    """
    errors = pd.Series(mape_errors)
    centre = errors.mean()
    squared_deviations = (errors - centre) ** 2
    sample_variance = sum(squared_deviations) / (errors.size - 1)
    return sqrt(sample_variance) / centre

In [26]:
# For every coin: fit an ElasticNet on the training period and compare its
# forecasts with a random-walk baseline ("next value == current price").
for coin in cleaned.refID_coin.unique():
    print(coin)

    # Split this coin's rows at `start_test_date`.
    # NOTE(review): the first 3 training rows and the last test row are dropped;
    # presumably they have incomplete features / no target -- confirm.
    coin_mask = cleaned.refID_coin == coin
    train = cleaned[(cleaned.time <= start_test_date) & coin_mask].tail(-3)
    test = cleaned[(cleaned.time > start_test_date) & coin_mask].head(-1)

    X_train = train[numeric_columns].values
    X_test = test[numeric_columns].values
    # The model learns the change relative to the current price, not the level.
    y_train = train.target.values - train.price.values
    # Metrics are computed on the target level itself.
    y_test = test.target.values

    # NOTE: `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2.
    # On newer versions this line raises TypeError and the features have to be
    # scaled explicitly (e.g. a StandardScaler pipeline), which is not
    # numerically identical to `normalize=True`.
    lr = ElasticNet(alpha=0.1, normalize=True)
    # The target is scaled by 100 for fitting and scaled back after predicting
    # (presumably so the penalty does not swamp very small deltas -- confirm).
    lr.fit(X_train, y_train * 100)
    y_pred = lr.predict(X_test) / 100 + test.price.values
    y_pred_random_walk = test.price.values

    # Both metrics take (actual, forecasted); first number is the model,
    # second number is the random-walk baseline.
    print(mape_error(y_test, y_pred), mape_error(y_test, y_pred_random_walk))
    print(ds_error(y_test, y_pred), ds_error(y_test, y_pred_random_walk))

Bitcoin
0.11926604863281061 0.11922249359209515
39.81851737926792 29.990772070132266
Ethereum
0.12334752386134447 0.12301136305073479
38.92648415872039 29.40633651184251
Ripple
0.17365914339436406 0.1729989603698134
27.191633343586588 27.191633343586588


In [22]:
# (Interactive `ElasticNet?` help lookup removed: it is IPython-only syntax and
# not part of the analysis. See the scikit-learn ElasticNet documentation.)

In [23]:
# Pair each fitted coefficient with its feature name. `lr` is the model left
# over from the final iteration of the per-coin loop above, i.e. the last coin.
[(weight, feature) for weight, feature in zip(lr.coef_, numeric_columns)]

[(-0.0, 'price'),
 (-0.0, 'marketCap'),
 (0.0, 'CirculatingSupply'),
 (0.0, 'Volume24h'),
 (0.0, 'Movement1h'),
 (0.0, 'Movement24h'),
 (0.0, 'percent_market_cap'),
 (-0.0, 'market_entropy'),
 (-0.0, 'total_market_cap'),
 (-0.0, 'price_MA_2'),
 (-0.0, 'price_MA_4'),
 (-0.0, 'price_MA_8'),
 (-0.0, 'price_MA_12'),
 (-0.0, 'price_MA_16'),
 (-0.0, 'price_MA_24'),
 (-0.0, 'price_MA_36'),
 (-0.0, 'price_MA_48'),
 (-0.0, 'price_MA_72'),
 (-0.0, 'price_MA_144'),
 (-0.0, 'price_MA_288'),
 (-0.0, 'price_MA_1440'),
 (0.0, 'CirculatingSupply_MA_2'),
 (0.0, 'CirculatingSupply_MA_4'),
 (0.0, 'CirculatingSupply_MA_8'),
 (0.0, 'CirculatingSupply_MA_12'),
 (0.0, 'CirculatingSupply_MA_16'),
 (0.0, 'CirculatingSupply_MA_24'),
 (0.0, 'CirculatingSupply_MA_36'),
 (0.0, 'CirculatingSupply_MA_48'),
 (0.0, 'CirculatingSupply_MA_72'),
 (0.0, 'CirculatingSupply_MA_144'),
 (0.0, 'CirculatingSupply_MA_288'),
 (0.0, 'CirculatingSupply_MA_1440'),
 (-0.0, 'percent_market_cap_MA_2'),
 (-0.0, 'percent_market_cap_MA_4'