In [25]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso


# Load the raw dataset.
# NOTE(review): this is an absolute, machine-specific path; the cell only runs
# where that exact directory exists. Consider a path relative to the repo.
data = pd.read_csv('C:/Users/JP/Documents/School/Practicum/Github/Practicum/data/dataset2.csv')

# Build the working frame as a new object: parse 'Date' (YYYY-MM-DD), use it as
# the index, and keep only rows dated after 2017-05-30.
# read_csv already returns a DataFrame, so no pd.DataFrame(...) wrapper is
# needed; .assign() returns a new frame, which leaves `data` as loaded.
df = data.assign(Date=pd.to_datetime(data['Date'], format='%Y-%m-%d')).set_index('Date')
df = df[df.index > '2017-05-30']

# Training window: every remaining row strictly before 2017-10-05.
# .copy() makes `train` an independent frame rather than a slice of `df`.
train = df[df.index < '2017-10-05'].copy()

# Model input columns. The order is significant: the models below are fit on
# `.values` arrays, so columns are matched by position rather than by name.
features = [
    'TBondsOpenValue', 'UnemploymentValue',
    'BTCOpenTM1', 'BTCOpenTM2', 'BTCOpenTM3', 'BTCOpenTM4',
    'BTCOpenTM5', 'BTCOpenTM6', 'BTCOpenTM7',
    'GDP', 'SandPValue',
    'ETHOpenTM1', 'ETHOpenTM2',
    'PrevHigh', 'PrevHigh2', 'PrevHigh3', 'PrevHigh4',
    'PrevLow', 'PrevLow2', 'PrevLow3', 'PrevLow4',
    'PrevVolTo', 'PrevVolTo2', 'PrevVolTo3', 'PrevVolTo4',
    'PrevVolFrom', 'PrevVolFrom2', 'PrevVolFrom3', 'PrevVolFrom4',
    'PrevSP', 'PrevSP2', 'PrevSP3', 'PrevSP4',
    'ETHOpenTM3', 'ETHOpenTM4', 'ETHOpenTM5', 'ETHOpenTM6', 'ETHOpenTM7',
]


# Discard every training row that has a missing value in any column.
train = train.dropna()

# Pack the feature matrix and the 'ETHOpen' target into xgboost's DMatrix.
dtrain = xgb.DMatrix(data=train[features].values,
                     label=train['ETHOpen'].values)

# Booster settings handed to xgb.train below.
# NOTE(review): newer xgboost releases reportedly deprecate 'reg:linear' and
# 'silent' in favour of other names -- confirm against the installed version.
params = {
    'booster': 'gbtree',
    'objective': 'reg:linear',
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'silent': 1,
    'eval_metric': 'rmse',
}

# Number of boosting rounds.
num_round = 50
# Expose the index dates as an ordinary 'Date' column as well.
train['Date'] = train.index.values

# Only the training matrix is on the watch list, so the per-round metric
# printed during training is computed on the training data itself.
eval_list = [(dtrain, 'train')]

print('Training xgb model:')
bst = xgb.train(params, dtrain, num_boost_round=num_round, evals=eval_list)

print('Train Ridge Regression:')
# Ridge with its default settings, fit on the `features` columns against
# 'ETHOpen'. fit() returns the estimator itself, so it can be chained.
lr = Ridge().fit(train[features].values, train['ETHOpen'].values)

print('Training Lasso Regression:')
# max_iter is an iteration count, so pass a true integer: the float literal
# 1e7 is rejected by scikit-learn versions that validate parameter types.
# NOTE(review): `normalize=` has been deprecated/removed in newer scikit-learn
# releases -- confirm against the installed version; if it is unavailable the
# features need to be scaled in a separate preprocessing step instead.
lassoreg = Lasso(alpha=.001, normalize=True, max_iter=10**7)
lassoreg.fit(train.loc[:, features].values, train.loc[:, 'ETHOpen'].values)

#print('Train Randomized Lasso Regression:')
#RandomizedLasso.fit(train.loc[:, features].values,train.loc[:, 'ETHOpen'].values)

    

# Hold-out window: rows dated 2017-10-05 or later, with any row containing a
# missing value removed. .copy() makes `test` an independent frame so the
# prediction columns added below are not written onto a slice of `df`.
test = df[df.index >= '2017-10-05'].dropna().copy()

# Build the feature matrix once; all three models consume the same array.
test_X = test[features].values

dtest = xgb.DMatrix(test_X)
xgb_pred = bst.predict(dtest)
lr_pred = lr.predict(test_X)
lasso_pred = lassoreg.predict(test_X)

# Weights of the xgboost / Ridge blend stored in 'ETHOpenRidgexgb'.
XGB_WEIGHT = 0.2
RIDGE_WEIGHT = 0.8

test['ETHOpenRidgexgb'] = XGB_WEIGHT*xgb_pred + RIDGE_WEIGHT*lr_pred
test['ETHOpenRidge'] = lr_pred
test['ETHOpenxgb'] = xgb_pred
test['ETHOpenLasso'] = lasso_pred

# Not used by any visible cell; kept so the name stays defined in the kernel.
target = train['ETHOpen']

# The original wrapped this in a `while True` loop that always broke after one
# pass; running the body once is equivalent.
print("Prediction: {}".format(test[['ETHOpen','ETHOpenRidgexgb','ETHOpenRidge','ETHOpenxgb','ETHOpenLasso']]))

Training xgb model:
[0]	train-rmse:199.124
[1]	train-rmse:142.071
[2]	train-rmse:102.071
[3]	train-rmse:73.2356
[4]	train-rmse:53.5946
[5]	train-rmse:39.8505
[6]	train-rmse:30.4518
[7]	train-rmse:23.2104
[8]	train-rmse:18.0142
[9]	train-rmse:14.2562
[10]	train-rmse:11.3631
[11]	train-rmse:9.32248
[12]	train-rmse:7.55732
[13]	train-rmse:6.51066
[14]	train-rmse:5.5925
[15]	train-rmse:4.72941
[16]	train-rmse:4.28273
[17]	train-rmse:3.53256
[18]	train-rmse:3.13217
[19]	train-rmse:2.80517
[20]	train-rmse:2.48643
[21]	train-rmse:2.09021
[22]	train-rmse:1.8254
[23]	train-rmse:1.62571
[24]	train-rmse:1.41684
[25]	train-rmse:1.2609
[26]	train-rmse:1.15639
[27]	train-rmse:1.02136
[28]	train-rmse:0.918642
[29]	train-rmse:0.80341
[30]	train-rmse:0.721808
[31]	train-rmse:0.623424
[32]	train-rmse:0.546755
[33]	train-rmse:0.491724
[34]	train-rmse:0.427859
[35]	train-rmse:0.378517
[36]	train-rmse:0.334094
[37]	train-rmse:0.29361
[38]	train-rmse:0.25014
[39]	train-rmse:0.213723
[40]	train-rmse:0.188274

In [8]:
# Preview the first rows of `test`, including the prediction columns.
# NOTE(review): the saved output below starts at 2017-09-01, which does not
# match the `>= '2017-10-05'` filter used to build `test`, and this cell's
# execution counter (8) is lower than the modelling cell's (25) -- the output
# looks stale from an earlier run; re-execute after Restart & Run All.
test.head()

Unnamed: 0_level_0,ETHOpen,ETHHigh,ETHLow,TBondsOpenValue,IndexFutures,InflationRateValue,UnemploymentValue,FedFundRateValue,GDP,SandPValue,...,BTCOpenTM2,BTCOpenTM3,BTCOpenTM4,BTCOpenTM5,BTCOpenTM6,BTCOpenTM7,ETHOpenRidgexgb,ETHOpenRidge,ETHOpenxgb,ETHOpenLasso
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-01,388.33,395.3,386.71,157.4375,92.36,1.939,4.4,1.16,18569.1,2472.0,...,317.94,338.92,335.37,299.21,297.95,294.1,306.514056,307.643593,301.995911,307.510751
2017-09-05,303.7,324.37,275.97,156.84375,92.395,1.939,4.4,1.16,18569.1,2463.0,...,338.92,335.37,299.21,297.95,294.1,275.84,317.277155,316.878683,318.871033,317.45077
2017-09-06,317.94,341.77,317.82,158.4375,91.955,1.939,4.4,1.16,18569.1,2459.3,...,335.37,299.21,297.95,294.1,275.84,223.14,337.161489,342.96321,313.95459,343.684012
2017-09-07,338.92,338.92,323.36,157.6875,91.925,1.939,4.4,1.16,18569.1,2461.0,...,299.21,297.95,294.1,275.84,223.14,258.4,336.524118,344.61622,304.155701,345.661304
2017-09-08,335.37,335.39,293.12,158.96875,91.215,1.939,4.4,1.16,18569.1,2461.0,...,297.95,294.1,275.84,223.14,258.4,297.53,311.350586,311.208882,311.917389,312.250268


In [26]:
# NOTE(review): this cell fails with NameError (see the saved output below):
# `RandomizedLasso` is never imported or defined in the visible cells, and
# `.fit` is called on the bare name rather than on a constructed estimator.
# Presumably scikit-learn's RandomizedLasso was intended -- confirm that the
# installed scikit-learn still provides it before adding an import.
print('Train Randomized Lasso Regression:')
RandomizedLasso.fit(train.loc[:, features].values,train.loc[:, 'ETHOpen'].values)

Train Randomized Lasso Regression:


NameError: name 'RandomizedLasso' is not defined

In [None]:
# Add randomized Lasso
# Add feature selection
# Compare to ARIMA
# Try to fill in the missing values
# Measure Success
# Make an 'Active' Model