In [42]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso


# Baseline experiment: fit XGBoost, Ridge and Lasso regressors on the full
# feature list to predict 'ETHOpen', then print their predictions (and a fixed
# 0.2/0.8 XGBoost/Ridge blend) for the held-out period.

# NOTE(review): absolute, machine-specific path; consider making it relative
# to the repository so the notebook runs on other machines.
DATA_PATH = 'C:/Users/JP/Documents/School/Practicum/Github/Practicum/data/dataset2.csv'
START_DATE = '2017-05-30'   # rows on or before this date are discarded
SPLIT_DATE = '2017-10-05'   # train: strictly before; test: on or after
XGB_WEIGHT = 0.2            # blend weights used for 'ETHOpenRidgexgb'
RIDGE_WEIGHT = 0.8

# read_csv already returns a DataFrame; `data` is kept as the untouched raw frame.
data = pd.read_csv(DATA_PATH)
df = (
    data
    .assign(Date=pd.to_datetime(data['Date'], format='%Y-%m-%d'))
    .set_index('Date')
)
df = df[df.index > START_DATE]

# Column order is preserved from the original list.
features = [
    'TBondsOpenValue', 'UnemploymentValue',
    'BTCOpenTM1', 'BTCOpenTM2', 'BTCOpenTM3', 'BTCOpenTM4',
    'BTCOpenTM5', 'BTCOpenTM6', 'BTCOpenTM7',
    'GDP', 'SandPValue',
    'ETHOpenTM1', 'ETHOpenTM2',
    'PrevHigh', 'PrevHigh2', 'PrevHigh3', 'PrevHigh4',
    'PrevLow', 'PrevLow2', 'PrevLow3', 'PrevLow4',
    'PrevVolTo', 'PrevVolTo2', 'PrevVolTo3', 'PrevVolTo4',
    'PrevVolFrom', 'PrevVolFrom2', 'PrevVolFrom3', 'PrevVolFrom4',
    'PrevSP', 'PrevSP2', 'PrevSP3', 'PrevSP4',
    'ETHOpenTM3', 'ETHOpenTM4', 'ETHOpenTM5', 'ETHOpenTM6', 'ETHOpenTM7',
]

# Chronological split. dropna() removes rows with a missing value in ANY
# column (not only the feature columns) and returns a new frame, so adding
# prediction columns to `test` below does not write into `df`.
train = df[df.index < SPLIT_DATE].dropna()
test = df[df.index >= SPLIT_DATE].dropna()

# Build the design matrices once and share them between all three models.
X_train = train[features].values
y_train = train['ETHOpen'].values
X_test = test[features].values

dtrain = xgb.DMatrix(X_train, label=y_train)

# NOTE(review): newer XGBoost releases rename 'reg:linear' to
# 'reg:squarederror' and replace 'silent' with 'verbosity'; the original
# values are kept to match the environment this notebook was written for.
params = {
    'booster': 'gbtree',
    'objective': 'reg:linear',
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'silent': 1,
    'eval_metric': 'rmse',
}
num_round = 50
# Only the training set is monitored, so the logged RMSE measures fit on the
# training data, not out-of-sample accuracy.
eval_list = [(dtrain, 'train')]

print('Training xgb model:')
bst = xgb.train(params, dtrain, num_round, eval_list)

print('Train Ridge Regression:')
lr = Ridge()
lr.fit(X_train, y_train)

print('Training Lasso Regression:')
# max_iter is an iteration count, so pass an int (recent scikit-learn releases
# reject a float such as 1e7).
# NOTE(review): `normalize` was removed in scikit-learn 1.2; this call needs an
# older release.
lassoreg = Lasso(alpha=.001, normalize=True, max_iter=10**7)
lassoreg.fit(X_train, y_train)

# Score the held-out rows with each model and store the results next to the
# actual 'ETHOpen' values.
dtest = xgb.DMatrix(X_test)
xgb_pred = bst.predict(dtest)
lr_pred = lr.predict(X_test)
lasso_pred = lassoreg.predict(X_test)

test['ETHOpenRidgexgb'] = XGB_WEIGHT*xgb_pred + RIDGE_WEIGHT*lr_pred
test['ETHOpenRidge'] = lr_pred
test['ETHOpenxgb'] = xgb_pred
test['ETHOpenLasso'] = lasso_pred

print("Prediction: {}".format(test[['ETHOpen','ETHOpenRidgexgb','ETHOpenRidge','ETHOpenxgb','ETHOpenLasso']]))

Training xgb model:
[0]	train-rmse:201.232
[1]	train-rmse:143.012
[2]	train-rmse:102.264
[3]	train-rmse:74.8139
[4]	train-rmse:53.7264
[5]	train-rmse:39.2052
[6]	train-rmse:29.1174
[7]	train-rmse:22.1461
[8]	train-rmse:16.7908
[9]	train-rmse:13.4215
[10]	train-rmse:10.3724
[11]	train-rmse:8.36058
[12]	train-rmse:7.17471
[13]	train-rmse:5.87937
[14]	train-rmse:4.84781
[15]	train-rmse:4.02787
[16]	train-rmse:3.39152
[17]	train-rmse:2.95932
[18]	train-rmse:2.57113
[19]	train-rmse:2.35651
[20]	train-rmse:2.0432
[21]	train-rmse:1.71312
[22]	train-rmse:1.53414
[23]	train-rmse:1.36991
[24]	train-rmse:1.15787
[25]	train-rmse:1.04841
[26]	train-rmse:0.916114
[27]	train-rmse:0.816167
[28]	train-rmse:0.721434
[29]	train-rmse:0.624753
[30]	train-rmse:0.541245
[31]	train-rmse:0.494745
[32]	train-rmse:0.458097
[33]	train-rmse:0.408072
[34]	train-rmse:0.374002
[35]	train-rmse:0.333974
[36]	train-rmse:0.30529
[37]	train-rmse:0.275639
[38]	train-rmse:0.250836
[39]	train-rmse:0.226013
[40]	train-rmse:0.

In [8]:
# Preview the first five rows of the test frame, including the prediction columns added to it.
test.head(n=5)

Unnamed: 0_level_0,ETHOpen,ETHHigh,ETHLow,TBondsOpenValue,IndexFutures,InflationRateValue,UnemploymentValue,FedFundRateValue,GDP,SandPValue,...,BTCOpenTM2,BTCOpenTM3,BTCOpenTM4,BTCOpenTM5,BTCOpenTM6,BTCOpenTM7,ETHOpenRidgexgb,ETHOpenRidge,ETHOpenxgb,ETHOpenLasso
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-01,388.33,395.3,386.71,157.4375,92.36,1.939,4.4,1.16,18569.1,2472.0,...,317.94,338.92,335.37,299.21,297.95,294.1,306.514056,307.643593,301.995911,307.510751
2017-09-05,303.7,324.37,275.97,156.84375,92.395,1.939,4.4,1.16,18569.1,2463.0,...,338.92,335.37,299.21,297.95,294.1,275.84,317.277155,316.878683,318.871033,317.45077
2017-09-06,317.94,341.77,317.82,158.4375,91.955,1.939,4.4,1.16,18569.1,2459.3,...,335.37,299.21,297.95,294.1,275.84,223.14,337.161489,342.96321,313.95459,343.684012
2017-09-07,338.92,338.92,323.36,157.6875,91.925,1.939,4.4,1.16,18569.1,2461.0,...,299.21,297.95,294.1,275.84,223.14,258.4,336.524118,344.61622,304.155701,345.661304
2017-09-08,335.37,335.39,293.12,158.96875,91.215,1.939,4.4,1.16,18569.1,2461.0,...,297.95,294.1,275.84,223.14,258.4,297.53,311.350586,311.208882,311.917389,312.250268


In [33]:


# NOTE(review): RandomizedLasso was deprecated in scikit-learn 0.19 and removed
# in 0.21, so this cell only runs on older releases.
from sklearn.linear_model import RandomizedLasso

# Feature scoring with Randomized Lasso: the estimator refits a Lasso on
# random perturbations of the training data and exposes one score per feature
# in `scores_`. Uses `train` and `features` from the modelling cell.
print('Training Randomized Lasso')
rlasso = RandomizedLasso(
    alpha=0.0001,
    normalize=True,
    max_iter=10**7,    # iteration count: pass an int rather than the float 1e7
    random_state=0,    # fix the resampling so the scores are reproducible
)
rlasso.fit(train[features].values, train['ETHOpen'].values)

print("Features sorted by their score:")
# Convert to plain Python floats before rounding so the printed values read
# as e.g. 0.98 instead of 0.97999999999999998.
feature_scores = sorted(
    zip((round(float(score), 4) for score in rlasso.scores_), features),
    reverse=True,
)
print(feature_scores)

  

Train Randomized Lasso Regression:


  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g1 = arrayfuncs.min_pos

Features sorted by their score:
[(1.0, 'PrevLow'), (0.97999999999999998, 'PrevHigh'), (0.91500000000000004, 'UnemploymentValue'), (0.89500000000000002, 'TBondsOpenValue'), (0.82999999999999996, 'PrevLow2'), (0.79500000000000004, 'PrevHigh4'), (0.79000000000000004, 'PrevVolFrom4'), (0.71999999999999997, 'PrevLow4'), (0.68000000000000005, 'PrevVolFrom'), (0.67000000000000004, 'PrevSP4'), (0.65500000000000003, 'PrevVolTo2'), (0.64000000000000001, 'PrevVolTo'), (0.63500000000000001, 'PrevVolTo3'), (0.60499999999999998, 'PrevHigh2'), (0.59999999999999998, 'PrevVolTo4'), (0.59999999999999998, 'ETHOpenTM1'), (0.58999999999999997, 'PrevSP3'), (0.57999999999999996, 'PrevVolFrom2'), (0.57499999999999996, 'SandPValue'), (0.55500000000000005, 'PrevSP2'), (0.54500000000000004, 'BTCOpenTM7'), (0.52500000000000002, 'PrevVolFrom3'), (0.52000000000000002, 'PrevSP'), (0.48999999999999999, 'PrevLow3'), (0.44500000000000001, 'PrevHigh3'), (0.39500000000000002, 'BTCOpenTM2'), (0.36499999999999999, 'BTCOpen

  g1 = arrayfuncs.min_pos((C - Cov) / (AA - corr_eq_dir + tiny))
  g2 = arrayfuncs.min_pos((C + Cov) / (AA + corr_eq_dir + tiny))


In [36]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression

# Recursive feature elimination with plain linear regression as the base model.
# The estimator gets its own name so it does not overwrite `lr`, the fitted
# Ridge model that the prediction code relies on.
rfe_estimator = LinearRegression()

# n_features_to_select=1 keeps eliminating until a single feature is left,
# which yields a complete ranking (1 = eliminated last).
rfe = RFE(rfe_estimator, n_features_to_select=1)
rfe.fit(train[features].values, train['ETHOpen'].values)

print("Features sorted by their rank:")
# Ranks are integers, so no rounding is needed.
feature_ranks = sorted(zip((int(rank) for rank in rfe.ranking_), features))
print(feature_ranks)

Features sorted by their rank:
[(1, 'UnemploymentValue'), (2, 'PrevHigh'), (3, 'TBondsOpenValue'), (4, 'PrevLow'), (5, 'BTCOpenTM1'), (6, 'ETHOpenTM1'), (7, 'ETHOpenTM2'), (8, 'PrevLow2'), (9, 'BTCOpenTM2'), (10, 'PrevHigh3'), (11, 'PrevHigh4'), (12, 'BTCOpenTM4'), (13, 'ETHOpenTM4'), (14, 'BTCOpenTM3'), (15, 'PrevLow4'), (16, 'PrevSP3'), (17, 'PrevSP2'), (18, 'PrevSP'), (19, 'SandPValue'), (20, 'PrevSP4'), (21, 'BTCOpenTM6'), (22, 'ETHOpenTM5'), (23, 'PrevHigh2'), (24, 'ETHOpenTM3'), (25, 'ETHOpenTM6'), (26, 'BTCOpenTM5'), (27, 'BTCOpenTM7'), (28, 'ETHOpenTM7'), (29, 'PrevLow3'), (30, 'PrevVolFrom4'), (31, 'PrevVolFrom2'), (32, 'PrevVolFrom'), (33, 'PrevVolFrom3'), (34, 'PrevVolTo2'), (35, 'GDP'), (36, 'PrevVolTo4'), (37, 'PrevVolTo3'), (38, 'PrevVolTo')]


In [40]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso


# Variant experiment: same XGBoost / Ridge / Lasso pipeline as the baseline
# cell, but with seven features removed, deeper trees (max_depth=8) and more
# boosting rounds (93).
# NOTE(review): this cell repeats the baseline pipeline almost verbatim; a
# shared function taking (features, max_depth, num_round) would remove the
# duplication.

# NOTE(review): absolute, machine-specific path; consider making it relative
# to the repository so the notebook runs on other machines.
DATA_PATH = 'C:/Users/JP/Documents/School/Practicum/Github/Practicum/data/dataset2.csv'
START_DATE = '2017-05-30'   # rows on or before this date are discarded
SPLIT_DATE = '2017-10-05'   # train: strictly before; test: on or after
XGB_WEIGHT = 0.2            # blend weights used for 'ETHOpenRidgexgb'
RIDGE_WEIGHT = 0.8

# read_csv already returns a DataFrame; `data` is kept as the untouched raw frame.
data = pd.read_csv(DATA_PATH)
df = (
    data
    .assign(Date=pd.to_datetime(data['Date'], format='%Y-%m-%d'))
    .set_index('Date')
)
df = df[df.index > START_DATE]

# Column order is preserved from the original list.
features = [
    'TBondsOpenValue', 'UnemploymentValue',
    'BTCOpenTM1', 'BTCOpenTM2', 'BTCOpenTM3',
    'BTCOpenTM5', 'BTCOpenTM6', 'BTCOpenTM7',
    'SandPValue',
    'ETHOpenTM1',
    'PrevHigh', 'PrevHigh2', 'PrevHigh3', 'PrevHigh4',
    'PrevLow', 'PrevLow2', 'PrevLow3', 'PrevLow4',
    'PrevVolTo', 'PrevVolTo2', 'PrevVolTo3', 'PrevVolTo4',
    'PrevVolFrom', 'PrevVolFrom2', 'PrevVolFrom3', 'PrevVolFrom4',
    'PrevSP', 'PrevSP2', 'PrevSP3', 'PrevSP4',
    'ETHOpenTM5',
]

# Chronological split. dropna() removes rows with a missing value in ANY
# column (not only the feature columns) and returns a new frame, so adding
# prediction columns to `test` below does not write into `df`.
train = df[df.index < SPLIT_DATE].dropna()
test = df[df.index >= SPLIT_DATE].dropna()

# Build the design matrices once and share them between all three models.
X_train = train[features].values
y_train = train['ETHOpen'].values
X_test = test[features].values

dtrain = xgb.DMatrix(X_train, label=y_train)

# NOTE(review): newer XGBoost releases rename 'reg:linear' to
# 'reg:squarederror' and replace 'silent' with 'verbosity'; the original
# values are kept to match the environment this notebook was written for.
params = {
    'booster': 'gbtree',
    'objective': 'reg:linear',
    'max_depth': 8,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'silent': 1,
    'eval_metric': 'rmse',
}
num_round = 93
# Only the training set is monitored, so the logged RMSE measures fit on the
# training data, not out-of-sample accuracy.
eval_list = [(dtrain, 'train')]

print('Training xgb model:')
bst = xgb.train(params, dtrain, num_round, eval_list)

print('Train Ridge Regression:')
lr = Ridge()
lr.fit(X_train, y_train)

print('Training Lasso Regression:')
# max_iter is an iteration count, so pass an int (recent scikit-learn releases
# reject a float such as 1e7).
# NOTE(review): `normalize` was removed in scikit-learn 1.2; this call needs an
# older release.
lassoreg = Lasso(alpha=.001, normalize=True, max_iter=10**7)
lassoreg.fit(X_train, y_train)

print("7 Features Removed:")

# Score the held-out rows with each model and store the results next to the
# actual 'ETHOpen' values.
dtest = xgb.DMatrix(X_test)
xgb_pred = bst.predict(dtest)
lr_pred = lr.predict(X_test)
lasso_pred = lassoreg.predict(X_test)

test['ETHOpenRidgexgb'] = XGB_WEIGHT*xgb_pred + RIDGE_WEIGHT*lr_pred
test['ETHOpenRidge'] = lr_pred
test['ETHOpenxgb'] = xgb_pred
test['ETHOpenLasso'] = lasso_pred

print("Prediction: {}".format(test[['ETHOpen','ETHOpenRidgexgb','ETHOpenRidge','ETHOpenxgb','ETHOpenLasso']]))

Training xgb model:
[0]	train-rmse:201.232
[1]	train-rmse:143.494
[2]	train-rmse:102.64
[3]	train-rmse:74.2604
[4]	train-rmse:53.4743
[5]	train-rmse:39.1738
[6]	train-rmse:28.5136
[7]	train-rmse:21.0569
[8]	train-rmse:16.0333
[9]	train-rmse:12.3027
[10]	train-rmse:9.77773
[11]	train-rmse:7.71707
[12]	train-rmse:6.19113
[13]	train-rmse:5.08315
[14]	train-rmse:4.48224
[15]	train-rmse:3.70136
[16]	train-rmse:3.18813
[17]	train-rmse:2.70743
[18]	train-rmse:2.44679
[19]	train-rmse:2.2512
[20]	train-rmse:1.89181
[21]	train-rmse:1.65803
[22]	train-rmse:1.40373
[23]	train-rmse:1.21774
[24]	train-rmse:1.1575
[25]	train-rmse:0.969944
[26]	train-rmse:0.941463
[27]	train-rmse:0.920204
[28]	train-rmse:0.784973
[29]	train-rmse:0.668154
[30]	train-rmse:0.568021
[31]	train-rmse:0.481873
[32]	train-rmse:0.45366
[33]	train-rmse:0.387434
[34]	train-rmse:0.330912
[35]	train-rmse:0.281156
[36]	train-rmse:0.243255
[37]	train-rmse:0.206589
[38]	train-rmse:0.176389
[39]	train-rmse:0.171164
[40]	train-rmse:0.1

In [41]:
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso


# Variant experiment: same XGBoost / Ridge / Lasso pipeline as the baseline
# cell, with 'GDP', 'PrevLow3', all 'PrevVolTo*' columns and most
# 'PrevVolFrom*' columns left out of the feature list.
# NOTE(review): this cell repeats the baseline pipeline almost verbatim; a
# shared function taking (features, max_depth, num_round) would remove the
# duplication.

# NOTE(review): absolute, machine-specific path; consider making it relative
# to the repository so the notebook runs on other machines.
DATA_PATH = 'C:/Users/JP/Documents/School/Practicum/Github/Practicum/data/dataset2.csv'
START_DATE = '2017-05-30'   # rows on or before this date are discarded
SPLIT_DATE = '2017-10-05'   # train: strictly before; test: on or after
XGB_WEIGHT = 0.2            # blend weights used for 'ETHOpenRidgexgb'
RIDGE_WEIGHT = 0.8

# read_csv already returns a DataFrame; `data` is kept as the untouched raw frame.
data = pd.read_csv(DATA_PATH)
df = (
    data
    .assign(Date=pd.to_datetime(data['Date'], format='%Y-%m-%d'))
    .set_index('Date')
)
df = df[df.index > START_DATE]

# Column order is preserved from the original list.
features = [
    'TBondsOpenValue', 'UnemploymentValue',
    'BTCOpenTM1', 'BTCOpenTM2', 'BTCOpenTM3', 'BTCOpenTM4',
    'BTCOpenTM5', 'BTCOpenTM6', 'BTCOpenTM7',
    'SandPValue',
    'ETHOpenTM1', 'ETHOpenTM2',
    'PrevHigh', 'PrevHigh2', 'PrevHigh3', 'PrevHigh4',
    'PrevLow', 'PrevLow2', 'PrevLow4',
    'PrevVolFrom2', 'PrevVolFrom4',
    'PrevSP', 'PrevSP2', 'PrevSP3', 'PrevSP4',
    'ETHOpenTM3', 'ETHOpenTM4', 'ETHOpenTM5', 'ETHOpenTM6', 'ETHOpenTM7',
]

# Chronological split. dropna() removes rows with a missing value in ANY
# column (not only the feature columns) and returns a new frame, so adding
# prediction columns to `test` below does not write into `df`.
train = df[df.index < SPLIT_DATE].dropna()
test = df[df.index >= SPLIT_DATE].dropna()

# Build the design matrices once and share them between all three models.
X_train = train[features].values
y_train = train['ETHOpen'].values
X_test = test[features].values

dtrain = xgb.DMatrix(X_train, label=y_train)

# NOTE(review): newer XGBoost releases rename 'reg:linear' to
# 'reg:squarederror' and replace 'silent' with 'verbosity'; the original
# values are kept to match the environment this notebook was written for.
params = {
    'booster': 'gbtree',
    'objective': 'reg:linear',
    'max_depth': 6,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'silent': 1,
    'eval_metric': 'rmse',
}
num_round = 50
# Only the training set is monitored, so the logged RMSE measures fit on the
# training data, not out-of-sample accuracy.
eval_list = [(dtrain, 'train')]

print('Training xgb model:')
bst = xgb.train(params, dtrain, num_round, eval_list)

print('Train Ridge Regression:')
lr = Ridge()
lr.fit(X_train, y_train)

print('Training Lasso Regression:')
# max_iter is an iteration count, so pass an int (recent scikit-learn releases
# reject a float such as 1e7).
# NOTE(review): `normalize` was removed in scikit-learn 1.2; this call needs an
# older release.
lassoreg = Lasso(alpha=.001, normalize=True, max_iter=10**7)
lassoreg.fit(X_train, y_train)

# Score the held-out rows with each model and store the results next to the
# actual 'ETHOpen' values.
dtest = xgb.DMatrix(X_test)
xgb_pred = bst.predict(dtest)
lr_pred = lr.predict(X_test)
lasso_pred = lassoreg.predict(X_test)

test['ETHOpenRidgexgb'] = XGB_WEIGHT*xgb_pred + RIDGE_WEIGHT*lr_pred
test['ETHOpenRidge'] = lr_pred
test['ETHOpenxgb'] = xgb_pred
test['ETHOpenLasso'] = lasso_pred

print("Prediction: {}".format(test[['ETHOpen','ETHOpenRidgexgb','ETHOpenRidge','ETHOpenxgb','ETHOpenLasso']]))

Training xgb model:
[0]	train-rmse:200.88
[1]	train-rmse:142.867
[2]	train-rmse:102.243
[3]	train-rmse:74.2216
[4]	train-rmse:53.2865
[5]	train-rmse:38.6644
[6]	train-rmse:28.9281
[7]	train-rmse:21.8171
[8]	train-rmse:17.1668
[9]	train-rmse:13.7975
[10]	train-rmse:10.9837
[11]	train-rmse:8.59815
[12]	train-rmse:6.95012
[13]	train-rmse:5.7386
[14]	train-rmse:5.06709
[15]	train-rmse:4.14391
[16]	train-rmse:3.44085
[17]	train-rmse:2.84726
[18]	train-rmse:2.43321
[19]	train-rmse:2.17908
[20]	train-rmse:1.98419
[21]	train-rmse:1.79858
[22]	train-rmse:1.59609
[23]	train-rmse:1.41195
[24]	train-rmse:1.21686
[25]	train-rmse:1.08826
[26]	train-rmse:0.964201
[27]	train-rmse:0.856938
[28]	train-rmse:0.768176
[29]	train-rmse:0.703939
[30]	train-rmse:0.630692
[31]	train-rmse:0.558575
[32]	train-rmse:0.496359
[33]	train-rmse:0.438795
[34]	train-rmse:0.403298
[35]	train-rmse:0.349623
[36]	train-rmse:0.308488
[37]	train-rmse:0.263289
[38]	train-rmse:0.242467
[39]	train-rmse:0.208795
[40]	train-rmse:0.

In [None]:
# Add randomized Lasso
# Add feature selection
# Compare to ARIMA
# Try to fill in the missing values
# Measure Success
# Make an 'Active' Model