In [47]:

import os
os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # Report only TF errors by default
import warnings
warnings.filterwarnings("ignore")
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from fredapi import Fred
from dataset import Dataset
import gtab
#sci-kit imports
from sklearn.linear_model import LinearRegression, Ridge
from sktime.performance_metrics.forecasting import mean_squared_percentage_error
from sklearn.svm import LinearSVR
import sklearn.preprocessing
import sklearn.model_selection
import sklearn.pipeline
from visualizations import Visualizer
np.random.seed(42)

In [48]:
# Check whether TensorFlow can see a GPU device.
tf.config.get_visible_devices("GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [49]:
data = Dataset.get_btc_data()

In [50]:
# Keep rows from 2014-09-17 onward. Take an explicit copy so that the column
# assignments in later cells act on an independent frame rather than on a
# slice of the loaded data.
data = data.loc["2014-9-17":].copy()

In [51]:
data

Unnamed: 0,"BTC / Addresses, active, count","BTC / NVT, adjusted, 90d MA","BTC / NVT, adjusted, free float, 90d MA","BTC / NVT, adjusted","BTC / NVT, adjusted, free float","BTC / Flow, in, to exchanges, USD","BTC / Flow, out, from exchanges, USD","BTC / Fees, transaction, mean, USD","BTC / Fees, transaction, median, USD","BTC / Fees, total, USD",...,Close_IGV,Close_QQQ,Google_crypto_search,Wiki_crypto_search,RGDP_US,RGDP_PC_US,CPI_US,M2_US,USD_EUR_rate,BTC-USD
2014-09-17,191063.0,82.176970,76.651156,103.567097,96.602951,1.065752e+07,9.743524e+06,0.069971,0.045634,5494.516757,...,85.737656,92.093697,0.028571,,18406.941,57536.0,1.677134,11506.0,1.2958,457.334015
2014-09-18,190890.0,76.826435,71.646822,75.335079,70.256013,1.696480e+07,1.359548e+07,0.068289,0.042688,5214.342615,...,86.452225,92.787651,0.028571,,18406.941,57536.0,1.677134,11506.0,1.2913,424.440002
2014-09-19,172255.0,70.675713,65.897688,53.588317,49.965484,1.770344e+07,1.694158e+07,0.060801,0.039389,4261.732292,...,86.021515,92.730133,0.028571,,18406.941,57536.0,1.677134,11506.0,1.2835,394.795990
2014-09-20,169941.0,73.687502,68.691994,74.395015,69.351542,9.387692e+06,8.728492e+06,0.066003,0.041104,4235.264753,...,,,0.028571,,18406.941,57536.0,1.677134,11506.0,1.2835,408.903992
2014-09-21,200712.0,72.300627,67.387059,97.596787,90.964085,1.050540e+07,1.089809e+07,0.063093,0.040040,3638.427361,...,,,0.028571,,18406.941,57536.0,1.677134,11506.0,1.2835,398.821014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-26,787672.0,71.420128,52.225802,190.545333,139.335829,1.683094e+08,1.959590e+08,1.348707,0.530874,306349.750545,...,,,26.035326,3483.0,21989.981,65799.0,6.516674,21428.3,,16919.804688
2022-12-27,867369.0,71.311728,52.133758,146.054319,106.775712,2.861220e+08,2.541181e+08,1.629313,0.663281,393800.336021,...,251.721115,261.771362,26.035326,3747.0,21989.981,65799.0,6.516674,21428.3,1.0654,16717.173828
2022-12-28,904203.0,71.677896,52.380487,111.280678,81.321250,2.809234e+08,3.321595e+08,1.261058,0.607543,340456.769181,...,248.961319,258.315247,26.035326,4060.0,21989.981,65799.0,6.516674,21428.3,1.0622,16552.572266
2022-12-29,945186.0,73.206816,53.482266,106.721323,77.966758,2.713737e+08,3.133503e+08,1.263260,0.550805,337268.899500,...,256.320770,264.611725,26.035326,4064.0,21989.981,65799.0,6.516674,21428.3,1.0668,16642.341797


In [52]:
# Per-column missing-value report: {column: [NaN count, "% <share of rows>"]},
# limited to columns that contain at least one NaN.
missing_mask = data.isnull()
{
    name: [missing_mask[name].sum(), f'% {np.round(np.mean(missing_mask[name] * 100), 3)}']
    for name in data.columns
    if missing_mask[name].any()
}

{'BTC / Capitalization, market, estimated supply, USD': [1739, '% 57.45'],
 'Wiki_btc_search': [287, '% 9.481'],
 'Close_^DJI': [939, '% 31.021'],
 'Close_^GSPC': [939, '% 31.021'],
 'Close_GC=F': [942, '% 31.12'],
 'Close_^VIX': [939, '% 31.021'],
 'Close_^IXIC': [939, '% 31.021'],
 'Close_SMH': [939, '% 31.021'],
 'Close_VGT': [939, '% 31.021'],
 'Close_XSD': [939, '% 31.021'],
 'Close_IYW': [939, '% 31.021'],
 'Close_FTEC': [939, '% 31.021'],
 'Close_IGV': [939, '% 31.021'],
 'Close_QQQ': [939, '% 31.021'],
 'Wiki_crypto_search': [287, '% 9.481'],
 'USD_EUR_rate': [122, '% 4.03']}

In [53]:

# Replace NaN with 0 in the three columns below. DataFrame.fillna with a
# column -> value mapping returns a new frame, so nothing is assigned onto a
# slice of the loaded data.
# TODO: zero-filling the estimated-supply market cap is a placeholder and is
# expected to be replaced by a better imputation.
data = data.fillna({
    'Wiki_crypto_search': 0,
    'Wiki_btc_search': 0,
    "BTC / Capitalization, market, estimated supply, USD": 0,
})

In [54]:
data

Unnamed: 0,"BTC / Addresses, active, count","BTC / NVT, adjusted, 90d MA","BTC / NVT, adjusted, free float, 90d MA","BTC / NVT, adjusted","BTC / NVT, adjusted, free float","BTC / Flow, in, to exchanges, USD","BTC / Flow, out, from exchanges, USD","BTC / Fees, transaction, mean, USD","BTC / Fees, transaction, median, USD","BTC / Fees, total, USD",...,Close_IGV,Close_QQQ,Google_crypto_search,Wiki_crypto_search,RGDP_US,RGDP_PC_US,CPI_US,M2_US,USD_EUR_rate,BTC-USD
2014-09-17,191063.0,82.176970,76.651156,103.567097,96.602951,1.065752e+07,9.743524e+06,0.069971,0.045634,5494.516757,...,85.737656,92.093697,0.028571,0.0,18406.941,57536.0,1.677134,11506.0,1.2958,457.334015
2014-09-18,190890.0,76.826435,71.646822,75.335079,70.256013,1.696480e+07,1.359548e+07,0.068289,0.042688,5214.342615,...,86.452225,92.787651,0.028571,0.0,18406.941,57536.0,1.677134,11506.0,1.2913,424.440002
2014-09-19,172255.0,70.675713,65.897688,53.588317,49.965484,1.770344e+07,1.694158e+07,0.060801,0.039389,4261.732292,...,86.021515,92.730133,0.028571,0.0,18406.941,57536.0,1.677134,11506.0,1.2835,394.795990
2014-09-20,169941.0,73.687502,68.691994,74.395015,69.351542,9.387692e+06,8.728492e+06,0.066003,0.041104,4235.264753,...,,,0.028571,0.0,18406.941,57536.0,1.677134,11506.0,1.2835,408.903992
2014-09-21,200712.0,72.300627,67.387059,97.596787,90.964085,1.050540e+07,1.089809e+07,0.063093,0.040040,3638.427361,...,,,0.028571,0.0,18406.941,57536.0,1.677134,11506.0,1.2835,398.821014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-26,787672.0,71.420128,52.225802,190.545333,139.335829,1.683094e+08,1.959590e+08,1.348707,0.530874,306349.750545,...,,,26.035326,3483.0,21989.981,65799.0,6.516674,21428.3,,16919.804688
2022-12-27,867369.0,71.311728,52.133758,146.054319,106.775712,2.861220e+08,2.541181e+08,1.629313,0.663281,393800.336021,...,251.721115,261.771362,26.035326,3747.0,21989.981,65799.0,6.516674,21428.3,1.0654,16717.173828
2022-12-28,904203.0,71.677896,52.380487,111.280678,81.321250,2.809234e+08,3.321595e+08,1.261058,0.607543,340456.769181,...,248.961319,258.315247,26.035326,4060.0,21989.981,65799.0,6.516674,21428.3,1.0622,16552.572266
2022-12-29,945186.0,73.206816,53.482266,106.721323,77.966758,2.713737e+08,3.133503e+08,1.263260,0.550805,337268.899500,...,256.320770,264.611725,26.035326,4064.0,21989.981,65799.0,6.516674,21428.3,1.0668,16642.341797


In [55]:
# Columns whose gaps are filled by carrying the last observed value forward.
ffill_columns = [
    'Close_^DJI', 'Close_^GSPC', 'Close_GC=F', 'Close_^VIX', 'Close_^IXIC',
    'Close_SMH', 'Close_VGT', 'Close_XSD', 'Close_IYW', 'Close_FTEC', 'Close_IGV',
    'Close_QQQ', 'USD_EUR_rate',
]
data[ffill_columns] = data[ffill_columns].ffill()

# shift(-10) moves every BTC-USD value 10 rows earlier, so each row carries the
# value recorded 10 rows later; the last 10 rows become NaN.
data["BTC-USD"] = data["BTC-USD"].shift(-10)

# Drop every row that still contains a NaN (including the 10 trailing rows
# produced by the shift).
data = data.dropna()

In [59]:
def create_lstm_input(data, target, lag_order, forecast_time = 1, target_col = "BTC-USD"):
    """Build sliding-window inputs and matching targets for a sequence model.

    The `target_col` column is shifted down by `forecast_time` rows on a private
    copy of `data`, rows containing NaN are dropped, and every run of
    `lag_order` consecutive remaining rows becomes one sample.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature table; must contain `target_col`. It is not modified.
    target : pandas.Series
        Values to predict; read by position with `.iloc`.
    lag_order : int
        Number of consecutive rows in each window.
    forecast_time : int, default 1
        Row shift applied to `target_col`, and offset added when picking the
        target value for a window.
    target_col : str, default "BTC-USD"
        Name of the column that is lagged.

    Returns
    -------
    tuple of numpy.ndarray
        `X` stacks the windows, one per sample. `Y` holds, for the window that
        ends just before row `i` of the filtered frame,
        `target.iloc[i - 1 + forecast_time]`.

    Notes
    -----
    NOTE(review): positions into `target` are not adjusted for rows removed by
    `dropna`; confirm that this alignment is the intended one.
    """
    # Work on a copy so the caller's frame is left untouched; assigning the
    # shifted column onto `data` itself would shift it again on every call.
    lagged = data.copy()
    lagged[target_col] = lagged[target_col].shift(forecast_time)
    lagged = lagged.dropna()

    X, Y = [], []
    for i in range(lag_order, len(lagged)):
        X.append(lagged.iloc[i - lag_order:i, :])
        Y.append(target.iloc[i - 1 + forecast_time])
    return np.array(X), np.array(Y)

In [63]:
# Build windows of 3 rows with forecast_time=10, passing the frame's last
# column as the target series.
x, y = create_lstm_input(data, data.iloc[:,-1], 3, 10)

In [66]:
x.shape

(3012, 3, 52)

In [None]:
data.head(20)

In [None]:
def create_lstm_input(data, target, lag_order):
    """Turn a feature frame into sliding windows with one target per window.

    For every position `end` from `lag_order` up to `len(data) - 1`, the window
    is rows `end - lag_order` .. `end - 1` of `data` and its target is
    `target.iloc[end]`.

    Note: this three-argument version reuses the name of the earlier
    four-argument `create_lstm_input`, so it replaces that definition once
    this cell has run.

    Returns a pair `(X, Y)` of numpy arrays built from the windows and targets.
    """
    window_ends = range(lag_order, len(data))
    windows = [data.iloc[end - lag_order:end, :] for end in window_ends]
    labels = [target.iloc[end] for end in window_ends]
    return np.array(windows), np.array(labels)


In [None]:
# Scale every column except the last one (the target) into a separate frame.
# `data` itself stays unscaled, so later cells that read it still see the raw
# values.
# NOTE(review): the scaler is fitted on all rows, including rows that are later
# held out for validation.
scaler = sklearn.preprocessing.RobustScaler()
features_scaled = pd.DataFrame(
    scaler.fit_transform(data.iloc[:, :-1]),
    index=data.index,
    columns=data.columns[:-1],
)
# Windows of 3 rows over the scaled features; targets come from the last column.
x, y = create_lstm_input(features_scaled, data.iloc[:, -1], 3)

In [None]:
x.shape

In [None]:
def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference between y_pred and y_true."""
    squared_residuals = tf.square(y_pred - y_true)
    return tf.sqrt(tf.reduce_mean(squared_residuals))
# Two stacked LSTM layers followed by a single-unit dense output.
model = tf.keras.Sequential([
    # Take (timesteps, features) from `x` itself so the network always matches
    # the windows it is trained on, whatever lag order produced them.
    tf.keras.layers.LSTM(500, activation="relu", input_shape=x.shape[1:],
                         return_sequences=True),
    tf.keras.layers.LSTM(500, activation="relu"),
    tf.keras.layers.Dense(1),
])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss="mse", metrics = [root_mean_squared_error])
model.summary()
# NOTE(review): the model is fitted on every window in `x`; no validation data
# is passed to fit().
model.fit(x,y,epochs=100,batch_size=50)

In [None]:
y

In [None]:
# NOTE(review): this rebinds `x` from the input windows to the model's
# predictions. The windows are no longer reachable under this name, and
# re-running the cell would pass the predictions back into the model.
x = model.predict(x)

In [None]:
mean_squared_percentage_error(y, x, square_root=True)

In [None]:
x

In [None]:
# `x` was rebound to the model's predictions in an earlier cell, so plot it
# directly; sending it through model.predict() again would feed predictions
# back in as if they were input windows.
plt.plot(x, label="predicted")
plt.plot(y, label="actual")
plt.legend();

In [None]:
data.columns

In [None]:
data

In [None]:
fig = Visualizer.draw_missing_data(data)
fig.show()

In [None]:
""" data.drop(["BTC / Capitalization, market, free float, USD",
           "BTC / Capitalization, realized, USD",
           "BTC / Capitalization, market, current supply, USD",
           "BTC / Capitalization, market, estimated supply, USD"], axis = 1, inplace=True) """

In [None]:
# Chronological hold-out: with shuffle=False the last 10% of rows form the
# validation set. random_state is not passed because it has no effect when
# shuffling is disabled.
train_data, validation_data, train_target, validation_target = sklearn.model_selection.train_test_split(
    data.iloc[:, :-1], data.iloc[:, -1], test_size = 0.1, shuffle=False)
scaler = sklearn.preprocessing.RobustScaler()  # not used by the pipeline below; kept as a notebook-level name
from pipeline import Pipeline
# The pipeline is assembled by the project helper; the grid below presumably
# addresses its "scaler" and "estimator" steps (verify against the helper).
pipe = Pipeline.assembly_pipeline(estimator = LinearSVR(), dim_reducer = None)
parameter_grid = {"scaler__unit_variance":[True,False],"estimator__C": [0.2,0.5,0.7,1,3,5,10,20,30]}
# Time-ordered CV: each fold is scored on rows that come after its training rows.
split = sklearn.model_selection.TimeSeriesSplit(n_splits=3)
model = sklearn.model_selection.GridSearchCV(pipe, parameter_grid, cv=split, scoring = 'neg_root_mean_squared_error', verbose = 4, n_jobs=-1)
model.fit(train_data, train_target)
print(model.best_params_)
print(model.best_estimator_)
print(model.best_score_)

In [None]:
model

In [None]:
# Shapes of the (train, test) index arrays produced for each CV fold.
[(train_idx.shape, test_idx.shape) for train_idx, test_idx in split.split(train_data)]

In [None]:
validation_data

In [None]:
predictions = model.predict(validation_data)

In [None]:
train_data

In [None]:
len(predictions)

In [None]:
validation_target.iloc[10:]

In [None]:
validation_target.shift(10).dropna()

In [None]:
# Percentage error between the validation target and a copy of itself lagged by
# 10 rows; both series drop the first 10 rows of the validation period.
# NOTE(review): the lagged series is passed first (the y_true position) and the
# unshifted series second (y_pred) — confirm this order is intended.
# NOTE(review): square_root=True is not set here, unlike the LSTM evaluation
# cell earlier in the notebook — confirm the two numbers are meant to be compared.
# The same function is already imported in the notebook's first cell.
from sktime.performance_metrics.forecasting import mean_squared_percentage_error
mean_squared_percentage_error(validation_target.shift(10).dropna(),validation_target.iloc[10:])

In [None]:
plt.plot(validation_target.shift(10).dropna())
plt.plot(validation_target.iloc[10:])

In [None]:
y_pred = pd.Series(predictions, index=validation_data.index)
train_pred = pd.Series(model.predict(train_data), index = train_data.index)

In [None]:
visualizer = Visualizer()
fig = visualizer.draw_prediction_full(train_target, train_pred, validation_target, y_pred, 10)
fig.show()

In [None]:
fig = visualizer.draw_prediction_test(validation_target, y_pred, 10)
fig.show()