## Importing the libraries

In [78]:


import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab

import statsmodels.api as sm
import statsmodels.tsa.api as smt
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.tsa.stattools as ts
from statsmodels.tsa.stattools import adfuller

from fbprophet import Prophet

import math

# import pyflux as pf

import warnings
warnings.filterwarnings('ignore')

import itertools

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV

from sklearn.model_selection import TimeSeriesSplit

from pandas.plotting import autocorrelation_plot

import re

import sys
import os

from functools import reduce

## Helper Functions

In [3]:
def calc_RMSE(validation_data, prediction_data):
    """
    Root-mean-square error between observed and predicted values.

    Both arguments are converted with ``np.array`` and subtracted
    element-wise (NumPy broadcasting rules apply).

    Parameters
    ----------
    validation_data : array-like
        Observed values.
    prediction_data : array-like
        Predicted values.

    Returns
    -------
    The square root of the mean of the squared differences.
    """
    observed = np.array(validation_data)
    predicted = np.array(prediction_data)

    squared_errors = (predicted - observed) ** 2
    mean_squared_error = np.mean(squared_errors)
    return np.sqrt(mean_squared_error)

In [4]:
def get_fuller_test(series):
    """
    Run ``adfuller`` on the values of ``series`` and print a short report.

    The report shows the test statistic, the p-value and each critical
    value. Nothing is returned.
    """
    adf_result = adfuller(series.values)
    adf_statistic, p_value, critical_values = adf_result[0], adf_result[1], adf_result[4]

    print('ADF Statistic: %f' % adf_statistic)
    print('p-value: %f' % p_value)
    print('Critical Values:')
    for level, threshold in critical_values.items():
        print('\t%s: %.3f' % (level, threshold))

In [5]:
def make_plots(data, lags=None):
    '''
    Draw three side-by-side panels for ``data``: the raw series, its ACF
    and its PACF.

    ``lags`` is forwarded unchanged to both correlation plots.
    '''
    grid_shape = (1, 3)
    # One row, three columns: raw series | ACF | PACF
    ax_raw, ax_acf, ax_pacf = [
        plt.subplot2grid(grid_shape, (0, column)) for column in range(3)
    ]

    data.plot(ax=ax_raw, figsize=(12, 6))
    smt.graphics.plot_acf(data, lags=lags, ax=ax_acf)
    smt.graphics.plot_pacf(data, lags=lags, ax=ax_pacf)

    sns.despine()
    plt.tight_layout()

In [6]:
def make_plots_2(data, lags=None):
    '''
    Plot a series together with its rolling mean and rolling standard deviation.

    Parameters
    ----------
    data : pandas Series or DataFrame
        Time series to plot; must support ``.rolling``.
    lags : int
        Rolling-window size, passed as ``data.rolling(window=lags)``. The
        ``None`` default is kept for signature compatibility only; a window
        size is required.

    Raises
    ------
    ValueError
        If ``lags`` is None.
    '''
    # rolling() cannot build a window from None; fail up front with a clear
    # message instead of the less obvious error raised inside pandas.
    if lags is None:
        raise ValueError('lags must be given: it is used as the rolling window size')

    windowed = data.rolling(window=lags)
    rolling_mean = windowed.mean()
    rolling_std = windowed.std()

    plt.plot(data, color='black', label='Original Timeseries')
    plt.plot(rolling_mean, color='red', label='Rolling Mean')
    plt.plot(rolling_std, color='orange', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Original, Rolling Mean, Standard Deviation')
    sns.despine()
    plt.show()

In [175]:
def split_train_test_chronological(df, ratio = 0.9):
    '''
    Split a chronologically ordered frame into a leading and a trailing part.

    Parameters
    ----------
    df : pandas DataFrame or Series (a plain sliceable sequence also works)
        Data already in time order; rows are not re-sorted here.
    ratio : float, default 0.9
        Fraction of rows, between 0 and 1 inclusive, assigned to the first part.

    Returns
    -------
    (df_train, df_test)
        The first ``round(len(df) * ratio)`` rows and the remaining rows.

    Raises
    ------
    ValueError
        If ``ratio`` is outside [0, 1] (NaN included).
    '''
    # A negative ratio would become a negative slice bound and silently yield
    # a nonsensical split; a ratio above 1 would silently leave the second
    # part empty.
    if not 0 <= ratio <= 1:
        raise ValueError('ratio must be between 0 and 1, got %r' % (ratio,))

    split_at = round(len(df) * ratio)

    # Slice by position explicitly so the result never depends on index labels.
    rows = df.iloc if hasattr(df, 'iloc') else df
    return rows[:split_at], rows[split_at:]

## Importing data

In [8]:
# Load the bitcoin hourly price history frame.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
df_bitcoin_price = pd.read_pickle('../crypto_currency_and_market_value_history/data/bitcoin_4_year_hourly_history_df.pickle')

In [10]:
df_bitcoin_price.head(3)

Unnamed: 0,timestamp,open,high,low,close,volume (btc),volume (currency),weighted price
0,2015-05-31 00:00:00,232.11,232.11,231.92,231.92,223.37,51822.18,232.0
1,2015-05-31 01:00:00,231.88,232.32,231.87,232.32,217.49,50452.68,231.98
2,2015-05-31 02:00:00,231.97,232.51,231.88,232.51,46.45,10782.23,232.14


In [11]:
# Load the VIX daily-change history frame.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
df_vix = pd.read_pickle('../crypto_currency_and_market_value_history/data/vix_volatility_daily_change_history_df.pickle')

In [12]:
df_vix.head(3)

Unnamed: 0,trade date,open,high,low,close,settle,change,total volume
0,2019-06-07,-0.014837,-0.019264,-0.006116,0.006591,0.015175,-2.666667,-0.0666
1,2019-06-06,-0.023754,-0.021143,-0.01506,0.002402,-0.009023,-0.75,-0.038795
2,2019-06-05,-0.054247,-0.04736,-0.034884,-0.032539,-0.034833,-0.428571,-0.149433


In [16]:
# Read a semicolon-separated CSV.
# NOTE(review): the file is named "gold", but the preview below shows sentiment scores and
# "Volume (BTC)" columns. Confirm this is the intended dataset.
df_gold = pd.read_csv('../crypto_currency_and_market_value_history/data/gold_hourly_data.csv', sep = ';')

In [18]:
df_gold.head(3)

Unnamed: 0,Date,Compound_Score,n,Count_Negatives,Count_Positives,Count_Neutrals,Sent_Negatives,Sent_Positives,Open,High,Low,Close,Volume (BTC),Volume (Currency)
0,01/08/2017 0:00,0.097156,1027.0,148.0,403.0,476.0,-0.504061,0.432704,2855.81,2863.06,2823.0,2825.92,184.02,52295100.0
1,01/08/2017 1:00,0.064507,778.0,143.0,239.0,396.0,-0.381007,0.437953,2823.01,2860.02,2821.01,2853.38,77.3,219605.16
2,01/08/2017 2:00,0.119218,836.0,118.0,333.0,385.0,-0.394999,0.439269,2846.27,2858.04,2837.31,2841.6,135.83,386739.15


## Preprocessing

In [22]:
# Order the VIX rows by trade date (ascending); reset_index() keeps the previous
# row labels in a new 'index' column.
df_vix = df_vix.sort_values('trade date').reset_index()

In [23]:
df_vix.head(3)

Unnamed: 0,index,trade date,open,high,low,close,settle,change,total volume
0,3509,2005-06-21,0.00472,0.00472,0.00472,0.00472,0.005394,-0.994606,
1,3508,2005-06-22,-0.004027,-0.004027,-0.004027,-0.004027,-0.003353,-1.625,-0.8
2,3507,2005-06-23,0.009434,0.014151,0.004717,0.014151,0.017497,-6.2,5.1


In [24]:
# Remove the 'index' column left behind by reset_index(). Reassigning (rather than
# inplace=True) and errors='ignore' make this cell safe to re-run: a second run is a no-op
# instead of a KeyError.
df_vix = df_vix.drop(columns=['index'], errors='ignore')

In [29]:
df_vix.tail(3)

Unnamed: 0,trade date,open,high,low,close,settle,change,total volume
3507,2019-06-05,-0.054247,-0.04736,-0.034884,-0.032539,-0.034833,-0.428571,-0.149433
3508,2019-06-06,-0.023754,-0.021143,-0.01506,0.002402,-0.009023,-0.75,-0.038795
3509,2019-06-07,-0.014837,-0.019264,-0.006116,0.006591,0.015175,-2.666667,-0.0666


## Processing for FB Prophet

In [31]:
# Work on a copy so changes made in this section do not alter df_vix.
df_vix_FBP = df_vix.copy()

In [32]:
# Work on a copy so changes made in this section do not alter df_bitcoin_price.
df_bitcoin_price_FBP = df_bitcoin_price.copy()

In [33]:
# Prophet expects the timestamp column to be called 'ds' and the target column 'y'.
# Reassign instead of mutating in place so the cell produces a new frame from its input.
df_bitcoin_price_FBP = df_bitcoin_price_FBP.rename(columns={'timestamp': 'ds', 'weighted price': 'y'})

In [64]:
# Keep only the 'ds' and 'y' columns.
df_bitcoin_price_FBP = df_bitcoin_price_FBP.loc[:, ['ds', 'y']]

In [65]:
df_bitcoin_price_FBP.tail()

Unnamed: 0,ds,y
35232,2019-06-09 16:00:00,7664.22
35233,2019-06-09 17:00:00,7645.34
35234,2019-06-09 18:00:00,7678.13
35235,2019-06-09 19:00:00,7711.1
35236,2019-06-09 20:00:00,7728.32


In [176]:
# First chronological split: the first 95% of rows go to df_train, the last 5% to df_test.
df_train, df_test = split_train_test_chronological(df=df_bitcoin_price_FBP, ratio=0.95)

In [179]:
# Split the non-test rows into train / validation (95% / 5%).
# The input is rebuilt from the full frame minus the test rows rather than re-splitting
# df_train, so re-running this cell does not keep shrinking the training set.
# Assumes the frame's index labels are unique (as in the previews shown in this notebook).
df_train, df_val = split_train_test_chronological(df_bitcoin_price_FBP.drop(df_test.index), 0.95)

In [180]:
df_train.tail(3)

Unnamed: 0,ds,y
31798,2019-01-17 14:00:00,3587.82
31799,2019-01-17 15:00:00,3602.7
31800,2019-01-17 16:00:00,3618.3


In [181]:
df_val.head(3)

Unnamed: 0,ds,y
31801,2019-01-17 17:00:00,3610.16
31802,2019-01-17 18:00:00,3630.46
31803,2019-01-17 19:00:00,3628.71


In [182]:
df_val.tail(3)

Unnamed: 0,ds,y
33472,2019-03-28 08:00:00,4005.74
33473,2019-03-28 09:00:00,4006.11
33474,2019-03-28 10:00:00,4004.59


In [183]:
df_test.head(3)

Unnamed: 0,ds,y
33475,2019-03-28 11:00:00,4002.83
33476,2019-03-28 12:00:00,4002.23
33477,2019-03-28 13:00:00,4007.81


In [184]:
df_test.tail()

Unnamed: 0,ds,y
35232,2019-06-09 16:00:00,7664.22
35233,2019-06-09 17:00:00,7645.34
35234,2019-06-09 18:00:00,7678.13
35235,2019-06-09 19:00:00,7711.1
35236,2019-06-09 20:00:00,7728.32


In [174]:
df_bitcoin_price_FBP.tail(3)

Unnamed: 0,ds,y
35234,2019-06-09 18:00:00,7678.13
35235,2019-06-09 19:00:00,7711.1
35236,2019-06-09 20:00:00,7728.32


## FB Prophet Baseline

In [72]:
# Baseline model: Prophet constructed with no arguments (library defaults).
proph = Prophet()

In [73]:
# Fit on the training split. The previous argument, df_bitcoin_train, is never defined
# in this notebook and fails with NameError on a fresh kernel; the training frame
# produced by the split cells is df_train.
proph.fit(df_train)

<fbprophet.forecaster.Prophet at 0x1c2657cd68>