In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of every file in it.
for folder, _subfolders, files_here in os.walk('/kaggle/input'):
    for file_name in files_here:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from fbprophet import Prophet
from sklearn.metrics import mean_absolute_error,mean_squared_error
from fbprophet.plot import add_changepoints_to_plot
from fbprophet.diagnostics import performance_metrics
from fbprophet.diagnostics import cross_validation
from statsmodels.tsa.seasonal import seasonal_decompose
from fbprophet.plot import plot_cross_validation_metric

In [3]:
# Read the two competition tables used below: the asset details and the training rows.
asset_df = pd.read_csv('/kaggle/input/g-research-crypto-forecasting/asset_details.csv')
df = pd.read_csv('/kaggle/input/g-research-crypto-forecasting/train.csv')

In [4]:
# Keep only the training rows whose Asset_ID equals 3, then preview the first rows.
# NOTE(review): the frame is named btc_df — confirm against asset_details.csv (asset_df)
# that Asset_ID 3 is really the coin this notebook intends to model.
btc_df=df.query('Asset_ID==3')
btc_df.head()

In [5]:
# Move the 'timestamp' column into the index so the spacing between rows can be checked and repaired.
# This cell is not re-runnable on its own: after it runs, 'timestamp' is no longer a column.
btc_df=btc_df.set_index('timestamp')

In [6]:
# Tabulate the differences between consecutive timestamps to see how regular the sampling is.
raw_timestamps = btc_df.index
(raw_timestamps[1:] - raw_timestamps[:-1]).value_counts()

In [7]:
# Put the rows on a regular grid that steps by 60 from the first to the last timestamp
# (inclusive); grid points with no original row take the values of the previous row ('pad').
first_ts, last_ts = btc_df.index[0], btc_df.index[-1]
regular_grid = range(first_ts, last_ts + 60, 60)
btc_df = btc_df.reindex(regular_grid, method='pad')


In [8]:
# Report how many missing values each column holds after the reindex.
print("Null values:")
missing_per_column = btc_df.isna().sum()
missing_per_column

In [9]:
# Repeat the gap tabulation on the reindexed frame; every difference should now be the grid step.
filled_timestamps = btc_df.index
(filled_timestamps[1:] - filled_timestamps[:-1]).value_counts()

In [10]:
# Turn the timestamp index back into a column and derive a 'date_time' column from it by
# casting the timestamps to datetime64[s].
btc_df = btc_df.reset_index().assign(
    date_time=lambda frame: frame['timestamp'].astype('datetime64[s]')
)

In [11]:
# Aggregate to hourly rows: every column becomes the mean of its values within the hour.
hourly_grouper = pd.Grouper(freq='H', key='date_time')
hourly_means = btc_df.groupby([hourly_grouper]).mean()
# The averaged raw timestamp carries no extra information next to date_time, so drop it.
df_new = hourly_means.reset_index().drop(columns=['timestamp'])
df_new.head()

In [12]:
# Show the (rows, columns) size of the hourly frame.
df_new.shape

In [13]:
# Candlestick chart of the last 10000 hourly rows.
# Each plotted field is the hourly mean produced by the groupby above, not a true
# per-hour open/high/low/close.
last_10000_rows = df_new.iloc[-10000:]
candles_10000 = go.Candlestick(
    x=last_10000_rows['date_time'],
    open=last_10000_rows['Open'],
    high=last_10000_rows['High'],
    low=last_10000_rows['Low'],
    close=last_10000_rows['Close'],
)
fig = go.Figure(data=[candles_10000])

fig.show()

In [14]:
# Closer view: candlestick chart of the last 3000 hourly rows only.
last_3000_rows = df_new.iloc[-3000:]
candles_3000 = go.Candlestick(
    x=last_3000_rows['date_time'],
    open=last_3000_rows['Open'],
    high=last_3000_rows['High'],
    low=last_3000_rows['Low'],
    close=last_3000_rows['Close'],
)
fig = go.Figure(data=[candles_3000])

fig.show()

In [15]:
# Discard every column that the forecasting steps below do not read.
# Re-running this cell alone fails because the columns are already gone.
df_new = df_new.drop(
    columns=['Asset_ID', 'Open', 'Count', 'High', 'Low', 'Volume', 'VWAP', 'Target']
)

In [16]:

def linear_features(df):
    """Build the two-column frame that Prophet consumes.

    Parameters
    ----------
    df : mapping of column name to values (typically a DataFrame)
        Must provide a 'date_time' entry and a 'Close' entry.

    Returns
    -------
    pandas.DataFrame
        A new frame with column 'ds' taken from ``df['date_time']`` and column 'y'
        taken from ``df['Close']``; no other column of ``df`` is carried over.
    """
    return pd.DataFrame({'ds': df['date_time'], 'y': df['Close']})

In [17]:
# train_new['Low'] = list(map(lambda z,b: b-0.001 if z<=b else b, train_new.High, train_new.Low ))


In [18]:
# Chronological split: the final 2952 hourly rows are held out, everything earlier is for fitting.
holdout_rows = 2952
train_df = df_new.iloc[:-holdout_rows]
test_df = df_new.iloc[-holdout_rows:]
# train_df.head()

In [19]:
def mean_absolute_percentage_error(y_true, y_pred): 
    """Return the mean absolute percentage error between two sequences, in percent.

    Both inputs are converted to NumPy arrays. Each absolute error is divided by the
    matching true value before averaging, so a zero in ``y_true`` is not guarded against.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    relative_errors = np.abs((actual - forecast) / actual)
    return np.mean(relative_errors) * 100

In [20]:
# fig=plt.figure(figsize=(800, 800))
# Use a large canvas for the decomposition figure.
plt.rcParams['figure.figsize'] = (18, 9)
# Additive seasonal decomposition of the last 6000 hourly rows, indexed by date_time.
recent_hourly = df_new.set_index('date_time').iloc[-6000:]
output = seasonal_decompose(recent_hourly, model='additive')
# Draw the decomposition panels.
output.plot()
plt.show()

In [21]:
# Convert the training rows to Prophet's ds/y layout.
train_li = linear_features(train_df)

# Baseline Prophet configuration: linear trend, additive seasonality, small prior scales.
baseline_settings = dict(
    growth='linear',
    changepoint_prior_scale=0.001,
    seasonality_prior_scale=0.1,
    seasonality_mode='additive',
)
model = Prophet(**baseline_settings)
# model.add_seasonality(name='custom_monthly', period=30.5, fourier_order=24)

model.fit(train_li)

In [22]:
# Predict over the whole series (training and hold-out rows): build the ds/y frame,
# strip the target so only 'ds' is passed, and let the fitted model forecast it.
x = linear_features(df_new)
y = x.drop(columns='y')
y_pred = model.predict(y)
# y_pred

In [23]:
# MAPE (in percent) of the forecast in y_pred against the actual close of every row in df_new.
actual_close_all = df_new['Close']
mean_absolute_percentage_error(y_true=actual_close_all, y_pred=y_pred['yhat'])


In [24]:
# Compare the actual close with the forecast held in y_pred over the whole series.
# Both curves are drawn against row position (0 .. len(df_new) - 1).
plt.plot(df_new['Close'].values, label='Actual')
# The legend label of a single line must be a plain string; a one-element list can be
# rendered literally as "['Predicted']".
plt.plot(y_pred['yhat'], label='Predicted')
# Mark the train/test boundary where it really is: the first hold-out row sits at position
# len(train_df). axvline spans the full height of the axes, so no y-limits need to be guessed.
plt.axvline(x=len(train_df), color='purple', ls='--', lw=2, label='test_train/split')
plt.legend()
plt.title('changepoint_scale=0.001')
plt.show()

In [25]:
# Predict the hold-out rows only. This overwrites x, y and y_pred from the full-series
# prediction, so the cells below now refer to the hold-out forecast.
x = linear_features(test_df)
y = x.drop(columns='y')
y_pred = model.predict(y)
# y_pred

In [26]:
# MAPE (in percent) of the forecast in y_pred against the actual close of the hold-out rows.
actual_close_test = test_df['Close']
mean_absolute_percentage_error(y_true=actual_close_test, y_pred=y_pred['yhat'])


In [27]:
# Compare the actual close of the hold-out rows with the forecast held in y_pred.
plt.plot(test_df['Close'].values, label='Actual')
# The legend label of a single line must be a plain string; a one-element list can be
# rendered literally as "['Predicted']".
plt.plot(y_pred['yhat'], label='Predicted')
plt.legend()
# Title spelling corrected ("prediciton" -> "prediction").
plt.title('prediction on test')
plt.show()

In [28]:
# Draw Prophet's forecast plot for y_pred, then overlay the model's changepoints on its axes.
fig5 = model.plot(y_pred)
forecast_axes = fig5.gca()
a = add_changepoints_to_plot(forecast_axes, model, y_pred)

In [29]:
# Draw Prophet's component plot for the forecast currently held in y_pred.
fig3 = model.plot_components(y_pred)

In [30]:
import itertools

In [None]:
# Grid of Prophet hyper-parameters to evaluate.
param_grid = {  
    'changepoint_prior_scale': [0.001, 0.01, 0.1, 0.5],
    'seasonality_prior_scale': [0.01, 0.1, 1.0, 10.0],
    # Prophet accepts only 'additive' or 'multiplicative' here; the previous misspelling
    # ('addtive') was rejected by Prophet, so the search could not run.
    'seasonality_mode': ['additive', 'multiplicative'],
#     'changepoint_range':[0.8,0.85,0.9,0.95,1],
}

# Generate all combinations of parameters
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
mape = []  # Store the MAPE of each parameter combination here, in the order of all_params

# The hold-out inputs are identical for every candidate, so build them once outside the loop.
x = linear_features(test_df)
y = x.drop('y', axis=1)

# Fit one model per combination on the training rows and score it on the hold-out rows.
# NOTE(review): this is a single train/test evaluation, not cross validation, and the
# parameters are selected on test_df itself, so the best score reported is optimistic.
for params in all_params:
    m = Prophet(**params).fit(train_li)  # Fit model with given params
    y_pr = m.predict(y)
    mape_value = mean_absolute_percentage_error(y_true=test_df['Close'], y_pred=y_pr['yhat'])
    mape.append(mape_value)
    print("ONE DONE ....")

# Tabulate every parameter combination next to its MAPE
tuning_results = pd.DataFrame(all_params)
tuning_results['mape'] = mape
print(tuning_results)

In [None]:
# Find the best parameters
# Tabulate every parameter combination next to the MAPE it achieved and print the table.
tuning_results = pd.DataFrame(all_params).assign(mape=mape)
print(tuning_results)

In [None]:
# Select the parameter combination with the lowest MAPE and print it with that score.
best_index = np.argmin(mape)
best_params = all_params[best_index]
print(best_params, ":", min(mape))