In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Install NeuralProphet (with the "live" extra) into the kernel's own environment.
# %pip targets the running kernel; quoting keeps the shell from globbing "[live]".
# TODO: pin the exact version this notebook was developed against.
# NOTE(review): the constructor arguments used further down (num_hidden_layers,
# d_hidden) may not be accepted by every NeuralProphet release — confirm before pinning.
%pip install -q "neuralprophet[live]"

In [3]:
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from neuralprophet import NeuralProphet
from sklearn.metrics import mean_absolute_error,mean_squared_error
from fbprophet.plot import add_changepoints_to_plot
from fbprophet.diagnostics import performance_metrics
from fbprophet.diagnostics import cross_validation
from statsmodels.tsa.seasonal import seasonal_decompose
from fbprophet.plot import plot_cross_validation_metric
import itertools

In [4]:
# Load the full competition training table into memory.
train_csv_path = '/kaggle/input/g-research-crypto-forecasting/train.csv'
df = pd.read_csv(train_csv_path)

In [5]:
# Keep only the rows belonging to a single asset (Asset_ID 3) and preview them.
# NOTE(review): the frame is called btc_df, but I believe the competition's
# asset_details.csv maps Bitcoin to Asset_ID 1 and Cardano to 3 — confirm which
# asset is actually intended here.
btc_df = df[df['Asset_ID'] == 3]
btc_df.head()

In [6]:
# Use the raw timestamp column as the row index.
# Not idempotent: re-running this cell fails because the column is already the index.
btc_df = btc_df.set_index(keys='timestamp')

In [7]:
# Distribution of the step between consecutive timestamps (before gap filling).
later_ts, earlier_ts = btc_df.index[1:], btc_df.index[:-1]
(later_ts - earlier_ts).value_counts()

In [8]:
# Put the data on a regular 60-unit timestamp grid spanning the first to the last
# observation; grid points with no row are filled from the previous row ('pad').
first_ts = btc_df.index[0]
last_ts = btc_df.index[-1]
regular_grid = range(first_ts, last_ts + 60, 60)
btc_df = btc_df.reindex(regular_grid, method='pad')


In [9]:
# Per-column count of missing values after re-indexing.
print("Null values:")
btc_df.isnull().sum()

In [10]:
# Re-check the step between consecutive timestamps now that the index was rebuilt.
index_steps = btc_df.index[1:] - btc_df.index[:-1]
index_steps.value_counts()

In [11]:
# Move the timestamp back into a column and add a datetime view of it
# (the integer timestamps are cast as second-resolution datetimes).
btc_df = btc_df.reset_index().assign(
    date_time=lambda frame: frame['timestamp'].astype('datetime64[s]')
)

In [12]:
# Collapse the rows into one row per hour by averaging every column, then drop
# the (now averaged) raw timestamp.
# NOTE(review): Open/High/Low/Close are all hourly means here, so anything drawn
# from df_new as a "candle" is not a true first/max/min/last OHLC bar.
hourly_bins = pd.Grouper(key='date_time', freq='H')
df_new = (
    btc_df.groupby([hourly_bins])
    .mean()
    .reset_index()
    .drop(columns=['timestamp'])
)
df_new.head()

In [13]:
# Interactive line chart of the hourly mean Close over the whole history.
px.line(df_new, x='date_time', y='Close')

In [14]:
# Candlestick chart of the most recent 10,000 hourly rows.
recent_rows = df_new.iloc[-10000:]
candles = go.Candlestick(
    x=recent_rows['date_time'],
    open=recent_rows['Open'],
    high=recent_rows['High'],
    low=recent_rows['Low'],
    close=recent_rows['Close'],
)
fig = go.Figure(data=[candles])

fig.show()

In [15]:
# Same candlestick view, zoomed in to the most recent 3,000 hourly rows.
last_3000 = df_new.tail(3000)
fig = go.Figure(
    data=[
        go.Candlestick(
            x=last_3000['date_time'],
            open=last_3000['Open'],
            high=last_3000['High'],
            low=last_3000['Low'],
            close=last_3000['Close'],
        )
    ]
)

fig.show()

In [16]:
# Release the large raw frames; only the hourly df_new is used from here on.
del df, btc_df

In [21]:
# Display the hourly-aggregated frame (the notebook front-end truncates the output).
df_new

In [51]:
# Candidate covariates: every column from position 2 up to (but excluding) the
# last one, minus the forecast target 'Close'.
# NOTE(review): the selection is positional, so it depends on df_new's column order.
df_new1 = df_new.iloc[:, 2:-1].drop(columns='Close')

In [52]:
# Names of the covariate columns kept in df_new1.
regions = df_new1.columns.tolist()
regions

In [53]:
# Strip df_new of everything except what is not listed here (the listed market
# columns are removed). Not idempotent: a second run raises because the columns are gone.
unused_columns = ['Asset_ID', 'Open', 'Count', 'High', 'Low', 'Volume', 'VWAP', 'Target']
df_new = df_new.drop(columns=unused_columns)

In [54]:

def get_features(df):
    """Build the two-column frame used for model fitting.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'date_time' column and a 'Close' column.

    Returns
    -------
    pandas.DataFrame
        New frame with columns 'ds' (from 'date_time') and 'y' (from 'Close'),
        carrying the same row index as ``df``.
    """
    return pd.DataFrame({'ds': df['date_time'], 'y': df['Close']})

In [55]:
# Chronological split: the final 2952 hourly rows are held out for testing,
# everything before them is used for training (same cut for the covariates).
n_test_rows = 2952
train_df = df_new.iloc[:-n_test_rows]
test_df = df_new.iloc[-n_test_rows:]
train_lag_covar_df = df_new1.iloc[:-n_test_rows]
# train_df.head()

In [56]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE) in percent.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values; must contain the same number of elements as ``y_true``.

    Returns
    -------
    float
        ``mean(|y_true - y_pred| / |y_true|) * 100`` over the positions where
        ``y_true`` is non-zero. ``nan`` if there is no such position (including
        empty input).

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    # Flatten to float vectors: this accepts object-dtype input and prevents
    # NumPy from silently broadcasting (n,) against (n, 1) into an n x n matrix.
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            "y_true and y_pred must have the same number of elements, got "
            f"{y_true.size} and {y_pred.size}"
        )

    # The percentage error is undefined where the actual value is zero; skip
    # those positions instead of returning inf/nan for the whole series.
    nonzero = y_true != 0
    if not nonzero.any():
        return float('nan')

    relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.mean(np.abs(relative_error)) * 100

In [57]:
# Training frame in the ds/y layout produced by get_features (date_time -> ds, Close -> y).
train_li=get_features(train_df)


In [58]:
# fig=plt.figure(figsize=(800, 800))
# Additive seasonal decomposition of the last 6000 hourly rows, drawn on a wide canvas.
plt.rcParams['figure.figsize'] = (18, 9)
recent_by_time = df_new.set_index('date_time')[-6000:]
output = seasonal_decompose(recent_by_time, model='additive')
output.plot()
plt.show()

In [59]:
# Fix the random seed once, before any model is built, using NeuralProphet's helper.
# NOTE(review): nothing reseeds between later fits, so each fit's result presumably
# depends on how many fits ran before it — confirm if exact reproducibility matters.
from neuralprophet import set_random_seed 
set_random_seed(0)

In [137]:
# Auto-regressive configuration: 720 lagged inputs (24 * 30), 450 forecast steps,
# and a 6-layer hidden network of width 16. All other options keep library defaults.
model_kwargs = dict(
    n_lags=24 * 30,
    n_forecasts=450,
    num_hidden_layers=6,
    d_hidden=16,
)
m = NeuralProphet(**model_kwargs)

In [138]:
# Train on the ds/y training frame at hourly frequency; the value returned by
# fit() is kept in `metrics` (presumably the training metrics — not used below).
metrics = m.fit(train_li, freq="H")

In [139]:
# x = get_features(test_df)
# x['y']=None
# y_pred = m.predict(x)
# y_pred

In [140]:
# x1 = get_features(train_df)
# x1['y']=None
# y_pred1 = m.predict(x1)
# # y_pred1

In [141]:
# Raw (per-origin) forecasts over the training frame, without component decomposition.
y_pred = m.predict(
    train_li,
    raw=True,
    decompose=False,
)



In [142]:
# Take the last row of the raw forecast table and keep everything after its
# first two entries as the forecast vector.
# NOTE(review): the [2:] cut is positional. Depending on which leading columns the
# installed NeuralProphet version emits before the step columns, this may drop the
# first forecast step and shift the series by one hour against the test data —
# confirm against y_pred.columns.
zx = list(y_pred.iloc[-1:].to_numpy())
zxc = zx[0][2:]

In [143]:
# print("Mean Absoulte Percentage Error on train data is:",mean_absolute_percentage_error(y_true=train_df['Close'],
#                    y_pred=y_pred1['yhat1']),"%")


In [145]:
# Compare the forecast vector with the first len(zxc) hourly Close values of the
# hold-out set. The length is taken from zxc instead of a hard-coded 449 so the
# two series always line up, and .iloc makes the positional slice explicit
# (test_df keeps df_new's original integer labels, which do not start at 0).
actual_close = test_df['Close'].iloc[:len(zxc)]

print("Mean Absoulte Percentage Error on 1  test month data is: ",
      mean_absolute_percentage_error(y_true=actual_close, y_pred=zxc), "%")

plt.plot(actual_close.values, label='Actual')
# The label must be a plain string; a one-element list can end up rendered
# literally as "['Predicted']" in the legend.
plt.plot(zxc, label='Predicted')
plt.xlabel('Forecast step (hours)')
plt.ylabel('Close')
plt.legend()
plt.title('next 450 steps based on 720 steps,layers 6 and d_hidden 16')
plt.show()

In [129]:
import itertools

# Exhaustive grid search over network depth/width; every combination is trained once.
param_grid = {
    'learning_rate': [0.05],
    'n_lags': [24*30],
    'n_forecasts': [451],
    'num_hidden_layers': [4, 6, 8, 10, 15, 20],
    'd_hidden': [10, 16, 20, 25, 30],
}
all_params = [dict(zip(param_grid.keys(), v)) for v in itertools.product(*param_grid.values())]
mape = []  # MAPE (%) of each parameter set, in the same order as all_params

# Each configuration is scored on a single forecast window (no cross-validation).
# NOTE(review): the score is computed on test_df, so choosing the best
# configuration from these numbers tunes on the test set.
for params in all_params:
    # Reseed before every fit (set_random_seed is imported in the seeding cell):
    # otherwise a configuration's result depends on how many fits ran before it,
    # and the configurations are not compared from the same starting state.
    set_random_seed(0)
    m = NeuralProphet(**params)
    metrics = m.fit(train_li, freq='H')
    y_pred = m.predict(train_li, decompose=False, raw=True)
    # Forecast vector taken from the last row of the raw prediction table.
    zx = list(np.array(y_pred[-1:]))
    zxc = zx[0][2:]
    # Size the hold-out slice from the forecast itself rather than a hard-coded
    # 450, and slice positionally (test_df's integer labels do not start at 0).
    actual_close = test_df['Close'].iloc[:len(zxc)]
    mape_value = mean_absolute_percentage_error(y_true=actual_close, y_pred=zxc)
    mape.append(mape_value)

# One row per parameter set together with its score.
tuning_results = pd.DataFrame(all_params)
tuning_results['mape'] = mape
print(tuning_results)

In [None]:
# Plot the fitted parameters of whichever model `m` currently refers to. In file
# order that is the last configuration trained by the grid-search loop, which is
# not necessarily the best-scoring one.
fig_param = m.plot_parameters()


In [44]:
# Find the best parameters
tuning_results = pd.DataFrame(all_params)
tuning_results['mape'] = mape
print(tuning_results)

In [43]:
# Pick the parameter set with the lowest MAPE and report it with its score.
best_index = np.argmin(mape)
best_params = all_params[best_index]
print(best_params, ":", min(mape))