## 데이콘 비트코인 가격 예측 - Neural Prophet pilot

### library install & import, data load

In [1]:
# import library
from datetime import timedelta, datetime
from copy import deepcopy
import datetime
import argparse
import random
import warnings


import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import ParameterGrid

from tqdm import tqdm
import matplotlib.pyplot as plt

# model
from neuralprophet import NeuralProphet
from neuralprophet import set_random_seed


%matplotlib inline
# Install an "ignore" filter so warnings are not shown in the cells below.
warnings.filterwarnings(action = "ignore")

# Load the input (x) and target (y) tables from the local ./data directory.
train_x_df = pd.read_csv("./data/train_x_df.csv")
train_y_df = pd.read_csv("./data/train_y_df.csv")
print("Reading Complete!")


Reading Complete!


### data preprocess functions

In [13]:
def df2d_to_array3d(df_2d):
    """Reshape a flat 2-D data frame into a 3-D numpy array.

    The first two columns are skipped (presumably ``sample_id`` and ``time``
    -- confirm against the CSV layout); every remaining column is treated as
    a feature. The sample and time-step counts come from the number of
    distinct values in the ``sample_id`` and ``time`` columns.

    Rows are reshaped in their existing order, so the frame is expected to be
    laid out sample by sample, time step by time step.

    Returns:
        Array of shape ``(n_samples, n_steps, n_features)``.
    """
    feature_frame = df_2d.iloc[:, 2:]

    n_samples = df_2d.sample_id.nunique()
    n_steps = df_2d.time.nunique()
    n_features = feature_frame.shape[1]

    return feature_frame.values.reshape([n_samples, n_steps, n_features])
    

def coin_index_export(input_array, coin_num):
    """Return the sample indices whose coin id equals ``coin_num``.

    The coin id of a sample is read from column 0 at the sample's first time
    step, i.e. ``input_array[sample, 0, 0]``.

    Returns:
        List of matching indices along axis 0, in ascending order.
    """
    coin_column = 0

    return [
        idx
        for idx in range(input_array.shape[0])
        if input_array[idx, 0, coin_column] == coin_num
    ]


def prophet_preprocessor(x_series):
    """Wrap a price series in a two-column ``ds``/``y`` data frame.

    The series carries no real timestamps, so synthetic ones are generated at
    one-minute spacing, starting one minute after 2021-01-01 00:00:00.

    Args:
        x_series: numpy array of values; its first axis is the time axis.

    Returns:
        DataFrame with a ``ds`` column (timestamps) and a ``y`` column (values).
    """
    # NOTE: `datetime` is used as the module here, not the class.
    origin = datetime.datetime.strptime('2021-01-01 00:00:00', '%Y-%m-%d %H:%M:%S')
    n_points = x_series.shape[0]

    frame = pd.DataFrame()
    # Per-minute timestamps: origin + 1 min, ..., origin + n_points min.
    frame['ds'] = [
        origin + datetime.timedelta(minutes=offset)
        for offset in range(1, n_points + 1)
    ]
    # Price values, aligned one-to-one with the timestamps above.
    frame['y'] = x_series.tolist()

    return frame

### parameter setting

In [15]:
# params_grid = {'n_changepoints':[5, 10, 15, 20], #default 5
#                 'changepoints_range' : [0.9, 0.95, 1.0], #default 0.8
#                 'num_hidden_layers' : [1, 2],
#                 'seasonality_fourier_order' : [[5, 10], [10,15], [15, 20]]}

# grid = ParameterGrid(params_grid)
# cnt = 0
# for parameters in grid:
#     cnt = cnt+1

# print('Total Possible Models',cnt)


In [35]:
from fractions import Fraction

In [43]:
# Search space for the two custom seasonalities added in `train`.
# Every candidate is a pair: element 0 configures the first seasonality,
# element 1 the second.
# NOTE(review): periods are presumably expressed in days (1/12 ~ 2 hours);
# confirm against the NeuralProphet `add_seasonality` documentation.
params_grid = {
    'seasonality_fourier_order': [[5, 10], [10, 15], [15, 20]],
    'seasonality_periods': [
        [1/12, 1/8], [1/12, 1/6], [1/12, 1/4],
        [1/24, 1/16], [1/24, 1/12], [1/24, 1/8],
    ],
}

grid = ParameterGrid(params_grid)
# ParameterGrid supports len(), so no manual counting loop is needed.
cnt = len(grid)

print('Total Possible Models', cnt)


Total Possible Models 18


### grid_search

In [44]:
def train(x_series, y_series, **paras):
    """Fit NeuralProphet on one input series and score its forecast.

    A fresh model is built with the built-in yearly/weekly/daily seasonalities
    switched off and two custom seasonalities taken from ``paras``. The model
    is fitted on ``x_series`` and asked to forecast as many steps as
    ``y_series`` contains; the forecast is then compared with ``y_series``.

    Args:
        x_series: 1-D array of historical values used for fitting.
        y_series: 1-D array of the true future values; its length sets the
            forecast horizon.
        **paras: must contain ``seasonality_periods`` and
            ``seasonality_fourier_order``, each indexable at positions 0 and 1
            (first and second seasonality).

    Returns:
        Mean squared error between ``y_series`` and the forecast.

    Raises:
        ValueError: if ``y_series`` is empty.
        KeyError: if a required entry is missing from ``paras``.
    """
    horizon = len(y_series)
    if horizon == 0:
        raise ValueError("y_series must contain at least one value")

    x_df = prophet_preprocessor(x_series)

    # Only the arguments below are set explicitly; everything else (e.g. the
    # changepoint and hidden-layer settings) stays at the library defaults.
    model = NeuralProphet(
        learning_rate=0.01,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
        epochs=50,
        batch_size=64,
    )

    # Explicit indexing (rather than zip) keeps the failure mode of a
    # too-short parameter list as an IndexError.
    for position, name in enumerate(('first_seasonality', 'second_seasonality')):
        model.add_seasonality(
            name=name,
            period=paras['seasonality_periods'][position],
            fourier_order=paras['seasonality_fourier_order'][position],
        )

    # The returned training metrics are not used by the grid search.
    model.fit(x_df, freq="min")

    future = model.make_future_dataframe(x_df, periods=horizon)
    forecast = model.predict(future)

    # The last `horizon` predictions correspond to the future steps.
    return mean_squared_error(y_series, forecast.yhat1.values[-horizon:])


In [60]:

def experiment(train_x_array, train_y_array, args):
    """Grid-search the seasonality settings on five random samples of one coin.

    For every parameter combination in the module-level ``grid``, a model is
    trained on each selected sample via ``train`` and the resulting MSE values
    (each scaled by ``10E5``) are summed.

    Args:
        train_x_array: 3-D input array indexed as ``[sample, time, column]``.
        train_y_array: 3-D target array indexed the same way.
        args: namespace providing ``coin_num`` (coin to evaluate) and
            ``data_col_idx`` (column to model). The sampled ids are stored on
            it as ``sample_id_indices``, so pass a copy if that matters.

    Returns:
        DataFrame with one row per parameter combination and the columns
        ``MSE*10E5`` (summed, scaled error) and ``PARAMETERS`` (the dict).

    Raises:
        ValueError: from ``random.sample`` when the coin has fewer than five
            samples.
    """
    sample_indices = coin_index_export(input_array=train_x_array, coin_num=args.coin_num)
    args.sample_id_indices = random.sample(sample_indices, 5)
    print(args.sample_id_indices)

    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []

    for paras in grid:
        print("")
        print(f'\n current parameters is {paras}\n')
        total_mse = 0.0

        for sample_id in args.sample_id_indices:
            x_series = train_x_array[sample_id, :, args.data_col_idx]
            y_series = train_y_array[sample_id, :, args.data_col_idx]

            # Scale the error so the tiny MSE values are readable.
            total_mse += train(x_series, y_series, **paras) * 10E5

        print('\nTotal MSE Error------------------------------------', total_mse, end='\n')
        rows.append({'MSE*10E5': total_mse, 'PARAMETERS': paras})

    return pd.DataFrame(rows, columns=['MSE*10E5', 'PARAMETERS'])

In [61]:
# Convert both flat tables into [sample, time, column] arrays.
train_x_array, train_y_array = df2d_to_array3d(train_x_df), df2d_to_array3d(train_y_df)

# Seed every random source used below with the same value.
seed = 7
random.seed(seed)
np.random.seed(seed)
set_random_seed(seed)

# An empty argparse namespace serves as a plain settings container.
parser = argparse.ArgumentParser()
args = parser.parse_args([])

args.coin_num = 9
args.data_col_idx = 1  # 1 open, 2 high


In [62]:
# Run the grid search; `experiment` writes `sample_id_indices` onto the
# namespace it receives, so a deep copy keeps the original `args` untouched.
result = experiment(train_x_array, train_y_array, deepcopy(args))

0/50]: 100%|██████████| 50/50 [00:02<00:00, 16.99it/s, SmoothL1Loss=0.0178, MAE=0.00189, RegLoss=0]
Epoch[50/50]: 100%|██████████| 50/50 [00:03<00:00, 16.56it/s, SmoothL1Loss=0.0232, MAE=0.00713, RegLoss=0]
Epoch[50/50]: 100%|██████████| 50/50 [00:02<00:00, 16.99it/s, SmoothL1Loss=0.00871, MAE=0.00535, RegLoss=0]
Epoch[3/50]:   4%|▍         | 2/50 [00:00<00:03, 15.91it/s, SmoothL1Loss=1.05, MAE=0.017, RegLoss=0]
Total MSE Error------------------------------------ 279.1283688657371


 current parameters is {'seasonality_fourier_order': [5, 10], 'seasonality_periods': [0.08333333333333333, 0.25]}

Epoch[50/50]: 100%|██████████| 50/50 [00:03<00:00, 16.43it/s, SmoothL1Loss=0.00828, MAE=0.00115, RegLoss=0]
Epoch[50/50]: 100%|██████████| 50/50 [00:02<00:00, 17.12it/s, SmoothL1Loss=0.0102, MAE=0.00725, RegLoss=0]
Epoch[50/50]: 100%|██████████| 50/50 [00:02<00:00, 16.86it/s, SmoothL1Loss=0.0304, MAE=0.00257, RegLoss=0]
Epoch[50/50]: 100%|██████████| 50/50 [00:04<00:00, 12.08it/s, SmoothL1Loss=

In [67]:
# Rank the parameter combinations by summed error (best first) and renumber
# the rows from 0.
result = result.sort_values(by=['MSE*10E5']).reset_index(drop=True)


In [70]:
# Display the ranked grid-search table.
result

Unnamed: 0,MSE*10E5,PARAMETERS
0,181.676408,"{'seasonality_fourier_order': [5, 10], 'season..."
1,194.084963,"{'seasonality_fourier_order': [15, 20], 'seaso..."
2,201.573086,"{'seasonality_fourier_order': [15, 20], 'seaso..."
3,215.12214,"{'seasonality_fourier_order': [10, 15], 'seaso..."
4,227.054778,"{'seasonality_fourier_order': [15, 20], 'seaso..."
5,227.849852,"{'seasonality_fourier_order': [5, 10], 'season..."
6,245.627082,"{'seasonality_fourier_order': [15, 20], 'seaso..."
7,251.455757,"{'seasonality_fourier_order': [5, 10], 'season..."
8,257.16361,"{'seasonality_fourier_order': [10, 15], 'seaso..."
9,279.128369,"{'seasonality_fourier_order': [5, 10], 'season..."


In [71]:
# Write the ranked grid-search table to a CSV file in the working directory.
result.to_csv('add_seasonality_grid_search.csv')

In [76]:
# After sorting and re-indexing, label 0 is the lowest-error combination.
result['PARAMETERS'][0]

{'seasonality_fourier_order': [5, 10],
 'seasonality_periods': [0.08333333333333333, 0.125]}

In [74]:
# Decimal values of two grid fractions, for matching against the periods
# printed in the best parameter set.
1/12, 1/8

(0.08333333333333333, 0.125)

In [77]:
# plt.plot(np.arange(1380, 1380+120), y_series, label = 'True series')
# plt.plot(np.arange(1380, 1380+120), forecast.yhat1.values[-120:], label = 'Neural Prophet1')
# plt.legend()
# plt.show()

# error = mean_squared_error(y_series, forecast.yhat1.values[-120:])
# print("Prophet MSE*10E5 is {}".format(error * 10E5))
