### 데이콘 비트코인 가격 예측 - Prophet

In [25]:
import os, datetime
import numpy as np
import pandas as pd
from tqdm import tqdm

import IPython
import IPython.display
import matplotlib.pyplot as plt

from fbprophet import Prophet
import preprocessor, coin_simulation

In [33]:
# modeling programming
def prophet_modeling(input_array, pred_len=120):
    """Fit one Prophet model per sample and forecast the next minutes.

    Every ``input_array[idx]`` is flattened to a 1-D series, wrapped into a
    ``ds``/``y`` frame by ``prophet_preprocessor`` and fitted independently.
    The loop is best-effort: when fitting or predicting a sample raises, the
    failure is recorded, the sample's output row stays at zero, and the loop
    moves on to the next sample.

    Args:
        input_array: NumPy array whose first axis indexes the samples.
        pred_len: Number of one-minute steps to forecast per sample.
            Defaults to 120, the horizon the original code hard-coded.

    Returns:
        ``numpy.ndarray`` of shape ``(input_array.shape[0], pred_len)`` with
        the ``yhat`` forecasts; rows of failed samples are all zeros.

    Raises:
        ValueError: If ``pred_len`` is smaller than 1.
    """
    if pred_len < 1:
        # A zero horizon would make the ``[-pred_len:]`` slice below return
        # the whole forecast instead of an empty tail.
        raise ValueError(f'pred_len must be >= 1, got {pred_len}')

    n_samples = input_array.shape[0]

    # Pre-allocated output; failed samples keep their zero row.
    valid_pred_array = np.zeros([n_samples, pred_len])
    # (sample index, exception repr) pairs, reported after the display reset.
    failures = []

    for idx in tqdm(range(n_samples)):
        try:
            x_series = input_array[idx, :].reshape(-1)
            x_df = prophet_preprocessor(x_series)

            # Create the Prophet model.
            model = Prophet(
                yearly_seasonality=False,
                weekly_seasonality=True,
                daily_seasonality=True,
                changepoint_prior_scale=0.01,
                changepoint_range=1,
                interval_width=0.9,
                seasonality_mode='multiplicative',
            )
            # Prophet expresses ``period`` in days, so these add 1-hour and
            # 2-hour cycles on top of the built-in seasonalities.
            model.add_seasonality(name='first_seasonality', period=1/24, fourier_order=5)
            model.add_seasonality(name='second_seasonality', period=1/12, fourier_order=10)

            # Fit, then extend the frame by ``pred_len`` one-minute steps.
            model.fit(x_df)
            future = model.make_future_dataframe(periods=pred_len, freq='min')
            forecast = model.predict(future)
            # Only the trailing ``pred_len`` rows are the future steps.
            valid_pred_array[idx, :] = forecast.yhat.values[-pred_len:]

        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still stop a long run.
            failures.append((idx, repr(exc)))
            print(f'Prophet modeling error! (sample {idx}: {exc!r})')

    error_counter = len(failures)

    # clear display
    IPython.display.clear_output()
    print(f'''
    Warning : {len(input_array)}의 샘플 내 {error_counter}개의 샘플에서 에러가 발생했습니다.\n
    Prediction Complete!'
    '''
    )
    # The per-sample messages were just cleared, so repeat them here.
    for failed_idx, reason in failures:
        print(f'    failed sample {failed_idx}: {reason}')

    return valid_pred_array

def prophet_preprocessor(x_series):
    """Wrap a 1-D series in a ``ds``/``y`` DataFrame.

    The timestamps are synthetic: one-minute steps counted from
    2021-01-01 00:00:00, so the first value is stamped 00:01:00.

    Args:
        x_series: 1-D array-like exposing ``shape`` and ``tolist`` (a NumPy
            array in this file).

    Returns:
        ``pandas.DataFrame`` with a ``ds`` timestamp column followed by a
        ``y`` value column, one row per element of ``x_series``.
    """
    # Fixed, arbitrary origin for the synthetic time axis.
    origin = datetime.datetime(2021, 1, 1, 0, 0, 0)
    one_minute = datetime.timedelta(minutes=1)
    n_points = x_series.shape[0]

    x_df = pd.DataFrame()
    # Minute-resolution timestamps, starting one minute after the origin.
    x_df['ds'] = [origin + one_minute * step for step in range(1, n_points + 1)]
    # Observed values, in their original order.
    x_df['y'] = x_series.tolist()

    return x_df

In [4]:
### ------------ Data upload part ---------------- ###

# Folder that holds the competition CSV files.
dir_path = './data'

# File names and the full paths built from them.
x_file_name = 'train_x_df.csv'
y_file_name = 'train_y_df.csv'
x_file_path = os.path.join(dir_path, x_file_name)
y_file_path = os.path.join(dir_path, y_file_name)

# Load both training tables.
train_x_df = pd.read_csv(x_file_path)
train_y_df = pd.read_csv(y_file_path)
print("Reading Complete!")



### --------------- Modeling part ---------------- ###

# Preprocessing 1: convert the 2-D dataframes into 3-D arrays.
train_x_array = preprocessor.df2d_to_array3d(train_x_df)
train_y_array = preprocessor.df2d_to_array3d(train_y_df)

# Preprocessing 2-2: take the raw (non-differenced) open price, which sits at
# index 1 of the last axis, for the first 383 samples and keep a trailing
# singleton feature axis.
normal_x_array = np.reshape(train_x_array[:383, :, 1], (383, 1380, 1))

Reading Complete!


In [None]:
# Start modeling.
# Feed the open-price-only array: prophet_modeling flattens each sample with
# reshape(-1), so passing the full multi-feature train_x_array would mix every
# feature column into a single series.
valid_pred_array = prophet_modeling(normal_x_array)


In [34]:
# Write the forecasts to a comma-separated text file, one row per sample.
save_file_name = 'prophet_result.csv'
np.savetxt(fname=save_file_name, X=valid_pred_array, delimiter=",")


NameError: name 'valid_pred_array' is not defined