# prophet 써봄

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive so that the
# later cells can read their input CSVs from it and write results back to it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install prophet
from prophet import Prophet


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns




In [4]:
# Directory on the mounted Google Drive that holds the input files.
# Defined once so the location can be changed in a single place.
DATA_DIR = '/content/drive/MyDrive/open-2'

# Input file paths (all derived from DATA_DIR).
train_file_path = f'{DATA_DIR}/train.csv'
test_file_path = f'{DATA_DIR}/test.csv'
trade_file_path = f'{DATA_DIR}/international_trade.csv'
sample_file_path = f'{DATA_DIR}/sample_submission.csv'

# Load the CSV files.
train_data = pd.read_csv(train_file_path)
test_data = pd.read_csv(test_file_path)
trade_data = pd.read_csv(trade_file_path)

# Submission template; its 'answer' column is filled in after forecasting.
submission_df = pd.read_csv(sample_file_path)


# 전처리
- prophet 모델에 맞춰서 데이터프레임 맞춤
- ID, ds, y 컬럼만 선택해서 만듦
- ID 컬럼에서 정규표현식으로 맨 끝의 `_`와 그 뒤 8자리 숫자를 제거

In [5]:
# Build the frame that is fed to Prophet:
#   1. rename the timestamp column to 'ds' and the price column to 'y',
#   2. keep only the ID / ds / y columns,
#   3. strip the trailing "_" + 8 digits from every ID so that rows sharing
#      the same prefix end up with the same key.
prophet_data = (
    train_data
    .rename(columns={'timestamp': 'ds', 'price(원/kg)': 'y'})
    .loc[:, ['ID', 'ds', 'y']]
    .assign(ID=lambda frame: frame['ID'].str.replace(r'_\d{8}$', '', regex=True))
)

# 모델
- 주어진 데이터프레임에서 고유한 ID 목록 추출
- 2023-03-04부터 28일간의 미래 날짜 데이터프레임 생성
- 모델 설정값은 현재 모두 기본값임 — 추후 튜닝 필요 (TODO)

In [7]:
# Fixed seed for NumPy's global random number generator.
# NOTE(review): presumably meant to make the Prophet run below repeatable —
# confirm that it actually covers every stochastic step in the pipeline.
RANDOM_SEED = 990313
np.random.seed(RANDOM_SEED)
def ph_train(df, forecast_start='2023-03-04', periods=28, model_params=None):
    """Fit one Prophet model per ID and forecast a fixed daily window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an 'ID' column identifying each series. The remaining
        columns are handed to ``Prophet.fit``, which requires 'ds' and 'y'.
    forecast_start : str or datetime-like, default '2023-03-04'
        First date of the forecast window.
    periods : int, default 28
        Number of consecutive daily dates to forecast.
    model_params : dict, optional
        Keyword arguments that override the Prophet settings listed below.
        When omitted, the settings below are used unchanged.

    Returns
    -------
    pandas.DataFrame
        Columns 'ID' (the series key as ``str``) and 'y' (Prophet's ``yhat``),
        with ``periods`` rows per ID, in the order the IDs first appear in
        ``df``. An empty input yields an empty frame that still has both
        columns.
    """
    # Settings passed to every Prophet model; override via `model_params`.
    prophet_kwargs = dict(
        growth='linear',
        changepoints=None,
        n_changepoints=25,
        changepoint_range=0.8,
        yearly_seasonality='auto',
        weekly_seasonality='auto',
        daily_seasonality='auto',
        holidays=None,
        seasonality_mode='additive',
        seasonality_prior_scale=10.0,
        holidays_prior_scale=10.0,
        changepoint_prior_scale=0.05,
        mcmc_samples=0,
        interval_width=0.8,
        uncertainty_samples=1000,
    )
    if model_params:
        prophet_kwargs.update(model_params)

    # The forecast dates are the same for every series, so compute them once.
    future_dates = pd.date_range(start=forecast_start, periods=periods, freq='D')

    per_id_frames = []
    for code in df['ID'].unique():
        # drop=True keeps the old row index from leaking into the training
        # frame as a spurious 'index' column.
        history = (
            df.loc[df['ID'] == code]
            .drop(columns='ID')
            .sort_values('ds')
            .reset_index(drop=True)
        )
        model = Prophet(**prophet_kwargs)
        model.fit(history)

        # A fresh frame per model so no state is shared between predictions.
        future = pd.DataFrame({'ds': future_dates})
        forecast = model.predict(future)

        per_id_frames.append(
            pd.DataFrame({'ID': str(code), 'y': forecast['yhat'].to_numpy()})
        )

    if not per_id_frames:
        # Keep the output schema stable so callers can still select pred['y'].
        return pd.DataFrame(columns=['ID', 'y'])
    return pd.concat(per_id_frames, ignore_index=True)
# Forecast every series and copy the predictions into the submission template.
pred = ph_train(prophet_data)

# Assigning a Series aligns on the index, so a row-count mismatch would
# silently produce NaN or dropped rows; fail loudly instead.
if len(pred) != len(submission_df):
    raise ValueError(
        f'prediction rows ({len(pred)}) do not match '
        f'submission rows ({len(submission_df)})'
    )

# Positional assignment.
# NOTE(review): assumes the submission rows are ordered by ID and date in the
# same order ph_train emits them — confirm against sample_submission.csv.
submission_df['answer'] = pred['y'].to_numpy()
submission_df.to_csv('prophet.csv', index=False)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmph0m0yk28/7em_rpkl.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmph0m0yk28/oaw1e8u5.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=70321', 'data', 'file=/tmp/tmph0m0yk28/7em_rpkl.json', 'init=/tmp/tmph0m0yk28/oaw1e8u5.json', 'output', 'file=/tmp/tmph0m0yk28/prophet_modelpin_t6jc/prophet_model-20231108010849.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
01:08:49 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
01:08:50 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmph0

In [8]:
# Save a copy of the submission to Google Drive, using the same
# '/content/drive/MyDrive' root as the input paths.
submission_df.to_csv('/content/drive/MyDrive/prophet.csv', index=False)
