In [28]:
import random
import pandas as pd
import numpy as np
import os

from sklearn.preprocessing import LabelEncoder
from prophet import Prophet
import re
import copy

import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides pandas/Prophet warnings that may
# point at real problems — consider narrowing it to specific categories.
warnings.filterwarnings(action='ignore')

In [2]:
def seed_everything(seed):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module and NumPy's global generator with
    ``seed``, and stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Seed value handed to every generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed):
        apply_seed(seed)


seed_everything(42)  # Fix the seed

In [3]:
# Base directory that holds the competition files; every other path is derived from it.
drive_path = "/content/drive/MyDrive/데이터 경진대회/제주 특산물 가격 예측"
train_data_path, test_data_path, sample_submission_path = (
    f"{drive_path}/{csv_name}"
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

In [4]:
# Load the training and test tables from their CSV files (train first, then test).
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_data_path, test_data_path)
)

In [5]:
# Parse the training frame's "timestamp" column into pandas datetimes, in place.
# NOTE(review): test_df["timestamp"] is not converted anywhere in the visible cells.
train_df["timestamp"] = pd.to_datetime(train_df["timestamp"])

In [6]:
# Each ID ends in an underscore followed by 8 digits; removing that suffix leaves
# the category key. Collect the distinct keys in order of first appearance.
train_category_keys = train_df["ID"].str.replace(r'_\d{8}$', '', regex=True)
category_list = train_category_keys.unique()

In [7]:
def handle_data_to_prophet(df) :
  """Reshape a competition frame into the ``ID`` / ``ds`` / ``y`` layout.

  Keeps only the ``ID``, ``timestamp`` and ``price(원/kg)`` columns, removes the
  trailing underscore + 8 digits from every ``ID`` so only the category key
  remains, and renames ``timestamp`` to ``ds`` and ``price(원/kg)`` to ``y``.

  The input frame is not modified and the returned frame shares no data with it.

  Args:
    df: DataFrame containing at least the ``ID``, ``timestamp`` and
      ``price(원/kg)`` columns.

  Returns:
    A new DataFrame with the columns ``ID``, ``ds`` and ``y`` (in that order)
    and the same index as ``df``.

  Raises:
    KeyError: If any of the three required columns is missing from ``df``.
  """
  # Copy *before* assigning: writing into the bare df[[...]] selection is the
  # pattern pandas reports with SettingWithCopyWarning.
  transformed_df = df[["ID", "timestamp", "price(원/kg)"]].copy()
  transformed_df["ID"] = transformed_df["ID"].str.replace(r'_\d{8}$', '', regex=True)
  # rename() already returns a new frame, so no further copy is needed.
  return transformed_df.rename(columns={'timestamp': 'ds', 'price(원/kg)': 'y'})

In [8]:
# Reshape the training frame into the ID / ds / y layout used for model fitting.
prophet_train_data = handle_data_to_prophet(train_df)

In [9]:
# Display the last five reshaped rows as a quick sanity check.
prophet_train_data.tail(5)

Unnamed: 0,ID,ds,y
59392,RD_F_J,2023-02-27,468.0
59393,RD_F_J,2023-02-28,531.0
59394,RD_F_J,2023-03-01,574.0
59395,RD_F_J,2023-03-02,523.0
59396,RD_F_J,2023-03-03,529.0


In [10]:
# Maps each category key to the Prophet model fitted on that category's rows.
prophet_map = {}

In [11]:
# Fit one default-configured Prophet model per category on that category's
# rows (ds / y only), ordered by date.
for category in category_list:
  belongs_to_category = prophet_train_data["ID"] == category
  category_rows = prophet_train_data[belongs_to_category]
  category_history = category_rows.drop(columns=["ID"]).sort_values("ds")
  prophet_map[category] = Prophet().fit(category_history)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpyxi6d051/re07zvh7.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpyxi6d051/z3978rxy.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=62492', 'data', 'file=/tmp/tmpyxi6d051/re07zvh7.json', 'init=/tmp/tmpyxi6d051/z3978rxy.json', 'output', 'file=/tmp/tmpyxi6d051/prophet_modelzk0te129/prophet_model-20231107085118.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
08:51:18 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
08:51:19 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpyx

In [12]:
# Add a placeholder price column (all zeros) so test_df has every column that
# handle_data_to_prophet selects; the values are not real prices.
test_df["price(원/kg)"] = 0

In [13]:
# Reshape the test frame into the same ID / ds / y layout as the training data.
prophet_test_data = handle_data_to_prophet(test_df)

In [31]:
# Predict every category's test rows with its fitted model and keep the "yhat"
# column as a plain Python list, keyed by category.
# NOTE(review): later cells read these lists by position, so this presumably
# relies on predict() returning rows in the same order as the submission file — confirm.
result_map = {}
for category in category_list:
  belongs_to_category = prophet_test_data["ID"] == category
  category_future = prophet_test_data[belongs_to_category].drop(columns=["ID"])
  category_forecast = prophet_map[category].predict(category_future)
  result_map[category] = category_forecast["yhat"].values.tolist()

In [32]:
# assign_result pops values off the lists in copy_result_map, so work on a deep
# copy and leave result_map itself intact.
copy_result_map = copy.deepcopy(result_map)

In [30]:
def assign_result(row) :
  """Return the next unused prediction for the category of ``row``.

  The category key is ``row["ID"]`` with its trailing underscore + 8 digits
  removed. The value is popped from the front of that category's list in the
  module-level ``copy_result_map``, so every call consumes one entry and
  repeated calls walk through the list in order.

  Args:
    row: Mapping-like object whose ``"ID"`` entry is a string.

  Returns:
    The first remaining element of the category's list.

  Raises:
    KeyError: If the category key is not in ``copy_result_map``.
    IndexError: If the category's list is already empty.
  """
  remaining_predictions = copy_result_map[re.sub(r'_\d{8}$', '', row["ID"])]
  next_prediction = remaining_predictions.pop(0)
  return next_prediction

In [16]:
# Load the sample submission; its ID column defines the rows that need an answer.
submission = pd.read_csv(sample_submission_path)

In [17]:
# Display the first rows of the submission template.
submission.head()

Unnamed: 0,ID,answer
0,TG_A_J_20230304,0
1,TG_A_J_20230305,0
2,TG_A_J_20230306,0
3,TG_A_J_20230307,0
4,TG_A_J_20230308,0


In [33]:
# Reset the answer column to 0 before it is filled in.
# NOTE(review): appears redundant — the next cell overwrites the whole column.
submission["answer"] = 0

In [34]:
# Fill "answer" by looking up each row's prediction in result_map by position,
# instead of popping values off a shared list. Popping consumes the list, so
# re-running the cell without first rebuilding the copy raised IndexError; this
# version is idempotent and yields the same values as a first run.
submission_category = submission["ID"].str.replace(r'_\d{8}$', '', regex=True)
# 0-based position of each row among the rows of its own category, in file order.
position_in_category = submission.groupby(submission_category).cumcount()
submission["answer"] = [
    result_map[category_key][position]
    for category_key, position in zip(submission_category, position_in_category)
]

In [20]:
# Display the last rows to confirm the answer column now holds predictions.
submission.tail()

Unnamed: 0,ID,answer
1087,RD_F_J_20230327,409.107835
1088,RD_F_J_20230328,416.558523
1089,RD_F_J_20230329,406.792998
1090,RD_F_J_20230330,399.355576
1091,RD_F_J_20230331,393.630058


In [23]:
# Write the filled-in submission into the same directory as the input files,
# without the index column.
submission_output_path = f"{drive_path}/submission_231107_2.csv"
submission.to_csv(submission_output_path, index=False)