In [None]:
! pip install autogluon

import pandas as pd
import numpy as np

from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Read the competition CSVs and keep only the columns this notebook uses.
train_columns = ['ID', 'timestamp', 'item', 'supply(kg)', 'price(원/kg)']
test_columns = ['ID', 'timestamp']

train_df = pd.read_csv('/kaggle/input/jeju-dataset/train.csv')[train_columns]
test_df = pd.read_csv('/kaggle/input/jeju-dataset/test.csv')[test_columns]

!pip install pytimekr

from pytimekr import pytimekr

# Collect every date returned by pytimekr.holidays() for the years 2019-2023,
# formatted as 'YYYY-MM-DD' strings, in year order.
holiday_list = [
    day.strftime('%Y-%m-%d')
    for year in range(2019, 2024)
    for day in pytimekr.holidays(year=year)
]
print(holiday_list)

# Flag training rows whose date appears in holiday_list (1 = holiday, 0 = not).
# At this point 'timestamp' still holds the strings read from the CSV, so it is
# compared directly with the 'YYYY-MM-DD' strings in holiday_list
# (assumes the CSV uses that same date format -- TODO confirm).
# A single vectorised isin() replaces the per-row scan of the whole list.
train_df['holiday'] = train_df['timestamp'].isin(holiday_list).astype(int)

# Parse the 'timestamp' column of both frames into pandas datetimes.
for frame in (train_df, test_df):
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])

# Derive calendar features from 'timestamp' for both frames.
#
# The weekday indicator columns are built with vectorised comparisons instead
# of element-wise chained assignment (df['sun'][i] = 1). Chained assignment
# raises SettingWithCopyWarning, does not write through to the frame when
# pandas Copy-on-Write is enabled, and looks rows up by label, so it only
# worked while the index labels were exactly 0..n-1.
for df in [train_df, test_df]:
    df['year'] = df['timestamp'].dt.year
    df['month'] = df['timestamp'].dt.month
    df['day'] = df['timestamp'].dt.day
    df['weekday'] = df['timestamp'].dt.weekday  # pandas convention: Monday=0 ... Sunday=6
    # 1 when the row falls on that weekday, 0 otherwise.
    df['sat'] = (df['weekday'] == 5).astype(int)
    df['sun'] = (df['weekday'] == 6).astype(int)
    df['mon'] = (df['weekday'] == 0).astype(int)
    df['fri'] = (df['weekday'] == 4).astype(int)

# The first six characters of ID serve as the series identifier in both frames.
for frame in (train_df, test_df):
    frame['item_id'] = frame['ID'].str[:6]

# log(price + 1) of the price column, stored alongside the raw price.
train_df['log_target'] = np.log(1 + train_df["price(원/kg)"])

# Build the AutoGluon frame from train_df, leaving out the raw identifier and
# the intermediate date-part columns.
dropped_columns = ['ID', 'item', 'year', 'month', 'day', 'weekday']
model_input = train_df.drop(columns=dropped_columns)
data = TimeSeriesDataFrame(model_input)

print(data)

from autogluon.timeseries.splitter import MultiWindowSplitter

# Cast each indicator covariate to float, one column at a time.
cat_list = ['mon', 'fri', 'sat', 'sun', 'holiday']
for column_name in cat_list:
    data[column_name] = data[column_name].astype(float)

# Inspect the resulting column dtypes.
data.dtypes

In [None]:
# Predictor for a 28-step forecast of "price(원/kg)", scored with RMSE and fed
# the weekday/holiday indicators as known covariates.
predictor_settings = dict(
    prediction_length=28,
    target="price(원/kg)",
    eval_metric="RMSE",
    known_covariates_names=['mon', 'fri', 'sat', 'sun', 'holiday'],
)
predictor = TimeSeriesPredictor(**predictor_settings)

# Fit with 20 validation windows.
# (Options left disabled here: presets="high_quality", hyperparameter_tune_kwargs="auto".)
predictor.fit(data, num_val_windows=20, random_seed=42)

In [None]:
# Hyperparameter tuning.
# NOTE: this assigns to the name `predictor` again, so any later cell that
# uses `predictor` sees whichever predictor was created most recently.
from autogluon.common import space
predictor = TimeSeriesPredictor(
    prediction_length=28,
    target="price(원/kg)",
    eval_metric="RMSE",
    known_covariates_names = ['mon', 'fri', 'sat', 'sun' ,'holiday'])

predictor.fit(
    data,
    random_seed=42,
    num_val_windows = 1,
    hyperparameters={
        "Naive": {
            "n_jobs": 8,
        },
        
        "SeasonalNaive": {
            "seasonal_period": space.Categorical(1, 4, 7, 12, None),
            "n_jobs": 8,
        },
        
        "Theta": {
            "decomposition_type": space.Categorical("multiplicative", "additive"),
            "seasonal_period": space.Categorical(1, 4, 7, 12, None),
            "n_jobs": 8,
            "max_ts_length": space.Int(1000, 5000),
        },
        
        "AutoETS": {
            "max_ts_length": space.Int(1000, 5000),
            "n_jobs": 8,
            "seasonal_period": space.Categorical(1, 4, 7, 12, None),
        },
        
        "RecursiveTabular": {
            "scaler": space.Categorical("standard", "mean_abs", None),
            "max_num_items": space.Int(10000, 50000),
            "max_num_samples": space.Int(1000000, 5000000),
        },
        
        "DeepAR": {
            #"disable_static_features": space.Bool(),
            #"disable_known_covariates": space.Bool(),
            "num_layers": space.Int(1, 4),
            "hidden_size": space.Int(10, 100),
            "dropout_rate": space.Real(0.05, 0.3),
            "scaling": space.Bool(),
            "max_epochs": space.Int(100, 500),
            "batch_size": space.Int(8, 128),
            # The range spans five orders of magnitude, so sample on a log
            # scale; linear sampling would put ~90% of trials in [1e-2, 1e-1].
            "lr": space.Real(1e-6, 1e-1, log=True),
            "early_stopping_patience": 100
        },
        
        "DLinear": {
            "context_length": space.Int(50, 500),
            "hidden_dimension": space.Int(10, 100),
            "scaling": space.Categorical("mean", "std", None),
            "max_epochs": space.Int(100, 1000),
            "batch_size": space.Int(8, 128),
            # Log-scale sampling for the same reason as the DeepAR learning rate.
            "lr": space.Real(1e-6, 1e-1, log=True),
            "early_stopping_patience": 100,
            # Two orders of magnitude; also sampled on a log scale.
            "weight_decay": space.Real(1e-9, 1e-7, log=True),
        },
        
    },
    hyperparameter_tune_kwargs="auto",
)

In [None]:
predictor.refit_full()

from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe

# Build the known covariates for the 28 steps that follow `data`.
# Values are cast to float to match the training columns; if the training
# columns are switched to category, cast to category here instead.

# Covariate column name -> weekday number (Monday=0 ... Sunday=6).
weekday_flags = {"mon": 0, "fri": 4, "sat": 5, "sun": 6}

future_index = get_forecast_horizon_index_ts_dataframe(data, prediction_length=28)
future_timestamps = future_index.get_level_values("timestamp")
known_covariates = pd.DataFrame(index=future_index)
for flag_name, weekday_number in weekday_flags.items():
    known_covariates[flag_name] = (future_timestamps.weekday == weekday_number).astype(float)
# Compare formatted calendar dates with holiday_list. The list holds
# 'YYYY-MM-DD' strings, so testing the weekday numbers (0-6) against it, as the
# previous code did, could never match and always produced 0.
known_covariates["holiday"] = future_timestamps.strftime('%Y-%m-%d').isin(holiday_list).astype(float)
known_covariates = TimeSeriesDataFrame(known_covariates)
known_covariates.head()

# Forecast with the prepared known covariates.
pred = predictor.predict(data, known_covariates=known_covariates, random_seed=42)

print(known_covariates)
print(data)

known_covariates.dtypes

# Copy the mean forecast into test_df row by row (index-aligned), then into
# the sample submission.
submission = pd.read_csv('/kaggle/input/jeju-dataset/sample_submission.csv')
test_df['answer'] = pred.reset_index()['mean']

# Sunday rows and negative forecasts are both set to zero.
zero_mask = (test_df['sun'] == 1.0) | (test_df['answer'] < 0.0)
test_df.loc[zero_mask, 'answer'] = 0.0
print(test_df['answer'])

submission['answer'] = test_df['answer']
submission.to_csv('known_num_val_20.csv', index=False)

In [None]:
# Second predictor with the same configuration: 28-step forecast of
# "price(원/kg)", RMSE metric, weekday/holiday indicators as known covariates.
predictor_2_settings = dict(
    prediction_length=28,
    target="price(원/kg)",
    eval_metric="RMSE",
    known_covariates_names=['mon', 'fri', 'sat', 'sun', 'holiday'],
)
predictor_2 = TimeSeriesPredictor(**predictor_2_settings)

# Fit with 30 validation windows.
# (Options left disabled here: presets="high_quality", hyperparameter_tune_kwargs="auto".)
predictor_2.fit(data, num_val_windows=30, random_seed=42)

In [None]:
predictor_2.refit_full()

from autogluon.timeseries.utils.forecast import get_forecast_horizon_index_ts_dataframe

# Build the known covariates for the 28 steps that follow `data`.
# Values are cast to float to match the training columns; if the training
# columns are switched to category, cast to category here instead.

# Covariate column name -> weekday number (Monday=0 ... Sunday=6).
weekday_flags = {"mon": 0, "fri": 4, "sat": 5, "sun": 6}

future_index = get_forecast_horizon_index_ts_dataframe(data, prediction_length=28)
future_timestamps = future_index.get_level_values("timestamp")
known_covariates = pd.DataFrame(index=future_index)
for flag_name, weekday_number in weekday_flags.items():
    known_covariates[flag_name] = (future_timestamps.weekday == weekday_number).astype(float)
# Compare formatted calendar dates with holiday_list. The list holds
# 'YYYY-MM-DD' strings, so testing the weekday numbers (0-6) against it, as the
# previous code did, could never match and always produced 0.
known_covariates["holiday"] = future_timestamps.strftime('%Y-%m-%d').isin(holiday_list).astype(float)
known_covariates = TimeSeriesDataFrame(known_covariates)
known_covariates.head()

# Forecast with the prepared known covariates.
pred = predictor_2.predict(data, known_covariates=known_covariates, random_seed=42)

print(known_covariates)
print(data)

known_covariates.dtypes

# Copy the mean forecast into test_df row by row (index-aligned), then into
# the sample submission.
submission = pd.read_csv('/kaggle/input/jeju-dataset/sample_submission.csv')
test_df['answer'] = pred.reset_index()['mean']

# Sunday rows and negative forecasts are both set to zero.
zero_mask = (test_df['sun'] == 1.0) | (test_df['answer'] < 0.0)
test_df.loc[zero_mask, 'answer'] = 0.0
print(test_df['answer'])

submission['answer'] = test_df['answer']
submission.to_csv('known_num_val_30.csv', index=False)