In [8]:
%%capture

!pip install darts
!pip install openpyxl
!pip install xlsx

In [9]:
import numpy as np
import pandas as pd
import torch
import warnings

from datetime import datetime
from glob import glob
from matplotlib import pyplot as plt

from darts import TimeSeries
from darts.utils.timeseries_generation import (
    gaussian_timeseries,
    linear_timeseries,
    sine_timeseries,
)
from darts.models import (
    RNNModel,
    TCNModel,
    TransformerModel,
    NBEATSModel,
    BlockRNNModel,
)
from darts.models.forecasting.auto_arima import AutoARIMA
from darts.models.forecasting.exponential_smoothing import ExponentialSmoothing
from darts.models.forecasting.exponential_smoothing import ModelMode, SeasonalityMode
from darts.models.forecasting.ensemble_model import EnsembleModel
from darts.models.forecasting.prophet_model import Prophet
from darts.models.forecasting.sf_auto_arima import StatsForecastAutoARIMA
from darts.models.forecasting.kalman_forecaster import KalmanForecaster
from darts.models.forecasting.baselines import NaiveEnsembleModel
from darts.dataprocessing.transformers import (
    Scaler,
    MissingValuesFiller,
    Mapper,
    InvertibleMapper,
)
from darts.dataprocessing import Pipeline
from darts.metrics import mape, smape
from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.datasets import AirPassengersDataset, MonthlyMilkDataset

%matplotlib inline
warnings.filterwarnings(action='ignore', message='Setting attributes')

In [10]:
# Load the training workbook, indexed by its "date" column.
df = pd.read_excel(
    "../input/haispb2022/Train-m.xlsx",
    engine="openpyxl",
    index_col="date",
)

# Discard every column that contains at least one negative value.
has_negative = (df < 0).any()
df = df.drop(columns=has_negative[has_negative].index)

# Declare the index as month-start frequency ("MS").
df.index = pd.DatetimeIndex(df.index, freq="MS")

In [11]:
# One univariate TimeSeries per remaining training column, all sharing df's index.
dfs = [
    TimeSeries.from_times_and_values(df.index, df[column_name])
    for column_name in df.columns
]

In [12]:
# Collect, for every column of every example workbook, the part of the series
# that precedes the "Forecast" placeholders, as darts TimeSeries objects.
test_dfs = []

for i in range(1, 14 + 1):
    example_path = f"../input/haispb2022/Test_example{i}.xlsx"
    test_df = pd.read_excel(example_path, engine="openpyxl", index_col="Unnamed: 0")
    try:
        # Each label is rewritten as "20" + (first digit + 1) + rest and parsed
        # with the "%Ym%m" format; freq="MS" additionally requires the resulting
        # dates to form a month-start sequence.
        test_df.index = pd.DatetimeIndex(
            [datetime.strptime(f"20{int(x[0]) + 1}{x[1:]}", "%Ym%m") for x in test_df.index],
            freq="MS",
        )
    except ValueError as e:
        # Files whose index cannot be turned into a monthly index are skipped on
        # purpose; report them instead of dropping them silently.
        warnings.warn(f"Skipping {example_path}: could not build a monthly index ({e})")
        continue

    # Last timestamp per column whose value is not the "Forecast" placeholder.
    forecast_cutoffs = [max(test_df[test_df[col] != "Forecast"].index) for col in test_df.columns]

    new_dfs = []
    for cutoff, cur_df_name in zip(forecast_cutoffs, test_df.columns):
        # Slice once and reuse it for both the timestamps and the values.
        observed = test_df.loc[:cutoff, cur_df_name]
        new_dfs.append(TimeSeries.from_times_and_values(observed.index, observed))

    test_dfs.extend(new_dfs)

In [13]:
# Preprocessing pipeline with a single step: fill missing values.
filler = MissingValuesFiller()
pipeline = Pipeline([filler])

# Apply the same pipeline to the training series first, then to the example series.
transformed_dfs, transformed_test_dfs = (
    pipeline.transform(series_group) for series_group in (dfs, test_dfs)
)

In [14]:
# model = NBEATSModel(
#     input_chunk_length=48, 
#     output_chunk_length=24, 
#     num_blocks=2, num_layers=8,
#     layer_widths=256, dropout=.3,
#     n_epochs=25, random_state=23, 
#     pl_trainer_kwargs={"accelerator": "gpu", "devices": [0]}, 
#     optimizer_kwargs={"lr": 1e-2},
#     save_checkpoints=True, model_name="Full-test",
#     force_reset=True
# )

# model = AutoARIMA(start_p=0, start_q=0, max_p=5, max_q=5, max_d=5, alpha=.05, d=1, start_P=0, D=1, start_Q=0, max_P=5, max_D=5, max_Q=5, m=12, n_fits=25)

# Settings shared by the two damped additive-trend / additive-seasonality members.
damped_additive = dict(
    trend=ModelMode.ADDITIVE,
    seasonal=SeasonalityMode.ADDITIVE,
    damped=True,
)

# Members of the ensemble that is used for every forecast below.
models = [
    # NOTE(review): this configuration is listed twice -- confirm the duplicate
    # is an intentional up-weighting and not a leftover.
    ExponentialSmoothing(**damped_additive),
    ExponentialSmoothing(**damped_additive),
    ExponentialSmoothing(trend=ModelMode.NONE, seasonal=SeasonalityMode.ADDITIVE),
    KalmanForecaster(dim_x=4),
]

model = NaiveEnsembleModel(models)

# model = KalmanForecaster(dim_x=64)

# model = Prophet()

In [39]:
# Hold-out check of the ensemble on one column of one example workbook.
test_df = pd.read_excel(
    "../input/haispb2022/Test_example8.xlsx",
    engine="openpyxl",
    index_col="Unnamed: 0",
)
# Rewrite each label as "20" + (first digit + 1) + rest and parse it with "%Ym%m".
test_df.index = pd.DatetimeIndex(
    [datetime.strptime(f"20{int(label[0]) + 1}{label[1:]}", "%Ym%m") for label in test_df.index]
)

col_name = "Var1"
# Keep only the rows that are not "Forecast" placeholders.
observed_mask = test_df[col_name] != "Forecast"
X_test = TimeSeries.from_series(test_df.loc[observed_mask, col_name])
# y_test = TimeSeries.from_dataframe(test_df[test_df["Var1"] == "Forecast"])

# X_test = transformed_test_dfs[0]

# Split at the 0.75 point: fit on the first part, score on the remainder.
X_val, y_val = X_test.split_before(.75)

model.fit(series=X_val)
pred = model.predict(n=len(y_val), num_samples=1)
# Shift the forecast by half of the jump between the last fitted value and the
# first predicted value.
bias = pred.values()[0, 0] - X_val.values()[-1, 0]
pred = pred - bias / 2

# models[0].fit(series=X_val)
# pred2 = models[0].predict(n=len(y_val), num_samples=1)
# bias = pred2[0].values()[0][0] - X_val[-1].values()[0][0]
# pred2 -= bias / 2

plt.figure(figsize=(16, 9))
X_test.plot(label="actual")
pred.plot(label="forecast")
# pred2.plot(label="forecast")
plt.legend()
print(f"SMAPE (ensemb) = {smape(y_val, pred):.2f}%")
# print("SMAPE (single) = {:.2f}%".format(smape(y_val, pred2)))

In [None]:
# Produce one output workbook per input workbook: every column keeps its observed
# values and has its "Forecast" placeholders replaced by the ensemble forecast.
test_path = "../input/haispb2022/*.xlsx"
new_path = "/kaggle/working/"
# NOTE(review): the pattern also matches Train-m.xlsx, which is read elsewhere with
# index_col="date" rather than "Unnamed: 0" -- confirm the pattern should not be narrowed.
for path in glob(test_path):
    test_df = pd.read_excel(path, engine="openpyxl", index_col="Unnamed: 0")
    # Rewrite each label as "20" + (first digit + 1) + rest and parse it with "%Ym%m".
    test_df.index = pd.DatetimeIndex(
        [datetime.strptime(f"20{int(x[0]) + 1}{x[1:]}", "%Ym%m") for x in test_df.index]
    )
    cols = []
    for col_name in test_df.columns:
        is_forecast = test_df[col_name] == "Forecast"
        X_test = TimeSeries.from_series(test_df.loc[~is_forecast, col_name])
        horizon = int(is_forecast.sum())

        if horizon:
            # Fit on THIS column's history; without refitting, the forecast would
            # come from whatever series the model was last fitted on.
            model.fit(series=X_test)
            pred = model.predict(n=horizon, num_samples=1)
            # Shift the forecast by half of the jump between the last observed
            # value of this series and the first predicted value.
            bias = pred.values()[0, 0] - X_test.values()[-1, 0]
            pred -= bias / 2
            col = np.append(X_test.values(), pred.values())
        else:
            # Nothing to forecast: keep the observed values as they are.
            col = X_test.values()

        cols.append(col.reshape(-1))

    # Separate name so the training frame `df` is not overwritten.
    submission_df = pd.DataFrame(cols).T
    submission_df.columns = test_df.columns
    # Name the output after the current input file, not after the glob pattern.
    submission_df.to_excel(new_path + path.split("/")[-1], index=False)