In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import darts
import numpy as np

In [None]:
# Customers for which a time series is built (iterated by build_ts_from_alki_df).
CUSTOMERS_NAMES = ["NUTRAVANCE", "ARGALYS", "LES MIRACULEUX", "MINCI DELICE"]


def build_df_from_alki_csv(filepath):
    """Read the semicolon-separated data CSV into a DataFrame indexed by DATE.

    Args:
        filepath: Path (or file-like object) handed straight to ``pd.read_csv``.

    Returns:
        A DataFrame whose index is the parsed ``DATE`` column and whose
        remaining columns include ``CUSTOMER`` (str) and ``QUANTITY`` (int).
    """
    read_options = {
        "sep": ";",
        "dtype": {"DATE": str, "CUSTOMER": str, "QUANTITY": int},
        "index_col": "DATE",
        "parse_dates": ["DATE"],
    }
    return pd.read_csv(filepath, **read_options)


def build_ts_from_alki_df(dataframe, customer_names=None):
    """Build one daily Darts TimeSeries per customer.

    Args:
        dataframe: DataFrame holding a ``CUSTOMER`` column used for filtering
            and a ``QUANTITY`` column used as the series values.
        customer_names: Optional iterable of customer names to build series
            for. When omitted, the module-level ``CUSTOMERS_NAMES`` is used,
            which matches the previous behaviour.

    Returns:
        A dict mapping each customer name to the TimeSeries built from that
        customer's rows, with ``fill_missing_dates=True`` and ``freq="D"``.
    """
    if customer_names is None:
        customer_names = CUSTOMERS_NAMES

    timeseries = {}
    for customer_name in customer_names:
        # Keep only the rows belonging to the current customer.
        df_customer = dataframe[dataframe["CUSTOMER"] == customer_name]

        timeseries[customer_name] = darts.TimeSeries.from_dataframe(
            df_customer, value_cols="QUANTITY", fill_missing_dates=True, freq="D"
        )

    return timeseries

In [None]:
# Load the training CSV, then build one series per customer from it.
data_alki_df = build_df_from_alki_csv("../train.csv")
series_alki = build_ts_from_alki_df(data_alki_df)

# Display how many customer series were built.
len(series_alki)

In [None]:
# Customer whose series is used for the rest of the notebook.
CUSTOMER = "NUTRAVANCE"

series_customer = series_alki[CUSTOMER]

In [None]:
# Per-timestep weights: 1.0 where a value was observed, 0.2 where the value is
# missing in the customer's series.
series_na_mask = series_customer.pd_series().isna()
weight_values = np.where(series_na_mask, 0.2, 1.0).reshape(-1, 1)
# Wrap the weights in a series sharing the time index of series_customer.
sample_weight = series_customer.with_values(weight_values)

In [None]:
from darts.dataprocessing.transformers.missing_values_filler import MissingValuesFiller

# Fill the missing values of the selected customer's series using the filler's
# default settings (presumably the gaps come from fill_missing_dates=True when
# the series was built -- confirm).
transformer_filler = MissingValuesFiller()
series_filled = transformer_filler.transform(series_customer)

In [None]:
from darts.utils.model_selection import train_test_split

test_size = 0.2

# The filled series and its sample weights must be split with exactly the same
# settings so that they stay aligned; keep those settings in one place.
# NOTE(review): input_size/horizon here (48/24) differ from the chunk lengths
# later given to the TFT model (24/12) -- confirm this is intentional.
split_options = dict(
    test_size=test_size,
    axis=1,
    input_size=48,
    horizon=24,
    vertical_split_type="model-aware",
)

series_train, series_val = train_test_split(series_filled, **split_options)
sample_weight_train, sample_weight_val = train_test_split(
    sample_weight, **split_options
)

# Visual check of the two splits.
series_train.plot()
series_val.plot()

In [None]:
from darts.dataprocessing.transformers import Scaler

transformer_scaler = Scaler()

# Fit the scaler on the training split ONLY, then reuse the fitted scaler for
# the other series. Calling fit_transform on each series would re-fit the
# scaler every time, putting train/validation/full series on different scales
# and leaking validation statistics into the scaling.
series_train_t = transformer_scaler.fit_transform(series_train)
series_val_t = transformer_scaler.transform(series_val)
# Full (unfilled) customer series on the training scale, used as the "actual"
# reference when plotting forecasts.
series_t = transformer_scaler.transform(series_customer)

In [None]:
from darts.models import TFTModel
from darts.utils.likelihood_models import QuantileRegression
from torchmetrics import MeanAbsolutePercentageError
from pytorch_lightning.callbacks import EarlyStopping


# default quantiles for QuantileRegression
quantiles = [
    0.01,
    0.05,
    0.1,
    0.15,
    0.2,
    0.25,
    0.3,
    0.4,
    0.5,
    0.6,
    0.7,
    0.75,
    0.8,
    0.85,
    0.9,
    0.95,
    0.99,
]
# Passed to TFTModel as input_chunk_length / output_chunk_length respectively.
input_chunk_length = 24
forecast_horizon = 12

# Encoder configuration handed to the model through `add_encoders`.
add_encoders = {
    "cyclic": {"future": ["month"]},
    "datetime_attribute": {"future": ["dayofweek"]},
    "transformer": Scaler(),
}

# Extra metric handed to the model through `torch_metrics`.
torch_metrics = MeanAbsolutePercentageError()

# Stop training when the monitored validation metric has not improved by at
# least `min_delta` for `patience` consecutive checks.
# NOTE(review): the monitor key presumably follows a "val_<metric class name>"
# naming used when logging torch_metrics -- confirm against the Darts docs.
my_stopper = EarlyStopping(
    monitor="val_MeanAbsolutePercentageError",  # "val_loss",
    patience=50,
    min_delta=0.05,
    mode="min",
)
pl_trainer_kwargs = {"callbacks": [my_stopper]}

# Build the TFT model from the configuration defined above in this cell
# (chunk lengths, encoders, quantiles, metric and early-stopping callback).
model_tft = TFTModel(
    input_chunk_length=input_chunk_length,
    output_chunk_length=forecast_horizon,
    hidden_size=512,
    lstm_layers=1,
    num_attention_heads=4,
    full_attention=False,
    dropout=0.1,
    batch_size=512,
    n_epochs=600,
    add_relative_index=True,
    add_encoders=add_encoders,
    likelihood=QuantileRegression(
        quantiles=quantiles
    ),  # QuantileRegression is set per default
    # loss_fn=MSELoss(),
    random_state=42,
    log_tensorboard=True,
    torch_metrics=torch_metrics,
    optimizer_kwargs={"lr": 0.001},
    pl_trainer_kwargs=pl_trainer_kwargs,
)

# Display the model's repr as the cell output.
model_tft

In [None]:
from darts.models.forecasting.forecasting_model import GlobalForecastingModel

# Sanity check: displays whether the TFT model is a GlobalForecastingModel.
isinstance(model_tft, GlobalForecastingModel)

In [None]:
# Train on the scaled training series, validating on the scaled validation
# series; the sample-weight series from the same split are passed alongside.
model_tft.fit(
    series_train_t,
    val_series=series_val_t,
    verbose=True,
    sample_weight=sample_weight_train,
    val_sample_weight=sample_weight_val,
)

In [None]:
from darts.models.forecasting.sf_auto_arima import StatsForecastAutoARIMA

# Same encoder configuration as the one defined for the TFT model, redefined
# here (this rebinds `add_encoders` and creates a new Scaler instance).
add_encoders = {
    "cyclic": {"future": ["month"]},
    "datetime_attribute": {"future": ["dayofweek"]},
    "transformer": Scaler(),
}

# AutoARIMA baseline configured with a season length of 30.
model_arima = StatsForecastAutoARIMA(
    season_length=30,
    add_encoders=add_encoders,
)

In [None]:
# Fit the ARIMA baseline on the scaled training series only (no validation
# series or sample weights are passed here).
model_arima.fit(series_train_t)

In [None]:
# before starting, we define some constants
# Number of samples requested from model.predict() in eval_model.
num_samples = 10

# Figure size and quantile bands used when plotting predictions.
figsize = (16, 6)
lowest_q, low_q, high_q, highest_q = 0.01, 0.1, 0.9, 0.99
# round() instead of int(): int() truncates, so a product such as 0.29 * 100
# (28.999999999999996 in floating point) would be labelled 28 instead of 29.
label_q_outer = f"{round(lowest_q * 100)}-{round(highest_q * 100)}th percentiles"
label_q_inner = f"{round(low_q * 100)}-{round(high_q * 100)}th percentiles"

In [None]:
from darts.metrics import mape, mase, rmse
import matplotlib.pyplot as plt


def eval_model(model, n, actual_series, val_series):
    """Forecast ``n`` steps with ``model`` and plot them against the actual data.

    Args:
        model: Fitted model exposing ``predict(n=..., num_samples=...)``.
        n: Number of steps to forecast.
        actual_series: Series holding the actual values; only the window
            covered by ``val_series`` is plotted.
        val_series: Series providing the plotted time window and the
            reference values for the MAPE shown in the title.

    The sample count, figure size, quantile bounds and band labels are read
    from the module-level ``num_samples``, ``figsize``, ``*_q`` and
    ``label_q_*`` names. Nothing is returned; the function only draws.
    """
    forecast = model.predict(n=n, num_samples=num_samples)

    # Actual values, restricted to the validation window.
    plt.figure(figsize=figsize)
    window = slice(val_series.start_time(), val_series.end_time())
    actual_series[window].plot(label="actual")

    # Wide quantile band first, then the narrower one on top of it.
    quantile_bands = (
        (lowest_q, highest_q, label_q_outer),
        (low_q, high_q, label_q_inner),
    )
    for lower, upper, band_label in quantile_bands:
        forecast.plot(low_quantile=lower, high_quantile=upper, label=band_label)

    score = mape(val_series, forecast)
    plt.title("MAPE: {:.2f}%".format(score))
    plt.legend()


# Forecast horizon: number of days spanned by the scaled validation series.
n = (series_val_t.end_time() - series_val_t.start_time()).days

# Use the computed horizon rather than a hard-coded 200, so the forecast
# length follows the validation window whenever the split changes.
eval_model(model_arima, n, series_t, series_val_t)