### Libraries

In [None]:
from sklearn.model_selection import train_test_split

from sktime.performance_metrics.forecasting import MeanSquaredError, MeanAbsolutePercentageError
from sktime.forecasting.fbprophet import Prophet
from sktime.forecasting.sarimax import SARIMAX
from sktime.forecasting.varmax import VARMAX
from sktime.forecasting.ets import AutoETS

import matplotlib.pyplot as plt
import pandas as pd

### Data preparation

In [None]:
# Load the TSLA CSV, parse the "Date" column to datetimes and use it as the row index.
df = pd.read_csv("../data/TSLA.csv")
df = df.assign(Date=pd.to_datetime(df["Date"])).set_index("Date")
# Ordered split (shuffle=False): the last 90 rows are held out as the test set.
train_df, test_df = train_test_split(df, shuffle=False, test_size=90)

### Baselines

In [None]:
def get_metrics(y_hat: pd.Series, y_true: pd.Series) -> dict[str, dict[str, float]]:
    """Score a forecast against the actual values over three nested horizons.

    The first 7, 30 and 90 observations of both series are compared and
    reported under the keys "weekly", "monthly" and "quarterly".

    Args:
        y_hat: Forecast values, ordered like ``y_true``.
        y_true: Observed values for the same steps.

    Returns:
        A mapping ``{horizon_label: {"RMSE": value, "MAPE": value}}``.
    """
    # square_root is a constructor option of sktime's MeanSquaredError; setting it
    # here makes the metric an RMSE regardless of how call-time kwargs are handled.
    rmse = MeanSquaredError(square_root=True)
    mape = MeanAbsolutePercentageError()
    horizons = {7: "weekly", 30: "monthly", 90: "quarterly"}
    metrics = {}
    for n_steps, label in horizons.items():
        actual = y_true[:n_steps]
        forecast = y_hat[:n_steps]
        # sktime metrics are called as metric(y_true, y_pred). The order matters for
        # MAPE, whose denominator is the actual value, not the forecast.
        metrics[label] = {
            "RMSE": rmse(actual, forecast),
            "MAPE": mape(actual, forecast),
        }
    return metrics

# Per-model results, keyed by model name; the model cells below each add one entry.
metrics = dict()
# Actual "Close" values of the held-out rows that every forecast is scored against.
targets = test_df.loc[:, "Close"]

#### Random Walk

In [None]:
import random

In [None]:
# Random-walk baseline: start from the last training close and, at every step, move
# up or down by the mean absolute step-to-step change of the training closes.
avg_vol = train_df["Close"].diff().abs().mean()
# Dedicated seeded generator so a Restart & Run All reproduces the same walk.
rng = random.Random(42)
walks = [train_df["Close"].values[-1]]
# One step per held-out observation, so the index assignment below always fits.
for _step in range(len(targets)):
    sign = rng.choice([-1, 1])
    walks.append(walks[-1] + sign * avg_vol)
# Drop the seed value (the last training close) and align the walk with the targets.
walks = pd.Series(walks[1:])
walks.index = targets.index
print(get_metrics(y_hat=walks, y_true=targets))

#### VARMAX

In [None]:
# VARMAX fitted on every column of the training frame, with maxiter set to 10.
varmax = VARMAX(maxiter=10)
varmax.fit(train_df)

In [None]:
# Forecast one step per test row (relative horizon 1..len(test_df)) and keep "Close".
preds = varmax.predict([*range(1, len(test_df) + 1)])["Close"]
# Store the forecast together with its horizon-wise scores.
metrics.update(
    {
        "varmax": {
            "series": preds,
            "metrics": get_metrics(y_hat=preds, y_true=targets),
        }
    }
)

#### Prophet

In [None]:
# Prophet configured with daily frequency, US holidays, yearly seasonality and
# one potential changepoint per 12 training rows.
prophet = Prophet(
    yearly_seasonality=True,
    add_country_holidays={"country_name": "USA"},
    n_changepoints=len(train_df) // 12,
    freq="D",
)
prophet.fit(train_df)

In [None]:
# Forecast one step per test row (relative horizon 1..len(test_df)) and keep "Close".
preds = prophet.predict([*range(1, len(test_df) + 1)])["Close"]
# Store the forecast together with its horizon-wise scores.
metrics.update(
    {
        "prophet": {
            "series": preds,
            "metrics": get_metrics(y_hat=preds, y_true=targets),
        }
    }
)

#### ETS

In [None]:
# AutoETS with automatic model selection, fitted on the training frame.
ets = AutoETS(auto=True).fit(train_df)
# Forecast one step per test row (relative horizon 1..len(test_df)) and keep "Close".
preds = ets.predict([*range(1, len(test_df) + 1)])["Close"]
# Store the forecast together with its horizon-wise scores.
metrics.update(
    {
        "ets": {
            "series": preds,
            "metrics": get_metrics(y_hat=preds, y_true=targets),
        }
    }
)

#### SARIMAX

In [None]:
# SARIMAX with order (5, 0, 5), fitted on the training frame.
sarimax = SARIMAX(order=(5, 0, 5)).fit(train_df)
# Forecast one step per test row (relative horizon 1..len(test_df)) and keep "Close".
preds = sarimax.predict([*range(1, len(test_df) + 1)])["Close"]
# Store the forecast together with its horizon-wise scores.
metrics.update(
    {
        "sarimax": {
            "series": preds,
            "metrics": get_metrics(y_hat=preds, y_true=targets),
        }
    }
)

#### DeepAR

In [None]:
import lightning.pytorch as pl
from lightning.pytorch.callbacks import EarlyStopping
from pytorch_forecasting.data import GroupNormalizer
import pandas as pd
import torch

from pytorch_forecasting import DeepAR, TimeSeriesDataSet
from pytorch_forecasting.metrics import MAE, SMAPE, MultivariateNormalDistributionLoss

In [None]:
# pytorch-forecasting expects an integer time index that increases by 1 per observation.
# A row counter guarantees that, whereas calendar-day offsets leave gaps wherever the
# date index skips days, and later cells already slice the frame by row count.
# NOTE(review): assumes rows are in chronological order, as the unshuffled
# train/test split above also does — confirm.
df["time_idx"] = range(len(df))
# Single constant group id: the whole frame is treated as one series.
df["group"] = "TSLA"

In [None]:
# Window sizes (in time steps) and the last time index that belongs to the training data.
max_prediction_length = 90
max_encoder_length = 120
# NOTE(review): fixed cutoff — verify that `time_idx <= training_cutoff` excludes the
# hold-out window used for evaluation.
training_cutoff = 1000

# Every column except the target, selected by name rather than by position so the
# selection does not depend on the CSV's column order. The last two entries are the
# helper columns "time_idx" and "group" added in the previous cell, hence `cols[:-2]` below.
cols = [name for name in df.columns if name != "Close"]

# Fixed-length samples: exactly `max_encoder_length` context steps followed by
# exactly `max_prediction_length` steps to predict.
training = TimeSeriesDataSet(
    df[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="Close",
    group_ids=["group"],
    min_encoder_length=max_encoder_length,
    max_encoder_length=max_encoder_length,
    min_prediction_length=max_prediction_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=[],
    static_reals=[],
    time_varying_known_categoricals=[],
    # NOTE(review): the remaining data columns are declared as known in the future;
    # confirm that their values really are available at forecast time.
    time_varying_known_reals=["time_idx"] + cols[:-2],
    time_varying_unknown_categoricals=[],
    time_varying_unknown_reals=["Close"],
    target_normalizer=GroupNormalizer(groups=["group"], transformation="softplus"),
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

In [None]:
# Prediction-mode dataset built from the full frame with the training dataset's settings.
validation = TimeSeriesDataSet.from_dataset(
    training,
    df,
    stop_randomization=True,
    predict=True,
)
# Training batches of 16 samples, drawn with the "synchronized" batch sampler.
train_dataloader = training.to_dataloader(
    batch_sampler="synchronized",
    num_workers=0,
    batch_size=16,
    train=True,
)

In [None]:
# Seed the random number generators before building the model so that weight
# initialisation and batch order are repeatable on a fresh Restart & Run All.
pl.seed_everything(42)

# CPU trainer: up to 200 epochs, at most 50 training batches per epoch,
# gradients clipped at 0.1.
trainer = pl.Trainer(
    max_epochs=200,
    accelerator="cpu",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    limit_train_batches=50,
    enable_checkpointing=True,
)

# DeepAR configured from the training dataset, trained with a multivariate normal loss.
net = DeepAR.from_dataset(
    training,
    learning_rate=1e-2,
    log_interval=10,
    log_val_interval=1,
    hidden_size=30,
    rnn_layers=5,
    optimizer="Adam",
    loss=MultivariateNormalDistributionLoss(),
)

# Only a training dataloader is passed; no validation loop runs during fitting.
trainer.fit(net, train_dataloaders=train_dataloader)

In [None]:
# Last encoder + prediction window of the frame: the context rows followed by the
# rows to forecast.
encoder_data = df.iloc[-(max_encoder_length + max_prediction_length):]
# Derive the evaluation dataset from the training dataset instead of re-declaring it.
# from_dataset reuses the training configuration together with its fitted normalizer,
# scalers and encoders, so the inputs are scaled exactly as during training.
encoder_dataset = TimeSeriesDataSet.from_dataset(
    training,
    encoder_data,
    predict=True,
    stop_randomization=True,
)
encoder_dataloader = encoder_dataset.to_dataloader(train=False, batch_size=1, num_workers=0)
preds = net.predict(encoder_dataloader, return_x=True, mode="prediction")
# Give the forecast the same index as the hold-out targets (as the random-walk
# baseline does) so both series refer to the same rows when scored.
get_metrics(
    y_hat=pd.Series(preds.output[0].tolist(), index=targets.index),
    y_true=targets,
)

### Temporal Fusion Transformer

In [None]:
from pytorch_forecasting import TemporalFusionTransformer, QuantileLoss

# Seed the random number generators so initialisation, dropout and batch order
# are repeatable on a fresh Restart & Run All.
pl.seed_everything(42)

# Temporal Fusion Transformer configured from the same training dataset.
# `net` and `trainer` are rebound here and replace the DeepAR objects from above.
net = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.01,
    hidden_size=16,
    attention_head_size=2,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=QuantileLoss(),
    log_interval=10,  # log every 10 batches
    optimizer="Ranger",
    reduce_on_plateau_patience=4,
)

trainer = pl.Trainer(
    max_epochs=10,
    accelerator="cpu",
    enable_model_summary=True,
    gradient_clip_val=0.1,
    # limit_train_batches=50,  # uncomment to cap the number of training batches per epoch
    # fast_dev_run=True,  # uncomment to check that the network or dataset has no serious bugs
)
trainer.fit(net, train_dataloaders=train_dataloader)

In [None]:
# Predict the final window with the TFT and score it against the hold-out targets.
preds = net.predict(encoder_dataloader, return_x=True, mode="prediction")
# Give the forecast the same index as the targets (as the random-walk baseline does)
# so both series refer to the same rows when scored.
get_metrics(
    y_hat=pd.Series(preds.output[0].tolist(), index=targets.index),
    y_true=targets,
)