Please run the following two cells before running the rest of the Notebook!

As these plotting settings are standard throughout the book, we do not repeat them every time we plot something.

In [3]:
# %matplotlib inline
%config InlineBackend.figure_format = "retina"

In [4]:
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
# SettingWithCopyWarning is imported from its public location, pandas.errors
from pandas.errors import SettingWithCopyWarning
# silence FutureWarning and SettingWithCopyWarning messages
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)

# feel free to modify, for example, change the context to "notebook"
sns.set_theme(context="talk", style="whitegrid", 
              palette="colorblind", color_codes=True, 
              rc={"figure.figsize": [12, 8]})

# Chapter 15 - Deep Learning in Finance

## 15.1 Exploring `fastai`'s Tabular Learner

### How to do it...

1. Import the libraries:

In [None]:
# FIX: Install the fastai library
!pip install fastai

# Now your original imports will work
from fastai.tabular.all import *
from sklearn.model_selection import train_test_split
from chapter_15_utils import performance_evaluation_report_fastai
import pandas as pd


2. Load the dataset from a CSV file:

In [None]:
df = pd.read_csv("../Datasets/credit_card_default.csv", 
                 na_values="")
df.head()

In [None]:
# as a reminder, where the possible missing values are
df.isna().any()

3. Define the target, lists of categorical/numerical features, and the preprocessing steps:

In [None]:
TARGET = "default_payment_next_month"

# object-typed columns are treated as categorical, numeric ones as continuous
cat_features = df.select_dtypes("object").columns.to_list()
num_features = df.select_dtypes("number").columns.to_list()
# the target is among the numeric columns, so take it out of the feature list
num_features.remove(TARGET)

# processing steps later passed to TabularPandas via `procs`
preprocessing = [FillMissing, Categorify, Normalize]

4. Define the splitter used to create training and validation sets:

In [None]:
splits = RandomSplitter(valid_pct=0.2, seed=42)(range_of(df))
splits

5. Create the `TabularPandas` dataset:

In [None]:
# bundle the data with the processing steps, feature lists, target and split
tabular_df = TabularPandas(
    df, 
    procs=preprocessing,
    cat_names=cat_features,
    cont_names=num_features,
    y_names=TARGET,
    y_block=CategoryBlock(),
    splits=splits
)

# preview a few of the processed feature columns for the first 5 rows
# NOTE(review): "age_na" is presumably a missing-value indicator added by
# FillMissing rather than a column of the CSV file — confirm
PREVIEW_COLS = ["sex", "education", "marriage", 
                "payment_status_sep", "age_na", "limit_bal",
                "age", "bill_statement_sep"]
tabular_df.xs.iloc[:5][PREVIEW_COLS]

In [None]:
tabular_df.xs.columns

6. Define a `DataLoaders` object from the `TabularPandas` dataset:

In [None]:
data_loader = tabular_df.dataloaders(bs=64, drop_last=True)
data_loader.show_batch()

7. Define the metrics of choice and the tabular learner:

In [None]:
recall = Recall()
precision = Precision()
learn = tabular_learner(
    data_loader, 
    [500, 200], 
    metrics=[accuracy, recall, precision]
)
learn.model

In [None]:
# we can also figure out the embeddings using the following snippet
emb_szs = get_emb_sz(tabular_df)
emb_szs

`Embedding(11, 6)` means that a categorical embedding was created with 11 input values and 6 output latent features.

8. Find the suggested learning rate:

In [None]:
learn.lr_find()

# plt.savefig("images/figure_15_3")

9. Train the Tabular learner:

In [None]:
learn.fit(n_epoch=25, lr=1e-3, wd=0.2)

10. Plot the losses:

In [None]:
learn.recorder.plot_loss()

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_5")

11. Define the validation `DataLoaders`:

In [None]:
valid_data_loader = learn.dls.test_dl(df.loc[list(splits[1])])
valid_data_loader.show_batch()

12. Evaluate the performance on the validation set:

In [None]:
learn.validate(dl=valid_data_loader)

13. Get predictions for the validation set:

In [None]:
preds, y_true = learn.get_preds(dl=valid_data_loader)

In [None]:
preds

In [None]:
preds.argmax(dim=-1)

In [None]:
y_true

14. Inspect the performance evaluation metrics:

In [None]:
perf = performance_evaluation_report_fastai(
    learn, valid_data_loader, show_plot=True
)

sns.despine()
# plt.savefig("images/figure_15_6", dpi=200)

In [None]:
perf

### There's more

We can also be more specific when creating the training/validation split. Below, we use the `sklearn` functionalities and pass indices to the `IndexSplitter` class.

In [None]:
from sklearn.model_selection import StratifiedKFold

# separate the target so that the folds can be stratified on it
X = df.copy()
y = X.pop(TARGET)

# take the first of the 5 stratified folds: its held-out part (roughly one
# fifth of the rows) becomes the validation set
strat_split = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
train_ind, test_ind = next(strat_split.split(X, y))
ind_splits = IndexSplitter(valid_idx=list(test_ind))(range_of(df))

# rebuild the TabularPandas dataset with the stratified split
# NOTE: no new learner is created from it in this Notebook, so the cells below
# still use the `learn` object trained earlier with the random split
tabular_df = TabularPandas(
    df, 
    procs=preprocessing,
    cat_names=cat_features,
    cont_names=num_features,
    y_names=TARGET,
    y_block=CategoryBlock(),
    splits=ind_splits
)

We can look into the example results.

In [None]:
learn.show_results()

Or create predictions for a single row:

In [None]:
row, clas, probs = learn.predict(df.iloc[0])

In [None]:
row

In [None]:
clas

In [None]:
probs

## 15.2 Exploring Google's TabNet

### How to do it...

1. Import the libraries:

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import recall_score

from pytorch_tabnet.tab_model import TabNetClassifier
from pytorch_tabnet.metrics import Metric
import torch

import pandas as pd
import numpy as np

2. Load the dataset from a CSV file:

In [None]:
df = pd.read_csv("../Datasets/credit_card_default.csv", 
                 na_values="")
df.head()

3. Separate the target from the features and create lists with numerical/categorical features:

In [None]:
X = df.copy()
y = X.pop("default_payment_next_month")

cat_features = list(X.select_dtypes("object").columns)
num_features = list(X.select_dtypes("number").columns)

In [None]:
# as a reminder, where the possible missing values are
X.isna().any()

4. Impute missing values in the categorical features, encode them using `LabelEncoder` and store the number of unique categories per feature:

In [None]:
# number of distinct categories per categorical feature, keyed by column name
cat_dims = {}

for col in cat_features:
    label_encoder = LabelEncoder()
    # missing values get their own "Missing" category before encoding
    filled_col = X[col].fillna("Missing")
    X[col] = label_encoder.fit_transform(filled_col.values)
    cat_dims[col] = len(label_encoder.classes_)

cat_dims

5. Create a train/valid/test split using the 70-15-15 ratio:

In [None]:
# create the initial split - training and temp
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, 
    test_size=0.3, 
    stratify=y, 
    random_state=42
)

# create the valid and test sets
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, 
    test_size=0.5, 
    stratify=y_temp, 
    random_state=42
)

In [None]:
# features and target of every split, in the order they are reported
split_sets = {
    "Train": (X_train, y_train),
    "Valid": (X_valid, y_valid),
    "Test": (X_test, y_test),
}

print("Percentage of data in each set ----")
for split_name, (split_X, split_y) in split_sets.items():
    print(f"{split_name}: {100 * len(split_X) / len(X):.2f}%")
print("")
print("Class distribution in each set ----")
for split_name, (split_X, split_y) in split_sets.items():
    print(f"{split_name}: {split_y.value_counts(normalize=True).values}")

6. Impute the missing values in the numerical features across all the sets:

In [None]:
# the imputation value is always the mean of the training set, which is then
# applied to the training, validation and test sets alike
for col in num_features:
    imp_mean = X_train[col].mean()
    for split_X in (X_train, X_valid, X_test):
        split_X[col] = split_X[col].fillna(imp_mean)

7. Prepare lists with the indices of categorical features and the number of unique categories:

In [None]:
features = X.columns.to_list()
# positions of the categorical columns within the feature matrix
cat_ind = [features.index(feat) for feat in cat_features] 
# TabNetClassifier receives the cardinalities as a list next to `cat_idxs`.
# The dict was filled in the order of `cat_features`, so its values line up
# with `cat_ind`. Convert only while `cat_dims` is still that dict: without the
# guard, re-running this cell fails because a list has no `.values()`.
if isinstance(cat_dims, dict):
    cat_dims = list(cat_dims.values())
cat_ind

8. Define a custom recall metric:

In [None]:
class Recall(Metric):
    """Custom evaluation metric: recall of the predicted class labels.

    Sets ``_name`` to "recall" and ``_maximize`` to True.
    """

    def __init__(self):
        self._maximize = True
        self._name = "recall"

    def __call__(self, y_true, y_score):
        """Return the recall of the highest-scoring class per row against ``y_true``."""
        predicted_labels = np.argmax(y_score, axis=1)
        return recall_score(y_true, predicted_labels)

9. Define TabNet's parameters and instantiate the classifier:

In [None]:
tabnet_params = dict(
    # which columns are categorical and how many categories each of them has
    cat_idxs=cat_ind,
    cat_dims=cat_dims,
    # optimizer and learning-rate scheduler, each with its own parameters
    optimizer_fn=torch.optim.Adam,
    optimizer_params={"lr": 2e-2},
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    scheduler_params={"step_size": 20, "gamma": 0.9},
    mask_type="sparsemax",
    seed=42,
)

tabnet = TabNetClassifier(**tabnet_params)

10. Train the TabNet classifier:

In [None]:
# train on the training set while evaluating AUC and the custom recall metric
# on both the training and the validation set
tabnet.fit(
    X_train=X_train.values, 
    y_train=y_train.values,
    eval_set=[
        (X_train.values, y_train.values), 
        (X_valid.values, y_valid.values)
    ],
    eval_name=["train", "valid"],  # names given to the two evaluation sets
    # built-in metric passed by name, custom metric passed as a class
    # NOTE(review): presumably the last metric on the last eval set drives
    # early stopping — confirm in the pytorch-tabnet docs
    eval_metric=["auc", Recall],
    max_epochs=200, 
    patience=20,
    batch_size=1024, 
    virtual_batch_size=128,
    # NOTE(review): presumably enables class-balanced sampling — confirm
    weights=1,
)

11. Prepare the history DataFrame and plot the scores over epochs:

In [None]:
history_df = pd.DataFrame(tabnet.history.history)
history_df.head(10)

In [None]:
history_df["loss"].plot(
    title="Loss over epochs",
    xlabel="epochs",
    ylabel="loss"
)

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_7")

In [None]:
(
    history_df[["train_auc", "valid_auc"]]
    .plot(title="AUC over epochs",
          xlabel="epochs",
          ylabel="AUC")
);

plt.tight_layout()
sns.despine()

In [None]:
(
    history_df[["train_recall", "valid_recall"]]
    .plot(title="Recall over epochs",
          xlabel="epochs",
          ylabel="recall")
);

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_8")

12. Create predictions for the test set and evaluate their performance:

In [None]:
y_pred = tabnet.predict(X_test.values)

print(f"Best validation score: {tabnet.best_cost:.4f}")
print(f"Test set score: {recall_score(y_test, y_pred):.4f}")

13. Extract and plot the global feature importance:

In [None]:
tabnet_feat_imp = pd.Series(tabnet.feature_importances_, 
                            index=X_train.columns)
(
    tabnet_feat_imp
    .nlargest(20)
    .sort_values()
    .plot(kind="barh",
          title="TabNet's feature importances")
)

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_9")

In [None]:
np.sum(tabnet.feature_importances_)

### There's more

In [None]:
explain_matrix, masks = tabnet.explain(X_test.values)

fig, axs = plt.subplots(1, 3)

# one panel per mask, each showing the first 50 rows of that mask
for mask_idx, mask_ax in enumerate(axs):
    mask_ax.imshow(masks[mask_idx][:50])
    mask_ax.set_title(f"mask {mask_idx}")

In [None]:
explain_matrix.shape

In [None]:
X_test.shape

In [None]:
# save tabnet model; the value returned by `save_model` is what gets passed
# to `load_model` below
MODEL_PATH = "models/tabnet_model"
saved_filepath = tabnet.save_model(MODEL_PATH)

# define new model with basic parameters and load state dict weights
loaded_tabnet = TabNetClassifier()
loaded_tabnet.load_model(saved_filepath)

## 15.3 Time series forecasting with Amazon's DeepAR

### How to do it...

1. Import the libraries:

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import torch
import yfinance as yf
from random import sample, seed

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_forecasting import DeepAR, TimeSeriesDataSet

2. Download the tickers of the S&P 500 constituents and sample 100 random tickers from the list:

In [None]:
df = pd.read_html(
    "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
)
df = df[0]

seed(44)
sampled_tickers = sample(df["Symbol"].to_list(), 100)

3. Download the historical stock prices of the selected stocks:

In [None]:
# auto_adjust=False keeps the separate "Adj Close" column used in the next
# step; recent yfinance releases adjust prices by default and drop that column
raw_df = yf.download(sampled_tickers,
                     start="2020-01-01",
                     end="2021-12-31",
                     auto_adjust=False)

4. Keep the adjusted close price and remove the stocks with missing values:

In [None]:
# drop every ticker (column) with at least one missing adjusted close price
df = raw_df["Adj Close"].dropna(axis="columns", how="any")
selected_tickers = df.columns
df.head()

5. Convert the data's format from wide to long and add the time index:

In [None]:
df = df.reset_index(drop=False)
# Name the melted column explicitly. By default `melt` uses the name of the
# columns index (falling back to "variable"), so renaming "variable" afterwards
# silently does nothing whenever the downloaded columns carry a name, and the
# `groupby("ticker")` below would then fail.
df = pd.melt(
    df,
    id_vars=["Date"],
    value_vars=selected_tickers,
    var_name="ticker",
    value_name="price",
)
# consecutive integer time index within each ticker
df["time_idx"] = df.groupby("ticker").cumcount()
df

In [None]:
df.info()

6. Define constants used for setting up the model's training:

In [None]:
MAX_ENCODER_LENGTH = 40  # passed as `max_encoder_length` to the datasets
MAX_PRED_LENGTH = 20  # passed as `max_prediction_length` to the datasets
BATCH_SIZE = 128
MAX_EPOCHS = 30
# the last MAX_PRED_LENGTH time steps are kept out of the training set
training_cutoff = df["time_idx"].max() - MAX_PRED_LENGTH

7. Define the training and validation datasets:

In [None]:
# training set: only the observations up to (and including) the cutoff
train_set = TimeSeriesDataSet(
    df[lambda x: x["time_idx"] <= training_cutoff],
    time_idx="time_idx",
    target="price",
    group_ids=["ticker"],
    time_varying_unknown_reals=["price"],
    max_encoder_length=MAX_ENCODER_LENGTH,
    max_prediction_length=MAX_PRED_LENGTH,
)

# validation set: built from the training set's definition using the full
# data, with predictions starting right after the cutoff
valid_set = TimeSeriesDataSet.from_dataset(
    train_set, df, min_prediction_idx=training_cutoff+1
)

8. Get the DataLoaders from the datasets:

In [None]:
train_dataloader = train_set.to_dataloader(
    train=True, batch_size=BATCH_SIZE
)
valid_dataloader = valid_set.to_dataloader(
    train=False, batch_size=BATCH_SIZE
)

9. Define the DeepAR model and find the suggested learning rate:

In [None]:
pl.seed_everything(42)

# the model is configured from the training dataset's definition
deep_ar = DeepAR.from_dataset(
    train_set, 
    learning_rate=1e-2,
    hidden_size=30, 
    rnn_layers=4
)

# this trainer is only used for the learning-rate search; a new one is created
# for the actual training
# NOTE(review): `trainer.tuner` may not exist in newer Lightning releases —
# confirm against the installed version
trainer = pl.Trainer(gradient_clip_val=1e-1)
res = trainer.tuner.lr_find(
    deep_ar,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
    min_lr=1e-5,
    max_lr=1e0,
    early_stop_threshold=100,
)

print(f"Suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_11")

10. Train the DeepAR model:

In [None]:
pl.seed_everything(42)

# replace the initial learning rate with the one suggested by the search
deep_ar.hparams.learning_rate = res.suggestion()

# early stopping monitors "val_loss" with min_delta=1e-4 and patience=10
early_stop_callback = EarlyStopping(
    monitor="val_loss", 
    min_delta=1e-4, 
    patience=10
)

trainer = pl.Trainer(
    max_epochs=MAX_EPOCHS,
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback]
)

trainer.fit(
    deep_ar,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
)

11. Extract the best DeepAR model from a checkpoint:

In [None]:
best_model = DeepAR.load_from_checkpoint(
    trainer.checkpoint_callback.best_model_path
)

12. Create the predictions for the validation set and plot 5 of them:

In [None]:
# raw predictions for the validation set, returned together with `x`
# (requested via return_x=True)
raw_predictions, x = best_model.predict(
    valid_dataloader, 
    mode="raw", 
    return_x=True, 
    n_samples=100
)

# look up the ticker each prediction belongs to
tickers = valid_set.x_to_index(x)["ticker"]

# plot the first 5 predictions, labelling each figure with its ticker
for idx in range(5):
    best_model.plot_prediction(
        x, raw_predictions, idx=idx, add_loss_to_title=True
    )
    plt.suptitle(f"Ticker: {tickers.iloc[idx]}")

    plt.tight_layout()
    sns.despine()
    # plt.savefig(f"images/figure_15_12_{idx}")

### There's more

1. Import the libraries:

In [None]:
from pytorch_forecasting.metrics import MultivariateNormalDistributionLoss
import seaborn as sns
import numpy as np

In [None]:
# df = generate_ar_data(
#     seasonality=10.0, 
#     timesteps=len(raw_df), 
#     n_series=len(selected_tickers), 
#     seed=42
# )
# df["date"] = pd.Timestamp("2020-01-01") + pd.to_timedelta(df.time_idx, "D")
# df = df.astype(dict(series=str))
# df.columns = ["ticker", "time_idx", "price", "date"]
# df

2. Define the DataLoaders again, this time specifying the `batch_sampler`:

In [None]:
# same dataset definition as for DeepAR, except that the ticker is additionally
# passed as a static categorical feature
train_set = TimeSeriesDataSet(
    df[lambda x: x["time_idx"] <= training_cutoff],
    time_idx="time_idx",
    target="price",
    group_ids=["ticker"],
    static_categoricals=["ticker"],  
    time_varying_unknown_reals=["price"],
    max_encoder_length=MAX_ENCODER_LENGTH,
    max_prediction_length=MAX_PRED_LENGTH,
)

valid_set = TimeSeriesDataSet.from_dataset(
    train_set, df, min_prediction_idx=training_cutoff+1
)

# both loaders are created with the "synchronized" batch sampler
train_dataloader = train_set.to_dataloader(
    train=True, 
    batch_size=BATCH_SIZE,
    batch_sampler="synchronized"
)
valid_dataloader = valid_set.to_dataloader(
    train=False, 
    batch_size=BATCH_SIZE, 
    batch_sampler="synchronized"
)

3. Define the DeepVAR model and find the learning rate:

In [None]:
pl.seed_everything(42)

# same settings as the DeepAR model defined earlier; the only addition is the
# explicit multivariate normal distribution loss
deep_var = DeepAR.from_dataset(
    train_set, 
    learning_rate=1e-2, 
    hidden_size=30, 
    rnn_layers=4,
    loss=MultivariateNormalDistributionLoss()
)

# this trainer is only used for the learning-rate search
trainer = pl.Trainer(gradient_clip_val=1e-1)
res = trainer.tuner.lr_find(
    deep_var,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
    min_lr=1e-5,
    max_lr=1e0,
    early_stop_threshold=100,
)

print(f"Suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)

4. Train the DeepVAR model using the selected learning rate:

In [None]:
pl.seed_everything(42)

deep_var.hparams.learning_rate = res.suggestion()

early_stop_callback = EarlyStopping(
    monitor="val_loss", 
    min_delta=1e-4, 
    patience=10
)

trainer = pl.Trainer(
    max_epochs=MAX_EPOCHS,
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback]
)

trainer.fit(
    deep_var,
    train_dataloaders=train_dataloader,
    val_dataloaders=valid_dataloader,
)

5. Extract the best DeepVAR model from a checkpoint:

In [None]:
best_model = DeepAR.load_from_checkpoint(
    trainer.checkpoint_callback.best_model_path
)

6. Create the predictions for the validation set and plot 5 of them:

In [None]:
raw_predictions, x = best_model.predict(
    valid_dataloader, 
    mode="raw", 
    return_x=True, 
    n_samples=100
)
tickers = valid_set.x_to_index(x)["ticker"]

for idx in range(5):
    best_model.plot_prediction(
        x, raw_predictions, idx=idx, add_loss_to_title=True
    )
    plt.suptitle(f"Ticker: {tickers.iloc[idx]}")

7. Extract the correlation matrix:

In [None]:
preds = best_model.predict(
    valid_dataloader, mode=("raw", "prediction"), n_samples=None
)

# covariance matrix of the predictive distribution, averaged over dimension 0
predictive_dist = best_model.loss.map_x_to_distribution(preds)
cov_matrix = predictive_dist.base_dist.covariance_matrix.mean(0)

# normalize the covariance matrix diagonal to 1.0: divide every entry by the
# square root of the product of the two matching diagonal entries
variances = torch.diag(cov_matrix)
cov_diag_mult = torch.outer(variances, variances)
corr_matrix = cov_matrix / torch.sqrt(cov_diag_mult)


8. Plot the correlation matrix:

In [None]:
# hide the upper triangle (including the diagonal) of the matrix
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

fig, ax = plt.subplots()

cmap = sns.diverging_palette(230, 20, as_cmap=True)

# draw explicitly on `ax`, the axes that receive the title below
sns.heatmap(
    corr_matrix, mask=mask, cmap=cmap, 
    vmax=.3, center=0, square=True, 
    linewidths=.5, cbar_kws={"shrink": .5},
    ax=ax,
)

ax.set_title("Correlation matrix")

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_14")

In [None]:
# distribution of off-diagonal correlations
# NOTE(review): the selection keeps every entry strictly below 1, i.e. it
# relies on the diagonal being exactly 1.0, and it is taken over the full
# matrix rather than a single triangle — confirm this is the intended sample
plt.hist(corr_matrix[corr_matrix < 1].numpy())

plt.xlabel("Correlation")
plt.ylabel("Count")

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_15")

## 15.4 Time series forecasting with NeuralProphet

### How to do it...

1. Import the libraries:

In [None]:
import yfinance as yf
import pandas as pd
from neuralprophet import NeuralProphet
from neuralprophet.utils import set_random_seed
from neuralprophet.utils import set_log_level

2. Download the historical prices of the S&P 500 index and prepare the DataFrame for modeling with NeuralProphet:

In [None]:
# auto_adjust=False keeps the separate "Adj Close" column selected below;
# recent yfinance releases adjust prices by default and drop that column
df = yf.download("^GSPC",
                 start="2010-01-01",
                 end="2021-12-31",
                 auto_adjust=False)
# keep the date and the adjusted close, renamed to the "ds"/"y" column names
# used by the NeuralProphet cells below
df = df[["Adj Close"]].reset_index(drop=False)
df.columns = ["ds", "y"]

3. Create the train/test split:

In [None]:
TEST_LENGTH = 60
df_train = df.iloc[:-TEST_LENGTH]
df_test = df.iloc[-TEST_LENGTH:]

4. Train the default NeuralProphet model and plot the evaluation metrics:

In [None]:
set_random_seed(42)
set_log_level(log_level="ERROR")
model = NeuralProphet(changepoints_range=0.95)
# `fit` returns the metrics recorded during training
metrics = model.fit(df_train, freq="B")

# one subplot per recorded metric, leaving out the "RegLoss" column
(
    metrics
    .drop(columns=["RegLoss"])
    .plot(title="Evaluation metrics during training", 
          subplots=True,
          xlabel="epochs",
          ylabel="metric")
)

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_16")

5. Calculate the predictions and plot the fit:

In [None]:
pred_df = model.predict(df)

pred_df.plot(x="ds", y=["y", "yhat1"], 
             title="S&P 500 - forecast vs ground truth",
             ylabel="value");

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_17")

In [None]:
(
    pred_df
    .iloc[-TEST_LENGTH:]
    .plot(x="ds", y=["y", "yhat1"], 
          title="S&P 500 - forecast vs ground truth",
          ylabel="value")
);

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_18")

In [None]:
model.test(df_test)

6. Add the AR components to NeuralProphet:

In [None]:
set_random_seed(42)
set_log_level(log_level="ERROR")
# same configuration as the default model, extended with `n_lags` and `ar_reg`
model = NeuralProphet(
    changepoints_range=0.95,
    n_lags=10,
    ar_reg=1,
)
metrics = model.fit(df_train, freq="B")

# predict over the full sample (training and test periods)
pred_df = model.predict(df)
pred_df.plot(x="ds", y=["y", "yhat1"], 
             title="S&P 500 - forecast vs ground truth",
             ylabel="value");

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_19")

In [None]:
(
    pred_df
    .iloc[-TEST_LENGTH:]
    .plot(x="ds", y=["y", "yhat1"], 
          title="S&P 500 - forecast vs ground truth",
          ylabel="value")
);

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_20")

7. Add the AR-Net to NeuralProphet:

In [None]:
set_random_seed(42)
set_log_level(log_level="ERROR")
# the AR configuration from the previous step, extended with
# `num_hidden_layers` and `d_hidden`
model = NeuralProphet(
    changepoints_range=0.95,
    n_lags=10,
    ar_reg=1,
    num_hidden_layers=3,
    d_hidden=32,
)
metrics = model.fit(df_train, freq="B")

# predict over the full sample, then plot only the last TEST_LENGTH rows
pred_df = model.predict(df)

(
    pred_df
    .iloc[-TEST_LENGTH:]
    .plot(x="ds", y=["y", "yhat1"], 
          title="S&P 500 - forecast vs ground truth",
          ylabel="value")
);

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_21")

In [None]:
model.test(df_test)

8. Plot the components and parameters of the model:

In [None]:
# for plotting only, as there is some issue with the AR plot
# after plotting the components we can revert to the settings at the top of the Notebook
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)

In [None]:
model.plot_components(model.predict(df_train));

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_22")

In [None]:
model.plot_parameters();

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_23")

### There's more

1. Add holidays to the model:

In [None]:
set_random_seed(42)
set_log_level(log_level="ERROR")
model = NeuralProphet(
    changepoints_range=0.95,
    n_lags=10,
    ar_reg=1,
    num_hidden_layers=3,
    d_hidden=32,
)

model = model.add_country_holidays(
    "US", lower_window=-1, upper_window=1
)
metrics = model.fit(df_train, freq="B")

In [None]:
pred_df = model.predict(df_train)
model.plot_components(pred_df)

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_24")

In [None]:
model.plot_parameters();

2. Create a 10-day ahead multi-step forecast:

In [None]:
set_random_seed(42)
set_log_level(log_level="ERROR")
model = NeuralProphet(
    n_lags=10,
    n_forecasts=10,
    ar_reg=1,
    learning_rate=0.01
)
metrics = model.fit(df_train, freq="B")
pred_df = model.predict(df)
pred_df.tail()

In [None]:
# set_random_seed(42)
pred_df = model.predict(df, raw=True, decompose=False)
pred_df.tail().round(2)

In [None]:
pred_df = model.predict(df_test)
model.plot(pred_df)
ax = plt.gca()
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
ax.set_title("10-day ahead multi-step forecast")

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_27")

In [None]:
model = model.highlight_nth_step_ahead_of_each_forecast(1)
model.plot(pred_df)
ax = plt.gca()
ax.set_title("Step 1 of the 10-day ahead multi-step forecast")

plt.tight_layout()
sns.despine()
# plt.savefig("images/figure_15_28")