In [1]:
# 01_toy_data.ipynb
# This notebook simulates a toy dataset of forecasters making predictions for economic indicators.


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# --- Parameters ---
p = 5 # number of forecasters
M = 3 # number of indicators
T = 20 # number of periods
np.random.seed(42)


# --- True skill of forecasters ---
# Lower = better, controlling forecast noise
true_skill = np.linspace(0.8, 1.2, p)


# --- Simulate forecasts and realizations ---
data = []
for j in range(p):
for m in range(M):
for t in range(T):
realized = np.random.normal(0, 1)
forecast = realized + np.random.normal(0, true_skill[j])
data.append([f'F{j+1}', f'Ind{m+1}', t+1, forecast, realized])


df = pd.DataFrame(data, columns=["forecaster", "indicator", "period", "forecast", "realized"])


# --- Compute forecast errors ---
df["error"] = df["forecast"] - df["realized"]
df["squared_error"] = df["error"]**2


# --- Display sample ---
print(df.head())


# --- Plot distribution of forecast errors ---
plt.figure(figsize=(8, 5))
sns.histplot(df, x="error", hue="forecaster", element="step", stat="density")
plt.title("Distribution of Forecast Errors by Forecaster")
plt.show()


# --- Compute mean squared forecast error (MSFE) per forecaster ---
msfe = df.groupby("forecaster")["squared_error"].mean().reset_index()
msfe = msfe.rename(columns={"squared_error": "MSFE"})
print(msfe)


# --- Plot average MSFE ---
plt.figure(figsize=(6, 4))
sns.barplot(data=msfe, x="forecaster", y="MSFE", palette="Blues_d")
plt.title("Mean Squared Forecast Error per Forecaster")
plt.show()


# --- Save dataset ---
df.to_csv("../examples/toy_dataset.csv", index=False)
print("Toy dataset saved to ../examples/toy_dataset.csv")

IndentationError: expected an indented block after 'for' statement on line 25 (1769396641.py, line 26)