In [None]:
import pandas as pd
from datetime import date
from IPython.display import display, Markdown
import sys, os
from report_utils import *

# Locate the scores file: the SCORES_PATH environment variable takes
# precedence, otherwise the relative path "scores.csv" is used.
scores_path = os.environ.get("SCORES_PATH", "scores.csv")
print(f"Loading scores from: {scores_path}")

# Scores table used by every leaderboard and plotting cell in this notebook.
scores_rel = pd.read_csv(filepath_or_buffer=scores_path)

In [None]:
#jupyter nbconvert "Influcast Evaluation Report.ipynb" --to pdf --no-input --output influcast_evaluation_report.pdf

Generated on:

In [None]:
# Report generation date; str() of a date is its ISO 8601 form (YYYY-MM-DD).
print(date.today())

%%latex
\tableofcontents
\newpage

# Forecasting target: ILI incidence

## Leaderboard - ILI (location = IT)
The table below aggregates per-model performance across weeks/horizons for target ```ILI incidence``` and location IT.


In [None]:
# Plain-text caption printed above the leaderboard.
print("Leaderboard (ILI, IT)")

# Leaderboard for target "ILI" at location "IT".
leaderboard_ili = compute_leaderboard(scores_rel, "ILI", "IT", groupby_round=True)

# Bare final expression so the notebook renders the returned object.
leaderboard_ili

%%latex
\newpage

## Forecasting Performance by Forecast Week (ILI)

The plot below shows the relative WIS and relative MAE by forecast week for all models, highlighting the ensemble model, the baseline model and the average of all other models for target ```ILI incidence```. Values are averaged across all horizons for each forecast week. 

Values below 1 indicate that the model is outperforming the baseline model, while values above 1 indicate that the model is underperforming. 

In [None]:
# One figure with two stacked panels sharing the forecast-week x-axis.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 8), dpi=300, sharex=True)
wis_ax, mae_ax = axes

# Arguments that are identical for both panels.
ili_week_common = {"use_log_scale": True, "xlabel": "Forecast Week"}

# Top panel: relative WIS.
plot_performance_by_forecast_week(
    scores_rel, "ILI", "IT",
    metric="rel_wis",
    title="Relative WIS by Forecast Week (ILI, IT)",
    ylabel="Relative WIS",
    ax=wis_ax,
    **ili_week_common,
)

# Bottom panel: relative MAE. Kept as the cell's final expression so the cell
# output is the same as with two back-to-back calls.
plot_performance_by_forecast_week(
    scores_rel, "ILI", "IT",
    metric="rel_ae_median",
    title="Relative MAE by Forecast Week (ILI, IT)",
    ylabel="Relative MAE",
    ax=mae_ax,
    **ili_week_common,
)

%%latex
\newpage

## Forecasting Performance by Region (ILI)

The plot below shows the relative WIS and relative MAE by region for all models for target ```ILI incidence```. Values are averaged across all horizons and forecast weeks for each region. 

Values below 1 indicate that the model is outperforming the baseline model, while values above 1 indicate that the model is underperforming. 

In [None]:
# One figure with two stacked panels: relative WIS on top, relative MAE below.
fig, axes = plt.subplots(nrows=2, figsize=(10, 8), dpi=300)

# (metric column, panel title) for each panel, listed top to bottom.
region_panels = (
    ("rel_wis", "Relative WIS by Region (ILI)"),
    ("rel_ae_median", "Relative MAE by Region (ILI)"),
)

for region_ax, (region_metric, region_title) in zip(axes, region_panels):
    plot_performance_by_region(
        scores_rel,
        target_name="ILI",
        metric=region_metric,
        title=region_title,
        ax=region_ax,
    )

plt.tight_layout()

%%latex
\newpage

# Forecasting target: ILI+/A incidence

## Leaderboard - ILI+/A (location = IT)
The table below aggregates per-model performance across weeks/horizons for target ```ILI+/A incidence``` and location IT.


In [None]:
# Plain-text caption printed above the leaderboard.
print("Leaderboard (ILI+/A, IT)")

# Leaderboard for target "ILI+_FLU_A" at location "IT".
leaderboard_flu_a = compute_leaderboard(scores_rel, "ILI+_FLU_A", "IT", groupby_round=True)

# Bare final expression so the notebook renders the returned object.
leaderboard_flu_a

%%latex
\newpage

## Forecasting Performance by Forecast Week (ILI+/A)

The plot below shows the relative WIS and relative MAE by forecast week for all models, highlighting the ensemble model, the baseline model and the average of all other models for target ```ILI+/A incidence```. Values are averaged across all horizons for each forecast week. 

Values below 1 indicate that the model is outperforming the baseline model, while values above 1 indicate that the model is underperforming. 

In [None]:
# One figure with two stacked panels sharing the forecast-week x-axis.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 8), dpi=300, sharex=True)
wis_ax, mae_ax = axes

# Arguments that are identical for both panels.
flu_a_week_common = {"use_log_scale": True, "xlabel": "Forecast Week"}

# Top panel: relative WIS.
plot_performance_by_forecast_week(
    scores_rel, "ILI+_FLU_A", "IT",
    metric="rel_wis",
    title="Relative WIS by Forecast Week (ILI+/A, IT)",
    ylabel="Relative WIS",
    ax=wis_ax,
    **flu_a_week_common,
)

# Bottom panel: relative MAE. Kept as the cell's final expression so the cell
# output is the same as with two back-to-back calls.
plot_performance_by_forecast_week(
    scores_rel, "ILI+_FLU_A", "IT",
    metric="rel_ae_median",
    title="Relative MAE by Forecast Week (ILI+/A, IT)",
    ylabel="Relative MAE",
    ax=mae_ax,
    **flu_a_week_common,
)

%%latex
\newpage

# Forecasting target: ILI+/B incidence

## Leaderboard - ILI+/B (location = IT)
The table below aggregates per-model performance across weeks/horizons for target ```ILI+/B incidence``` and location IT.

In [None]:
# Plain-text caption printed above the leaderboard.
print("Leaderboard (ILI+/B, IT)")

# Leaderboard for target "ILI+_FLU_B" at location "IT".
leaderboard_flu_b = compute_leaderboard(scores_rel, "ILI+_FLU_B", "IT", groupby_round=True)

# Bare final expression so the notebook renders the returned object.
leaderboard_flu_b

%%latex
\newpage

## Forecasting Performance by Forecast Week (ILI+/B)

The plot below shows the relative WIS and relative MAE by forecast week for all models, highlighting the ensemble model, the baseline model and the average of all other models for target ```ILI+/B incidence```. Values are averaged across all horizons for each forecast week. 

Values below 1 indicate that the model is outperforming the baseline model, while values above 1 indicate that the model is underperforming. 

In [None]:
# One figure with two stacked panels sharing the forecast-week x-axis.
fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 8), dpi=300, sharex=True)
wis_ax, mae_ax = axes

# Arguments that are identical for both panels.
flu_b_week_common = {"use_log_scale": True, "xlabel": "Forecast Week"}

# Top panel: relative WIS.
plot_performance_by_forecast_week(
    scores_rel, "ILI+_FLU_B", "IT",
    metric="rel_wis",
    title="Relative WIS by Forecast Week (ILI+/B, IT)",
    ylabel="Relative WIS",
    ax=wis_ax,
    **flu_b_week_common,
)

# Bottom panel: relative MAE. Kept as the cell's final expression so the cell
# output is the same as with two back-to-back calls.
plot_performance_by_forecast_week(
    scores_rel, "ILI+_FLU_B", "IT",
    metric="rel_ae_median",
    title="Relative MAE by Forecast Week (ILI+/B, IT)",
    ylabel="Relative MAE",
    ax=mae_ax,
    **flu_b_week_common,
)

# Model Specific Plots

In [None]:
# The baseline and the ensemble are left out of `models`; they only have
# entries in `styles` below.
reference_models = ("Influcast-quantileBaseline", "Influcast-Ensemble")
models = [name for name in scores_rel["model"].unique() if name not in reference_models]

# Two-stage aggregation:
#   1. mean of every numeric column per (model, location, forecast_week, target);
#   2. sum of the three WIS components per (model, location, target).
scores_aggr = (
    scores_rel
    .groupby(["model", "location", "forecast_week", "target"], as_index=False)
    .mean(numeric_only=True)
    .groupby(["model", "location", "target"], as_index=False)
    .agg({
        "overprediction": "sum",
        "underprediction": "sum",
        "dispersion": "sum",
    })
)

# Line styles keyed by series name.
styles = {
    "Influcast-quantileBaseline": dict(color="coral", alpha=1.0, linestyle="--", label="Baseline"),
    "Influcast-Ensemble": dict(color="#028A0F", alpha=0.5, linestyle="solid", label="Ensemble"),
    "Other Models Average": dict(color="k", alpha=0.5, linestyle="dotted", label="Other Models Average"),
}

In [None]:
# One report section per model in `models`. For every target the model has
# scores for, draw a three-panel figure: relative WIS over time (A), relative
# MAE over time (C) and the WIS components (B).
for model in models:
    display(Markdown(f"## {model}"))

    # Targets for which this model has rows in the aggregated scores.
    model_targets = scores_aggr.loc[scores_aggr.model == model].target.unique()

    for target in ['ILI', 'ILI+_FLU_A', 'ILI+_FLU_B']:
        if target not in model_targets:
            continue

        display(Markdown(f"### {model}: {target}"))

        # Shared styles plus a highlighted entry for the current model. A
        # shallow copy is enough because only a new key is added.
        styles_model = styles.copy()
        styles_model[model] = {"color": "dodgerblue", "alpha": 1., "linestyle": "solid", "label": model}

        fig, axes = plt.subplot_mosaic([["A", "A", "B"], ["C", "C", "B"]], dpi=300, figsize=(10, 8))

        # Rel MAE in time
        plot_performance_by_forecast_week(scores_rel, target, "IT",
                                          metric="rel_ae_median", use_log_scale=True,
                                          title=f"Relative MAE by Forecast Week ({target}, IT)",
                                          xlabel="Forecast Week",
                                          ylabel="Relative MAE", ax=axes["C"], styles=styles_model)

        # Rel WIS in time; panel A shares its x-axis with panel C.
        # NOTE(review): label_outer() runs before panel A is drawn; if the
        # plotting helper sets the x-label itself it may reappear - confirm.
        axes["A"].sharex(axes["C"])
        axes["A"].label_outer()
        plot_performance_by_forecast_week(scores_rel, target, "IT",
                                          metric="rel_wis", use_log_scale=True,
                                          title=f"Relative WIS by Forecast Week ({target}, IT)",
                                          xlabel="Forecast Week",
                                          ylabel="Relative WIS", ax=axes["A"], styles=styles_model)

        # WIS components
        plot_wis_components(scores_aggr, model, location="IT", target=target, ax=axes["B"])
        plt.tight_layout()
        plt.show()

        # Release the figure explicitly: one 300-dpi figure is created per
        # (model, target) pair and would otherwise pile up on backends where
        # show() does not close it.
        plt.close(fig)