This notebook computes the weighted interval score (WIS) for the predictions, following the six scoring criteria specified in the repository’s README.

In [1]:
import numpy as np
import pandas as pd
import mosqlient as mosq
from epiweeks import Week
from itertools import product
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)

In [2]:
from mosqlient.scoring import compute_wis

Load the data:

In [3]:
# NOTE(review): this is an absolute path on one specific machine; the notebook
# will only run elsewhere after pointing it at a local copy of dengue.csv.gz.
data = pd.read_csv('/Users/eduardoaraujo/Documents/sprint_data_2025/dengue.csv.gz')
data['date'] = pd.to_datetime(data['date'])

# Collapse the table to one row per (date, state) holding the summed case
# count, then order the rows chronologically.
data = (
    data
    .groupby(['date', 'uf'])[['casos']]
    .sum()
    .reset_index()
    .sort_values(by='date')
)
data.head()

Unnamed: 0,date,uf,casos
0,2010-01-03,AC,760
26,2010-01-03,TO,231
25,2010-01-03,SP,1628
24,2010-01-03,SE,3
23,2010-01-03,SC,10


In [4]:
# Column-wise maxima; the `date` entry is the most recent date present in the data.
data.max()

date     2025-07-27 00:00:00
uf                        TO
casos                 159561
dtype: object

In [5]:
# Epidemiological week that contains 2025-07-27 (the latest date reported by data.max()).
Week.fromdate(pd.to_datetime('2025-07-27'))

Week(2025, 31, CDC)

In [6]:
# Distinct values of the `uf` column, in order of first appearance.
data.uf.unique()

array(['AC', 'TO', 'SP', 'SE', 'SC', 'RS', 'RR', 'RO', 'RJ', 'PR', 'PI',
       'PE', 'PB', 'RN', 'MT', 'PA', 'AM', 'AP', 'BA', 'CE', 'DF', 'AL',
       'GO', 'MA', 'MG', 'MS', 'ES'], dtype=object)

Find the peak date (the week with the highest case count) for each state and season. The peak scores computed later use a three-week window centered on this date:

In [7]:
def _season_window(cases, season_year):
    '''
    Return the rows of `cases` that belong to the season labelled `season_year`.

    A season runs from the start date of epidemiological week 41 of the
    previous year to the start date of week 40 of `season_year`, both inclusive.
    '''
    first_day = pd.Timestamp(Week(season_year - 1, 41).startdate())
    last_day = pd.Timestamp(Week(season_year, 40).startdate())
    return cases.loc[(cases.date >= first_day) & (cases.date <= last_day)]


def _first_peak_date(state_cases, state, season_year):
    '''
    Return the date of the row with the highest `casos` in `state_cases`.

    When several rows share the maximum, the first one in row order is used.
    Raises ValueError when `state_cases` has no rows, so that a state missing
    from a season is reported by name instead of failing with an IndexError.
    '''
    if state_cases.empty:
        raise ValueError(f'No observations for state {state!r} in season {season_year}')

    at_peak = state_cases['casos'] == state_cases['casos'].max()
    return pd.to_datetime(state_cases.loc[at_peak, 'date'].iloc[0])


states = data.uf.unique()
years = [2023, 2024, 2025]

# One frame per season. A season whose end lies beyond the last date in `data`
# is only partially covered, so its "peak" is the maximum observed so far.
season_frames = {year: _season_window(data, year) for year in years}

# Kept under their original names in case other cells refer to them.
data_23 = season_frames[2023]
data_24 = season_frames[2024]
data_25 = season_frames[2025]

# peak_dates[state][year]['peak_date'] -> Timestamp of the peak for that state and season
peak_dates = {state: {year: {} for year in years} for state in states}

for state in states:
    for year in years:
        season_cases = season_frames[year]
        state_cases = season_cases.loc[season_cases.uf == state]
        peak_dates[state][year]['peak_date'] = _first_peak_date(state_cases, state, year)

In [8]:
# Display the nested dict: state -> season year -> {'peak_date': Timestamp}.
peak_dates

{'AC': {2023: {'peak_date': Timestamp('2023-03-12 00:00:00')},
  2024: {'peak_date': Timestamp('2023-12-10 00:00:00')},
  2025: {'peak_date': Timestamp('2025-02-02 00:00:00')}},
 'TO': {2023: {'peak_date': Timestamp('2023-04-09 00:00:00')},
  2024: {'peak_date': Timestamp('2024-04-14 00:00:00')},
  2025: {'peak_date': Timestamp('2025-05-11 00:00:00')}},
 'SP': {2023: {'peak_date': Timestamp('2023-04-09 00:00:00')},
  2024: {'peak_date': Timestamp('2024-05-05 00:00:00')},
  2025: {'peak_date': Timestamp('2025-03-16 00:00:00')}},
 'SE': {2023: {'peak_date': Timestamp('2023-06-11 00:00:00')},
  2024: {'peak_date': Timestamp('2024-03-31 00:00:00')},
  2025: {'peak_date': Timestamp('2025-06-29 00:00:00')}},
 'SC': {2023: {'peak_date': Timestamp('2023-04-16 00:00:00')},
  2024: {'peak_date': Timestamp('2024-04-07 00:00:00')},
  2025: {'peak_date': Timestamp('2025-04-20 00:00:00')}},
 'RS': {2023: {'peak_date': Timestamp('2023-04-16 00:00:00')},
  2024: {'peak_date': Timestamp('2024-04-14 00:

Load predictions: 

In [19]:
# NOTE(review): the output of this cell echoes the read_csv line, which looks
# like a pandas warning raised while parsing (possibly mixed dtypes) - confirm.
df_preds = pd.read_csv('predictions/preds_2nd_sprint.csv.gz')

# Parse the dates and make `valid_test` a string column so that it can be
# compared against the string labels below.
df_preds = df_preds.assign(
    date=pd.to_datetime(df_preds['date']),
    valid_test=df_preds['valid_test'].astype(str),
)

# Keep only the rows that belong to validation tests 1, 2 or 3.
keep_rows = df_preds['valid_test'].isin(['1', '2', '3'])
df_preds = df_preds.loc[keep_rows]

df_preds.head()

  df_preds = pd.read_csv('predictions/preds_2nd_sprint.csv.gz')


Unnamed: 0,date,lower_95,lower_90,lower_80,lower_50,pred,upper_50,upper_80,upper_90,upper_95,valid_test,state,model_id
0,2024-10-06,22.422222,40.304679,79.16297,244.274988,853.700012,2982.915845,9196.810733,18041.732884,32366.797486,3,RS,108
1,2024-10-13,18.743651,34.187301,68.26913,216.515891,779.997873,2809.263104,8900.886361,17748.532118,32294.740637,3,RS,108
2,2024-10-20,6.581569,12.226861,24.874887,81.165077,301.351891,1118.132411,3638.484729,7371.869282,13600.483733,3,RS,108
3,2024-10-27,17.097895,32.060474,66.082749,220.93438,843.905436,3222.679992,10763.205908,22149.610067,41420.387215,3,RS,108
4,2024-11-03,18.724941,35.572182,74.438413,255.270334,1003.039414,3940.40738,13500.169745,28209.239339,53454.977878,3,RS,108


In [20]:
# Confirm which validation-test labels remain after the filter above.
df_preds.valid_test.unique()

array(['3', '2', '1'], dtype=object)

In [21]:
# Distinct model ids that have predictions in the filtered table.
df_preds.model_id.unique()

array([108, 133, 134, 135, 136, 137, 138, 139, 141, 143, 144, 145, 150,
       152, 154, 155, 156, 157])

In [22]:
# Sanity check: count the non-null `pred` values per (validation test, state,
# model) and list the groups whose count differs from 52. An empty result
# means every group has exactly 52 predictions.
t_ = df_preds.groupby(['valid_test', 'state', 'model_id'])[['pred']].count()

t_.loc[t_.pred != 52]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,pred
valid_test,state,model_id,Unnamed: 3_level_1


In [23]:
# Sanity check: list the (date, state, model) combinations that have more than
# one non-null `pred`. An empty result means there are no duplicated predictions.
t_ = df_preds.groupby(['date', 'state', 'model_id']).count()
t_.loc[t_.pred > 1]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,lower_95,lower_90,lower_80,lower_50,pred,upper_50,upper_80,upper_90,upper_95,valid_test
date,state,model_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


Define the scoring function and build the DataFrame with the computed metrics:

In [24]:
def compute_metrics(model, df_w, df_preds_, state=None, year=None, peak = False):
    '''
    Compute the mean WIS of one model, over all supplied dates or only around the peak.

    Parameters
    ----------
    model :
        Value matched against the `model_id` column of `df_preds_`.
    df_w : pd.DataFrame
        Observed cases; the columns `date`, `uf` and `casos` are used.
    df_preds_ : pd.DataFrame
        Predictions; the columns `date`, `state`, `model_id`, `pred` and the
        `lower_*` / `upper_*` bounds for the 50, 80, 90 and 95 levels are used.
    state : optional
        Key into the notebook-level `peak_dates` dict. Required when `peak=True`.
    year : str or int, optional
        Validation test number. The peak is looked up under the season
        `2022 + int(year)` (test 1 -> 2023, 2 -> 2024, 3 -> 2025).
        Required when `peak=True`.
    peak : bool
        When True, only the weekly (Sunday) dates from the start of the
        epidemiological week one week before the peak to the start of the one
        a week after it are scored.

    Returns
    -------
    The `np.mean` of the values `compute_wis` returns for the rows obtained by
    joining observations and predictions on date and state.
    NOTE(review): if the join is empty the mean is presumably taken over an
    empty result (NaN plus a RuntimeWarning in numpy) - confirm.

    Raises
    ------
    ValueError
        If `peak=True` and `state` or `year` is not provided.
    '''
    # Validation test k is scored against the season labelled FIRST_SEASON - 1 + k.
    season_offset = 2022

    if peak:
        if state is None or year is None:
            raise ValueError('`state` and `year` must be provided when peak=True')

        pw = peak_dates[state][season_offset + int(year)]['peak_date']

        # Snap both ends of the window to the start of their epidemiological week.
        start = Week.fromdate(pw - pd.Timedelta(days = 7)).startdate().strftime('%Y-%m-%d')
        end = Week.fromdate(pw + pd.Timedelta(days = 7)).startdate().strftime('%Y-%m-%d')

        dates_ = pd.date_range(start = start, end = end, freq = 'W-SUN')

        df_preds_2 = df_preds_.loc[df_preds_.date.isin(dates_)]
        df_w2 = df_w.loc[df_w.date.isin(dates_)]

    else:
        df_preds_2 = df_preds_
        df_w2 = df_w

    df_preds_model = df_preds_2.loc[df_preds_2.model_id == model].reset_index(drop = True)

    # Inner join: only dates/states present in both the observations and this
    # model's predictions are scored.
    df_preds_to_score = df_w2.merge(df_preds_model, left_on = ['date', 'uf'], right_on = ['date', 'state'])

    wis = np.mean(compute_wis(
                        df_preds_to_score[['date',  'lower_95', 'lower_90', 'lower_80', 'lower_50',
                           'pred', 'upper_50', 'upper_80', 'upper_90', 'upper_95']],
                        observed_value = df_preds_to_score['casos'].values))
    return wis

In [27]:
%%time
df_metrics = pd.DataFrame()

for state, test_year in product(df_preds.state.unique(), ['1', '2', '3']):
    #print(f'{state} - {test_year}')
    df_preds_ = df_preds.loc[(df_preds.state == state) & (df_preds.valid_test == test_year)].sort_values(by='date')

    df_w = data.loc[(data.uf == state) & data.date.isin(df_preds_.date.unique())].reset_index(drop=True)

    scores =[]
    models_ids = df_preds_.model_id.unique()
    
    for model in models_ids: 
        #print(model)
    
        wis = compute_metrics(model, df_w, df_preds_, state=state, year=test_year, peak = False)
        
        df_metrics = pd.concat([df_metrics,
                           pd.DataFrame([[model, state, test_year, wis ]],
                            columns = ['model', 'state', 'validation_test', 'WIS'] 
                            )], ignore_index = True)


for state in df_preds.state.unique():
    #print(f'{state} - {test_year}')
    df_preds_ = df_preds.loc[(df_preds.state == state)].sort_values(by='date')

    df_w = data.loc[(data.uf == state) & data.date.isin(df_preds_.date.unique())].reset_index(drop=True)

    scores =[]
    models_ids = df_preds_.model_id.unique()
    
    for model in models_ids: 
        #print(model)
    
        wis = compute_metrics(model, df_w, df_preds_, state=state, year=test_year, peak = False)
        
        df_metrics = pd.concat([df_metrics,
                           pd.DataFrame([[model, state, 'all', wis ]],
                            columns = ['model', 'state', 'validation_test', 'WIS'] 
                            )], ignore_index = True)




df_metrics.head()

CPU times: user 2.86 s, sys: 28.4 ms, total: 2.89 s
Wall time: 2.96 s


Unnamed: 0,model,state,validation_test,WIS
0,108,RS,1,8921.236508
1,136,RS,1,509.433869
2,155,RS,1,317.469096
3,156,RS,1,277.092772
4,144,RS,1,355.863504


In [28]:
# Confirm that the table holds the three validation tests plus the pooled 'all' label.
df_metrics.validation_test.unique()

array(['1', '2', '3', 'all'], dtype=object)

In [29]:
import warnings

# Turn the RuntimeWarning for empty slices into an exception
warnings.filterwarnings("error", category=RuntimeWarning)

In [30]:
# Rows are collected in a list and turned into a DataFrame once at the end;
# concatenating inside the loop copies the whole table on every iteration.
peak_rows = []

# Score each model around the peak for every (state, validation test) pair.
for state, test_year in product(df_preds.state.unique(), ['1', '2', '3']):
    df_preds_ = df_preds.loc[(df_preds.state == state) & (df_preds.valid_test ==test_year)]

    # Observations for this state restricted to the dates that were predicted.
    df_w = data.loc[(data.uf == state) & data.date.isin(df_preds_.date.unique())].reset_index(drop=True)

    models_ids = df_preds_.model_id.unique()

    for model in models_ids:
        wis = compute_metrics(model, df_w, df_preds_, state=state, year=test_year, peak = True)
        peak_rows.append((model, state, test_year, wis))

df_metrics_peak = pd.DataFrame(peak_rows, columns = ['model', 'state', 'validation_test', 'WIS'])

df_metrics_peak.head()

Unnamed: 0,model,state,validation_test,WIS
0,108,RS,1,39719.212357
1,133,RS,1,2961.581235
2,134,RS,1,7863.638277
3,135,RS,1,2660.073287
4,136,RS,1,2744.19018


In [32]:
# Put the full-period score and the peak score side by side. The peak table
# only covers validation tests 1-3, so with an outer join the 'all' rows of
# df_metrics are kept and carry NaN in WIS_peak.
merge_keys = ['model', 'state', 'validation_test']
peak_scores = df_metrics_peak.rename(columns = {'WIS': 'WIS_peak'})

df_end = df_metrics.merge(peak_scores, on = merge_keys, how = 'outer')

# Number of missing values per column.
df_end.isnull().sum()

model                0
state                0
validation_test      0
WIS                  0
WIS_peak           481
dtype: int64

In [33]:
# Write the combined table without the index column.
# NOTE(review): assumes the `results/` directory already exists - confirm.
df_end.to_csv('results/metrics_wis.csv', index = False)