This notebook loads the predictions submitted to the Mosqlimate data platform and saves them to a single compressed CSV file.

In [1]:
import numpy as np
import pandas as pd
import mosqlient as mosq
from epiweeks import Week

from dotenv import load_dotenv
import os

# Load the .env file
load_dotenv()  # by default, it looks for a file named ".env"

# Access variables
api_key = os.getenv("API_KEY")

# os.getenv returns None when the variable is missing; stop here with a clear
# message instead of failing later inside the API client.
if not api_key:
    raise RuntimeError(
        'API_KEY is not set. Define it in a ".env" file or in the environment before running this notebook.'
    )



In [2]:
def _season_ref_dates(end_year):
    """Return the weekly reference dates of the season that ends in `end_year`.

    The range goes from the start date of epidemiological week 41 of the
    previous year to the start date of epidemiological week 40 of `end_year`,
    stepping one week at a time with weeks anchored on Sunday.
    """
    first_day = Week(end_year - 1, 41).startdate().isoformat()
    last_day = Week(end_year, 40).startdate().isoformat()
    return pd.date_range(start=first_day, end=last_day, freq='W-SUN')


# One set of reference dates per season, named after the year in which it ends
ref_dates_23 = _season_ref_dates(2023)
ref_dates_24 = _season_ref_dates(2024)
ref_dates_25 = _season_ref_dates(2025)
ref_dates_26 = _season_ref_dates(2026)

In [3]:
# Ids of the models whose predictions are downloaded from the platform.
# NOTE(review): the saved outputs further down report 18 distinct model ids,
# including 141, which is not listed here — confirm whether 141 should be added
# or the notebook re-run so that outputs and this list agree.
models_id = [
    108,
    133, 134, 135, 136, 137, 138, 139,
    143, 144, 145,
    150, 152, 154, 155, 156, 157,
]

In [5]:
# Download the predictions of every model and gather them in one flat list
list_preds = []

for model_id in models_id:
    list_preds.extend(mosq.get_predictions(api_key=api_key, model_id=model_id))

100%|███████████████████████████████████████| 3/3 [00:02<00:00,  1.14requests/s]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.31s/requests]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.28s/requests]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.17s/requests]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.34s/requests]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.35s/requests]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.35s/requests]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.24s/requests]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.38s/requests]
100%|███████████████████████████████████████| 3/3 [00:02<00:00,  1.13requests/s]
100%|███████████████████████████████████████| 2/2 [00:02<00:00,  1.30s/requests]
100%|███████████████████████████████████████| 3/3 [00:02<00:00,  1.13requests/s]
100%|███████████████████████

In [6]:
# Sanity check: model id of the first downloaded prediction, read through attribute access
list_preds[0].model.id

108

In [7]:
# Same model id, this time read from the dict representation of the prediction
list_preds[0].dict()['model']['id']

108

In [15]:
%%time 
df_predictions = pd.DataFrame()

for pred_ in list_preds:

    preds_df = pred_.to_dataframe()

    preds_df.date = pd.to_datetime(preds_df.date)

    min_date = min(preds_df.date)
    
    max_date = max(preds_df.date)

    df_dates = set(preds_df.date)

    if min_date.year == 2022:
        preds_df = preds_df.loc[preds_df.date.isin(ref_dates_23)]
        preds_df['valid_test'] = 1 
        
    elif min_date.year == 2023:
        preds_df = preds_df.loc[preds_df.date.isin(ref_dates_24)]
        preds_df['valid_test'] = 2 

    elif min_date.year == 2024:
        preds_df = preds_df.loc[preds_df.date.isin(ref_dates_25)]
        preds_df['valid_test'] = 3 
        
    elif min_date.year == 2025:
        preds_df = preds_df.loc[preds_df.date.isin(ref_dates_26)]
        preds_df['valid_test'] = 'Forecast'

    pred_dict = pred_.dict()
    preds_df['state'] = pred_dict['adm_1']
    preds_df['model_id'] = pred_dict['model']['id']
    preds_df['valid_test'] = preds_df['valid_test'].astype(str)

    df_predictions = pd.concat([df_predictions, preds_df], ignore_index = True)


CPU times: user 2.51 s, sys: 275 ms, total: 2.78 s
Wall time: 2.81 s


In [16]:
# Number of distinct models present in the combined predictions
df_predictions['model_id'].nunique(dropna=False)

18

In [17]:
# Number of distinct models with predictions for each state and validation window
count_models = df_predictions.groupby(['state', 'valid_test'])[['model_id']].nunique()

# Use the number of models actually loaded as the threshold instead of a
# hard-coded 18, so the check stays valid when the list of models changes
n_models = df_predictions['model_id'].nunique()

# State/window combinations that are missing at least one model
count_models.loc[count_models.model_id < n_models]

Unnamed: 0_level_0,Unnamed: 1_level_0,model_id
state,valid_test,Unnamed: 2_level_1
AC,Forecast,3
AL,Forecast,3
AM,Forecast,3
AP,Forecast,3
BA,Forecast,3
CE,Forecast,3
DF,Forecast,3
ES,1,13
ES,2,13
ES,3,13


In [18]:
# Distinct model ids present in the combined predictions
df_predictions['model_id'].unique()

array([108, 133, 134, 135, 136, 137, 138, 139, 141, 143, 144, 145, 150,
       152, 154, 155, 156, 157])

In [20]:
# Save the combined predictions as a compressed CSV
output_path = 'predictions/preds_2nd_sprint.csv.gz'

# to_csv does not create missing folders; make sure the target one exists
os.makedirs(os.path.dirname(output_path), exist_ok=True)

df_predictions.to_csv(output_path, index = False)