## Upload Retrospective Forecasts to `cdc-flusight` Zoltar Project

In [6]:
# import libraries
import glob
import pymmwr as pm
from pathlib import Path
import ntpath
import time
import pandas as pd
from zoltpy import util
import os
import datetime
import yaml

In [4]:
# initialize variables
# Root folder that the upload loop globs for one sub-directory per model.
# Note there is no trailing path separator on this value.
my_path = "../../model-forecasts/component-models"
# NOTE(review): df_retro is not referenced in the visible cells - confirm it is still needed.
df_retro = pd.read_csv("retrospective-forecasts.csv")
# NOTE(review): `directory` is not referenced in the visible cells - confirm it is still needed.
directory = "master"
# Name used to pick the Zoltar project from the connection and passed to the batch upload.
cdc_project_name = "CDC Retrospective Forecasts"

In [29]:
def get_available_timezeros(model):
    """Return the timezero dates of `model` that have no forecast attached.

    Reads the `json` entry of the model's instance dict and scans its
    'forecasts' list. Each entry must provide the keys 'forecast' and
    'timezero_date'.

    Args:
        model: object whose instance attributes include `json`, a mapping
            holding a 'forecasts' list of dict-like entries.

    Returns:
        A list with the 'timezero_date' value of every entry whose
        'forecast' is None, in the order the entries appear.

    Raises:
        KeyError: if `json`, 'forecasts', 'forecast' or 'timezero_date'
            is missing.
    """
    forecast_entries = vars(model)['json']['forecasts']
    # identity check: only a genuinely missing forecast (None) counts as empty
    return [
        entry['timezero_date']
        for entry in forecast_entries
        if entry['forecast'] is None
    ]

In [27]:
def get_timezero(epi_week, year):
    """Build the timezero and data-version date strings for an epiweek.

    Weeks 40 through 53 are placed in `year`; every other week is placed
    in `year + 1`. The timezero is one day after the date pymmwr returns
    for that epiweek, and the data version date is two weeks after the
    timezero.

    Args:
        epi_week: epidemiological week number.
        year: year in which the season's fall weeks (40-53) fall.

    Returns:
        Tuple `(timezero, data_version_date)`, both formatted as
        'YYYYMMDD' strings.
    """
    date_format = '%Y%m%d'

    # fall weeks stay in the given year, spring weeks roll into the next one
    calendar_year = year if 40 <= epi_week <= 53 else year + 1
    week_date = pm.epiweek_to_date(pm.Epiweek(calendar_year, epi_week))

    # shift one day past the date pymmwr reports for the epiweek
    timezero_day = week_date + datetime.timedelta(days=1)
    version_day = timezero_day + datetime.timedelta(weeks=2)

    return timezero_day.strftime(date_format), version_day.strftime(date_format)

In [30]:
# Find every forecast CSV of the selected model, convert the ones whose
# timezero has no forecast in Zoltar yet, and upload them as one batch.
error_upload = []  # CSV paths that failed conversion, reported at the end
start = time.time()
conn = util.authenticate()
project = [project for project in conn.projects if project.name == cdc_project_name][0]
# read the project's models once and derive the name list from the same snapshot
models = list(project.models)
model_names = [model.name for model in models]

# loop through model directories
# my_path has no trailing separator, so join it with "*" explicitly; the
# trailing "" makes the pattern end in a separator so only directories match
for first_path in glob.iglob(os.path.join(my_path, "*", "")):

    # initialize zoltar model name
    model_name = ''

    # get model directory
    path = os.path.dirname(first_path)
    curr_model_dir = os.path.basename(path)
    print("Model: ", curr_model_dir)

    # set up batch variables (three parallel lists, one entry per forecast)
    predx_batch = []
    forecast_filename_batch = []
    timezero_batch = []

    # only this model directory is processed by this run
    if curr_model_dir == 'CUBMA':
        # safe_load needs no explicit Loader and will not build arbitrary
        # Python objects; the context manager closes the metadata file
        with open(path + "/metadata.txt", 'r') as metadata_file:
            metadata = yaml.safe_load(metadata_file)

        # The Zoltar model and its empty timezeros do not depend on the
        # individual CSV, so resolve them once per model directory.
        model_name = metadata["model_name"]
        model = [model for model in models if model.name == model_name][0]
        avail_timezeros = set(get_available_timezeros(model))

        # loop through model forecasts
        for csv_file in glob.iglob(first_path + "*.csv", recursive=False):

            # get model timezero from the file name: the last two characters
            # of the first "-" separated field are the week, the second field
            # is the year
            csv_file_name = ntpath.basename(csv_file)
            csv_file_list = csv_file_name.split("-")
            epi_week = int(csv_file_list[0][-2:])
            epi_year = int(csv_file_list[1])

            timezero, data_version_date = get_timezero(epi_week, epi_year)

            # skip timezeros that are unknown to Zoltar or already have a forecast
            if timezero not in avail_timezeros:
                continue

            print("Add model =", model_name, "timezero =", timezero)
            try:
                conn.re_authenticate_if_necessary()
                predx_json, forecast_filename = util.convert_cdc_csv_to_json_io_dict(csv_file)
            except Exception as exc:
                # best effort: record the failing file and keep going, but
                # let KeyboardInterrupt/SystemExit propagate
                print("ERROR", repr(exc))
                error_upload.append(csv_file + ", ")
            else:
                predx_batch.append(predx_json)
                forecast_filename_batch.append(forecast_filename)
                timezero_batch.append(timezero)

        if model_name in model_names and predx_batch:
            print('\n\nUPLOADING BATCH OF FORECASTS')
            conn.re_authenticate_if_necessary()
            util.upload_forecast_batch(conn, predx_batch, forecast_filename_batch,
                                       cdc_project_name, model_name, timezero_batch, overwrite=False)

print("\n\nRUN COMPLETE")
end = time.time()
interval = end - start
print("Total minutes elapsed: ", interval / 60.0)
print("ERROR UPLOADING...", error_upload)
print("\n".join(map(str, error_upload)))

Model:  Protea_Kudu
Model:  ReichLab_kcde_backfill_none
Model:  Delphi_BasisRegression
Model:  FluOutlook_Mech
Model:  FluX_LSTM
Model:  CU_EKF_SEIRS
Model:  Protea_Springbok
Model:  CU_EAKFC_SEIRS
Model:  LANL_DBMplus
Model:  CU_EKF_SIRS
Model:  CUBMA
Add model = Bayesian Model Averaging timezero = 20171113
Add model = Bayesian Model Averaging timezero = 20171009
Add model = Bayesian Model Averaging timezero = 20171204
Add model = Bayesian Model Averaging timezero = 20171127
Add model = Bayesian Model Averaging timezero = 20171002
Add model = Bayesian Model Averaging timezero = 20171225
Add model = Bayesian Model Averaging timezero = 20171120
Add model = Bayesian Model Averaging timezero = 20171211
Add model = Bayesian Model Averaging timezero = 20171106
Add model = Bayesian Model Averaging timezero = 20171016
Add model = Bayesian Model Averaging timezero = 20171023
Add model = Bayesian Model Averaging timezero = 20180514
Add model = Bayesian Model Averaging timezero = 20171030
Add mo