In [1]:
import numpy as np
import pandas as pd
from epiweeks import Week
from format_data import *
from scipy.stats import boxcox
from scipy.special import inv_boxcox
import matplotlib.pyplot as plt

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
state = 'RJ'

df_st = filter_agg_data(state)

df_org = org_data(df_st)

df_org.to_csv(f'data/dengue_{state}.csv.gz')

df_org.head()

Unnamed: 0_level_0,SE,casos,diff_casos,casos_mean,casos_std,casos_slope
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2010-01-24,4,7.370625,0.183963,7.190246,0.111927,0.095332
2010-01-31,5,7.629492,0.258866,7.329928,0.194118,0.167364
2010-02-07,6,7.462964,-0.166528,7.412436,0.159991,0.108777
2010-02-14,7,7.627335,0.164371,7.522604,0.110734,0.06036
2010-02-21,8,8.073944,0.446609,7.698434,0.22708,0.149773


### Train the models

In [3]:
import model_gp as gp
import model_arima as ar 
import model_lstm as lstm 

In [4]:
start_train_date = str(Week(2015,1).startdate())
end_train_date = str(Week(2022,52).startdate())

print(state)
# input to arima model
print('--------------------- Training ARIMA ---------------------')

ar.train_model(df_org.reset_index()[['date', 'casos']], state, train_ini_date = start_train_date, train_end_date = end_train_date)

# train gpr model 
print('--------------------- Training GP ---------------------')

gp.train_model(state, ini_train = start_train_date, end_train = end_train_date)

# train lstm model
print('--------------------- Training LSTM ---------------------')


feat = 6
HIDDEN = 64
LOOK_BACK = 4
PREDICT_N = 3

model = lstm.build_lstm(hidden=HIDDEN, features=feat, predict_n=PREDICT_N, look_back=LOOK_BACK,
                            batch_size=4, loss='mse')

model.compile(loss='mse', optimizer='adam', metrics=["accuracy", "mape", "mse"])
        
lstm.train_model(model, state, doenca='dengue',
                    end_train_date=None,
                    ratio = 1,
                    ini_date = start_train_date,
                    end_date = end_train_date,
                    filename=f'data/dengue_{state}.csv.gz',
                    min_delta=0.001, label='state',
                    patience = 30, 
                    epochs=300,
                    batch_size=4,
                    predict_n=PREDICT_N,
                    look_back=LOOK_BACK)


### Apply model

In [6]:
def apply_models(state, end_date): 
    df_arima = ar.apply_model(state, end_date)

    #print('--------------------- Apply GP ---------------------')
    
    df_gp = gp.apply_model(state, end_date)
            
    #print('--------------------- Apply LSTM ---------------------')
    
    #FILENAME_DATA = f'data/dengue_{state}.csv.gz'
    #df_ = pd.read_csv(FILENAME_DATA, index_col = 'date')
    
    #feat = df_.shape[1]
            
    #model_name = f'trained_{state}_dengue_state'
    
    #print(model_name)
    
    #df_lstm = lstm.apply_forecast(state, None, end_date, look_back=4, predict_n=3,
    #                                        filename=FILENAME_DATA, model_name=model_name)
    
    #df_concat = pd.concat([df_arima, df_gp, df_lstm])

    return df_arima#df_concat

In [7]:
date = Week(2023, 52).startdate()

In [8]:
df_gp = apply_models(state, date.strftime(format = '%Y-%m-%d'))

df_gp.head()

Unnamed: 0,lower_95,upper_95,lower_90,upper_90,lower_80,upper_80,lower_50,upper_50,pred,date,step,model
0,1514.115115,4641.941325,1648.482671,4219.323016,1820.118414,3784.348674,2153.089913,3164.481491,2604.820455,2023-12-31,1,arima
1,1386.096727,5777.63705,1542.107097,5105.857086,1746.698531,4436.992528,2159.472184,3526.088188,2750.13437,2024-01-07,2,arima
2,1302.390421,7132.505999,1476.25327,6142.888571,1709.500906,5186.914712,2196.465534,3937.20281,2926.702411,2024-01-14,3,arima


In [7]:
state = 'PR'

In [8]:
end_date = Week(2023, 52).startdate()

In [9]:
df_forecast = pd.DataFrame()
for date in pd.date_range(start=end_train_date, end=end_date, freq = 'W-SUN'):

    df_concat = apply_models(state, date.strftime(format = '%Y-%m-%d'))

    df_concat['epiweek'] =  Week.fromdate(date).isoformat()

    df_forecast = pd.concat([df_forecast, df_concat], ignore_index = True)

df_forecast.head(12)

trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengue_state
trained_PR_dengu

Unnamed: 0,date,pred,lower,upper,step,model,epiweek
0,2023-01-01,1509.0,907.72402,2677.743458,1,arima,2022W52
1,2023-01-08,1509.0,747.127417,3476.099876,2,arima,2022W52
2,2023-01-15,1509.0,647.020726,4293.043466,3,arima,2022W52
3,2023-01-01,3173.296739,2627.952657,3825.025866,1,gp,2022W52
4,2023-01-08,2075.424656,1701.101873,2527.117102,2,gp,2022W52
5,2023-01-15,2458.464923,1968.525809,3062.795028,3,gp,2022W52
6,2023-01-01,1640.217102,1135.159735,2002.01311,1,lstm,2022W52
7,2023-01-08,1582.272034,1110.932202,1944.282571,2,lstm,2022W52
8,2023-01-15,1522.531982,1064.945331,1891.817053,3,lstm,2022W52
9,2023-01-08,1818.0,1071.663204,3310.855541,1,arima,2023W01


In [10]:
df_forecast.to_csv(f'forecast_tables/for_2023_{state}.csv', index = False)