<a href="https://colab.research.google.com/github/NPCA-TEAM/COVID-19/blob/main/Scripts/%204%20-%20TREINAR_MODELOS_covid_casos_ver1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Instalação, importação de bibliotecas e inicialização

##Instalação das bibliotecas

In [None]:
# Mount Google Drive first: every PATH_* folder used by this notebook lives under /content/drive
from google.colab import drive 
drive.mount('/content/drive')

In [None]:
!pip install pyyaml==5.4.1

In [None]:
!pip install darts

In [None]:
!pip install matplotlib==3.1.3  #é necessário restart runtime 

In [None]:
!pip install pytorch-lightning

##Importação das Bibliotecas

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import pandas as pd
import numpy as np
import glob
import os
import sys
import time
import torch
import json

import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import matplotlib.dates as mdates

from datetime import datetime

from darts import timeseries
from darts.dataprocessing.transformers import Scaler
from darts import concatenate

from darts.models import (    
    NBEATSModel,
    TCNModel,
    TransformerModel,
    TFTModel,
    NHiTSModel #NHiTS
    )

from darts.metrics import mape, rmse, r2_score
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.utils.missing_values import fill_missing_values
from darts.models import KalmanFilter

from darts.utils.likelihood_models import (
    GaussianLikelihood, 
    QuantileRegression 
    )

import warnings
warnings.filterwarnings('ignore')
import logging
logging.disable(logging.CRITICAL)
torch.manual_seed(1); np.random.seed(1)  # for reproducibility

#Dados e campos de entrada do framework


In [None]:
# Framework input fields
NAME_SERIE_SYMPTOM_CASES = "Casos_DataSintoma_MM_atual_PA"   # column name of the symptom-date cases series
NAME_SERIE_PUBLICATION_CASES = "Casos_Publicacao_MM_atual_PA"   # column name of the publication-date cases series

# Number of trailing points held out for validation/test by split_train_val_series.
# It is also used as output_chunk_length by every model built in this notebook.
# (presumably one point per day, as the constant names suggest - confirm against the dataset)
VALIDATION_DAYS_SLICED = 7

# Size of the training window: used as input_chunk_length by every model.
# The procedure in this notebook is therefore based on a sliding-window method.
WORKOUT_DAYS_SLICED = 30

# Number of steps forecast ahead of the series (forecast horizon of the grid searches,
# and the number of extra points appended to the date covariates)
FORECAST_DAYS = 7

# Folder containing the dataset file(s)
PATH_DATASET = '/content/drive/MyDrive/NPCA - COVID/_CASOS/DataSet/' 

# Output folders: trained models, best parameters, figures and forecast CSVs
PATH_MODELS = '/content/drive/MyDrive/NPCA - COVID/_CASOS/Models/'
PATH_PARAMS = '/content/drive/MyDrive/NPCA - COVID/_CASOS/BestModelParameters/'
PATH_FIGS = '/content/drive/MyDrive/NPCA - COVID/_CASOS/Figs/'
PATH_CSV = '/content/drive/MyDrive/NPCA - COVID/_CASOS/ForecastsOutputModel/'

#Leitura do dataset

In [None]:
# List every file in the dataset folder
FILE_LIST = glob.glob(PATH_DATASET + '*')

# Fail early with an explicit message; otherwise max() below raises an opaque
# "arg is an empty sequence" ValueError when the folder is empty or Drive is not mounted.
if not FILE_LIST:
  raise FileNotFoundError('Nenhum arquivo encontrado em: ' + PATH_DATASET)

# Pick the file with the greatest ctime.
# NOTE(review): on Linux ctime is the last metadata change, not the creation time - confirm this is the intended criterion.
FILE_PATH = max(FILE_LIST, key=os.path.getctime)

# Read the first sheet of the Excel file as a DataFrame
DATAFRAME = pd.read_excel(FILE_PATH, sheet_name=0)

# Preview only the first rows instead of dumping the whole frame
DATAFRAME.head()

#Seleção das variáveis

In [None]:
# Target case variables, selected through the configured NAME_SERIE_* constants
# (same column names as before, but no longer duplicated as literals).
SYMPTOM_CASES = DATAFRAME.loc[:, ['data', NAME_SERIE_SYMPTOM_CASES]]
PUBLICATION_CASES = DATAFRAME.loc[:, ['data', NAME_SERIE_PUBLICATION_CASES]]

# Auxiliary case variables; missing values are replaced by 0.
# Only the 14-day columns are used for training.
SYMPTOM_CASES_14DAYS = DATAFRAME.loc[:, ['data', 'Casos_DataSintoma_MM_14dias_PA']].fillna(0)
PUBLICATION_CASES_14DAYS = DATAFRAME.loc[:, ['data', 'Casos_Publicacao_MM_14dias_PA']].fillna(0)

# Vaccination coverage per dose; missing values are replaced by 0.
VACCINATION_1DOSE = DATAFRAME.loc[:, ['data', 'Vacinacao_Dose1_%decimal_PA']].fillna(0)
VACCINATION_2DOSE = DATAFRAME.loc[:, ['data', 'Vacinacao_Dose2_%decimal_PA']].fillna(0)
VACCINATION_3DOSE = DATAFRAME.loc[:, ['data', 'Vacinacao_Dose3_%decimal_PA']].fillna(0)

# Rename the 14-day columns to "7 previous days".
# NOTE(review): the 14-day column is relabelled as 7diasAnteriores - confirm this relabelling is intended.
SYMPTOM_CASES_7PREVIOUSDAYS = SYMPTOM_CASES_14DAYS.rename(
    columns={'Casos_DataSintoma_MM_14dias_PA': 'Casos_DataSintoma_MM_7diasAnteriores_PA'})
PUBLICATION_CASES_7PREVIOUSDAYS = PUBLICATION_CASES_14DAYS.rename(
    columns={'Casos_Publicacao_MM_14dias_PA': 'Casos_Publicacao_MM_7diasAnteriores_PA'})

# Backward-compatible alias: other cells still reference the original misspelled name.
PUBLICATION_CASES_7PRECIOUSDAYS = PUBLICATION_CASES_7PREVIOUSDAYS

#Instanciação das séries temporais

In [None]:
# Build the target time series from the selected frames, using the 'data' column as time axis
SERIE_SYMPTOM_CASES = timeseries.TimeSeries.from_dataframe(df=SYMPTOM_CASES, time_col='data')
SERIE_PUBLICATION_CASES = timeseries.TimeSeries.from_dataframe(df=PUBLICATION_CASES, time_col='data')

plt.figure(figsize = (10, 6))

# Plot both target series
SERIE_SYMPTOM_CASES.plot()
SERIE_PUBLICATION_CASES.plot()

In [None]:
# Build the auxiliary case series ("7 previous days") with the 'data' column as time axis.
# NOTE: PUBLICATION_CASES_7PRECIOUSDAYS is the (misspelled) name defined in the variable-selection cell.
SERIE_SYMPTOM_CASES_7PREVIOUSDAYS = timeseries.TimeSeries.from_dataframe(df=SYMPTOM_CASES_7PREVIOUSDAYS, time_col='data')
SERIE_PUBLICATION_CASES_7PREVIOUSDAYS = timeseries.TimeSeries.from_dataframe(df=PUBLICATION_CASES_7PRECIOUSDAYS, time_col='data')

plt.figure(figsize = (10, 6))

# Plot both auxiliary series
SERIE_SYMPTOM_CASES_7PREVIOUSDAYS.plot()
SERIE_PUBLICATION_CASES_7PREVIOUSDAYS.plot()

In [None]:
# Build the auxiliary vaccination series (one per dose) with the 'data' column as time axis.
# These series are stacked into the covariates later without going through process_scaler.
serie_vaccination_1Dose = timeseries.TimeSeries.from_dataframe(df=VACCINATION_1DOSE, time_col='data')
serie_vaccination_2Dose = timeseries.TimeSeries.from_dataframe(df=VACCINATION_2DOSE, time_col='data')
serie_vaccination_3Dose = timeseries.TimeSeries.from_dataframe(df=VACCINATION_3DOSE, time_col='data')

plt.figure(figsize = (10, 6))

# Plot the three vaccination series
serie_vaccination_1Dose.plot()
serie_vaccination_2Dose.plot()
serie_vaccination_3Dose.plot()

#Filtro de suavização das variáveis

In [None]:
def filter_for_suavization(serie):
  """Smooth a series with a Kalman filter fitted on that same series.

  Args:
    serie: series passed to both KalmanFilter.fit and KalmanFilter.filter.

  Returns:
    The output of KalmanFilter(dim_x=1).filter(serie) after fitting on serie.
  """
  kalman = KalmanFilter(dim_x=1)
  kalman.fit(serie)
  return kalman.filter(serie)

In [None]:
# Smooth both target series with the Kalman-filter helper
serie_symptom_cases_smoothed = filter_for_suavization(SERIE_SYMPTOM_CASES)
serie_publication_cases_smoothed = filter_for_suavization(SERIE_PUBLICATION_CASES)

plt.figure(figsize = (10, 6))

# Plot each raw series next to its smoothed version for visual comparison
SERIE_SYMPTOM_CASES.plot(label="Sintoma")
serie_symptom_cases_smoothed.plot(label="Sintoma Suav")
SERIE_PUBLICATION_CASES.plot(label="Publicação")
serie_publication_cases_smoothed.plot(label="Publicação Suav")

#Escalar (normalização entre 0 e 1)

In [None]:
def process_scaler(serie):
  """Fit a new Scaler on a series and return the transformed series with the scaler.

  Per the notebook's own comments the scaling normalises values to the 0-1 range.

  Args:
    serie: series to fit the scaler on and to transform.

  Returns:
    (scaled_series, scaler): the transformed series first, then the fitted
    Scaler object (kept so the transformation can be reused later).
  """
  scaler = Scaler()
  return scaler.fit_transform(serie), scaler

In [None]:
# Pre-processing: normalise the smoothed targets to [0, 1], keeping each fitted scaler
serie_symptom_cases_normalized, SCALER_symptom_cases = process_scaler(serie_symptom_cases_smoothed)
serie_publication_cases_normalized, SCALER_publication_cases = process_scaler(serie_publication_cases_smoothed)

# Same normalisation for the auxiliary case series (these are not smoothed beforehand)
serie_symptom_cases_7previousdays_normalized, SCALER_symptom_7days = process_scaler(SERIE_SYMPTOM_CASES_7PREVIOUSDAYS)
serie_publication_cases_7previousdays_normalized, SCALER_publication_7days = process_scaler(SERIE_PUBLICATION_CASES_7PREVIOUSDAYS)

In [None]:
# Visual check of the normalised target series and auxiliary case series
plt.figure(figsize = (10, 6))

serie_symptom_cases_normalized.plot()
serie_publication_cases_normalized.plot()

serie_symptom_cases_7previousdays_normalized.plot()
serie_publication_cases_7previousdays_normalized.plot()

#Definição de Covariáveis

In [None]:
def define_covariate_dates(serie_temp_normalizada):
  """Build scaled year, month and day-of-week covariates for a series.

  Each covariate is generated from the time index of the given series, extended
  by FORECAST_DAYS extra points (add_length), and scaled with its own Scaler.
  The original code re-fitted the month scaler on the day-of-week covariate;
  every attribute now gets a dedicated scaler so no fitted state is overwritten.

  Args:
    serie_temp_normalizada: series whose time index drives the covariates.

  Returns:
    (covariate_y, covariate_m, covariate_dw): scaled year, month and
    day-of-week covariate series, in that order.
  """
  scaled_covariates = []
  for attribute in ('year', 'month', 'dayofweek'):
    raw_covariate = datetime_attribute_timeseries(
        serie_temp_normalizada, attribute=attribute, add_length=FORECAST_DAYS)
    # A fresh scaler per attribute keeps each fit independent
    scaled_covariates.append(Scaler().fit_transform(raw_covariate))

  covariate_y, covariate_m, covariate_dw = scaled_covariates
  return covariate_y, covariate_m, covariate_dw

In [None]:
# Build the date covariates (year, month, day of week) from the normalised symptom-cases series
covariate_year_symptom_cases, covariate_month_symptom_cases, covariate_dayweek_symptom_cases = define_covariate_dates(serie_symptom_cases_normalized)

In [None]:
# Visualise the year and month covariate series
plt.figure(figsize = (10, 6))
covariate_year_symptom_cases.plot()
covariate_month_symptom_cases.plot()

In [None]:
# Visualise the last 30 points of the day-of-week covariate, one marker per point
plt.figure(figsize = (10, 6))
covariate_dayweek_symptom_cases[-30:].plot(marker='o')

#Divisão do dataset de treino e validação

In [None]:
#Dividir em dados de treino e teste
def split_train_val_series(serie, validation_days=None):
  """Split a series into a training head and a validation tail.

  Args:
    serie: any sliceable sequence with a length (e.g. a time series).
    validation_days: number of trailing points to hold out. Defaults to the
      notebook-level VALIDATION_DAYS_SLICED when omitted.

  Returns:
    (s_train, s_val): everything except the last validation_days points,
    followed by the last validation_days points.

  Raises:
    ValueError: if validation_days is not strictly between 0 and len(serie).
      A size of 0 would otherwise make serie[:-0] an empty training set.
  """
  if validation_days is None:
    validation_days = VALIDATION_DAYS_SLICED

  if not 0 < validation_days < len(serie):
    raise ValueError(
        'validation_days deve estar entre 1 e len(serie) - 1; recebido: {}'.format(validation_days))

  s_train, s_val = serie[:-validation_days], serie[-validation_days:]

  return s_train, s_val

In [None]:
# Split each normalised target series into training and validation parts
serieTrain_symptom_cases, serieVal_symptom_cases = split_train_val_series(serie_symptom_cases_normalized)
serieTrain_publication_cases, serieVal_publication_cases = split_train_val_series(serie_publication_cases_normalized)

In [None]:
# Visualise the target series: tail of the training part followed by the validation part
plt.figure(figsize = (10, 6))

serieTrain_symptom_cases[-30:].plot()   # last 30 training points
serieVal_symptom_cases.plot()
serieTrain_publication_cases[-30:].plot()   # last 30 training points
serieVal_publication_cases.plot()

#Treinamento Modelos: NBEATS, TCN, Transformer, TFT, NHITS

##Preparação dos dados para treino

In [None]:
# Stack the two training targets into one multivariate series
series_Train = serieTrain_symptom_cases.stack(serieTrain_publication_cases)

# Stack the covariates by group: dates, auxiliary cases, vaccination
covariatesDate = covariate_year_symptom_cases.stack(covariate_month_symptom_cases.stack(covariate_dayweek_symptom_cases))
covariatesCases = serie_symptom_cases_7previousdays_normalized.stack(serie_publication_cases_7previousdays_normalized)
covariatesVacination = serie_vaccination_1Dose.stack(serie_vaccination_2Dose.stack(serie_vaccination_3Dose))

# The date covariates were generated with FORECAST_DAYS extra points (add_length);
# the [:-FORECAST_DAYS] cut removes them so all covariates have the same number of days.
covariates = covariatesDate[:-FORECAST_DAYS].stack(covariatesCases.stack(covariatesVacination))

plt.figure(figsize = (10, 6))
series_Train.plot()
covariates.plot()
print(len(covariates))

In [None]:
# Zoom on the last 40 points of the training targets and of the covariates
plt.figure(figsize = (10, 6))
series_Train[-40:].plot()
covariates[-40:].plot()

##Acelerador COLAB

In [None]:
# Check for the Colab GPU through PyTorch, which is the backend used for training here
# (torch is imported in the imports cell and the models take pytorch-lightning trainer kwargs).
# The previous probe went through TensorFlow: it reported TensorFlow's view of the GPU,
# called tf.device(...) as a bare statement (no effect outside a `with` block), and
# initialising the GPU in TensorFlow can reserve GPU memory that PyTorch then cannot use.
if torch.cuda.is_available():
  device_name = torch.cuda.get_device_name(0)
  print('Found GPU at: {}'.format(device_name))
  print("GPUs available: ", torch.cuda.device_count())
else:
  # Keep device_name defined (empty string) so the variable exists either way
  device_name = ''
  print('GPU device not found')

##Área de funções para treinar modelos

In [None]:
# Settings that help computational performance and that are common to all models.
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

# Early stopping on the training loss: stop after 5 checks without improvement.
# NOTE(review): this single callback instance is shared by every model built from
# pl_trainer_dic - confirm its internal patience state does not carry over between
# grid-search candidates (a fresh instance per model may be required).
stopper = EarlyStopping(monitor="train_loss", patience=5, mode='min')

# Extra keyword arguments forwarded to the trainer through "pl_trainer_kwargs"
pl_trainer_dic = {"callbacks": [stopper]}

# Candidate values shared by the full (level 2) grids
n_epochs_list = [30]
batch_size_list = [38]
force_reset_list = [True]
dropout_list = [0.05, 0.1]
likelihood_list = [
    GaussianLikelihood(),
    QuantileRegression(
        quantiles=[0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]),
]

# Grid-search evaluation starts at 5% of the training-series length
start_day = int(round(0.05 * len(serieTrain_symptom_cases)))

# Stride used by the full grids: four forecast horizons between evaluations
stride_steps = 4 * FORECAST_DAYS


##NBEATS

In [None]:
def gridsearch_NBEATS(gridComplextyModel: int, time_series, covariates, FORECAST_DAYS):
  """Build an N-BEATS model at the requested complexity level and fit it.

  Args:
    gridComplextyModel: 0 builds one fixed configuration (no search);
      1 runs a small grid search; 2 runs the full grid search.
    time_series: target series used by the search and by the final fit.
    covariates: past covariates used by the search and by the final fit.
    FORECAST_DAYS: forecast horizon of the search (also its stride at level 1).

  Returns:
    (model, model_tuple): the fitted model and the tuple returned by
    NBEATSModel.gridsearch (index 0 is the model, index 1 the selected
    parameters). At level 0 no search runs, so model_tuple is
    (model, fixed parameters, None) and index 1 remains usable.

  Raises:
    ValueError: if gridComplextyModel is not 0, 1 or 2. Previously this case
      only printed a message and then crashed on the unbound `model`.
  """
  name_model = "NBEATS-Model_" + time.strftime("%d_%m_%Y", time.localtime())

  if gridComplextyModel == 0:
    fixed_parameters = {
      "input_chunk_length": WORKOUT_DAYS_SLICED,
      "output_chunk_length": VALIDATION_DAYS_SLICED,
      # NOTE(review): "kwargs" is passed as a literal keyword argument, kept as in the
      # original call - confirm the model accepts it and what it is meant to configure.
      "kwargs": [0, "Relu"],
      "n_epochs": 10,
      "likelihood": likelihood_list[1],
      "model_name": name_model}
    model = NBEATSModel(**fixed_parameters)
    # Previously model_tuple was left undefined here, so the final return raised.
    model_tuple = (model, fixed_parameters, None)

  elif gridComplextyModel in (1, 2):
    if gridComplextyModel == 1:
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": [4],
        "batch_size": [32],
        "force_reset": force_reset_list,
        "generic_architecture": [True, False],
        "num_stacks": [2],
        "num_layers": [2],
        "num_blocks": [1],
        "layer_widths": [8],
        "dropout": dropout_list,
        "activation": ['ReLU'],
        "likelihood": likelihood_list,
        "model_name": [name_model],
        "pl_trainer_kwargs": [pl_trainer_dic]}
      search_stride = FORECAST_DAYS
    else:
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": n_epochs_list,
        "batch_size": batch_size_list,
        "force_reset": force_reset_list,
        "generic_architecture": [True],
        "num_stacks": [3],
        "num_layers": [2, 3],
        "num_blocks": [1, 2],
        "layer_widths": [128],
        "dropout": dropout_list,
        "activation": ['ReLU', 'Tanh', 'Sigmoid'],
        "likelihood": likelihood_list,
        "model_name": [name_model],
        "pl_trainer_kwargs": [pl_trainer_dic]}
      search_stride = stride_steps

    model_tuple = NBEATSModel.gridsearch(
      parameters = parameters,
      series = time_series,
      forecast_horizon = FORECAST_DAYS,
      start = start_day,  # notebook-level value: 5% of the training-series length
      past_covariates = covariates,
      stride = search_stride,
      metric = rmse,
      verbose = True,
      n_jobs = -1)

    model = model_tuple[0]

  else:
    raise ValueError('Valor de parâmetro indisponível')

  # Final fit of the selected configuration on the whole series
  model.fit(
      series = time_series,
      past_covariates = covariates,
      verbose = False)

  return model, model_tuple


##NHITS

In [None]:
def gridsearch_NHITS(gridComplextyModel: int, time_series, covariates, FORECAST_DAYS):
  """Build an N-HiTS model at the requested complexity level and fit it.

  Args:
    gridComplextyModel: 0 builds one fixed configuration (no search);
      1 runs a small grid search; 2 runs the full grid search.
    time_series: target series used by the search and by the final fit.
    covariates: past covariates used by the search and by the final fit.
    FORECAST_DAYS: forecast horizon of the search (also its stride at level 1).

  Returns:
    (model, model_tuple): the fitted model and the tuple returned by
    NHiTSModel.gridsearch (index 0 is the model, index 1 the selected
    parameters). At level 0 no search runs, so model_tuple is
    (model, fixed parameters, None) and index 1 remains usable.

  Raises:
    ValueError: if gridComplextyModel is not 0, 1 or 2. Previously this case
      only printed a message and then crashed on the unbound `model`.
  """
  name_model = "NHITS-Model_" + time.strftime("%d_%m_%Y", time.localtime())

  if gridComplextyModel == 0:
    fixed_parameters = {
      "input_chunk_length": WORKOUT_DAYS_SLICED,
      "output_chunk_length": VALIDATION_DAYS_SLICED,
      "n_epochs": 10,
      # NOTE(review): "kwargs" is passed as a literal keyword argument, kept as in the
      # original call - confirm the model accepts it and what it is meant to configure.
      "kwargs": [0, "Relu"],
      "likelihood": likelihood_list[1],
      "model_name": name_model}
    model = NHiTSModel(**fixed_parameters)
    # Previously model_tuple was left undefined here, so the final return raised.
    model_tuple = (model, fixed_parameters, None)

  elif gridComplextyModel in (1, 2):
    if gridComplextyModel == 1:
      # pl_trainer_kwargs (early stopping) is left out of this small grid
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": [4],
        "batch_size": [32],
        "force_reset": force_reset_list,
        "num_stacks": [2],
        "num_layers": [1],
        "num_blocks": [1],
        "layer_widths": [8],
        "dropout": [0],
        "activation": ['ReLU'],
        "likelihood": likelihood_list,
        "model_name": [name_model]}
      search_stride = FORECAST_DAYS
      # The small grid runs quietly in a single job, as in the original call
      search_verbose = False
      search_jobs = 1
    else:
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": n_epochs_list,
        "batch_size": batch_size_list,
        "force_reset": force_reset_list,
        "num_stacks": [3],
        "num_layers": [2, 3],
        "num_blocks": [1, 2],
        "layer_widths": [128],
        "dropout": dropout_list,
        "activation": ['ReLU', 'Tanh', 'Sigmoid'],
        "likelihood": likelihood_list,
        "model_name": [name_model],
        "pl_trainer_kwargs": [pl_trainer_dic]}
      search_stride = stride_steps
      search_verbose = True
      search_jobs = -1

    model_tuple = NHiTSModel.gridsearch(
      parameters = parameters,
      series = time_series,
      forecast_horizon = FORECAST_DAYS,
      start = start_day,  # notebook-level value: 5% of the training-series length
      past_covariates = covariates,
      stride = search_stride,
      metric = rmse,
      verbose = search_verbose,
      n_jobs = search_jobs)

    model = model_tuple[0]

  else:
    raise ValueError('Valor de parâmetro indisponível')

  # Final fit of the selected configuration on the whole series
  model.fit(
      series = time_series,
      past_covariates = covariates,
      verbose = False)

  return model, model_tuple


##TCN

In [None]:
def gridsearch_TCN(gridComplextyModel: int, time_series, covariates, FORECAST_DAYS):
  """Build a TCN model at the requested complexity level and fit it.

  Args:
    gridComplextyModel: 0 builds one fixed configuration (no search);
      1 runs a small grid search; 2 runs the full grid search.
    time_series: target series used by the search and by the final fit.
    covariates: past covariates used by the search and by the final fit.
    FORECAST_DAYS: forecast horizon of the search (also its stride at level 1).

  Returns:
    (model, model_tuple): the fitted model and the tuple returned by
    TCNModel.gridsearch (index 0 is the model, index 1 the selected
    parameters). At level 0 no search runs, so model_tuple is
    (model, fixed parameters, None) and index 1 remains usable.

  Raises:
    ValueError: if gridComplextyModel is not 0, 1 or 2. Previously this case
      only printed a message and then crashed on the unbound `model`.
  """
  name_model = "TCN-Model_" + time.strftime("%d_%m_%Y", time.localtime())

  if gridComplextyModel == 0:
    fixed_parameters = {
      "input_chunk_length": WORKOUT_DAYS_SLICED,
      "output_chunk_length": VALIDATION_DAYS_SLICED,
      "n_epochs": 10,
      "dropout": 0.1,
      "dilation_base": 2,
      "weight_norm": True,
      "kernel_size": 5,
      "num_filters": 3,
      "likelihood": likelihood_list[1],
      "model_name": name_model}
    model = TCNModel(**fixed_parameters)
    # Previously model_tuple was left undefined here, so the final return raised.
    model_tuple = (model, fixed_parameters, None)

  elif gridComplextyModel in (1, 2):
    if gridComplextyModel == 1:
      # pl_trainer_kwargs (early stopping) is left out of this small grid
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": [4],
        "batch_size": [32],
        "force_reset": [True],
        "likelihood": likelihood_list,
        "kernel_size": [2],
        "num_layers": [1],
        "num_filters": [2],
        "dilation_base": [2],
        "weight_norm": [False],
        "dropout": [0],
        "model_name": [name_model]}
      search_stride = FORECAST_DAYS
    else:
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": n_epochs_list,
        "batch_size": batch_size_list,
        "force_reset": force_reset_list,
        "likelihood": likelihood_list,
        "kernel_size": [3, 4],
        "num_layers": [None, 1, 2],
        "num_filters": [2, 3, 4],
        "dilation_base": [2],
        "weight_norm": [True, False],
        "dropout": dropout_list,
        "model_name": [name_model],
        "pl_trainer_kwargs": [pl_trainer_dic]}
      search_stride = stride_steps

    model_tuple = TCNModel.gridsearch(
      parameters = parameters,
      series = time_series,
      past_covariates = covariates,
      forecast_horizon = FORECAST_DAYS,
      stride = search_stride,
      start = start_day,  # notebook-level value: 5% of the training-series length
      metric = rmse,
      verbose = True,
      n_jobs = -1)

    model = model_tuple[0]

  else:
    raise ValueError('Valor de parâmetro indisponível')

  # Final fit of the selected configuration on the whole series
  model.fit(series = time_series,
      past_covariates = covariates,
      verbose = False)

  return model, model_tuple

##TFT

In [None]:
def gridsearch_TFT(gridComplextyModel: int, time_series, covariates, FORECAST_DAYS):
  """Build a TFT model at the requested complexity level and fit it.

  Args:
    gridComplextyModel: 0 builds one fixed configuration (no search);
      1 runs a small grid search; 2 runs the full grid search.
    time_series: target series used by the search and by the final fit.
    covariates: past covariates used by the search and by the final fit.
    FORECAST_DAYS: forecast horizon of the search (also its stride at level 1).

  Returns:
    (model, model_tuple): the fitted model and the tuple returned by
    TFTModel.gridsearch (index 0 is the model, index 1 the selected
    parameters). At level 0 no search runs, so model_tuple is
    (model, fixed parameters, None) and index 1 remains usable.

  Raises:
    ValueError: if gridComplextyModel is not 0, 1 or 2. Previously this case
      only printed a message and then crashed on the unbound `model`.
  """
  name_model = "TFT-Model_" + time.strftime("%d_%m_%Y", time.localtime())

  if gridComplextyModel == 0:
    fixed_parameters = {
      "input_chunk_length": WORKOUT_DAYS_SLICED,
      "output_chunk_length": VALIDATION_DAYS_SLICED,
      "hidden_size": 4,
      "lstm_layers": 1,
      "num_attention_heads": 4,
      "dropout": 0.1,
      "batch_size": 2,
      "n_epochs": 4,
      "add_relative_index": True,
      "add_encoders": None,
      "likelihood": likelihood_list[1],
      "model_name": name_model}
    model = TFTModel(**fixed_parameters)
    # Previously model_tuple was left undefined here, so the final return raised.
    model_tuple = (model, fixed_parameters, None)

  elif gridComplextyModel in (1, 2):
    if gridComplextyModel == 1:
      # pl_trainer_kwargs (early stopping) is left out of this small grid
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": [4],
        "batch_size": [32],
        "force_reset": force_reset_list,
        "likelihood": likelihood_list,
        "hidden_size": [1],
        "lstm_layers": [1],
        "num_attention_heads": [2],
        "full_attention": [False],
        "hidden_continuous_size": [2],
        "add_relative_index": [True],
        "dropout": [0],
        "model_name": [name_model]}
      search_stride = FORECAST_DAYS
    else:
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": n_epochs_list,
        "batch_size": batch_size_list,
        "force_reset": force_reset_list,
        "likelihood": likelihood_list,
        "hidden_size": [8],
        "lstm_layers": [1, 2, 3],
        "num_attention_heads": [3, 4, 5],
        "full_attention": [True],
        "hidden_continuous_size": [8],
        "add_relative_index": [True],
        "dropout": dropout_list,
        "pl_trainer_kwargs": [pl_trainer_dic],
        "model_name": [name_model]}
      search_stride = stride_steps

    model_tuple = TFTModel.gridsearch(
      parameters = parameters,
      series = time_series,
      past_covariates = covariates,
      forecast_horizon = FORECAST_DAYS,
      stride = search_stride,
      start = start_day,  # notebook-level value: 5% of the training-series length
      metric = rmse,
      verbose = True,
      n_jobs = -1)

    model = model_tuple[0]

  else:
    raise ValueError('Valor de parâmetro indisponível')

  # Final fit of the selected configuration on the whole series
  model.fit(series = time_series,
            past_covariates = covariates,
            verbose = False)

  return model, model_tuple

##Transformer

In [None]:
def gridsearch_TRANSFORMER(gridComplextyModel: int, time_series, covariates,FORECAST_DAYS):
  """Build a Transformer model at the requested complexity level and fit it.

  Args:
    gridComplextyModel: 0 builds one fixed configuration (no search);
      1 runs a small grid search; 2 runs the full grid search.
    time_series: target series used by the search and by the final fit.
    covariates: past covariates used by the search and by the final fit.
    FORECAST_DAYS: forecast horizon of the search (also its stride at level 1).

  Returns:
    (model, model_tuple): the fitted model and the tuple returned by
    TransformerModel.gridsearch (index 0 is the model, index 1 the selected
    parameters). At level 0 no search runs, so model_tuple is
    (model, fixed parameters, None) and index 1 remains usable.

  Raises:
    ValueError: if gridComplextyModel is not 0, 1 or 2. Previously this case
      only printed a message and then crashed on the unbound `model`.
  """
  name_model = "TRANSFORMER-Model_" + time.strftime("%d_%m_%Y", time.localtime())

  if gridComplextyModel == 0:
    fixed_parameters = {
      "input_chunk_length": WORKOUT_DAYS_SLICED,
      "output_chunk_length": VALIDATION_DAYS_SLICED,
      "batch_size": 32,
      "n_epochs": 10,
      "model_name": name_model,
      "d_model": 4,
      "nhead": 2,
      "num_encoder_layers": 2,
      "num_decoder_layers": 2,
      "dim_feedforward": 128,
      "dropout": 0,
      "activation": "relu",
      "force_reset": True,
      "likelihood": likelihood_list[1]}
    model = TransformerModel(**fixed_parameters)
    # Previously model_tuple was left undefined here, so the final return raised.
    model_tuple = (model, fixed_parameters, None)

  elif gridComplextyModel in (1, 2):
    if gridComplextyModel == 1:
      # pl_trainer_kwargs (early stopping) is left out of this small grid
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": [4],
        "batch_size": [32],
        "force_reset": force_reset_list,
        "likelihood": likelihood_list,
        "d_model": [2],
        "nhead": [2],
        "num_encoder_layers": [2],
        "num_decoder_layers": [2],
        "dim_feedforward": [2],
        "activation": ['relu'],
        "dropout": [0],
        "model_name": [name_model]}
      search_stride = FORECAST_DAYS
    else:
      parameters = {
        "input_chunk_length": [WORKOUT_DAYS_SLICED],
        "output_chunk_length": [VALIDATION_DAYS_SLICED],
        "n_epochs": n_epochs_list,
        "batch_size": batch_size_list,
        "force_reset": force_reset_list,
        "likelihood": likelihood_list,
        "d_model": [32],
        "nhead": [2, 4],
        "num_encoder_layers": [2, 3, 4],
        "num_decoder_layers": [2, 3, 4],
        "dim_feedforward": [256],
        "activation": ['relu', 'gelu'],
        "dropout": dropout_list,
        "pl_trainer_kwargs": [pl_trainer_dic],
        "model_name": [name_model]}
      search_stride = stride_steps

    model_tuple = TransformerModel.gridsearch(
      parameters = parameters,
      series = time_series,
      past_covariates = covariates,
      forecast_horizon = FORECAST_DAYS,
      stride = search_stride,
      start = start_day,  # notebook-level value: 5% of the training-series length
      metric = rmse,
      verbose = True,
      n_jobs = -1)

    model = model_tuple[0]

  else:
    raise ValueError('Valor de parâmetro indisponível')

  # Final fit of the selected configuration on the whole series
  model.fit(series = time_series,
            past_covariates = covariates,
            verbose=False)

  return model, model_tuple

##Treinamento

In [None]:
from timeit import default_timer as timer
# This rebinds the name `datetime` to the module (the imports cell bound it to the class);
# the end-of-training cell relies on the module form as well.
import datetime

# Reference point for the total training time reported after all models have run
start_T = timer()

# Wall-clock start time, printed on its own line after the label
hora_atual = datetime.datetime.now()
print('Hora de inicialiação casos: ', hora_atual, sep='\n')

In [None]:
# NBEATS training is currently disabled (the call below is commented out),
# so the elapsed time printed here only covers the two timer() calls.
start = timer()
#model_NBEATSModel, gridValues_NBEATSModel = gridsearch_NBEATS(1, series_Train, covariates, FORECAST_DAYS)
end = timer()
print("Tempo decorrido de treinamento: {:.2f} minutos".format((end - start)/60))

In [None]:
# TCN training is currently disabled (the call below is commented out),
# so the elapsed time printed here only covers the two timer() calls.
start = timer()
#model_TCNModel, gridValues_TCNModel = gridsearch_TCN(1, series_Train, covariates, FORECAST_DAYS)
end = timer()
print("Tempo decorrido de treinamento: {:.2f} minutos".format((end - start)/60))

In [None]:
# TFT training is currently disabled (the call below is commented out).
# The timer is reset here: with `start` commented out, this cell measured
# from the previous cell's start time and reported a misleading duration.
start = timer()
#model_TFTModel, gridValues_TFTModel = gridsearch_TFT(1, series_Train, covariates, FORECAST_DAYS)
end = timer()
print("Tempo decorrido de treinamento: {:.2f} minutos".format((end - start)/60))

In [None]:
# Transformer training is currently disabled (the call below is commented out),
# so the elapsed time printed here only covers the two timer() calls.
start = timer()
#model_TRANSFORMERModel, gridValues_TRANSFORMERModel = gridsearch_TRANSFORMER(1, series_Train, covariates, FORECAST_DAYS)
end = timer()
print("Tempo decorrido de treinamento: {:.2f} minutos".format((end - start)/60))

In [None]:
# Train N-HiTS with the small grid (level 1) and report the elapsed time in minutes.
# This is the only model whose training call is enabled in this notebook.
start = timer()
model_NHITSModel, gridValues_NHITSModel = gridsearch_NHITS(1, series_Train, covariates, FORECAST_DAYS)
end = timer()
print("Tempo decorrido de treinamento: {:.2f} minutos".format((end - start)/60))

In [None]:
# Total elapsed time, in minutes, since start_T was taken before the training cells
end_T = timer()
print("Tempo decorrido de treinamento total: {:.2f} minutos".format((end_T - start_T)/60))

In [None]:
# Wall-clock end time, printed on its own line after the label.
# `datetime` must be the module here (bound by the `import datetime` in the timing-start cell).
hora_atual = datetime.datetime.now()
print('Hora de finalização casos: ', hora_atual, sep='\n')

#Exportação dos modelos

In [None]:
def saving_templates(model, path_model):
  """Save a model under path_model and print where it was written.

  The file name is the model's model_name followed by '.pth.tar', appended
  directly to path_model (so path_model is expected to end with a separator).

  Args:
    model: object exposing model_name and save_model(path).
      NOTE(review): confirm save_model exists in the installed darts version.
    path_model: destination folder prefix.
  """
  target_path = path_model + model.model_name + '.pth.tar'
  model.save_model(target_path)
  print("Caminho (pasta) do Modelo: ", target_path)

In [None]:
# Export the trained models. Only N-HiTS is exported; the other exports stay
# disabled because their training calls are commented out in the training cells.

#saving_templates(model_NBEATSModel, PATH_MODELS)
#print('NBEATS EXPORT')

#saving_templates(model_TCNModel, PATH_MODELS)
#print('TCN EXPORT')

#saving_templates(model_TFTModel, PATH_MODELS)
#print('TFT EXPORT')

#saving_templates(model_TRANSFORMERModel, PATH_MODELS)
#print('TRNSFORMER EXPORT')

saving_templates(model_NHITSModel, PATH_MODELS)
print('NHITS EXPORT')

#Salvar parâmetros

In [None]:
def save_parameters(path_params, model, grid):
  """Write the string form of `grid` to a text file named after the model.

  The file is path_params + 'BestParameters_' + model.model_name + '_param.txt'
  and is overwritten if it already exists.

  Args:
    path_params: destination folder prefix (expected to end with a separator).
    model: object exposing model_name, used to build the file name.
    grid: any object; str(grid) is what gets written.
  """
  file_path = path_params + 'BestParameters_' + model.model_name + '_param.txt'
  # The context manager closes the file even if the write fails
  with open(file_path, 'w') as arquivo:
    arquivo.write(str(grid))

In [None]:
# Save the best parameters of every model that was actually trained in this session.
# Several training calls are commented out in the training cells, so referencing
# their variables unconditionally raised NameError and stopped a "Run All".
for _model_var, _grid_var in [
    ('model_NBEATSModel', 'gridValues_NBEATSModel'),
    ('model_TCNModel', 'gridValues_TCNModel'),
    ('model_TFTModel', 'gridValues_TFTModel'),
    ('model_TRANSFORMERModel', 'gridValues_TRANSFORMERModel'),
    ('model_NHITSModel', 'gridValues_NHITSModel'),
]:
  if _model_var in globals() and _grid_var in globals():
    # Index 1 of the grid-search result holds the selected parameters
    save_parameters(PATH_PARAMS, globals()[_model_var], globals()[_grid_var][1])
  else:
    print('Modelo não treinado, parâmetros não salvos: ' + _model_var)

# '\n' is the newline escape; the original '/n/n' printed the slashes literally
print('\n\nParametros salvos')

In [None]:
#%run "/content/drive/My Drive/NPCA - COVID/Colab/TREINO E PREDIÇÃO/TREINAR_MODELOS_covid_obitos_ver1.ipynb"