# In this model, I will train the NBEATS time series prediction model on all states in the USA, the USA, and other countries as well to forecast deaths and cases

In [115]:
import numpy as np
from nbeats_keras.model import NBeatsNet
import pandas as pd
from collections import deque
import matplotlib.pyplot as plt

In [162]:
df = pd.read_csv('coviddata.csv')
df['Date'] = pd.to_datetime(df.Date)

(880250, 18)


# Let's try to train the model on all areas inside the USA

In [134]:
#get list of all represented countries
countries = df['Country_Region'].unique()
countries = np.sort(countries)

**Since time series for NBEATS is univariate, we cannot make an additional feature which encodes the country and case type per country as another feature because then the time series is not univariate**

**So, we will create a function where the user inputs a specific country, forecast horizon, and the case type(confirmed or deaths) and the model will train accordingly**

**The function will save and return the trained model, which then can be used to perform predictions**

In [186]:
def train_for_country(countries, df, country, forecast_horizon, case):
    print(df.shape)
    current_df = df.query(f'Country_Region == "{country}" and Case_Type == "{case}"')
    print(current_df.shape)
    sorteddf = current_df.sort_values(by='Date')
    sorteddf = sorteddf.groupby(sorteddf['Date'].dt.date).sum()
    print(sorteddf.shape)
    
    data=[]
    for j in range(len(sorteddf['Cases'])):
        if j+14 > len(sorteddf['Cases']):
            data.append([[sorteddf['Cases'][k]] for k in range (j,len(sorteddf['Cases']))])
        else:
            data.append([[sorteddf['Cases'][k]] for k in range (j,j+14)])
    
    while len(data[-1]) < 14:
        data.remove(data[-1])
        
    predictions=[]
    for j in range(13+forecast_horizon,len(sorteddf['Cases'])):
        predictions.append([[sorteddf['Cases'][j]]])
        
    data=np.array(data)
    predictions=np.array(predictions)
    
    future = np.array([data[j] for j in range(len(data)-forecast_horizon, len(data))]).astype('float32')
    
    for j in range(0,forecast_horizon):
        data = np.delete(data, -1,0)
        
    splitter = int(0.9*len(data))
    x_train, y_train, x_test, y_test = data[:splitter].astype('float32'), predictions[:splitter].astype('float32'), data[splitter:].astype('float32'), predictions[splitter:].astype('float32')
    
    #train
    num_samples, time_steps, input_dim, output_dim = len(data), 14, 1, 1
    
    model = NBeatsNet(backcast_length=time_steps, forecast_length=output_dim,
                  stack_types=(NBeatsNet.GENERIC_BLOCK, NBeatsNet.GENERIC_BLOCK,), nb_blocks_per_stack=12,
                  thetas_dim=(4, 4), share_weights_in_stack=True, hidden_layer_units=256)
    
    model.compile_model(loss='mape', learning_rate=1e-5)
    
    model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=55, batch_size=5)
    
    model.save(f'n_beats_model.{country}.{case}')
    
    modelreturn = NBeatsNet.load(f'n_beats_model.{country}.{case}')
    
    return modelreturn

In [187]:
my_model = train_for_country(countries, df, "US", 7, "Confirmed")

(880250, 18)
(407375, 18)
(125, 8)
Model: "model_8"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_variable (InputLayer)     (None, 14, 1)        0                                            
__________________________________________________________________________________________________
lambda_8 (Lambda)               (None, 14)           0           input_variable[0][0]             
__________________________________________________________________________________________________
0/0/generic/d1 (Dense)          (None, 256)          3840        lambda_8[0][0]                   
                                                                 subtract_169[0][0]               
                                                                 subtract_170[0][0]               
                                                         

Train on 94 samples, validate on 11 samples
Epoch 1/55
Epoch 2/55
Epoch 3/55
Epoch 4/55
Epoch 5/55
Epoch 6/55
Epoch 7/55
Epoch 8/55
Epoch 9/55
Epoch 10/55
Epoch 11/55
Epoch 12/55
Epoch 13/55
Epoch 14/55
Epoch 15/55
Epoch 16/55
Epoch 17/55
Epoch 18/55
Epoch 19/55
Epoch 20/55
Epoch 21/55
Epoch 22/55
Epoch 23/55
Epoch 24/55
Epoch 25/55
Epoch 26/55
Epoch 27/55
Epoch 28/55
Epoch 29/55
Epoch 30/55
Epoch 31/55
Epoch 32/55
Epoch 33/55
Epoch 34/55
Epoch 35/55
Epoch 36/55
Epoch 37/55
Epoch 38/55
Epoch 39/55
Epoch 40/55
Epoch 41/55
Epoch 42/55
Epoch 43/55
Epoch 44/55
Epoch 45/55
Epoch 46/55
Epoch 47/55
Epoch 48/55
Epoch 49/55
Epoch 50/55
Epoch 51/55
Epoch 52/55
Epoch 53/55
Epoch 54/55
Epoch 55/55
