# In this notebook, I train the N-BEATS time series forecasting model on every US state, the USA as a whole, and other countries to forecast deaths and confirmed cases

In [115]:
import numpy as np
from nbeats_keras.model import NBeatsNet
import pandas as pd
from collections import deque

In [116]:
# Read the raw COVID table from disk and convert its Date column to datetimes.
df = pd.read_csv('coviddata.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)

# Let's try to train the model on all areas inside the USA

In [134]:
# Sorted array of every distinct Country_Region value present in the data.
countries = np.sort(df['Country_Region'].unique())

**Because N-BEATS works on univariate time series, we cannot add an extra feature that encodes the country and case type: the series would then no longer be univariate.**

**So, we will create a function that takes a specific country, a forecast horizon, and the case type (confirmed or deaths), and trains the model on that single series.**

In [138]:
def _build_windows(cases, backcast_length, forecast_horizon):
    """Slice a 1-D series into overlapping input windows and their targets.

    Window ``i`` covers ``cases[i : i + backcast_length]``. Its target is the
    single value ``forecast_horizon`` steps after the window's last element,
    so the final ``forecast_horizon`` windows have no target inside the series.

    Args:
        cases: 1-D numpy array of observations in chronological order.
        backcast_length: number of observations per input window.
        forecast_horizon: how many steps past the window the target lies.

    Returns:
        ``(windows, targets)`` where ``windows`` has shape
        ``(n_windows, backcast_length, 1)`` and ``targets`` has shape
        ``(n_windows - forecast_horizon, 1, 1)``.
    """
    n_windows = len(cases) - backcast_length + 1
    # Row i of `offsets` holds the positions i .. i + backcast_length - 1.
    offsets = np.arange(n_windows)[:, None] + np.arange(backcast_length)
    windows = cases[offsets][..., np.newaxis]
    targets = cases[backcast_length - 1 + forecast_horizon:].reshape(-1, 1, 1)
    return windows, targets


def train_for_country(countries, df, country, forecast_horizon, case, backcast_length=14):
    """Train an N-BEATS model on one country's daily series for one case type.

    The rows of ``df`` matching ``country`` and ``case`` are summed per
    calendar day, cut into sliding windows of ``backcast_length`` days, and
    each window is paired with the value ``forecast_horizon`` days after its
    last day. The earliest 90% of the pairs are used for training and the
    remaining 10% for validation.

    Args:
        countries: not used by this function; kept so existing callers that
            pass it keep working.
        df: DataFrame with ``Country_Region``, ``Case_Type``, ``Date``
            (datetime dtype) and ``Cases`` columns.
        country: value to match in ``Country_Region``.
        forecast_horizon: number of days ahead to predict (must be >= 1).
        case: value to match in ``Case_Type``.
        backcast_length: number of days in each input window (default 14).

    Raises:
        ValueError: if ``forecast_horizon`` is below 1, or if the selected
            series is too short to yield at least two (window, target) pairs.
    """
    if forecast_horizon < 1:
        raise ValueError(f'forecast_horizon must be >= 1, got {forecast_horizon}')

    # Boolean masks instead of a string-built query, so names that contain
    # quote characters cannot break the expression.
    selected = df[(df['Country_Region'] == country) & (df['Case_Type'] == case)]

    # Sum only the Cases column: summing the whole frame would also try to
    # add up the string and datetime columns. sort=True keeps days in order.
    daily = selected.groupby(selected['Date'].dt.date, sort=True)['Cases'].sum()
    cases = daily.to_numpy(dtype='float32')

    n_samples = len(cases) - backcast_length + 1 - forecast_horizon
    if n_samples < 2:
        raise ValueError(
            f'Not enough data for country={country!r}, case={case!r}: '
            f'{len(cases)} daily values, need at least '
            f'{backcast_length + forecast_horizon + 1}'
        )

    windows, predictions = _build_windows(cases, backcast_length, forecast_horizon)

    # The last `forecast_horizon` windows have no known target yet. They are
    # the inputs for forecasting beyond the end of the series.
    # NOTE(review): `future` and `model` are not returned or used in the part
    # of the function visible here - confirm how they are meant to be consumed.
    future = windows[-forecast_horizon:]
    data = windows[:-forecast_horizon]

    # Chronological 90/10 split: the most recent pairs are held out.
    splitter = int(0.9 * len(data))
    x_train, x_test = data[:splitter], data[splitter:]
    y_train, y_test = predictions[:splitter], predictions[splitter:]

    # One value is predicted per window; stack/block settings are left at the
    # library defaults.
    model = NBeatsNet(backcast_length=backcast_length, forecast_length=1)

    model.compile_model(loss='mae', learning_rate=1e-5)

    model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=55, batch_size=5)