In [None]:
 # Mount Google Drive at /content/drive (Colab-only); later cells read the CSV from a path under this mount.
 from google.colab import drive
 drive.mount('/content/drive')

In [None]:
import pandas as pd
import numpy as np
import os
import keras
from numpy import array
from keras.models import Sequential
from keras.layers import Dense

from math import sqrt
from numpy import median
from numpy import mean
from numpy import std
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot

In [None]:
# MLP on 133 data points - test on the last 44

MLP - grid search

In [None]:
from math import sqrt
from numpy import array
from numpy import mean
from pandas import DataFrame
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
  """Split a series into a leading training part and a trailing test part.

  Args:
    data: sliceable sequence (list or array) ordered in time.
    n_test: number of trailing observations to hold out for testing.

  Returns:
    Tuple ``(train, test)`` where ``test`` holds the last ``n_test`` items.
  """
  # Slice at an explicit index: data[:-n_test] is empty when n_test == 0,
  # which would silently hand the whole series to the test side.
  split_at = max(len(data) - n_test, 0)
  return data[:split_at], data[split_at:]

# transform list into supervised learning format
def series_to_supervised(data, n_in, n_out=1):
  """Frame a series as rows of lagged inputs followed by forecast targets.

  Each returned row holds the values shifted by n_in ... 1 steps and then the
  values shifted by 0 ... -(n_out-1) steps; rows left incomplete by the
  shifting are discarded. Returns the result as a NumPy array.
  """
  frame = DataFrame(data)
  # lagged inputs, oldest first (t-n_in, ..., t-1)
  lagged = [frame.shift(lag) for lag in range(n_in, 0, -1)]
  # forecast targets (t, t+1, ...)
  leads = [frame.shift(-step) for step in range(n_out)]
  combined = concat(lagged + leads, axis=1)
  # shifting introduces NaN at the edges; keep only complete rows
  return combined.dropna().values

# root mean squared error or rmse
def measure_rmse(actual, predicted):
  """Return the square root of the mean squared error between the inputs."""
  mse = mean_squared_error(actual, predicted)
  return sqrt(mse)

# difference dataset
def difference(data, order):
  """Return the list of data[i] - data[i - order] for i from order to the end."""
  diffs = []
  for idx in range(order, len(data)):
    diffs.append(data[idx] - data[idx - order])
  return diffs

# fit a model
def model_fit(train, config):
  """Train a one-hidden-layer MLP on lagged windows of the training series.

  ``config`` unpacks to (n_input, n_nodes, n_epochs, n_batch, n_diff); the
  series is differenced first whenever n_diff is positive.
  Returns the fitted Keras model.
  """
  lag_count, hidden_units, epochs, batch_size, diff_order = config
  # optional differencing before building the supervised frame
  series = difference(train, diff_order) if diff_order > 0 else train
  supervised = series_to_supervised(series, n_in=lag_count)
  # the last column is the target, everything before it is model input
  features = supervised[:, :-1]
  targets = supervised[:, -1]

  network = Sequential()
  network.add(Dense(hidden_units, activation='relu', input_dim=lag_count))
  network.add(Dense(1))
  network.compile(loss='mse', optimizer='adam')
  # train silently
  network.fit(features, targets, epochs=epochs, batch_size=batch_size, verbose=0)
  return network

# forecast with the fit model
def model_predict(model, history, config):
  """Produce a one-step forecast from the most recent observations.

  Only the first entry of ``config`` (number of lag inputs) and the last
  (differencing order) are used. When differencing is active, the forecast
  is added back onto ``history[-n_diff]``.
  """
  lag_count, _, _, _, diff_order = config
  if diff_order > 0:
    # value the differenced forecast is relative to
    baseline = history[-diff_order]
    series = difference(history, diff_order)
  else:
    baseline = 0.0
    series = history
  # single sample made of the latest lag_count values
  window = array(series[-lag_count:]).reshape((1, lag_count))
  forecast = model.predict(window, verbose=0)
  return baseline + forecast[0]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
  """Fit once on the training part, then forecast the test part step by step.

  After each forecast the true observation is appended to the history, so
  every prediction sees all data up to the previous time step. The model is
  not refit inside the loop. Prints and returns the RMSE over the test part.
  """
  train, test = train_test_split(data, n_test)
  model = model_fit(train, cfg)
  # history starts as a copy of the training observations
  history = list(train)
  predictions = []
  for observed in test:
    # forecast the next step from the current history
    predictions.append(model_predict(model, history, cfg))
    # reveal the real value before moving on
    history.append(observed)
  error = measure_rmse(test, predictions)
  print(' > %.3f' % error)
  return error

# score a model over repeated runs, return (config key, mean RMSE)
def repeat_evaluate(data, config, n_test, n_repeats=10):
  """Run walk-forward validation ``n_repeats`` times and average the error.

  Returns a ``(key, result)`` tuple: ``str(config)`` and the mean of the
  individual scores. The summary line is also printed.
  """
  label = str(config)
  run_errors = []
  for _ in range(n_repeats):
    run_errors.append(walk_forward_validation(data, n_test, config))
  average = mean(run_errors)
  print('> Model[%s] %.3f' % (label, average))
  return (label, average)

# grid search configs
def grid_search(data, cfg_list, n_test):
  """Evaluate every config and return the (key, error) pairs, lowest error first."""
  results = []
  for cfg in cfg_list:
    results.append(repeat_evaluate(data, cfg, n_test))
  # ascending by the error stored in position 1 of each pair
  return sorted(results, key=lambda pair: pair[1])

# create a list of configs to try
def model_configs():
  """Build the full grid of [n_input, n_nodes, n_epochs, n_batch, n_diff] lists.

  Prints the number of combinations and returns them, with the last
  hyperparameter varying fastest.
  """
  # candidate values for each hyperparameter
  input_sizes = [1,2,3,7]
  node_counts = [16, 12, 7,8]
  epoch_counts = [1000]
  batch_sizes = [1, 32, 16, 64]
  diff_orders = [0, 1, 2, 7]
  # cartesian product, in the same order nested loops would produce
  grid = [
    [n_in, nodes, epochs, batch, diff]
    for n_in in input_sizes
    for nodes in node_counts
    for epochs in epoch_counts
    for batch in batch_sizes
    for diff in diff_orders
  ]
  print('Total configs: %d' % len(grid))
  return grid

# define dataset (first CSV column becomes the index)
series = read_csv('/content/drive/My Drive/Colab Notebooks/dpc-covid19-ita-andamento-nazionale.csv', index_col=0)
# keep only the 'totale_positivi' column and give it an English name;
# columns= is required because a bare mapping passed to rename() relabels
# index entries and leaves the column name unchanged
series = series[['totale_positivi']].rename(columns={'totale_positivi': 'total_positives'})
# single-column frame -> 2-D array of shape (n_observations, 1)
data = series.values

# data split: hold out the last 44 observations
n_test = 44
# model configs
cfg_list = model_configs()
# grid search
scores = grid_search(data, cfg_list, n_test)
print('done')
# list top 10 configs
for cfg, error in scores[:10]:
  print(cfg, error)

Total configs: 256
 > 1104.270
 > 1309.337
 > 1194.254
 > 1368.960
 > 1696.615
 > 1648.365
 > 1314.385
 > 1250.847
 > 1687.479
 > 1559.210
> Model[[1, 16, 1000, 1, 0]] 1413.372
 > 625.320
 > 629.975
 > 631.212
 > 622.416
 > 635.570
 > 632.127
 > 620.339
 > 611.490
 > 621.268
 > 618.298
> Model[[1, 16, 1000, 1, 1]] 624.801
 > 660.696
 > 652.178
 > 661.242
 > 661.872
 > 663.033
 > 657.247
 > 661.839
 > 659.761
 > 659.471
 > 657.373
> Model[[1, 16, 1000, 1, 2]] 659.471
 > 702.382
 > 707.608
 > 708.169
 > 740.805
 > 713.937
 > 722.463
 > 714.042
 > 742.440
 > 696.313
 > 695.184
> Model[[1, 16, 1000, 1, 7]] 714.334
 > 1324.724
 > 1324.417
 > 1324.886
 > 1336.345
 > 1308.072
 > 1323.755
 > 1320.769
 > 1316.926
 > 1323.975
 > 1320.827
> Model[[1, 16, 1000, 32, 0]] 1322.470
 > 643.464
 > 645.001
 > 641.505
 > 645.900
 > 639.934
 > 649.738
 > 640.306
 > 642.012
 > 642.840
 > 647.535
> Model[[1, 16, 1000, 32, 1]] 643.823
 > 662.566
 > 662.934
 > 661.361
 > 660.008
 > 669.604
 > 665.848
 > 663.65

In [None]:
# MLP on the first 30 days - test on the last 10

In [None]:
from math import sqrt
from numpy import array
from numpy import mean
from pandas import DataFrame
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
  """Split a series into a leading training part and a trailing test part.

  Args:
    data: sliceable sequence (list or array) ordered in time.
    n_test: number of trailing observations to hold out for testing.

  Returns:
    Tuple ``(train, test)`` where ``test`` holds the last ``n_test`` items.
  """
  # Slice at an explicit index: data[:-n_test] is empty when n_test == 0,
  # which would silently hand the whole series to the test side.
  split_at = max(len(data) - n_test, 0)
  return data[:split_at], data[split_at:]

# transform list into supervised learning format
def series_to_supervised(data, n_in, n_out=1):
  """Frame a series as rows of lagged inputs followed by forecast targets.

  Each returned row holds the values shifted by n_in ... 1 steps and then the
  values shifted by 0 ... -(n_out-1) steps; rows left incomplete by the
  shifting are discarded. Returns the result as a NumPy array.
  """
  frame = DataFrame(data)
  # lagged inputs, oldest first (t-n_in, ..., t-1)
  lagged = [frame.shift(lag) for lag in range(n_in, 0, -1)]
  # forecast targets (t, t+1, ...)
  leads = [frame.shift(-step) for step in range(n_out)]
  combined = concat(lagged + leads, axis=1)
  # shifting introduces NaN at the edges; keep only complete rows
  return combined.dropna().values

# root mean squared error or rmse
def measure_rmse(actual, predicted):
  """Return the square root of the mean squared error between the inputs."""
  mse = mean_squared_error(actual, predicted)
  return sqrt(mse)

# difference dataset
def difference(data, order):
  """Return the list of data[i] - data[i - order] for i from order to the end."""
  diffs = []
  for idx in range(order, len(data)):
    diffs.append(data[idx] - data[idx - order])
  return diffs

# fit a model
def model_fit(train, config):
  """Train a one-hidden-layer MLP on lagged windows of the training series.

  ``config`` unpacks to (n_input, n_nodes, n_epochs, n_batch, n_diff); the
  series is differenced first whenever n_diff is positive.
  Returns the fitted Keras model.
  """
  lag_count, hidden_units, epochs, batch_size, diff_order = config
  # optional differencing before building the supervised frame
  series = difference(train, diff_order) if diff_order > 0 else train
  supervised = series_to_supervised(series, n_in=lag_count)
  # the last column is the target, everything before it is model input
  features = supervised[:, :-1]
  targets = supervised[:, -1]

  network = Sequential()
  network.add(Dense(hidden_units, activation='relu', input_dim=lag_count))
  network.add(Dense(1))
  network.compile(loss='mse', optimizer='adam')
  # train silently
  network.fit(features, targets, epochs=epochs, batch_size=batch_size, verbose=0)
  return network

# forecast with the fit model
def model_predict(model, history, config):
  """Produce a one-step forecast from the most recent observations.

  Only the first entry of ``config`` (number of lag inputs) and the last
  (differencing order) are used. When differencing is active, the forecast
  is added back onto ``history[-n_diff]``.
  """
  lag_count, _, _, _, diff_order = config
  if diff_order > 0:
    # value the differenced forecast is relative to
    baseline = history[-diff_order]
    series = difference(history, diff_order)
  else:
    baseline = 0.0
    series = history
  # single sample made of the latest lag_count values
  window = array(series[-lag_count:]).reshape((1, lag_count))
  forecast = model.predict(window, verbose=0)
  return baseline + forecast[0]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
  """Fit once on the training part, then forecast the test part step by step.

  After each forecast the true observation is appended to the history, so
  every prediction sees all data up to the previous time step. The model is
  not refit inside the loop. Prints and returns the RMSE over the test part.
  """
  train, test = train_test_split(data, n_test)
  model = model_fit(train, cfg)
  # history starts as a copy of the training observations
  history = list(train)
  predictions = []
  for observed in test:
    # forecast the next step from the current history
    predictions.append(model_predict(model, history, cfg))
    # reveal the real value before moving on
    history.append(observed)
  error = measure_rmse(test, predictions)
  print(' > %.3f' % error)
  return error

# score a model over repeated runs, return (config key, mean RMSE)
def repeat_evaluate(data, config, n_test, n_repeats=10):
  """Run walk-forward validation ``n_repeats`` times and average the error.

  Returns a ``(key, result)`` tuple: ``str(config)`` and the mean of the
  individual scores. The summary line is also printed.
  """
  label = str(config)
  run_errors = []
  for _ in range(n_repeats):
    run_errors.append(walk_forward_validation(data, n_test, config))
  average = mean(run_errors)
  print('> Model[%s] %.3f' % (label, average))
  return (label, average)

# grid search configs
def grid_search(data, cfg_list, n_test):
  """Evaluate every config and return the (key, error) pairs, lowest error first."""
  results = []
  for cfg in cfg_list:
    results.append(repeat_evaluate(data, cfg, n_test))
  # ascending by the error stored in position 1 of each pair
  return sorted(results, key=lambda pair: pair[1])

# create a list of configs to try
def model_configs():
  """Build the full grid of [n_input, n_nodes, n_epochs, n_batch, n_diff] lists.

  Prints the number of combinations and returns them, with the last
  hyperparameter varying fastest.
  """
  # candidate values for each hyperparameter
  input_sizes = [1,2,3,7]
  node_counts = [16, 12, 7,8]
  epoch_counts = [1000]
  batch_sizes = [1, 32, 16, 64]
  diff_orders = [0, 1, 2, 7]
  # cartesian product, in the same order nested loops would produce
  grid = [
    [n_in, nodes, epochs, batch, diff]
    for n_in in input_sizes
    for nodes in node_counts
    for epochs in epoch_counts
    for batch in batch_sizes
    for diff in diff_orders
  ]
  print('Total configs: %d' % len(grid))
  return grid

# define dataset (first CSV column becomes the index)
series = read_csv('/content/drive/My Drive/Colab Notebooks/dpc-covid19-ita-andamento-nazionale.csv', index_col=0)
# keep only the 'totale_positivi' column and give it an English name;
# columns= is required because a bare mapping passed to rename() relabels
# index entries and leaves the column name unchanged
series = series[['totale_positivi']].rename(columns={'totale_positivi': 'total_positives'})
# restrict the experiment to the first 30 rows
data = series.values[:30]

# data split: hold out the last 10 of those 30 observations
n_test = 10
# model configs
cfg_list = model_configs()
# grid search
scores = grid_search(data, cfg_list, n_test)
print('done')
# list top 10 configs
for cfg, error in scores[:10]:
  print(cfg, error)

Total configs: 256
 > 3038.951
 > 3184.896
 > 2898.831
 > 3121.342
 > 3087.153
 > 3220.222
 > 2936.471
 > 3262.372
 > 2647.848
 > 3103.792
> Model[[1, 16, 1000, 1, 0]] 3050.188
 > 725.526
 > 729.165
 > 730.797
 > 731.391
 > 730.594
 > 719.692
 > 740.051
 > 725.418
 > 726.408
 > 729.949
> Model[[1, 16, 1000, 1, 1]] 728.899
 > 1174.629
 > 1216.522
 > 1185.359
 > 1174.148
 > 1202.183
 > 1191.412
 > 1162.347
 > 1212.782
 > 1200.847
 > 1200.134
> Model[[1, 16, 1000, 1, 2]] 1192.036
 > 2250.995
 > 2265.400
 > 2156.119
 > 2475.333
 > 2197.552
 > 2278.864
 > 2202.145
 > 2300.228
 > 2281.866
 > 2244.347
> Model[[1, 16, 1000, 1, 7]] 2265.285
 > 3136.148
 > 3136.856
 > 3136.227
 > 3137.063
 > 3137.139
 > 3136.687
 > 3137.100
 > 3136.076
 > 3136.331
 > 3135.936
> Model[[1, 16, 1000, 32, 0]] 3136.556
 > 736.817
 > 736.741
 > 736.593
 > 736.939
 > 736.843
 > 736.931
 > 736.699
 > 736.607
 > 736.770
 > 736.189
> Model[[1, 16, 1000, 32, 1]] 736.713
 > 1204.590
 > 1204.462
 > 1204.554
 > 1192.644
 > 12

In [None]:
from math import sqrt
from numpy import array
from numpy import mean
from pandas import DataFrame
from pandas import concat
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
  """Split a series into a leading training part and a trailing test part.

  Args:
    data: sliceable sequence (list or array) ordered in time.
    n_test: number of trailing observations to hold out for testing.

  Returns:
    Tuple ``(train, test)`` where ``test`` holds the last ``n_test`` items.
  """
  # Slice at an explicit index: data[:-n_test] is empty when n_test == 0,
  # which would silently hand the whole series to the test side.
  split_at = max(len(data) - n_test, 0)
  return data[:split_at], data[split_at:]

# transform list into supervised learning format
def series_to_supervised(data, n_in, n_out=1):
  """Frame a series as rows of lagged inputs followed by forecast targets.

  Each returned row holds the values shifted by n_in ... 1 steps and then the
  values shifted by 0 ... -(n_out-1) steps; rows left incomplete by the
  shifting are discarded. Returns the result as a NumPy array.
  """
  frame = DataFrame(data)
  # lagged inputs, oldest first (t-n_in, ..., t-1)
  lagged = [frame.shift(lag) for lag in range(n_in, 0, -1)]
  # forecast targets (t, t+1, ...)
  leads = [frame.shift(-step) for step in range(n_out)]
  combined = concat(lagged + leads, axis=1)
  # shifting introduces NaN at the edges; keep only complete rows
  return combined.dropna().values

# root mean squared error or rmse
def measure_rmse(actual, predicted):
  """Return the square root of the mean squared error between the inputs."""
  mse = mean_squared_error(actual, predicted)
  return sqrt(mse)

# difference dataset
def difference(data, order):
  """Return the list of data[i] - data[i - order] for i from order to the end."""
  diffs = []
  for idx in range(order, len(data)):
    diffs.append(data[idx] - data[idx - order])
  return diffs

# fit a model
def model_fit(train, config):
  """Train a one-hidden-layer MLP on lagged windows of the training series.

  ``config`` unpacks to (n_input, n_nodes, n_epochs, n_batch, n_diff); the
  series is differenced first whenever n_diff is positive.
  Returns the fitted Keras model.
  """
  lag_count, hidden_units, epochs, batch_size, diff_order = config
  # optional differencing before building the supervised frame
  series = difference(train, diff_order) if diff_order > 0 else train
  supervised = series_to_supervised(series, n_in=lag_count)
  # the last column is the target, everything before it is model input
  features = supervised[:, :-1]
  targets = supervised[:, -1]

  network = Sequential()
  network.add(Dense(hidden_units, activation='relu', input_dim=lag_count))
  network.add(Dense(1))
  network.compile(loss='mse', optimizer='adam')
  # train silently
  network.fit(features, targets, epochs=epochs, batch_size=batch_size, verbose=0)
  return network

# forecast with the fit model
def model_predict(model, history, config):
  """Produce a one-step forecast from the most recent observations.

  Only the first entry of ``config`` (number of lag inputs) and the last
  (differencing order) are used. When differencing is active, the forecast
  is added back onto ``history[-n_diff]``.
  """
  lag_count, _, _, _, diff_order = config
  if diff_order > 0:
    # value the differenced forecast is relative to
    baseline = history[-diff_order]
    series = difference(history, diff_order)
  else:
    baseline = 0.0
    series = history
  # single sample made of the latest lag_count values
  window = array(series[-lag_count:]).reshape((1, lag_count))
  forecast = model.predict(window, verbose=0)
  return baseline + forecast[0]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test, cfg):
  """Fit once on the training part, then forecast the test part step by step.

  After each forecast the true observation is appended to the history, so
  every prediction sees all data up to the previous time step. The model is
  not refit inside the loop. Prints and returns the RMSE over the test part.
  """
  train, test = train_test_split(data, n_test)
  model = model_fit(train, cfg)
  # history starts as a copy of the training observations
  history = list(train)
  predictions = []
  for observed in test:
    # forecast the next step from the current history
    predictions.append(model_predict(model, history, cfg))
    # reveal the real value before moving on
    history.append(observed)
  error = measure_rmse(test, predictions)
  print(' > %.3f' % error)
  return error

# score a model over repeated runs, return (config key, mean RMSE)
def repeat_evaluate(data, config, n_test, n_repeats=10):
  """Run walk-forward validation ``n_repeats`` times and average the error.

  Returns a ``(key, result)`` tuple: ``str(config)`` and the mean of the
  individual scores. The summary line is also printed.
  """
  label = str(config)
  run_errors = []
  for _ in range(n_repeats):
    run_errors.append(walk_forward_validation(data, n_test, config))
  average = mean(run_errors)
  print('> Model[%s] %.3f' % (label, average))
  return (label, average)

# grid search configs
def grid_search(data, cfg_list, n_test):
  """Evaluate every config and return the (key, error) pairs, lowest error first."""
  results = []
  for cfg in cfg_list:
    results.append(repeat_evaluate(data, cfg, n_test))
  # ascending by the error stored in position 1 of each pair
  return sorted(results, key=lambda pair: pair[1])

# create a list of configs to try

def model_configs():
  """Build the full grid of [n_input, n_nodes, n_epochs, n_batch, n_diff] lists.

  Prints the number of combinations and returns them, with the last
  hyperparameter varying fastest.
  """
  # candidate values for each hyperparameter
  input_sizes = [2,3,7]
  node_counts = [16, 12, 7,8]
  epoch_counts = [1000]
  batch_sizes = [1, 32, 16, 64]
  diff_orders = [0, 1, 2, 7]
  # cartesian product, in the same order nested loops would produce
  grid = [
    [n_in, nodes, epochs, batch, diff]
    for n_in in input_sizes
    for nodes in node_counts
    for epochs in epoch_counts
    for batch in batch_sizes
    for diff in diff_orders
  ]
  print('Total configs: %d' % len(grid))
  return grid

# define dataset (first CSV column becomes the index)
series = read_csv('/content/drive/My Drive/Colab Notebooks/dpc-covid19-ita-andamento-nazionale.csv', index_col=0)
# keep only the 'totale_positivi' column and give it an English name;
# columns= is required because a bare mapping passed to rename() relabels
# index entries and leaves the column name unchanged
series = series[['totale_positivi']].rename(columns={'totale_positivi': 'total_positives'})
# restrict the experiment to the first 30 rows
data = series.values[:30]

# data split: hold out the last 10 of those 30 observations
n_test = 10
# model configs
cfg_list = model_configs()
# grid search
scores = grid_search(data, cfg_list, n_test)
print('done')
# list top 10 configs
for cfg, error in scores[:10]:
  print(cfg, error)

Total configs: 192
 > 3762.702
 > 3966.523
 > 3765.086
 > 3839.772
 > 3413.947
 > 3320.480
 > 3980.993
 > 4219.189
 > 3988.401
 > 3490.988
> Model[[2, 16, 1000, 1, 0]] 3774.808
 > 822.575
 > 969.020
 > 1015.063
 > 810.693
 > 828.823
 > 1003.027
 > 827.618
 > 876.351
 > 854.693
 > 839.067
> Model[[2, 16, 1000, 1, 1]] 884.693
 > 1345.276
 > 1348.138
 > 1245.477
 > 1273.381
 > 1261.805
 > 1345.533
 > 1266.956
 > 1321.413
 > 1361.474
 > 1317.489
> Model[[2, 16, 1000, 1, 2]] 1308.694
 > 3000.885
 > 3232.419
 > 2845.815
 > 3309.887
 > 3338.112
 > 3012.713
 > 3038.079
 > 2916.953
 > 2919.519
 > 3254.703
> Model[[2, 16, 1000, 1, 7]] 3086.908
 > 4469.514
 > 4338.323
 > 3876.249
 > 4721.173
 > 4046.038
 > 3860.434
 > 5643.027
 > 5038.661
 > 4941.731
 > 5127.240
> Model[[2, 16, 1000, 32, 0]] 4606.239
 > 870.705
 > 860.138
 > 897.491
 > 962.217
 > 951.785
 > 787.412
 > 942.324
 > 829.477
 > 912.490
 > 856.114
> Model[[2, 16, 1000, 32, 1]] 887.015
 > 1427.187
 > 1406.022
 > 1113.278
 > 1514.131
 > 