In [1]:
import numpy as np
import pandas as pd
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import math


## Load data

In [2]:
# Read the semicolon-separated, pre-processed table and index it by the
# "Date" column in a single chained expression, then display it.
dataset = pd.read_csv("covid_dataset_preprocessed.csv", sep=";").set_index("Date")
dataset

Unnamed: 0_level_0,DayOfWeek,Country,Tests,Confirmed,Confirmed1,Confirmed2,Confirmed3,Confirmed4,Confirmed5,Confirmed6,Confirmed7,Recovered,Deaths,SchoolsAreClose,Lockdown,MasksObligatory
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2020-03-04,wednesday,Poland,92,1,0,0,0,0,0,0,0,0,0,0,0,0
2020-03-05,thursday,Poland,179,1,1,0,0,0,0,0,0,0,0,0,0,0
2020-03-06,friday,Poland,0,4,1,1,0,0,0,0,0,0,0,0,0,0
2020-03-07,saturday,Poland,299,0,4,1,1,0,0,0,0,0,0,0,0,0
2020-03-08,sunday,Poland,230,6,0,4,1,1,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-11-13,friday,Poland,56084,24051,22683,25221,25454,21713,24785,27875,27086,267580,9499,1,0,1
2020-11-14,saturday,Poland,46607,25571,24051,22683,25221,25454,21713,24785,27875,282215,10045,1,0,1
2020-11-15,sunday,Poland,35105,21854,25571,24051,22683,25221,25454,21713,24785,294783,10348,1,0,1
2020-11-16,monday,Poland,41983,20816,21854,25571,24051,22683,25221,25454,21713,306022,10491,1,0,1


In [3]:
# Other candidate feature columns, currently not used by the model: "Confirmed2","Confirmed3","Confirmed4","Confirmed5","Confirmed6","Confirmed7","Tests","Recovered","Deaths","SchoolsAreClose","Lockdown","MasksObligatory"

## Selecting columns for model

In [4]:
# Keep only the two columns the model uses. Column 0 ("Confirmed") becomes the
# label and column 1 ("Confirmed1") the single input feature in the cells below.
# Judging by the table preview, "Confirmed1" looks like the previous day's
# "Confirmed" value -- TODO confirm against the preprocessing step.
selected_columns = ["Confirmed", "Confirmed1"]
data = dataset[selected_columns].to_numpy()

## Splitting into test/train sets

Normalizing data

In [5]:
# Min-max scaler configured to map values into the [0, 1] interval.
scaling_bounds = (0, 1)
scaler = MinMaxScaler(feature_range=scaling_bounds)

Splitting labels from attributes

In [6]:
# First column is the label (kept 2-D via the slice); every remaining column
# is an input attribute.
Y, X = data[:, :1], data[:, 1:]

In [7]:
# Fit the min-max scaler on the input attribute and scale it, printing the
# same three rows before and after for a quick sanity check.
# NOTE(review): the scaler is fitted on every row, including the rows that the
# later split cell holds out as the test set, so test-set statistics leak into
# the scaling. Consider fitting on the training rows only.
print("Before normalization X[100:103]\n", X[100:103])
X = scaler.fit_transform(X)
# Label corrected: this line prints X (it previously claimed to print Y).
print("\nAfter normalization X[100:103]:\n", X[100:103])

Before normalization X[100:103]
 [[359]
 [376]
 [440]]

After normalization Y[100:103]:
 [[0.01287892]
 [0.01348879]
 [0.01578475]]


In [8]:
# Scale the labels with the scaler already fitted on X (transform only, no
# re-fit), showing the same three rows before and after.
y_preview_rows = slice(100, 103)
print("Before normalization Y[100:103]\n", Y[y_preview_rows])
Y = scaler.transform(Y)
print("\nAfter normalization Y[100:103]:\n", Y[y_preview_rows])

Before normalization Y[100:103]
 [[376]
 [440]
 [375]]

After normalization Y[100:103]:
 [[0.01348879]
 [0.01578475]
 [0.01345291]]


Reshaping sets into shape (samples, time steps, features)

In [9]:
# Insert a length-1 time-step axis: (samples, features) -> (samples, 1, features).
n_samples = X.shape[0]
n_features = X.shape[1]
X = X.reshape((n_samples, 1, n_features))
X.shape

(259, 1, 1)

In [10]:
# Y is not reshaped to 3-D the way X was (a reshape of Y to
# (samples, 1, features) was left disabled here); just display its 2-D shape.
Y.shape

(259, 1)

Splitting

In [11]:
# Hold out the last 10% of rows as the test set and train on the rest.
# No shuffling: the trailing rows are taken as-is (rows appear to be in date
# order in the table preview).
TEST_FRACTION = 0.1
test_size = int(len(X) * TEST_FRACTION)
# Slice by an explicit boundary index rather than a negative offset: with
# test_size == 0 the expression X[:-0] is X[:0], which would leave the
# training set empty and put every row in the test set.
split_at = len(X) - test_size
x_train, x_test = X[:split_at], X[split_at:]
y_train, y_test = Y[:split_at], Y[split_at:]

In [12]:
# Report the shapes of the training and test inputs.
for shape_label, split_array in (("x_train.shape: ", x_train), ("x_test.shape: ", x_test)):
    print(shape_label + str(split_array.shape))

x_train.shape: (234, 1, 1)
x_test.shape: (25, 1, 1)


In [13]:
# Peek at three scaled training samples (rows 10 to 12).
train_preview = x_train[10:13]
print("x_train[10:13]:\n", train_preview)

x_train[10:13]:
 [[[0.00068161]]

 [[0.00125561]]

 [[0.00057399]]]


## Creating model
Creating sequential model

In [14]:
# Four stacked 7-unit LSTM layers with 20% dropout between them, followed by a
# single-unit dense output. The first three LSTMs return full sequences so the
# next LSTM receives 3-D input; the last one returns only its final state.
# The input is one time step with as many features as x_train carries.
model = Sequential([
    LSTM(7, return_sequences=True, input_shape=(1, x_train.shape[2])),
    Dropout(0.2),
    LSTM(7, return_sequences=True),
    Dropout(0.2),
    LSTM(7, return_sequences=True),
    Dropout(0.2),
    LSTM(7, return_sequences=False),
    Dense(1),
])


In [15]:
# Print the layer-by-layer overview of the network (output shapes and
# parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 1, 7)              252       
_________________________________________________________________
dropout (Dropout)            (None, 1, 7)              0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 1, 7)              420       
_________________________________________________________________
dropout_1 (Dropout)          (None, 1, 7)              0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 1, 7)              420       
_________________________________________________________________
dropout_2 (Dropout)          (None, 1, 7)              0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 7)                 4

## Compiling and training model

In [16]:
# Train with mean-squared-error loss and the Adam optimizer.
model.compile(loss='mean_squared_error',
              optimizer='adam')
# Keep the History object returned by fit(): it carries the per-epoch loss for
# later inspection, and assigning it stops its repr from being echoed as the
# cell's output. One sample per batch, 50 epochs, one log line per epoch.
history = model.fit(x_train, y_train, epochs=50, batch_size=1, verbose=2)

Epoch 1/50
234/234 - 1s - loss: 0.0052
Epoch 2/50
234/234 - 1s - loss: 0.0051
Epoch 3/50
234/234 - 1s - loss: 0.0051
Epoch 4/50
234/234 - 1s - loss: 0.0050
Epoch 5/50
234/234 - 1s - loss: 0.0045
Epoch 6/50
234/234 - 1s - loss: 0.0023
Epoch 7/50
234/234 - 1s - loss: 0.0015
Epoch 8/50
234/234 - 1s - loss: 5.6461e-04
Epoch 9/50
234/234 - 1s - loss: 7.4525e-04
Epoch 10/50
234/234 - 1s - loss: 6.3897e-04
Epoch 11/50
234/234 - 1s - loss: 0.0012
Epoch 12/50
234/234 - 1s - loss: 7.6618e-04
Epoch 13/50
234/234 - 1s - loss: 0.0011
Epoch 14/50
234/234 - 1s - loss: 5.6258e-04
Epoch 15/50
234/234 - 1s - loss: 8.5791e-04
Epoch 16/50
234/234 - 1s - loss: 7.4339e-04
Epoch 17/50
234/234 - 1s - loss: 4.6328e-04
Epoch 18/50
234/234 - 1s - loss: 7.9797e-04
Epoch 19/50
234/234 - 1s - loss: 9.0488e-04
Epoch 20/50
234/234 - 1s - loss: 8.4724e-04
Epoch 21/50
234/234 - 1s - loss: 9.0659e-04
Epoch 22/50
234/234 - 1s - loss: 0.0011
Epoch 23/50
234/234 - 1s - loss: 0.0012
Epoch 24/50
234/234 - 1s - loss: 6.7317e-

<tensorflow.python.keras.callbacks.History at 0x7fd887998640>

## Scorings
Making predictions and checking the scores achieved by the model

In [17]:
# Run the fitted network over the training and test inputs; the outputs are
# still in scaled units at this point.
trainPredict, testPredict = (
    model.predict(inputs) for inputs in (x_train, x_test)
)


In [18]:
# Undo the min-max scaling so the predictions are back in original units.
# The names are overwritten in place, so re-running this cell would apply the
# inverse transform a second time.
testPredict = scaler.inverse_transform(testPredict)
trainPredict = scaler.inverse_transform(trainPredict)



In [19]:
# Undo the min-max scaling on the labels as well, so they are comparable with
# the inverted predictions. Overwrites in place: run this cell only once.
y_train = scaler.inverse_transform(y_train)
y_test = scaler.inverse_transform(y_test)

In [20]:
def _rmse(actual, predicted):
    """Return the root of the mean squared error between two arrays."""
    return math.sqrt(mean_squared_error(actual, predicted))


# Root-mean-squared error on each split, in original (unscaled) units.
trainScore = _rmse(y_train, trainPredict)
print('Train Score: %.2f RMSE' % (trainScore))
testScore = _rmse(y_test, testPredict)
print('Test Score: %.2f RMSE' % (testScore))

Train Score: 362.60 RMSE
Test Score: 8100.49 RMSE


In [21]:
# Display the actual test-set labels (after inverse scaling) for comparison
# with the predictions.
y_test

array([[13628.],
       [11742.],
       [10241.],
       [16300.],
       [18820.],
       [20156.],
       [21629.],
       [21897.],
       [17171.],
       [15578.],
       [19364.],
       [24692.],
       [27143.],
       [27086.],
       [27875.],
       [24785.],
       [21713.],
       [25454.],
       [25221.],
       [22683.],
       [24051.],
       [25571.],
       [21854.],
       [20816.],
       [19152.]])

In [22]:
# Display the model's test-set predictions (after inverse scaling).
testPredict

array([[12656.76 ],
       [12655.249],
       [11819.458],
       [10949.2  ],
       [13470.245],
       [13969.822],
       [14163.082],
       [14334.559],
       [14361.863],
       [13666.63 ],
       [13284.521],
       [14053.455],
       [14591.488],
       [14731.722],
       [14728.94 ],
       [14765.706],
       [14597.686],
       [14343.236],
       [14640.043],
       [14625.723],
       [14435.985],
       [14546.547],
       [14647.068],
       [14357.556],
       [14244.689]], dtype=float32)