# Importing Packages

In [1]:
import pandas as pd
import numpy as np
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import matplotlib.pyplot as plt
import plotly.graph_objects as go

Using TensorFlow backend.


# Loading the Dataset

In [5]:
Data_train = pd.read_csv("Data/train.csv")

# Data Cleaning

In [6]:
#Converting the date column to Datetime Format
Data_train['Date'] = pd.to_datetime(Data_train['Date'], format = '%Y-%m-%d')
#Replacing null values with the string 'Null'
Data_train['Province_State'] = Data_train['Province_State'].fillna('Null', inplace = True)
Data_train.drop('Id',axis=1,inplace=True)
#Converting Categorical value into Numerical Value
LE = LabelEncoder()
Data_train['Province_State'] = LE.fit_transform(Data_train['Province_State'])
Data_train['Country_Region'] = LE.fit_transform(Data_train['Country_Region'])

# Splitting the Dataset into Train and Validation

In [24]:
train_size = int(len(Data_train) * 0.70)
train1 = Data_train.iloc[:train_size, :]
validation1 = Data_train.iloc[train_size:, :]
train1.set_index("Date",inplace= True)
validation1.set_index("Date",inplace= True)

# Data Preprocessing

In [25]:
#Normalizing the input variables
train = train1.values
train = train.astype('float32')
scaler = MinMaxScaler(feature_range=(0,1))
train_scaled = scaler.fit_transform(train)
validation = validation1.values
validation = validation.astype('float32')
validation_scaled = scaler.fit_transform(validation)

In [26]:
#Function to convert Time series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]  
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
train_reframed = series_to_supervised(train_scaled,1,1)
validation_reframed = series_to_supervised(validation_scaled,1,1)
train_reframed.drop(train_reframed.columns[[4,5]], axis=1, inplace=True)
validation_reframed.drop(validation_reframed.columns[[4,5]], axis=1, inplace=True)

In [27]:
#split the input and Outputs for cases & Fatalities
train = train_reframed.values
validation = validation_reframed.values
train_X, train_case, train_fatalities = train[:, :-2], train[:, -2], train[:, -1]
validation_X, validation_case, validation_fatalities = validation[:, :-2], validation[:, -2], validation[:, -1]

# Data Modelling

In [28]:
#reshape the inputs into 3 Dimensional
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
validation_X = validation_X.reshape((validation_X.shape[0], 1, validation_X.shape[1]))
print(train_X.shape, train_case.shape, train_fatalities.shape, validation_X.shape, validation_case.shape, validation_fatalities.shape)

(17965, 1, 4) (17965,) (17965,) (7699, 1, 4) (7699,) (7699,)


### LSTM Model for Case

In [29]:
#Building the model for case
model_case = Sequential()
model_case.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
model_case.add(Dense(1))
model_case.compile(loss='mae', optimizer='adam')
# fit network
cases = model_case.fit(train_X, train_case, epochs=50, batch_size=72, validation_data=(validation_X, validation_case), verbose=2, shuffle=False)

Train on 17965 samples, validate on 7699 samples
Epoch 1/50
 - 2s - loss: 0.0058 - val_loss: 0.0060
Epoch 2/50
 - 1s - loss: 0.0049 - val_loss: 0.0055
Epoch 3/50
 - 1s - loss: 0.0041 - val_loss: 0.0064
Epoch 4/50
 - 1s - loss: 0.0029 - val_loss: 0.0065
Epoch 5/50
 - 1s - loss: 0.0029 - val_loss: 0.0088
Epoch 6/50
 - 1s - loss: 0.0026 - val_loss: 0.0040
Epoch 7/50
 - 1s - loss: 0.0019 - val_loss: 0.0062
Epoch 8/50
 - 1s - loss: 0.0019 - val_loss: 0.0040
Epoch 9/50
 - 1s - loss: 0.0019 - val_loss: 0.0048
Epoch 10/50
 - 1s - loss: 0.0018 - val_loss: 0.0020
Epoch 11/50
 - 1s - loss: 0.0014 - val_loss: 0.0022
Epoch 12/50
 - 1s - loss: 0.0014 - val_loss: 0.0023
Epoch 13/50
 - 1s - loss: 0.0018 - val_loss: 0.0037
Epoch 14/50
 - 1s - loss: 0.0015 - val_loss: 0.0060
Epoch 15/50
 - 1s - loss: 0.0017 - val_loss: 0.0018
Epoch 16/50
 - 1s - loss: 0.0012 - val_loss: 0.0024
Epoch 17/50
 - 1s - loss: 0.0013 - val_loss: 0.0016
Epoch 18/50
 - 1s - loss: 0.0012 - val_loss: 0.0014
Epoch 19/50
 - 1s - loss

### Prediction and Evaluation(Cases)

In [30]:
# Case prediction 
y_pred_case = model_case.predict(validation_X)
validation_X = validation_X.reshape((validation_X.shape[0],validation_X.shape[2]))
#invert scaling for forecast
inv_pred_case = np.concatenate((y_pred_case,validation_X[:,1:]),axis = 1)
inv_pred_case = scaler.inverse_transform(inv_pred_case)
inv_pred_case = inv_pred_case[:,0]
#invert scaling for actual
validation_case = validation_case.reshape((len(validation_case),1))
inv_validation_case = np.concatenate((validation_case,validation_X[:,1:]),axis = 1)
inv_validation_case = scaler.inverse_transform(inv_validation_case)
inv_validation_case = inv_validation_case[:,0]
# calculate RMSE for case
rmse = sqrt(mean_squared_error(inv_validation_case, inv_pred_case))
print('Test RMSE: %.3f' % rmse)

Test RMSE: 0.019


### LSTM Model for Fatalities

In [12]:
#Building the model for Fatalities
model_fatalities = Sequential()
model_fatalities.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
model_fatalities.add(Dense(1))
model_fatalities.compile(loss='mae', optimizer='adam')
# fit network
fatalities = model_fatalities.fit(train_X, train_fatalities, epochs=50, batch_size=72, validation_data=(validation_X, validation_fatalities), verbose=2, shuffle=False)

Train on 25195 samples, validate on 10799 samples
Epoch 1/50
 - 5s - loss: 0.0044 - val_loss: 0.0070
Epoch 2/50
 - 2s - loss: 0.0034 - val_loss: 0.0040
Epoch 3/50
 - 2s - loss: 0.0028 - val_loss: 0.0034
Epoch 4/50
 - 2s - loss: 0.0024 - val_loss: 0.0031
Epoch 5/50
 - 2s - loss: 0.0019 - val_loss: 0.0025
Epoch 6/50
 - 2s - loss: 0.0019 - val_loss: 0.0043
Epoch 7/50
 - 2s - loss: 0.0015 - val_loss: 0.0033
Epoch 8/50
 - 2s - loss: 0.0012 - val_loss: 0.0018
Epoch 9/50
 - 2s - loss: 0.0012 - val_loss: 0.0031
Epoch 10/50
 - 2s - loss: 0.0012 - val_loss: 0.0023
Epoch 11/50
 - 2s - loss: 0.0012 - val_loss: 0.0020
Epoch 12/50
 - 2s - loss: 0.0011 - val_loss: 0.0017
Epoch 13/50
 - 2s - loss: 0.0013 - val_loss: 0.0036
Epoch 14/50
 - 2s - loss: 0.0012 - val_loss: 0.0022
Epoch 15/50
 - 2s - loss: 0.0011 - val_loss: 0.0031
Epoch 16/50
 - 2s - loss: 0.0012 - val_loss: 0.0052
Epoch 17/50
 - 2s - loss: 0.0010 - val_loss: 0.0014
Epoch 18/50
 - 2s - loss: 0.0012 - val_loss: 0.0032
Epoch 19/50
 - 2s - los

### Prediction and Evaluation(Fatalities)

In [13]:
# Fatalities prediction
y_pred_fatal = model_fatalities.predict(validation_X)
validation_X = validation_X.reshape((validation_X.shape[0],validation_X.shape[2]))
#invert scaling for forecast
inv_pred_fatal = np.concatenate((y_pred_fatal,validation_X[:,1:]),axis = 1)
inv_pred_fatal = scale.inverse_transform(inv_pred_fatal)
inv_pred_fatal = inv_pred_fatal[:,0]
#invert scaling for actual
validation_fatalities = validation_fatalities.reshape((len(validation_case),1))
inv_validation_fatalities = np.concatenate((validation_fatalities,validation_X[:,1:]),axis = 1)
inv_validation_fatalities = scale.inverse_transform(inv_validation_fatalities)
inv_validation_fatalities = inv_validation_fatalities[:,0]
# calculate RMSE for fatalities
rmse_fatalities = sqrt(mean_squared_error(inv_pred_fatal, inv_validation_fatalities))
print('Test RMSE: %.3f' % rmse_fatalities)

Test RMSE: 0.015


## Comparison actual vs Predicted Value

In [22]:
predvalue = pd.DataFrame(inv_pred_case,index=validation1.iloc[1:].index, columns=['Prediction'])
actualvalue = pd.DataFrame(inv_validation_case,index=validation1.iloc[1:].index, columns=['Actual'])
result = pd.concat([predvalue,actualvalue], axis=1)
result1 = result.groupby(result.index)['Prediction','Actual'].sum()
fig = go.Figure(data=[
    go.Bar(name='actual', x=result1.index, y=result1['Actual']),
    go.Bar(name='Forecasted', x=result1.index, y= result1['Prediction'])
])
fig.update_layout(barmode= 'group', title='Comparison actual  vs. predicted',xaxis_title='Month',yaxis_title='Number of Cases')
fig.show()

### Comparing Actual vs Predicted Cases for United States

In [23]:
validation1['Country_Region'] = LE.inverse_transform(validation1['Country_Region'])
new_result = pd.concat([validation1.iloc[1:],result],axis = 1)
new_result = new_result.groupby([new_result.index,'Country_Region'])['Prediction','Actual'].sum()
df = new_result.query("Country_Region == 'US'")
df.reset_index(inplace = True)
fig = go.Figure(data=[
    go.Bar(name='actual', x=df['Date'], y=df['Actual']),
    go.Bar(name='Forecasted', x=df['Date'], y= df['Prediction'])
])
fig.update_layout(barmode= 'group', title='Comparison actual  vs. predicted for United States',xaxis_title='Month',xaxis_range=['2020-03-15','2020-04-13'],yaxis_title='Number of Cases')
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

