In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
# split into standard weeks
from numpy import split
from numpy import array
from numpy import hstack
from pandas import read_csv
import math
import sklearn.metrics as skm
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.layers import Conv1D,MaxPooling1D
from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel, Theta, TCNModel
import time
import warnings
warnings.filterwarnings("ignore")

# Build CNN model

## specify a cut off point, and split the entire data into train and test sets

In [5]:
def split_data(df, cut):
    train=df.iloc[:cut,:]
    test=df.iloc[cut:, :]
    return train, test

## given a dataset, separate it into independent variables set X, and response variable Y
n_step means how many days of independent variables set X need to be used to predict an outcome Y

In [6]:
def supervise(df , n_step):
    df_seq=df.to_numpy()
    x, y=list() , list()
    for i in range(len(df_seq)):
        end_index=i+n_step
        if end_index > len(df_seq):
            break
        seq_x=df_seq[i:end_index, 1:]
        seq_y=df_seq[end_index-1, 0]
        x.append(seq_x)
        y.append(seq_y)
    return array(x), array(y)

## CNN model, we can adjust the paramters to fit the optimal model

In [20]:
def CNN(n_step, n_feature, xtrain, ytrain, optimizer,epochs, batch, verbose):
    model=Sequential()
    model.add(Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(n_step, n_feature)))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(50, activation='relu'))
    model.add(Dense(1))
    #opt={'Adam':Adam(lr=learning_rate)}
    model.compile(optimizer=optimizer ,loss='mse')
    print(model.summary())
    model.fit(xtrain, ytrain,  epochs=epochs, batch_size=batch, verbose=verbose )
    return model

## To forecast and evaluate the prediction outcomes using CNN model

In [38]:
def forecast_evaluate(model,xtest, actual):
    y_pred=model.predict(xtest[:5], verbose=0)
    
    for i in range(len(y_pred)):
         print('prediction result:{}'.format(y_pred[i][0]))
            
    mse=0
    mape=0
    for i in range(len(actual)):
        mse+=pow((actual[i].item()-y_pred[i].item()),2)
        #print(mse)
    
        mape+=abs((actual[i].item()-y_pred[i].item())/actual[i].item())
        
    mse_result=mse/len(actual)
    rmse=pow(mse_result, 1/2)
    mape_result=mape/len(actual)
    
    print('mse result:{}'.format(mse_result))
    print('mape result:{}'.format(mape_result))
    print('rmse result:{}'.format(rmse))

    
    
    return y_pred, mse_result, rmse, mape_result

# read the original dataset

In [9]:
data=pd.read_csv('new_data.csv')
data.columns

Index(['date', 'new_cases', 'new_deaths', 'icu_patients', 'hosp_patients',
       'stringency_index', 'temp', 'humidity', 'precip', 'windspeed',
       'transit_stations', 'residential'],
      dtype='object')

In [10]:
data.head()

Unnamed: 0,date,new_cases,new_deaths,icu_patients,hosp_patients,stringency_index,temp,humidity,precip,windspeed,transit_stations,residential
0,2021-01-01,8603,97,703.0,1957.0,78.7,2.4,93.0,0.65,12.4,-64.857,16.286
1,2021-01-02,7402,36,698.0,2005.0,78.7,2.9,97.5,1.118,13.5,-63.857,16.571
2,2021-01-03,6563,47,704.0,2006.0,78.7,3.5,87.0,0.005,24.2,-62.857,16.571
3,2021-01-04,6341,63,715.0,2130.0,78.7,3.1,84.8,0.886,33.4,-62.857,16.286
4,2021-01-05,7138,187,731.0,2159.0,78.7,3.0,89.1,0.23,25.7,-62.429,15.857


# Hosptial admission 

In [14]:
data_hosp=data.iloc[3:479,[4,1,2,5,6,7,8,9,10,11]]
print(data_hosp.shape)
data_hosp.head()

(476, 10)


Unnamed: 0,hosp_patients,new_cases,new_deaths,stringency_index,temp,humidity,precip,windspeed,transit_stations,residential
3,2130.0,6341,63,78.7,3.1,84.8,0.886,33.4,-62.857,16.286
4,2159.0,7138,187,78.7,3.0,89.1,0.23,25.7,-62.429,15.857
5,2131.0,9635,137,78.7,2.5,86.9,0.23,23.4,-62.0,15.571
6,2052.0,8143,85,78.7,2.9,91.3,6.695,23.0,-61.857,15.429
7,1936.0,7344,87,78.7,1.9,93.8,0.826,10.1,-59.286,14.143


In [17]:
scaler = StandardScaler()
data_hosp_std = pd.DataFrame(scaler.fit_transform(data_hosp), columns=data_hosp.columns)
data_hosp_std.head()

Unnamed: 0,hosp_patients,new_cases,new_deaths,stringency_index,temp,humidity,precip,windspeed,transit_stations,residential
0,1.664588,-0.342771,1.574782,1.352531,-1.214421,0.466075,-0.310046,0.693307,-2.351961,2.668601
1,1.713738,-0.311822,6.385855,1.352531,-1.232458,0.920602,-0.462693,-0.022398,-2.316208,2.546084
2,1.666283,-0.21486,4.445906,1.352531,-1.322643,0.688053,-0.462693,-0.236179,-2.280372,2.464406
3,1.532393,-0.272796,2.42836,1.352531,-1.250495,1.153151,1.041673,-0.273359,-2.268427,2.423852
4,1.335795,-0.303823,2.505957,1.352531,-1.430864,1.417411,-0.324007,-1.472396,-2.05366,2.056586


In [35]:
n_step_hosp=7
train_hosp, test_hosp=split_data(data_hosp_std, 450)
xtrain_hosp, ytrain_hosp=supervise(train_hosp, n_step_hosp)
xtest_hosp, ytest_hosp=supervise(test_hosp, n_step_hosp)

n_feature_hosp=xtrain_hosp.shape[2]
model_hosp=CNN(n_step_hosp, n_feature_hosp,xtrain_hosp, ytrain_hosp,'Adam' ,1000, 10, 0)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_2 (Conv1D)           (None, 6, 64)             1216      
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 3, 64)            0         
 1D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 192)               0         
                                                                 
 dense_4 (Dense)             (None, 50)                9650      
                                                                 
 dense_5 (Dense)             (None, 1)                 51        
                                                                 
Total params: 10,917
Trainable params: 10,917
Non-trainable params: 0
__________________________________________________

In [39]:
y_pred_hosp, mse_hosp, rmse_hosp, mape_hosp=forecast_evaluate(model_hosp,xtest_hosp,ytest_hosp[:5])

prediction result:0.4743238091468811
prediction result:0.06894572079181671
prediction result:-0.20505939424037933
prediction result:-0.10552117228507996
prediction result:-0.39708411693573
mse result:0.4144280325920926
mape result:1.1763451163833651
rmse result:0.6437608504655223


In [34]:
ytest_hosp[:5]

array([0.82735121, 0.63583748, 0.59346718, 0.4951681 , 0.39517421])

## need some graphs to compare predictions and actual values, error changes in different batch size, epochs

# ICU Admission

In [15]:
data_icu=data.iloc[3:479,[3,1,2,5,6,7,8,9,10,11]]
print(data_icu.shape)
data_icu.head()

(476, 10)


Unnamed: 0,icu_patients,new_cases,new_deaths,stringency_index,temp,humidity,precip,windspeed,transit_stations,residential
3,715.0,6341,63,78.7,3.1,84.8,0.886,33.4,-62.857,16.286
4,731.0,7138,187,78.7,3.0,89.1,0.23,25.7,-62.429,15.857
5,704.0,9635,137,78.7,2.5,86.9,0.23,23.4,-62.0,15.571
6,712.0,8143,85,78.7,2.9,91.3,6.695,23.0,-61.857,15.429
7,708.0,7344,87,78.7,1.9,93.8,0.826,10.1,-59.286,14.143


In [28]:
scaler = StandardScaler()
data_icu_std = pd.DataFrame(scaler.fit_transform(data_icu), columns=data_icu.columns)
data_icu_std.head()

Unnamed: 0,icu_patients,new_cases,new_deaths,stringency_index,temp,humidity,precip,windspeed,transit_stations,residential
0,1.44978,-0.342771,1.574782,1.352531,-1.214421,0.466075,-0.310046,0.693307,-2.351961,2.668601
1,1.51793,-0.311822,6.385855,1.352531,-1.232458,0.920602,-0.462693,-0.022398,-2.316208,2.546084
2,1.402927,-0.21486,4.445906,1.352531,-1.322643,0.688053,-0.462693,-0.236179,-2.280372,2.464406
3,1.437002,-0.272796,2.42836,1.352531,-1.250495,1.153151,1.041673,-0.273359,-2.268427,2.423852
4,1.419964,-0.303823,2.505957,1.352531,-1.430864,1.417411,-0.324007,-1.472396,-2.05366,2.056586


In [42]:
n_step_icu=7
train_icu, test_icu=split_data(data_icu_std, 450)
xtrain_icu, ytrain_icu=supervise(train_icu, n_step_icu)
xtest_icu, ytest_icu=supervise(test_icu, n_step_icu)

n_feature_icu=xtrain_icu.shape[2]
model_icu=CNN(n_step_icu, n_feature_icu,xtrain_icu, ytrain_icu,'Adam' ,1000, 10, 0)

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_3 (Conv1D)           (None, 6, 64)             1216      
                                                                 
 max_pooling1d_3 (MaxPooling  (None, 3, 64)            0         
 1D)                                                             
                                                                 
 flatten_3 (Flatten)         (None, 192)               0         
                                                                 
 dense_6 (Dense)             (None, 50)                9650      
                                                                 
 dense_7 (Dense)             (None, 1)                 51        
                                                                 
Total params: 10,917
Trainable params: 10,917
Non-trainable params: 0
__________________________________________________

In [43]:
y_pred_icu, mse_icu, rmse_icu, mape_icu=forecast_evaluate(model_icu,xtest_icu,ytest_icu[:5])

prediction result:-1.1441922187805176
prediction result:-1.18936026096344
prediction result:-0.9603097438812256
prediction result:-1.0256623029708862
prediction result:-0.9664812088012695
mse result:0.019227231309798958
mape result:0.09101495148999757
rmse result:0.13866229231409294


In [32]:
ytest_icu[:5]

array([-1.15697304, -1.19104825, -1.18252945, -1.12715724, -1.15697304])