# Covid 19 Prediction Kerala

In [None]:
#importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import plotly.express as px
from keras.models import Sequential
from keras.layers import Dense, GRU
from keras.layers import LSTM
from keras.optimizers import adam
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error,mean_absolute_error, mean_absolute_percentage_error
import math
import plotly.graph_objects as go

In [None]:
#loading Data
#NOTE(review): absolute path, presumably a Google Drive mount inside Colab - adjust when running elsewhere
csv_path="/content/drive/MyDrive/Covid 19 Confirmed Cases-Kerala.csv"
data=pd.read_csv(csv_path)
data.head()

Unnamed: 0,Date,Confirmed
0,2020-01-31,0.0
1,2020-02-01,0.0
2,2020-02-02,1.0
3,2020-02-03,1.0
4,2020-02-04,0.0


In [None]:
#parse the Date column into datetime values, then print the frame summary
data=data.assign(Date=pd.to_datetime(data['Date']))
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 841 entries, 0 to 840
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date       841 non-null    datetime64[ns]
 1   Confirmed  841 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 13.3 KB


In [None]:
#number of missing values in each column
data.isnull().sum()

Date         0
Confirmed    0
dtype: int64

In [None]:
#Plotting the data
#A title is set so the figure still makes sense when the notebook is skimmed
px.line(data,x='Date',y="Confirmed",title="Covid 19 Confirmed Cases - Kerala")

In [None]:
#Seed both NumPy and TensorFlow: the Keras models below draw their initial
#weights from TensorFlow's generator, so seeding NumPy alone does not make runs repeatable
np.random.seed(7)
tf.random.set_seed(7)
#keep only the target column, as a 2-D array with one column
df1=pd.DataFrame(data["Confirmed"])
df=df1.values
df[:5]

array([[0.],
       [0.],
       [1.],
       [1.],
       [0.]])

In [None]:
#converting type to float32 and normalising values to the range [0,1] using minmax scaler
#The scaler is always fitted on the raw values held in df1, so re-running this cell
#cannot re-fit it on already-scaled data (which would silently break scale.inverse_transform).
#NOTE(review): the scaler is fitted on the whole series before the train/test split, so
#test-period min/max leak into the scaling - consider fitting on the training part only.
scale=MinMaxScaler(feature_range=(0,1))
df=scale.fit_transform(df1.values.astype('float32'))

In [None]:
#splitting data in time order: the first 60% of the rows for training, the remaining 40% for testing
split_at=int(len(df)*0.60)
train=df[:split_at,:]
test=df[split_at:,:]
print(len(train),len(test),sep="\n")

504
337


In [None]:
#function to create the supervised forecast dataset
def create_data(dataset,lookback):
  """Turn a single-column series into (window, next value) pairs.

  dataset  : 2-D array; only column 0 is read.
  lookback : number of consecutive values in each input window.

  Returns a tuple (X, Y) of numpy arrays: X[k] is dataset[k:k+lookback, 0]
  and Y[k] is dataset[k+lookback, 0].

  NOTE: the window count is len(dataset)-lookback-1, so the last possible
  window is not produced. The plotting cells rely on this length, so it is
  kept as is.
  """
  n_windows=len(dataset)-lookback-1
  windows=[dataset[start:start+lookback,0] for start in range(n_windows)]
  targets=[dataset[start+lookback,0] for start in range(n_windows)]
  return(np.array(windows),np.array(targets))

In [None]:
#number of past values fed to the model for each prediction
lookback=1
trainX,trainY=create_data(train,lookback)
testX,testY=create_data(test,lookback)
#The recurrent layers are declared with input_shape=(1,lookback), i.e. they expect
#3-D batches (samples, 1, lookback). Reshape explicitly instead of relying on Keras
#to add a missing trailing axis, which only happens to work when lookback is 1.
trainX=np.reshape(trainX,(trainX.shape[0],1,lookback))
testX=np.reshape(testX,(testX.shape[0],1,lookback))

In [None]:
#building LSTM model
#one LSTM layer with 4 units followed by a single-output dense layer
lstm_layers=[
  LSTM(4,input_shape=(1,lookback)),
  Dense(1),
]
model=Sequential(lstm_layers)
model.compile(optimizer='adam',loss='mean_squared_error')
#one sample per gradient step, 100 epochs, one log line per epoch
model.fit(x=trainX,y=trainY,batch_size=1,epochs=100,verbose=2)

Epoch 1/100
502/502 - 3s - loss: 0.0273 - 3s/epoch - 5ms/step
Epoch 2/100
502/502 - 1s - loss: 0.0177 - 856ms/epoch - 2ms/step
Epoch 3/100
502/502 - 1s - loss: 0.0089 - 820ms/epoch - 2ms/step
Epoch 4/100
502/502 - 1s - loss: 0.0031 - 843ms/epoch - 2ms/step
Epoch 5/100
502/502 - 1s - loss: 0.0015 - 790ms/epoch - 2ms/step
Epoch 6/100
502/502 - 1s - loss: 0.0013 - 817ms/epoch - 2ms/step
Epoch 7/100
502/502 - 1s - loss: 0.0013 - 806ms/epoch - 2ms/step
Epoch 8/100
502/502 - 1s - loss: 0.0013 - 989ms/epoch - 2ms/step
Epoch 9/100


KeyboardInterrupt: ignored

In [None]:
#Testing the model
tr_pre=model.predict(trainX)
te_pre=model.predict(testX)
tr_pre=scale.inverse_transform(tr_pre)
trainY=scale.inverse_transform([trainY])
te_pre=scale.inverse_transform(te_pre)
testY=scale.inverse_transform([testY])

In [None]:
#finding mean squared error of actual value and predicted value
tr_score=math.sqrt(mean_squared_error(trainY[0],tr_pre[:,0]))
print("Train Score : %.2f RMSE"%(tr_score))
te_score=math.sqrt(mean_squared_error(testY[0],te_pre[:,0]))
print("Test Score : %.2f RMSE"%(te_score))
print('Train MAE : ',mean_absolute_error(trainY[0],tr_pre[:,0]))
print('Test MAE : ',mean_absolute_error(testY[0],te_pre[:,0]))
print('Train MAPE : ',mean_absolute_percentage_error(trainY[0],tr_pre[:,0]))
print('Test MAPE : ',mean_absolute_percentage_error(testY[0],te_pre[:,0]))

Train Score : 2019.23 RMSE
Test Score : 3437.36 RMSE
Train MAE :  1213.0049491506343
Test MAE :  2083.2371609702827
Train MAPE :  1.8390986991433555e+17
Test MAPE :  0.4300949740536975


In [None]:
#creating a dataset for plotting
#Each prediction is placed on the row of the value it forecasts; all other rows stay NaN.
tr_plot=np.empty_like(df)
tr_plot[:,:]=np.nan
tr_plot[lookback:len(tr_pre)+lookback,:]=tr_pre
te_plot=np.empty_like(df)
te_plot[:,:]=np.nan
#the test block starts after the training rows; the offset accounts for the rows create_data() leaves out
te_plot[len(tr_pre)+(lookback*2)+1:len(data)-1,:]=te_pre
x=data['Date']
y=list(scale.inverse_transform(df).ravel())
#Separate names on purpose: rebinding `train`/`test` to these lists would clobber
#the split arrays that create_data() is called with.
train_plot_col=list(tr_plot.ravel())
test_plot_col=list(te_plot.ravel())
df4=pd.DataFrame({'y':y,
                  'train':train_plot_col,
                  'test':test_plot_col},index=x)
df4.head()

Unnamed: 0_level_0,y,train,test
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-31,0.0,,
2020-02-01,0.0,465.413361,
2020-02-02,1.0,465.413361,
2020-02-03,1.0,466.394073,
2020-02-04,0.0,466.394073,


In [None]:
#plot comparing actual data and predicted data
#one line per column of df4, drawn in the order: actual, train predictions, test predictions
fig = go.Figure()
for column, label in (
    ("y", "Actual"),
    ("train", "train_predicted"),
    ("test", "test_predicted"),
):
    fig.add_trace(go.Scatter(x=df4.index, y=df4[column], name=label))
fig.update_layout( title="Covid 19 Forecast - LSTM")
fig.show()


# GRU

In [None]:
# Define the GRU model
# one GRU layer with 50 units (tanh activation) followed by a single-output dense layer
gru_layers = [
    GRU(50, activation='tanh', input_shape=(1,lookback)),
    Dense(1),
]
model = Sequential(gru_layers)
model.compile(loss='mse', optimizer='adam')

In [None]:
# Train the model
# one sample per gradient step, 100 epochs, one log line per epoch
model.fit(x=trainX,y=trainY,batch_size=1,epochs=100,verbose=2)

Epoch 1/100
502/502 - 3s - loss: 0.0120 - 3s/epoch - 6ms/step
Epoch 2/100
502/502 - 1s - loss: 0.0015 - 1s/epoch - 2ms/step
Epoch 3/100
502/502 - 1s - loss: 0.0015 - 1s/epoch - 2ms/step
Epoch 4/100
502/502 - 1s - loss: 0.0015 - 1s/epoch - 2ms/step
Epoch 5/100
502/502 - 1s - loss: 0.0015 - 836ms/epoch - 2ms/step
Epoch 6/100
502/502 - 1s - loss: 0.0015 - 842ms/epoch - 2ms/step
Epoch 7/100
502/502 - 1s - loss: 0.0015 - 854ms/epoch - 2ms/step
Epoch 8/100
502/502 - 1s - loss: 0.0015 - 866ms/epoch - 2ms/step
Epoch 9/100
502/502 - 1s - loss: 0.0015 - 829ms/epoch - 2ms/step
Epoch 10/100
502/502 - 1s - loss: 0.0014 - 805ms/epoch - 2ms/step
Epoch 11/100
502/502 - 1s - loss: 0.0014 - 849ms/epoch - 2ms/step
Epoch 12/100
502/502 - 1s - loss: 0.0014 - 812ms/epoch - 2ms/step
Epoch 13/100
502/502 - 1s - loss: 0.0015 - 822ms/epoch - 2ms/step
Epoch 14/100
502/502 - 1s - loss: 0.0014 - 826ms/epoch - 2ms/step
Epoch 15/100
502/502 - 1s - loss: 0.0014 - 808ms/epoch - 2ms/step
Epoch 16/100
502/502 - 1s - los

<keras.callbacks.History at 0x7f677395b670>

In [None]:
# Evaluate the model
# loss the model was compiled with (mse), computed on each split in turn
eval_sets = ((trainX, trainY), (testX, testY))
train_loss, test_loss = [
    model.evaluate(inputs, targets, verbose=2) for inputs, targets in eval_sets
]
print('Train loss:', train_loss)
print('Test loss:', test_loss)

16/16 - 1s - loss: 0.0012 - 943ms/epoch - 59ms/step
11/11 - 0s - loss: 0.0037 - 35ms/epoch - 3ms/step
Train loss: 0.0012363470159471035
Test loss: 0.003662085859104991


In [None]:
# Make predictions
tr_pre=model.predict(trainX)
te_pre=model.predict(testX)
tr_pre=scale.inverse_transform(tr_pre)
trainY=scale.inverse_transform([trainY])
te_pre=scale.inverse_transform(te_pre)
testY=scale.inverse_transform([testY])



In [None]:
#finding mean squared error of actual value and predicted value
tr_score=math.sqrt(mean_squared_error(trainY[0],tr_pre[:,0]))
print("Train Score : %.2f RMSE"%(tr_score))
te_score=math.sqrt(mean_squared_error(testY[0],te_pre[:,0]))
print("Test Score : %.2f RMSE"%(te_score))
print('Train MAE : ',mean_absolute_error(trainY[0],tr_pre[:,0]))
print('Test MAE : ',mean_absolute_error(testY[0],te_pre[:,0]))
print('Train MAPE : ',mean_absolute_percentage_error(trainY[0],tr_pre[:,0]))
print('Test MAPE : ',mean_absolute_percentage_error(testY[0],te_pre[:,0]))

Train Score : 1950.60 RMSE
Test Score : 3357.08 RMSE
Train MAE :  1003.0483345870879
Test MAE :  1790.3500229408062
Train MAPE :  5.5020659449059384e+16
Test MAPE :  0.22840846152557248


In [None]:
#creating a dataset for plotting
#Each prediction is placed on the row of the value it forecasts; all other rows stay NaN.
tr_plot=np.empty_like(df)
tr_plot[:,:]=np.nan
tr_plot[lookback:len(tr_pre)+lookback,:]=tr_pre
te_plot=np.empty_like(df)
te_plot[:,:]=np.nan
#the test block starts after the training rows; the offset accounts for the rows create_data() leaves out
te_plot[len(tr_pre)+(lookback*2)+1:len(data)-1,:]=te_pre
x=data['Date']
y=list(scale.inverse_transform(df).ravel())
#Separate names on purpose: rebinding `train`/`test` to these lists would clobber
#the split arrays that create_data() is called with.
train_plot_col=list(tr_plot.ravel())
test_plot_col=list(te_plot.ravel())
df4=pd.DataFrame({'y':y,
                  'train':train_plot_col,
                  'test':test_plot_col},index=x)
df4.head()

Unnamed: 0_level_0,y,train,test
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-31,0.0,,
2020-02-01,0.0,-20.649691,
2020-02-02,1.0,-20.649691,
2020-02-03,1.0,-19.702332,
2020-02-04,0.0,-19.702332,


In [None]:
#plot comparing actual data and predicted data
#one line per column of df4, drawn in the order: actual, train predictions, test predictions
fig = go.Figure()
for column, label in (
    ("y", "Actual"),
    ("train", "train_predicted"),
    ("test", "test_predicted"),
):
    fig.add_trace(go.Scatter(x=df4.index, y=df4[column], name=label))
fig.update_layout( title="Covid 19 Forecast - GRU")
fig.show()
