# Covid 19 Prediction Kerala

In [2]:
#importing necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import plotly.express as px
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import math
import plotly.graph_objects as go

In [3]:
#loading Data
data=pd.read_csv("/content/Covid19_Kerala_07052021.csv")
data.head()

Unnamed: 0,Date,Confirmed
0,30/01/2020,1
1,31/01/2020,1
2,01/02/2020,2
3,02/02/2020,3
4,03/02/2020,3


In [4]:
#creating new column with cases confirmed per day
data["Daily_Cases"]=data['Confirmed'].sub(data['Confirmed'].shift())
data.head()

Unnamed: 0,Date,Confirmed,Daily_Cases
0,30/01/2020,1,
1,31/01/2020,1,0.0
2,01/02/2020,2,1.0
3,02/02/2020,3,1.0
4,03/02/2020,3,0.0


In [5]:
#Removing confirmed column
del data["Confirmed"]
data.head()

Unnamed: 0,Date,Daily_Cases
0,30/01/2020,
1,31/01/2020,0.0
2,01/02/2020,1.0
3,02/02/2020,1.0
4,03/02/2020,0.0


In [6]:
data.isna().sum()

Date           0
Daily_Cases    1
dtype: int64

In [7]:
# filling the 1st nan value with 1 since there was 1 confirmed case
data.fillna(1,inplace=True)
data.head()

Unnamed: 0,Date,Daily_Cases
0,30/01/2020,1.0
1,31/01/2020,0.0
2,01/02/2020,1.0
3,02/02/2020,1.0
4,03/02/2020,0.0


In [8]:
#Ploting the data
px.line(data,x='Date',y="Daily_Cases")

In [9]:
np.random.seed(7)
df1=pd.DataFrame(data["Daily_Cases"])
df=df1.values
df[:5]

array([[1.],
       [0.],
       [1.],
       [1.],
       [0.]])

In [10]:
#converting type to float and normalising values using minmax scaler
df=df.astype('float32')
scale=MinMaxScaler(feature_range=(0,1))
df=scale.fit_transform(df)

In [11]:
#spliting data into train and test with a ratio of 70:30
train,test=df[:int(len(df)*0.70),:],df[int(len(df)*0.70):,:]
print(len(train))
print(len(test))

324
140


In [12]:
#function to create dataset forcast data
def create_data(dataset,lookback):
  dataX,dataY=[],[]
  for i in range(len(dataset)-lookback-1):
    a=dataset[i:(i+lookback),0]
    dataX.append(a)
    dataY.append(dataset[i+lookback,0])
  return(np.array(dataX),np.array(dataY))

In [13]:
lookback=1
trainX,trainY=create_data(train,lookback)
testX,testY=create_data(test,lookback)

In [18]:
trainX=np.reshape(trainX,(trainX.shape[0],1,trainX.shape[1]))
testX=np.reshape(testX,(testX.shape[0],1,testX.shape[1]))

In [20]:
#building LSTM model
model=Sequential()
model.add(LSTM(4,input_shape=(1,lookback)))
model.add(Dense(1))
model.compile(loss='mean_squared_error',optimizer='adam')
model.fit(trainX,trainY,epochs=100,batch_size=1,verbose=2)

Epoch 1/100
322/322 - 3s - loss: 0.0034 - 3s/epoch - 10ms/step
Epoch 2/100
322/322 - 1s - loss: 0.0024 - 512ms/epoch - 2ms/step
Epoch 3/100
322/322 - 0s - loss: 0.0013 - 487ms/epoch - 2ms/step
Epoch 4/100
322/322 - 1s - loss: 6.5851e-04 - 505ms/epoch - 2ms/step
Epoch 5/100
322/322 - 1s - loss: 5.8288e-04 - 514ms/epoch - 2ms/step
Epoch 6/100
322/322 - 1s - loss: 5.5923e-04 - 547ms/epoch - 2ms/step
Epoch 7/100
322/322 - 0s - loss: 5.5692e-04 - 499ms/epoch - 2ms/step
Epoch 8/100
322/322 - 1s - loss: 5.4492e-04 - 506ms/epoch - 2ms/step
Epoch 9/100
322/322 - 0s - loss: 5.5472e-04 - 469ms/epoch - 1ms/step
Epoch 10/100
322/322 - 0s - loss: 5.4279e-04 - 488ms/epoch - 2ms/step
Epoch 11/100
322/322 - 1s - loss: 5.6352e-04 - 504ms/epoch - 2ms/step
Epoch 12/100
322/322 - 0s - loss: 5.5492e-04 - 469ms/epoch - 1ms/step
Epoch 13/100
322/322 - 0s - loss: 5.5794e-04 - 482ms/epoch - 1ms/step
Epoch 14/100
322/322 - 0s - loss: 5.5190e-04 - 476ms/epoch - 1ms/step
Epoch 15/100
322/322 - 0s - loss: 5.2992e-0

<keras.callbacks.History at 0x7ff8bb8adc70>

In [22]:
#Testing the model
tr_pre=model.predict(trainX)
te_pre=model.predict(testX)
tr_pre=scale.inverse_transform(tr_pre)
trainY=scale.inverse_transform([trainY])
te_pre=scale.inverse_transform(te_pre)
testY=scale.inverse_transform([testY])



In [23]:
#finding mean squared error of actual value and predicted value
tr_score=math.sqrt(mean_squared_error(trainY[0],tr_pre[:,0]))
print("Train Score : %.2f RMSE"%(tr_score))
te_score=math.sqrt(mean_squared_error(testY[0],te_pre[:,0]))
print("Test Score : %.2f RMSE"%(te_score))

Train Score : 895.60 RMSE
Test Score : 6084.40 RMSE


In [24]:
#creating a dataset for ploting
tr_plot=np.empty_like(df)
tr_plot[:,:]=np.nan
tr_plot[lookback:len(tr_pre)+lookback,:]=tr_pre
te_plot=np.empty_like(df)
te_plot[:,:]=np.nan
te_plot[len(tr_pre)+(lookback*2)+1:len(data)-1,:]=te_pre
x=data['Date']
y=list(scale.inverse_transform(df).ravel())
train=list(tr_plot.ravel())
test=list(te_plot.ravel())
df4=pd.DataFrame({'y':y,
                  'train':train,
                  'test':test},index=x)
df4.head()

Unnamed: 0_level_0,y,train,test
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
30/01/2020,1.0,,
31/01/2020,0.0,-44.954998,
01/02/2020,1.0,-46.148865,
02/02/2020,1.0,-44.954998,
03/02/2020,0.0,-44.954998,


In [25]:
#plot comparing actual data and predicted data
fig=px.line(df4,x=df4.index,y=df4['y'])
fig.add_scatter(x=df4.index, y=df4['train'])
fig.add_scatter(x=df4.index, y=df4['test'])