In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from datetime import datetime

In [None]:
# Load the cleaned India daily-cases CSV (path is relative to the notebook's working directory).
daily_cases = pd.read_csv('../cleaned_datasets/india/daily_cases_india.csv')
# Parse 'Date' with an explicit ISO format rather than letting pandas guess it.
daily_cases['Date'] = pd.to_datetime(daily_cases['Date'], format = '%Y-%m-%d')
# Bare last expression: show the loaded frame as the cell output.
daily_cases

In [None]:
# Re-key the frame by 'Date' so later cells can slice rows by date label.
# set_index returns a new frame; daily_cases itself is left untouched.
indexed = daily_cases.set_index(keys='Date')
indexed

In [None]:
# Title the figure so it can be told apart from the other column plots;
# the trailing ';' suppresses the Axes repr, as the later plot cells do.
indexed['Confirmed'].plot(title='Confirmed');

In [None]:
# Title the figure so it can be told apart from the other column plots;
# the trailing ';' suppresses the Axes repr, as the later plot cells do.
indexed['Deaths'].plot(title='Deaths');

In [None]:
# Title the figure so it can be told apart from the other column plots;
# the trailing ';' suppresses the Axes repr, as the later plot cells do.
indexed['Recovered'].plot(title='Recovered');

In [None]:
# Title the figure so it can be told apart from the other column plots;
# the trailing ';' suppresses the Axes repr, as the later plot cells do.
indexed['Active'].plot(title='Active');

In [None]:
# Chronological split on the date index: label slices include both endpoints,
# so 2021-08-04 is the last training day and 2021-08-05 the first validation day.
train = indexed.loc[:"2021-08-04"]
val = indexed.loc["2021-08-05":]

Keep the data up to 2021-08-04 as the training set for time-series modelling, and hold out everything after that date for forecasting. This split also removes the outlier caused by missing data in the Recovered column.

In [None]:
# Re-plot Recovered on the training window only. The title distinguishes it
# from the full-range plot, and ';' suppresses the Axes repr.
train['Recovered'].plot(title='Recovered (training window)');

In [None]:
# Univariate series used by the rest of the notebook: the training-window
# 'Confirmed' column with missing values removed.
confirmed_ts = train.loc[:, 'Confirmed'].dropna()

In [None]:
# Univariate time series for Confirmed cases
def roll_stats(ts, window, title='Rolling Statistics for Confirmed'):
  '''Plot a series together with its rolling mean and rolling standard deviation.

  The two rolling series are also printed before plotting.

  Parameters
  ----------
  ts : pandas.Series
      Series to analyse; it must support ``.rolling``.
  window : int
      Window size forwarded to ``ts.rolling``.
  title : str, optional
      Title of the plot. The default is the previously hard-coded title, so
      existing two-argument calls behave as before; pass another title when
      the series is not the raw Confirmed series (e.g. a differenced one).

  Returns
  -------
  None
  '''
  # Build the rolling window once and derive both statistics from it.
  rolling = ts.rolling(window=window)
  rollmean = rolling.mean()
  rollstd = rolling.std()
  print(rollmean, rollstd)

  # The Line2D handles returned by plt.plot are not needed: the legend is
  # built from the `label` keywords.
  plt.plot(ts, color='blue', label='Original')
  plt.plot(rollmean, color='red', label='Rolling Mean')
  plt.plot(rollstd, color='green', label='Rolling Standard Dev')
  plt.legend(loc='best')
  plt.title(title)
  plt.show()

In [None]:
# Rolling statistics of the raw series over a 30-observation window.
roll_stats(confirmed_ts, window=30)

In [None]:
# Rolling statistics of the raw series over a 90-observation window.
roll_stats(confirmed_ts, window=90)

In [None]:
from statsmodels.tsa.stattools import adfuller

def run_dicky_fuller(ts):
  '''Run the Augmented Dickey-Fuller test on ``ts`` and print a labelled report.

  The report is a pandas Series holding the first four values returned by
  ``adfuller`` (test statistic, p-value, lags used, observations used)
  followed by one entry per critical value. Nothing is returned.
  '''
  print("Observations of Dickey-fuller test")
  outcome = adfuller(ts, autolag='AIC')

  headline_labels = ['Test Statistic', 'p-value', '#lags used', 'number of observations used']
  report = dict(zip(headline_labels, outcome[0:4]))
  # Element 4 maps each significance level to its critical value.
  report.update(
    ('critical value (%s)' % level, threshold)
    for level, threshold in outcome[4].items()
  )
  print(pd.Series(report))

In [None]:
# ADF test on the undifferenced series.
run_dicky_fuller(ts=confirmed_ts)

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
# Additive decomposition with a seasonal period of 10 observations.
# `period` is the current keyword; `freq` is its deprecated predecessor
# (renamed in statsmodels 0.11).
decomp = seasonal_decompose(confirmed_ts, model='additive', period=10)
# DecomposeResult.plot() creates its own figure, so no plt.figure() call is
# made first: that call only produced an extra, empty figure.
fig = decomp.plot()
fig.set_size_inches(16, 9)

In [None]:
# First-order difference of the series (the first element is NaN), then its
# rolling statistics over a 30-observation window.
diff = confirmed_ts.diff(periods=1)
roll_stats(diff, window=30)

In [None]:
# Same first-order difference, recomputed so this cell runs on its own,
# inspected with a 90-observation rolling window.
diff = confirmed_ts.diff(periods=1)
roll_stats(diff, window=90)

In [None]:
# ADF test on the differenced series; the leading NaN produced by diff() is
# dropped before the series is passed in.
run_dicky_fuller(ts=diff.dropna())

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf 

# ACF (top) and PACF (bottom) of the differenced series, 50 lags each,
# stacked in a single 12x8 figure.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
plot_acf(diff.dropna(), lags=50, ax=ax1)
fig = plot_pacf(diff.dropna(), lags=50, ax=ax2)

In [None]:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf 

# ACF (top, 200 lags) and PACF (bottom, 100 lags) of the undifferenced
# series, stacked in a single 12x8 figure.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))
plot_acf(confirmed_ts, lags=200, ax=ax1)
fig = plot_pacf(confirmed_ts, lags=100, ax=ax2)

In [None]:
from statsmodels.tsa.arima_model import ARIMA 

# ARIMA(p,d,q) = (1,1,1): one AR term, first-order differencing, one MA term.
# NOTE(review): ARIMA comes from statsmodels.tsa.arima_model, the legacy API
# (believed removed in statsmodels 0.13) - confirm the pinned version.
# `model` is rebound by every fit cell; the fitted result is kept in results_ARIMA.
model = ARIMA(confirmed_ts, order=(1,1,1))
results_ARIMA = model.fit()
results_ARIMA.summary()

In [None]:
# Plot the ARIMA(1,1,1) predictions from position 100 to position 700 with dynamic=False.
# NOTE(review): 700 presumably lies past the end of confirmed_ts, so the tail
# would be an out-of-sample forecast - confirm against len(confirmed_ts).
results_ARIMA.plot_predict(start = 100, end = 700, dynamic = False);

In [None]:
from statsmodels.tsa.arima_model import ARIMA 

# ARIMA(p,d,q) = (1,1,0): AR-only variant (one AR term, first-order differencing, no MA term).
# `model` is rebound by every fit cell; the fitted result is kept in results_AR.
model = ARIMA(confirmed_ts, order=(1,1,0))
results_AR = model.fit()
results_AR.summary()

In [None]:
# Plot the ARIMA(1,1,0) predictions from position 100 to position 700 with dynamic=False.
# NOTE(review): 700 presumably lies past the end of confirmed_ts, so the tail
# would be an out-of-sample forecast - confirm against len(confirmed_ts).
results_AR.plot_predict(start = 100, end = 700, dynamic = False);

In [None]:
from statsmodels.tsa.arima_model import ARIMA 

# ARIMA(p,d,q) = (0,1,1): MA-only variant (no AR term, first-order differencing, one MA term).
# `model` is rebound by every fit cell; the fitted result is kept in results_MA.
model = ARIMA(confirmed_ts, order=(0,1,1))
results_MA = model.fit()
results_MA.summary()

In [None]:
# Plot the ARIMA(0,1,1) predictions from position 100 to position 700 with dynamic=False.
# NOTE(review): 700 presumably lies past the end of confirmed_ts, so the tail
# would be an out-of-sample forecast - confirm against len(confirmed_ts).
results_MA.plot_predict(start = 100, end = 700, dynamic = False);