In [9]:
import datetime as dt
import os

import matplotlib.pyplot as plt 
from numpy import *
import pandas as pd

In [11]:
# data
DATA_PATH = os.path.join(os.pardir, 'data')
COUNTRY = 'INDIA'
DATA = os.path.join(DATA_PATH, COUNTRY)
CONFIRMED_CASES_PATH = os.path.join(DATA_PATH, COUNTRY, 'confirmed_cases.dat')
RECOVERED_CASES_PATH = os.path.join(DATA_PATH, COUNTRY, 'recovered_cases.dat')
DEATH_CASES_PATH = os.path.join(DATA_PATH, COUNTRY, 'death_cases.dat')
TESTS_PATH = os.path.join(DATA_PATH, COUNTRY, 'tests.dat')

confirmed_cases = np.loadtxt(CONFIRMED_CASES_PATH)
recovered_cases = np.loadtxt(RECOVERED_CASES_PATH)
death_cases = np.loadtxt(DEATH_CASES_PATH)
daily_tests = np.loadtxt(TESTS_PATH)

epidemics_start_date = dt.datetime(2020, 3, 25)
date_list = [epidemics_start_date + dt.timedelta(days=i) for i in range(confirmed_cases.size)]

# full dataframe
df = pd.DataFrame({
    'date': date_list,
    'confirmed_cases': confirmed_cases,
    'recovered_cases': recovered_cases,
    'death_cases': death_cases})
print(df)

NameError: name 'np' is not defined

In [None]:
def ordinary_least_squares(x, y):
    """Return the intercept and the bias of the linear regression line for (x, y) data.
    Implementation details: https://en.wikipedia.org/wiki/Ordinary_least_squares 
    
    Parameters
    ----------
    x : numpy.ndarray
        1-D array of x values.
    y : numpy.ndarray
        1-D array of y values.
    
    Returns
    -------
    tuple
        (intercept, bias)
    """
    assert x.ndim == 1, 'x must be array_like with ndim = 1.'
    assert y.ndim == 1, 'y must be array_like with ndim = 1.'
    x_col = x.reshape(-1, 1)
    x_col_bias = np.c_[x_col, np.ones(x_col.shape)]
    y_col = y.reshape(-1, 1)
    sol = np.linalg.pinv(x_col_bias.T @ x_col_bias) @ x_col_bias.T @ y_col 
    return (sol[0, 0], sol[1, 0])

In [None]:
def fit(confirmed_cases, death_cases, death_offset=22, extrapolation_addon_confirmed_cases=None, plot=False):
    """Return slope and intercept from linear fit delayed number of new death cases over new daily infected cases.
    
    confirmed_cases : numpy.ndarray
        Time series of confirmed infections.
    death_cases : numpy.ndarray
        Time series of confirmed deaths.
    death_offset : int, optional
        Number of days from infection until death.
    extrapolation_addon_confirmed_cases : int, optional
        Number of additional infections to predict deaths to.
    plot : bool, optional
        Visualize fitted model and data.
        
    Returns
    -------
    tuple
        (slope, intercept)
    """
    x = np.diff(confirmed_cases)[:-death_offset]
    y = np.diff(death_cases)[death_offset:]

    k, b = ordinary_least_squares(x, y)
    if plot:
        plt.figure(figsize=(10, 4))
        plt.scatter(x, y, label='Data', edgecolors='blue', facecolors='none')
        plt.plot(x, k*x + b, 'b-', label='Linear fit')
        if extrapolation_addon_confirmed_cases:
            plt.plot(np.arange(np.max(x), np.max(x) + extrapolation_addon_confirmed_cases), k*np.arange(np.max(x), np.max(x) + extrapolation_addon_confirmed_cases) + b, 'r--', label='Linear extrapolation')
        plt.text(1.03*np.max(x), 0.97*k*np.max(x) + b, f'k = {round(k, 5)}', verticalalignment='top', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
        plt.xlabel('New confirmed infections daily')
        plt.ylabel('New death cases daily')
        plt.title(f'{death_offset} days delayed death cases over new daily cases')
        plt.legend()
        plt.grid()
        plt.show()
    return (k, b)

In [None]:
# full data set
k, b = fit(confirmed_cases=df.confirmed_cases, death_cases=df.death_cases, death_offset=18, extrapolation_addon_confirmed_cases=1000, plot=True)

In [None]:
# first wave data dataframe
df_1 = df[df.date<=dt.datetime(2020, 6, 1)]
df_1 = df_1.reset_index(drop=True)

k, b = fit(confirmed_cases=df_1.confirmed_cases,  death_cases=df_1.death_cases,  death_offset=22, extrapolation_addon_confirmed_cases=100, plot=True)

In [None]:
# second wave data dataframe
df_2 = df[df.date>dt.datetime(2020, 6, 1)]
df_2 = df_2.reset_index(drop=True)

k, b = fit(confirmed_cases=df_2.confirmed_cases, death_cases=df_2.death_cases, death_offset=22, extrapolation_addon_confirmed_cases=500, plot=True)

In [None]:
# during August
df_aug = df[(df.date>=dt.datetime(2020, 4, 1)) & (df.date<dt.datetime(2020, 9, 1))]
df_aug = df_aug.reset_index(drop=True)

k, b = fit(confirmed_cases=df_aug.confirmed_cases, death_cases=df_aug.death_cases, death_offset=12, extrapolation_addon_confirmed_cases=150, plot=True)

In [None]:
# during September 
df_sep = df[(df.date>=dt.datetime(2020, 9, 1)) & (df.date<dt.datetime(2020, 10, 1))]
df_sep = df_sep.reset_index(drop=True)

k, b = fit(confirmed_cases=df_sep.confirmed_cases, death_cases=df_sep.death_cases, death_offset=12, plot=True)

In [None]:
# during October 
df_oct = df[(df.date>=dt.datetime(2020, 10, 1)) & (df.date<dt.datetime(2020, 11, 1))]
df_oct = df_oct.reset_index(drop=True)

k, b = fit(confirmed_cases=df_oct.confirmed_cases, death_cases=df_oct.death_cases, death_offset=12, plot=True)