In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import urllib.request
import numpy as np
import pandas as pd
from statsmodels.nonparametric.smoothers_lowess import lowess

In [None]:
# Load dataset from Johns Hopkins website
def load_url_as_df(url):
    """Fetch a remote CSV resource and parse it into a pandas DataFrame.

    Parameters
    ----------
    url : str
        Address of the CSV resource, opened with ``urllib.request.urlopen``.

    Returns
    -------
    pandas.DataFrame
        The parsed table, indexed by its 'Country/Region' column.
    """
    # The context manager closes the connection once parsing has finished.
    with urllib.request.urlopen(url) as remote_csv:
        frame = pd.read_csv(remote_csv, index_col='Country/Region')
    return frame

In [None]:
# Raw CSV of the confirmed-cases time series in the CSSEGISandData/COVID-19 GitHub repository.
# NOTE(review): this file name appears to have been deprecated upstream (successor believed
# to be time_series_covid19_confirmed_global.csv) — confirm the URL still resolves.
url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_19-covid-Confirmed.csv'
# Download and parse the table; rows are indexed by the 'Country/Region' column.
df_confirmed = load_url_as_df(url)
# Last expression of the cell: display the first rows as a quick sanity check.
df_confirmed.head()

In [None]:
class TimeSeries:
    """A 1D time-series with LOWESS smoothing and day-over-day change ratios.

    Parameters
    ----------
    tseries : 1D array-like
        The time-series values, one per time step.
    t_origin : int, optional
        Time-stamp assigned to the first sample. Default: 54.

    Attributes
    ----------
    tseries : 1D numpy array
        The input values (converted with ``np.asarray``).
    tstamps : 1D numpy array
        Consecutive time-stamps ``t_origin, t_origin + 1, ...``, one per sample.

    Methods
    -------
    change_ratio(mode='cumulative')
        Value on each day divided by the value on the previous day, computed
        on the raw ('cumulative') or smoothed ('cumulative_smooth') series.
    smooth(frac=0.15, it=0)
        LOWESS-smoothed version of the time-series.

    Raises
    ------
    ValueError
        If ``tseries`` is not one-dimensional.
    """

    def __init__(self, tseries, t_origin=54):
        # Coerce once so lists and other array-likes behave like ndarrays below;
        # an ndarray input is stored as-is (no copy).
        values = np.asarray(tseries)
        if values.ndim != 1:
            raise ValueError(
                "tseries must be one-dimensional, got %d dimension(s)" % values.ndim
            )
        self.tseries = values
        self.tstamps = np.arange(t_origin, values.shape[0] + t_origin)

    def change_ratio(self, mode='cumulative'):
        """Return the value of each day divided by the value of the previous day.

        Parameters
        ----------
        mode : {'cumulative', 'cumulative_smooth'}
            'cumulative' (default) uses the input time-series as-is;
            'cumulative_smooth' first smooths it with ``smooth()`` (default
            LOWESS settings).

        Returns
        -------
        1D numpy array, one element shorter than the series
            ``ratio[i] = data[i + 1] / data[i]``. Entries whose previous-day
            value is 0 follow NumPy division rules (inf or nan).

        Raises
        ------
        ValueError
            If ``mode`` is not one of the supported values.
        """
        if mode == 'cumulative':
            data = self.tseries
        elif mode == 'cumulative_smooth':
            data = self.smooth()
        else:
            # Fail loudly instead of hitting an unbound local further down.
            raise ValueError(
                "mode must be 'cumulative' or 'cumulative_smooth', got %r" % (mode,)
            )
        data = np.asarray(data)
        # Shifted slices pair each day with the one before it; the division
        # allocates a new array, so no explicit copies are needed.
        return data[1:] / data[:-1]

    def smooth(self, frac=0.15, it=0):
        """Smooth the time-series using LOWESS.

        https://www.statsmodels.org/stable/generated/statsmodels.nonparametric.smoothers_lowess.lowess.html

        Parameters
        ----------
        frac : float, optional
            Forwarded to ``lowess`` as ``frac``. Default: 0.15.
        it : int, optional
            Forwarded to ``lowess`` as ``it``. Default: 0.

        Returns
        -------
        1D numpy array
            The smoothed values.
        """
        tseries_smooth = lowess(self.tseries, self.tstamps, frac=frac, it=it)
        # lowess returns two columns (time-stamp, fitted value); keep the fit.
        return tseries_smooth[:, 1]

In [None]:
# NOTE(review): `confirmed_total` is not defined in any cell visible here — presumably a
# pandas object derived from `df_confirmed` (it is accessed via `.values`); confirm it is
# created before this cell so the notebook survives Restart & Run All.
toto = TimeSeries(confirmed_total.values)
# Unpack the raw series and its derived quantities for the plotting cell.
tstamps              = toto.tstamps
tseries              = toto.tseries
# LOWESS-smoothed series, using the default settings of TimeSeries.smooth().
tseries_smoothened   = toto.smooth()
# Day-over-day ratios, computed on the raw and on the smoothed series respectively.
tseries_change_ratio = toto.change_ratio()
smooth_change_ratio  = toto.change_ratio(mode='cumulative_smooth')

In [None]:
# Two side-by-side panels: the series itself (left) and its day-over-day ratio (right).
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(8, 4), dpi=180)

# Left panel: raw values against the smoothed curve.
ax1.plot(tstamps, tseries, 'o', label='data')
ax1.plot(tstamps, tseries_smoothened, '.-', label='smoothened')
ax1.set(xlabel='Days since first case', ylabel='Confirmed cases')
ax1.grid()
ax1.legend()

# Right panel: change ratios; they start one time-stamp later than the series.
ax2.plot(tstamps[1:], tseries_change_ratio, 'o', label='data')
ax2.plot(tstamps[1:], smooth_change_ratio, '.-', label='smoothened')
ax2.set(xlabel='Days since first case', ylabel='Confirmed cases (change ratio)')
ax2.grid()
ax2.legend()

fig.tight_layout()
plt.show()