In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np



# The next line is a plain comment, not the `%matplotlib inline` magic, so it has no effect.
#matplotlib inline
# Default size (width, height in inches) for matplotlib figures.
mpl.rcParams['figure.figsize'] = (16, 10)
# Let pandas display up to 500 rows before it truncates a frame.
pd.set_option('display.max_rows', 500)

import plotly.graph_objects as go

## Data Load

In [2]:
# Load the processed flat table. The path is assembled with os.path.join so the
# notebook also runs on Linux/macOS, where a backslash is not a path separator.
data_path = os.path.join("..", "data", "processed", "COVID_small_flat_table.csv")

# The file is ';'-separated; the first column is parsed as dates while reading
# (ISO-formatted dates are recognised without an explicit format).
df_analyse = pd.read_csv(data_path, sep=';', parse_dates=[0])
df_analyse.sort_values("date", ascending=True).tail()

Unnamed: 0,date,Italy,US,Spain,Germany,"Korea, South"
880,2022-06-20,17896065,86297081,12563399,27334993,18289373
881,2022-06-21,17959329,86452232,12613634,27454225,18298341
882,2022-06-22,18014202,86636306,12613634,27573585,18305783
883,2022-06-23,18071634,86757621,12613634,27681775,18312993
884,2022-06-24,18128044,86909716,12681820,27771111,18319773


# Helper Functions

In [4]:
def generate_plot(x_in, df_input, y_scale='log', slider=False):
    """Plot every column of a data frame as one trace in a plotly figure.

    Intended for a quick visual check of time series. To plot only some
    columns, pass a selection such as ``df.iloc[:, [0, 6, 7, 8]]``.

    Parameters
    ----------
    x_in : array-like
        x values shared by all traces (datetime objects or numbers).
    df_input : pandas.DataFrame
        One trace is drawn per column; the column name is used for the legend.
    y_scale : str, default 'log'
        y-axis type handed to plotly, e.g. 'log' or 'linear'.
    slider : bool, default False
        If true, a range slider is shown on the x-axis.

    Returns
    -------
    None
        The figure is displayed via ``figure.show()``; nothing is returned.
    """
    figure = go.Figure()

    # One scatter trace per column, all sharing the same x values.
    for column_name in df_input.columns:
        figure.add_trace(go.Scatter(
            x=x_in,
            y=df_input[column_name],
            name=column_name,
            opacity=0.8))

    figure.update_layout(
        autosize=True,
        width=1024,
        height=768,
        font=dict(
            family="PT Sans, monospace",
            size=18,
            color="#7f7f7f"
        )
    )

    figure.update_yaxes(type=y_scale)
    figure.update_xaxes(
        tickangle=-45,
        nticks=20,
        tickfont=dict(size=14, color="#7f7f7f")
    )

    if slider:
        figure.update_layout(xaxis_rangeslider_visible=True)

    figure.show()

In [5]:
# Plot every country column against the calendar date on a linear y-axis.
generate_plot(
    x_in=df_analyse['date'],
    df_input=df_analyse.iloc[:, 1:],  # all columns after 'date'
    y_scale='linear',
    slider=True,
)

In [6]:
# Value a country's count must exceed for a row to be kept when the timelines are synchronised.
threshold = 100

In [7]:
# For every country column (all columns after 'date') keep only the values that
# exceed `threshold`, in row order. Each country yields its own array, and the
# arrays generally differ in length.
compare_list = [
    np.array(df_analyse.loc[df_analyse[country] > threshold, country])
    for country in df_analyse.columns[1:]
]

In [8]:
# Build one row per country, then transpose: columns are countries and row i
# holds each country's i-th value above the threshold.
# NOTE(review): the per-country arrays differ in length; shorter ones are presumably NaN-padded by pandas — confirm.
pd_sync_timelines=pd.DataFrame(compare_list,index=df_analyse.columns[1:]).T

In [9]:
# NOTE: despite its name, 'date' here is a running row counter (0, 1, 2, ...),
# not a calendar date; it serves as the common x-axis of the synchronised timelines.
pd_sync_timelines['date']=np.arange(pd_sync_timelines.shape[0])

In [10]:
# Show the first rows of the synchronised timelines.
pd_sync_timelines.head()

Unnamed: 0,Italy,US,Spain,Germany,"Korea, South",date
0,155.0,107.0,120.0,117.0,104.0,0
1,229.0,184.0,165.0,150.0,204.0,1
2,322.0,237.0,222.0,188.0,433.0,2
3,453.0,403.0,259.0,240.0,602.0,3
4,655.0,519.0,400.0,349.0,833.0,4


In [11]:
# Synchronised timelines on a log y-axis; x is the day counter stored in 'date'.
generate_plot(
    x_in=pd_sync_timelines['date'],
    df_input=pd_sync_timelines.iloc[:, :-1],  # everything except the trailing 'date' column
    y_scale='log',
    slider=True,
)

 # Doubling Rate

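$N(t) = N_0 \cdot 2^{t/T_d}$

where $N_0$ is the initial count, $t$ the elapsed time, and $T_d$ the doubling time (in the same unit as $t$).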

In [12]:
def doubling_rate(N_0,t,T_d):
    """Evaluate exponential growth N_0 * 2**(t / T_d).

    Parameters
    ----------
    N_0 : number
        Value at t = 0.
    t : number or numpy array
        Elapsed time(s) at which to evaluate the curve.
    T_d : number
        Doubling time, in the same unit as ``t``.

    Returns
    -------
    numpy float or numpy array
        ``N_0`` multiplied by two raised to the number of elapsed doublings.
    """
    doublings_elapsed = t / T_d
    growth_factor = np.power(2, doublings_elapsed)
    return N_0 * growth_factor

In [13]:
# NOTE(review): max_days is not referenced by any cell visible in this section.
max_days = 34

# Reference curves: growth starting at 100 over day indices 0..19, one curve
# per fixed doubling time. The labels become the column / legend names.
reference_days = np.arange(20)
doubling_time_by_label = {
    'doubling every two days': 2,
    'doubling every 4 days': 4,
    'doubling every 10 days': 10,
}
norm_slopes = {
    label: doubling_rate(100, reference_days, T_d)
    for label, T_d in doubling_time_by_label.items()
}

In [14]:
# Put the reference-slope columns in front of the country columns; concat with
# axis=1 aligns the two frames on their row index.
pd_sync_timelines_w_slope=pd.concat([pd.DataFrame(norm_slopes),pd_sync_timelines], axis=1)

In [15]:
# Plot the reference slopes together with every country. Dropping 'date' by
# name replaces the positional slice 0:5, which only covered the three slope
# columns plus the first two countries and silently left out the rest.
generate_plot(pd_sync_timelines_w_slope['date'],
              pd_sync_timelines_w_slope.drop(columns='date'),
              y_scale='log',
              slider=True)

# Understanding Linear Regression

In [16]:
from sklearn import linear_model
# Linear regression model created without an intercept term (fit_intercept=False).
# NOTE(review): the target fitted below is log(cases); without an intercept the
# fitted line is forced through the origin — confirm this is intended.
reg = linear_model.LinearRegression(fit_intercept=False)

In [17]:
# Regression inputs for Germany: log(count) as target, a 0-based row counter as
# the single feature. The first rows of the series are left out of the fit.
skip_rows = 5  # presumably skips leading zero counts so np.log stays finite — TODO confirm
germany_cases = np.array(df_analyse['Germany'])
l_vec = len(germany_cases)
X = np.arange(l_vec - skip_rows).reshape(-1, 1)  # column vector, one feature per sample
y = np.log(germany_cases[skip_rows:])

In [18]:
# Fit the regression of y (log of the Germany counts) on X (the row counter).
reg.fit(X,y)

In [19]:
# Evaluate the fitted model on indices 0 .. l_vec-1, shaped as a column vector.
# NOTE(review): the fit paired index 0 with row 5 of df_analyse, so Y_hat[i] is
# not aligned with df_analyse row i — confirm how the prediction is used.
X_hat = np.arange(l_vec)[:, np.newaxis]
Y_hat = reg.predict(X_hat)