In [16]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np

%matplotlib inline
# default size applied to matplotlib figures created in this notebook
mpl.rcParams['figure.figsize'] = (16,10)
# raise the pandas display limit to 500 rows
pd.set_option('display.max_rows',500)

import plotly.graph_objects as go

![CRISP_DM](Crisp_DM_Tasks.png)

# Data load

In [17]:
# Location of the processed flat table. The default is the original
# machine-specific path; set the COVID_FLAT_TABLE_PATH environment variable
# to read the file from another location without editing this cell.
DATA_PATH = os.environ.get(
    'COVID_FLAT_TABLE_PATH',
    'C:/ProgramData/Anaconda3/eps_covid19/data/processed/COVID_small_flat_table.csv',
)

# Parse the dates right at load time (first column of the file); this works
# out of the box when the dates are stored in ISO YYYY-MM-DD format.
df_analyse = pd.read_csv(DATA_PATH, sep=';', parse_dates=[0])

# show the latest rows as a quick sanity check of the load
df_analyse.sort_values('date', ascending=True).tail()

Unnamed: 0,date,Albania,India,Italy,Germany,US
231,2020-09-09,10704,4465863,281583,256433,6360212
232,2020-09-10,10860,4562414,283180,258149,6396100
233,2020-09-11,11021,4659984,284796,259735,6443652
234,2020-09-12,11185,4754356,286297,260817,6485123
235,2020-09-13,11353,4846427,287753,261737,6519573


# Helper functions

In [18]:
def quick_plot(x_in, df_input, y_scale='log', slider=False):
    """Plot every column of a data frame as one Plotly line for a quick look.

    Pass only the columns you want to see, e.g. by slicing the frame with
    ``.iloc[:, [0, 6, 7, 8]]`` before calling.

    Parameters
    ----------
    x_in : array-like
        Shared x values for all traces (datetime objects or plain numbers).
    df_input : pandas.DataFrame
        Plotting matrix; each column becomes one trace and the column name
        is used as its legend entry.
    y_scale : str, default 'log'
        Y-axis type handed to Plotly, e.g. 'log' or 'linear'.
    slider : bool, default False
        When true, a range slider is shown on the x-axis.

    Returns
    -------
    None
        The figure is rendered via ``fig.show()``; nothing is returned.
    """
    fig = go.Figure()

    # one trace per column; the column label doubles as the legend name
    for column_name in df_input.columns:
        fig.add_trace(
            go.Scatter(
                x=x_in,
                y=df_input[column_name],
                mode='markers+lines',
                opacity=0.8,
                name=column_name,
            )
        )

    # overall layout properties: fixed canvas size and base font
    fig.update_layout(
        autosize=True,
        width=1024,   # 1280 or 1920
        height=768,   # 720 or 1080
        font=dict(
            family="PT Sans, monospace",
            size=18,
            color="#7f7f7f",
        ),
    )
    fig.update_yaxes(type=y_scale)
    fig.update_xaxes(
        tickangle=-45,
        nticks=20,
        tickfont=dict(size=14, color="#7f7f7f"),
    )

    if slider:
        fig.update_layout(xaxis_rangeslider_visible=True)

    fig.show()


In [19]:
# Plot the loaded table over calendar time: x is the 'date' column and the
# traces are all columns after the first one.
quick_plot(
    x_in=df_analyse['date'],
    df_input=df_analyse.iloc[:, 1:],
    y_scale='log',
    slider=True,
)

In [20]:
# values must be strictly greater than this to be kept when the per-country
# series are filtered for the synchronized comparison
threshold=100

In [21]:
# For every column after the first one, keep only the values strictly above
# `threshold`. The resulting arrays may differ in length from column to column.
compare_list = [
    np.array(df_analyse.loc[df_analyse[country] > threshold, country])
    for country in df_analyse.columns[1:]
]

In [22]:
# inspect the threshold-filtered arrays (one array per column)
compare_list

[array([  104,   123,   146,   174,   186,   197,   212,   223,   243,
          259,   277,   304,   333,   361,   377,   383,   400,   409,
          416,   433,   446,   467,   475,   494,   518,   539,   548,
          562,   584,   609,   634,   663,   678,   712,   726,   736,
          750,   766,   773,   782,   789,   795,   803,   820,   832,
          842,   850,   856,   868,   872,   876,   880,   898,   916,
          933,   946,   948,   949,   964,   969,   981,   989,   998,
         1004,  1029,  1050,  1076,  1099,  1122,  1137,  1143,  1164,
         1184,  1197,  1212,  1232,  1246,  1263,  1299,  1341,  1385,
         1416,  1464,  1521,  1590,  1672,  1722,  1788,  1838,  1891,
         1962,  1995,  2047,  2114,  2192,  2269,  2330,  2402,  2466,
         2535,  2580,  2662,  2752,  2819,  2893,  2964,  3038,  3106,
         3188,  3278,  3371,  3454,  3571,  3667,  3752,  3851,  3906,
         4008,  4090,  4171,  4290,  4358,  4466,  4570,  4637,  4763,
      

In [23]:
# Build one row per filtered series (labelled with the source column name),
# then transpose so that each series ends up as its own column.
pd_sync_timelines = pd.DataFrame(
    data=compare_list,
    index=df_analyse.columns[1:],
).T

In [24]:
# 'date' here is a running row counter (0, 1, 2, ...) used as the common
# x-axis of the synchronized series, not a calendar date.
pd_sync_timelines['date'] = np.arange(len(pd_sync_timelines))

In [25]:
# preview the first rows of the synchronized table
pd_sync_timelines.head()

Unnamed: 0,Albania,India,Italy,Germany,US,date
0,104.0,102.0,155.0,130.0,103.0,0
1,123.0,113.0,229.0,159.0,172.0,1
2,146.0,119.0,322.0,196.0,215.0,2
3,174.0,142.0,453.0,262.0,337.0,3
4,186.0,156.0,655.0,482.0,450.0,4


In [26]:
# Plot the synchronized series against the row counter; the last column is
# left out of the traces because it is passed as the x-axis instead.
quick_plot(
    x_in=pd_sync_timelines['date'],
    df_input=pd_sync_timelines.iloc[:, :-1],
    y_scale='log',
    slider=True,
)

# Doubling Rate

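$N(t) = N_0 \cdot 2^{t/T_d}$, where $N_0$ is the starting value, $t$ the elapsed time and $T_d$ the doubling time (in the same unit as $t$).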

In [27]:
def doubling_rate(N_0, t, T_d):
    """Evaluate the exponential growth curve N_0 * 2**(t / T_d).

    Parameters
    ----------
    N_0 : number
        Value of the curve at t = 0.
    t : number or array-like
        Time point(s) at which the curve is evaluated.
    T_d : number
        Doubling time, in the same unit as ``t``.

    Returns
    -------
    number or numpy.ndarray
        Curve value(s) at ``t``.
    """
    doublings_elapsed = t / T_d
    growth_factor = np.power(2, doublings_elapsed)
    return N_0 * growth_factor

In [28]:
# number of points in each reference curve
max_days = 160

# Reference growth curves starting at 100, one per (legend label, doubling
# time) pair; the insertion order of the pairs is the column order later on.
norm_slopes = {
    label: doubling_rate(100, np.arange(max_days), doubling_time)
    for label, doubling_time in (
        ('doubling every day', 1),
        ('doubling every two days', 2),
        ('doubling every 4 days', 4),
        ('doubling every 10 days', 10),
    )
}

In [29]:
# Place the reference curves to the left of the synchronized series, joined
# column-wise on the row index.
pd_sync_timelines_w_slope = pd.concat(
    [pd.DataFrame(norm_slopes), pd_sync_timelines],
    axis='columns',
)

In [30]:
# Plot the first five columns of the combined table against the row counter.
# NOTE(review): with four reference curves in front, this slice presumably
# covers the reference curves plus only the first data series - confirm that
# leaving out the remaining series is intended.
quick_plot(
    x_in=pd_sync_timelines_w_slope['date'],
    df_input=pd_sync_timelines_w_slope.iloc[:, 0:5],
    y_scale='log',
    slider=True,
)