In [1]:
import datetime
import os
import time
import joblib
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

def getDailyChange(d):
    """Turn cumulative totals into day-over-day increments.

    Differences are taken between neighbouring entries along the last axis,
    so the result holds one entry fewer along that axis than ``d``: the
    first entry has no predecessor it could be compared with.
    """
    return np.diff(d, n=1, axis=-1)

def getNDayAverage(d, n):
    """Smooth every row of ``d`` with a trailing average over up to ``n`` values.

    Parameters
    ----------
    d : array-like, 2-D
        One series per row. Values are converted to float64.
    n : int
        Maximum number of preceding values that go into each average.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``d``. Entry ``[r, c]`` is the mean
        of the (at most ``n``) values located directly before column ``c`` in
        row ``r``; the value at ``c`` itself is not part of the window.
        Positions without any preceding value (column 0, or every column when
        ``n < 1``) are 0.

    Raises
    ------
    ValueError
        If ``d`` is not two-dimensional.
    """
    values = np.asarray(d, dtype=np.float64)
    if values.ndim != 2:
        raise ValueError("getNDayAverage expects a 2-D array (one series per row)")

    d_averaged = np.zeros(values.shape)
    window = int(n)
    if window < 1:
        # No window to average over: every position keeps the 0 placeholder.
        return d_averaged

    for ci in range(1, values.shape[1]):
        # The window reaches back as far as column 0. The earlier loop tested
        # `ci - index > 0`, which stopped one short, so the first value of each
        # row never contributed and column 1 was always reported as 0.
        start = max(0, ci - window)
        d_averaged[:, ci] = values[:, start:ci].mean(axis=1)
    return d_averaged

def getChange(current_data):
    """Percentage change of every value relative to the value one column earlier.

    Parameters
    ----------
    current_data : array-like, 2-D
        One series per row. Values are converted to float64, so integer input
        is accepted.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``current_data``. Entry ``[r, c]``
        is ``current[r, c] / current[r, c - 1] * 100 - 100``. Where the
        previous value is 0 (which includes column 0, which has no
        predecessor) the change is reported as 0.
    """
    current = np.asarray(current_data, dtype=np.float64)

    # Shift each row right by one column, padding the front with 0.
    # Done by slice assignment rather than np.insert: inserting a (rows, 1)
    # block at a scalar index adds one column *per row*, so the shifted array
    # only matched the input shape when there was exactly one row.
    previous = np.zeros_like(current)
    previous[:, 1:] = current[:, :-1]

    has_previous = previous != 0
    change_data = np.zeros_like(current)
    np.divide(current, previous, out=change_data, where=has_previous)

    # Convert the ratio into a percentage change wherever a ratio was computed.
    # Masking on `has_previous` (not on the ratio being non-zero) keeps a drop
    # to 0 as -100 % instead of silently turning it into "no change".
    change_data[has_previous] = change_data[has_previous] * 100 - 100
    return change_data

def getData(name):
    """Download one global COVID-19 time series and keep a fixed set of countries.

    ``name`` is spliced into the file name ``time_series_covid19_<name>_global.csv``
    inside the CSSEGISandData/COVID-19 repository on GitHub (the notebook
    requests ``"confirmed"`` and ``"deaths"``).

    Returns the rows of that CSV whose ``Country/Region`` is one of the
    countries listed below, in file order and with the original index labels.
    The selection is deliberately small to keep development fast.
    """
    base_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"
    url = base_url + "time_series_covid19_" + name + "_global.csv"

    selected_countries = [
        'Czechia',
        'Germany',
        'Austria',
        'US',
        'Spain',
        'Italy',
        'Switzerland',
    ]

    df = pd.read_csv(url)
    is_selected = df['Country/Region'].isin(selected_countries)
    return df.loc[is_selected]

# Fetch both series up front; every later cell works from these two frames.
df_confirmed, df_deaths = getData("confirmed"), getData("deaths")

In [2]:
# The first four columns are skipped; the remaining column labels are kept as the dates.
dates = df_confirmed.columns[4:].tolist()
# Country names in row order, used to label the plotted traces.
countries = list(df_confirmed['Country/Region'])
print(countries)

['Czechia']


In [3]:
# Skip the four leading columns, then turn the remaining totals into
# day-over-day increments (one row per entry in each frame).
# TO DO: the differences can come out negative; decide whether those values
# should be eliminated.
deaths = getDailyChange(df_deaths.to_numpy()[:, 4:])
confirmed = getDailyChange(df_confirmed.to_numpy()[:, 4:])

In [None]:
# get data per 100 000 population
# Populations are looked up by country name, so the scaling follows whatever
# rows `countries` currently holds instead of assuming that row 0 is Austria,
# row 1 is Czechia, and so on.
# TO DO: population figures exist only for the countries listed here.
# NOTE: `deaths` and `confirmed` are rebound to the scaled values, so running
# this cell twice scales them twice.
POPULATION_BY_COUNTRY = {
    "Austria": 8847037,
    "Czechia": 10625695,
    "Germany": 82927922,
    "Italy": 60431283,
    "Spain": 46723749,
    "Switzerland": 8516543,
    "US": 327167434,
}

countries_without_population = [c for c in countries if c not in POPULATION_BY_COUNTRY]
if countries_without_population:
    # Fail loudly rather than leave some rows unscaled or scaled by the wrong figure.
    raise KeyError("No population figure for: " + ", ".join(map(str, countries_without_population)))

# Column vector so that each row is divided by its own country's population.
populations = np.array([POPULATION_BY_COUNTRY[c] for c in countries], dtype=np.float64)[:, np.newaxis]

deaths = np.asarray(deaths, dtype=np.float64) * 100000 / populations
confirmed = np.asarray(confirmed, dtype=np.float64) * 100000 / populations

## No averaging

In [None]:
# One spline per country: that country's daily deaths on x against its daily
# confirmed cases on y.
country_traces = [
    go.Scatter(
        x=deaths[i],
        y=confirmed[i],
        line_shape='spline',
        name=countries[i],
    )
    for i in range(len(deaths))
]
fig = go.Figure(data=country_traces)

layout_labels = {
    'title': 'New confirmed cases and deaths each day',
    'xaxis_title': 'Deaths',
    'yaxis_title': 'Confirmed cases',
}
fig.update_layout(**layout_labels)
fig.show()

# for scatterplot
# fig = px.scatter(x=deaths[0], y=confirmed[0], color=np.arange(deaths.shape[1]),trendline="lowess")

## Averaging

In [4]:
# Replace both daily series with their n-day averages.
nDaysAverage = 7
deaths, confirmed = (
    getNDayAverage(deaths, n=nDaysAverage),
    getNDayAverage(confirmed, n=nDaysAverage),
)

In [None]:
# Hover text: one date label per plotted point.
# The plotted series can be shorter than `dates` (taking daily differences
# drops the first entry), so the labels are aligned on the trailing dates.
# Previously the label was `dates[i]` with `i` being the country index, which
# gave every point of a trace the same, unrelated date.
n_points = deaths.shape[1]
first_date = max(0, len(dates) - n_points)
point_labels = ["Date: " + day for day in dates[first_date:]]

fig = go.Figure()
for i in range(len(deaths)):
    fig.add_trace(go.Scatter(
        x=deaths[i],
        y=confirmed[i],
        line_shape='spline',
        name=countries[i],
        text=point_labels))
fig.update_layout(title='New confirmed cases and deaths each day (7-day-average), per 100 000 population',
                   xaxis_title='Deaths',
                   yaxis_title='Confirmed cases')
fig.show()

# Change in %

In [7]:
# Percentage change of each value relative to the previous day.
# NOTE: this relies on getChange accepting one row per country.
change_confirmed = getChange(confirmed)
change_deaths = getChange(deaths)

# Optional extra smoothing of the percentage changes (disabled):
# nDaysAverage = 7
# change_deaths = getNDayAverage(change_deaths, nDaysAverage)
# change_confirmed = getNDayAverage(change_confirmed, nDaysAverage)

# One trace per country rather than only the first row.
fig = go.Figure()
for i in range(len(change_deaths)):
    fig.add_trace(go.Scatter(
        x=change_deaths[i],
        y=change_confirmed[i],
        line_shape='spline',
        name=countries[i]))

# Apply the labels before showing the figure; calling show() first displayed
# an untitled plot and then rendered the figure a second time.
fig.update_layout(title='Daily change in new confirmed cases and deaths(7-day-average) in %',
                   xaxis_title='Deaths',
                   yaxis_title='Confirmed cases')
fig.show()

# fig = px.scatter(x=change_deaths[0], y=change_confirmed[0], color=np.arange(change_deaths.shape[1]), trendline="lowess")
# fig.show()