In [None]:
import pandas as pd
import numpy as np
import math

import matplotlib.pyplot as plt
import seaborn as sns
from pylab import *
import pylab

from scipy.optimize import curve_fit, leastsq

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

sns.set_style('whitegrid')

import covid19_module

In [None]:
# Raw inputs: three global COVID-19 time-series tables (confirmed cases, deaths,
# recoveries) plus a per-country population table.
df_cases_raw = pd.read_csv(
    "./data/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv",
    index_col=False,
)
df_deaths_raw = pd.read_csv(
    "./data/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
    index_col=False,
)
df_recoveries_raw = pd.read_csv(
    "./data/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv",
    index_col=False,
)
df_population_raw = pd.read_csv(
    "./data/population_data/API_SP.POP.TOTL_DS2_en_csv_v2_887275.csv",
)

In [None]:
# Preview the first rows of the raw confirmed-cases table to check that it loaded.
df_cases_raw.head()

In [None]:
# Run the shared preprocessing step on each raw time series, in the order
# cases -> deaths -> recoveries.
df_cases, df_deaths, df_recoveries = (
    covid19_module.preprocess_frame(raw_frame)
    for raw_frame in (df_cases_raw, df_deaths_raw, df_recoveries_raw)
)

# Only the country name and the 2018 population figure are needed downstream.
df_population = df_population_raw.loc[:, ["Country Name", "2018"]]

df_cases.head()

In [None]:
# Total confirmed cases per country. numeric_only=True keeps any non-numeric
# columns out of the sum: since pandas 2.0 the default is numeric_only=False,
# which would try to "sum" text columns as well.
df_cases_grouped = df_cases_raw.groupby(by='Country/Region', as_index=False).sum(numeric_only=True)
# The last column of the raw frame is used as the ranking key (most recent count).
top_cases = df_cases_grouped.nlargest(10, df_cases_raw.columns[-1])['Country/Region']
print (top_cases)

# Countries whose name in the case data differs from the name used in the
# population table.
POPULATION_NAME_OVERRIDES = {
    "US": "United States",
    "Iran": "Iran, Islamic Rep.",
    "Korea, South": "Korea, Rep.",
}


def population_2018(country):
    """Return the 2018 population of ``country`` as a float.

    ``country`` is a name as used in the case data; it is translated through
    ``POPULATION_NAME_OVERRIDES`` before the lookup in ``df_population``.
    Returns ``None`` when the population table has no row (or no value) for
    the country, instead of failing on an empty selection.
    """
    country_pop = POPULATION_NAME_OVERRIDES.get(country, country)
    matches = df_population.loc[df_population["Country Name"] == country_pop, "2018"]
    if matches.empty or pd.isna(matches.iloc[0]):
        return None
    return float(matches.iloc[0])


def print_population_ratio(heading, df, factor, ndigits, unit):
    """Print the latest value of ``df`` per top country relative to population.

    For every country in ``top_cases`` the last row of ``df[country]`` is
    divided by the 2018 population, multiplied by ``factor`` and rounded to
    ``ndigits`` decimals; ``unit`` is printed after the number. Countries
    without a population entry are reported as unavailable.
    """
    print ("\n" + heading)
    print ("---------------")
    for country in top_cases:
        population = population_2018(country)
        if population is None:
            print (country, ": ", "n/a (no 2018 population entry)")
            continue
        print (country, ": ", round(float(df[country].iloc[-1])/population*factor, ndigits), unit)


print ("Mortality rates")
print ("---------------")
for country in top_cases:
    # Latest deaths divided by latest confirmed cases, as a percentage.
    print (country, ": ", round(float(df_deaths[country].iloc[-1])/float(df_cases[country].iloc[-1])*100, 3), "%")


print_population_ratio("Population percentage infected", df_cases, 100, 3, "%")
print_population_ratio("Population permil dead", df_deaths, 1000, 5, "permil")

In [None]:
# Re-align each frame via covid19_module.shift_to_day_zero. The return value is
# discarded, so the helper presumably modifies its first argument in place
# (TODO confirm against covid19_module).
# The case frame is shifted against an untouched copy of itself.
df_cases_reference = df_cases.copy()
covid19_module.shift_to_day_zero(df_cases, df_cases_reference)
# NOTE(review): deaths and recoveries are passed as their own reference (the same
# object twice), unlike the case frame above which uses a separate copy. Confirm
# this is intended and that the helper tolerates aliasing. Re-running this cell
# shifts already-shifted frames again.
covid19_module.shift_to_day_zero(df_deaths, df_deaths)
covid19_module.shift_to_day_zero(df_recoveries, df_recoveries)

In [None]:
%matplotlib notebook

# Plot confirmed cases for the ten countries with the most cases (top_cases).
countries = top_cases
 
covid19_module.plot_confirmed_cases(df_cases, countries)

In [None]:
%matplotlib notebook

# Cases, deaths and recoveries in one plot per series name.
# NOTE(review): 'notChina' is not a country name; presumably an aggregate column
# created by covid19_module.preprocess_frame -- confirm it exists in all three frames.
covid19_module.plot_case_death_recovery('China', df_cases, df_deaths, df_recoveries)
covid19_module.plot_case_death_recovery('notChina', df_cases, df_deaths, df_recoveries)

In [None]:
%matplotlib notebook

def sigmoid(x, x0, k, a, c):
    """Evaluate a four-parameter logistic curve.

    Computes ``a / (1 + exp(-k * (x - x0))) + c``.

    Parameters
    ----------
    x : float or numpy array
        Point(s) at which to evaluate the curve.
    x0 : float
        Midpoint: at ``x == x0`` the value is ``a / 2 + c``.
    k : float
        Steepness; its sign decides whether the curve rises or falls with ``x``.
    a : float
        Height difference between the two asymptotes.
    c : float
        Vertical offset; the asymptotes are ``c`` and ``c + a``.

    Returns
    -------
    Same shape as ``x``: the curve value(s).
    """
    decay = np.exp(-k * (x - x0))
    return a / (1 + decay) + c

# Fit the logistic curve to China's confirmed cases.
# Rows with a missing 'Day' or 'China' value are dropped first: curve_fit
# rejects NaN input, and fit_cases_data_sigmoid in this notebook likewise
# ignores missing values before fitting.
china_rows = df_cases[['Day', 'China']].dropna()
xdata, ydata = china_rows['Day'], china_rows['China']
# NOTE(review): this starting guess is far from the scale of the data
# (a=1, negative k); consider a data-driven p0 if the fit fails to converge.
popt, pcov = curve_fit(sigmoid, xdata, ydata, p0=(1.0, -1.0, 1.0, 0.0))
print ("Fit:")
print ("x0 =", popt[0])
print ("k  =", popt[1])
print ("a  =", popt[2])
print ("c  =", popt[3])
# For the logistic form a / (1 + exp(-k (x - x0))) + c the limits are c and c + a.
print ("Asymptotes are", popt[3], "and", popt[3] + popt[2] )

print (popt)

# Evaluate the fitted curve over the full length of the frame (one point per row).
x = np.linspace(-1, df_cases['China'].shape[0], df_cases['China'].shape[0])
y = sigmoid(x, *popt)

plt.plot(xdata, ydata, 'o', label='data')
plt.plot(x, y, label='fit')
# Keep the historical 0..100000 window, but grow it when the data exceed it so
# points are never clipped. np.maximum is used explicitly because
# `from pylab import *` may shadow the builtin max.
plt.ylim(0, np.maximum(100000, 1.05 * ydata.max()))
plt.legend(loc='best')
plt.show()

In [None]:
%matplotlib notebook

# Fit Sweden's confirmed-case series using the fitting helper from covid19_module.
covid19_module.fit_cases_data("Sweden", df_cases)

In [None]:
def fit_cases_data_sigmoid(country, func, df, p0=None):
    """Fit ``func`` to one country's case series and plot data and fitted curve.

    Parameters
    ----------
    country : str
        Name of the column in ``df`` that holds the series to fit.
    func : callable
        Model of the form ``func(x, *params)``; it is handed to
        ``scipy.optimize.curve_fit``.
    df : pandas.DataFrame
        Frame with a ``'Day'`` column and a ``country`` column.
    p0 : sequence of float, optional
        Initial parameter guess for ``curve_fit``. Defaults to
        ``[50.0, 0, -1e3, 1e3]``, the guess this function has always used;
        pass your own when ``func`` takes a different number of parameters.

    Returns
    -------
    None
        The fitted parameters and their covariance matrix are printed and the
        plot is shown.

    Raises
    ------
    ValueError
        If fewer usable data points than parameters are available.
    RuntimeError
        Propagated from ``curve_fit`` when the optimisation does not converge.
    """
    if p0 is None:
        p0 = [50.0, 0, -1e3, 1e3]

    firstday = 0
    # Number of non-missing observations; used as the exclusive upper day bound.
    lastday = int(df[country].notna().sum())

    # Same day window as before, but additionally skip missing values inside
    # the window, because curve_fit rejects NaN input.
    usable = (df['Day'] >= firstday) & (df['Day'] < lastday) & df[country].notna()
    xdata = df.loc[usable, 'Day']
    ydata = df.loc[usable, country]

    if len(xdata) < len(p0):
        raise ValueError(
            "not enough data points for %r: %d usable, %d parameters to fit"
            % (country, len(xdata), len(p0))
        )

    plt.plot(xdata, ydata, 'bo', label='data')

    popt, pcov = curve_fit(func, xdata, ydata, p0)
    print(popt)
    print("covariance matrix")
    print(pcov)

    # Legend entry names the parameters a, b, c, ... in fit order, for however
    # many parameters the model has.
    fit_label = 'fit: ' + ', '.join(
        '%s=%5.3f' % (chr(ord('a') + position), value)
        for position, value in enumerate(popt)
    )
    # Draw the fitted curve ten days beyond the last observation.
    x = np.linspace(firstday, lastday+10 , 100)
    plt.plot(x, func(x, *popt), 'r-', label=fit_label)

    plt.xlabel('days since first case')
    plt.ylabel('number of confirmed cases')
    plt.legend()
    plt.show()

In [None]:
%matplotlib notebook

# Fit the logistic model defined above to the United Kingdom's confirmed cases.
fit_cases_data_sigmoid("United Kingdom", sigmoid, df_cases)

In [None]:
%matplotlib notebook
# One daily-vs-total plot per country, all using the same interval.
interval = 7
for country_name in (
    "China",
    "Korea, South",
    "Greece",
    "US",
    "Italy",
    "Spain",
    "Sweden",
):
    covid19_module.plot_daily_vs_total(df_cases, country_name, interval)
