In [1]:
## Initialisation
import pandas as pd
import geopandas
import numpy as np
from lmfit import Model, models
import matplotlib
import matplotlib.pyplot as plt
from covid.grabbers import grab_wbdata, grab_JHU
from covid.utils import print_wb_indicators, print_wb_sources, find_date_of_nth_label
 
# set up the wbdata indicator (just build up the dict if you want more than one)
indicators = {'VA.EST': 'Voice and Accountability', 'GE.EST': 'Government Effectiveness'}

# We use two dataframes. One for for storing zone-wide wide and one for timeseries stuff
zone_df = pd.DataFrame(columns=['zone'])
ts_df = pd.DataFrame(columns=['zone', 'date'])


In [3]:
# Grab data - wbdata
# grab indicators and load into data frame
wb_df = grab_wbdata(indicators)

gov_df = wb_df.groupby(level='country').tail(5).groupby(level='country').mean()
gov_df.index.name = 'zone'

# Merge into zone_df
zone_df = pd.merge(zone_df, gov_df, on='zone', how='right')

del indicators
del gov_df
del wb_df
zone_df

Unnamed: 0,zone,Voice and Accountability,Government Effectiveness
0,Afghanistan,-1.718050,-1.838644
1,Albania,-0.251730,-0.629331
2,Algeria,-1.128412,-0.818300
3,American Samoa,,
4,Andorra,1.497404,1.431379
...,...,...,...
209,Virgin Islands (U.S.),,
210,West Bank and Gaza,-0.779990,-1.124713
211,"Yemen, Rep.",-0.862932,-0.736121
212,Zambia,-0.382936,-0.898444


In [4]:
## Timeseries
# Grab JHU time series data
ts_df = grab_JHU()


In [58]:
a = ts_df
a

Unnamed: 0_level_0,Unnamed: 1_level_0,province_state,lat,long,cases,deaths,recoveries,day
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Afghanistan,2020-01-22,Afghanistan,33.0,65.0,0,0,0,0
Afghanistan,2020-01-23,Afghanistan,33.0,65.0,0,0,0,1
Afghanistan,2020-01-24,Afghanistan,33.0,65.0,0,0,0,2
Afghanistan,2020-01-25,Afghanistan,33.0,65.0,0,0,0,3
Afghanistan,2020-01-26,Afghanistan,33.0,65.0,0,0,0,4
...,...,...,...,...,...,...,...,...
Zimbabwe,2020-03-29,Zimbabwe,-20.0,30.0,7,1,0,67
Zimbabwe,2020-03-30,Zimbabwe,-20.0,30.0,7,1,0,68
Zimbabwe,2020-03-31,Zimbabwe,-20.0,30.0,8,1,0,69
Zimbabwe,2020-04-01,Zimbabwe,-20.0,30.0,8,1,0,70


In [83]:

b = a.reset_index().pivot_table(index=['country', 'date'], columns='province_state', margins=True, margins_name='total', values=['cases', 'deaths', 'recoveries'], aggfunc=np.sum).stack()

In [57]:
for label, sub_df in a.groupby(level=0):
    sub_df = sub_df.reset_index()
    c = sub_df['country'] == sub_df['province_state']
    if not any(c):
        # Need to calculate
        break

In [9]:
### Add population to zone_df
# Subnational population. Source=50
wb_df = grab_wbdata({'SP.POP.TOTL': 'population'})
pop_df = wb_df.xs('2018', level=1)
del wb_df

pop_df.index.name = 'zone'

zone_df = pd.merge(zone_df, pop_df, on='zone')
del pop_df


In [27]:
ts_df

Unnamed: 0_level_0,Unnamed: 1_level_0,province_state,lat,long,cases,deaths,recoveries,day,cases_per_1M,deaths_per_1M,recoveries_per_1M
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Afghanistan,2020-01-22,Afghanistan,33.0,65.0,0,0,0,0,0.000000,0.000000,0.0
Afghanistan,2020-01-23,Afghanistan,33.0,65.0,0,0,0,1,0.000000,0.000000,0.0
Afghanistan,2020-01-24,Afghanistan,33.0,65.0,0,0,0,2,0.000000,0.000000,0.0
Afghanistan,2020-01-25,Afghanistan,33.0,65.0,0,0,0,3,0.000000,0.000000,0.0
Afghanistan,2020-01-26,Afghanistan,33.0,65.0,0,0,0,4,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,2020-03-29,Zimbabwe,-20.0,30.0,7,1,0,67,0.000007,0.000001,0.0
Zimbabwe,2020-03-30,Zimbabwe,-20.0,30.0,7,1,0,68,0.000007,0.000001,0.0
Zimbabwe,2020-03-31,Zimbabwe,-20.0,30.0,8,1,0,69,0.000008,0.000001,0.0
Zimbabwe,2020-04-01,Zimbabwe,-20.0,30.0,8,1,0,70,0.000008,0.000001,0.0


In [26]:
## Timeseries calculate  per 1M pop
def logistic_fit(x, a, b, c):
    """
    x is variable.
    a is shift along x axis
    b is steepness
    c is asymptote max
    """
    return c / (1 + a * np.exp(-b*x))

LogisticModel = Model(logistic_fit)

one_mil = 10**6
LABELS = ('cases', 'deaths', 'recoveries')
for label in LABELS:
    ts_df[label + '_per_1M'] = ts_df[label]/one_mil
ts_df

label = 'deaths'
#countries = ['United Kingdom', 'Italy', 'Spain', 'Denmark', 'Iran', 'Greece', 'US', 'Sweden', 'Austria', 'Germany']
countries = 'all'

n = 10

fig, ax = plt.subplots(figsize=(15, 6))
ax.set_yscale('log')

if countries == 'all':
    countries = ts_df.reset_index()['country'].unique()

countries_fit = []
for country in countries:
    print("Processing {}".format(country))
    col = np.random.rand(3,)
    x = find_date_of_nth_label(ts_df, country, n, label.strip('_per_1M'))
    if not x:
        continue

    sub_df = ts_df[(ts_df['province_state'] == country) & (ts_df.index.is(country))]
    rel_df = sub_df[sub_df.index.levels[1] >= x]
    rel_df['rel_day'] = rel_df['day'] - rel_df['day'].iloc[0] + 1
    # rel_df.plot.scatter(x='rel_day', y=label)

    model = LogisticModel
    params = model.make_params(c=1000, a=1000, b=0.3)
    result = model.fit(rel_df[label], params, x=rel_df['rel_day'])

    countries_fit.append({
        'zone': country,
        'fit_result': result,
        'fit_a': result.params['a'].value,
        'fit_b': result.params['b'].value,
        'fit_c': result.params['c'].value,
        'x': rel_df['rel_day'],
        'y': rel_df[label],
        'n': n,
        'col': col
    })

    rel_df.plot.scatter(x='rel_day', y=label, color=[col], ax=ax, label=country + ", b: " + str(round(result.params['b'].value, 3)))
    plt.plot(rel_df['rel_day'], result.best_fit, color=col)
    print(result.params)

plt.legend()


SyntaxError: invalid syntax (<ipython-input-26-9e4090ecc55c>, line 39)

In [34]:
country = 'Afghanistan'
sub_df = ts_df[(ts_df['province_state'] == country)]
ts_df

Unnamed: 0_level_0,Unnamed: 1_level_0,province_state,lat,long,cases,deaths,recoveries,day,cases_per_1M,deaths_per_1M,recoveries_per_1M
country,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Afghanistan,2020-01-22,Afghanistan,33.0,65.0,0,0,0,0,0.000000,0.000000,0.0
Afghanistan,2020-01-23,Afghanistan,33.0,65.0,0,0,0,1,0.000000,0.000000,0.0
Afghanistan,2020-01-24,Afghanistan,33.0,65.0,0,0,0,2,0.000000,0.000000,0.0
Afghanistan,2020-01-25,Afghanistan,33.0,65.0,0,0,0,3,0.000000,0.000000,0.0
Afghanistan,2020-01-26,Afghanistan,33.0,65.0,0,0,0,4,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...
Zimbabwe,2020-03-29,Zimbabwe,-20.0,30.0,7,1,0,67,0.000007,0.000001,0.0
Zimbabwe,2020-03-30,Zimbabwe,-20.0,30.0,7,1,0,68,0.000007,0.000001,0.0
Zimbabwe,2020-03-31,Zimbabwe,-20.0,30.0,8,1,0,69,0.000008,0.000001,0.0
Zimbabwe,2020-04-01,Zimbabwe,-20.0,30.0,8,1,0,70,0.000008,0.000001,0.0


In [16]:
zone_df = pd.merge(zone_df, pd.DataFrame(countries_fit), on='zone', how='right')
zone_df

KeyError: 'zone'

In [18]:

world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

world.rename(columns={'name': 'zone'}, inplace=True)

a = world.merge(zone_df, on='zone', how='outer')

a.plot(column='fit_b', legend=True, cmap='summer', missing_kwds={
    "color": 'lightgrey'
})

KeyError: 'fit_b'