In [221]:
from pathlib import Path
import os
import pandas as pd

In [222]:
# Parent of the current working directory; every data path below hangs off it.
gpl_covid_path = Path.cwd().parent

In [223]:
# Directory holding the raw per-country inputs: <root>/data/raw.
dir_data_raw = gpl_covid_path.joinpath('data', 'raw')

In [224]:
# Five countries are read from data/raw/<country>; Iran is the exception and
# is read from data/interim/iran.
country_dirs = dict(
    (name, dir_data_raw / name)
    for name in ('china', 'france', 'italy', 'korea', 'usa')
)

country_dirs['iran'] = gpl_covid_path.joinpath('data', 'interim', 'iran')

In [225]:
# country name -> path of that country's policy file (filled in further down).
country_paths = {}

Exploration

In [226]:
# os.listdir(country_dirs['korea'])
# sorted([filename for filename in os.listdir(country_dirs['korea']) if 'korea_policy_static' in filename])[-1]

Set paths

In [227]:
def _latest_policy_file(directory, stem):
    """Return the path of the last matching file in ``directory``.

    A file matches when its name contains ``stem``; among the matches the
    lexicographically greatest name is chosen (the same pick as
    ``sorted(names)[-1]``).
    NOTE(review): presumably the file names carry a sortable version/date
    suffix so that "last" means "newest" - confirm against the data folder.

    Args:
        directory: ``pathlib.Path`` of the folder to scan.
        stem: substring the file name must contain.

    Returns:
        ``directory / <chosen file name>``.

    Raises:
        FileNotFoundError: if no file name in ``directory`` contains ``stem``
            (instead of an opaque ``IndexError`` from indexing an empty list).
    """
    matches = [name for name in os.listdir(directory) if stem in name]
    if not matches:
        raise FileNotFoundError(
            'no file containing {!r} found in {}'.format(stem, directory)
        )
    return directory / max(matches)


# China, Iran and the USA use fixed file names; France, Italy and Korea pick
# the last "<country>_policy_static*" file present in their folder.
country_paths['china'] = country_dirs['china'] / 'china_city_policy.xlsx'
country_paths['france'] = _latest_policy_file(country_dirs['france'], 'france_policy_static')
country_paths['iran'] = country_dirs['iran'] / 'covid_iran_policies.csv'
country_paths['italy'] = _latest_policy_file(country_dirs['italy'], 'italy_policy_static')
country_paths['korea'] = _latest_policy_file(country_dirs['korea'], 'korea_policy_static')
country_paths['usa'] = country_dirs['usa'] / 'US_COVID-19_policies.csv'

In [228]:
# country name -> DataFrame of that country's policy records.
country_policies = {}

In [229]:
# Read every policy file into a DataFrame keyed by country name.
for country, path in country_paths.items():
    # Compare the extension case-insensitively so '.CSV' / '.XLSX' also load.
    suffix = path.suffix.lower()
    if suffix == '.xlsx':
        # Korea's workbook is read from a named sheet; every other workbook
        # uses pandas' default (the first sheet).
        excel_options = {'sheet_name': 'mapping_policy_cleaned'} if country == 'korea' else {}
        country_policies[country] = pd.read_excel(path, **excel_options)
    elif suffix == '.csv':
        # These three CSVs are decoded as latin1; the rest use pandas' default.
        csv_options = {'encoding': 'latin1'} if country in ('france', 'iran', 'usa') else {}
        country_policies[country] = pd.read_csv(path, **csv_options)
    else:
        # Fail here rather than with a KeyError in a later cell when the
        # country's frame turns out to be missing.
        raise ValueError(
            'unsupported policy file type {!r} for {}: {}'.format(path.suffix, country, path)
        )

In [230]:
# administrative level -> {country name: number of policies at that level}.
country_counts = {level: {} for level in ('adm0', 'adm1', 'adm2', 'adm3')}

In [None]:
# NOTE(review): not referenced by any of the visible cells - confirm whether it is still needed.
pandas_counts = {}

USA

In [231]:
# Keep only the location/date/policy columns, then collapse exact repeats so
# each remaining row is one distinct policy record.
country_policies['usa'] = (
    country_policies['usa']
    .loc[:, ['adm1_name', 'adm2_name', 'adm3_name', 'date', 'policy']]
    .drop_duplicates()
)

def get_policy_level_usa(row):
    """Classify a US policy row by the finest administrative level it names.

    Levels are checked from most specific (adm3) to least specific (adm1).
    A level counts as named when its ``*_name`` field is neither the
    ``'all'`` placeholder nor missing. Missing values are treated like
    ``'all'``: ``NaN != 'all'`` is true, so without this check every row
    with a blank ``adm3_name`` would be reported as adm3.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with the keys
            ``'adm1_name'``, ``'adm2_name'`` and ``'adm3_name'``.

    Returns:
        ``'adm3'``, ``'adm2'`` or ``'adm1'`` for the first named level, or
        ``'adm0'`` when no level is named.
    """
    for level in ('adm3', 'adm2', 'adm1'):
        value = row[level + '_name']
        if not pd.isnull(value) and value != 'all':
            return level
    return 'adm0'

# Tag each row with its administrative level.
country_policies['usa'] = country_policies['usa'].assign(
    policy_level=lambda frame: frame.apply(get_policy_level_usa, axis=1)
)

# Number of rows per level.
us_counts = country_policies['usa'].groupby('policy_level')['policy_level'].count()

# Record only the levels that actually occur for this country.
for level in ('adm0', 'adm1', 'adm2', 'adm3'):
    if level not in us_counts:
        continue
    country_counts[level]['usa'] = us_counts[level]

Korea

In [232]:
# Blank and 'national' adm1 entries are both normalised to the 'All'
# placeholder before the frame is reduced to its identifying columns.
country_policies['korea']['adm1_affected'] = (
    country_policies['korea']['adm1_affected']
    .fillna('All')
    .replace('national', 'All')
)
country_policies['korea'] = (
    country_policies['korea']
    .loc[:, ['adm1_affected', 'date_start', 'mapped_variable']]
    .drop_duplicates()
)

def get_policy_level_korea(row):
    """Return the administrative level of a Korean policy row.

    Args:
        row: mapping-like object with the key ``'adm1_affected'``.

    Returns:
        ``'adm0'`` when ``adm1_affected`` equals the ``'All'`` placeholder,
        otherwise ``'adm1'``.
    """
    return 'adm0' if row['adm1_affected'] == 'All' else 'adm1'


# Tag each row with its administrative level.
country_policies['korea'] = country_policies['korea'].assign(
    policy_level=lambda frame: frame.apply(get_policy_level_korea, axis=1)
)

# Number of rows per level.
korea_counts = country_policies['korea'].groupby('policy_level')['policy_level'].count()

# Record only the levels that actually occur for this country.
for level in ('adm0', 'adm1', 'adm2', 'adm3'):
    if level not in korea_counts:
        continue
    country_counts[level]['korea'] = korea_counts[level]

Iran

In [233]:
# Discard rows that are entirely empty, then rows that have no date.
country_policies['iran'] = (
    country_policies['iran']
    .dropna(how='all')
    .dropna(subset=['date'])
)

def get_policy_level_iran(locations_affected):
    """Return the administrative level implied by a 'Locations affected' value.

    Args:
        locations_affected: the cell value; a string or a missing value.

    Returns:
        ``'adm0'`` when the value is ``'National'`` or missing, otherwise
        ``'adm2'``.
    """
    is_national = pd.isnull(locations_affected) or locations_affected == 'National'
    return 'adm0' if is_national else 'adm2'

# The literal string 'None' is rewritten to 'National' before each row's
# level is derived from the cleaned location text.
iran_locations = country_policies['iran']['Locations affected'].replace('None', 'National')
country_policies['iran']['Locations affected'] = iran_locations
country_policies['iran']['policy_level'] = iran_locations.apply(get_policy_level_iran)

def count_comma_separated(locations):
    """Count the locations named in a comma-separated string.

    Empty tokens produced by trailing, leading or doubled commas
    (e.g. ``'Qom, Tehran,'``) are ignored so they do not inflate the count.

    Args:
        locations: comma-separated location names, or a missing value.

    Returns:
        The number of non-blank entries, and never less than 1: a missing
        or blank value still counts as a single policy.
    """
    if pd.isnull(locations):
        return 1
    named = [part for part in locations.split(',') if part.strip()]
    # A blank string yields no tokens; keep the historical result of 1.
    return max(len(named), 1)

# One policy is counted per location listed in 'Locations affected'.
country_policies['iran']['policy_count'] = (
    country_policies['iran']['Locations affected'].apply(count_comma_separated)
)
# Keep only the rows whose 'Cancelled' field is empty.
country_policies['iran'] = country_policies['iran'].loc[
    lambda frame: frame['Cancelled'].isnull()
]

# Sum the per-row policy counts within each level.
iran_counts = country_policies['iran'].groupby(['policy_level'])['policy_count'].sum()
for level in ('adm0', 'adm1', 'adm2', 'adm3'):
    if level not in iran_counts:
        continue
    country_counts[level]['iran'] = iran_counts[level]

In [253]:
# Show the per-level tallies accumulated so far.
country_counts

{'adm0': {'usa': 16, 'korea': 17, 'iran': 7},
 'adm1': {'usa': 199, 'korea': 20},
 'adm2': {'usa': 38, 'iran': 19},
 'adm3': {'usa': 245}}

In [259]:
def get_policy_level_china(row):
    """Classify a China policy row by the finest administrative level it names.

    ``adm2_name`` is checked before ``adm1_name``. A level counts as named
    when its field is neither the ``'ALL'`` placeholder nor missing.
    Missing values are treated like ``'ALL'``: ``NaN != 'ALL'`` is true, so
    without this check a blank ``adm2_name`` would be reported as adm2.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with the keys
            ``'adm1_name'`` and ``'adm2_name'``.

    Returns:
        ``'adm2'`` or ``'adm1'`` for the first named level, or ``'adm0'``
        when neither is named.
    """
    for level in ('adm2', 'adm1'):
        value = row[level + '_name']
        if not pd.isnull(value) and value != 'ALL':
            return level
    return 'adm0'

# Reduce to the identifying columns and collapse exact repeats.
country_policies['china'] = (
    country_policies['china']
    .loc[:, ['adm1_name', 'adm2_name', 'date_start', 'policy']]
    .drop_duplicates()
)
# Tag each row with its administrative level.
country_policies['china'] = country_policies['china'].assign(
    policy_level=lambda frame: frame.apply(get_policy_level_china, axis=1)
)

# Number of rows per level; record only the levels that occur.
china_counts = country_policies['china'].groupby('policy_level')['policy_level'].count()
for level in ('adm0', 'adm1', 'adm2', 'adm3'):
    if level not in china_counts:
        continue
    country_counts[level]['china'] = china_counts[level]

In [265]:
# Blank adm1/adm2 cells are replaced with the 'All' placeholder.
country_policies['france'] = country_policies['france'].fillna(
    {'adm1': 'All', 'adm2': 'All'}
)

In [267]:
# Show the France frame (blank adm1/adm2 cells now read 'All').
country_policies['france']

Unnamed: 0,adm0,adm1,adm2,date,policy_category,policy_description,optional,minor,source,access_date
0,France,11,75,2/29/2020,event_cancel,Paris half marathon for March 1,,,https://fr.wikipedia.org/wiki/Pand%C3%A9mie_de...,3/14/2020
1,France,32,60,2/29/2020,no_gathering,no gathering,,,https://fr.wikipedia.org/wiki/Pand%C3%A9mie_de...,3/14/2020
2,France,32,60,2/29/2020,school_closure_5,5 cities,,,https://fr.wikipedia.org/wiki/Pand%C3%A9mie_de...,3/14/2020
3,France,All,All,2/29/2020,no_gathering_5000,limit gatherings of more than 5000,,,https://www.gouvernement.fr/info-coronavirus,3/14/2020
4,France,76,82,2/29/2020,event_cancel,Championnats de France de cross-country for Ma...,,1.0,https://fr.wikipedia.org/wiki/Pand%C3%A9mie_de...,3/14/2020
...,...,...,...,...,...,...,...,...,...,...
56,France,11,All,3/14/2020,social_distance,Activation of the ARS Business Continuity Plan...,,,https://www.iledefrance.ars.sante.fr/system/fi...,3/15/2020
57,France,44,67,3/14/2020,no_gathering_50,"excludes shops, businesses, restaurants, bars,...",,,https://www.grand-est.ars.sante.fr/system/file...,3/14/2020
58,France,All,All,3/16/2020,school_closure_all,for minimum of 15 days or as long as necessary,,,https://www.gouvernement.fr/info-coronavirus,3/14/2020
59,France,All,All,3/16/2020,business_closure,,,,https://www.gouvernement.fr/info-coronavirus,3/17/2020


In [268]:
def get_policy_level_france(row):
    """Return the finest administrative level named by a French policy row.

    Args:
        row: mapping-like object with the keys ``'adm1'`` and ``'adm2'``.

    Returns:
        ``'adm2'`` if ``adm2`` differs from the ``'All'`` placeholder, else
        ``'adm1'`` if ``adm1`` does, else ``'adm0'``.
    """
    for level in ('adm2', 'adm1'):
        if row[level] != 'All':
            return level
    return 'adm0'

# Reduce to the identifying columns and collapse exact repeats.
country_policies['france'] = (
    country_policies['france']
    .loc[:, ['adm1', 'adm2', 'date', 'policy_category']]
    .drop_duplicates()
)
# Tag each row with its administrative level.
country_policies['france'] = country_policies['france'].assign(
    policy_level=lambda frame: frame.apply(get_policy_level_france, axis=1)
)

# Number of rows per level; record only the levels that occur.
france_counts = country_policies['france'].groupby('policy_level')['policy_level'].count()
for level in ('adm0', 'adm1', 'adm2', 'adm3'):
    if level not in france_counts:
        continue
    country_counts[level]['france'] = france_counts[level]

In [272]:
def get_policy_level_italy(row):
    """Classify an Italy policy row by the finest administrative level it names.

    Levels are checked from most specific (adm3) to least specific (adm1).
    A level counts as named when its ``*_affected`` field is neither the
    ``'All'`` placeholder nor missing. Missing values are treated like
    ``'All'``: ``NaN != 'All'`` is true, so without this check every row
    with a blank ``adm3_affected`` would be reported as adm3.

    Args:
        row: mapping-like object (e.g. a DataFrame row) with the keys
            ``'adm1_affected'``, ``'adm2_affected'`` and ``'adm3_affected'``.

    Returns:
        ``'adm3'``, ``'adm2'`` or ``'adm1'`` for the first named level, or
        ``'adm0'`` when no level is named.
    """
    for level in ('adm3', 'adm2', 'adm1'):
        value = row[level + '_affected']
        if not pd.isnull(value) and value != 'All':
            return level
    return 'adm0'

# Reduce to the identifying columns and collapse exact repeats.
country_policies['italy'] = (
    country_policies['italy']
    .loc[:, ['adm1_affected', 'adm2_affected', 'adm3_affected', 'Date', 'Policy']]
    .drop_duplicates()
)
# Tag each row with its administrative level.
country_policies['italy'] = country_policies['italy'].assign(
    policy_level=lambda frame: frame.apply(get_policy_level_italy, axis=1)
)

# Number of rows per level; record only the levels that occur.
italy_counts = country_policies['italy'].groupby('policy_level')['policy_level'].count()
for level in ('adm0', 'adm1', 'adm2', 'adm3'):
    if level not in italy_counts:
        continue
    country_counts[level]['italy'] = italy_counts[level]

In [273]:
# Show the per-level tallies after the remaining countries were added.
country_counts

{'adm0': {'usa': 16, 'korea': 17, 'iran': 7, 'france': 8, 'italy': 13},
 'adm1': {'usa': 199, 'korea': 20, 'china': 4, 'france': 2, 'italy': 24},
 'adm2': {'usa': 38, 'iran': 19, 'china': 126, 'france': 49, 'italy': 81},
 'adm3': {'usa': 245, 'italy': 77}}

In [277]:
# Table of counts: one row per country, one column per administrative level.
# Levels a country never uses are filled with 0; rows are sorted by country.
country_counts_df = (
    pd.DataFrame.from_dict(country_counts)
    .fillna(0)
    .sort_index()
)

In [282]:
# Per-country total across all levels. An existing 'total' column is left out
# of the sum so that re-running this cell does not add the old total into the
# new one.
country_counts_df['total'] = (
    country_counts_df
    .drop(columns='total', errors='ignore')
    .sum(axis=1)
)

In [286]:
# Column sums over all countries.
# NOTE(review): `x` is not referenced by any later visible cell.
x = country_counts_df.sum(axis='index')

In [290]:
# Display the table with a final 'total' row holding the column sums.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the
# totals row is built as a one-row frame and attached with pd.concat.
pd.concat(
    [
        country_counts_df,
        country_counts_df.sum(axis=0).rename('total').to_frame().T,
    ]
).astype(int)

Unnamed: 0,adm0,adm1,adm2,adm3,total
china,0,4,126,0,130
france,8,2,49,0,59
iran,7,0,19,0,26
italy,13,24,81,77,195
korea,17,20,0,0,37
usa,16,199,38,245,498
total,61,249,313,322,945
