In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from pycountry_convert import country_alpha2_to_continent_code, country_name_to_country_alpha2, country_name_to_country_alpha3
import json

In [2]:
# Load the raw policy list; read_csv assigns the default 0..n-1 row labels.
df = pd.read_csv('data/policy_list.csv')

# Parse both date columns; values that do not match the format become NaT.
for date_col in ('START_DATE', 'END_DATE'):
    df[date_col] = pd.to_datetime(df[date_col], format = "%m_%d_%y", errors = 'coerce')

# Manual corrections for wrong dates in the raw file:
# (row label, column to overwrite, corrected date).
manual_date_fixes = (
    (878, 'END_DATE', '2021-01-10'),
    (69, 'END_DATE', '2020-04-28'),
    (421, 'START_DATE', '2020-08-17'),
    (595, 'END_DATE', '2021-01-04'),
    (918, 'END_DATE', '2021-02-01'),
    (1196, 'END_DATE', '2021-09-21'),
)
for row_label, date_col, corrected in manual_date_fixes:
    df.loc[row_label, [date_col]] = pd.to_datetime(corrected)

# Order the policies chronologically by their start date.
df = df.sort_values('START_DATE')

# Keep only policies that end strictly after they start; rows with a missing
# (NaT) date have a NaT duration, fail the comparison and are dropped too.
df['DURATION'] = df['END_DATE'] - df['START_DATE']
df = df.loc[df['DURATION'] > pd.Timedelta(seconds = 0)]

# Policies recorded for the pseudo-country 'European Union' are expanded into
# one copy per member country, with the country name and ISO codes replaced.
# NOTE(review): 'United Kingdom' is in this list although it formally left the
# EU on 2020-01-31 -- confirm this is intended for the period covered.
european_union_countries = ['Austria',  'Belgium', 'Bulgaria', 'Croatia', 'Cyprus',
                            'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France',
                            'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia',
                            'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland',
                            'Portugal','Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'United Kingdom']

# The ISO codes depend only on the country name, so resolve them once here
# instead of once per (policy, country) pair inside the loop below.
eu_country_codes = [
    (country,
     country_name_to_country_alpha2(country),
     country_name_to_country_alpha3(country))
    for country in european_union_countries
]

df_wrong_country = df[df.COUNTRY_NAME == 'European Union']
# Column-oriented accumulator: one list of values per column of df.
new_rows = {column: [] for column in df.columns}

for _, policy in df_wrong_country.iterrows():
    for country, iso2, iso3 in eu_country_codes:
        # Values that differ from the original 'European Union' row.
        overrides = {'COUNTRY_NAME': country, 'ISO2': iso2, 'ISO3': iso3}
        for col in df.columns:
            new_rows[col].append(overrides[col] if col in overrides else policy[col])

df = df[df.COUNTRY_NAME != 'European Union']
# ignore_index=True: the expanded rows would otherwise carry labels 0..n-1,
# which collide with labels already present in df and make .loc ambiguous.
df = pd.concat([df, pd.DataFrame(new_rows)], ignore_index=True)

print(f"Size of dataframe: {df.shape}")
print("Columns: ", list(df.columns))

Size of dataframe: (1274, 45)
Columns:  ['ID', 'COUNTRY_NAME', 'ISO3', 'ISO2', 'POLICY_TYPE', 'POLICY_SUBTYPE', 'START_DATE', 'END_DATE', 'AIR', 'AIR_TYPE', 'TARGETS_AIR', 'LAND', 'LAND_TYPE', 'TARGETS_LAND', 'SEA', 'SEA_TYPE', 'TARGETS_SEA', 'CITIZEN', 'CITIZEN_LIST', 'HISTORY_BAN', 'HISTORY_BAN_LIST', 'REFUGEE', 'REFUGEE_LIST', 'VISA_BAN', 'VISA_BAN_TYPE', 'VISA_BAN_LIST', 'CITIZEN_EXCEP', 'CITIZEN_EXCEP_LIST', 'COUNTRY_EXCEP', 'COUNTRY_EXCEP_LIST', 'WORK_EXCEP', 'SOURCE_QUALITY', 'SOURCE_TYPE', 'INTERNAL_GOVT_SOURCE', 'AIRLINE_SOURCE', 'INSURANCE_SOURCE', 'GOVT_SOCIAL_MED_SOURCE', 'EXT_GOVT_SOURCE', 'INTERNAL_MEDIA_SOURCE', 'EXT_MEDIA_SOURCE', 'OTHER_SOURCE', 'END_SOURCE', 'COMMENTS', 'OLD_ID', 'DURATION']


In [3]:
# Headline statistics of the cleaned policy table.
total_policies = df.shape[0]

print(f"Unique countries: {df['COUNTRY_NAME'].unique().size}")
print(f"Initial start date: {df['START_DATE'].min()}")
print(f"Last end date: {df['END_DATE'].max()}")
print(f"Policy with smallest duration: {df['DURATION'].min()}")
print(f"Policy duration mean: {df['DURATION'].mean()}")

print("\nType of policys:")
print(df['POLICY_TYPE'].value_counts())

# Each count below is the number of rows whose flag column equals 1,
# reported against the total number of policies.
print("\nClosures:")
print(f"Total of air closures: {(df['AIR'] == 1).sum()}/{total_policies}")
print(f"Total of land closures: {(df['LAND'] == 1).sum()}/{total_policies}")
print(f"Total of sea closures: {(df['SEA'] == 1).sum()}/{total_policies}")

print("\nPartial closures params:")
for flag in ('CITIZEN', 'HISTORY_BAN', 'REFUGEE', 'VISA_BAN'):
    print(f"{flag.lower()}: {(df[flag] == 1).sum()}/{total_policies}")

print("\nComplete closure exceptions:")
for flag in ('CITIZEN_EXCEP', 'COUNTRY_EXCEP', 'WORK_EXCEP'):
    print(f"{flag.lower()}: {(df[flag] == 1).sum()}/{total_policies}")

Unique countries: 221
Initial start date: 2020-01-24 00:00:00
Last end date: 2021-10-01 00:00:00
Policy with smallest duration: 1 days 00:00:00
Policy duration mean: 59 days 06:14:07.723704867

Type of policys:
PARTIAL     824
COMPLETE    450
Name: POLICY_TYPE, dtype: int64

Closures:
Total of air closures: 505/1274
Total of land closures: 214/1274
Total of sea closures: 178/1274

Partial closures params:
citizen: 71/1274
history_ban: 140/1274
refugee: 1/1274
visa_ban: 41/1274

Complete closure exceptions:
citizen_excep: 408/1274
country_excep: 186/1274
work_excep: 208/1274


In [4]:
# Keep only the policies flagged as air closures (AIR equal to 1).
df = df.loc[df['AIR'].eq(1)]

In [5]:
# append one record to a column-oriented dictionary of lists
def add_dict(row, dic):
    """Append the values of ``row`` to the lists stored in ``dic``.

    The n-th value of ``row`` goes to the list held under the n-th key of
    ``dic`` (dictionary iteration order). ``dic`` is modified in place and
    nothing is returned. Values of ``row`` beyond the number of keys are
    ignored; a ``row`` shorter than the number of keys raises ``IndexError``
    once the missing position is reached.
    """
    for position, bucket in enumerate(dic.values()):
        bucket.append(row[position])

# parse the string to get the countries involved
def parse_country_list(string, valid_countries=None):
    """Extract the known country names mentioned in a free-text list.

    The text is split on commas. For every fragment, anything from the first
    '(' onwards is dropped and the remainder is kept if it is a known country.
    Fragments that contain the words " and " / " or " are additionally split
    on those words, so "France and Italy" yields both countries, while a name
    that itself contains such a word (e.g. "Trinidad and Tobago") is still
    matched as a whole first.

    The previous implementation called ``string.replace(sep, ',')`` without
    using the returned value, so "and"/"or" separated lists were never split.
    Separators are only recognised when surrounded by spaces, so names such as
    "Poland" or "Portugal" are not broken apart.

    Parameters
    ----------
    string : object
        Raw cell value; it is converted with ``str`` first, so a missing value
        (NaN) simply produces no match.
    valid_countries : container of str, optional
        Names accepted as countries. Defaults to the module-level
        ``countries`` list.

    Returns
    -------
    list of str
        Matched country names in order of appearance (duplicates are kept).
    """
    known = countries if valid_countries is None else valid_countries
    found = []
    for chunk in str(string).split(','):
        # First try the fragment as a single name, ignoring any parenthetical.
        whole = chunk.split('(')[0].strip()
        if whole in known:
            found.append(whole)

        # Then look for several names joined by " and " / " or ".
        pieces = chunk
        for sep in (' and ', ' or '):
            pieces = pieces.replace(sep, ',')
        if pieces == chunk:
            continue
        for piece in pieces.split(','):
            name = piece.split('(')[0].strip()
            # Skip the name already recorded from the whole-fragment match.
            if name != whole and name in known:
                found.append(name)
    return found

# only considering these countries
with open('data/countries.json', 'r') as countries_file:
    countries = json.load(countries_file)

# country name -> position of that country in the `countries` list
countries_map = {name: position for position, name in enumerate(countries)}

# our main data object: one entry per (restricting country, restricted country)
# pair together with the dates between which the restriction applies
restrictions = {'Source':[],'Target':[], 'time_start':[],'time_end':[]}

# Walk over every policy. A policy whose AIR_TYPE is 'All' is recorded once
# against the placeholder target 'EVERYONE'; any other policy is recorded once
# per known country parsed out of its TARGETS_AIR text.
for _, policy in df.iterrows():
    origin = policy['COUNTRY_NAME']
    if origin in countries:
        window = [policy['START_DATE'], policy['END_DATE']]
        if policy['AIR_TYPE'] == 'All':
            targets = ['EVERYONE']
        else:
            targets = [name
                       for name in parse_country_list(policy['TARGETS_AIR'])
                       if name in countries]
        for target in targets:
            add_dict([origin, target, *window], restrictions)

restrictions = pd.DataFrame(restrictions)

restrictions.head()

Unnamed: 0,Source,Target,time_start,time_end
0,Israel,China,2020-01-30,2020-03-18
1,Italy,China,2020-01-30,2020-02-09
2,Pakistan,China,2020-01-31,2020-03-21
3,Palau,Hong Kong,2020-02-01,2020-02-29
4,Palau,Macau,2020-02-01,2020-02-29


In [30]:
number_of_nodes = len(countries)

# Express both ends of every restriction as whole days elapsed since the
# earliest restriction start date.
basedate = restrictions['time_start'].min()
restrictions['time_start_num'] = restrictions['time_start'].sub(basedate).dt.days
restrictions['time_end_num'] = restrictions['time_end'].sub(basedate).dt.days

# Length of the time axis: the largest day offset found in either column.
number_of_days = max(restrictions[day_col].max()
                     for day_col in ('time_start_num', 'time_end_num'))

with open('data/initial_network.np', 'rb') as network_file:
    initial_network = np.load(network_file)
# Repeat the initial network number_of_days times along a leading axis
# (for a 2-D initial_network this gives one copy of the matrix per day).
temporal_adj = np.tile(initial_network, (number_of_days, 1, 1))

print(f"Shape of 3D adjacency matrix: {temporal_adj.shape}")

Shape of 3D adjacency matrix: (538, 225, 225)


In [32]:
# Zero out the entries of the temporal adjacency tensor covered by each
# restriction. The day range is half-open: the start day is included and
# the end day is not.
for ban in restrictions.itertuples(index=False):
    source_i = countries_map[ban.Source]
    days = slice(ban.time_start_num, ban.time_end_num)
    if ban.Target == 'EVERYONE':
        # Blanket restriction: clear the whole column of the source country.
        temporal_adj[days, :, source_i] = 0
    else:
        # Targeted restriction: clear only the (target, source) entry.
        temporal_adj[days, countries_map[ban.Target], source_i] = 0

In [33]:
# Persist the temporal adjacency tensor. An open file object is passed on
# purpose: given a path string, np.save appends ".npy" when the name does
# not already end with it, which would change the output file name.
with open('data/temporal_network.np', 'wb') as output_file:
    np.save(output_file, temporal_adj)