# CoVID vaccine preparedness
Calculate a preparedness index/prioritization framework for CoVID vaccine distribution. Factors are calculated as demand-side (vulnerability) and supply-side (ability to deploy the vaccine).

### Demand-side data 

1. Population
2. Vulnerable population (following methodology detailed here) 
3. WASH data on sanitation 
4. Urbanization 

### Supply-side data 

1. Access to health facilities  
    a. Access is quantified based on driving time to facilities  
    b. Health facilities are brought in from Healthsites.io 
2. Electrification  
    a. The global electrification platform generated several datasets on electrification status and potential electrification solutions  
3. ICT coverage  
    a. GSMA coverage has been acquired from Bartholemew Collins – you can visualize it (inside the World Bank firewall) here  

See OneDrive [methodology document here](https://worldbankgroup-my.sharepoint.com/:w:/r/personal/jvicencio_worldbank_org/_layouts/15/Doc.aspx?sourcedoc=%7B6A1130C8-A105-4C8B-AB52-4629F72DB07A%7D&file=CoVID%20Vaccine%20Dissemination%20Preparedness%20Methodology.docx&action=default&mobileredirect=true)

In [1]:
import os, sys, importlib, subprocess, copy, zipfile, json, urllib
import rasterio, geohash

import geopandas as gpd
import pandas as pd
import numpy as np
import skimage.graph as graph

from shapely.geometry import Point, box
from shapely.wkt import loads
from rasterio import features
from collections import Counter

sys.path.append("../")

import infrasap.vulnerability_mapping as vulmap
import infrasap.covid_data_extraction as cov
import infrasap.rasterMisc as rMisc
import infrasap.misc as misc
import infrasap.osmMisc as osm
import infrasap.UrbanRaster as urban
import infrasap.market_access as ma

In [3]:
# ISO3 code of the country being processed
iso3 = 'ZWE'

# ---- Input data ----
# Global source datasets
all_facilities = "/home/public/Data/GLOBAL/HEALTH/HealthsitesIO/20201023/World-node.shp"
global_gsm_folder = "/home/public/Data/GLOBAL/INFRA/GSMA/2019/MCE/Data_MCE/Global"
global_friction_noOcean = "/home/public/Data/GLOBAL/INFRA/FRICTION_2015/2015_friction_surface_v1_no_ocean_travel.tif"

# Project folders; each country has its own sub-folder named by ISO3 code
covid_base_folder = "/home/wb411133/data/Projects/CoVID"
input_covid_folder = os.path.join(covid_base_folder, iso3)

# Indicator metadata (read below into covid_metadata)
risk_json_file = "/home/wb411133/temp/RiskSchema.json"
with open(risk_json_file, 'r') as json_stream:
    covid_metadata = json.load(json_stream)

# ---- Output data (all written into the country folder) ----
friction_surface = os.path.join(input_covid_folder, os.path.basename(global_friction_noOcean))
access_to_facilities = os.path.join(input_covid_folder, "travel_time_health_facilities.tiff")
health_facilities = os.path.join(input_covid_folder, "health_facilities.shp")

In [34]:
# List every entry of the hnp_indicators section as "<key>: <Name>"
# (uncomment one of the filters below to restrict the listing)
hnp_indicators = covid_metadata['hnp_indicators']
for key in hnp_indicators:
    value = hnp_indicators[key]
    #if "DHS" in key:
    #if "population" in value['Name'].lower():
    print("%s: %s" % (key, value['Name']))

P1: Urban_Population
P2: Risk from Demographics
P3: Age_incident_curve
P4: Obesity
P5: HeartDisease
P6: Diabetes
P7: ImmuneComp
P8: Cancer
P9: Density
P10: Population_Risk_Hotspot
P11: Water_Points_Risk_Hotspot
P12: Toilets_Risk_Hotspot
P13: Shops_Risk_Hotspot
A1: Water_fountain_public
A2: Water_fountain_shared
A3: Water_toilet_public
A4: Water_toilet_shared
A5: Transport_public
A6: Access_Health_Facility_public
S1: Case_Rates_by_Age_Severe
S2: Case_Rates_by_Age_Death
S3: Case_Rates_by_Gender_Severe
S4: Case_Rates_by_Gender_Death
S5: Case_Rates_Recovered
S6: Tests_total
S7: Tests_pos
S8: Tests_neg
S9: Tests_inconclusive
S10: Deaths
S11: Cumulative Deaths
S12: Confirmed
S13: Cumulative Confirmed
C1: Close_school 
C2: Close_workplaces
C3: Cancel_events
C4: Limit_gathering_size
C5: Close_transit_public
C6: Stay_home_requirements
C7: Limit_movement
C8: Limit_international_travel
E1: Econ_support_debt
E2: Econ_support_income
E3: Econ_support_econ
E4: Econ_support_allies
H1: Health_engage_pu

In [4]:
class covid_data(object):
    ''' Reader for the country-specific CoVID results stored in the local file repository.

        The CoVID data are all stored in an open database that I cannot currently figure out, instead
        we will read the data from our local file repository

        http://covid-publi-1onc9lx0j49x6-1338300620.us-east-1.elb.amazonaws.com/
    '''
    # Administrative levels that readin() knows how to load
    SUPPORTED_LEVELS = ('adm1', 'adm2')

    def __init__(self, folder):
        ''' Initial read in of CoVID data for area of interest

        Only builds file paths and column lists; nothing is read from disk here.

        INPUT
        folder [string path] - country-specific folder of CoVID results
        '''
        self.adm0_file = os.path.join(folder, 'adm0.shp')
        self.adm1_file = os.path.join(folder, 'adm1.shp')
        self.adm2_file = os.path.join(folder, 'adm2.shp')
        #define zonal results
        stats_folder = os.path.join(folder, "FINAL_STATS")
        self.adm1_zonal_BASE = os.path.join(stats_folder, "adm1_zonal_BASE.csv")
        self.adm1_zonal_DHS  = os.path.join(stats_folder, "adm1_zonal_DHS.csv")
        self.adm2_zonal_BASE = os.path.join(stats_folder, "adm2_zonal_BASE.csv")
        self.adm2_zonal_DHS  = os.path.join(stats_folder, "adm2_zonal_DHS.csv")
        #define RISK values to extract from the zonal results
        self.dhs_elec = ["DHS_27",'DHS_28']
        self.urban_pop = ['R10','P1']
        self.vul_pop = ['P2']
        self.pop = ['P9']
        self.wash = ['DHS_33', "DHS_57", "DHS_87"]
        # NOTE: 'WB_ADMO_CO' (letter O) matches the column name in the admin shapefiles
        self.default_cols = ['OBJECTID','WB_ADMO_CO', 'WB_ADM0_NA', 'WB_ADM1_CO','WB_ADM1_NA', 'WB_ADM2_CO','WB_ADM2_NA', 'ISO3']
        all_cols = self.dhs_elec + self.urban_pop + self.vul_pop + self.pop + self.wash
        self.all_cols = all_cols
        # Columns kept from the zonal tables: <indicator>_SUM, <indicator>_MEAN and the identifier columns
        self.all_cols_stats = ['%s_SUM' % x for x in all_cols] + ['%s_MEAN' % x for x in all_cols] + self.default_cols

    def _read_filtered_stats(self, csv_file):
        ''' Read a zonal results CSV (first column as index) and keep only the columns in all_cols_stats '''
        wanted = set(self.all_cols_stats)
        table = pd.read_csv(csv_file, index_col=0)
        keep_cols = [x for x in table if x in wanted]
        return table.loc[:, keep_cols]

    def readin(self, level='adm1'):
        ''' Read the national bounds, admin boundaries and zonal results for the requested level

        Sets self.national_bounds, self.adm, self.base and self.dhs.

        INPUT
        [optional] level [string] - administrative level to load, 'adm1' or 'adm2'

        RAISES
        ValueError - if level is not one of SUPPORTED_LEVELS
        '''
        # Fail before any file is read; previously an unknown level returned silently
        # and left adm/base/dhs undefined
        if level not in self.SUPPORTED_LEVELS:
            raise ValueError("Unsupported level '%s'; expected one of %s" % (level, list(self.SUPPORTED_LEVELS)))

        self.national_bounds = gpd.read_file(self.adm0_file)
        if level == 'adm1':
            self.adm = gpd.read_file(self.adm1_file)
            self.base = self._read_filtered_stats(self.adm1_zonal_BASE)
            self.dhs = self._read_filtered_stats(self.adm1_zonal_DHS)
        else:
            # NOTE(review): unlike adm1, the adm2 tables are read without index_col and
            # without column filtering - kept as-is; confirm whether that is intended
            self.adm = gpd.read_file(self.adm2_file)
            self.base = pd.read_csv(self.adm2_zonal_BASE)
            self.dhs = pd.read_csv(self.adm2_zonal_DHS)

# Build the reader for the selected country and load the admin-1 boundaries and zonal results
cData = covid_data(folder=input_covid_folder)
cData.readin('adm1')

# Calculate access to health facilities


In [21]:
# Extract national health facilities from healthsites.io
# (the selection is cached in health_facilities and re-read on later runs)
if os.path.exists(health_facilities):
    sel_facilities = gpd.read_file(health_facilities)
else:
    in_facilities = gpd.read_file(all_facilities)
    sidx = in_facilities.sindex

    # Bring the national bounds into the CRS of the facilities before any spatial test
    if cData.national_bounds.crs != in_facilities.crs:
        cData.national_bounds = cData.national_bounds.to_crs(in_facilities.crs)
    # Coarse filter on the bounding box; the spatial index returns integer positions,
    # so select with .iloc rather than label-based .loc
    bbox_hits = sidx.intersection(cData.national_bounds.total_bounds)
    sel_facilities = in_facilities.iloc[bbox_hits]
    # Exact filter against the national boundary geometry
    sel_facilities = sel_facilities.loc[sel_facilities.intersects(cData.national_bounds.unary_union)]
    sel_facilities.to_file(health_facilities)

# Extract friction surface
if not os.path.exists(friction_surface):
    # Context manager so the global raster handle is closed once the clip is written
    with rasterio.open(global_friction_noOcean) as global_friction:
        rMisc.clipRaster(global_friction, cData.national_bounds, friction_surface)

In [22]:
# Count the selected health facilities by their 'amenity' value
sel_facilities['amenity'].value_counts()

clinic      86
hospital    29
pharmacy    24
doctors     12
dentist      3
Name: amenity, dtype: int64

In [35]:
# Prepare the cost surface used to calculate travel time to the nearest health facility
# (in_friction is left open because the travel-time calculation below uses it)
in_friction = rasterio.open(friction_surface)
inR = in_friction.read(1)
# nodata areas are set to 0, but need to be set to a very high cost
nodata_cells = inR <= 0.0
inR[nodata_cells] = 999999
mcp = graph.MCP_Geometric(inR)


In [36]:
# Travel time from the selected facilities across the friction surface
xx = ma.calculate_travel_time(in_friction, mcp, sel_facilities)

# Reuse the friction raster's profile, swapping in the dtype of the result
meta = in_friction.meta.copy()
meta['dtype'] = xx[0].dtype

# The first element of the result is written as band 1 (presumably the travel-time array)
with rasterio.open(access_to_facilities, 'w', **meta) as out:
    out.write(xx[0], 1)

In [37]:
# Summarize populations within travel bands
# Insert the suffix before the extension; a plain ".tif" replace only worked on the
# ".tiff" name by accident (this yields the same "<name>_match_pop.tiff" file)
tt_root, tt_ext = os.path.splitext(access_to_facilities)
tt_match = "%s_match_pop%s" % (tt_root, tt_ext)
pop_layer = os.path.join(input_covid_folder, "WP_2020_1km.tif")
if not os.path.exists(tt_match):
    # Standardize the travel-time raster to the population raster; the temporary
    # handles are closed as soon as the matched file has been written
    with rasterio.open(access_to_facilities) as tt_src, rasterio.open(pop_layer) as pop_src:
        rMisc.standardizeInputRasters(tt_src, pop_src, tt_match)

# These handles stay open on purpose: pop_raster is reused by the zonal statistics below
pop_raster = rasterio.open(pop_layer)
pop_d = pop_raster.read()
tt_raster = rasterio.open(tt_match)
tt_d = tt_raster.read()

# Calculate population within various travel thresholds
pop_meta = pop_raster.meta
tt_pop_files = []
for thresh in [30, 60, 120, 180]:
    out_file = os.path.join(input_covid_folder, "tt_pop_%s.tif" % thresh)
    tt_pop_files.append(out_file)
    # 1 where travel time is below the threshold, 0 elsewhere
    cur_tt = (tt_d < thresh) * 1
    # Keep population only inside the travel band, in the population raster's dtype
    cur_pop = (cur_tt * pop_d).astype(pop_meta['dtype'])
    with rasterio.open(out_file, 'w', **pop_meta) as out_raster:
        out_raster.write(cur_pop)

In [43]:
#Run zonal stats on travel_time population layers
# Collect one table per layer and combine them once; the previous try/except join
# would silently discard everything accumulated so far if any join raised
stat_names = ["SUM", "MIN", "MAX", "MEAN"]
zonal_tables = []
for tt_pop_file in tt_pop_files:
    # The threshold is the last "_"-separated token of the file name (tt_pop_<thresh>.tif)
    thresh = tt_pop_file.split("_")[-1].replace(".tif", "")
    res = rMisc.zonalStats(cData.adm, tt_pop_file, minVal=0)
    res = pd.DataFrame(res, columns=["%s_%s" % (val, thresh) for val in stat_names])
    zonal_tables.append(res)

# Total population per admin unit, used as the denominator for the shares
res = rMisc.zonalStats(cData.adm, pop_raster, minVal=0)
res = pd.DataFrame(res, columns=["%s_%s" % (val, "total_pop") for val in stat_names])
zonal_tables.append(res)

# Side-by-side on the shared row index, then keep only the SUM columns
final = pd.concat(zonal_tables, axis=1)
final = final.filter(regex="SUM")

In [44]:
# Preview the first rows of the loaded administrative boundaries
cData.adm.head()

Unnamed: 0,OBJECTID,WB_ADMO_CO,WB_ADM0_NA,WB_ADM1_CO,WB_ADM1_NA,ISO_A2,Shape_Leng,Shape_Area,ISO3,geometry
0,3343,271,Zimbabwe,3435,Bulawayo,ZW,133250.8,526257900.0,ZWE,POLYGON ((28.60867309583881 -20.23068809467565...
1,3344,271,Zimbabwe,3436,Harare,ZW,209033.4,1118045000.0,ZWE,"POLYGON ((31.11623382586747 -17.6924839023963,..."
2,3345,271,Zimbabwe,3437,Manicaland,ZW,1542219.0,40162850000.0,ZWE,POLYGON ((32.99626000015754 -17.25993000020309...
3,3346,271,Zimbabwe,3438,Mashonaland Central,ZW,1214989.0,30828470000.0,ZWE,POLYGON ((30.42514999991944 -15.62827000034756...
4,3347,271,Zimbabwe,3440,Mashonaland West,ZW,1683374.0,63543110000.0,ZWE,"POLYGON ((29.90322158391638 -15.6224278543519,..."


In [45]:
# Attach the admin-1 names to the summary table.
# NOTE(review): pandas aligns this assignment on the index, so it presumably relies on
# `final` and cData.adm sharing the same row index - confirm
final['Name'] = cData.adm['WB_ADM1_NA']

In [48]:
# Share of each admin unit's population that falls below the 60 travel-time threshold
# (units follow the friction surface - presumably minutes; confirm)
final['SUM_60'] / final['SUM_total_pop']

0    1.000000
1    1.000000
2    0.687336
3    0.595982
4    0.603616
5    0.463155
6    0.343490
7    0.415909
8    0.399052
9    0.659970
dtype: float64

# Summarize GSMA coverage by population

In [None]:
# Pick up any edits made to rasterMisc since it was first imported
importlib.reload(rMisc)

# Global GSMA coverage rasters, one per network generation (2G, 3G, 4G)
gsm_generations = [2, 3, 4]
in_gsm_files = [
    os.path.join(global_gsm_folder, 'MCE_Global%sG_2020.tif' % x)
    for x in gsm_generations
]

# Population raster for the country, read into memory
pop_layer = os.path.join(input_covid_folder, "WP_2020_1km.tif")
pop_raster = rasterio.open(pop_layer)
pop_d = pop_raster.read()


In [None]:
# Summarize population covered by each GSMA network generation per admin unit.
# Tables are collected in a list and combined once; the previous try/except join
# would silently discard everything accumulated so far if any join raised
stat_names = ["SUM", "MIN", "MAX", "MEAN"]
zonal_tables = []

# Extract country files
for gsm_file in in_gsm_files:
    # Last two characters of the second-to-last "_" token, e.g. "Global2G" -> "2G"
    gsm_level = gsm_file.split("_")[-2][-2:]
    out_file_base = os.path.join(input_covid_folder, os.path.basename(gsm_file))
    out_file = out_file_base.replace(".tif", "_STANDARD.tif")
    out_file_pop = out_file_base.replace(".tif", "_POP.tif")
    if not os.path.exists(out_file):
        # Clip the global raster to the admin boundaries; handles are closed explicitly
        # so the temporary clip is no longer open when it is removed below
        with rasterio.open(gsm_file) as global_gsm:
            rMisc.clipRaster(global_gsm, cData.adm, out_file_base)
        # standardize gsm_file to pop file
        with rasterio.open(out_file_base) as clipped_gsm, rasterio.open(pop_layer) as pop_src:
            rMisc.standardizeInputRasters(clipped_gsm, pop_src, out_file, data_type="C")
        os.remove(out_file_base)
    if not os.path.exists(out_file_pop):
        with rasterio.open(out_file) as gsma_coverage:
            # 1 wherever the coverage raster holds a value other than its nodata value
            gsma_d = (gsma_coverage.read() != gsma_coverage.meta['nodata']) * 1
        print(gsma_d.sum())
        # Keep population only where there is coverage, in the population raster's dtype
        cur_pop = pop_d * gsma_d
        cur_pop = cur_pop.astype(pop_raster.meta['dtype'])
        with rasterio.open(out_file_pop, 'w', **pop_raster.meta) as out_r:
            out_r.write(cur_pop)
    #run zonal
    print(out_file_pop)
    res = rMisc.zonalStats(cData.adm, out_file_pop, minVal=0)
    res = pd.DataFrame(res, columns=["%s_%s" % (val, gsm_level) for val in stat_names])
    zonal_tables.append(res)

# Side-by-side on the shared row index, then keep only the SUM columns
final = pd.concat(zonal_tables, axis=1)
final = final.filter(regex="SUM")

In [None]:
# Display the SUM columns of the zonal results (one row per admin unit)
final