In [1]:
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
import pandas as pd
import geopandas as gpd
import requests

import shapely
from shapely.geometry import Polygon, Point
from shapely.ops import cascaded_union
import shapely.speedups

shapely.speedups.enable()

import folium
from folium.plugins import MeasureControl

import plotly.express as px

pd.set_option('display.max_rows', 500)

In May 2021, Nepal had only 69 operational COVID-19 test labs; by November 2021, the number had increased to 89. This study assesses how the opening of these additional labs changed physical access to testing for vulnerable populations in Nepal. By combining public data from the Ministry of Health and Population (MoHP) with global datasets on population, healthcare centers, travel time, and relative wealth index, it aims to provide critical insights into optimizing the location of testing facilities using open data and optimization models, with a focus on confronting unprecedented pandemics such as COVID-19.

In [3]:
# Geocoded laboratories: keep the row id, the coordinates and the lab name.
existing_labs = (
    pd.read_csv('Data/laboratory_geocoded.csv')[['Unnamed: 0','lng','lat','Laboratory']]
    .rename(columns={'Unnamed: 0': 'index', 'lng': 'LONG', 'lat': 'LAT', 'Laboratory': 'L_NAME'})
)
# De-duplicated copy under the column names used by the rest of the notebook.
current_hospitals = (
    existing_labs[['index','LONG','LAT','L_NAME']]
    .drop_duplicates()
    .rename(columns={'index': 'Hosp_ID', 'LONG': 'Longitude', 'LAT': 'Latitude'})
)

# The earliest date on which a lab appears in the test log becomes its 'Date Open'.
df_lab_open_dates = pd.read_excel('Data/tests_per_lab.xlsx')[['Date','Laboratory','Province Name']]
df_open_dates = (
    df_lab_open_dates
    .groupby(['Laboratory','Province Name'])['Date']
    .min()
    .reset_index()
    .rename(columns={'Laboratory': 'L_NAME', 'Province Name': 'Province', 'Date': 'Date Open'})
)

In [4]:
# Attach province and opening date to every lab (inner join on the lab name).
existing_labs = current_hospitals.merge(df_open_dates, on='L_NAME')

def convert_point(df):
    """Return a shapely Point (x=Longitude, y=Latitude) for one lab row."""
    return Point(df['Longitude'], df['Latitude'])

# Row-wise conversion of the coordinate pair into a geometry object.
existing_labs['geometry'] = existing_labs[['Latitude','Longitude']].apply(convert_point, axis=1)

existing_labs.head(2)

Unnamed: 0,Hosp_ID,Longitude,Latitude,L_NAME,Province,Date Open,geometry
0,0,87.268361,26.812242,"B. P. Koirala Institute of Health Sciences, Dh...",Province 1,2021-05-01,POINT (87.2683609 26.8122418)
1,1,87.285188,26.45905,"Koshi Hospital, Biratnagar",Province 1,2021-05-01,POINT (87.2851884 26.4590497)


In [5]:
%%time
# Population table read from the 1 km file; reset_index() supplies a running row number.
population = pd.read_csv(r'Data/ppp_NPL_2020_1km_Aggregated_UNadj.csv').reset_index()
population.columns = ['ID','xcoord','ycoord','household_count']
# Round both coordinates to 2 decimals so rows sharing a rounded location can be merged.
population[['xcoord','ycoord']] = population[['xcoord','ycoord']].round(2)

# Sum the counts per rounded coordinate; the second reset_index() creates
# a fresh 0..n-1 identifier for the merged points.
population = (
    population
    .groupby(['xcoord','ycoord'])['household_count']
    .sum()
    .reset_index()
    .reset_index()
)
population['household_count'] = population['household_count'].round()
population.columns = ['ID','xcoord','ycoord','population']

def convert_Point(population):
    """Build a shapely Point (x=xcoord, y=ycoord) from one row of the table."""
    x_value = population['xcoord']
    y_value = population['ycoord']
    return Point(x_value, y_value)

population['geometry'] = population[['ycoord','xcoord']].apply(convert_Point, axis=1)
population = gpd.GeoDataFrame(population)

CPU times: user 7.85 s, sys: 112 ms, total: 7.96 s
Wall time: 8.05 s


In [13]:
# National total of the 'population' column, expressed in millions.
print(f"Total Population: {round(population['population'].sum()/1000000,2)} million")

Total Population: 29.14 million


In [6]:
districts_nepal = gpd.read_file('Data/shapefile_nepal_districts.geojson')
# Tag every population point with the attributes of the district polygon that
# contains it (left join: points outside all districts are kept with NaN).
# GeoPandas 0.10 renamed sjoin's `op` keyword to `predicate` and 1.0 removed
# `op`, so use the new spelling first and fall back for older installations.
# NOTE: this cell overwrites `population`; re-running it without re-running the
# cell that builds `population` joins an already-joined frame.
try:
    population = gpd.sjoin(population, districts_nepal, how="left", predicate="within")
except TypeError:
    population = gpd.sjoin(population, districts_nepal, how="left", op="within")


In [123]:
# SECURITY: a Mapbox access token was previously hard-coded in this cell.
# A token that has been committed or shared must be treated as leaked and
# rotated in the Mapbox account. The token is now read from the environment so
# that it never ends up in the saved notebook.
import os

access_token = os.environ.get("MAPBOX_ACCESS_TOKEN")
if not access_token:
    raise RuntimeError(
        "Set the MAPBOX_ACCESS_TOKEN environment variable before running this notebook."
    )



In [124]:
def get_isochrone(df,minutes_list,access_token,mode,timeout=30):
    """Request isochrone polygons around one location from the Mapbox Isochrone API.

    Parameters
    ----------
    df : mapping with 'Longitude' and 'Latitude' entries (one row when used
        through ``DataFrame.apply(..., axis=1)``).
    minutes_list : str
        Value sent as ``contours_minutes`` (e.g. ``"60"``).
    access_token : str
        Mapbox access token.
    mode : str
        Routing profile appended to the URL path (e.g. ``'driving'``, ``'walking'``).
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 30).

    Returns
    -------
    The value of the ``'features'`` entry of the JSON response.

    Raises
    ------
    RuntimeError
        If the request fails, the server answers with an error status, or the
        response has no ``'features'`` entry. Messages deliberately omit the
        request URL because it contains the access token.
    """
    longitude = df['Longitude']
    latitude = df['Latitude']
    url = 'https://api.mapbox.com/isochrone/v1/mapbox/' + mode + '/' + str(longitude) + ',' + str(latitude)
    query = 'contours_minutes=' + minutes_list + '&polygons=true&access_token=' + access_token

    try:
        response = requests.get(url + '?' + query, timeout=timeout)
    except requests.RequestException as exc:
        # `from None` drops the original exception, whose text embeds the URL (and token).
        raise RuntimeError(
            'Isochrone request failed for (%s, %s): %s' % (longitude, latitude, type(exc).__name__)
        ) from None

    if not response.ok:
        raise RuntimeError(
            'Isochrone request for (%s, %s) returned HTTP %s: %s'
            % (longitude, latitude, response.status_code, response.text[:200])
        )

    req_return = response.json()
    if 'features' not in req_return:
        # Error payloads carry no 'features' entry; report the payload instead of a bare KeyError.
        raise RuntimeError(
            'Isochrone response for (%s, %s) has no features: %s'
            % (longitude, latitude, str(req_return)[:200])
        )
    return req_return['features']

In [31]:
def get_pop_count(x):
    """Sum the 'population' column of the global `population` table over the rows whose 'ID' is in `x`."""
    selected_rows = population['ID'].isin(x)
    return population[selected_rows]['population'].sum()

In [32]:
%%time

# One API request per lab for each travel mode / time budget. The returned
# feature list is stored in an 'isochrone_<minutes>min_<mode>' column; the
# iteration order reproduces the column order 60/30 driving, 60/30 walking.
for _mode in ('driving', 'walking'):
    for _minutes in ('60', '30'):
        existing_labs['isochrone_' + _minutes + 'min_' + _mode] = (
            existing_labs[['Longitude','Latitude']].apply(
                get_isochrone,
                minutes_list=_minutes,
                access_token=access_token,
                mode=_mode,
                axis=1,
            )
        )

CPU times: user 5.65 s, sys: 583 ms, total: 6.23 s
Wall time: 25.2 s


In [33]:
%%time

# For every scenario, take the geometry of the first returned feature and
# build a shapely Polygon from its first coordinate ring.
for _scenario in ('60min_driving', '30min_driving', '60min_walking', '30min_walking'):
    existing_labs[_scenario] = existing_labs['isochrone_' + _scenario].apply(
        lambda features: Polygon(features[0]['geometry']['coordinates'][0])
    )


CPU times: user 21.7 ms, sys: 3.99 ms, total: 25.7 ms
Wall time: 23.6 ms


In [34]:
def get_population_within_vector(vector_polygon,vector_layer):
    """Return the distinct 'ID' values of the rows of `vector_layer` that lie within `vector_polygon`.

    `vector_layer.within(vector_polygon)` supplies the boolean row mask; the
    result is a plain list in order of first appearance.
    """
    rows_inside = vector_layer.loc[vector_layer.within(vector_polygon)]
    distinct_ids = rows_inside['ID'].unique()
    return list(distinct_ids)

In [35]:
%%time

# For every scenario polygon, list the IDs of the population points it contains.
for _scenario in ('60min_driving', '30min_driving', '60min_walking', '30min_walking'):
    existing_labs['ID_' + _scenario] = existing_labs[_scenario].apply(
        get_population_within_vector, vector_layer=population
    )


CPU times: user 19 s, sys: 70.4 ms, total: 19 s
Wall time: 19.1 s


In [36]:
# Example lab for the map below: the distinct lab name at position 4.
selected_hosp = existing_labs['L_NAME'].drop_duplicates().iloc[4]
selected_hosp

'Nobel Medical College Teaching Hospital,Biratnagar'

In [37]:
start_coords = (28.4939,84.1240)
folium_map = folium.Map(location=start_coords, zoom_start=5)

# Rows of the selected lab only.
test_ids = existing_labs[existing_labs['L_NAME']==selected_hosp]

# (polygon column, outline colour) pairs, drawn in this order.
_catchment_layers = [
    ('60min_driving', 'red'),
    ('30min_driving', 'cyan'),
    ('60min_walking', 'blue'),
    ('30min_walking', 'green'),
]

for i in range(len(test_ids)):
    _lab = test_ids.iloc[i]
    folium.Marker([_lab['Latitude'], _lab['Longitude']],
                  color='blue', popup=_lab['L_NAME']).add_to(folium_map)

    for _column, _colour in _catchment_layers:
        # The default argument pins the colour for this layer (avoids late binding).
        geo_j = folium.GeoJson(data=_lab[_column],
                               style_function=lambda x, _c=_colour: {'color': _c})
        folium.Popup(_lab['L_NAME']).add_to(geo_j)
        geo_j.add_to(folium_map)

folium_map

## Calculating percentages for all open test centres.

In [38]:
# Share (%) of the total population whose point lies inside the 30-minute
# walking polygon of at least one lab.
list_pop_ids = [pt_id for lab_ids in existing_labs['ID_30min_walking'].values for pt_id in lab_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

17.0

In [39]:
# Share (%) of the total population whose point lies inside the 60-minute
# walking polygon of at least one lab.
list_pop_ids = [pt_id for lab_ids in existing_labs['ID_60min_walking'].values for pt_id in lab_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

22.0

In [40]:
# Share (%) of the total population whose point lies inside the 30-minute
# driving polygon of at least one lab.
list_pop_ids = [pt_id for lab_ids in existing_labs['ID_30min_driving'].values for pt_id in lab_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

35.0

In [41]:
# Share (%) of the total population whose point lies inside the 60-minute
# driving polygon of at least one lab.
list_pop_ids = [pt_id for lab_ids in existing_labs['ID_60min_driving'].values for pt_id in lab_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

62.0

## Calculating percentages for test centres open in May 2021

In [42]:
# Labs whose 'Date Open' equals 2021-05-01.
# NOTE(review): labs with a later first date in May 2021 are not selected — confirm this is intended.
initial_labs = existing_labs.loc[existing_labs['Date Open'] == '2021-05-01']

In [43]:
# Share (%) of the total population inside the 30-minute walking polygon of
# at least one of the initial labs.
list_pop_ids = [pt_id for lab_ids in initial_labs['ID_30min_walking'].values for pt_id in lab_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()


16.0

In [44]:
# Share (%) of the total population inside the 60-minute walking polygon of
# at least one of the initial labs.
list_pop_ids = [pt_id for lab_ids in initial_labs['ID_60min_walking'].values for pt_id in lab_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

21.0

In [45]:
# Share (%) of the total population inside the 30-minute driving polygon of
# at least one of the initial labs.
list_pop_ids = [pt_id for lab_ids in initial_labs['ID_30min_driving'].values for pt_id in lab_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

33.0

In [46]:
# Share (%) of the total population inside the 60-minute driving polygon of
# at least one of the initial labs.
list_pop_ids = [pt_id for lab_ids in initial_labs['ID_60min_driving'].values for pt_id in lab_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

55.0

### The 60-minute driving catchment shows the largest increase in coverage, from 55% to 62%

In [84]:
df_relative_wealth_index = pd.read_csv('Data/npl_relative_wealth_index.csv')
def convert_Point(population):
    """Build a shapely Point (x=longitude, y=latitude) from one row.

    Note: this redefines the `convert_Point` helper declared earlier for the
    population table; despite its name, the argument here is a row of the
    relative-wealth-index table.
    """
    return Point(population['longitude'],population['latitude'])
df_relative_wealth_index['geometry'] = df_relative_wealth_index[['latitude','longitude']].apply(convert_Point,axis=1)
df_relative_wealth_index = gpd.GeoDataFrame(df_relative_wealth_index)
# GeoPandas 0.10 renamed sjoin's `op` keyword to `predicate` and 1.0 removed
# `op`; try the new spelling first and fall back for older installations.
try:
    rwi_districts = gpd.sjoin(df_relative_wealth_index, districts_nepal, how="left", predicate="within")
except TypeError:
    rwi_districts = gpd.sjoin(df_relative_wealth_index, districts_nepal, how="left", op="within")
# Average the index over all points falling in each district.
rwi_districts = rwi_districts.groupby(['DISTRICT','Province'])['rwi'].mean().reset_index()
rwi_districts['rwi'] = rwi_districts['rwi'].round(2)
rwi_districts.columns = ['District','Province','Relative Wealth Index']

In [94]:
# IDs of the population points inside the 60-minute driving polygon of any initial lab.
list_pop_ids = [pt_id for lab_ids in initial_labs['ID_60min_driving'].values for pt_id in lab_ids]

# Covered population per district.
district_analysis = population[population['ID'].isin(list_pop_ids)][['population','DISTRICT','Province']]
district_analysis.columns = ['Pop with Access','District','Province']
dt1 = district_analysis.groupby(['District','Province'])['Pop with Access'].sum().reset_index()

# Total population per district.
dt2 = population.groupby(['DISTRICT','Province'])['population'].sum().reset_index()
dt2.columns = ['District','Province','Total Population']

# how='right' keeps every district of dt2: a district with no covered point is
# absent from dt1 and must be plotted at 0 % rather than silently dropped.
df_pop_access_dist = pd.merge(dt1,dt2,on=['District','Province'],how='right')
df_pop_access_dist['Pop with Access'] = df_pop_access_dist['Pop with Access'].fillna(0)
df_pop_access_dist['%'] = df_pop_access_dist['Pop with Access']*100/df_pop_access_dist['Total Population']
df_pop_access_dist['%'] = df_pop_access_dist['%'].round(2)

df_pop_access_dist = pd.merge(df_pop_access_dist,rwi_districts,on=['District','Province'])

fig = px.scatter(df_pop_access_dist,y='%',x='Relative Wealth Index',color='Province',size='Total Population')
fig.update_xaxes(title='Average Relative Wealth Index of District')
fig.update_yaxes(title='% of Population with Access')
fig.update_layout(plot_bgcolor='white',title='60 Min Driving for Initial set of 69 labs')

In [95]:
# IDs of the population points inside the 60-minute driving polygon of any lab.
list_pop_ids = [pt_id for lab_ids in existing_labs['ID_60min_driving'].values for pt_id in lab_ids]

# Covered population per district.
district_analysis = population[population['ID'].isin(list_pop_ids)][['population','DISTRICT','Province']]
district_analysis.columns = ['Pop with Access','District','Province']
dt1 = district_analysis.groupby(['District','Province'])['Pop with Access'].sum().reset_index()

# Total population per district.
dt2 = population.groupby(['DISTRICT','Province'])['population'].sum().reset_index()
dt2.columns = ['District','Province','Total Population']

# how='right' keeps every district of dt2: a district with no covered point is
# absent from dt1 and must be plotted at 0 % rather than silently dropped.
df_pop_access_dist = pd.merge(dt1,dt2,on=['District','Province'],how='right')
df_pop_access_dist['Pop with Access'] = df_pop_access_dist['Pop with Access'].fillna(0)
df_pop_access_dist['%'] = df_pop_access_dist['Pop with Access']*100/df_pop_access_dist['Total Population']
df_pop_access_dist['%'] = df_pop_access_dist['%'].round(2)

df_pop_access_dist = pd.merge(df_pop_access_dist,rwi_districts,on=['District','Province'])

fig = px.scatter(df_pop_access_dist,y='%',x='Relative Wealth Index',color='Province',size='Total Population')
fig.update_xaxes(title='Average Relative Wealth Index of District')
fig.update_yaxes(title='% of Population with Access')
fig.update_layout(plot_bgcolor='white',title='60 Min Driving for all 89 labs')

### Data of potential locations as list of hospitals

In [103]:
# Candidate sites: features of the OSM extract whose amenity is 'hospital'.
potential_locations = gpd.read_file('Data/nepal_hospitals_osm.geojson')
potential_locations = potential_locations[['amenity','name','geometry']].reset_index()
potential_locations.columns = ['ID','Facility Type','Name','geometry']
# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not write to a slice (SettingWithCopyWarning, which the
# global warnings filter would otherwise hide).
potential_locations = potential_locations[potential_locations['Facility Type']=='hospital'].copy()
# The x/y accessors below assume every geometry is a point.
potential_locations['Latitude'] = potential_locations['geometry'].apply(lambda x:x.y)
potential_locations['Longitude'] = potential_locations['geometry'].apply(lambda x:x.x)

In [104]:
# Number of candidate hospital rows.
potential_locations.shape[0]

694

In [110]:
%%time
# One API request per candidate hospital for each travel mode / time budget.
# The returned feature list is stored in an 'isochrone_<minutes>min_<mode>'
# column; the iteration order reproduces 60/30 driving, then 60/30 walking.
for _mode in ('driving', 'walking'):
    for _minutes in ('60', '30'):
        potential_locations['isochrone_' + _minutes + 'min_' + _mode] = (
            potential_locations[['Longitude','Latitude']].apply(
                get_isochrone,
                minutes_list=_minutes,
                access_token=access_token,
                mode=_mode,
                axis=1,
            )
        )

CPU times: user 11.3 s, sys: 1.08 s, total: 12.4 s
Wall time: 44.8 s


In [128]:
%%time

# For every scenario, take the geometry of the first returned feature and
# build a shapely Polygon from its first coordinate ring.
for _scenario in ('60min_driving', '30min_driving', '60min_walking', '30min_walking'):
    potential_locations[_scenario] = potential_locations['isochrone_' + _scenario].apply(
        lambda features: Polygon(features[0]['geometry']['coordinates'][0])
    )


CPU times: user 355 ms, sys: 37.3 ms, total: 392 ms
Wall time: 381 ms


In [129]:
%%time

# For every scenario polygon, list the IDs of the population points it contains.
for _scenario in ('60min_driving', '30min_driving', '60min_walking', '30min_walking'):
    potential_locations['ID_' + _scenario] = potential_locations[_scenario].apply(
        get_population_within_vector, vector_layer=population
    )


CPU times: user 1min 50s, sys: 356 ms, total: 1min 51s
Wall time: 1min 51s


In [130]:
# Share (%) of the total population inside the 30-minute walking polygon of
# at least one candidate hospital.
list_pop_ids = [pt_id for site_ids in potential_locations['ID_30min_walking'].values for pt_id in site_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

21.0

In [131]:
# Share (%) of the total population inside the 60-minute walking polygon of
# at least one candidate hospital.
list_pop_ids = [pt_id for site_ids in potential_locations['ID_60min_walking'].values for pt_id in site_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

31.0

In [132]:
# Share (%) of the total population inside the 30-minute driving polygon of
# at least one candidate hospital.
list_pop_ids = [pt_id for site_ids in potential_locations['ID_30min_driving'].values for pt_id in site_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

54.0

In [133]:
# Share (%) of the total population inside the 60-minute driving polygon of
# at least one candidate hospital.
list_pop_ids = [pt_id for site_ids in potential_locations['ID_60min_driving'].values for pt_id in site_ids]
pop_with_access = list(set(list_pop_ids))

(100 * population.loc[population['ID'].isin(pop_with_access), 'population'].sum()
 / population['population'].sum()).round()

80.0

In [138]:
# IDs of the population points inside the 60-minute driving polygon of any candidate hospital.
list_pop_ids = [pt_id for site_ids in potential_locations['ID_60min_driving'].values for pt_id in site_ids]

# Covered population per district.
district_analysis = population[population['ID'].isin(list_pop_ids)][['population','DISTRICT','Province']]
district_analysis.columns = ['Pop with Access','District','Province']
dt1 = district_analysis.groupby(['District','Province'])['Pop with Access'].sum().reset_index()

# Total population per district.
dt2 = population.groupby(['DISTRICT','Province'])['population'].sum().reset_index()
dt2.columns = ['District','Province','Total Population']

# how='right' keeps every district of dt2: a district with no covered point is
# absent from dt1 and must be plotted at 0 % rather than silently dropped.
df_pop_access_dist = pd.merge(dt1,dt2,on=['District','Province'],how='right')
df_pop_access_dist['Pop with Access'] = df_pop_access_dist['Pop with Access'].fillna(0)
df_pop_access_dist['%'] = df_pop_access_dist['Pop with Access']*100/df_pop_access_dist['Total Population']
df_pop_access_dist['%'] = df_pop_access_dist['%'].round(2)

df_pop_access_dist = pd.merge(df_pop_access_dist,rwi_districts,on=['District','Province'])

fig = px.scatter(df_pop_access_dist,y='%',x='Relative Wealth Index',color='Province',size='Total Population')
fig.update_xaxes(title='Average Relative Wealth Index of District')
fig.update_yaxes(title='% of Population with Access')
fig.update_layout(plot_bgcolor='white',title='60 Min Driving - Health Facility (Hospital) Access')

In [141]:
# Save the lab table, including the isochrone, polygon and ID_* columns added
# above, as a pickle file. The target directory must already exist.
existing_labs.to_pickle('Data/Results_Analytics/existing_labs.pkl')

In [142]:
# Save the candidate-hospital table, including the isochrone, polygon and ID_*
# columns added above, as a pickle file. The target directory must already exist.
potential_locations.to_pickle('Data/Results_Analytics/potential_locs_hospitals.pkl')

In [144]:
# Save the population table (with the district attributes joined in earlier)
# as a pickle file. The target directory must already exist.
population.to_pickle('Data/Results_Analytics/population.pkl')