In [None]:
# SELECT runtime { local | colab }
# 'local' uses an empty path prefix for the data files; 'colab' mounts Google Drive first.
runtime = 'local'

# SELECT Road Network Data { osm | estrada | combined }
# Decides which road-network GeoJSON file is loaded in the road-network cell.
road_path = 'osm'

# SELECT Population Data { facebook | worldpop | census }
# Decides which population source file is loaded in the population cell.
pop_path = 'census'

# SELECT Distance Threshold in KM
# Population points whose corrected distance to a hospital is at most this value count as covered.
distance_threshold = 5

In [None]:
# Resolve the path prefix of the data files for the runtime chosen in the configuration cell.
# Every supported runtime must define os_path_selected, because all later cells build
# their file paths from it.

if runtime == 'local':
    # Data files are read relative to the current working directory.
    os_path_selected = ''
elif runtime == 'colab':
    # google.colab only exists inside Colab, so it is imported only on this branch.
    from google.colab import drive
    drive.mount('/content/drive')
    # Root of the mounted Drive; change this to the Drive folder that holds the data files.
    os_path_selected = '/content/drive/MyDrive/'
else:
    raise ValueError("Unknown runtime {!r}; expected 'local' or 'colab'.".format(runtime))

In [None]:
import geopandas as gpd
import pandas as pd
import shapely
from shapely.geometry import Polygon, Point
import geopy.distance
import pandana
import numpy as np

In [None]:
def get_nodes_and_edges(json_file,rounding=5):
    """Read a polyline file with geopandas and derive the network's nodes and edges.

    Every feature is treated as one edge. Its first and last coordinates,
    rounded to ``rounding`` decimal places, are the edge's end nodes, so
    endpoints that agree after rounding collapse into a single node.

    json_file: path to a polyline file (read with the GeoJSON driver) with end to end connectivity
    rounding: number of decimal places kept when rounding endpoint coordinates
    returns: [nodes, edges] where ``nodes`` is a DataFrame of the unique
        endpoint coordinates (columns x, y) and ``edges`` is the frame read
        from the file with from_x, from_y, to_x, to_y columns added
    """
    edges = gpd.read_file(json_file,driver='GeoJSON')

    # Read each geometry's coordinate sequence once and keep only its two ends.
    # NOTE(review): relies on every geometry exposing .coords (single-part lines) - confirm for the input files.
    starts, ends = [], []
    for line in edges["geometry"]:
        coords = line.coords
        starts.append(coords[0])
        ends.append(coords[-1])

    edges["from_x"] = [round(point[0], rounding) for point in starts]
    edges["from_y"] = [round(point[1], rounding) for point in starts]
    edges["to_x"] = [round(point[0], rounding) for point in ends]
    edges["to_y"] = [round(point[1], rounding) for point in ends]

    # Stack start and end points, then keep each distinct (x, y) once in order of first appearance.
    nodes_from = edges[["from_x","from_y"]].rename(columns={"from_x":"x","from_y":"y"})
    nodes_to = edges[["to_x","to_y"]].rename(columns={"to_x":"x","to_y":"y"})
    nodes = (
        pd.concat([nodes_from, nodes_to], axis=0, ignore_index=True)
        .drop_duplicates()
        .reset_index(drop=True)
    )
    return [nodes , edges]

In [None]:
%%time

# Resolve the road-network file for the source chosen in the configuration cell and load it

road_files = {
    'osm': 'osm_network_edited.geojson',
    'estrada': 'road_estrada_new.geojson',
    'combined': 'estrada-osm-merged-tl.geojson',
}
if road_path not in road_files:
    # Fail with a clear message instead of a NameError on road_selected_path further down.
    raise ValueError("Unknown road_path {!r}; expected one of {}".format(road_path, sorted(road_files)))
road_selected_path = os_path_selected + road_files[road_path]

node_edge = get_nodes_and_edges(road_selected_path)
nodes, edges = node_edge

# The positional index of each unique endpoint becomes its node identifier.
nodes = nodes.reset_index()
nodes.columns = ['nodeID','lon','lat']

# Look up the node id of each edge's start point, then of its end point, by matching
# the rounded endpoint coordinates; the two lookups yield nodeID_x and nodeID_y.
edges_attr = pd.merge(edges,nodes,left_on=['from_x','from_y'], right_on=['lon','lat'])
edges_attr = pd.merge(edges_attr,nodes,left_on=['to_x','to_y'], right_on=['lon','lat'])
edges_attr = edges_attr.rename(columns={'nodeID_x':'node_start','nodeID_y':'node_end'})

def get_length_edge_geopy(x):
    """Return the geodesic distance in kilometres between an edge's two end points.

    x: row (or mapping) providing 'from_x', 'from_y', 'to_x' and 'to_y';
       the x values are used as longitudes and the y values as latitudes.
    Only the two end points are measured, not the shape of the line between them.
    """
    # Points are handed to geopy in (latitude, longitude) order.
    start_point = (float(x['from_y']), float(x['from_x']))
    end_point = (float(x['to_y']), float(x['to_x']))
    return geopy.distance.geodesic(start_point, end_point).meters / 1000

# Row-wise because geopy works on one pair of points at a time.
edges_attr['len_km'] = (
    edges_attr[['from_x','from_y','to_x','to_y']]
    .apply(get_length_edge_geopy, axis=1)
)

CPU times: user 2min 32s, sys: 1.18 s, total: 2min 33s
Wall time: 2min 34s


In [None]:
%%time
# Build the pandana network from the node coordinates and the edge list,
# using the edge length in km as the only edge weight; edges are traversable both ways.
network = pandana.Network(
    node_x=nodes['lon'],
    node_y=nodes['lat'],
    edge_from=edges_attr['node_start'],
    edge_to=edges_attr['node_end'],
    edge_weights=edges_attr[['len_km']],
    twoway=True,
)

CPU times: user 4.16 s, sys: 177 ms, total: 4.33 s
Wall time: 4.33 s


In [None]:
def haversine_vectorize(lon1, lat1, lon2, lat2):
    """Great-circle (haversine) distance in kilometres between two sets of points.

    lon1, lat1: longitude/latitude of the first point(s), in decimal degrees
    lon2, lat2: longitude/latitude of the second point(s), in decimal degrees
    Each argument may be a scalar, a numpy array or a pandas Series; they are
    combined element-wise (scalars broadcast against arrays/Series).
    returns: distance(s) in km rounded to 2 decimals, same container kind as the inputs
    """
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])
    delta_lon = lon2 - lon1
    delta_lat = lat2 - lat1
    haver_formula = np.sin(delta_lat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(delta_lon/2.0)**2
    central_angle = 2 * np.arcsin(np.sqrt(haver_formula))
    km = 6367 * central_angle  # 6367 = Earth radius in km used by this notebook; use 3958 for miles
    # np.round (unlike the built-in round) also accepts plain numpy arrays.
    return np.round(km, 2)

In [None]:
%%time

# Existing health facilities: keep name and coordinates, drop exact duplicates,
# and reuse the original row index as the hospital identifier.
current_hospitals = (
    gpd.read_file(os_path_selected+'current_health_facilities.geojson', driver='GeoJSON')
    [['L_NAME','LONG','LAT']]
    .drop_duplicates()
    .reset_index()
    .rename(columns={'index':'Hosp_ID','LONG':'Longitude','LAT':'Latitude'})
    [['Hosp_ID','Longitude','Latitude','L_NAME']]
)

# Snap each hospital to its closest network node, bring in that node's coordinates,
# and record the straight-line distance (km) between the hospital and the node.
current_hospitals['nearest_node'] = network.get_node_ids(
    current_hospitals['Longitude'], current_hospitals['Latitude'], mapping_distance=None
)
current_hospitals = current_hospitals.merge(nodes, left_on='nearest_node', right_on='nodeID')
current_hospitals['hosp_dist_road_estrada'] = haversine_vectorize(
    lon1=current_hospitals['Longitude'],
    lat1=current_hospitals['Latitude'],
    lon2=current_hospitals['lon'],
    lat2=current_hospitals['lat'],
)


CPU times: user 102 ms, sys: 14.4 ms, total: 116 ms
Wall time: 116 ms


In [None]:
# Load the population source chosen in the configuration cell.
# Every branch yields (row index, latitude, longitude, population) in that order,
# so the frame can be given the common column names ID / LAT / LONG / POP below.
if pop_path == 'facebook':
    facebook_pop_csv = pd.read_csv(os_path_selected+'tls_population_2020.csv')
    facebook_pop_csv['tls_population_2020'] = facebook_pop_csv['tls_population_2020'].round()
    pop_selected = facebook_pop_csv[['latitude','longitude','tls_population_2020']].reset_index()
elif pop_path == 'census':
    census_tl = gpd.read_file(os_path_selected+'census.geojson')
    # One row per distinct coordinate pair, counting the records that share it.
    census_tl = census_tl.groupby(['LAT','LONG'])['EAs'].count().reset_index()
    # NOTE(review): the factor 5 is presumably an assumed number of people per counted record - confirm.
    census_tl['pop'] = census_tl['EAs']*5
    pop_selected = census_tl[['LAT','LONG','pop']].reset_index()
elif pop_path == 'worldpop':
    world_pop_csv = pd.read_csv(os_path_selected+'ppp_TLS_2020_1km_Aggregated_UNadj.csv')
    world_pop_csv['Z'] = world_pop_csv['Z'].round()
    pop_selected = world_pop_csv[['Y','X','Z']].reset_index()
else:
    # Fail with a clear message instead of a NameError on pop_selected below.
    raise ValueError("Unknown pop_path {!r}; expected 'facebook', 'worldpop' or 'census'.".format(pop_path))

pop_selected.columns = ['ID','LAT','LONG','POP']

In [None]:
# Snap every population point to its closest network node, bring in that node's
# coordinates, and record the straight-line distance (km) from the point to the node.
pop_selected['nearest_node'] = network.get_node_ids(
    pop_selected['LONG'], pop_selected['LAT'], mapping_distance=None
)
pop_selected = pop_selected.merge(nodes, left_on='nearest_node', right_on='nodeID')
pop_selected['pop_dist_road'] = haversine_vectorize(
    lon1=pop_selected['LONG'],
    lat1=pop_selected['LAT'],
    lon2=pop_selected['lon'],
    lat2=pop_selected['lat'],
)

In [None]:
%%time

# Candidate (hospital node, population node) pairs.
# For each hospital, keep the network nodes of all population points that lie within
# distance_threshold km in a straight line; only those pairs are routed on the network.
candidate_pairs = []
hospital_rows = current_hospitals[['nearest_node','Longitude','Latitude']].itertuples(index=False, name=None)
for hosp_node, hosp_lon, hosp_lat in hospital_rows:
    # Computed straight from pop_selected, so no column is ever written to a slice of it.
    straight_line_km = haversine_vectorize(pop_selected['LONG'], pop_selected['LAT'], hosp_lon, hosp_lat)
    nearest_nodes = pop_selected.loc[straight_line_km <= distance_threshold, 'nearest_node'].unique()
    candidate_pairs.extend((int(hosp_node), pop_node) for pop_node in nearest_nodes)

# Build the frame once. In df_matrix both 'Hosp_ID' and 'ID' hold *network node ids*
# (of the hospital and of the population point); the real identifiers are joined in below.
df_matrix = (
    pd.DataFrame(candidate_pairs, columns=['Hosp_ID','ID'])
    .drop_duplicates()
    .reset_index(drop=True)
)
df_matrix['shortest_path_length'] = network.shortest_path_lengths(df_matrix['ID'],df_matrix['Hosp_ID'])

# Join population points on their node: the clashing 'ID' column becomes ID_x (node) / ID_y (population id).
matrix_current = pd.merge(df_matrix,pop_selected[['ID','nearest_node','pop_dist_road','POP','LONG','LAT']],right_on='nearest_node',left_on='ID')
# Join hospitals on their node: the clashing 'Hosp_ID' column becomes Hosp_ID_x (node) / Hosp_ID_y (hospital id).
matrix_current = pd.merge(matrix_current,current_hospitals[['Hosp_ID','Longitude','Latitude','nearest_node','hosp_dist_road_estrada']],right_on='nearest_node',left_on='Hosp_ID')

matrix_current['euclidean_distance'] = haversine_vectorize(matrix_current['LONG'],matrix_current['LAT'],matrix_current['Longitude'],matrix_current['Latitude'])
# Door-to-door distance: point -> road node, along the network, road node -> hospital.
matrix_current['total_network_distance'] = matrix_current['pop_dist_road']+matrix_current['hosp_dist_road_estrada']+matrix_current['shortest_path_length']
matrix_current = matrix_current[['ID_y','pop_dist_road','POP','LONG','LAT',
                                 'Hosp_ID_y','hosp_dist_road_estrada','Longitude','Latitude',
                                 'shortest_path_length','euclidean_distance','total_network_distance']]
matrix_current = matrix_current.rename(columns={'ID_y':'Pop_ID','Hosp_ID_y':'Hosp_ID'})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


CPU times: user 7.48 s, sys: 1.08 s, total: 8.56 s
Wall time: 8.56 s


In [None]:
%%time
def get_corrected_distance(x):
    """Choose the distance to use for one population point / hospital pair.

    The straight-line distance is used when it is at most 1, or when the
    point's distance to the road network is at least as large as the
    straight-line distance to the hospital; otherwise the total network
    distance is used.

    x: row (or mapping) providing 'euclidean_distance', 'pop_dist_road'
       and 'total_network_distance'
    """
    straight_line = x['euclidean_distance']
    prefer_straight_line = straight_line <= 1 or x['pop_dist_road'] >= straight_line
    return straight_line if prefer_straight_line else x['total_network_distance']

# Apply the correction row by row, passing only the three columns the function reads.
matrix_current['distance_corrected'] = (
    matrix_current[['pop_dist_road','euclidean_distance','total_network_distance']]
    .apply(get_corrected_distance, axis=1)
)


CPU times: user 7.08 s, sys: 22.4 ms, total: 7.11 s
Wall time: 7.11 s


In [None]:
# Preview the first two rows of the population/hospital distance matrix.
matrix_current.head(2)

Unnamed: 0,Pop_ID,pop_dist_road,POP,LONG,LAT,Hosp_ID,hosp_dist_road_estrada,Longitude,Latitude,shortest_path_length,euclidean_distance,total_network_distance,distance_corrected
0,149068,0.04,9.0,124.444167,-9.167778,0,0.0,124.458346,-9.172521,1.662,1.64,1.702,1.702
1,149096,0.03,9.0,124.426111,-9.168333,0,0.0,124.458346,-9.172521,3.691,3.57,3.721,3.721


In [None]:
# Pairs whose corrected distance is within the configured threshold.
subset_sel_matrix = matrix_current.loc[matrix_current['distance_corrected'].le(distance_threshold)]
# Percentage of the total population covered: each population point is counted once,
# even when several hospitals are within reach.
round(
    100 * subset_sel_matrix.drop_duplicates(subset=['Pop_ID','POP'])['POP'].sum()
    / pop_selected['POP'].sum()
)

52