In [4]:
# SELECT runtime { local | colab }
# runtime = 'local'

# SELECT Road Network Data { osm | estrada | combined }
road_path = 'osm'

# SELECT Population Data { facebook | worldpop | census }
pop_path = 'worldpop'

# SELECT Distance Thresholds in KM (one or more values)
distance_thresholds = [2, 5, 10]

# SELECT potential stroke facilities { grid }
potential_facilities = 'grid'

# IF potential facilities = grid, pick the grid size in km { 10 | 5 | 2 | 1 }
grid_size = 10

# TODO: if it is grid, add the grid-generating function. For now only the
# size-specific label '<size>kmGrid' is derived.
# The label is built from grid_size instead of enumerating sizes: the previous
# if/elif chain covered only 10, 5 and 1, so a documented size such as 2 fell
# through silently and left potential_facilities as the plain string 'grid'.
if potential_facilities == 'grid':
    potential_facilities = f'{grid_size}kmGrid'

In [5]:
%%time
import pandas as pd
import geopandas as gpd

import matplotlib
import matplotlib.pyplot as plt

import plotly.express as px
import itertools
import geopy.distance

import warnings
warnings.filterwarnings('ignore')

import json

import time

import sys

sys.path.append(r'../Scripts')


# Import python functions
from optimization_model import OptimizationModel
from distance_matrix import DistanceCalculation, haversine_vectorize
from network_gpbp import get_nodes_and_edges
from InputDataPreprocessingv1 import CurrentHospitals, NewHospitals, NewHospitalsGrid, NewHospitalsCSV, Population, PopulationFB

CPU times: user 37 µs, sys: 1 µs, total: 38 µs
Wall time: 41 µs


# Import data

In [None]:
# ROAD NETWORK --> candidate sources are "OSM", "eStrada" or "Merged";
# the file read below is the edited OSM network.
road_network_file = r'C:\Users\joyce\WorldBank\Model1\Data_model1_OSM/osm_network_edited.geojson'
nodes, edges_attr, network = get_nodes_and_edges(road_network_file)


In [None]:
# Display the sum over the edge table returned by get_nodes_and_edges.
# NOTE(review): assumes edges_attr is a (Geo)DataFrame, so this sums per column — confirm which columns are meaningful to total.
edges_attr.sum()

In [None]:
# Draw the road-network edges in grey on a 10 x 10 figure, with the axes hidden
f = plt.figure(figsize=(10, 10))
ax = f.add_subplot(1, 1, 1)
edges_attr.plot(ax=ax, zorder=1, color='grey')
ax.axis('off')
plt.show()

In [None]:
%%time
# Current hospitals -- only these columns are kept: index, LONG, LAT, L_NAME
hospital_columns = ['index', 'LONG', 'LAT', 'L_NAME']
current_hospitals = gpd.read_file(
    r'C:\Users\joyce\WorldBank\Model1\Data_model1_official/health_facilities.geojson',
    driver='GeoJSON',
)
current_hospitals = current_hospitals.reset_index()[hospital_columns].drop_duplicates()
# Pass the de-duplicated table to CurrentHospitals together with the network and its nodes
current_hospitals_ID, current_hospitals = CurrentHospitals(current_hospitals, network, nodes)

In [None]:
# Number of entries in current_hospitals as returned by CurrentHospitals
len(current_hospitals)

In [None]:
%%time
# Candidate locations for new hospitals (1*1 grid for Timor-Leste), read from CSV
candidate_sites_csv = r'C:\Users\joyce\WorldBank\Model1\Data19-04/potential_hospital_locations_complete.csv'
new_hospitals = pd.read_csv(filepath_or_buffer=candidate_sites_csv)
new_hospitals_ID, new_hospitals = NewHospitalsCSV(current_hospitals, new_hospitals, network, nodes)

# Show how many candidate locations NewHospitalsCSV returned
len(new_hospitals)

In [None]:
%%time
# Value passed as the first argument to the population preprocessing helpers.
# NOTE(review): presumably the number of decimals used when rounding coordinates — confirm in InputDataPreprocessingv1.
digits_rounding = 8

# Load and preprocess the population source selected by pop_path.
if pop_path == "facebook":
    read_population = pd.read_csv(r'C:\Users\joyce\Downloads\tls_population_2020_csv.zip').reset_index()
    array_household, population = PopulationFB(digits_rounding, read_population, network, nodes)
elif pop_path == "census":
    read_population = gpd.read_file(r'C:\Users\joyce\WorldBank\Model1\Data_model1_official/census.geojson', driver='GeoJSON')
    read_population = read_population[['IDENT','LONG','LAT']]
    array_household, population = Population(digits_rounding, read_population, network, nodes)
else:
    # Fail loudly on any option without a loader (e.g. 'worldpop'). Without this
    # branch, `population` and `array_household` were never assigned and the
    # notebook stopped on an unrelated-looking NameError.
    raise ValueError(
        f"Unsupported pop_path {pop_path!r}: this cell only loads 'facebook' or 'census' data"
    )

# Number of entries in the preprocessed population
len(population)

# Distance matrix calculation

In [None]:
# Extremes of the configured distance thresholds (smallest and largest value)
distance_threshold_smallest, distance_threshold_largest = min(distance_thresholds), max(distance_thresholds)

In [None]:
%%time
# Distances between households and the hospitals that already exist,
# computed with the largest threshold
pop_subset = population[['nearest_node','xcoord','ycoord']]
current_columns = [
    'ID','pop_dist_road_estrada','household_count','xcoord','ycoord',
    'Hosp_ID','hosp_dist_road_estrada','Longitude','Latitude',
    'shortest_path_length','euclidean_distance','total_network_distance','distance_corrected',
]
matrix_current = DistanceCalculation(network, current_hospitals, population, pop_subset, distance_threshold_largest)
# Keep the listed columns and expose the household identifier as Pop_ID
matrix_current = matrix_current[current_columns].rename(columns={'ID':'Pop_ID'})

In [None]:
%%time
# Build the subset of the population that is not yet served: every household
# with a current hospital within the smallest threshold (S km) is dropped.
covered_rows = matrix_current['distance_corrected'] <= distance_threshold_smallest
withinSkm = matrix_current.loc[covered_rows, 'Pop_ID'].unique()
population_new = population.drop(index=population.index[population['ID'].isin(withinSkm)])

In [None]:
%%time
# Distances between the remaining households and the candidate hospital sites
# NOTE(review): pop_subset comes from population_new, but the full `population`
# is still passed as the third argument — confirm this is what DistanceCalculation expects.
pop_subset = population_new[['nearest_node','xcoord','ycoord']]
new_columns = [
    'ID','pop_dist_road_estrada','household_count','xcoord','ycoord',
    'Cluster_ID','hosp_dist_road_estrada','Longitude','Latitude',
    'shortest_path_length','euclidean_distance','total_network_distance','distance_corrected',
]
matrix_new = DistanceCalculation(network, new_hospitals, population, pop_subset, distance_threshold_largest)
# Keep the listed columns and expose the household identifier as Pop_ID
matrix_new = matrix_new[new_columns].rename(columns={'ID':'Pop_ID'})

# Optimization 

In [None]:
%%time
# Stack the distances to current and to candidate hospitals into one table
# with a shared layout: Pop_ID, HospCluster, distance
matrix_new1 = matrix_new[['Pop_ID','Cluster_ID','distance_corrected']].rename(
    columns={'Cluster_ID': 'HospCluster', 'distance_corrected': 'distance'}
)
matrix_current1 = matrix_current[['Pop_ID','Hosp_ID','distance_corrected']].rename(
    columns={'Hosp_ID': 'HospCluster', 'distance_corrected': 'distance'}
)
# Current hospitals first, then candidates; finally order the rows by household
df_combined = pd.concat([matrix_current1, matrix_new1]).sort_values(by='Pop_ID')

# Model




$\textrm{Maximize } \quad \sum_i v_iy_i $ 

$\textrm{Subject to } \quad x_j = 1 \quad \quad \quad \quad \quad\forall j = 1, ..., m $

$ \quad \quad \quad \quad \quad \sum^{M}_{j=m+1} x_j \leq p  $

$ \quad \quad \quad \quad \quad y_i \leq \sum_{j|d_{ij}\leq S} x_j \quad  \forall i \in I$

$ \quad \quad \quad \quad \quad y_i, x_j \in \{0,1\} \quad \quad\forall i \in I, \forall j \in J $

where: 

- $I$ = the index set of households, or clusters of households, indexed by $i = 1, ..., n$. 

- $J$ = the index set of all healthcare sites, where indices $j = 1, ..., m$ correspond to the already existing healthcare facilities and indices $j = m+1, ..., M$ correspond to potential hospital locations

- $v_i$ = the number of people in (cluster of) household(s) $i$

- $d_{ij}$ = the travel distance from (cluster of) household(s) $i$ to hospital facility $j$

- $S$ = the maximum travel distance from a household (or cluster) to a health care facility

- $p$ = the number of additional hospitals located.

Decision variables:
$
x_j = \begin{cases} 1 & \text{ if hospital } j \text{ is opened} \\
0 & \text{ otherwise}
\end{cases} \\
$
$
y_i = \begin{cases} 1 & \text{ if there is an opened health facility within } S \\ & \text{ kilometers travel distance away from the (cluster of) household(s) } i\\
0 & \text{ otherwise}
\end{cases}
$


In [None]:
# Maximum travel distance S (km) for this run
S = 2
# MIP gap handed to OptimizationModel. It must be set here: the only other
# assignment is in the later S = 5 cell, so on a fresh kernel (Restart & Run All)
# this call failed with NameError.
# NOTE(review): presumably the solver's relative optimality gap — confirm in optimization_model.py
MipGap = 0.001
# Values of p (number of additional hospitals) passed to the model
nhospitals = [10,20,30,40,50,60,70,80,90,100,150,200,250,300,400,500,750,1000,2500,5000,7500,10000,14928]
opt_array, tModeling, tSolving = OptimizationModel(array_household, current_hospitals_ID, new_hospitals_ID, df_combined, S, nhospitals, MipGap)
# Wrap the first returned value in a DataFrame for display
df_opt_outputs = pd.DataFrame(opt_array)

In [None]:
# Display the optimization outputs of the S = 2 run
df_opt_outputs

In [None]:
# Second value returned by OptimizationModel for the S = 2 run.
# NOTE(review): the name suggests model-building time rather than solve/extract time — confirm meaning and units in optimization_model.py
tModeling

In [None]:
# Third value returned by OptimizationModel for the S = 2 run; presumably the time spent solving — confirm units
tSolving

In [None]:
# Second run: maximum travel distance of 5, same MIP gap and the same list of p values
S, MipGap = 5, 0.001
nhospitals = [*range(10, 101, 10), 150, 200, 250, 300, 400, 500, 750, 1000, 2500, 5000, 7500, 10000, 14928]
opt_array, tModeling, tSolving = OptimizationModel(
    array_household, current_hospitals_ID, new_hospitals_ID,
    df_combined, S, nhospitals, MipGap,
)
# Tabulate the first returned value for display
df_opt_outputs = pd.DataFrame(data=opt_array)

In [None]:
# Display the optimization outputs of the S = 5 run
df_opt_outputs

In [None]:
# Second value returned by OptimizationModel for the S = 5 run.
# NOTE(review): the name suggests model-building time rather than solve/extract time — confirm meaning and units in optimization_model.py
tModeling

In [None]:
# Third value returned by OptimizationModel for the S = 5 run; presumably the time spent solving — confirm units
tSolving