In [None]:
# Notebook configuration: everything a reader is expected to tune lives in this cell.

# SELECT runtime { local | colab }
# runtime = 'local'

# SELECT Road Network Data { osm | estrada | combined }
road_path = 'osm'

# SELECT Population Data { facebook | worldpop | census }
# NOTE: the population-loading cell below only reads data for 'facebook' and 'worldpop'.
pop_path = 'facebook'

# SELECT Distance Threshold in KM (one optimization run per listed value)
distance_thresholds = [10]

# Daily probability that a person has a stroke (the `a` of the model description)
prob_stroke  = 0.001 # 0.1% -> TO TEST, NO REFERENCES

# Average number of days a patient stays in a stroke center (the `s` of the model description)
average_days = 5 # 5 days, -> TO TEST, NO REFERENCES

# Maximal allowed utilization rate of the bed capacity (the `u` of the model description);
# it multiplies the capacity on the right-hand side of the capacity constraint.
utilization_rate = 0.8 # 80%, -> TO TEST, NO REFERENCES

# List of numbers of additional stroke facilities to add in the optimization
# NOTE: this list is reassigned to a shorter one in the final optimization cell.
nhospitals = [10,20,30,40,50,60,70,80,90,100,150,200,250,300,400,500,750,1000]

# List of numbers of additional beds to add in the optimization
nbeds = [100, 200, 300, 400, 500, 600]

# SELECT potential stroke facilities { existing hospitals }
potential_facilities = 'existing hospitals'

In [None]:
%%time
import pandas as pd
import geopandas as gpd

import matplotlib
import matplotlib.pyplot as plt
import plotly.express as px

import folium
import itertools
import geopy.distance

import warnings
warnings.filterwarnings('ignore')

import json
import time
from datetime import datetime

import sys
sys.path.append(r'C:\Users\joyce\WorldBank\GPBP/Scripts')
sys.path.append(r'C:\Users\joyce\WorldBank\Vietnam Project\0906Data')

# Import python functions
from optimization_model import OptimizationModel
from distance_matrix import DistanceCalculation, haversine_vectorize
from network_gpbp import get_nodes_and_edges
from InputDataPreprocessingv1 import CurrentHospitals, NewHospitals, NewHospitalsGrid, NewHospitalsCSV, Population, PopulationFB
from Visualization_GPBP import ParetoCurve, CreateMap

# Import data

In [None]:
%%time
# Build the node table, edge table and network object from the preprocessed OSM road file.
road_network_file = r'../Data/road_osm_preprocessed.geojson'
nodes, edges_attr, network = get_nodes_and_edges(road_network_file)

In [None]:
# Draw all road edges in grey on a square, axis-free canvas.
f = plt.figure(figsize=(10, 10))
ax = f.add_subplot(1, 1, 1)
edges_attr.plot(ax=ax, color='grey', zorder=1)
ax.axis('off')
plt.show()

In [None]:
%%time
# Existing facilities -- in this study these are the stroke facilities.
# The row number produced by reset_index() is kept as the 'index' column.
facility_columns = ['index', 'longitude', 'latitude', 'Name_English']
health_facilities = pd.read_csv(r'../Data/stroke-facs.csv').reset_index().loc[:, facility_columns]
current_hospitals_ID, current_hospitals = CurrentHospitals(health_facilities, network, nodes)
len(current_hospitals)

In [None]:
%%time
# Candidate locations for new stroke facilities, read from the hospital layer.
# NOTE(review): only the "existing hospitals" option is implemented in this cell;
# the `potential_facilities` setting is not consulted here.
hospitals_file = r'../Data/hospitals_vietnam.geojson'
hospitals = gpd.read_file(hospitals_file)
new_hospitals_ID, new_hospitals = NewHospitals(current_hospitals, hospitals, network, nodes)

len(new_hospitals)

In [None]:
%%time

# Population data.
# Coordinates are rounded to `digits_rounding` decimals to cluster the population;
# 8 digits means (effectively) no rounding.
digits_rounding = 2

if pop_path == 'facebook':
    read_population = pd.read_csv(r'../Data/pop_fb_2020.csv').reset_index()
    read_population = read_population[['index','longitude','latitude','population_2020']]
elif pop_path == 'worldpop':
    # NOTE(review): unlike the facebook branch, no column subset is taken here --
    # confirm the WorldPop CSV already has the layout PopulationFB expects.
    read_population = pd.read_csv(r'../Data/WorldPop_2020_1km.csv').reset_index()
else:
    # Fail fast. Without this branch an unsupported value (e.g. 'census') left
    # `read_population` undefined -- or, on a re-run, silently reused the frame
    # loaded for a previous setting.
    raise ValueError("Unsupported pop_path: " + repr(pop_path) + " (expected 'facebook' or 'worldpop')")

array_household, population = PopulationFB(digits_rounding, read_population, network, nodes)

len(population)

# Distance matrix

In [None]:
# Smallest and largest of the configured distance thresholds.
distance_threshold_smallest, distance_threshold_largest = min(distance_thresholds), max(distance_thresholds)

In [None]:
%%time
# Distances from every population point to the current hospitals,
# computed up to the largest configured threshold.
pop_subset = population[['nearest_node','xcoord','ycoord']]
current_matrix_columns = [
    'ID', 'pop_dist_road_estrada', 'household_count', 'xcoord', 'ycoord',
    'Hosp_ID', 'hosp_dist_road_estrada', 'Longitude', 'Latitude',
    'shortest_path_length', 'euclidean_distance', 'total_network_distance', 'distance_corrected',
]
matrix_current = (
    DistanceCalculation(network, current_hospitals, population, pop_subset, distance_threshold_largest)
    .loc[:, current_matrix_columns]
    .rename(columns={'ID':'Pop_ID'})  # the population identifier is called Pop_ID downstream
)


In [None]:
%%time
# Restrict the population to the points that are NOT yet within the smallest
# threshold distance of a current hospital.
is_within_smallest = matrix_current.total_network_distance <= distance_threshold_smallest
withinSkm = matrix_current.loc[is_within_smallest, 'Pop_ID'].unique()
already_served_rows = population.index[population.ID.isin(withinSkm)]
population_new = population.drop(already_served_rows)

In [None]:
%%time
# Distances from the not-yet-served population points to the candidate hospitals.
# NOTE(review): the full `population` frame is passed while `pop_subset` is taken
# from `population_new` -- confirm this is what DistanceCalculation expects.
pop_subset = population_new[['nearest_node','xcoord','ycoord']]
new_matrix_columns = [
    'ID', 'pop_dist_road_estrada', 'household_count', 'xcoord', 'ycoord',
    'Cluster_ID', 'hosp_dist_road_estrada', 'Longitude', 'Latitude',
    'shortest_path_length', 'euclidean_distance', 'total_network_distance', 'distance_corrected',
]
matrix_new = (
    DistanceCalculation(network, new_hospitals, population, pop_subset, distance_threshold_largest)
    .loc[:, new_matrix_columns]
    .rename(columns={'ID':'Pop_ID'})  # the population identifier is called Pop_ID downstream
)

# Optimization

In [None]:
%%time
# Stack the (population, site, distance) pairs of the current and the candidate
# hospitals into one long table with a shared column layout, ordered by population ID.
matrix_new1 = matrix_new[['Pop_ID','Cluster_ID','distance_corrected']].rename(
    columns={'Cluster_ID': 'Hosp/Cluster', 'distance_corrected': 'distance'}
)
matrix_current1 = matrix_current[['Pop_ID','Hosp_ID','distance_corrected']].rename(
    columns={'Hosp_ID': 'Hosp/Cluster', 'distance_corrected': 'distance'}
)
df_combined = pd.concat([matrix_current1, matrix_new1], axis=0).sort_values(by='Pop_ID')

# Model




$\textrm{Maximize } \quad \sum_{i \in I} \sum_{j \in J} v_iy_{ij} $ 

$\textrm{Subject to } \quad (1) x_j = 1 \quad \quad \quad \quad \quad\forall j = 1, ... m $

$ \quad \quad \quad \quad \quad (2) \sum^{M}_{j=m+1} x_j \leq p  $

$ \quad \quad \quad \quad \quad  (3) \sum_{j=1}^M z_j \leq q $

$ \quad \quad \quad \quad \quad  (4) \sum_{i \in I} y_{ij} \leq nx_j \quad \quad \quad \quad \quad \forall j \in J $

$ \quad \quad \quad \quad \quad  (5) z_j \leq q x_j \quad \quad \quad \quad \quad \forall j \in J $

$ \quad \quad \quad \quad \quad (6) \sum_{j \in J} y_{ij} \leq 1 \quad \quad \quad \quad \quad \forall i \in I$

$ \quad \quad \quad \quad \quad (7) a * s * \sum_{i \in I} v_iy_{ij} \leq u(c_j + z_j)  \quad \quad \quad \quad \quad \forall j \in J  $

$ \quad \quad \quad \quad \quad  (8) y_{ij} = 0 \quad \quad \quad \quad \quad \forall i \in I, \forall j \in J, d_{ij} > S $

$ \quad \quad \quad \quad \quad x_j \in \{0,1\}, \; y_{ij} \in [0,1], \; z_j \geq 0 \quad \quad\quad \quad \quad\forall i \in I, \forall j \in J $

where: 

- $I$ = the index set of households, or clusters of households, indexed by $i = 1, ..., n$. 

- $J$ = the index set of all healthcare sites, where indices $j = 1, ..., m$ are corresponding to the already existing healthcare facilities and indices $j = m+1, ..., M$ are corresponding to potential hospital locations

- $v_i$ = the number of people in (cluster of) household(s) $i$

- $d_{ij}$ = the travel distance from (cluster of) household(s) $i$ to hospital facility $j$

- $S$ = the maximum travel distance from a household (or cluster) to a health care facility

- $p$ = the number of additional stroke centers 

- $q$ = the total number of additional capacity (in #beds)

- $c_j$ = the current capacity at stroke center $j$ (in # beds)

- $u$ = maximal allowed utilization rate (between 0 and 1)

- $s$ = average number of days a patient occupies a bed in a stroke center

- $a$ = probability (per day) that a person has to visit a stroke center

Decision variables:
$
x_j = \begin{cases} 1 & \text{ if hospital } j \text{ is opened} \\
0 & \text{ otherwise}
\end{cases} \\
$
$
y_{ij} = \text{fraction of the population of (cluster of) household(s) } i \text{ that is assigned to health facility } j \text{;} \\ \text{it can only be positive if facility } j \text{ is opened and } d_{ij} \leq S \\
$
$
z_j = \text{extra capacity at stroke center j}
$


In [None]:
import time
import gurobipy as gb
from gurobipy import GRB
import numpy as np
import pandas as pd

In [None]:
# Problem dimensions and budgets for the capacitated model.
existinghosp = len(current_hospitals_ID)      # number of already-existing facilities
n_candidate_sites = len(new_hospitals_ID)     # number of potential new locations
m = existinghosp + n_candidate_sites          # total number of sites (existing + candidate)
n = len(array_household)                      # number of households / household clusters
n_additional_hospitals = 0                    # extra facilities allowed on top of the existing ones
p = existinghosp + n_additional_hospitals     # cap on the total number of opened sites
S, max_beds = 10, 100                         # service distance; budget of extra beds

In [None]:
# Use large capacity if you want to check with uncapacitated model
# c1 = [100000]*m

# Placeholder capacities for the existing facilities: one value in 0..99 per facility.
# A dedicated, seeded generator makes the capacities -- and therefore the
# optimization results -- identical on every Restart & Run All.
# Drawing each value independently (instead of random.sample without replacement)
# also keeps this cell working when there are more than 100 existing facilities.
import random

capacity_rng = random.Random(0)
c1 = [capacity_rng.randrange(100) for _ in range(existinghosp)]

# Candidate sites start without any capacity; the list is ordered existing sites first.
c = c1 + [0] * (m - existinghosp)

In [None]:
# Keep only the household/site pairs whose distance does not exceed S.
is_within_S = df_combined['distance'] <= S
dist = df_combined.loc[is_within_S]

In [None]:
# Create the Gurobi model for the capacitated facility-location problem.
# NOTE: `M` is the solver model object here, not the site count M of the model description.
M = gb.Model("Facility location problem")
M.Params.OutputFlag = False # switch off the solver log (trace)
M.Params.mipgap     = .0001 # relative MIP gap at which the solve is considered done
M.Params.timelimit  = 100 # maximum solve time in seconds

In [None]:
# Decision variables and objective expression of the capacitated model.
#   Y[i, j] : fraction of household (cluster) i assigned to site j, in [0, 1]
#   X[j]    : 1 if site j is opened
#   Z[j]    : number of extra beds placed at site j
# TODO: objective construction needs improvement, find documentation.
Y = {}
obj = gb.LinExpr()
II = dist['Pop_ID']
JJ = dist['Hosp/Cluster']
JI = {j: [] for j in range(m)}   # site -> households within distance S
IJ = {i: [] for i in range(n)}   # household -> sites within distance S

# Only create variables Y[i, j] for pairs with d_ij <= S (the rows of `dist`).
for i, j in zip(II, JJ):
    IJ[i].append(j)
    JI[j].append(i)
    # ub=1.0 states the documented domain y_ij in [0, 1] explicitly.
    Y[i, j] = M.addVar(lb=0.0, ub=1.0, vtype=gb.GRB.CONTINUOUS)
    obj += array_household[i] * Y[i, j]

X = M.addVars(m, vtype=GRB.BINARY)
# Extra beds are a non-negative count. Declaring Z as BINARY (as before) capped
# the addition at one bed per site, so the bed budget could never be used.
Z = M.addVars(m, lb=0, vtype=GRB.INTEGER)

In [None]:
# Maximize the household-weighted sum of assignment variables accumulated in `obj`.
M.setObjective(obj, gb.GRB.MAXIMIZE)

In [None]:
# Constraints (same order as before, so the model rows are unchanged).

# (1) Every existing hospital is opened.
for j in range(existinghosp):
    M.addLConstr(X[j] == 1, name="Existing hospitals" + str(j))

# (2) The number of opened sites (existing + additional) is at most p.
opened_sites = gb.quicksum(X[j] for j in range(m))
s = M.addLConstr(opened_sites <= p)

# (3) The total number of extra beds is at most the bed budget.
extra_beds_total = gb.quicksum(Z[j] for j in range(m))
M.addConstr(extra_beds_total <= max_beds)

# (4) Households can only be assigned to a site that is opened.
for j in range(m):
    assigned_to_site = gb.quicksum(Y[i, j] for i in JI[j])
    M.addConstr(assigned_to_site <= n * X[j])

# (5) Beds can only be added to a site that is opened.
for j in range(m):
    M.addConstr(Z[j] <= max_beds * X[j])

# (6) Each household is assigned at most once in total.
for i in range(n):
    assigned_from_household = gb.quicksum(Y[i, j] for j in IJ[i])
    M.addConstr(assigned_from_household <= 1)

# (7) Expected bed demand at a site stays within the allowed utilization of its capacity.
for j in range(m):
    people_assigned = gb.quicksum(array_household[i] * Y[i, j] for i in JI[j])
    M.addConstr(prob_stroke * average_days * people_assigned <= utilization_rate * (c[j] + Z[j]))

In [None]:
# Optimize the model and read the number of served beneficiaries.
M.optimize()

# A time limit is set on the model, so the solve can stop without any feasible
# solution; reading the objective value would then fail with an opaque error.
if M.SolCount == 0:
    raise RuntimeError("Optimization ended without a feasible solution (Gurobi status " + str(M.Status) + ")")

obj_val = M.objVal

In [None]:
def getvariables(n, m, X, Y, Z, tol=1e-6):
    """Collect the solution values of the decision variables.

    Parameters
    ----------
    n : int
        Number of households; kept for interface compatibility, not used.
    m : int
        Number of sites; X and Z are read for j = 0 .. m-1.
    X, Z : mappings j -> variable
        Site-opening and extra-bed variables; each value exposes the solution as `.x`.
    Y : mapping (i, j) -> variable
        Assignment variables; each value exposes the solution as `.x`.
    tol : float, optional
        An assignment counts as used when its solution value exceeds `tol`.

    Returns
    -------
    (Xvalues, Yvalues, Zvalues)
        Xvalues and Zvalues are arrays of length m. Yvalues maps every site j to
        the list of households i whose assignment Y[i, j] is positive in the solution.
    """
    import numpy as np

    Xvalues = np.zeros(m)
    Zvalues = np.zeros(m)
    for j in range(m):
        Xvalues[j] = X[j].x
        Zvalues[j] = Z[j].x

    # Read the solution from the Y argument itself. The previous version looped
    # over the notebook globals II/JJ and listed every candidate pair regardless
    # of the solution (and first allocated an unused array of n*m zeros).
    Yvalues = {j: [] for j in range(m)}
    for (i, j), assignment in Y.items():
        if assignment.x > tol:
            Yvalues[j].append(i)

    return (Xvalues, Yvalues, Zvalues)

In [None]:
# Pull the variable values of the solved capacitated model out of the Gurobi objects.
Xvalues, Yvalues, Zvalues  = getvariables(n, m, X, Y, Z)

# Optimization without capacity

## Model

$\textrm{Maximize } \quad \sum_i v_iy_i $ 

$\textrm{Subject to } \quad x_j = 1 \quad \quad \quad \quad \quad\forall j = 1, ... m $

$ \quad \quad \quad \quad \quad \sum^{M}_{j=m+1} x_j \leq p  $

$ \quad \quad \quad \quad \quad y_i \leq \sum_{j|d_{ij}\leq S} x_j \quad  \forall i \in I$

$ \quad \quad \quad \quad \quad y_i, x_j \in \{0,1\} \quad \quad\forall i \in I, \forall j \in J $

where: 

- $I$ = the index set of households, or clusters of households, indexed by $i = 1, ..., n$. 

- $J$ = the index set of all healthcare sites, where indices $j = 1, ..., m$ are corresponding to the already existing healthcare facilities and indices $j = m+1, ..., M$ are corresponding to potential hospital locations

- $v_i$ = the number of people in (cluster of) household(s) $i$

- $d_{ij}$ = the travel distance from (cluster of) household(s) $i$ to hospital facility $j$

- $S$ = the maximum travel distance from a household (or cluster) to a health care facility

- $p$ = the number of additional hospitals located.

Decision variables:
$
x_j = \begin{cases} 1 & \text{ if hospital } j \text{ is opened} \\
0 & \text{ otherwise}
\end{cases} \\
$
$
y_i = \begin{cases} 1 & \text{ if there is an opened health facility within } S \\ & \text{ kilometers travel distance away from the (cluster of) household(s) } i\\
0 & \text{ otherwise}
\end{cases}
$

It would be interesting to check whether this model gives the same results as the capacitated model above when that model is given a very large budget of extra beds.


In [None]:
# Run the uncapacitated optimization once per distance threshold and stack the results.
# NOTE: this deliberately overrides the longer `nhospitals` list from the configuration cell.
nhospitals = [10,20,80]

# Collect one frame per threshold and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# re-copies the accumulated frame on every iteration.
threshold_outputs = []
for each_threshold in distance_thresholds:
    opt_array, tModelling, tSolving = OptimizationModel(array_household, current_hospitals_ID, new_hospitals_ID, df_combined, each_threshold, nhospitals)
    df_opt_outputs = pd.DataFrame(opt_array)
    threshold_outputs.append(df_opt_outputs)
    print("Threshold distance: " + str(each_threshold))
    print("Solving time: " + str(tSolving/60) + ", modelling time: " + str(tModelling/60))

# pd.concat raises on an empty list, so fall back to an empty frame when no threshold is configured.
df_combined_output = pd.concat(threshold_outputs) if threshold_outputs else pd.DataFrame()
    

In [None]:
# Display the optimization results collected over all distance thresholds.
df_combined_output