In [5]:
import numpy as np
import pandas as pd

This notebook will serve as an example to showcase the different flows we will implement. we will generate a dataset of 100 enumerators and 2000 targets and simulate the optimization.

# Min distance flow with haversine

In [6]:
# Set a random seed for reproducibility
np.random.seed(0)

# Generate 100 enumerators with random latitudes and longitudes
enumerator_data = {
    "enum_id": [f"E{i+1:03d}" for i in range(100)],  # Creates IDs like E001, E002, ...
    "enum_lat": np.random.uniform(-90, 90, 100),  # Random latitudes
    "enum_long": np.random.uniform(-180, 180, 100),  # Random longitudes
}

# Generate 2000 targets with random latitudes and longitudes
target_data = {
    "target_id": [
        f"T{i+1:04d}" for i in range(2000)
    ],  # Creates IDs like T0001, T0002, ...
    "target_lat": np.random.uniform(-90, 90, 2000),  # Random latitudes
    "target_long": np.random.uniform(-180, 180, 2000),  # Random longitudes
}

# Create the DataFrames
df_enum = pd.DataFrame(enumerator_data)
df_target = pd.DataFrame(target_data)

# Display the shape of the created DataFrames to confirm the number of rows
print(f"Enumerators DataFrame shape: {df_enum.shape}")
print(f"Targets DataFrame shape: {df_target.shape}")

Enumerators DataFrame shape: (100, 3)
Targets DataFrame shape: (2000, 3)


In [7]:
from surveyscout.utils import LocationDataset

enum_locations = LocationDataset(df_enum, "enum_id", "enum_lat", "enum_long")
target_locations = LocationDataset(
    df_target, "target_id", "target_lat", "target_long"
)

### Basic min distance flow with haversine

This flow implements the basic min distance model where we specify our parameters and the model will find the optimal results. Here are the parameters of the model:
- min_target: The minimum number of targets each enumerator is required to visit.
- max_target: The maximum number of targets each enumerator is allowed to visit.
- max_cost: The  maximum cost assignable to a surveyor to visit a single target.
- max_total_cost:  The initial maximum total cost assignable to a surveyor

In [8]:
from surveyscout.flows import basic_min_distance_flow

results = basic_min_distance_flow(
    enum_locations=enum_locations,
    target_locations=target_locations,
    min_target=5,
    max_target=30,
    max_distance=10000,
    max_total_distance=100000,
)

Optimal value:  2142243.4940421954


In [9]:
results.head()

Unnamed: 0,target_id,enum_id,value
0,T0001,E060,1.0
1,T0002,E063,1.0
2,T0003,E042,1.0
3,T0004,E061,1.0
4,T0005,E035,1.0


### Recursive min distance flow with haversine
This flow allows to recursively update parameters  until we reach a solution. 
The parameters are as follow:
- min_target: The minimum number of targets each enumerator is required to visit.
- max_target: The maximum number of targets each enumerator is allowed to visit.
- max_cost: The  maximum cost assignable to a surveyor to visit a single target.
- max_total_cost:  The initial maximum total cost assignable to a surveyor
- max_perc: The initial percentile to determine the maximum surveyor-to-target cost (default is 80).
- param_increment: The value by which the parameter bounds and percentiles are adjusted during the recursion if no solution is found (default is 5).

In [11]:
from surveyscout.flows import recursive_min_distance_flow

results_df, params = recursive_min_distance_flow(
    enum_locations=enum_locations,
    target_locations=target_locations,
    min_target=15,
    max_target=35,
    max_distance=10000,
    max_total_distance=100000,
    param_increment=5,
)

Optimal value:  2244424.350659192


In [12]:
print(params)

{'min_target': 15, 'max_target': 35, 'max_cost': 10000, 'max_total_cost': 100000}


In [8]:
results_df.head()

Unnamed: 0,target_id,enum_id,value
0,T0001,E047,1.0
1,T0002,E063,1.0
2,T0003,E042,1.0
3,T0004,E031,1.0
4,T0005,E035,1.0


# Routed Min distance flow

We will generate random enumerators and targets data in the city of Chenna in India

In [9]:
# Set a random seed for reproducibility
np.random.seed(22)
# Define the boundaries of Tamil Nadu (approximate)
min_lat = 12.9190
max_lat = 13.2400
min_lon = 80.1234
max_lon = 80.3220


# Generate 100 enumerators with random latitudes and longitudes
enumerator_data = {
    "enum_id": [f"E{i+1:03d}" for i in range(10)],  # Creates IDs like E001, E002, ...
    "enum_lat": np.random.uniform(min_lat, max_lat, 10), 
    "enum_long": np.random.uniform(min_lon, max_lon, 10), 
}

# Generate 2000 targets with random latitudes and longitudes
target_data = {
    "target_id": [
        f"T{i+1:04d}" for i in range(200)
    ],  # Creates IDs like T0001, T0002, ...
    "target_lat": np.random.uniform(min_lat, max_lat, 200),  
    "target_long": np.random.uniform(min_lon, max_lon, 200), 
}

# Create the DataFrames
df_enum = pd.DataFrame(enumerator_data)
df_target = pd.DataFrame(target_data)

# Display the shape of the created DataFrames to confirm the number of rows
print(f"Enumerators DataFrame shape: {df_enum.shape}")
print(f"Targets DataFrame shape: {df_target.shape}")

Enumerators DataFrame shape: (10, 3)
Targets DataFrame shape: (200, 3)


### Min  distance flow with OSRM
To use OSRM we will specify one more parameter:
- routing: The API used to get the route distance between points. Can be either 'google' or 'OSRM'

In [11]:
from surveyscout.flows import basic_routed_min_distance_flow

enum_locations = LocationDataset(df_enum, "enum_id", "enum_lat", "enum_long")
target_locations = LocationDataset(
    df_target, "target_id", "target_lat", "target_long"
)
results = basic_routed_min_distance_flow(
    enum_locations=enum_locations,
    target_locations=target_locations,
    min_target=5,
    max_target=30,
    max_distance=100,
    max_total_distance=1000,
    routing="osrm",
)

Optimal value:  1314.2251


In [17]:
results.head()

Unnamed: 0,target_id,enum_id,value
0,T0001,E010,1.0
1,T0002,E004,1.0
2,T0003,E006,1.0
3,T0004,E002,1.0
4,T0005,E003,1.0


### Recursive min distance Flow with OSRM
This flow has the same parameters as the recursive haversine min distance flow but with one more parameter:
- routing: The API used to get the route distance between points. Can be either 'google' or 'OSRM'

In [13]:
from surveyscout.flows import recursive_min_distance_flow

results_df, params = recursive_min_distance_flow(
    enum_locations=enum_locations,
    target_locations=target_locations,
    min_target=15,
    max_target=35,
    max_distance=100,
    max_total_distance=1000,
    param_increment=5,
    routing="osrm",
)

ConnectionError: HTTPConnectionPool(host='localhost', port=5001): Max retries exceeded with url: /table/v1/driving/-143.25417751710535,-33.87674124106154;64.01395324664284,8.786430706918452;-82.79712965082065,38.73408594703551;84.66984796413414,18.49740769289589;166.38787624227774,8.078972939441442;-90.44886833281511,-13.742136118997152;27.416640390421293,26.2609403519981;33.13509525786205,-11.234301972715343;26.010686084714422,70.51914014077437;-99.69061224937741,83.45929689018527;162.98964414611459,-20.98052661136002;-19.034863697654146,52.51050685487962;124.707122089606,5.201085555522809;71.81253911430156,12.248020996907812;-72.92269769215189,76.607394892679;112.96721509289182,-77.21350952438036;-37.257933295085536,-74.31672605372268;137.19715096001818,-86.36068846074137;29.25823414890914,59.871572198628826;137.424730267747,50.068215170973076;69.3113724279957,66.60218668442744;81.09154073507057,86.15130160189753;0.47677749361281485,53.84854155901024;164.19010850036062,-6.933714794472266;51.83647172266947,50.49525173156198;-27.412182519055307,-68.71060334359203;38.30155708605275,25.185783838954293;-173.09044860863992,-64.19640826637165;-71.43306599716225,80.04040506892511;57.66247349736662,3.932697915012895;-75.57206140424013,-15.360850801705752;42.485554439582955,-42.37998982116714;-25.643267659524184,49.36206409815901;-131.22933687991792,-7.892940201021261;-72.61836265582892,12.318110796356734;25.187367852455367,-86.61783592145608;32.71419404934235,21.174389473657868;26.757089585848377,20.17723009003585;55.1522951485681,21.04811943745625;54.757177200608,79.87465413263234;-24.689363243769378,32.72765383862702;142.75677450638267,-25.28857789671852;-47.67772678275725,-11.334248316118533;-23.088626904374337,35.57361526690768;141.09240780564198,-79.15941510673143;110.22983605659084,30.018008780220185;73.39989007453187,30.714816531268696;-143.91832056757158,-52.131139006708636;151.0137409480825,-66.79326642212641;77.1268678376801,-33.22289683364691;179.58492236443192,-24.532061230327926;-126.19861032312225,12.635418675218332;132.52538065255715,-11.05172757678234;-121.50254351650506,87.9072908506607;41.60144314218391,-71.63193406535495;-135.42480617420105,-52.40218390292976;125.2829625560044,-60.964286780700675;110.63482514100383,27.559498583771713;24.87626590125359,-44.40751154283922;-33.41401299864012,-6.064060885864862;-155.09988163615031,-46.00339343971151;71.07435833204292,-61.38547494380645;-16.724634235895223,-70.13247459042508;79.94001580932525,28.139326103749212;131.89763733430652,-65.12706875724952;171.1877418010389,-54.61517489759037;128.08920326133995,-23.62946928102646;-175.7829296933993,57.778781372628316;-50.407896787789014,-72.52177035724897;82.79660247266088,60.8300833497847;-118.21331618588141,-72.70228657908665;7.573178233486544,85.76270370241124;-160.4383241978687,-5.642783703413713;-108.00125103729597,85.8169958742607;-173.33215399417898,18.87219355410828;105.73117320867141,43.0674442916943;-99.38711229826315,-82.94619739422228;-55.673394949115036,-39.09474673624628;154.1092656476127,-68.3646189816296;73.5891846924718,-36.69476444601391;-168.53798536872918,-68.62901058823607;-120.71010366075141,-32.76302770908431;43.73222453991488,-15.432660987359412;27.80229189750034,-78.45345065721881;-94.35858430517689,34.64498148660357;156.31703925292578,11.988261757183523;41.02774414772256,-42.22989163089983;12.827809088984992,4.184649624005942;32.36759148764554,-73.09070806348049;82.84393062603709,13.670369200112276;-67.69980162734333,77.27331556371854;-36.640417602206924,-32.65758855876174;-104.45625036895603,30.133868393462706;-112.97051788307898,-66.27638476720941;159.9740603942161,38.93889674134181;86.23828621774351,-37.9069032695038;-3.4348288976758568,-57.02555483871897;-98.13073392960364,15.572328265814974;-88.43166656265853,-86.38064168625117;-159.10950228340477,59.209205259125355;-23.61001479907651,-89.15481428534153?sources=0&annotations=distance,duration (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x11f444100>: Failed to establish a new connection: [Errno 61] Connection refused'))

In [21]:
print(params)

{'min_target': 15, 'max_target': 35, 'max_cost': 100, 'max_total_cost': 1000}


In [22]:
results.head()

Unnamed: 0,target_id,enum_id,value
0,T0001,E010,1.0
1,T0002,E004,1.0
2,T0003,E006,1.0
3,T0004,E002,1.0
4,T0005,E003,1.0
