In [3]:
import numpy as np
import pandas as pd

This notebook will serve as an example to showcase the different flows we will implement. we will generate a dataset of 100 enumerators and 2000 targets and simulate the optimization.

# Min haversine distance flow

In [2]:
# Set a random seed for reproducibility
np.random.seed(0)

# Generate 100 enumerators with random latitudes and longitudes
enumerator_data = {
    "enum_id": [f"E{i+1:03d}" for i in range(100)],  # Creates IDs like E001, E002, ...
    "enum_lat": np.random.uniform(-90, 90, 100),  # Random latitudes
    "enum_long": np.random.uniform(-180, 180, 100),  # Random longitudes
}

# Generate 2000 targets with random latitudes and longitudes
target_data = {
    "target_id": [
        f"T{i+1:04d}" for i in range(2000)
    ],  # Creates IDs like T0001, T0002, ...
    "target_lat": np.random.uniform(-90, 90, 2000),  # Random latitudes
    "target_long": np.random.uniform(-180, 180, 2000),  # Random longitudes
}

# Create the DataFrames
df_enum = pd.DataFrame(enumerator_data)
df_target = pd.DataFrame(target_data)

# Display the shape of the created DataFrames to confirm the number of rows
print(f"Enumerators DataFrame shape: {df_enum.shape}")
print(f"Targets DataFrame shape: {df_target.shape}")

Enumerators DataFrame shape: (100, 3)
Targets DataFrame shape: (2000, 3)


In [3]:
from optimization.utils import LocationDataset

enum_locations = LocationDataset(df_enum, "enum_id", "enum_lat", "enum_long")
target_locations = LocationDataset(
    df_target, "target_id", "target_lat", "target_long"
)

### Basic min distance flow

This flow implements the basic min distance model where we specify our parameters and the model will find the optimal results. Here are the parameters of the model:
- min_target: The minimum number of targets each enumerator is required to visit.
- max_target: The maximum number of targets each enumerator is allowed to visit.
- max_cost: The  maximum cost assignable to a surveyor to visit a single target.
- max_total_cost:  The initial maximum total cost assignable to a surveyor

In [4]:
from optimization.flows import basic_min_distance_flow

results = basic_min_distance_flow(
    enum_locations=enum_locations, 
    target_locations=target_locations,
    min_target=5,
    max_target=30, 
    max_distance=10000, 
    max_total_distance=100000
)

Optimal value:  2142243.4940421954


In [5]:
results.head()

Unnamed: 0,target_id,enum_id,value
0,T0001,E060,1.0
1,T0002,E063,1.0
2,T0003,E042,1.0
3,T0004,E061,1.0
4,T0005,E035,1.0


### Recursive Optimization Flow
This flow allows to recursively update parameters  until we reach a solution. 
The parameters are as follow:
- min_target: The minimum number of targets each enumerator is required to visit.
- max_target: The maximum number of targets each enumerator is allowed to visit.
- max_cost: The  maximum cost assignable to a surveyor to visit a single target.
- max_total_cost:  The initial maximum total cost assignable to a surveyor
- max_perc: The initial percentile to determine the maximum surveyor-to-target cost (default is 80).
- param_increment: The value by which the parameter bounds and percentiles are adjusted during the recursion if no solution is found (default is 5).

In [6]:
from optimization.flows import recursive_optimization_flow

results_df,params = recursive_optimization_flow(
    enum_locations=enum_locations,
    target_locations=target_locations,
    min_target=15,
    max_target=35,
    max_distance=10000,
    max_total_distance = 100000,
    param_increment=5,
)

Optimal value:  2244424.350659192


In [7]:
print(params)

{'min_target': 15, 'max_target': 35, 'max_cost': 10000, 'max_total_cost': 100000}


In [8]:
results_df.head()

Unnamed: 0,target_id,enum_id,value
0,T0001,E047,1.0
1,T0002,E063,1.0
2,T0003,E042,1.0
3,T0004,E031,1.0
4,T0005,E035,1.0


# Min OSRM distance flow


## Basic min osrm distance flow

We will generate random enumerators and targets data in the state of Tamil Nadu in India

In [7]:
# Set a random seed for reproducibility
np.random.seed(22)
# Define the boundaries of Tamil Nadu (approximate)
min_lat = 8.087964
max_lat = 13.088364
min_lon = 76.231978
max_lon = 80.346451


# Generate 100 enumerators with random latitudes and longitudes
enumerator_data = {
    "enum_id": [f"E{i+1:03d}" for i in range(100)],  # Creates IDs like E001, E002, ...
    "enum_lat": np.random.uniform(min_lat, max_lat, 100), 
    "enum_long": np.random.uniform(min_lon, max_lon, 100), 
}

# Generate 2000 targets with random latitudes and longitudes
target_data = {
    "target_id": [
        f"T{i+1:04d}" for i in range(2000)
    ],  # Creates IDs like T0001, T0002, ...
    "target_lat": np.random.uniform(min_lat, max_lat, 2000),  
    "target_long": np.random.uniform(min_lon, max_lon, 2000), 
}

# Create the DataFrames
df_enum = pd.DataFrame(enumerator_data)
df_target = pd.DataFrame(target_data)

# Display the shape of the created DataFrames to confirm the number of rows
print(f"Enumerators DataFrame shape: {df_enum.shape}")
print(f"Targets DataFrame shape: {df_target.shape}")

Enumerators DataFrame shape: (100, 3)
Targets DataFrame shape: (2000, 3)


In [9]:
from optimization.flows import basic_min_osrm_distance_flow

rom optimization.utils import LocationDataset

enum_locations = LocationDataset(df_enum, "enum_id", "enum_lat", "enum_long")
target_locations = LocationDataset(
    df_target, "target_id", "target_lat", "target_long"
)
results = basic_min_osrm_distance_flow(
    enum_locations=enum_locations,
    target_locations=target_locations,
    min_target=5,
    max_target=30,
    max_distance=10000,
    max_total_distance=100000,
)

NameError: name 'enum_locations' is not defined