In [1]:
from math import atan2, cos, radians, sin, sqrt

import pandas as pd
import pulp
import plotly.express as px
import plotly.graph_objects as go


# Introduction to PuLP

In this demo notebook, we’ll take a quick look at [PuLP](https://pypi.org/project/PuLP/), a Python library for defining and solving linear programming (LP) problems. Our app in Chapter 8 uses PuLP to determine which warehouses a company should choose to reduce cost or carbon emissions. 

You don't need to be an optimization expert to follow along! This is just a lightweight intro so you can understand what’s happening under the hood when our app uses PuLP to solve problems. We'll be explaining the algorithms from our app; you can find more examples in [PuLP's GitHub repo / documentation](https://github.com/coin-or/pulp).

## What is Linear Programming?

Linear Programming is a mathematical method used to determine the best outcome (like maximum profit or lowest cost) in a situation with certain constraints (like limited resources, time, or capacity). It’s widely used in fields like logistics, finance, energy, and operations research.

## What is PuLP?
PuLP is a Python library that lets you:

* Define optimization problems using Python code
* Set constraints and objective functions
* Use solvers (like CBC, which comes with PuLP) to find optimal solutions. It also provides an API to use commercial solvers such as CPLEX, GUROBI and more

It’s a great tool for integrating optimization into Python applications, just like our demo app!


## Application Problem

Our app tries to solve a **facility location problem** using linear programming.

The first step of the application DOES NOT involve Linear Programming: 

1. First, we need to read in the data from our CSV files
2. Then we calculate a distance matrix between customers and warehouses using the [haversine formula](https://en.wikipedia.org/wiki/Haversine_formula) (we create a function)

In [2]:
# Load the candidate warehouses (the path is relative to the notebook's working directory)
df_warehouses = pd.read_csv("../src/data/warehouses.csv")
# Preview the first two rows to check the available columns
df_warehouses.head(2)


Unnamed: 0,warehouse,country,city,latitude,longitude,yearly_cost,yearly_co2_tons
0,Warehouse 3,Spain,Logroño,42.4627,-2.4444,850071,441
1,Warehouse 10,France,Clermont-Ferrand,45.7772,3.087,901430,568


In [3]:
# Load the customers (the path is relative to the notebook's working directory)
df_customers = pd.read_csv("../src/data/customers.csv")
# Preview the first two rows to check the available columns
df_customers.head(2)


Unnamed: 0,id,country,city,latitude,longitude,company_name,yearly_orders
0,64,Italy,Bologna,44.4949,11.3426,64 - Lombardo-Martinelli SPA,60
1,21,Spain,Guadalajara,40.6292,-3.1614,21 - Vázquez & Asociados S.Com.,113


### Plotting the warehouses

This function plots the warehouses on a map; it's the one we use in the chapter (we add two helper functions to make it more modular):

In [4]:
def _prepare_data(df_customers, df_warehouses):
    df_cust = df_customers.copy()
    df_wh = df_warehouses.copy()
    df_cust["type"] = "Customer"
    df_wh["type"] = "Warehouse"
    data = pd.concat([df_cust, df_wh])
    return data


def _add_columns_to_data(data):
    data["hover_amount"] = data.apply(
        lambda row: (
            f"Yearly Orders: {row['yearly_orders']}"
            if row["type"] == "Customer"
            else f"Yearly Rent: {row['yearly_cost']:,.0f} €"
        ),
        axis=1,
    )
    data["hover_name"] = data.apply(
        lambda row: (
            f"Company Name: {row['company_name']}"
            if row["type"] == "Customer"
            else f"Warehouse {row['warehouse']}"
        ),
        axis=1,
    )
    # Different marker size for Warehouses and customers
    data["size"] = data["type"].map({"Customer": 5, "Warehouse": 22})
    return data


def plot_customers_warehouses(df_customers, df_warehouses):
    """
    Draw every customer and every candidate warehouse on a map of Europe.

    Returns the Plotly figure; nothing is displayed until the caller shows it
    (or leaves it as the last expression of a notebook cell).
    """
    map_data = _add_columns_to_data(_prepare_data(df_customers, df_warehouses))

    # The hover template below reads customdata[0] and customdata[1];
    # presumably these are "city" and "hover_amount", the two enabled entries.
    hover_columns = {
        "city": True,
        "hover_amount": True,
        "latitude": False,
        "longitude": False,
    }
    europe_center = {"lat": 50, "lon": 10}  # Center on Europe

    # Create the map
    fig = px.scatter_map(
        map_data,
        lat="latitude",
        lon="longitude",
        color="type",
        size="size",
        hover_name="hover_name",
        hover_data=hover_columns,
        map_style="carto-positron",
        zoom=3,
        center=europe_center,
        size_max=10,
    )

    hover_template = (
        "<b>%{hovertext}</b><br>%{customdata[0]}<br>%{customdata[1]}<extra></extra>"
    )
    fig.update_traces(hovertemplate=hover_template)

    # Legend sits inside the map (top-left) on a semi-transparent white box
    legend_style = {
        "x": 0.02,
        "y": 0.98,
        "bgcolor": "rgba(255,255,255,0.8)",
        "bordercolor": "black",
        "borderwidth": 1,
    }
    fig.update_layout(
        title="📍 Customers & Potential Warehouses",
        height=700,
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        legend=legend_style,
    )

    return fig


In [5]:
# Render the map of all customers and candidate warehouses loaded above
plot_customers_warehouses(df_customers, df_warehouses)

Next, we create the functions to create the distance matrix:

In [6]:
def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """
    Calculate the great-circle distance (in km) between two points on Earth.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance in kilometres, using a spherical Earth of radius 6371 km.
    """
    earth_radius_km = 6371
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make sqrt(1 - a) raise a math domain
    # error; clamp it back into the valid range.
    a = min(1.0, max(0.0, a))
    return earth_radius_km * 2 * atan2(sqrt(a), sqrt(1 - a))


def calculate_distance_matrix(df_warehouses, df_customers):
    """
    Returns a DataFrame where rows = warehouses, columns = customers,
    and values = distances in km.

    Rows and columns are labelled with the index of the corresponding input
    frame (both axes are named "id"), because two customers can have the same
    name. The input DataFrames are NOT modified: no "id" column is written
    into them, so an existing "id" column in the data is preserved.

    Both frames must provide "latitude" and "longitude" columns.
    """
    # rename() returns a new Index, leaving the callers' frames untouched
    warehouse_ids = df_warehouses.index.rename("id")
    customer_ids = df_customers.index.rename("id")

    # Materialize the customer coordinates once instead of re-iterating the
    # customer frame for every warehouse
    customer_coords = list(zip(df_customers["latitude"], df_customers["longitude"]))

    distances = [
        [
            haversine(wh_lat, wh_lon, cust_lat, cust_lon)
            for cust_lat, cust_lon in customer_coords
        ]
        for wh_lat, wh_lon in zip(df_warehouses["latitude"], df_warehouses["longitude"])
    ]
    return pd.DataFrame(distances, index=warehouse_ids, columns=customer_ids)

We calculate the distance matrix before we can start looking for optimum solutions:

In [7]:
# Distance table (km): one row per warehouse, one column per customer; reused by the optimization cells
df_matrix = calculate_distance_matrix(df_warehouses, df_customers)
df_matrix.head()

id,0,1,2,3,4,5,6,7,8,9,...,84,85,86,87,88,89,90,91,92,93
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,1133.680545,212.425946,686.99979,1059.075333,1063.519727,1267.4602,1103.05274,1043.796088,943.927554,1228.66332,...,1000.127611,640.936111,963.61674,1605.368973,76.028457,1445.455733,1190.810487,1170.891189,334.176109,1853.755348
1,662.750903,763.822384,135.638112,576.527536,588.622024,919.697018,626.456539,469.959903,484.45081,693.945204,...,539.522261,1214.606183,399.604662,1341.010853,500.398582,910.398219,684.917781,597.71726,890.134474,1284.05135
2,884.284892,1631.856436,816.705892,843.366419,439.491997,1236.337138,863.853444,445.383629,888.139034,212.78474,...,500.923398,2066.252463,480.132966,1650.995871,1360.02681,100.208522,790.59774,442.457978,1707.397731,687.247254
3,586.733774,1681.584685,1098.987626,671.516874,1495.623047,338.481912,623.277688,1095.866171,742.725109,1374.123197,...,1499.571049,2101.937946,1188.62517,350.303884,1539.436611,1461.512618,625.047573,974.624278,1927.934603,1031.546026
4,1635.988679,452.788205,1163.655618,1559.517778,1406.431571,1767.038521,1604.569644,1487.278541,1445.72735,1609.676796,...,1340.248123,387.738898,1391.16928,2083.830908,576.047358,1816.344916,1688.621835,1630.694647,179.344859,2318.581372


## Optimization Time 🧮: Selecting the Best Warehouses

Now that we have our data and distance matrix ready, it's time to solve the actual optimization problem using PuLP.

Our objective is to minimize either the total cost or the CO₂ emissions of serving all customers from a subset of warehouses. We will:

1. Create an optimization model using PuLP
2. Define decision variables, the objective function, and constraints
3. Extract the optimal solution and present it as user-friendly data

To keep things clean and modular, we’ll break the process into smaller, well-defined functions before combining them in the main create_pulp_model function.

### Steps: 

1. Problem Initialization: return the `pulp.LpProblem()` object, setting the optimization goal. In our case, it's minimization (we want to minimize cost)
2. Defining Decision Variables. These variables represent:
    * Whether each warehouse is selected
    * Which warehouse each customer is assigned to
3. Setting the objective function: This function sets the optimization goal — minimize total cost or total CO₂.
4. Adding Constraints: These constraints make sure that:
    * Every customer is served
    * Only open warehouses serve customers
    * We respect the warehouse number and geographical rules
5. Running the optimization function --> This step happens in the main `create_pulp_model` function
6. Extract the results into pandas DataFrames


### Step 1 — Initialize the Optimization Problem

Before we can add variables, objectives, or constraints, we need to define our optimization problem.

We’ll create a minimization problem, which tells PuLP that our goal is to minimize something — in our case, total cost or total CO₂ emissions.

In [8]:
def initialize_problem():
    """
    Build the empty PuLP problem, named "Warehouse_Selection", whose
    objective will be minimized.
    """
    problem = pulp.LpProblem(name="Warehouse_Selection", sense=pulp.LpMinimize)
    return problem

### Step 2: Define Decision Variables
Now that we have an optimization problem initialized, we need to define what decisions the solver can make.

We define two sets of binary decision variables:

* `warehouse_var[w]`:
Equals 1 if warehouse w is opened, 0 otherwise.

* `assignment_var[(w, c)]`:
Equals 1 if customer c is assigned to warehouse w, 0 otherwise.

These are created using `pulp.LpVariable.dicts`, which lets us create many variables at once in a dictionary-like structure.

In [9]:
def define_variables(df_warehouses, df_customers):
    """
    Build the binary (0/1) decision variables of the model.

    Returns a tuple of two dictionaries:
        warehouse_var:  keyed by warehouse index label.
        assignment_var: keyed by (warehouse label, customer label) tuples,
                        one entry per possible pairing.
    """
    warehouses = df_warehouses.index.tolist()
    customers = df_customers.index.tolist()

    # Every warehouse/customer combination, warehouses varying slowest
    pairings = []
    for w in warehouses:
        for c in customers:
            pairings.append((w, c))

    warehouse_var = pulp.LpVariable.dicts("Warehouse", warehouses, cat="Binary")
    assignment_var = pulp.LpVariable.dicts("Assignment", pairings, cat="Binary")
    return warehouse_var, assignment_var

--> **What does `LpVariable.dicts` do?**
It creates a dictionary of variables.

`cat="Binary"` tells PuLP these variables can only take values 0 or 1 (typical for yes/no decisions in optimization).

You can access a variable like `warehouse_var[2]` or `assignment_var[(1, 5)]`. 

**For example:**

In [10]:
# Display the two dictionaries of decision variables built for the loaded data
define_variables(df_warehouses, df_customers)

({0: Warehouse_0,
  1: Warehouse_1,
  2: Warehouse_2,
  3: Warehouse_3,
  4: Warehouse_4,
  5: Warehouse_5,
  6: Warehouse_6,
  7: Warehouse_7,
  8: Warehouse_8,
  9: Warehouse_9,
  10: Warehouse_10,
  11: Warehouse_11,
  12: Warehouse_12,
  13: Warehouse_13,
  14: Warehouse_14,
  15: Warehouse_15,
  16: Warehouse_16,
  17: Warehouse_17,
  18: Warehouse_18,
  19: Warehouse_19},
 {(0, 0): Assignment_(0,_0),
  (0, 1): Assignment_(0,_1),
  (0, 2): Assignment_(0,_2),
  (0, 3): Assignment_(0,_3),
  (0, 4): Assignment_(0,_4),
  (0, 5): Assignment_(0,_5),
  (0, 6): Assignment_(0,_6),
  (0, 7): Assignment_(0,_7),
  (0, 8): Assignment_(0,_8),
  (0, 9): Assignment_(0,_9),
  (0, 10): Assignment_(0,_10),
  (0, 11): Assignment_(0,_11),
  (0, 12): Assignment_(0,_12),
  (0, 13): Assignment_(0,_13),
  (0, 14): Assignment_(0,_14),
  (0, 15): Assignment_(0,_15),
  (0, 16): Assignment_(0,_16),
  (0, 17): Assignment_(0,_17),
  (0, 18): Assignment_(0,_18),
  (0, 19): Assignment_(0,_19),
  (0, 20): Assignme

### Step 3: Setting the objective function

Once the decision variables are in place, we need to tell the solver what we want to minimize.

Our app supports two optimization goals:

* Minimize cost (including warehouse rent + transportation)
* Minimize CO₂ emissions (including warehouse emissions + transport emissions)

To keep things modular and readable, we use two helper functions (prefixed with _) to build each version of the objective.


`pulp.lpSum(...)` is PuLP’s way of building up a linear expression: it sums over large sets of variables.

In both objectives, we combine fixed costs/emissions of operating a warehouse with the variable transport costs/emissions based on how far customers are from selected warehouses.

`prob +=` expression sets this expression as the thing we want to minimize; the `prob` variable comes from the initialization, it represents the problem we are trying to solve.

In [11]:
def _objective_total(assignment_var, warehouse_var, df_warehouses, df_customers, distance_matrix,
                     transport_unit_cost, warehouse_cost_column, warehouse_cost_multiplier=1):
    """
    Build the linear expression to minimize, for either price or CO₂.

    The expression is the sum of two parts:
        * transport: for every (warehouse, customer) pairing, the assignment
          variable times distance, unit cost and the customer's yearly orders;
        * warehouses: for every warehouse, its selection variable times the
          value in `warehouse_cost_column`, scaled by
          `warehouse_cost_multiplier`.
    """
    warehouses = df_warehouses.index.tolist()
    customers = df_customers.index.tolist()

    # Variable part: depends on which customer is served from where
    transport_terms = []
    for w in warehouses:
        for c in customers:
            transport_terms.append(
                assignment_var[(w, c)]
                * distance_matrix.at[w, c]
                * transport_unit_cost
                * df_customers.at[c, "yearly_orders"]
            )

    # Fixed part: paid once for every warehouse that is opened
    warehouse_terms = []
    for w in warehouses:
        warehouse_terms.append(
            warehouse_var[w]
            * df_warehouses.at[w, warehouse_cost_column]
            * warehouse_cost_multiplier
        )

    transport_total = pulp.lpSum(transport_terms)
    warehouse_total = pulp.lpSum(warehouse_terms)
    return transport_total + warehouse_total

In [12]:
def set_objective_function(
    prob,
    assignment_var,
    warehouse_var,
    df_warehouses,
    df_customers,
    distance_matrix,
    optimize,
    price_per_km=4,
    co2_per_km=2
):
    """
    Attach the objective to minimize to `prob`.

    `optimize == "price"` selects the cost objective (transport priced with
    `price_per_km`, warehouses with their "yearly_cost"). Any other value
    selects the CO₂ objective (transport weighted with `co2_per_km`,
    warehouses with "yearly_co2_tons" scaled by 1000).
    """
    if optimize == "price":
        objective_options = {
            "transport_unit_cost": price_per_km,
            "warehouse_cost_column": "yearly_cost",
        }
    else:
        objective_options = {
            "transport_unit_cost": co2_per_km,
            "warehouse_cost_column": "yearly_co2_tons",
            "warehouse_cost_multiplier": 1000,  # Convert tons to kg
        }

    objective = _objective_total(
        assignment_var,
        warehouse_var,
        df_warehouses,
        df_customers,
        distance_matrix,
        **objective_options,
    )
    # Adding a bare expression (not a comparison) to the problem sets its objective
    prob += objective

### Step 4: Adding Constraints

This function tells the optimization model how the problem works. It defines the rules (called **constraints**) that the solution must follow. Without these rules, the model could give us crazy or meaningless results.

With PuLP, we build the problem by combining an objective function (from the function before this one!) with a list of constraints (what the solution is allowed to do). --> This function adds the constraints to the same `prob` element. 

In other words: The objective + the constraints define a problem to solve.

#### What constraints we add, and WHY:

1. **Each customer must be assigned to exactly one warehouse**

Every customer needs to receive deliveries from some warehouse, but only one. 
This constraint forces the model to pick exactly one warehouse for each customer.

2. **Customers can only be assigned to warehouses that are open**

We don’t want the model to assign customers to warehouses we didn’t even open — that would be like shipping from an imaginary location.
This rule ties the assignment of a customer to a warehouse to whether that warehouse is actually selected/open.

3. **Limit the total number of warehouses**

The end application lets users select how many warehouses to add to the solution. Sometimes we want exactly `n` warehouses, sometimes we’re flexible.
This part adds either:

* A fixed number of warehouses (if the user says `number_of_warehouses` = `n`)
* Or a reasonable range (like between 1 and 10), if the user just says "any"

4. **(Optional) Require warehouses in specific countries**

Sometimes, for strategic, political, or logistical reasons, we want to make sure we operate in certain countries.
If the user passes a list like ["France", "Germany"], this rule tells the model:

"Hey, make sure you open at least one warehouse in France and one in Germany."

It’s optional — we only add this if the user asks for it.



In [13]:
def add_constraints(prob, assignment_var, warehouse_var, df_warehouses, df_customers, number_of_warehouses, country_list=None, max_warehouses=10):
    """
    Adds problem constraints.

    Args:
        prob: The PuLP problem the constraints are added to.
        assignment_var: Dict of binary variables keyed by (warehouse, customer).
        warehouse_var: Dict of binary variables keyed by warehouse.
        df_warehouses: Warehouses; its index supplies the warehouse keys and
            its "country" column is used for the country constraints.
        df_customers: Customers; its index supplies the customer keys.
        number_of_warehouses: "any", or a value accepted by int() giving the
            exact number of warehouses to open.
        country_list: Optional list of countries; at least one warehouse must
            be opened in each of them.
        max_warehouses: Upper bound on opened warehouses when
            number_of_warehouses is "any" (defaults to 10).
    """
    warehouses = df_warehouses.index.tolist()
    customers = df_customers.index.tolist()

    def _open_warehouse_count():
        # Fresh expression on every call so constraints never share an object
        return pulp.lpSum(warehouse_var[w] for w in warehouses)

    # 1. Each customer is assigned to exactly one warehouse
    for c in customers:
        prob += pulp.lpSum(assignment_var[(w, c)] for w in warehouses) == 1

    # 2. Customers can only be assigned to selected warehouses
    for w in warehouses:
        for c in customers:
            prob += assignment_var[(w, c)] <= warehouse_var[w]

    # 3. Limit number of warehouses
    if number_of_warehouses != "any":
        prob += _open_warehouse_count() == int(number_of_warehouses)
    else:
        prob += _open_warehouse_count() >= 1
        prob += _open_warehouse_count() <= max_warehouses

    # 4. Country constraints (optional)
    if country_list:
        for country in country_list:
            wh_in_country = df_warehouses[df_warehouses["country"] == country].index.tolist()
            # NOTE(review): a country with no candidate warehouse yields an
            # unsatisfiable "0 >= 1" constraint, i.e. an infeasible model.
            prob += pulp.lpSum(warehouse_var[w] for w in wh_in_country) >= 1


### Step 6. Extracting results

**Important!** You may think we forgot step 5... Not quite! Step 5 is to launch the optimization function, which happens in the main function!

This step collects the results after the solver gives us a solution. It formats it so it’s human-readable and ready for analysis or visualization.

We're basically asking:

“Which warehouses did the model choose? Which customers are assigned to which warehouse? And what are the total costs and CO₂ emissions?”

In [14]:
def extract_results(warehouse_var, assignment_var, df_warehouses, df_customers, distance_matrix, price_per_km=4, co2_per_km=2):
    """
    Extracts the results from the solved problem and formats them.

    Args:
        warehouse_var: Dict of solved binary variables keyed by warehouse.
        assignment_var: Dict of solved binary variables keyed by
            (warehouse, customer).
        df_warehouses: Warehouses (needs "warehouse", "latitude", "longitude",
            "yearly_cost" and "yearly_co2_tons").
        df_customers: Customers (needs "company_name", "latitude",
            "longitude" and "yearly_orders").
        distance_matrix: Distances indexed as [warehouse, customer].
        price_per_km: Transport price factor applied to distance * orders.
        co2_per_km: Transport CO₂ factor applied to distance * orders.

    Returns:
        (df_selected, df_assignments):
            df_selected: the selected warehouse rows plus "scenario_cost",
                "scenario_co2_tons" and "scenario_orders".
            df_assignments: one row per customer served, with the warehouse,
                coordinates, distance, orders and transport cost / CO₂.
    """

    def _is_on(variable):
        # Solver output is floating point, so a binary can come back as e.g.
        # 0.9999999 instead of 1; compare against 0.5 rather than == 1.
        # A variable without a value (problem not solved) counts as "off".
        solved_value = pulp.value(variable)
        return solved_value is not None and solved_value > 0.5

    selected_warehouses = [w for w in df_warehouses.index if _is_on(warehouse_var[w])]
    customers = df_customers.index.tolist()

    assignments = []
    scenario_costs = []
    scenario_co2s = []
    scenario_orders = []

    for w in selected_warehouses:
        yearly_cost = df_warehouses.at[w, "yearly_cost"]
        yearly_co2_tons = df_warehouses.at[w, "yearly_co2_tons"]
        total_transport_cost = 0
        total_transport_co2 = 0
        warehouse_orders = 0

        for c in customers:
            if not _is_on(assignment_var[(w, c)]):
                continue

            distance = distance_matrix.at[w, c]
            orders = df_customers.at[c, "yearly_orders"]
            transport_cost = distance * price_per_km * orders
            transport_co2 = distance * co2_per_km * orders

            total_transport_cost += transport_cost
            total_transport_co2 += transport_co2
            warehouse_orders += orders

            assignments.append({
                "warehouse": df_warehouses.at[w, "warehouse"],
                "warehouse_lat": df_warehouses.at[w, "latitude"],
                "warehouse_lon": df_warehouses.at[w, "longitude"],
                "customer": df_customers.at[c, "company_name"],
                "customer_lat": df_customers.at[c, "latitude"],
                "customer_lon": df_customers.at[c, "longitude"],
                "distance_km": distance,
                "orders": orders,
                "total_cost": int(transport_cost),
                "total_co2_kg": int(transport_co2),
            })

        scenario_costs.append(int(yearly_cost + total_transport_cost))
        # Transport CO₂ is divided by 1000 before being added to the
        # warehouse's yearly tons (kg -> tons, matching "total_co2_kg" above)
        scenario_co2s.append(int(yearly_co2_tons + total_transport_co2 / 1000))
        scenario_orders.append(warehouse_orders)

    df_selected = df_warehouses.loc[selected_warehouses].copy()
    df_selected["scenario_cost"] = scenario_costs
    df_selected["scenario_co2_tons"] = scenario_co2s
    df_selected["scenario_orders"] = scenario_orders
    df_assignments = pd.DataFrame(assignments)

    return df_selected, df_assignments

### Main optimization function

We call all our previous functions from a main module.

**To summarize:**

1. We create an empty problem (`prob` variable)
2. We create variables for the mathematical equations
3. We add the objective: this is what to solve
4. We add the constraints: these are the conditions the solution needs to respect
5. We solve the problem using a solver; in our case, we use `PULP_CBC_CMD`:

```python
    prob.solve(pulp.PULP_CBC_CMD(msg=False))
```

6. We extract the results

In our case:

1. We create a minimization problem
2. Our variables are the warehouses that we can select, and each possible warehouse - customer combination
3. Our objective can be selected by users (from the app's UI). They have two choices:
    * Reduce (optimize) price (cost)
    * Reduce (optimize) CO2 emissions
4. Some constraints are "mandatory", like each customer needs to be assigned to one and only one warehouse. Some constraints are selected by the user from the UI (like mandatory country presence, or number of warehouses to include in the solution)
5. We use the `PULP_CBC_CMD` solver, because this is a simple problem.
6. We extract 2 DataFrames from the solution:
    * The warehouses from the solution
    * The customer - warehouse relationship

In [15]:
def create_pulp_model(
    df_warehouses,
    df_customers,
    distance_matrix,
    optimize="price",
    number_of_warehouses="any",
    country_list=None,
    price_per_km=4,
    co2_per_km=2
):
    """
    Creates and solves the warehouse optimization problem using PuLP.

    Args:
        df_warehouses: Candidate warehouses.
        df_customers: Customers to serve.
        distance_matrix: Distances indexed as [warehouse, customer].
        optimize: "price" to minimize cost; any other value minimizes CO₂.
        number_of_warehouses: "any", or the exact number of warehouses to open.
        country_list: Optional countries that must each host a warehouse.
        price_per_km: Transport price factor.
        co2_per_km: Transport CO₂ factor.

    Returns:
        (df_selected, df_assignments) as produced by extract_results.

    Warns:
        RuntimeWarning: if the solver does not report an optimal solution
            (for instance when the constraints cannot all be satisfied); the
            returned DataFrames should not be trusted in that case.
    """
    import warnings  # local import: only needed to report a non-optimal solve

    # Work on copies so the "id" columns added here stay out of the caller's frames
    df_warehouses = df_warehouses.copy()
    df_customers = df_customers.copy()
    df_warehouses["id"] = df_warehouses.index
    df_customers["id"] = df_customers.index

    prob = initialize_problem()
    warehouse_var, assignment_var = define_variables(df_warehouses, df_customers)
    set_objective_function(prob, assignment_var, warehouse_var, df_warehouses, df_customers, distance_matrix, optimize, price_per_km, co2_per_km)
    add_constraints(prob, assignment_var, warehouse_var, df_warehouses, df_customers, number_of_warehouses, country_list)
    prob.solve(pulp.PULP_CBC_CMD(msg=False))

    # Without this check an infeasible model would be reported like a valid solution
    if prob.status != pulp.LpStatusOptimal:
        status_name = pulp.LpStatus.get(prob.status, prob.status)
        warnings.warn(
            f"Solver status is '{status_name}', not 'Optimal'; the returned "
            "DataFrames may be empty or may not satisfy the constraints.",
            RuntimeWarning,
            stacklevel=2,
        )

    return extract_results(warehouse_var, assignment_var, df_warehouses, df_customers, distance_matrix, price_per_km, co2_per_km)


## Using our function!

We create a notebook cell with variables, feel free to change their value and then re-run the model below!

In [16]:
# Scenario parameters: edit these, then re-run the optimization cell below
number_of_warehouses = 3  # exact number of warehouses to open, or "any"
optimize = "price"  # or "co2"
country_list = ["France", "Germany"]  # optional
price_per_km = 4  # transport price factor, multiplied by distance (km) and yearly orders
co2_per_km = 2  # transport CO2 factor, same usage; presumably kg -- TODO confirm units

In [17]:
# Run optimization
# Solve the model with the parameters defined in the previous cell
df_selected, df_assignments = create_pulp_model(
    df_warehouses,
    df_customers,
    df_matrix,
    optimize=optimize,
    number_of_warehouses=number_of_warehouses,
    country_list=country_list,
    price_per_km=price_per_km,
    co2_per_km=co2_per_km
)

# Display the selected warehouses with their scenario totals
df_selected.head()

Unnamed: 0,warehouse,country,city,latitude,longitude,yearly_cost,yearly_co2_tons,id,scenario_cost,scenario_co2_tons,scenario_orders
0,Warehouse 3,Spain,Logroño,42.4627,-2.4444,850071,441,0,12008864,6020,6961
5,Warehouse 6,France,Grenoble,45.1885,5.7245,1232975,552,5,10227741,5049,4553
8,Warehouse 13,Germany,Karlsruhe,49.0069,8.4037,1687606,737,8,20322985,10054,12327


In [18]:
# Preview the customer-to-warehouse assignments (one row per customer served)
df_assignments.head()

Unnamed: 0,warehouse,warehouse_lat,warehouse_lon,customer,customer_lat,customer_lon,distance_km,orders,total_cost,total_co2_kg
0,Warehouse 3,42.4627,-2.4444,21 - Vázquez & Asociados S.Com.,40.6292,-3.1614,212.425946,113,96016,48008
1,Warehouse 3,42.4627,-2.4444,8 - Rodrigues Pires S.A.,41.2956,-7.7464,457.646412,153,280079,140039
2,Warehouse 3,42.4627,-2.4444,11 - Logística VWFK S.L.L.,41.6523,-4.7236,208.623292,178,148539,74269
3,Warehouse 3,42.4627,-2.4444,20 - Soluciones BMV S.L.L.,39.9596,-4.8308,342.483312,207,283576,141788
4,Warehouse 3,42.4627,-2.4444,3 - Fernandes,38.5243,-8.8926,698.996247,207,578768,289384


## Plotting the results!

We create a different version of the map function to show the selected warehouses and the link to each customer:

In [19]:
def _prepare_data(df_assignments):
    # Extract unique warehouses and customers
    df_warehouses_unique = df_assignments[
        ["warehouse", "warehouse_lat", "warehouse_lon"]
    ].drop_duplicates()
    df_warehouses_unique = df_warehouses_unique.rename(
        columns={"warehouse": "name", "warehouse_lat": "lat", "warehouse_lon": "lon"}
    )
    df_warehouses_unique["type"] = "Warehouse"

    df_customers_unique = df_assignments[
        ["customer", "customer_lat", "customer_lon"]
    ].drop_duplicates()
    df_customers_unique = df_customers_unique.rename(
        columns={"customer": "name", "customer_lat": "lat", "customer_lon": "lon"}
    )
    df_customers_unique["type"] = "Customer"

    data = pd.concat([df_warehouses_unique, df_customers_unique], ignore_index=True)
    return data


def _add_columns_to_data(data):
    # Prepare hover name and size
    data["hover_name"] = data.apply(
        lambda row: (
            f"Warehouse {row['name']}"
            if row["type"] == "Warehouse"
            else f"Customer: {row['name']}"
        ),
        axis=1,
    )
    data["size"] = data["type"].map({"Warehouse": 22, "Customer": 5})
    return data


def plot_assignments(df_assignments):
    data = _prepare_data(df_assignments)
    data = _add_columns_to_data(data)

    # Create the base figure with markers
    fig = px.scatter_map(
        data,
        lat="lat",
        lon="lon",
        color="type",
        color_discrete_map={"Customer": "blue", "Warehouse": "red"},
        size="size",
        hover_name="hover_name",
        hover_data={
            "lat": False,
            "lon": False,
            "type": False,
            "name": False,
            "size": False,
        },
        map_style="carto-positron",
        zoom=3,
        center={"lat": 50, "lon": 10},  # Center on Europe
        size_max=10,
    )

    # Prepare line coordinates for assignments
    lons = []
    lats = []
    for _, row in df_assignments.iterrows():
        lons.extend([row["warehouse_lon"], row["customer_lon"], None])
        lats.extend([row["warehouse_lat"], row["customer_lat"], None])

    # Add lines to the figure
    fig.add_trace(
        go.Scattermap(
            lon=lons,
            lat=lats,
            mode="lines",
            line=dict(width=1, color="#003399"),
            showlegend=False,
        )
    )

    # Customize layout similar to the example function
    fig.update_layout(
        title="📍 Customer-Warehouse Assignments",
        height=700,
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        legend=dict(
            x=0.02,
            y=0.98,
            bgcolor="rgba(255,255,255,0.8)",
            bordercolor="black",
            borderwidth=1,
        ),
    )

    # Update hovertemplate to only show the hover_name
    fig.update_traces(hovertemplate="<b>%{hovertext}</b><extra></extra>")

    return fig

In [20]:
# Render the map of the optimized solution: selected warehouses linked to their customers
plot_assignments(df_assignments)