In [None]:
from dataclasses import dataclass

import numpy as np
import pandas as pd
import pulp
import plotly.express as px
import plotly.graph_objects as go

# Introduction to PuLP

In this demo notebook, we’ll take a quick look at [PuLP](https://pypi.org/project/PuLP/), a Python library for defining and solving linear programming (LP) problems. Our app in Chapter 8 uses PuLP to determine which warehouses a company should choose to reduce cost or carbon emissions. 

You don't need to be an optimization expert to follow along! This is just a lightweight intro so you can understand what’s happening under the hood when our app uses PuLP to solve problems. We'll only explain the algorithms used in our app; you can find more examples in [PuLP's GitHub repo / documentation](https://github.com/coin-or/pulp).

## What is Linear Programming?

Linear Programming is a mathematical method used to determine the best outcome (like maximum profit or lowest cost) in a situation with certain constraints (like limited resources, time, or capacity). It’s widely used in fields like logistics, finance, energy, and operations research.

## What is PuLP?
PuLP is a Python library that lets you:

* Define optimization problems using Python code
* Set constraints and objective functions
* Use solvers (like CBC, which comes with PuLP) to find optimal solutions. It also provides an API to use commercial solvers such as CPLEX, GUROBI and more

It’s a great tool for integrating optimization into Python applications, just like our demo app!


## Application Problem

Our app tries to solve a **facility location problem** using linear programming.

The first step of the application DOES NOT involve Linear Programming: 

1. First, we need to read in the data from our CSV files
2. Then we calculate a distance matrix between customers and warehouses using the [haversine formula](https://en.wikipedia.org/wiki/Haversine_formula) (we create a function)

In [None]:
# Candidate warehouses. The path is relative, so it resolves against the
# directory the notebook kernel was started in.
df_warehouses = pd.read_csv("../src/data/warehouses.csv")
# Preview two rows only, to keep the cell output short.
df_warehouses.head(2)


In [None]:
# Customers to be served. The path is relative, so it resolves against the
# directory the notebook kernel was started in.
df_customers = pd.read_csv("../src/data/customers.csv")
# Preview two rows only, to keep the cell output short.
df_customers.head(2)


### Plotting the warehouses

This function plots the warehouses and customers on a map. It's the one we use in the chapter (we add two helper functions to make it more modular):

In [None]:
def _prepare_data(df_customers, df_warehouses):
    df_cust = df_customers.copy()
    df_wh = df_warehouses.copy()
    df_cust["type"] = "Customer"
    df_wh["type"] = "Warehouse"
    data = pd.concat([df_cust, df_wh])
    return data


def _add_columns_to_data(data):
    data["hover_amount"] = data.apply(
        lambda row: (
            f"Yearly Orders: {row['yearly_orders']}"
            if row["type"] == "Customer"
            else f"Yearly Rent: {row['yearly_cost']:,.0f} €"
        ),
        axis=1,
    )
    data["hover_name"] = data.apply(
        lambda row: (
            f"Company Name: {row['company_name']}"
            if row["type"] == "Customer"
            else f"Warehouse {row['warehouse']}"
        ),
        axis=1,
    )
    # Different marker size for Warehouses and customers
    data["size"] = data["type"].map({"Customer": 5, "Warehouse": 22})
    return data


def plot_customers_warehouses(df_customers, df_warehouses):
    """
    Draw customers and candidate warehouses as markers on a map.

    The two frames are merged and decorated by the helper functions, then
    rendered with Plotly Express. Returns the Plotly figure so the notebook
    displays it as the cell output.
    """
    points = _add_columns_to_data(_prepare_data(df_customers, df_warehouses))

    # The hover template below reads customdata[0] and customdata[1]; these are
    # expected to be the two columns switched on here (city, hover_amount).
    hover_columns = {
        "city": True,
        "hover_amount": True,
        "latitude": False,
        "longitude": False,
    }

    # Create the map
    fig = px.scatter_map(
        points,
        lat="latitude",
        lon="longitude",
        color="type",
        size="size",
        hover_name="hover_name",
        hover_data=hover_columns,
        map_style="carto-positron",
        zoom=3,
        center={"lat": 50, "lon": 10},  # Center on Europe
        size_max=10,
    )

    tooltip = "<b>%{hovertext}</b><br>%{customdata[0]}<br>%{customdata[1]}<extra></extra>"
    fig.update_traces(hovertemplate=tooltip)

    # Legend sits inside the map (top-left) on a white semi-transparent box
    legend_box = {
        "x": 0.02,
        "y": 0.98,
        "bgcolor": "rgba(255,255,255,0.8)",
        "bordercolor": "black",
        "borderwidth": 1,
    }
    fig.update_layout(
        title="📍 Customers & Potential Warehouses",
        height=700,
        margin=dict(r=0, t=50, l=0, b=0),
        legend=legend_box,
    )

    return fig


In [None]:
plot_customers_warehouses(df_customers, df_warehouses)

Next, we create the functions to create the distance matrix:

In [None]:
def haversine_vectorized(lat1, lon1, lat2, lon2):
    """
    Great-circle distance (km) between every point of set 1 and every point of set 2.

    Args:
        lat1, lon1: Latitudes/longitudes of the first set, in degrees. Any 1-D
            array-like of length n (NumPy array, list, pandas Series) or a scalar.
        lat2, lon2: Latitudes/longitudes of the second set, in degrees. Any 1-D
            array-like of length m or a scalar.

    Returns:
        np.ndarray of shape (n, m); element [i, j] is the haversine distance in
        kilometers between point i of set 1 and point j of set 2.
    """

    def _as_radians(values):
        # np.asarray lets callers pass lists or pandas Series, which do not
        # support the `x[:, np.newaxis]` indexing used for broadcasting below.
        return np.radians(np.atleast_1d(np.asarray(values, dtype=float)))

    lat1 = _as_radians(lat1)[:, np.newaxis]  # shape (n, 1)
    lon1 = _as_radians(lon1)[:, np.newaxis]  # shape (n, 1)
    lat2 = _as_radians(lat2)[np.newaxis, :]  # shape (1, m)
    lon2 = _as_radians(lon2)[np.newaxis, :]  # shape (1, m)

    dlat = lat2 - lat1  # shape (n, m)
    dlon = lon2 - lon1  # shape (n, m)

    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    R = 6371.0  # Radius of Earth in kilometers
    return R * c  # shape (n, m)

def calculate_distance_matrix(df_warehouses, df_customers):
    """
    Build the warehouse-to-customer distance table.

    Rows are labelled with the warehouse index and columns with the customer
    index; each cell is the value returned by `haversine_vectorized` for that
    pair, computed from the `latitude` / `longitude` columns of both frames.
    """
    # Pull the coordinate columns out as plain NumPy arrays
    wh_lat, wh_lon = (
        df_warehouses[col].to_numpy() for col in ("latitude", "longitude")
    )
    cu_lat, cu_lon = (
        df_customers[col].to_numpy() for col in ("latitude", "longitude")
    )

    # Warehouses go first so they end up on the row axis
    distances = haversine_vectorized(wh_lat, wh_lon, cu_lat, cu_lon)

    return pd.DataFrame(
        distances,
        index=df_warehouses.index,
        columns=df_customers.index,
    )

We calculate the distance matrix before we can start looking for optimum solutions:

In [None]:
# One row per warehouse, one column per customer (labels are the DataFrame indexes).
df_matrix = calculate_distance_matrix(df_warehouses, df_customers)
df_matrix.head()

## Optimization Time 🧮: Selecting the Best Warehouses

Now that we have our data and distance matrix ready, it's time to solve the actual optimization problem using PuLP.

Our objective is to minimize either the total cost or the CO₂ emissions of serving all customers from a subset of warehouses. To keep things clean and modular, we organize the process using a dataclass and a set of helper functions. This allows us to manage input data more reliably and make the optimization logic easier to understand and maintain.

The ProblemData dataclass (defined in problem_data.py) bundles all input data in one place: customer and warehouse DataFrames, the distance matrix, and cost/emission parameters. Every helper function works with this single ProblemData object, making the code both tidy and consistent.

In [None]:
@dataclass
class ProblemData:
    """
    Container for every input the optimization helpers need.

    Attributes:
        df_warehouses: Candidate warehouses. The helpers in this notebook read
            its index and the `warehouse`, `country`, `latitude`, `longitude`,
            `yearly_cost` and `yearly_co2_tons` columns.
        df_customers: Customers to serve. The helpers read its index and the
            `company_name`, `latitude`, `longitude` and `yearly_orders` columns.
        distance_matrix: Distances looked up as `.at[warehouse, customer]`,
            i.e. warehouses on the rows and customers on the columns.
        price_per_km: Multiplied by distance and yearly orders to get the
            transport cost.
        co2_per_km: Multiplied by distance and yearly orders to get the
            transport emissions (the results are reported as kg).
    """

    df_warehouses: pd.DataFrame
    df_customers: pd.DataFrame
    distance_matrix: pd.DataFrame
    price_per_km: float = 4
    co2_per_km: float = 2

Meanwhile, each logical step of the optimization is handled by its own helper function, from defining decision variables and constraints to extracting results. All these steps are coordinated by the main create_pulp_model function.


1. Create an optimization model using PuLP
2. Define decision variables, the objective function, and constraints
3. Extract the optimal solution and present it as user-friendly data

To keep things clean and modular, we’ll break the process into smaller, well-defined functions before combining them in the main create_pulp_model function.

### Steps: 

1. **Problem Initialization**: We start by creating the optimization model using `_initialize_problem()`, which returns a `pulp.LpProblem()` object configured for minimization. Our goal is to reduce total cost or CO₂ emissions.

2. **Defining Decision Variables**: Next, `_define_variables(data)` creates the binary decision variables:
    - `warehouse_var` indicates if a warehouse is selected.
    - `assignment_var` indicates if a customer is assigned to a particular warehouse.

3. **Setting the Objective Function**: Using `_set_objective_function()`, we build the objective that combines fixed warehouse costs or emissions with transportation costs or CO₂ terms (computed with `_compute_transport_cost_term()`, `_compute_transport_co2_term()`, and `_compute_warehouse_cost_term()`). This guides the solver to minimize the total impact.

4. **Adding Constraints**: We then add necessary constraints through `_add_constraints()`, which calls:
    - `_add_customer_assignment_constraints()` to ensure every customer is assigned to exactly one warehouse
    - `_add_warehouse_selection_constraints()` to allow customer assignments only to open warehouses
    - `_add_number_of_warehouses_constraints()` to set how many warehouses should be selected
    - `_add_country_constraints()` (optional) to force selected countries in the solution

5. **Solving the Optimization**: All of these steps are orchestrated in the main `create_pulp_model()` function, which calls PuLP’s solver to find the optimal solution.

6. **Extracting Results**: Finally, `_interpret_solution()` reads the solution variables, and `_format_results()` converts the data into two pandas DataFrames: one summarizing selected warehouses, and one with customer assignment details.


### Step 1 — Initialize the Optimization Problem

Before we can add variables, objectives, or constraints, we need to define our optimization problem.

We’ll create a minimization problem, which tells PuLP that our goal is to minimize total cost or total CO₂ emissions.

In [None]:
def _initialize_problem():
    """
    Create the empty PuLP problem that the other helpers fill in.

    The problem is named "Warehouse_Selection" and uses the minimization sense.
    """
    problem = pulp.LpProblem(name="Warehouse_Selection", sense=pulp.LpMinimize)
    return problem

### Step 2: Define Decision Variables

After initializing the optimization problem, we specify the decisions the solver can control by defining binary decision variables.  
There are two key sets of variables:

- `warehouse_var[w]`: Equals 1 if warehouse _w_ is selected (opened), and 0 otherwise.
- `assignment_var[(w, c)]`: Equals 1 if customer _c_ is assigned to warehouse _w_, and 0 otherwise.

These variables are created using PuLP’s `LpVariable.dicts` within the helper function `_define_variables(data)`, which efficiently builds these dictionary-based variables for all warehouses and customers at once.

In [None]:
def _define_variables(data: ProblemData):
    """
    Build the two families of binary (0/1) decision variables.

    Returns:
        (warehouse_var, assignment_var) where
        - warehouse_var is keyed by warehouse index label, and
        - assignment_var is keyed by (warehouse label, customer label) tuples,
          one entry for every possible pair.
    """
    warehouse_ids = data.df_warehouses.index.tolist()
    customer_ids = data.df_customers.index.tolist()

    # Every warehouse/customer combination, warehouses varying slowest
    pairs = []
    for w in warehouse_ids:
        for c in customer_ids:
            pairs.append((w, c))

    open_flags = pulp.LpVariable.dicts("Warehouse", warehouse_ids, cat="Binary")
    serve_flags = pulp.LpVariable.dicts("Assignment", pairs, cat="Binary")
    return open_flags, serve_flags

--> **What does `LpVariable.dicts` do?**
It creates a dictionary of variables.

`cat="Binary"` tells PuLP these variables can only take values 0 or 1 (typical for yes/no decisions in optimization).

You can access a variable like `warehouse_var[2]` or `assignment_var[(1, 5)]`. 

**For example:**

In [None]:
# Demo container built from copies, so the original frames are not shared with it.
demo_data = ProblemData(
    df_warehouses = df_warehouses.copy(),
    df_customers = df_customers.copy(),
    distance_matrix = df_matrix.copy(),
    price_per_km=4,
    co2_per_km=2,
)

# The cell output is the (warehouse_var, assignment_var) tuple of dictionaries.
_define_variables(demo_data)


### Step 3: Set the Objective Function

Once the decision variables are in place, we define what the solver should try to minimize. This is known as the **minimization objective**.  
Our application supports two types of objectives:

- **Minimize total cost**: includes both warehouse rent and transport cost per kilometer
- **Minimize CO₂ emissions**: includes both warehouse emissions and transport-related emissions

To keep the code modular and readable, the objective is built using helper functions that return linear expressions for each part of the total. These expressions use `pulp.lpSum(...)`, which creates linear summations across decision variables.

The function `_compute_transport_cost_term` calculates transport costs over all customer-warehouse assignments, based on distances and order volumes. `_compute_transport_co2_term` calculates transport emissions. The fixed costs or emissions from running each selected warehouse are handled by `_compute_warehouse_cost_term`, which supports both cost and CO₂, depending on the input column.

These partial terms are combined inside `_set_objective_function`, which adds them to the optimization problem with `prob += ...`. This sets the linear expression as the minimization target for the solver.

In [None]:
def _compute_transport_cost_term(assignment_var, data: ProblemData):
    """
    Linear expression for the transport cost of all warehouse-customer pairs.

    Each pair contributes
        assignment variable * distance * price_per_km * yearly_orders
    so only pairs the solver switches on add to the total.
    """
    distances = data.distance_matrix
    customers = data.df_customers

    terms = []
    for w in data.df_warehouses.index:
        for c in customers.index:
            terms.append(
                assignment_var[(w, c)]
                * distances.at[w, c]
                * data.price_per_km
                * customers.at[c, "yearly_orders"]
            )
    return pulp.lpSum(terms)


def _compute_transport_co2_term(assignment_var, data: ProblemData):
    """
    Linear expression for the transport CO₂ of all warehouse-customer pairs.

    Each pair contributes
        assignment variable * distance * co2_per_km * yearly_orders
    so only pairs the solver switches on add to the total.
    """
    distances = data.distance_matrix
    customers = data.df_customers

    terms = []
    for w in data.df_warehouses.index:
        for c in customers.index:
            terms.append(
                assignment_var[(w, c)]
                * distances.at[w, c]
                * data.co2_per_km
                * customers.at[c, "yearly_orders"]
            )
    return pulp.lpSum(terms)


def _compute_warehouse_cost_term(warehouse_var, data: ProblemData, column, multiplier=1):
    """
    Linear expression for the fixed per-warehouse quantity stored in `column`.

    Args:
        warehouse_var: Warehouse decision variables, keyed by warehouse index label
        data: ProblemData instance
        column: Name of the df_warehouses column to sum (e.g. 'yearly_cost', 'yearly_co2_tons')
        multiplier: Factor applied to every value (e.g. 1000 to turn tons into kg)

    Returns:
        Sum over all warehouses of `variable * value * multiplier`; a warehouse
        only contributes when its variable is 1.
    """
    warehouses = data.df_warehouses
    fixed_terms = [
        warehouse_var[w] * warehouses.at[w, column] * multiplier
        for w in warehouses.index
    ]
    return pulp.lpSum(fixed_terms)


In [None]:
def _set_objective_function(
    prob, assignment_var, warehouse_var, data: ProblemData, optimize: str
):
    """
    Sets the minimization objective for the optimization problem.

    Depending on 'optimize', the objective is either total cost
    or total CO₂ emissions.

    - If optimize == "price": combines transport cost and warehouse rent
    - If optimize == "co2": combines transport emissions and warehouse emissions
      (warehouse emissions are stored in tons and multiplied by 1000 so both
      terms are added on the same scale)

    Args:
        prob: The PuLP LpProblem object. It is modified in place.
        assignment_var: Dictionary of assignment decision variables (customer to warehouse).
        warehouse_var: Dictionary of warehouse selection decision variables.
        data: ProblemData instance containing input data and parameters.
        optimize: Objective type ("price" or "co2").

    Raises:
        ValueError: If `optimize` is neither "price" nor "co2". Previously any
            other value (including typos) silently optimized CO₂.
    """
    if optimize == "price":
        transport = _compute_transport_cost_term(assignment_var, data)
        warehouse = _compute_warehouse_cost_term(warehouse_var, data, "yearly_cost")
    elif optimize == "co2":
        transport = _compute_transport_co2_term(assignment_var, data)
        warehouse = _compute_warehouse_cost_term(
            warehouse_var, data, "yearly_co2_tons", multiplier=1000
        )
    else:
        raise ValueError(
            f"Unknown objective {optimize!r}: expected 'price' or 'co2'."
        )
    # Adding a bare expression (not a constraint) to an LpProblem sets its objective.
    prob += transport + warehouse

### Step 4: Adding Constraints

These functions tell the optimization model how the problem works. They define the rules (called **constraints**) that the solution must follow. Without these rules, the model could give us crazy or meaningless results.

With PuLP, we build the problem by combining an objective function (from the previous step!) with a list of constraints (what the solution is allowed to do). These functions add the constraints to the same `prob` object.

In other words: The objective + the constraints define a problem to solve.

#### What constraints we add, and WHY:

1. **Each customer must be assigned to exactly one warehouse**

We use `_add_customer_assignment_constraints()` to make sure every customer is served, but by only one warehouse. This prevents the optimizer from ignoring or duplicating customers.

2. **Customers can only be assigned to warehouses that are open**

`_add_warehouse_selection_constraints()` enforces that customers can’t be assigned to a warehouse unless it’s actually been selected (opened). This reflects real-world logic: we can’t deliver from closed locations.

3. **Limit the total number of warehouses**

With `_add_number_of_warehouses_constraints()`, we let the user control how many warehouses to open. If the user specifies a number, we enforce it exactly. If they choose "any", we allow a range (1–10).

4. **(Optional) Require warehouses in specific countries**

Sometimes, for strategic, political, or logistical reasons, we want to make sure we operate in certain countries.  
`_add_country_constraints()` allows the user to specify required countries. If passed a list like `["Germany", "France"]`, the model is required to open **at least one warehouse** in each country.

All of these are combined by `_add_constraints()`, which calls each of the specialized helpers in order. This results in a clean, readable setup where each rule is easy to isolate, test, and understand.

In [None]:
def _add_customer_assignment_constraints(prob, assignment_var, data: ProblemData):
    """
    Require every customer to be served by exactly one warehouse.

    For each customer, the assignment variables across all warehouses must sum to 1.
    """
    warehouse_ids = data.df_warehouses.index
    for customer in data.df_customers.index:
        served_by = [assignment_var[(w, customer)] for w in warehouse_ids]
        prob += pulp.lpSum(served_by) == 1


def _add_warehouse_selection_constraints(
    prob, assignment_var, warehouse_var, data: ProblemData
):
    """
    Forbid assigning a customer to a warehouse that is not selected (open).

    One constraint per (warehouse, customer) pair: the assignment variable may
    not exceed the warehouse's own selection variable.
    """
    customer_ids = data.df_customers.index
    for w in data.df_warehouses.index:
        is_open = warehouse_var[w]
        for c in customer_ids:
            prob += assignment_var[(w, c)] <= is_open


def _add_number_of_warehouses_constraints(
    prob, warehouse_var, data: ProblemData, number_of_warehouses
):
    """
    Constrain how many warehouses the solver may open.

    - "any": between 1 and 10 warehouses (both bounds inclusive).
    - anything else: converted with int() and enforced as an exact count.
    """
    open_count = pulp.lpSum([warehouse_var[w] for w in data.df_warehouses.index])

    if number_of_warehouses == "any":
        prob += open_count >= 1
        prob += open_count <= 10
        return

    prob += open_count == int(number_of_warehouses)


def _add_country_constraints(prob, warehouse_var, data: ProblemData, country_list):
    """
    Adds constraints to enforce that at least one warehouse is selected in each specified country.

    Args:
        prob: The PuLP LpProblem object. It is modified in place.
        warehouse_var: Warehouse decision variables, keyed by warehouse index label.
        data: ProblemData instance; its df_warehouses must have a `country` column.
        country_list: Iterable of country names, matched exactly against the
            `country` column.

    Raises:
        ValueError: If a requested country has no candidate warehouse. Without
            this check the constraint would be an empty sum `0 >= 1`, which makes
            the whole model infeasible without any hint about the cause.
    """
    for country in country_list:
        wh_in_country = data.df_warehouses[
            data.df_warehouses["country"] == country
        ].index.tolist()
        if not wh_in_country:
            raise ValueError(
                f"No candidate warehouse found in required country {country!r}."
            )
        prob += pulp.lpSum(warehouse_var[w] for w in wh_in_country) >= 1

def _add_constraints(
    prob,
    assignment_var,
    warehouse_var,
    data: ProblemData,
    number_of_warehouses,
    country_list,
):
    """
    Apply every constraint family to the problem, in a fixed order.

    1. Each customer is assigned to exactly one warehouse
    2. Customers can only be assigned to selected warehouses
    3. The number of selected warehouses is bounded or fixed
    4. Required countries, skipped when `country_list` is empty or None
    """
    _add_customer_assignment_constraints(prob, assignment_var, data)
    _add_warehouse_selection_constraints(prob, assignment_var, warehouse_var, data)
    _add_number_of_warehouses_constraints(
        prob, warehouse_var, data, number_of_warehouses
    )

    if not country_list:
        return
    _add_country_constraints(prob, warehouse_var, data, country_list)


### Step 6. Extracting results

**Important!** You may think we forgot step 5... Not quite! Step 5 is to launch the solver, which happens in the main function!

This step collects the results after the solver gives us a solution. It formats it so it’s human-readable and ready for analysis or visualization.

We use two helper functions:

*  `_interpret_solution(...)`: This function pulls the raw outputs from the model:
   - **Which warehouses were selected** (where the binary `warehouse_var[w] == 1`)
   - **How each customer was assigned**, along with the distances and order quantities 
   - **Totals per warehouse**, including operational cost, transport cost and CO₂ emissions.
* `_format_results(...)`: It formats the results into 2 DataFrames.

In [None]:
def _interpret_solution(warehouse_var, assignment_var, data: ProblemData):
    """
    Interprets the optimized solution by extracting:
    - Which warehouses are selected
    - How customers are assigned to those warehouses
    - Total transport and fixed costs, as well as CO₂ emissions

    Args:
        warehouse_var: Warehouse decision variables, keyed by warehouse index label.
        assignment_var: Assignment decision variables, keyed by (warehouse, customer).
        data: ProblemData instance used to build and solve the model.

    Returns:
        A 5-tuple:
        - selected_warehouses: index labels of the selected warehouses
        - assignments: one dict per customer assignment (names, coordinates,
          distance, orders, and truncated cost / CO₂ kg for that link)
        - scenario_costs: per selected warehouse, int(yearly_cost + transport cost)
        - scenario_co2s: per selected warehouse, int(yearly_co2_tons + transport CO₂ / 1000)
        - scenario_orders: per selected warehouse, total yearly orders served
        The three scenario lists are aligned with selected_warehouses.
    """

    def _is_on(var):
        # Solvers return floats, so a binary variable may come back as
        # 0.9999999 or 1.0000001; an exact `== 1` test would drop it.
        # A variable without a value (None) is treated as off.
        value = pulp.value(var)
        return value is not None and value > 0.5

    selected_warehouses = [
        w for w in data.df_warehouses.index if _is_on(warehouse_var[w])
    ]
    customers = data.df_customers.index.tolist()

    assignments = []
    scenario_costs = []
    scenario_co2s = []
    scenario_orders = []

    for w in selected_warehouses:
        yearly_cost = data.df_warehouses.at[w, "yearly_cost"]
        yearly_co2_tons = data.df_warehouses.at[w, "yearly_co2_tons"]
        total_transport_cost = 0
        total_transport_co2 = 0
        warehouse_orders = 0

        for c in customers:
            if not _is_on(assignment_var[(w, c)]):
                continue

            distance = data.distance_matrix.at[w, c]
            orders = data.df_customers.at[c, "yearly_orders"]
            link_cost = distance * data.price_per_km * orders
            link_co2 = distance * data.co2_per_km * orders

            total_transport_cost += link_cost
            total_transport_co2 += link_co2
            warehouse_orders += orders

            assignments.append(
                {
                    "warehouse": data.df_warehouses.at[w, "warehouse"],
                    "warehouse_lat": data.df_warehouses.at[w, "latitude"],
                    "warehouse_lon": data.df_warehouses.at[w, "longitude"],
                    "customer": data.df_customers.at[c, "company_name"],
                    "customer_lat": data.df_customers.at[c, "latitude"],
                    "customer_lon": data.df_customers.at[c, "longitude"],
                    "distance_km": distance,
                    "orders": orders,
                    "total_cost": int(link_cost),
                    "total_co2_kg": int(link_co2),
                }
            )

        scenario_costs.append(int(yearly_cost + total_transport_cost))
        # Transport CO₂ is divided by 1000 before being added to the
        # warehouse's own emissions, which are stored in tons.
        scenario_co2s.append(int(yearly_co2_tons + total_transport_co2 / 1000))
        scenario_orders.append(warehouse_orders)

    return (
        selected_warehouses,
        assignments,
        scenario_costs,
        scenario_co2s,
        scenario_orders,
    )


def _format_results(
    data: ProblemData,
    selected_warehouses,
    assignments,
    scenario_costs,
    scenario_co2s,
    scenario_orders,
):
    """
    Turn the interpreted solution into the two DataFrames returned to the user.

    Returns:
        (df_selected, df_assignments)
        - df_selected: the rows of df_warehouses for the selected warehouses,
          extended with `scenario_cost`, `scenario_co2_tons` and `scenario_orders`
        - df_assignments: one row per entry of `assignments`
    """
    df_selected = data.df_warehouses.loc[selected_warehouses].assign(
        scenario_cost=scenario_costs,
        scenario_co2_tons=scenario_co2s,
        scenario_orders=scenario_orders,
    )
    return df_selected, pd.DataFrame(assignments)

### Main optimization function

We call all our previous functions from a single main function, `create_pulp_model`.

**To summarize:**

1. We create an empty problem (`prob` variable)
2. We create variables for the mathematical equations
3. We add the objective: this is what to solve
4. We add the constraints: these are the conditions the solution needs to respect
5. We solve the problem using a solver; in our case, we use `PULP_CBC_CMD`:

```python
    prob.solve(pulp.PULP_CBC_CMD(msg=False))
```

6. We extract the results

In our case:

1. We create a minimization problem
2. Our variables are the warehouses that we can select, and each possible warehouse - customer combination
3. Our objective can be selected by users (from the app's UI). They have two choices:
    * Reduce (optimize) price (cost)
    * Reduce (optimize) CO2 emissions
4. Some constraints are "mandatory", like each customer needing to be assigned to one and only one warehouse. Some constraints are selected by the user from the UI (like mandatory country presence, or number of warehouses to include in the solution)
5. We use the `PULP_CBC_CMD` solver, because this is a simple problem.
6. We extract 2 DataFrames from the solution:
    * The warehouses from the solution
    * The customer - warehouse relationship

In [None]:

def create_pulp_model(
    df_warehouses,
    df_customers,
    distance_matrix,
    optimize="price",
    number_of_warehouses="any",
    country_list=None,
    price_per_km=4,
    co2_per_km=2,
):
    """
    Builds and solves the warehouse optimization model using PuLP.

    This is the main function that ties everything together:
    - Prepares and encapsulates the input data (copies, so inputs are not modified)
    - Defines decision variables for warehouse selection and customer assignments
    - Sets the objective function (optimize either total cost or total CO₂)
    - Adds all business constraints (assignment rules, warehouse limits, country requirements)
    - Solves the problem using the PuLP CBC solver
    - Checks that the solver found an optimal solution
    - Extracts and formats the results into Pandas DataFrames

    Parameters:
        df_warehouses (pd.DataFrame): Info about candidate warehouses (location, cost, emissions, etc.)
        df_customers (pd.DataFrame): Info about customers (location, yearly orders, etc.)
        distance_matrix (pd.DataFrame): Distances between each warehouse and customer (in km)
        optimize (str): Whether to optimize for 'price' or 'co2' (default is 'price')
        number_of_warehouses (int or str): Either an integer (e.g. 3) or "any" for flexible count
        country_list (list or None): Optional list of countries to require at least one warehouse in each
        price_per_km (float): Transport cost per kilometer per order (default = 4)
        co2_per_km (float): Transport emissions per kilometer per order (default = 2)

    Returns:
        tuple:
            - df_selected (pd.DataFrame): Warehouses selected, with costs, CO₂, and orders handled
            - df_assignments (pd.DataFrame): Customer-to-warehouse assignments and their impact

    Raises:
        RuntimeError: If the solver does not end with status "Optimal" (for
            example when the requested constraints cannot all be satisfied).
            Reading variable values from such a model would produce
            meaningless result tables.
    """
    data = ProblemData(
        df_warehouses=df_warehouses.copy(),
        df_customers=df_customers.copy(),
        distance_matrix=distance_matrix.copy(),
        price_per_km=price_per_km,
        co2_per_km=co2_per_km,
    )
    prob = _initialize_problem()
    warehouse_var, assignment_var = _define_variables(data)
    _set_objective_function(prob, assignment_var, warehouse_var, data, optimize)
    _add_constraints(
        prob, assignment_var, warehouse_var, data, number_of_warehouses, country_list
    )

    prob.solve(pulp.PULP_CBC_CMD(msg=False))
    # prob.status is an integer code; pulp.LpStatus maps it to a readable name.
    status = pulp.LpStatus[prob.status]
    if status != "Optimal":
        raise RuntimeError(
            f"No optimal solution found (solver status: {status}). "
            "Check that the number of warehouses and the required countries "
            "can be satisfied together."
        )

    selected_warehouses, assignments, costs, co2s, orders = _interpret_solution(
        warehouse_var, assignment_var, data
    )
    return _format_results(data, selected_warehouses, assignments, costs, co2s, orders)


## Using our function!

We create a notebook cell with variables, feel free to change their value and then re-run the model below!

In [None]:
# Scenario parameters — edit these and re-run the cells below.
number_of_warehouses = 3  # an integer, or "any" to allow between 1 and 10
optimize = "price"  # or "co2"
country_list = ["France", "Germany"]  # optional; at least one warehouse per listed country
price_per_km = 4  # transport cost factor, multiplied by distance and yearly orders
co2_per_km = 2  # transport emission factor, multiplied by distance and yearly orders

In [None]:
# Run optimization
df_selected, df_assignments = create_pulp_model(
    df_warehouses,
    df_customers,
    df_matrix,
    optimize=optimize,
    number_of_warehouses=number_of_warehouses,
    country_list=country_list,
    price_per_km=price_per_km,
    co2_per_km=co2_per_km
)

# Display results
df_selected.head()

In [None]:
df_assignments.head()

## Plotting the results!

We create a different version of the map function to show the selected warehouses and the link to each customer:

In [None]:
def _prepare_data(df_assignments):
    # Extract unique warehouses and customers
    df_warehouses_unique = df_assignments[
        ["warehouse", "warehouse_lat", "warehouse_lon"]
    ].drop_duplicates()
    df_warehouses_unique = df_warehouses_unique.rename(
        columns={"warehouse": "name", "warehouse_lat": "lat", "warehouse_lon": "lon"}
    )
    df_warehouses_unique["type"] = "Warehouse"

    df_customers_unique = df_assignments[
        ["customer", "customer_lat", "customer_lon"]
    ].drop_duplicates()
    df_customers_unique = df_customers_unique.rename(
        columns={"customer": "name", "customer_lat": "lat", "customer_lon": "lon"}
    )
    df_customers_unique["type"] = "Customer"

    data = pd.concat([df_warehouses_unique, df_customers_unique], ignore_index=True)
    return data


def _add_columns_to_data(data):
    # Prepare hover name and size
    data["hover_name"] = data.apply(
        lambda row: (
            f"Warehouse {row['name']}"
            if row["type"] == "Warehouse"
            else f"Customer: {row['name']}"
        ),
        axis=1,
    )
    data["size"] = data["type"].map({"Warehouse": 22, "Customer": 5})
    return data


def plot_assignments(df_assignments):
    data = _prepare_data(df_assignments)
    data = _add_columns_to_data(data)

    # Create the base figure with markers
    fig = px.scatter_map(
        data,
        lat="lat",
        lon="lon",
        color="type",
        color_discrete_map={"Customer": "blue", "Warehouse": "red"},
        size="size",
        hover_name="hover_name",
        hover_data={
            "lat": False,
            "lon": False,
            "type": False,
            "name": False,
            "size": False,
        },
        map_style="carto-positron",
        zoom=3,
        center={"lat": 50, "lon": 10},  # Center on Europe
        size_max=10,
    )

    # Prepare line coordinates for assignments
    lons = []
    lats = []
    for _, row in df_assignments.iterrows():
        lons.extend([row["warehouse_lon"], row["customer_lon"], None])
        lats.extend([row["warehouse_lat"], row["customer_lat"], None])

    # Add lines to the figure
    fig.add_trace(
        go.Scattermap(
            lon=lons,
            lat=lats,
            mode="lines",
            line=dict(width=1, color="#003399"),
            showlegend=False,
        )
    )

    # Customize layout similar to the example function
    fig.update_layout(
        title="📍 Customer-Warehouse Assignments",
        height=700,
        margin={"r": 0, "t": 50, "l": 0, "b": 0},
        legend=dict(
            x=0.02,
            y=0.98,
            bgcolor="rgba(255,255,255,0.8)",
            bordercolor="black",
            borderwidth=1,
        ),
    )

    # Update hovertemplate to only show the hover_name
    fig.update_traces(hovertemplate="<b>%{hovertext}</b><extra></extra>")

    return fig

In [None]:
plot_assignments(df_assignments)