
# Unified Supply Chain Optimization — MIP (PuLP) + QUBO (neal) on One Dataset

This notebook combines two approaches on the **same synthetic dataset**:
1. **Classical MIP (Plant Location + Transportation)**  
   - Decide which plants to open and how much to ship to each customer.  
   - Solved with **PuLP** (CBC) if available; otherwise a small brute-force **heuristic fallback** is used.

2. **QUBO Formulation (Uncapacitated Facility Location)**  
   - Binary variables for plant open decisions and customer-to-plant assignments (one plant per customer).  
   - Enforced with quadratic penalties and solved via **simulated annealing** (`neal`).

**Note:** The MIP allows **split shipments** and **capacities**; the QUBO section (core UFLP) assigns **each customer to exactly one plant** and does **not** hard-enforce capacity. An optional soft-capacity term is provided.

We'll generate **one synthetic dataset** (plants, customers, costs, capacities, demands) and reuse it in both sections.


In [None]:

# Install missing deps (works in Colab):
# - PuLP for MIP
# - dimod/neal for QUBO simulated annealing
def _silent_imports():
    """Report which optional solver packages can currently be imported.

    Returns:
        A dict with the keys ``"pulp"``, ``"dimod"`` and ``"neal"`` (in that
        order); each value is ``True`` when importing the module of that name
        succeeded and ``False`` when the import raised any ``Exception``.
    """
    available = {}
    for module_name in ("pulp", "dimod", "neal"):
        try:
            __import__(module_name)
        except Exception:
            # Any import-time failure counts as "not usable"; stay silent.
            available[module_name] = False
        else:
            available[module_name] = True
    return available

flags = _silent_imports()
if not flags["pulp"]:
    %pip -q install pulp
if not flags["dimod"] or not flags["neal"]:
    %pip -q install dimod neal

flags = _silent_imports()
print("PuLP:", flags["pulp"], "| dimod:", flags["dimod"], "| neal:", flags["neal"])


In [None]:

# ==== One Synthetic Dataset for Both Approaches ====
import numpy as np, pandas as pd, math

# Seeded generator: the same synthetic instance is produced on every run.
rng = np.random.default_rng(42)

num_plants = 4
num_customers = 8
# Labels are 1-based: P1..P4 and C1..C8.
plants = [f"P{i+1}" for i in range(num_plants)]
customers = [f"C{j+1}" for j in range(num_customers)]

# Per-plant / per-customer parameters as labelled Series. `rng.integers`
# excludes its upper bound by default, so e.g. fixed costs fall in 9000..16000.
# NOTE: all values come from the single `rng` stream, so the order of these
# draws determines the generated numbers.
fixed_open_cost = pd.Series(rng.integers(9000, 16001, size=num_plants), index=plants, name="fixed_open_cost")
prod_cost = pd.Series(rng.integers(35, 66, size=num_plants), index=plants, name="prod_cost_per_unit")
capacity = pd.Series(rng.integers(180, 321, size=num_plants), index=plants, name="capacity")
demand = pd.Series(rng.integers(50, 121, size=num_customers), index=customers, name="demand")

# Per-unit shipping cost; rows are plants, columns are customers.
transport_cost = pd.DataFrame(rng.integers(5, 26, size=(num_plants, num_customers)), index=plants, columns=customers)

# Ensure feasibility for MIP: if aggregate capacity falls short of aggregate
# demand, multiply every capacity by the same integer factor, chosen as the
# demand/capacity ratio plus a 10% margin, rounded up.
if capacity.sum() < demand.sum():
    scale = math.ceil(demand.sum() / capacity.sum() * 1.1)
    capacity = (capacity * scale).astype(int)

# Tabular views of the same data; `df_transport` is an independent copy.
df_plants = pd.concat([fixed_open_cost, prod_cost, capacity], axis=1)
df_customers = demand.to_frame()
df_transport = transport_cost.copy()

print("Plants:", plants)
print("Customers:", customers)
# Trailing expression: a notebook renders this tuple as the cell output
# (the transport table is cut to at most its first 5 rows and 5 columns).
df_plants, df_customers, df_transport.iloc[:5,:5]



## Part 1 — Classical MIP (MILP) with PuLP (fallback heuristic if PuLP unavailable)

**Decision variables:**
- \(y_i \in \{0,1\}\): plant \(i\) open/closed
- \(x_{ij} \ge 0\): units shipped from plant \(i\) to customer \(j\)

**Objective:**
\[
\min \sum_i f_i y_i + \sum_i c^{prod}_i \left(\sum_j x_{ij}\right) + \sum_{i,j} c^{trans}_{ij} x_{ij}
\]

**Constraints:**
- Demand: \(\sum_i x_{ij} = d_j\), for all \(j\)
- Capacity: \(\sum_j x_{ij} \le K_i \, y_i\), for all \(i\)


In [None]:

import pandas as pd, numpy as np, math

# Record whether PuLP can be imported; any import-time failure counts as "no".
try:
    import pulp
except Exception:
    HAVE_PULP = False
else:
    HAVE_PULP = True

def solve_mip(plants, customers, fixed_open_cost, prod_cost, capacity, demand, transport_cost):
    """Solve the capacitated plant-location / transportation MILP with PuLP (CBC).

    Minimizes fixed opening cost plus per-unit production and transport cost,
    subject to every customer's demand being met exactly and each plant
    shipping no more than its capacity (zero if the plant is closed).

    Args:
        plants: Plant labels.
        customers: Customer labels.
        fixed_open_cost: Indexable by plant; fixed cost of opening it.
        prod_cost: Indexable by plant; production cost per unit.
        capacity: Indexable by plant; maximum units shipped when open.
        demand: Indexable by customer; units required.
        transport_cost: Table read as ``transport_cost.loc[plant, customer]``;
            shipping cost per unit.

    Returns:
        ``(status, obj, x_sol, y_sol)``: the PuLP status string, the objective
        value as reported by PuLP, a dict ``(plant, customer) -> shipped units``
        and a dict ``plant -> 0/1``. Callers should check ``status`` before
        trusting the numbers.
    """
    prob = pulp.LpProblem("SupplyChainPlantLocation", pulp.LpMinimize)
    y = {i: pulp.LpVariable(f"open_{i}", lowBound=0, upBound=1, cat="Binary") for i in plants}
    x = {
        (i, j): pulp.LpVariable(f"x_{i}_{j}", lowBound=0, cat="Continuous")
        for i in plants
        for j in customers
    }

    # Objective: fixed + production + transport.
    prob += (
        pulp.lpSum(fixed_open_cost[i] * y[i] for i in plants)
        + pulp.lpSum(prod_cost[i] * pulp.lpSum(x[(i, j)] for j in customers) for i in plants)
        + pulp.lpSum(transport_cost.loc[i, j] * x[(i, j)] for i in plants for j in customers)
    )

    # Demand must be met exactly; split shipments across plants are allowed.
    for j in customers:
        prob += pulp.lpSum(x[(i, j)] for i in plants) == demand[j]
    # A plant ships at most its capacity, and nothing when y_i = 0.
    for i in plants:
        prob += pulp.lpSum(x[(i, j)] for j in customers) <= capacity[i] * y[i]

    prob.solve(pulp.PULP_CBC_CMD(msg=False))
    status = pulp.LpStatus[prob.status]

    # `pulp.value` yields None for a variable the solver left unset; read that
    # as 0 for flows just as for the open flags, so callers can do arithmetic.
    x_sol = {key: (pulp.value(var) or 0.0) for key, var in x.items()}
    y_sol = {i: int(round(pulp.value(y[i]) or 0)) for i in plants}
    obj = pulp.value(prob.objective)
    return status, obj, x_sol, y_sol

def solve_heuristic(plants, customers, fixed_open_cost, prod_cost, capacity, demand, transport_cost):
    """Fallback solver: enumerate open-plant subsets and fill lanes greedily.

    Every non-empty subset of plants whose combined capacity covers total
    demand is tried. Within a subset, lanes are visited from cheapest to most
    expensive per-unit cost (transport + production) and each receives as much
    as the remaining demand and capacity allow. The cheapest subset wins; on
    equal cost the subset enumerated first is kept.

    Returns:
        ``("Heuristic", obj, x_sol, y_sol)`` for the best subset found, where
        ``x_sol`` maps ``(plant, customer)`` to shipped units and ``y_sol`` maps
        plant to 0/1, or ``("Infeasible", None, None, None)`` when no subset
        qualifies.
    """
    from itertools import product

    # Per-unit delivered cost of each lane: shipping plus production at the source.
    lane_cost = {
        (p, c): transport_cost.loc[p, c] + prod_cost[p]
        for p in plants
        for c in customers
    }

    champion = None
    champion_cost = float("inf")

    for bits in product([0, 1], repeat=len(plants)):
        opened = [p for p, bit in zip(plants, bits) if bit == 1]
        # Skip the empty subset and subsets too small to cover total demand.
        if not opened or sum(capacity[p] for p in opened) < demand.sum():
            continue

        cap_left = {p: capacity[p] for p in opened}
        dem_left = demand.to_dict()
        shipped = {(p, c): 0.0 for p in plants for c in customers}

        # Stable sort: lanes of equal cost keep plant-major, customer-minor order.
        ordered = sorted(
            [(lane_cost[(p, c)], p, c) for p in opened for c in customers],
            key=lambda lane: lane[0],
        )
        for _, p, c in ordered:
            if dem_left[c] > 0 and cap_left[p] > 0:
                moved = min(dem_left[c], cap_left[p])
                shipped[(p, c)] += moved
                dem_left[c] -= moved
                cap_left[p] -= moved

        if any(dem_left[c] > 0 for c in customers):
            continue

        fixed = sum(fixed_open_cost[p] for p in opened)
        variable = sum(prod_cost[p] * sum(shipped[(p, c)] for c in customers) for p in opened)
        transport = sum(transport_cost.loc[p, c] * shipped[(p, c)] for p in opened for c in customers)
        total = fixed + variable + transport

        if total < champion_cost:
            champion_cost = total
            open_flags = {p: (1 if p in opened else 0) for p in plants}
            champion = ("Heuristic", total, shipped, open_flags)

    if champion is None:
        return "Infeasible", None, None, None
    return champion

# Use the PuLP model when PuLP imported, otherwise the brute-force heuristic;
# both take the same arguments and return (status, objective, flows, open flags).
mip_status, mip_obj, mip_x, mip_y = (solve_mip if HAVE_PULP else solve_heuristic)(
    plants, customers, fixed_open_cost, prod_cost, capacity, demand, transport_cost
)

print("MIP status:", mip_status, "| MIP objective:", mip_obj)


In [None]:

import pandas as pd, numpy as np, matplotlib.pyplot as plt

# Build summary tables
# Quantities at or below this tolerance are treated as solver round-off.
flow_eps = 1e-6

# Read a missing result (None) as "no flows / nothing open", and a missing or
# None entry as 0, so the tables and the heatmap still render.
solved_x = mip_x or {}
solved_y = mip_y or {}
flow_qty = {(i, j): (solved_x.get((i, j)) or 0.0) for i in plants for j in customers}

flow_rows = [
    {"plant": i, "customer": j, "quantity": q}
    for (i, j), q in flow_qty.items()
    if q > flow_eps
]
# Explicit columns keep sort_values working when there are no positive flows.
df_flow = (
    pd.DataFrame(flow_rows, columns=["plant", "customer", "quantity"])
    .sort_values(["plant", "customer"])
    .reset_index(drop=True)
)

df_open = pd.DataFrame({"plant": plants, "open": [solved_y.get(i, 0) for i in plants]})
plant_ship = {i: sum(flow_qty[(i, j)] for j in customers) for i in plants}
df_open["shipped_units"] = df_open["plant"].map(plant_ship)
df_open["capacity"] = df_open["plant"].map(capacity)
df_open["utilization"] = (df_open["shipped_units"] / df_open["capacity"]).round(3)

display(df_plants)
display(df_customers)
display(df_transport)
display(df_open)
display(df_flow)

# Heatmap of shipments
flow_matrix = np.array([[flow_qty[(i, j)] for j in customers] for i in plants], dtype=float)
plt.figure()
plt.imshow(flow_matrix, aspect='auto')
plt.title("MIP Shipments (Plants x Customers)")
plt.xlabel("Customers")
plt.ylabel("Plants")
plt.xticks(ticks=np.arange(len(customers)), labels=customers, rotation=45)
plt.yticks(ticks=np.arange(len(plants)), labels=plants)
for a in range(len(plants)):
    for b in range(len(customers)):
        val = flow_matrix[a, b]
        # Same tolerance as the flow table; ":g" rounds solver noise
        # (49.999999 -> "50") instead of truncating it to 49.
        if val > flow_eps:
            plt.text(b, a, f"{val:g}", ha="center", va="center")
plt.tight_layout()



## Part 2 — QUBO (UFLP) on the Same Dataset

**Variables:**
- \(y_i\in\{0,1\}\): plant open
- \(z_{ij}\in\{0,1\}\): customer \(j\) assigned to plant \(i\)

**Objective:**
\[
\min \sum_i f_i y_i + \sum_{i,j} (c^{prod}_i + c^{trans}_{ij})\, d_j \, z_{ij}
\]
**Penalties:**
- Assignment exactly once: \(A\sum_j (1-\sum_i z_{ij})^2\)
- Open-if-assigned: \(B\sum_{i,j} z_{ij}(1-y_i)\)

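**(Optional) Soft capacity term** (off by default):  
\(C \sum_i \big(\sum_j d_j z_{ij} - K_i y_i\big)^2\)  
Because this is a squared difference, it penalizes any gap between the demand assigned to an open plant and its capacity — unused capacity as well as overload — so it is only a rough stand-in for the MIP's \(\le\) constraint.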


In [None]:

from collections import defaultdict
import numpy as np, pandas as pd
import dimod, neal

# Build costs used by QUBO (fold production into transport per unit)
unit_cost = df_transport.copy()
for i in plants:
    unit_cost.loc[i, :] = unit_cost.loc[i, :] + prod_cost[i]

# Penalty scales
# A violated constraint can "save" objective cost in two ways: dropping an
# assignment saves at most one assignment cost plus one fixed cost, and serving
# a customer from a closed plant saves at most one fixed cost. M therefore has
# to cover fixed costs as well as assignment costs; with the 10x factor both
# penalties then exceed the largest possible saving.
max_assign_cost = float((unit_cost.to_numpy() * demand.to_numpy()).max())
M = max(max_assign_cost, float(fixed_open_cost.max()))
A = 10.0 * M  # weight of "each customer assigned exactly once"
B = 10.0 * M  # weight of "assigned plant must be open"
USE_SOFT_CAP = False
C = 0.5 * M  # only used if USE_SOFT_CAP = True

# Label helpers: map model indices to the string names used as QUBO variables.
def y_label(i):
    """Return the variable name of the open/closed indicator of plant ``i``."""
    label = f"y|{i}"
    return label


def z_label(i, j):
    """Return the variable name for "customer ``j`` is assigned to plant ``i``"."""
    label = f"z|{i}|{j}"
    return label

# QUBO coefficients keyed by (var, var). A (v, v) key holds the linear weight
# of v; missing keys read as 0.0, so every term can simply be accumulated.
Q = defaultdict(float)

# Linear terms
# Fixed opening cost f_i on each y_i.
for i in plants:
    Q[(y_label(i), y_label(i))] += float(fixed_open_cost[i])

# Each z_ij carries its assignment cost (unit_cost * demand) plus the linear
# parts of both penalties: -A from A*(1 - sum_i z_ij)^2 (using z^2 = z) and
# +B from B*z_ij*(1 - y_i).
for i in plants:
    for j in customers:
        Q[(z_label(i,j), z_label(i,j))] += float(unit_cost.loc[i,j] * demand[j] - A + B)

# Quadratic: assignment-equals-1 coupling among z for the same customer.
# Cross terms of the squared sum: +2A for every unordered pair of plants
# serving customer j. The constant A per customer from the expansion is not
# stored in Q.
for j in customers:
    for idx_i, i in enumerate(plants):
        for k in plants[idx_i+1:]:
            Q[(z_label(i,j), z_label(k,j))] += float(2*A)

# Quadratic: open-if-assigned coupling, the -B*z_ij*y_i part of B*z_ij*(1 - y_i).
for i in plants:
    for j in customers:
        Q[(z_label(i,j), y_label(i))] += float(-B)

# Optional soft capacity
if USE_SOFT_CAP:
    # Expands C * (sum_j d_j z_ij - K_i y_i)^2 term by term, using z^2 = z and
    # y^2 = y to fold the squared terms into linear (diagonal) weights.
    # NOTE(review): as a squared difference this also penalizes assigned demand
    # below K_i at an open plant, not only overload — confirm that is intended.
    for i in plants:
        # Squared z terms: C * d_j^2 on the diagonal of each z_ij.
        for j in customers:
            Q[(z_label(i,j), z_label(i,j))] += float(C * (demand[j]**2))
        # Squared y term: C * K_i^2 on the diagonal of y_i.
        Q[(y_label(i), y_label(i))] += float(C * (capacity[i]**2))
        # Cross z_ij z_ik for j<k: 2C*dj*dk
        for a_idx, ja in enumerate(customers):
            for jb in customers[a_idx+1:]:
                Q[(z_label(i,ja), z_label(i,jb))] += float(2*C*demand[ja]*demand[jb])
        # Cross z_ij, y_i: -2C*dj*Ki (accumulates onto the same key as the -B term above)
        for j in customers:
            Q[(z_label(i,j), y_label(i))] += float(-2*C*demand[j]*capacity[i])

# Build BQM and sample
# Expanding A*(1 - sum_i z_ij)^2 leaves a constant A per customer that a QUBO
# dict cannot hold. Passing it as the BQM offset makes the energy of a sample
# that satisfies both hard constraints equal to its total cost (as long as the
# soft-capacity term is off), so energy and cost can be compared directly.
bqm = dimod.BinaryQuadraticModel.from_qubo(dict(Q), offset=A * len(customers))
sampler = neal.SimulatedAnnealingSampler()
# Seeded like the dataset, so repeated runs report the same solution.
sampleset = sampler.sample(bqm, num_reads=3000, seed=42)
best = sampleset.first  # lowest-energy sample

x = best.sample  # dict var->0/1

open_plants_qubo = [i for i in plants if x.get(y_label(i), 0) == 1]
assign_qubo = {(i, j): x.get(z_label(i, j), 0) for i in plants for j in customers}

# Validate constraints (assignment = 1 and open-if-assigned); the penalties
# only discourage violations, so the returned sample has to be checked.
viol = []
for j in customers:
    n_assigned = sum(assign_qubo[(i, j)] for i in plants)
    if n_assigned != 1:
        viol.append(f"Assignment count for {j} = {n_assigned}")

for i in plants:
    for j in customers:
        if assign_qubo[(i, j)] == 1 and x.get(y_label(i), 0) != 1:
            viol.append(f"Assign to closed plant: {i}->{j}")

# Compute QUBO cost (fixed + unit_cost*demand per assigned pair).
# This is the plain objective without penalty terms; it is only a valid
# solution cost when no violations are reported below.
fixed_cost_qubo = sum(fixed_open_cost[i] for i in open_plants_qubo)
ship_cost_qubo = 0.0
for j in customers:
    for i in plants:
        if assign_qubo[(i, j)] == 1:
            ship_cost_qubo += unit_cost.loc[i, j] * demand[j]
total_qubo = float(fixed_cost_qubo + ship_cost_qubo)

print("QUBO energy:", best.energy)
print("QUBO open plants:", open_plants_qubo)
print("QUBO total cost (fixed + prod+transport):", int(total_qubo))
if viol:
    print("Violations:", viol[:5], ("... (+ more)" if len(viol) > 5 else ""))
else:
    print("All hard constraints satisfied in QUBO solution ✅")

# Show assignments
df_assign = pd.DataFrame(
    [(i, j, assign_qubo[(i, j)]) for i in plants for j in customers if assign_qubo[(i, j)] == 1],
    columns=["plant", "customer", "assigned"],
).sort_values(["customer", "plant"])
display(df_assign)



### Notes on Comparing MIP vs QUBO
- **MIP** may split a customer's demand across plants and respects capacities exactly.
- **QUBO** assigns **each customer to one plant** (no split), and capacity is optional/soft here.
- Therefore, **total costs** can differ. QUBO is a structured approximation that maps well to quantum/hybrid solvers.
