In [1]:
from __future__ import annotations

import re
import zipfile
from pathlib import Path
from typing import Dict, Tuple, Optional, List

import numpy as np
import pandas as pd
import cvxpy as cp
import requests


In [2]:
#!pip install cvxpy[glpk]
#!pip install ecos

In [3]:
print(cp.installed_solvers())


['CLARABEL', 'CVXOPT', 'ECOS', 'ECOS_BB', 'GLPK', 'GLPK_MI', 'OSQP', 'SCIPY', 'SCS']


In [4]:
WORKDIR = Path("./supply_chain_lp_work")
RAW_DIR = WORKDIR / "raw"
EXTRACT_DIR = RAW_DIR / "repo_extract"
OUT_DIR = WORKDIR / "out"

WORKDIR.mkdir(parents=True, exist_ok=True)
RAW_DIR.mkdir(parents=True, exist_ok=True)
OUT_DIR.mkdir(parents=True, exist_ok=True)

## Download Data

In [5]:
# -----------------------------
# Utilities: download & extract
# -----------------------------
def download_zip(url: str, dest_path: Path) -> None:
    dest_path.parent.mkdir(parents=True, exist_ok=True)
    print(f"Downloading: {url}")
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    dest_path.write_bytes(r.content)
    print(f"Saved zip to: {dest_path.resolve()}")


def extract_zip(zip_path: Path, dest_dir: Path) -> None:
    dest_dir.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, "r") as zf:
        zf.extractall(dest_dir)
    print(f"Extracted zip to: {dest_dir.resolve()}")

# -----------------------------
# Cleaning helpers
# -----------------------------
def canon(s: str) -> str:
    """Canonical column name: lowercase, strip, replace non-alnum with underscore."""
    s = s.strip().lower()
    s = re.sub(r"[^a-z0-9]+", "_", s)
    s = re.sub(r"_+", "_", s).strip("_")
    return s


def clean_df(df: pd.DataFrame) -> pd.DataFrame:
    df = df.copy()
    df.columns = [canon(c) for c in df.columns]
    # Normalize common missing value tokens
    df = df.replace({"": np.nan, "NA": np.nan, "N/A": np.nan, "null": np.nan})
    return df


def load_excel_sheets(xlsx_path: Path) -> Dict[str, pd.DataFrame]:
    print(f"Loading workbook: {xlsx_path.resolve()}")
    xls = pd.ExcelFile(xlsx_path)
    sheets = {}
    for sheet_name in xls.sheet_names:
        df = pd.read_excel(xls, sheet_name=sheet_name)
        df = clean_df(df)
        sheets[canon(sheet_name)] = df
    print(f"Loaded {len(sheets)} sheets: {list(sheets.keys())}")
    return sheets

In [6]:
zip_path = RAW_DIR / "LogisticsDataset_main.zip"
GITHUB_ZIP_URL = "https://github.com/jaredbach/LogisticsDataset/archive/refs/heads/main.zip"

In [7]:
if not zip_path.exists():
    download_zip(GITHUB_ZIP_URL, zip_path)
else:
    print(f"Zip already exists: {zip_path.resolve()}")

if EXTRACT_DIR.exists():
        # keep it simple: don't re-extract unless user deletes folder
    print(f"Extract dir exists: {EXTRACT_DIR.resolve()}")
else:
    extract_zip(zip_path, EXTRACT_DIR)

Zip already exists: C:\Users\salmank\Documents\convex_optimization\docs\convex\tutorials\supply_chain_lp_work\raw\LogisticsDataset_main.zip
Extract dir exists: C:\Users\salmank\Documents\convex_optimization\docs\convex\tutorials\supply_chain_lp_work\raw\repo_extract


In [8]:
xlsx_path = Path("supply_chain_lp_work/raw/repo_extract/LogisticsDataset-main/SupplyChainLogisticsProblems.xlsx")

In [9]:
sheets = load_excel_sheets(xlsx_path)

Loading workbook: C:\Users\salmank\Documents\convex_optimization\docs\convex\tutorials\supply_chain_lp_work\raw\repo_extract\LogisticsDataset-main\SupplyChainLogisticsProblems.xlsx
Loaded 7 sheets: ['orderlist', 'freightrates', 'whcosts', 'whcapacities', 'productsperplant', 'vmicustomers', 'plantports']


In [10]:
## Demand measure
orders  = sheets['orderlist'][['order_id','weight']]
orders.columns = ["order_id", "demand"]
orders = orders[orders["demand"] > 0].copy()
orders["order_id"] = orders["order_id"].astype(str)
orders.head()

Unnamed: 0,order_id,demand
0,1447296446.7,14.3
1,1447158014.7,87.94
2,1447138898.7,61.2
3,1447363527.7,16.16
4,1447363980.7,52.34


In [11]:
# Plant capacities
plants = sheets['whcapacities']
plants.columns = ["plant_id", "supply_cap"]
plants["plant_id"] = plants["plant_id"].astype(str)
plants["supply_cap"] = pd.to_numeric(plants["supply_cap"], errors="coerce")
plants = plants.dropna(subset=["supply_cap"])
#plants = plants.merge(cap, on="plant_id", how="left")
plants.head()

Unnamed: 0,plant_id,supply_cap
0,PLANT15,11
1,PLANT17,8
2,PLANT18,111
3,PLANT05,385
4,PLANT02,138


In [14]:
# dummy unit cost between for each plant-order pair
rng = np.random.default_rng(7)
plant_factor = {pid: 0.8 + 0.4 * rng.random() for pid in plants["plant_id"]}

plant_ids = plants["plant_id"].tolist()

lanes = []
for _, o in orders.iterrows():
    oid = o["order_id"]
    dem = float(o["demand"])
    order_jitter = 0.95 + 0.1 * rng.random()
    for pid in plant_ids:
        unit_cost = 1 * plant_factor[pid] * order_jitter
        lanes.append((pid, oid, unit_cost))

lanes = pd.DataFrame(lanes, columns=["plant_id", "order_id", "unit_cost"])
lanes.head()

Unnamed: 0,plant_id,order_id,unit_cost
0,PLANT15,1447296446.7,1.101381
1,PLANT17,1447296446.7,1.21555
2,PLANT18,1447296446.7,1.164562
3,PLANT05,1447296446.7,0.933604
4,PLANT02,1447296446.7,0.965054


## CVXPY Solver

In [15]:
# Solve LP in CVXPY

# Index maps
plant_ids = plants["plant_id"].astype(str).tolist()
order_ids = orders["order_id"].astype(str).tolist()

plant_index = {p: i for i, p in enumerate(plant_ids)}
order_index = {o: j for j, o in enumerate(order_ids)}

# Keep only lanes with valid endpoints
lanes = lanes[lanes["plant_id"].isin(plant_index) & lanes["order_id"].isin(order_index)].copy()
lanes = lanes.reset_index(drop=True)

nA = len(lanes)
nP = len(plant_ids)
nO = len(order_ids)




In [16]:
order_to_arcs: List[List[int]] = [[] for _ in range(nO)]
plant_to_arcs: List[List[int]] = [[] for _ in range(nP)]

for a, row in lanes.iterrows():
    i = plant_index[str(row["plant_id"])]
    j = order_index[str(row["order_id"])]
    plant_to_arcs[i].append(a)
    order_to_arcs[j].append(a)

# Data vectors
cost = lanes["unit_cost"].to_numpy(dtype=float)
demand = orders.set_index("order_id").loc[order_ids, "demand"].to_numpy(dtype=float)
supply = plants.set_index("plant_id").loc[plant_ids, "supply_cap"].to_numpy(dtype=float)


In [21]:
# Decision variable
x = cp.Variable(nA, nonneg=True)

In [None]:
constraints = []

# Order demand equalities
order_constraints = []
for j in range(nO):
    arcs_j = order_to_arcs[j]
    if not arcs_j:
        raise ValueError(f"Order {order_ids[j]} has no incoming lanes; infeasible.")
    con = cp.sum(x[arcs_j]) == demand[j]
    constraints.append(con)
    order_constraints.append(con)

# Plant supply inequalities
plant_constraints = []
for i in range(nP):
    arcs_i = plant_to_arcs[i]
    if not arcs_i:
        # Plant unused; constraint 0 <= supply holds trivially
        continue
    con = cp.sum(x[arcs_i]) <= supply[i]
    constraints.append(con)
    plant_constraints.append((plant_ids[i], con))

In [25]:
obj = cp.Minimize(cost @ x)
prob = cp.Problem(obj, constraints)

In [26]:
prob.solve(verbose=False)

inf

In [27]:
print("\nThe optimal value is", prob.value)
print("The optimal x is")
print(x.value)


The optimal value is inf
The optimal x is
None
