In [1]:
from pathlib import Path
from typing import NamedTuple

import pandas as pd

# Directory holding the raw BKS source files (Excel / CSV) that the parsing
# cells below read from.
DATA_DIR = Path("../data/bks/")

In [2]:
class Result(NamedTuple):
    """A single BKS record: one value for one instance of one problem."""

    problem: str  # problem variant name, e.g. "RCPSP"
    category: str  # benchmark set the instance belongs to, e.g. "PSPLIB-J30"
    instance: str  # instance file name, e.g. "J30_1.txt"
    bks: int  # the BKS value reported for this instance


# Shared accumulator: every parsing cell below appends its records here.
results: list[Result] = []

## Machine scheduling data

Here we use the results data from Naderi et al. (2023), keeping only the rows for CP Optimizer (the `CPLEX_CP` method).

In [3]:
# Load the results workbook and keep only the rows whose method is CPLEX_CP.
df = pd.read_excel(DATA_DIR / "Results_22_9_2021.xlsx")
cp = df[df["Method"] == "CPLEX_CP"]

# Select the four columns in the same order as the Result fields, then turn
# every instance identifier into a "<identifier>.txt" file name.
res = cp[["Problem", "Benchmark", "Instance", "BKS"]].copy()
res["Instance"] = res["Instance"].map(lambda ident: f"{ident}.txt")

# One Result per table row; the positional unpacking relies on the column
# order chosen above.
results.extend(Result(*record) for record in res.values.tolist())

## Project scheduling data

Here we parse several BKS results files from http://solutionsupdate.ugent.be/.

In [4]:
# One entry per BKS file to parse:
# (problem variant, category, file name, number of footer rows to skip).
data = [
    ("RCPSP", "PSPLIB-J30", "J30.csv", 8),
    ("RCPSP", "PSPLIB-J60", "J60_1.csv", 57),
    ("RCPSP", "PSPLIB-J90", "J90_3.csv", 57),
    ("RCPSP", "PSPLIB-J120", "J120_0.csv", 71),
    ("MMRCPSP", "MMLIB50", "MM50_0.csv", 43),
    ("RCPSP", "RG300", "RG300_0.csv", 43),
    ("MMRCPSP", "MMLIB100", "MM100_0.csv", 43),
]

# Reader options shared by all of the files above: semicolon-separated,
# four leading rows skipped, Latin-1 encoded. The Python parser engine is
# selected because the files are read with `skipfooter`, which the C engine
# does not support.
kwargs = dict(sep=";", skiprows=4, encoding="ISO-8859-1", engine="python")

In [5]:
for problem, category, fname, skipfooter in data:
    df = pd.read_csv(DATA_DIR / fname, skipfooter=skipfooter, **kwargs)
    rows = df[["Ref1", "UB value"]]

    # Whether this file needs its instance names rebuilt is the same for
    # every row, so decide it once per file.
    rename_by_position = problem == "RCPSP" and "PSPLIB" in category

    for idx, (ref, value) in enumerate(rows.itertuples(index=False, name=None)):
        # Default: keep the reference's stem and switch the extension to .txt.
        name = ref.split(".")[0] + ".txt"

        if rename_by_position:
            # The name used in the BKS file differs from the actual instance
            # file name, so rebuild it from the set label and the 1-based
            # row position.
            cat = category.split("-")[1]  # J30/J60/J90/J120
            name = f"{cat}_{idx+1}.txt"

        results.append(Result(problem, category, name, value))

Here we parse the BKS values from the MPLIB data (the `TPM` sheet, `Set3` instances only).

In [6]:
loc = DATA_DIR / "MPLIB1 (Parameters and BKS).xlsx"
df = pd.read_excel(loc, sheet_name="TPM", skiprows=7, skipfooter=1)

# Keep only the rows whose instance name mentions "Set3". Note that the
# column label used here really does start with a space.
# NOTE(review): rows are filtered on "Set3" but stored under the category
# "Set1.3" -- confirm that this labelling is intended.
in_set3 = df[" Instance name"].str.contains("Set3")
rows = df[in_set3]

for raw_name, makespan in zip(rows[" Instance name"], rows["TPM"]):
    stem = raw_name.split(".")[0]  # drop the original extension
    results.append(Result("RCMPSP", "Set1.3", stem + ".txt", makespan))

## Output

In [7]:
# Column labels, listed in the same order as the fields of each collected record.
bks_columns = ["problem", "category", "instance", "bks"]

# Assemble every record gathered so far into a single table and preview it.
df = pd.DataFrame(data=results, columns=bks_columns)
df.head()

Unnamed: 0,problem,category,instance,bks
0,Flowshop,Taillard,1.txt,1278
1,Flowshop,Taillard,2.txt,1359
2,Flowshop,Taillard,3.txt,1081
3,Flowshop,Taillard,4.txt,1293
4,Flowshop,Taillard,5.txt,1235


Map problem names to acronyms.

In [8]:
# Acronym to store for each problem name found in the parsed source files.
name2acronym = {
    'Flowshop': 'PFSP',
    'Non-Flowshop': 'NPFSP',
    'TCTflowshop': 'TCT-PFSP',
    'Setupflowshop': 'SDST-PFSP',
    'Tardinessflowshop': 'TT-PFSP',
    'Distributedflowshop': 'DFSP',
    'Hybridflowshop': 'HFSP',
    'Jobshop': 'JSP',
    'Flexiblejobshop': 'FJSP',
    'Openshop': 'OSP',
    'Parallelmachine': 'PMP',
    'Nowaitflowshop': 'NW-PFSP',
    'RCPSP': 'RCPSP',
    'MMRCPSP': 'MMRCPSP',
    'RCMPSP': 'RCMPSP',
}

# `Series.map` with a plain dict silently converts every value that is not a
# key into NaN, so a problem name missing from the table above would end up
# as a blank in the exported CSV. Fail loudly instead.
# Values that already are acronyms are accepted, so that re-running this cell
# on an already-mapped frame is a no-op rather than wiping the column.
known_names = set(name2acronym) | set(name2acronym.values())
unknown_names = sorted(
    {str(name) for name in df["problem"].unique() if name not in known_names}
)
if unknown_names:
    raise ValueError(f"No acronym defined for problem name(s): {unknown_names}")

# Names are translated to their acronym; existing acronyms pass through as-is.
df["problem"] = df["problem"].map(lambda name: name2acronym.get(name, name))

In [9]:
# Write the combined table to disk; `index=False` keeps the positional row
# index out of the file.
df.to_csv(Path("../data/bks.csv"), index=False)