In [9]:
from typing import Any, Optional, Callable
from numpy.typing import ArrayLike, NDArray
from pandas import DataFrame

import numpy as np
import random

from cpscheduler.common_envs import JobShopEnv, make_env, register_env
from cpscheduler.environment import WeightedCompletionTime, SchedulingCPEnv, PrecedenceConstraint, NonOverlapConstraint, Env
from cpscheduler.policies.heuristics import ShortestProcessingTime, WeightedShortestProcessingTime

from cpscheduler.environment.instances import generate_taillard_instance
from cpscheduler.utils import dataframe_to_structured

# Generate Customer-based Scheduling Dataset

The customer-based scheduling dataset is a collection of JobShop instances with an additional feature for each job, linking every job to a given customer. Customers may be seen as the ones who ordered the job, or as any other characteristic that may affect the priority with which the job should be completed. We assume the job's customer is drawn from a categorical distribution over a finite pool of possible customers, each with a different oracle priority value related to the probability of that customer contracting a job.

We first suppose that these probabilities and priorities do not change over time. Although this may sound unrealistic, the priority of a job is not fully determined by the customer priority and can change in various unpredictable ways. We model this by attaching a customer-specific uncertainty to each priority: the job priority is sampled from the resulting distribution and revealed to the online policy, which makes its decisions using the sampled priority; this sampled value is unknown afterwards.

In [2]:
n_customers = 100

# Generate random data
rng = np.random.default_rng(seed=0)

# Target variance of each customer's probability under a symmetric Dirichlet.
desired_prob_variance = 1e-7

# For Dirichlet(alpha, ..., alpha) over K categories:
#     Var[p_i] = (K - 1) / (K**2 * (K * alpha + 1))
# Solving Var[p_i] = v for alpha gives:
#     alpha = (K - 1 - K**2 * v) / (K**3 * v)
# (the K**2 * v term is subtracted, not added; requires K - 1 > K**2 * v).
alpha = (n_customers - 1 - n_customers**2 * desired_prob_variance) / (n_customers**3 * desired_prob_variance)
costumer_probabilities = rng.dirichlet(alpha * np.ones(n_customers))

# Oracle priority: the probability rescaled by the pool size (so the mean
# factor is 1) and perturbed by a multiplicative factor in [0.66, 1.5).
oracle_priorities = costumer_probabilities * n_customers * rng.uniform(0.66, 1.5, n_customers)

# Per-customer standard deviation of the priority noise, at most 5% of the
# customer's oracle priority.
costumer_uncertainty = rng.uniform(0, oracle_priorities * 0.05)

def sample_priorities(seed: Optional[int] = None) -> NDArray[np.float32]:
    """Draw one noisy priority per customer.

    Each value is sampled from a normal distribution centred on the
    module-level ``oracle_priorities`` with standard deviation given by the
    module-level ``costumer_uncertainty``.

    Args:
        seed: Seed for a fresh ``np.random.default_rng``; ``None`` lets numpy
            pick fresh entropy, so results differ between calls.

    Returns:
        The sampled priorities cast to ``float32``.
    """
    generator = np.random.default_rng(seed)
    noisy_priorities = generator.normal(loc=oracle_priorities, scale=costumer_uncertainty)

    return noisy_priorities.astype(np.float32)

In [3]:
class CustomerJobShopEnv(SchedulingCPEnv):
    """Job-shop environment whose objective weights each job by its customer.

    On construction the environment registers the constraints returned by
    ``PrecedenceConstraint.jobshop_precedence`` and
    ``NonOverlapConstraint.jobshop_non_overlap`` and sets a
    ``WeightedCompletionTime`` objective with one weight per job, looked up
    from ``customer_weights`` through the job's customer id.

    Args:
        instance: Task table forwarded to ``SchedulingCPEnv``.
        duration: Duration column name or explicit durations, forwarded to
            ``SchedulingCPEnv``.
        customer_weights: Weight per customer, indexed by customer id. The
            default is built once, at class-definition time, from the global
            ``n_customers``.
        job_feature: Name of the task feature holding the job id.
        customer_feature: Name of the task feature holding the customer id.
        operation_feature: Name of the task feature holding the operation index.
        machine_feature: Name of the task feature holding the machine id.
    """

    def __init__(
            self,
            instance: DataFrame,
            duration: str | NDArray[np.int32] = 'processing_time',
            customer_weights: NDArray[np.float32] = np.ones(n_customers, dtype=np.float32),
            job_feature: str = 'job',
            customer_feature: str = 'customer',
            operation_feature: str = 'operation',
            machine_feature: str = 'machine'
        ) -> None:
        super().__init__(instance, duration)

        # Keep the configured feature names so render() groups by the same
        # features the constraints were built on, not hard-coded defaults.
        self._job_feature = job_feature
        self._machine_feature = machine_feature

        self.add_constraint(
            PrecedenceConstraint.jobshop_precedence(self.tasks, job_feature, operation_feature)
        )

        self.add_constraint(
            NonOverlapConstraint.jobshop_non_overlap(self.tasks, machine_feature)
        )

        n_jobs = len(np.unique(self.tasks[job_feature]))

        # Scatter each task's customer weight into its job's slot. Job ids are
        # used directly as indices, so they are assumed to be integers in
        # [0, n_jobs) -- TODO confirm for instances not built in this notebook.
        weights = np.zeros(n_jobs, dtype=np.float32)
        weights[self.tasks[job_feature]] = customer_weights[self.tasks[customer_feature]]

        self.set_objective(
            WeightedCompletionTime(self.tasks, job_feature, weights)
        )

    def render(self) -> None:
        """Render a Gantt chart using the machine and job features given at construction."""
        return self.render_gantt(
            self._machine_feature,
            self._job_feature,
        )

# Register the class under the name 'customer_jobshop', the identifier later
# passed to make_env to build the environments.
register_env(CustomerJobShopEnv, 'customer_jobshop')

We sample random jobshop instances, with different job and operation characteristics, linked to a specific customer. Solutions are then sampled from different behavior patterns:
- Optimal proxy: The solutions are optimal for the weighted completion time jobshop problem, taking priority values as weights for each job.
- Perturbed expert solutions: Optimal solutions are perturbed by one or more of the other behavior patterns.
- Expert trajectories: An RL algorithm is trained to optimize the weighted completion time jobshop problem with known priorities.
- PDR solutions: Solutions are taken from different priority dispatching rules (PDRs) leveraging approximate job priorities.
- Biased solutions: Solutions are taken from PDRs with no or minimal dependency on the priority values.

We consider these behavior patterns, from top to bottom, to be in decreasing order of trajectory quality, and we build different datasets with different quality levels.

## Taillard Instances

Taillard instances were originally proposed for the makespan jobshop scheduling problem and are commonly used for benchmarking algorithms, since their known solutions can be compared across different strategies. It is a simple static scenario with deterministic processing times and no uncertainty.

In [4]:
def generate_customers_taillard(
    n_jobs: int,
    n_machines: int,
    customers_probabilities: NDArray[np.float64],
    seed: Optional[int] = None
) -> tuple[DataFrame, dict[str, Any]]:
    """Generate a Taillard instance and attach a customer id to every task.

    One customer is drawn per job from the categorical distribution given by
    ``customers_probabilities``; all tasks of a job share that customer.

    Args:
        n_jobs: Number of jobs, forwarded to ``generate_taillard_instance``.
        n_machines: Number of machines, forwarded to ``generate_taillard_instance``.
        customers_probabilities: Probability of each customer; its length
            defines the customer pool and customer ids are positions in it.
        seed: Seed used both for the instance generator and for the customer draw.

    Returns:
        The instance with an added ``"customer"`` column, and the metadata
        returned by ``generate_taillard_instance``.
    """
    rng = np.random.default_rng(seed=seed)

    instance, metadata = generate_taillard_instance(n_jobs, n_machines, seed=seed)

    # Size the pool from the probability vector itself rather than a notebook
    # global, so the function works for any customer pool.
    costumers = rng.choice(len(customers_probabilities), n_jobs, p=customers_probabilities)

    # Broadcast the per-job customer onto every task through its job id.
    instance["customer"] = costumers[instance["job"]]

    return instance, metadata

In [5]:
n_taillard_samples = 364
n_envs_per_priority = 7

# Size of every generated Taillard instance.
n_taillard_jobs = 20
n_taillard_machines = 5

# One instance per environment, seeded by its index so the set is reproducible.
# The count comes from n_taillard_samples so it cannot drift from num_envs below.
instances = [
    generate_customers_taillard(n_taillard_jobs, n_taillard_machines, costumer_probabilities, seed=i)[0]
    for i in range(n_taillard_samples)
]

# NOTE(review): these sampled weights are not consumed in the cells shown here;
# the environments below are built with the oracle priorities.
customer_weights = [
    sample_priorities(seed=i) for i in range(n_envs_per_priority)
]

env = make_env(
    'customer_jobshop',
    instances,
    'processing_time',
    num_envs=n_taillard_samples,
    vector_env='async',
    auto_reset=False,
    customer_weights=oracle_priorities
)

obss, infos = env.reset()

# Stack the per-environment observations into a single array.
obs = np.vstack(obss)

In [6]:
actions = []

# Process the environments in consecutive groups of n_envs_per_priority; every
# group shares one draw of the uncertain priorities. Stepping by start index
# (instead of floor-dividing the count) also covers a trailing partial group.
for i, start in enumerate(range(0, n_taillard_samples, n_envs_per_priority)):
    # Seed with the group index so the sampled priorities are reproducible.
    uncertain_priorities = sample_priorities(seed=i)

    spt = WeightedShortestProcessingTime(
        uncertain_priorities,
        weighted_label='customer',
    )

    actions.extend(spt(obs[start: start + n_envs_per_priority]))

actions = np.array(actions)

_, _, _, _, infos = env.step(actions, enforce_order=False)

# Replace the submitted actions with the solution reported by the environments.
actions = np.array(infos['solution'])