In [1]:
import numpy as np
import random
import gymnasium as gym
from gymnasium import spaces

##############################################
# Agent class – represents a robot with fixed position
##############################################
class Agent:
    """A robot stationed at a fixed section of the conveyor.

    Attributes:
        id: Identifier of the agent.
        position: Index of the conveyor section the agent sits at (y_r).
        operation_capability: Operation ids the agent can perform, e.g. [1, 2].
        speed: Divisor applied to a base processing time (see processing_time).
        window_size: Number of conveyor sections the agent observes.
        num_agents: Total number of agents in the system.
        operation_now: Operation currently being processed (0 means idle).
        status: 0=idle, 1=accept, 2=working, 3=completing.
        window: Latest observed operation ids, one per observed section.
        workbench: The job held by the agent, or None.
        processing_time_remaining: Steps left on the current job (0 when idle).
    """

    def __init__(self, agent_id: int, position: int, operation_capability: list, speed: float, window_size: int, num_agents: int):
        self.id = agent_id
        self.position = position         # y_r: fixed position on the conveyor
        self.operation_capability = operation_capability  # e.g. [1,2]
        self.speed = speed
        self.window_size = window_size   # Number of sections the agent can observe
        self.num_agents = num_agents

        self.operation_now = 0           # O_{r,t}: current operation (0 means idle)
        self.status = 0                  # Agent's own status: 0=idle, 1=accept, 2=working, 3=completing
        self.window = [0] * window_size  # The observed job operations from the conveyor

        self.workbench = None            # Holds the job once accepted
        # Defined up front so the attribute exists before any job has been
        # accepted, instead of appearing only when outside code assigns it.
        self.processing_time_remaining = 0

    def build_state(self, global_status):
        """Build the agent's observation vector.

        Layout:
          [ y_r, O_r (capabilities), O_{r,t}, Z_t (one status per agent), window ]
        For 3 agents, 2 capabilities and window_size 3 this gives
        1 + 2 + 1 + 3 + 3 = 10 values.

        Args:
            global_status: Sequence with the status of every agent. Any
                iterable works (list, tuple, 1-D array).

        Returns:
            A 1-D float32 numpy array.
        """
        # Unpacking (rather than list concatenation) accepts tuples and arrays
        # as well as lists without changing the result for list inputs.
        parts = [
            self.position,
            *self.operation_capability,
            self.operation_now,
            *global_status,
            *self.window,
        ]
        return np.array(parts, dtype=np.float32)

    def processing_time(self, base_time):
        """Return base_time divided by the agent's speed, rounded up to an int."""
        return int(np.ceil(base_time / self.speed))

##############################################
# CircularConveyor class – simulates the conveyor belt
##############################################
class CircularConveyor:
    """Circular conveyor belt carrying jobs past the agents.

    A job is a tuple ``(job_name, operations)`` where ``operations`` is the
    ordered list of operation ids. Empty sections hold ``None``. New jobs wait
    in ``buffer`` until they can be loaded into section 0.
    """

    # Fixed product library: product name -> ordered operation ids.
    PRODUCT_LIBRARY = {"A": [1, 2], "B": [2, 3], "C": [1, 3]}

    def __init__(self, num_sections: int, max_capacity: float, arrival_prob: float = 0.3):
        """
        Args:
            num_sections: Number of sections on the belt.
            max_capacity: Fraction of sections that may be occupied, e.g. 0.75.
            arrival_prob: Probability that a new job is generated on each
                update. Defaults to 0.3.
        """
        self.num_sections = num_sections
        self.max_capacity = max_capacity  # e.g., 0.75
        self.arrival_prob = arrival_prob
        self.conveyor = [None] * num_sections  # Initialize with empty slots
        self.buffer = []  # Buffer for jobs waiting to be loaded
        self.job_counter = 0

    def add_job(self):
        """
        Generate a new job from the product library and append it to the buffer.
        Product A: [1,2], Product B: [2,3], Product C: [1,3]
        """
        self.job_counter += 1
        product = random.choice(list(self.PRODUCT_LIBRARY))
        # Copy the recipe so a job never shares its operation list with the library.
        ops = list(self.PRODUCT_LIBRARY[product])
        self.buffer.append((f"{product}-{self.job_counter}", ops))

    def load_job(self):
        """
        Load the oldest buffered job into section 0 if:
          - The number of jobs on the conveyor is below max_capacity * num_sections.
          - Section 0 is empty.
        """
        if not self.buffer:
            return
        occupied = sum(slot is not None for slot in self.conveyor)
        if occupied < self.max_capacity * self.num_sections and self.conveyor[0] is None:
            self.conveyor[0] = self.buffer.pop(0)

    def move(self):
        """
        Move the conveyor one step (circularly); the last section wraps to section 0.
        """
        # Rotate in place so outside references to the list stay valid.
        self.conveyor.insert(0, self.conveyor.pop())

    def update(self):
        """
        Update the conveyor: move, possibly add a new job, and load job if possible.
        """
        self.move()
        if random.random() < self.arrival_prob:
            self.add_job()
        self.load_job()

    def get_window_values(self, position, window_size):
        """
        Return a list of the first operation of each job in the agent's observation window.
        For each section from position to position - window_size + 1 (wrapping around),
        return 0 if empty; otherwise, return the first operation of the job.
        """
        sections = (
            self.conveyor[(position - offset) % self.num_sections]
            for offset in range(window_size)
        )
        return [0 if job is None else job[1][0] for job in sections]

##############################################
# FJSPEnv – Gymnasium Environment for FJSP (matching the paper)
##############################################
class FJSPEnv(gym.Env):
    """Gymnasium environment for job-shop scheduling on a circular conveyor.

    Agents sit at fixed sections of a 12-section CircularConveyor. Each call to
    step() applies one action per agent, advances the conveyor by one section
    and returns one observation row per agent plus a single shared reward.

    Action codes: 0=ACCEPT, 1=WAIT_1, 2=WAIT_2, 3=DECLINE, 4=CONTINUE.
    """

    def __init__(self, window_size: int, num_agents: int, max_steps: int):
        """
        Args:
            window_size: Number of conveyor sections each agent observes.
            num_agents: Number of agents to create; the built-in position,
                capability and speed tables provide entries for three agents.
            max_steps: Number of steps after which an episode ends.
        """
        super().__init__()
        self.window_size = window_size  # 3
        self.num_agents = num_agents    # 3
        self.max_steps = max_steps
        self.step_count = 0

        # Initialize the conveyor with 12 sections and max capacity.
        self.num_sections = 12
        self.conveyor = CircularConveyor(self.num_sections, max_capacity=0.75)

        # Create agents at fixed positions.
        self.agent_positions = [3, 7, 11]
        self.agent_capabilities = [[1, 2], [2, 3], [1, 3]]
        self.agent_speeds = [1, 2, 3]
        self.agents = []
        for i in range(self.num_agents):
            agent = Agent(
                agent_id=i + 1,
                position=self.agent_positions[i],
                operation_capability=self.agent_capabilities[i],
                speed=self.agent_speeds[i],
                window_size=self.window_size,
                num_agents=self.num_agents
            )
            self.agents.append(agent)

        # Global status vector: each agent's status.
        self.global_status = [agent.status for agent in self.agents]

        # Per-agent observation length follows Agent.build_state:
        #   position + capabilities + current operation + one status per agent + window.
        # Deriving it (instead of hard-coding 10) keeps the declared space in
        # step with the observations for any num_agents / window_size.
        capability_len = len(self.agent_capabilities[0])
        obs_dim = 1 + capability_len + 1 + self.num_agents + self.window_size
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.num_agents, obs_dim), dtype=np.float32
        )
        # Define action space: 0=ACCEPT, 1=WAIT_1, 2=WAIT_2, 3=DECLINE, 4=CONTINUE.
        self.action_space = spaces.MultiDiscrete([5] * self.num_agents)

    def _get_observation(self):
        """
        For each agent, update its window from the conveyor and build its state vector.

        Returns:
            Array with one row per agent.
        """
        self.global_status = [agent.status for agent in self.agents]
        obs = []
        for agent in self.agents:
            agent.window = self.conveyor.get_window_values(agent.position, self.window_size)
            obs.append(agent.build_state(self.global_status))
        return np.array(obs)

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Args:
            seed: Optional seed. When given it seeds the environment's own RNG
                as well as the global `random` and `numpy.random` generators
                (job generation draws from the global `random` module).
            options: Unused.

        Returns:
            (observation, info) with an empty info dict.
        """
        # Gymnasium convention: let the base class seed self.np_random.
        super().reset(seed=seed)
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)
        self.step_count = 0
        self.conveyor = CircularConveyor(self.num_sections, max_capacity=0.75)
        # Preload some jobs.
        for _ in range(3):
            self.conveyor.add_job()
        self.conveyor.load_job()
        for agent in self.agents:
            agent.operation_now = 0
            agent.status = 0
            agent.workbench = None
            # Clear any countdown left over from the previous episode.
            agent.processing_time_remaining = 0
        return self._get_observation(), {}

    def step(self, actions):
        """
        Process a step given actions for each agent.
        Actions: sequence of integers in {0,1,2,3,4} corresponding to:
          0: ACCEPT – pick up job at current section.
          1: WAIT_1 – wait for job at position y_r - 1.
          2: WAIT_2 – wait for job at position y_r - 2.
          3: DECLINE – reject the job at current section.
          4: CONTINUE – do nothing.
        Actions of agents that are currently working are ignored.

        Returns:
            (observation, reward, done, False, info) where `done` becomes True
            once max_steps steps have been taken and info echoes the actions.
        """
        reward = 0.0
        total_speed = sum(self.agent_speeds)
        # Process actions for each agent.
        for i, agent in enumerate(self.agents):
            act = actions[i]
            if agent.status == 0:
                # Idle agent: the action decides what happens.
                if act == 0:  # ACCEPT
                    job = self.conveyor.conveyor[agent.position]
                    if job is not None:
                        # NOTE(review): the job's operation is not checked against
                        # agent.operation_capability before it is accepted — confirm
                        # whether that is intended.
                        agent.workbench = job
                        agent.operation_now = job[1][0]  # First operation required.
                        agent.status = 2  # Working.
                        # Base time: 24 for operation 1, 18 for every other operation.
                        base_time = 24 if agent.operation_now == 1 else 18
                        agent.processing_time_remaining = agent.processing_time(base_time)
                        self.conveyor.conveyor[agent.position] = None
                    else:
                        reward -= 1  # Penalize if no job available.
                elif act in (1, 2):  # WAIT actions.
                    factor = 2.0 if act == 1 else 3.0
                    reward += 0.5 / factor  # Small reward for waiting; agent stays idle.
                elif act == 3:  # DECLINE.
                    # Declining removes whatever is in the agent's section.
                    self.conveyor.conveyor[agent.position] = None
                    reward -= 0.5
                # act == 4 (CONTINUE) and any other value: nothing happens.
            elif agent.status == 2:
                # Working agent: count down the remaining processing time.
                if agent.processing_time_remaining > 0:
                    agent.processing_time_remaining -= 1
                    reward += agent.speed / total_speed
                if agent.processing_time_remaining == 0:
                    # Job complete: the job is dropped and the agent is idle again.
                    agent.workbench = None
                    agent.operation_now = 0
                    agent.status = 0
                    reward += 4  # Reward for job completion.
        # Update environment dynamics.
        self.conveyor.update()
        self.step_count += 1
        reward -= 1  # Timestep penalty.
        # The step limit is the only end condition; it is reported in the
        # `terminated` slot (truncated stays False), as existing callers expect.
        done = self.step_count >= self.max_steps
        obs = self._get_observation()
        info = {"actions": actions}
        return obs, reward, done, False, info

    def render(self):
        """Print the step counter, every agent's state and the conveyor contents."""
        print(f"Step: {self.step_count}")
        for agent in self.agents:
            print(f"Agent {agent.id} at pos {agent.position}: Status {agent.status}, Operation {agent.operation_now}, Workbench {agent.workbench}, Window {agent.window}")
        conv_str = " | ".join([str(j[0]) if j is not None else "---" for j in self.conveyor.conveyor])
        print("Conveyor:", conv_str)
        print("-" * 40)

##############################################
# Main function to test the environment
##############################################
def main():
    """Run one episode with randomly sampled actions, printing every step."""
    seed = 42
    env = FJSPEnv(window_size=3, num_agents=3, max_steps=300)
    obs, _ = env.reset(seed=seed)
    # reset(seed=...) does not seed the action space's sampler; seed it too so
    # the sampled actions (and therefore the whole run) are reproducible.
    env.action_space.seed(seed)
    print("Initial Observations:")
    print(obs)
    done = False
    steps = 0
    while not done:
        actions = env.action_space.sample()
        print(f"\nStep {steps+1}, Actions: {actions}")
        obs, reward, terminated, truncated, info = env.step(actions)
        done = terminated or truncated
        print("Observations:")
        print(obs)
        print("Reward:", reward)
        env.render()
        steps += 1


if __name__ == '__main__':
    main()


Initial Observations:
[[ 3.  1.  2.  0.  0.  0.  0.  0.  0.  0.]
 [ 7.  2.  3.  0.  0.  0.  0.  0.  0.  0.]
 [11.  1.  3.  0.  0.  0.  0.  0.  0.  0.]]

Step 1, Actions: [0 2 2]
Observations:
[[ 3.  1.  2.  0.  0.  0.  0.  0.  0.  1.]
 [ 7.  2.  3.  0.  0.  0.  0.  0.  0.  0.]
 [11.  1.  3.  0.  0.  0.  0.  0.  0.  0.]]
Reward: -1.6666666666666667
Step: 1
Agent 1 at pos 3: Status 0, Operation 0, Workbench None, Window [0, 0, 1]
Agent 2 at pos 7: Status 0, Operation 0, Workbench None, Window [0, 0, 0]
Agent 3 at pos 11: Status 0, Operation 0, Workbench None, Window [0, 0, 0]
Conveyor: A-2 | C-1 | --- | --- | --- | --- | --- | --- | --- | --- | --- | ---
----------------------------------------

Step 2, Actions: [3 1 4]
Observations:
[[ 3.  1.  2.  0.  0.  0.  0.  0.  1.  1.]
 [ 7.  2.  3.  0.  0.  0.  0.  0.  0.  0.]
 [11.  1.  3.  0.  0.  0.  0.  0.  0.  0.]]
Reward: -1.25
Step: 2
Agent 1 at pos 3: Status 0, Operation 0, Workbench None, Window [0, 1, 1]
Agent 2 at pos 7: Status 0, Oper

In [2]:
# Define the operation type, which specifies the nature of work
class OperationType:
    """A kind of work together with the machines able to carry it out."""

    def __init__(self, name, capable_machines):
        # Machines/robots that can perform this operation (kept as passed in).
        self.capable_machines = capable_machines
        # Label of the operation, e.g. "Assembly" or "Inspection".
        self.name = name

    def __repr__(self):
        return "OperationType({})".format(self.name)

# A processing step is one stage in the product’s manufacturing process.
# It is linked with a specific operation type and has a base processing time.
class ProcessingStep:
    """One stage of a product's recipe: an operation type plus its base time."""

    def __init__(self, step_id, operation_type, base_time):
        # Position of the step in the recipe (e.g., 1, 2, 3, ...).
        self.step_id = step_id
        # The operation performed at this step; its `name` is used by __repr__.
        self.operation_type = operation_type
        # Base time required for this step.
        self.base_time = base_time

    def __repr__(self):
        template = "ProcessingStep(id={}, op={}, time={})"
        return template.format(self.step_id, self.operation_type.name, self.base_time)

# A product is defined in the product library.
# It provides the overall "recipe" for manufacturing an item, i.e., its ordered processing steps.
class Product:
    """A product-library entry: an id, a name and its ordered processing steps."""

    def __init__(self, product_id, name, processing_steps):
        self.product_id, self.name = product_id, name
        # ProcessingStep objects in the order they have to be carried out.
        self.processing_steps = processing_steps

    def __repr__(self):
        return "Product({})".format(self.name)

# A job is an individual instance of a product that enters the production system.
class Job:
    """An individual instance of a product moving through its processing steps.

    Attributes:
        job_id: Identifier of the job.
        product: The product being built; its `processing_steps` and `name`
            are read by this class.
        current_step_index: Index of the next step to process (starts at 0).
    """

    def __init__(self, job_id, product):
        self.job_id = job_id
        self.product = product
        self.current_step_index = 0  # Start with the first processing step

    def get_next_processing_step(self):
        """Return the step to process next, or None when all steps are done."""
        steps = self.product.processing_steps
        if self.current_step_index < len(steps):
            return steps[self.current_step_index]
        return None  # All steps completed

    def complete_current_step(self):
        """Mark the current step as done and advance to the next one.

        Prints a message for the completed step, or a "fully completed"
        message when there is no step left.
        """
        step = self.get_next_processing_step()
        # Compare against None explicitly: "no step left" is signalled by None,
        # so a step object that happens to be falsy must still be completed.
        if step is not None:
            print(f"Job {self.job_id} completed step {step.step_id}: {step.operation_type.name}")
            self.current_step_index += 1
        else:
            print(f"Job {self.job_id} is fully completed.")

    def __repr__(self):
        return f"Job(id={self.job_id}, product={self.product.name}, step={self.current_step_index + 1})"

# --- Example Usage ---
if __name__ == '__main__':
    # Operation types and the machines able to perform each of them.
    op_assembly = OperationType("Assembly", capable_machines=["Robot1", "Robot2"])
    op_inspection = OperationType("Inspection", capable_machines=["Robot3"])
    op_packaging = OperationType("Packaging", capable_machines=["Robot4", "Robot5"])

    # The recipe: three ordered steps, each tied to one operation type.
    step1 = ProcessingStep(step_id=1, operation_type=op_assembly, base_time=30)
    step2 = ProcessingStep(step_id=2, operation_type=op_inspection, base_time=20)
    step3 = ProcessingStep(step_id=3, operation_type=op_packaging, base_time=15)

    # A product is its ordered list of steps; a job is one instance of it.
    product_A = Product(product_id=101, name="WidgetA", processing_steps=[step1, step2, step3])
    job1 = Job(job_id=1, product=product_A)

    # Walk job1 through its steps until none is left.
    next_step = job1.get_next_processing_step()
    while next_step is not None:
        print(f"Job {job1.job_id} is processing step {next_step.step_id}: {next_step.operation_type.name}")
        # A robot/machine from next_step.operation_type.capable_machines could be
        # assigned and scheduled here before the step is marked as completed.
        job1.complete_current_step()
        next_step = job1.get_next_processing_step()

    print(f"Job {job1.job_id} completed all processing steps.")

# Output:
# Job 1 is processing step 1: Assembly
# Job 1 completed step 1: Assembly
# Job 1 is processing step 2: Inspection
# Job 1 completed step 2: Inspection
# Job 1 is processing step 3: Packaging
# Job 1 completed step 3: Packaging
# Job 1 completed all processing steps.


Job 1 is processing step 1: Assembly
Job 1 completed step 1: Assembly
Job 1 is processing step 2: Inspection
Job 1 completed step 2: Inspection
Job 1 is processing step 3: Packaging
Job 1 completed step 3: Packaging
Job 1 completed all processing steps.
