In [3]:
import numpy as np
import random
import math
import gymnasium as gym
from gymnasium import spaces
import sys

# =============================================================================
# Agent class (represents a robot with a workbench)
# =============================================================================
class Agent:
    """A robot with a workbench, stationed at a fixed section of the conveyor.

    The agent carries the pieces of its own observation vector (position,
    capability, current operation, status of all agents, the operations of
    the jobs visible in its window and the targeted window slot) plus the
    buffers used while a job travels between conveyor and workbench.
    """

    def __init__(self, agent_id: int, position: int, operation_capability: list, speed: float, window_size: int, num_agent: int):
        # --- Fixed properties -------------------------------------------------
        self.id = agent_id
        self.position = position  # y_r: fixed, 0-indexed section on the conveyor
        self.speed = speed
        self.operation_capability = operation_capability  # e.g. [1, 2]
        self.window_size = window_size  # number of conveyor sections the agent observes
        self.many_operations = 2  # every job in this example has two operations

        # --- Observable state -------------------------------------------------
        self.operation_now = 0  # O_(r,t): operation in progress, 0 means idle
        # Z_t: one status per agent; 0 idle, 1 accept, 2 working, 3 completing.
        self.status_all = [0 for _ in range(num_agent)]
        # Next (first) and following (second) operation of each job in the window.
        self.first_product_operation = [0 for _ in range(window_size)]
        self.second_product_operation = [0 for _ in range(window_size)]
        self.pick_product_window = 0  # window slot being waited for, 0 means none
        self.processing_time_remaining = 0

        # --- Job transfer buffers ---------------------------------------------
        self.window_product = [None for _ in range(window_size)]  # jobs seen in the window
        self.workbench = {}  # job currently held on the workbench
        self.buffer_job_to_workbench = {}  # job in transit from conveyor to workbench
        self.buffer_job_to_conveyor = {}  # not used extensively here

    def build_state(self):
        """
        Assemble the agent's observation as a flat numpy array.

        Segment order:
          position, operation_capability, operation_now, status_all,
          first_product_operation, second_product_operation, pick_product_window.
        With two capabilities, three agents and a window of three this gives
        1 + 2 + 1 + 3 + 3 + 3 + 1 = 14 entries.
        """
        segments = (
            [self.position],
            self.operation_capability,
            [self.operation_now],
            self.status_all,
            self.first_product_operation,
            self.second_product_operation,
            [self.pick_product_window],
        )
        return np.hstack([np.array(segment) for segment in segments])

    def processing_time(self, base_processing_time):
        """Return the base processing time divided by the agent's speed, rounded up to an int."""
        scaled_duration = base_processing_time / self.speed
        return int(np.ceil(scaled_duration))

# =============================================================================
# CircularConveyor class
# =============================================================================
class CircularConveyor:
    """Circular conveyor belt carrying job labels, with an entry buffer.

    Jobs are identified by labels such as ``"A-1"``; ``job_details`` maps each
    label to the list of operations that job still needs.
    """

    # A new job is generated on every JOB_INTERVAL-th call to generate_jobs().
    JOB_INTERVAL = 5
    # Upper bound on the number of jobs generated per product type.
    MAX_JOBS_PER_TYPE = 7

    def __init__(self, num_sections: int, max_capacity: float, arrival_rate: float, num_agents: int, n_jobs: int, current_episode_count: int):
        self.num_sections = num_sections  # Total sections on the conveyor
        self.max_capacity = max_capacity  # Fill capacity fraction (e.g. 0.75)
        self.arrival_rate = arrival_rate  # Stored only; no method of this class reads it.
        self.conveyor = [None] * num_sections  # One slot per section; None means empty
        self.buffer_jobs = []  # Jobs waiting to be loaded at section 0
        self.total_jobs = {"A": 0, "B": 0, "C": 0}  # Jobs generated so far per product
        # Operation sequence of each product type (two operations each).
        self.product_operations = {
            "A": [1, 2],
            "B": [2, 3],
            "C": [1, 3],
        }
        self.job_details = {}  # job label -> list of remaining operations
        self.product_completed = []  # Labels of completed jobs
        self.n_jobs = n_jobs  # Total number of jobs to generate
        self.sum_n_jobs = 0  # Jobs generated so far
        self.num_agents = num_agents
        self.iteration = 0  # Number of generate_jobs() calls that advanced the clock
        self.episode = current_episode_count
        self.episode_seed = 0  # Offset added to the iteration to seed product selection

    def add_job(self, product_type: str):
        """Create a new job of ``product_type`` and append it to the entry buffer.

        The job gets the label ``"<type>-<running count>"`` and its own copy of
        the product's operation sequence. No capacity check happens here;
        loading onto the belt is decided in ``move_conveyor``.

        Raises:
            KeyError: if ``product_type`` is not a known product.
        """
        self.total_jobs[product_type] += 1
        job_label = f"{product_type}-{self.total_jobs[product_type]}"
        # Copy so that popping operations from one job never alters the template.
        self.job_details[job_label] = self.product_operations[product_type][:]
        self.buffer_jobs.append(job_label)

    def move_conveyor(self):
        """Advance every job by one section (circularly), then try to load one buffered job.

        A buffered job is placed on section 0 only when all of these hold:
        the buffer is non-empty, the belt holds fewer jobs than
        ``max_capacity * num_sections``, section 0 is empty, and more than
        ``num_agents`` sections are empty.
        """
        # Rotate in place so the list object held in self.conveyor stays the same.
        self.conveyor.insert(0, self.conveyor.pop())

        occupied = sum(1 for slot in self.conveyor if slot is not None)
        free = len(self.conveyor) - occupied
        if (self.buffer_jobs
                and occupied < self.max_capacity * self.num_sections
                and self.conveyor[0] is None
                and free > self.num_agents):
            self.conveyor[0] = self.buffer_jobs.pop(0)

    def generate_jobs(self):
        """Generate at most one new job, on every ``JOB_INTERVAL``-th call.

        Arrivals are deterministic (not a Poisson process): the call counter
        only advances while fewer than ``n_jobs`` jobs have been generated, and
        a job is created whenever the counter is a multiple of ``JOB_INTERVAL``.
        The product type is drawn from the types that have not yet reached
        ``MAX_JOBS_PER_TYPE``, using a generator seeded with
        ``episode_seed + iteration`` so the sequence is reproducible.
        """
        if self.sum_n_jobs >= self.n_jobs:
            return
        self.iteration += 1
        if self.iteration % self.JOB_INTERVAL != 0:
            return
        allowed_types = [
            ptype for ptype, count in self.total_jobs.items()
            if count < self.MAX_JOBS_PER_TYPE
        ]
        if not allowed_types:
            return
        # A private generator yields the same pick as seeding the module-level
        # RNG with this value, but leaves the global random state untouched.
        rng = random.Random(int(self.episode_seed + self.iteration))
        product_type = rng.choice(allowed_types)
        self.sum_n_jobs += 1
        self.add_job(product_type)

    def display(self):
        """Print the conveyor contents, the entry buffer and the completed jobs."""
        conveyor_state = " <-> ".join(str(j) if j is not None else "---" for j in self.conveyor)
        print("Conveyor:", conveyor_state)
        print("Buffer:", self.buffer_jobs)
        print("Completed Products:", self.product_completed)

# =============================================================================
# FJSPEnv: Gymnasium Environment for Flexible Job Shop Scheduling
# =============================================================================
class FJSPEnv(gym.Env):
    """Gymnasium environment for flexible job-shop scheduling on a circular conveyor.

    Every agent observes a 14-element row laid out as
    ``[position, capability (2), operation_now, status_all (3),
    first-operation window (3), second-operation window (3), pick_product_window]``.
    The index layout, capabilities and speeds below are written for three
    agents and a window of three sections.

    Status codes: 0 idle, 1 accept, 2 working, 3 completing.
    Actions: 0 ACCEPT, 1 WAIT (target yr-1), 2 WAIT (target yr-2), 3 DECLINE, 4 CONTINUE.

    ``reset`` must be called before ``step``; it creates the conveyor, the
    agents and the first observation.
    """

    def __init__(self, window_size: int, num_agents: int, max_steps: int, episode: int):
        super(FJSPEnv, self).__init__()
        self.episode_count = episode
        self.window_size = window_size
        self.num_agents = num_agents
        self.max_steps = max_steps

        # Per-episode counters and reward/outcome flags (re-applied by reset()).
        self._reset_episode_flags()

        # Conveyor parameters
        self.num_sections = 12
        self.max_capacity = 0.75
        self.arrival_rate = 0.4
        self.n_jobs = 21

        # Agent configuration (fixed positions on the conveyor)
        self.agent_positions = [3, 3 + 1 + window_size, 3 + 1*2 + window_size*2]
        self.agent_operation_capability = [[1, 2], [2, 3], [1, 3]]
        # Column positions inside one agent's 14-element observation row.
        # Order: [position, capability (1-2), operation_now (3), status_all (4-6),
        #         first-op window (7-9), second-op window (10-12), pick_product_window (13)]
        self.state_yr_location = 0
        self.state_operation_capability_location = [1, 2]
        self.state_operation_now_location = 3
        self.state_status_location_all = [4, 5, 6]  # agent i keeps its own status at entry i
        self.state_first_job_operation_location = [7, 8, 9]  # window slots yr, yr-1, yr-2
        self.state_second_job_operation_location = [10, 11, 12]  # same slots, second operation
        self.state_pick_job_window_location = 13

        self.agent_many_operations = 2
        self.agent_speeds = [1, 2, 3]  # Speeds for agent 1,2,3 respectively.
        self.base_processing_times = [24, 18, 12]  # Base processing times, indexed by op-1

        # Observation space: each agent has a 14-dimensional state vector.
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.num_agents, 14), dtype=np.float32)
        # Action space: 0=ACCEPT, 1=WAIT (target yr-1), 2=WAIT (target yr-2), 3=DECLINE, 4=CONTINUE.
        self.action_space = spaces.MultiDiscrete([5] * self.num_agents)
        self.state_dim = self.observation_space.shape
        self.action_dim = 5

    def _reset_episode_flags(self):
        """Set every per-episode counter and outcome flag back to its initial value."""
        self.step_count = 0
        self.is_action_wait_succeed = [False] * self.num_agents
        self.is_status_working_succeed = [False] * self.num_agents
        self.is_job_moving_to_workbench = [False] * self.num_agents
        self.product_return_to_conveyor = [False] * self.num_agents
        self.total_process_done = 0
        self.reward_product_complete = 0
        self.agents = []
        self.FAILED_ACTION = False

    def initial_state(self):
        """Stack every agent's ``build_state()`` into ``self.observation_all`` and return it."""
        self.observation_all = np.array([agent.build_state() for agent in self.agents])
        return self.observation_all

    def reset(self, seed=None, options=None):
        """Start a new episode.

        Increments ``episode_count``, clears the per-episode flags, builds a
        fresh conveyor and fresh agents, and returns ``(observation, info)``
        where ``info`` is an empty dict. ``options`` is accepted but not used.
        """
        super().reset(seed=seed)
        self.episode_count += 1
        # Reset only the episode state; the static configuration set in
        # __init__ is left alone instead of re-running the constructor.
        self._reset_episode_flags()
        self.conveyor = CircularConveyor(self.num_sections, self.max_capacity, self.arrival_rate,
                                          self.num_agents, n_jobs=self.n_jobs, current_episode_count=self.episode_count)
        self.agents = [
            Agent(
                agent_id=i + 1,
                position=self.agent_positions[i],
                operation_capability=self.agent_operation_capability[i],
                speed=self.agent_speeds[i],
                window_size=self.window_size,
                num_agent=self.num_agents,
            )
            for i in range(self.num_agents)
        ]
        return self.initial_state(), {}

    # ---------------------- Action Functions ---------------------------
    def action_accept(self, observation, agent, i, status_index):
        """
        ACCEPT (two-phase hand-over), valid only while operation_now == 0 and
        the workbench is empty:
          1. Status 0, pick_product_window == 1 and no transfer pending: if the
             first operation of the job at the agent's position is within the
             agent's capability, buffer the job, remove it from the conveyor
             and set status to 1 (accept).
          2. Status 1 with a transfer pending: move the buffered job onto the
             workbench, set status to 2 (working), record the operation and
             start its processing timer.
        Any other situation prints a message and sets ``FAILED_ACTION``.

        ``observation`` is modified in place; returns ``(observation, agent)``.
        """
        if observation[self.state_operation_now_location] == 0 and not agent.workbench:
            if observation[status_index] == 1 and self.is_job_moving_to_workbench[i]:
                # Phase 2: the job was buffered on an earlier step; put it on the workbench.
                self.is_job_moving_to_workbench[i] = False
                agent.workbench = agent.buffer_job_to_workbench.copy()
                agent.buffer_job_to_workbench = {}
                observation[self.state_pick_job_window_location] = 0
                observation[status_index] = 2  # now working
                # Remaining operations of the (single) job on the workbench.
                list_operation = list(agent.workbench.values())[0]
                # Check if the job's first operation is in agent's capability:
                if list_operation[0] in agent.operation_capability:
                    select_operation = list_operation[0]
                    observation[self.state_operation_now_location] = select_operation
                    # The job left the conveyor, so clear it from window slot yr.
                    observation[self.state_first_job_operation_location[0]] = 0
                    observation[self.state_second_job_operation_location[0]] = 0
                    agent.processing_time_remaining = agent.processing_time(self.base_processing_times[select_operation - 1])
                else:
                    print("FAILED ACTION: Capability mismatch in workbench (ACCEPT)")
                    self.FAILED_ACTION = True
            elif observation[status_index] == 0 and observation[self.state_pick_job_window_location] == 1 and not self.is_job_moving_to_workbench[i]:
                # Phase 1: the job is still on the conveyor at the agent's position.
                if observation[self.state_first_job_operation_location[0]] in agent.operation_capability:
                    observation[status_index] = 1  # set to accept
                    self.is_job_moving_to_workbench[i] = True
                    # Buffer the job from the conveyor based on the agent's observed window.
                    agent.buffer_job_to_workbench[str(agent.window_product[0])] = self.conveyor.job_details.get(agent.window_product[0], [])
                    # Remove job from the conveyor at the agent's position:
                    self.conveyor.conveyor[agent.position] = None
                else:
                    print("FAILED ACTION: Job operation not in agent capability (ACCEPT)")
                    self.FAILED_ACTION = True
            else:
                print("FAILED ACTION: Agent status not idle or job not ready (ACCEPT)")
                self.FAILED_ACTION = True
        else:
            print("FAILED ACTION: Workbench not empty (ACCEPT)")
            self.FAILED_ACTION = True
        return observation, agent

    def action_wait(self, observation, agent, i, status_index, actions):
        """
        WAIT:
          - Only valid while the agent is idle (status 0).
          - Action 1 targets window slot yr-1, action 2 targets yr-2. If the
            targeted slot shows a job (first operation != 0), pick_product_window
            is set to 1 or 2 respectively and the wait is flagged as successful.
          - Otherwise a message is printed and ``FAILED_ACTION`` is set.

        ``observation`` is modified in place; returns ``(observation, agent)``.
        """
        if observation[status_index] == 0:
            if observation[self.state_first_job_operation_location[1]] != 0 and actions[i] == 1:
                observation[self.state_pick_job_window_location] = 1
                self.is_action_wait_succeed[i] = True
            elif observation[self.state_first_job_operation_location[2]] != 0 and actions[i] == 2:
                observation[self.state_pick_job_window_location] = 2
                self.is_action_wait_succeed[i] = True
            else:
                print("FAILED ACTION: No job available in window for WAIT")
                self.FAILED_ACTION = True
        else:
            print("FAILED ACTION: Agent status not idle for WAIT")
            self.FAILED_ACTION = True
        return observation, agent

    def action_decline(self, observation, agent, i, status_index):
        """
        DECLINE:
          - Reset pick_product_window to 0; nothing else changes.

        ``observation`` is modified in place; returns ``(observation, agent)``.
        """
        observation[self.state_pick_job_window_location] = 0
        return observation, agent

    def action_continue(self, observation, agent, i, status_index):
        """
        CONTINUE:
          A. If the agent is working (status 2, an operation set, workbench
             occupied): decrement the processing timer while it is positive.
             Once the timer is already 0, switch to completing (status 3) and
             count one finished operation in ``total_process_done``.
          B. In every other status nothing changes.

        ``observation`` is modified in place; returns ``(observation, agent)``.
        """
        if observation[status_index] == 2 and observation[self.state_operation_now_location] != 0 and agent.workbench:
            self.is_status_working_succeed[i] = True
            if agent.processing_time_remaining > 0:
                agent.processing_time_remaining -= 1
            elif agent.processing_time_remaining == 0:
                observation[status_index] = 3  # completing
                self.is_status_working_succeed[i] = False
                self.total_process_done += 1
            else:
                print("FAILED ACTION: Processing time error in CONTINUE")
                self.FAILED_ACTION = True
        elif observation[status_index] == 1:
            # Remain in accept state until workbench transfer occurs.
            pass
        return observation, agent

    # ---------------------- State Update Function ---------------------------
    def _window_sections(self, agent):
        """Return the conveyor indices [yr, yr-1, ...] observed by ``agent``, wrapping circularly."""
        return [(agent.position - r) % self.num_sections for r in range(self.window_size)]

    def update_state(self, observation_all, actions):
        """Apply one action per agent, advance the conveyor and return the next observation.

        ``observation_all`` itself is left unmodified: all changes are made on
        a copy, which is returned. Side effects on the environment: agents'
        workbenches and buffers, the conveyor contents, job details, the
        completed-product list and the per-agent outcome flags.

        NOTE(review): each agent only writes its own entry of ``status_all``;
        the entries describing the other agents are not synchronised here.
        """
        next_observation_all = observation_all.copy()
        for i, agent in enumerate(self.agents):
            # Snapshot of the jobs currently in the agent's window [yr, yr-1, yr-2].
            agent.window_product = np.array(self.conveyor.conveyor)[self._window_sections(agent)]
            self.is_action_wait_succeed[i] = False
            self.is_status_working_succeed[i] = False

            # Row view into the COPY, so in-place edits made by the action
            # handlers never leak back into the caller's array.
            observation = next_observation_all[i]
            # The agent's own status lives at entry i of the status_all columns.
            status_index = self.state_status_location_all[i]

            # Process the action for this agent.
            if actions[i] == 0:  # ACCEPT
                observation, agent = self.action_accept(observation, agent, i, status_index)
            elif actions[i] in [1, 2]:  # WAIT actions for yr-1 (action==1) and yr-2 (action==2)
                observation, agent = self.action_wait(observation, agent, i, status_index, actions)
            elif actions[i] == 3:  # DECLINE
                observation, agent = self.action_decline(observation, agent, i, status_index)
            elif actions[i] == 4:  # CONTINUE
                observation, agent = self.action_continue(observation, agent, i, status_index)

            # RETURN TO CONVEYOR: a job whose next operation this agent cannot
            # perform is put back as soon as the agent's section is empty.
            if self.product_return_to_conveyor[i] and agent.workbench:
                if self.conveyor.conveyor[agent.position] is None:
                    self.conveyor.conveyor[agent.position] = str(list(agent.workbench)[0])
                    self.product_return_to_conveyor[i] = False
                    agent.workbench = {}
                    observation[status_index] = 0
                    observation[self.state_operation_now_location] = 0

            # COMPLETING: the current operation is finished; advance the job.
            if observation[status_index] == 3 and not self.product_return_to_conveyor[i]:
                # Iterate over a snapshot because agent.workbench may be rebound below.
                for key, op_list in tuple(agent.workbench.items()):
                    if len(op_list) > 1:
                        # Drop the finished operation; the next one becomes first.
                        agent.workbench[key].pop(0)
                    else:
                        # That was the last operation: the product is complete.
                        self.conveyor.product_completed.append(key)
                        agent.workbench = {}
                        observation[status_index] = 0
                        observation[self.state_operation_now_location] = 0
                    if agent.workbench:
                        next_operation = list(agent.workbench.values())[0][0]
                        if next_operation in agent.operation_capability:
                            # Keep the job and start working on its next operation.
                            self.product_return_to_conveyor[i] = False
                            observation[status_index] = 2
                            observation[self.state_operation_now_location] = next_operation
                        else:
                            self.product_return_to_conveyor[i] = True
                    # Keep the conveyor's bookkeeping in step with the workbench:
                    # refresh the remaining operations, or forget a finished job.
                    if key in agent.workbench:
                        self.conveyor.job_details[key] = agent.workbench[key]
                    else:
                        self.conveyor.job_details.pop(key)
            # If no operations in first job window, reset pick_product_window.
            if np.sum(observation[self.state_first_job_operation_location]) == 0:
                observation[self.state_pick_job_window_location] = 0

            self.agents[i] = agent
            next_observation_all[i] = observation

        # After processing all agents, update the conveyor.
        self.conveyor.move_conveyor()
        self.conveyor.generate_jobs()

        # Refresh the window columns of every agent from the moved conveyor.
        for i, agent in enumerate(self.agents):
            window_sections = self._window_sections(agent)
            agent.window_product = np.array(self.conveyor.conveyor)[window_sections]
            for j, pos in enumerate(window_sections):
                job_label = self.conveyor.conveyor[pos]
                job_details_value = self.conveyor.job_details.get(job_label, [])
                next_observation_all[i, self.state_first_job_operation_location[j]] = job_details_value[0] if len(job_details_value) > 0 else 0
                next_observation_all[i, self.state_second_job_operation_location[j]] = job_details_value[1] if len(job_details_value) > 1 else 0

        return next_observation_all

    def step(self, actions):
        """
        Step function:
         - actions: an array of length num_agents, where each action is in {0,1,2,3,4}
           0: ACCEPT, 1: WAIT (target yr-1), 2: WAIT (target yr-2), 3: DECLINE, 4: CONTINUE.

        Returns ``(next_observation, reward, done, truncated, info)`` where
        ``reward`` holds one value per agent (-1 step cost plus the wait,
        working and completion terms) and ``info`` is ``{"actions": actions}``.

        NOTE(review): the third value is True once ``max_steps`` is reached and
        the fourth once all ``n_jobs`` products are completed. Gymnasium's
        convention is the reverse (terminated = task finished, truncated =
        time limit). The order is kept as is because existing callers may
        depend on it - confirm before swapping.
        """
        self.is_action_wait_succeed = [False] * self.num_agents
        self.is_status_working_succeed = [False] * self.num_agents
        self.reward_product_complete = 0
        self.step_count += 1

        next_observation_all = self.update_state(self.observation_all, actions)

        reward_wait_all = self.reward_wait(actions, self.is_action_wait_succeed)
        # The working reward is based on each agent's status AFTER its action.
        reward_working_all = self.reward_working(next_observation_all, self.is_status_working_succeed)
        reward_step_all = self.reward_complete()
        reward_agent_all = -1 + reward_wait_all + reward_working_all + reward_step_all

        done_step = self.step_count >= self.max_steps
        truncated_step = len(self.conveyor.product_completed) >= self.n_jobs
        self.observation_all = next_observation_all
        info_step = {"actions": actions}
        return next_observation_all, reward_agent_all, done_step, truncated_step, info_step

    def reward_wait(self, actions, is_action_wait_succeed, k_wait=1):
        """Return the per-agent reward for successful WAIT actions, scaled by ``k_wait``.

        A successful WAIT earns ``speed / sum(factor * agent_speeds)`` with
        factor 2 for action 1 and factor 3 for action 2; everything else earns 0.
        """
        rewards = []
        for i, agent in enumerate(self.agents):
            if (actions[i] in [1, 2]) and is_action_wait_succeed[i]:
                factor_x = 2.0 if actions[i] == 1 else 3.0
                rewards.append(agent.speed / sum(np.multiply(factor_x, self.agent_speeds)))
            else:
                rewards.append(0)
        return k_wait * np.array(rewards)

    def reward_working(self, observations, is_status_working_succeed, k_working=4):
        """Return the per-agent reward for being in the working status, scaled by ``k_working``.

        An agent whose own status entry equals 2 earns its share of the total
        speed; all others earn 0. ``is_status_working_succeed`` is accepted
        but not read.
        """
        rewards = []
        for r, agent in enumerate(self.agents):
            obs = observations[r]
            # Each agent's own status is stored at the corresponding index in state_status_location_all.
            if obs[self.state_status_location_all[r]] == 2:
                rewards.append(float(agent.speed) / float(sum(self.agent_speeds)))
            else:
                rewards.append(0)
        return k_working * np.array(rewards)

    def reward_complete(self, k_complete=4):
        """Return ``k_complete`` times the operations finished this step and reset that counter."""
        value = k_complete * self.total_process_done
        self.total_process_done = 0
        return value

    def render(self):
        """Print each agent's position, status, window and workbench, then the conveyor."""
        for a, agent in enumerate(self.agents):
            print(f"Agent {agent.id} at pos {int(self.observation_all[a][0])}: Status {int(self.observation_all[a][self.state_status_location_all[a]])}")
            print("Window product:", agent.window_product, "Workbench:", agent.workbench)
        self.conveyor.display()

# =============================================================================
# Main function to test the environment
# =============================================================================
def main():
    """Smoke-test the environment by running up to five random-action steps."""
    # Environment with window_size=3, 3 agents, max_steps=20, episode=1.
    env = FJSPEnv(window_size=3, num_agents=3, max_steps=20, episode=1)
    initial_observation, _ = env.reset()
    print("Initial State:")
    print(initial_observation)

    max_demo_steps = 5
    for step_number in range(1, max_demo_steps + 1):
        # One random action (0-4) per agent, drawn from the action space.
        sampled_actions = env.action_space.sample()
        print(f"\nStep {step_number}, Actions: {sampled_actions}")
        next_state, reward, done, truncated, info = env.step(sampled_actions)
        print("Next State:")
        print(next_state)
        print("Reward:", reward)
        env.render()
        # Stop early once the environment reports the episode as done.
        if done:
            break


if __name__ == '__main__':
    main()


Initial State:
[[ 3  1  2  0  0  0  0  0  0  0  0  0  0  0]
 [ 7  2  3  0  0  0  0  0  0  0  0  0  0  0]
 [11  1  3  0  0  0  0  0  0  0  0  0  0  0]]

Step 1, Actions: [3 3 3]
Next State:
[[ 3  1  2  0  0  0  0  0  0  0  0  0  0  0]
 [ 7  2  3  0  0  0  0  0  0  0  0  0  0  0]
 [11  1  3  0  0  0  0  0  0  0  0  0  0  0]]
Reward: [-1 -1 -1]
Agent 1 at pos 3: Status 0
Window product: [None None None] Workbench: {}
Agent 2 at pos 7: Status 0
Window product: [None None None] Workbench: {}
Agent 3 at pos 11: Status 0
Window product: [None None None] Workbench: {}
Conveyor: --- <-> --- <-> --- <-> --- <-> --- <-> --- <-> --- <-> --- <-> --- <-> --- <-> --- <-> ---
Buffer: []
Completed Products: []

Step 2, Actions: [1 1 0]
FAILED ACTION: No job available in window for WAIT
FAILED ACTION: No job available in window for WAIT
FAILED ACTION: Agent status not idle or job not ready (ACCEPT)
Next State:
[[ 3  1  2  0  0  0  0  0  0  0  0  0  0  0]
 [ 7  2  3  0  0  0  0  0  0  0  0  0  0  0]
 [11

In [4]:
from RULED_BASED import MASKING_action,  FCFS_action, RANDOM_action, HITL_action
# Evaluation driver: run 20 episodes with the rule-based HITL policy and record,
# per episode, the accumulated mean reward and the number of steps taken.
if __name__ == "__main__":
    env = FJSPEnv(window_size=3, num_agents=3, max_steps=1000, episode=1)
    rewards = {}   # episode number -> sum over steps of the mean per-agent reward
    makespan = {}  # episode number -> env.step_count at the end of the episode
    episode_seed=0
    for episode in range(1, 20+1):
        print("\nepisode:", episode)
        state, info = env.reset(seed=1000+episode)
        # The modulus is the number of consecutive episodes sharing one job
        # seed; with 1 the condition is always true, so the seed changes every episode.
        if (episode-1) %1 == 0:
            episode_seed= episode-1

        # reset() builds a new conveyor, so the seed has to be set again each episode.
        env.conveyor.episode_seed= episode_seed
        print(env.conveyor.episode_seed)
        reward_satu_episode = 0  # reward accumulated over this one episode
        done = False
        truncated = False
        #print("\nEpisode:", episode)
        #print("Initial state:", state)
        while not done and not truncated: 
            # Stop as soon as every job has been completed.
            if len(env.conveyor.product_completed)>= env.n_jobs:
                print("All jobs are completed.")
                break

            # HITL_action comes from the RULED_BASED module (not shown here);
            # only its first return value, the joint action, is used below.
            actions, masking=HITL_action(state, env)
            #actions=FCFS_action(state, env)

            # A None entry means the policy produced no action for some agent; abort the episode.
            if None in actions:
                print("FAILED ACTION: ", actions)
                break


            next_state, reward, done, truncated, info = env.step(actions)
            # Collapse the per-agent rewards into one scalar for the episode total.
            reward = np.mean(reward)
            reward_satu_episode += reward
            
            # The environment flags actions that were invalid in the current
            # state; dump the transition for debugging and abort the episode.
            if env.FAILED_ACTION:
                print("episode:", episode)
                print("state:\n", state)
                print("actions:", actions)
                print("next_state:\n", next_state)
                #print(env.observation_all)
                #print("info:", info)
                print("FAILED ENV")
                break

            state = next_state
            #print("next_state:", next_state)

        # if  None in actions:
        #     break

        rewards[episode] = reward_satu_episode
        makespan[episode] = env.step_count
        print("Episode complete. Total Reward:", reward_satu_episode, "jumlah step:", env.step_count, "total product completed:", len(env.conveyor.product_completed))
        # Not read anywhere in this cell; presumably a product ordering used by a later cell - TODO confirm.
        order = {'A': 0, 'B': 1, 'C': 2}


    env.close()
    print("rewards:", rewards)
    print("makespan:", makespan)


episode: 1
0
Episode complete. Total Reward: 117.80555555555557 jumlah step: 193 total product completed: 21

episode: 2
1
Episode complete. Total Reward: 111.49074074074073 jumlah step: 220 total product completed: 21

episode: 3
2
Episode complete. Total Reward: 101.79629629629632 jumlah step: 220 total product completed: 21

episode: 4
3
Episode complete. Total Reward: 104.888888888889 jumlah step: 209 total product completed: 21

episode: 5
4
Episode complete. Total Reward: 119.78703703703697 jumlah step: 209 total product completed: 21

episode: 6
5
Episode complete. Total Reward: 114.49074074074065 jumlah step: 217 total product completed: 21

episode: 7
6
Episode complete. Total Reward: 118.4166666666666 jumlah step: 213 total product completed: 21

episode: 8
7
Episode complete. Total Reward: 114.46296296296303 jumlah step: 200 total product completed: 21

episode: 9
8
Episode complete. Total Reward: 113.70370370370374 jumlah step: 194 total product completed: 21

episode: 10
