In [1]:
import pandas as pd
import numpy as np


# IEEE-ICCE-RL-JSP

In this paper, authors use Graph Neural Network to determine which heuristics to choose. Heuristics considered in the following experiment are FIFO, MOR, SPT, and MWKR.

## Model

I had to train my own models using the script ['train_dqn.py'](train_dqn.py) provided by the owner of the repository. Models were trained on random instances with size up to 10x10. To check if my trained models are valid, I compared the experimental result presented in the paper with my results on the same instances. The results from the paper are

|        | IEEE-ICCE-RL-JSP  | ScheduleNet | L2D   | MOR   | FIFO  | SPT   | MWKR  |
| ------ | ----- |:----------- |:----- |:----- |:----- |:----- |:----- |
| Gap(S) | 20.0% | 17.7%       | 30.7% | 22.7% | 29.0% | 31.2% | 22.7% |
| Gap(L) | 10.5% | 11.3%       | 20.8% | 14.8% | 19.4% | 21.3% | 14.5% |
| Time   | 1.55s | N/A         | 5.10s | 1.18s | 1.12s | 1.01s | 1.13s |

Results of my 3 trained models are (on average)

In [2]:
df = pd.read_csv('eval_dqn.csv')
large = df[(df['J'] >= 50) & (df['M'] >= 15)]
small = df[~(df['J'] >= 50) & (df['M'] >= 15)]

print(f"Average time is {-df['Time'].mean().round(2)}")
print(f"Average gap of small instances is {small['Gap'].mean().round(4) * 100}%")
print(f"Average gap of large instances is {large['Gap'].mean().round(4) * 100}%")

Average time is 11.87
Average gap of small instances is 20.89%
Average gap of large instances is 11.44%


The time is much higher, because I used `cpu` instead of `cuda`. Average gaps approximately match the results of the paper, so I decided my trained model is valid.

In [3]:
from env.env import JSP_Env
from agent.DQN.agent import DQN_Agent
from datetime import datetime
from pprint import pprint

def eval_dqn(model, instance_path, args, partial_plan=None):
    '''Solve the JSSP instance in file instance_path using given model
    
      Args:
        model - path of model to use
        instance_path - path of instance to solve
        args - args from argparser
        partial_plan - steps to execute at start instead of using the agent

      Return:
        makespan
        start_times - start time of operations executed in given order
        plan - sequence of actions
    '''
    # load env
    env = JSP_Env(args)
    avai_ops = env.load_instance(instance_path)
    state = env.get_graph_data(args.device)

    # load agent
    agent = DQN_Agent(args, out_dim=len(env.rules))
    agent.load(model)

    # run the model
    plan = []
    i = 0
    while True:
        # choose action from partial plan if given
        if partial_plan is not None and i < len(partial_plan):
            action = partial_plan[i] 
        else:
            action = agent.select_action(state, random=False, test_only=True)
            
        state, reward, done, info = env.step(action)
        plan.append(action)
        i += 1
        if done:
            break

    makespan = env.get_makespan()
    start_times = [op['start_time'] for op in env.jsp_instance.logger.history]
    return makespan, start_times, plan


## Preprocessing

This model processes instances given in [Standard specifiation](http://jobshop.jjvh.nl/explanation.php#standard_def). I have our benchmarks also in Standard Specification, but for the sake of being consistent, I will consider only instances in [Taillard specification](http://jobshop.jjvh.nl/explanation.php#taillard_def). Therefore I have to write a function, which converts problem in taillard specification to standard specification.

In [4]:
def parse_instance_taillard(filename):
    '''Parses instance written in Taillard specification: http://jobshop.jjvh.nl/explanation.php
    
      Args:
        filename - file containing the instance in Taillard specification

      Returns:
        number of jobs,
        number of machines,
        the processor times for each operation,
        the order for visiting the machines
    '''

    with open(filename, 'r') as f:
        # parse number of jobs J and machines M
        J, M = map(int, f.readline().split())

        # Initialize two empty numpy arrays with dimensions J x M
        processor_times = np.empty((J, M), dtype=int)
        orders_of_machines = np.empty((J, M), dtype=int)
    
        # Read the next J lines containing processor times
        for i in range(J):
            processor_times[i] = list(map(int, f.readline().split()))
    
        # Read the next J lines containing orders of machines
        for i in range(J):
            orders_of_machines[i] = list(map(int, f.readline().split()))

        return J, M, processor_times, orders_of_machines

def taillard_to_standard(taillard_instance):
    # parse taillard instance
    J, M, processor_times, orders_of_machines = parse_instance_taillard(taillard_instance)

    # save as standard instance
    standard_instance = "/tmp/standard_" + taillard_instance.split("/")[-1]
    with open(standard_instance, 'w') as f:
        # save number of jobs and machines
        f.write(f"{J}\t{M}\n")

        for job in range(J):
            for machine in range(M):
                f.write(f'{orders_of_machines[job][machine] - 1}\t{processor_times[job][machine]} ')

            f.write('\n')

    return standard_instance

## Static experiment on our benchmarks

In [15]:
# import some stuff and define helper functions
import os, argparse, time

def get_all_instances_in_taillard_specification():
    '''Lists all instances in Taillard specification'''
    matching_files = []
    root_dir = "../../../benchmarks/jssp/"
    target_string = "Taillard_specification"

    for foldername, subfolders, filenames in os.walk(root_dir):
        for filename in filenames:
            filepath = os.path.join(foldername, filename)
            if target_string in filepath:
                matching_files.append(filepath)

    return matching_files

# need command line arguments for the model
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument('-d', '--device', default='cpu')
# arguments for DQN
parser.add_argument('--warmup', default=10000, type=int)
parser.add_argument('--episode', default=100000, type=int)
parser.add_argument('--capacity', default=10000, type=int)
parser.add_argument('--batch_size', default=32, type=int)
parser.add_argument('--lr', default=.01, type=float)
parser.add_argument('--eps', default=0.0, type=float)
parser.add_argument('--eps_decay', default=.995, type=float)
parser.add_argument('--eps_min', default=.01, type=float)
parser.add_argument('--gamma', default=1.0, type=float)
parser.add_argument('--freq', default=4, type=int)
parser.add_argument('--target_freq', default=1000, type=int)
parser.add_argument('--double', action='store_true')
parser.add_argument(
    '--max_process_time',
    type=int,
    default=100,
    help='Maximum Process Time of an Operation')
args = parser.parse_args(args=[])

# load models and instances
MODEL = 'agent/DQN/weight/DQN_ep1400'
models = os.listdir(MODELS_PATH)
BENCHMARKS_PATH = "../../../benchmarks/jssp/ta_instances/Taillard_specification/"
# instances = sorted(get_all_instances_in_taillard_specification())
instances = [
    'ta41.txt',
    'ta42.txt',
    'ta43.txt',
    'ta44.txt',
    'ta45.txt',
    'ta46.txt',
    'ta47.txt',
    'ta48.txt',
    'ta49.txt',
    'ta50.txt',
]

# run experiments
# for model in models[:1]:
for instance in instances:
    model_path = os.path.join(MODELS_PATH, model)
    makespan, _, _ = eval_dqn(model_path, taillard_to_standard(os.path.join(BENCHMARKS_PATH, instance)), args)
    print(f"Model: {model}, instance: {instance.split('/')[-1]}, makespan: {makespan}")

Model: DQN_ep1670, instance: ta41.txt, makespan: 2591
Model: DQN_ep1670, instance: ta42.txt, makespan: 2411
Model: DQN_ep1670, instance: ta43.txt, makespan: 2246
Model: DQN_ep1670, instance: ta44.txt, makespan: 2386
Model: DQN_ep1670, instance: ta45.txt, makespan: 2450
Model: DQN_ep1670, instance: ta46.txt, makespan: 2499
Model: DQN_ep1670, instance: ta47.txt, makespan: 2246
Model: DQN_ep1670, instance: ta48.txt, makespan: 2447
Model: DQN_ep1670, instance: ta49.txt, makespan: 2402
Model: DQN_ep1670, instance: ta50.txt, makespan: 2370


# Dynamic JSSP

In dynamic JSSP, only a subset of jobs is known at the beginning. The rest of jobs arrives dynamically online.

The following attempt to expand L2D to being dynamic is inspired by paper [Large-scale Dynamic Scheduling for Flexible Job-shop with Random Arrivals of New Jobs by Hierarchical Reinforcement Learning](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10114974), where authors schedule newly incoming jobs and reschedule not yet executed operations, already executed operations can not be rescheduled. During each rescheduling, they formulate static FJSP and solve it. They use cache for incoming jobs and an agent choosing either to add jobs from cache to scheduling problem, or keep them in cache. I will skip this agent and always add new jobs to scheduling problem.

Similarly to the paper, I will model the arrival of new jobs as poisson process with average arrival time following an exponential distribution.

In [9]:
from datetime import datetime

def get_dynamics_jssp(instance):
    '''Turns static JSSP instance to dynamic

      Args:
        filename of static JSSP instance

      Returns:
        list of jobs known at the beginning
        dictionary of arriving jobs as  as {time_of_arrival: (operations, machines)} 
    '''
    J, M, processor_times, orders_of_machines = parse_instance_taillard(instance)

    indices = np.arange(J)
    np.random.shuffle(indices)

    # separate jobs into known jobs and arriving jobs
    jobs_known_at_the_beginning = [(processor_times[i], orders_of_machines[i]) for i in indices[J//2:]]
    arriving_jobs_indeces = indices[:J//2]

    # calculate beta = 1/lambda
    average_time_between_arrivals = (processor_times.mean() * len(processor_times[0])) / M
    
    t = 0
    arriving_jobs = {}
    for index in arriving_jobs_indeces:
        t += int(np.random.exponential(scale=average_time_between_arrivals))
        arriving_jobs[t] = (processor_times[index], orders_of_machines[index])

    return jobs_known_at_the_beginning, arriving_jobs

def save_static_jssp_taillard(jobs):
    '''Saves list of jobs as static JSSP instance in taillards specification
        
      Args:
        list of jobs to save

      Returns:
        filename where JSSP instance was saved to
    '''
    J, M = len(jobs), len(jobs[0][0])
    formatted_datetime = datetime.now().strftime("%Y%m%d%H%M%S")
    with open(f"/tmp/{J}_{M}_{formatted_datetime}.txt", 'w', encoding="utf-8") as f:
        f.write(f"{J} {M}\n")
        for job in jobs:
            times, _ = job
            f.write(" ".join(map(str, times)) + '\n')
        for job in jobs:
            _, orders = job
            f.write(" ".join(map(str, orders)) + '\n')  

    return f"/tmp/{J}_{M}_{formatted_datetime}.txt"

In [10]:
def solve_dynamic_jssp(instance, model):
    '''Turns static JSSP in Taillard specification instance to dynamic and solves it

      Args: 
        instance to solve
        model to use

      Returns: 
        makespan
    '''
    # turn static JSSP instance to dynamic
    known_jobs, arriving_jobs = get_dynamics_jssp(instance)
    print(f"instance={instance.split('/')[-1]}, known_jobs={len(known_jobs)}, arriving_jobs={len(arriving_jobs)}")
    latest_time_of_arrival = max(arriving_jobs)
    print(f"Latest job arrives at {latest_time_of_arrival} and has total makespan {arriving_jobs[latest_time_of_arrival][0].max()}")
    
    # solve static JSSP with jobs known initially
    makespan, start_times, plan = eval_dqn(model, taillard_to_standard(save_static_jssp_taillard(known_jobs)), args)
    t = 0
    while True:
        t += 1
        
        # no jobs left
        if not arriving_jobs:
            break
    
        # no job arrived
        if not t in arriving_jobs:
            continue
        print(t)

        # new job arrived, remove not yet executed operations from the plan
        partial_plan = []
        for i, action in enumerate(plan):
            if start_times[i] < t:
                partial_plan.append(action)
 
        # add new job to the plan, with times shifted to current time t
        new_job = arriving_jobs.pop(t)
        known_jobs.append(new_job)
        
        # create new schedule WHILE REUSING THE ALREADY EXECUTED PLAN
        makespan, start_times, plan = eval_dqn(model, taillard_to_standard(save_static_jssp_taillard(known_jobs)), args, partial_plan=partial_plan) 

    print(f"Makespan: {makespan}")
    return makespan

MODEL = 'agent/DQN/weight/DQN_ep1400'
BENCHMARKS_PATH = "../../../benchmarks/jssp/ta_instances/Taillard_specification/"
INSTANCES = [
    'ta41.txt',
    'ta42.txt',
    'ta43.txt',
    'ta44.txt',
    'ta45.txt',
    'ta46.txt',
    'ta47.txt',
    'ta48.txt',
    'ta49.txt',
    'ta50.txt',
]

for instance in INSTANCES:
    makespan = solve_dynamic_jssp(BENCHMARKS_PATH + instance, MODEL)
    print(f"Makespan of instance '{instance}': {makespan}")

instance=ta41.txt, known_jobs=15, arriving_jobs=14
Latest job arrives at 698 and has total makespan 99
12
51
163
228
281
349
422
423
491
567
575
655
693
698
Makespan: 2467
Makespan of instance 'ta41.txt': 2467
instance=ta42.txt, known_jobs=15, arriving_jobs=15
Latest job arrives at 784 and has total makespan 99
66
137
303
335
342
377
429
431
435
442
483
597
692
719
784
Makespan: 2347
Makespan of instance 'ta42.txt': 2347
instance=ta43.txt, known_jobs=15, arriving_jobs=15
Latest job arrives at 763 and has total makespan 99
19
24
37
91
188
210
246
282
377
481
588
661
700
737
763
Makespan: 2325
Makespan of instance 'ta43.txt': 2325
instance=ta44.txt, known_jobs=15, arriving_jobs=14
Latest job arrives at 800 and has total makespan 95
20
49
147
155
210
237
340
396
577
581
617
676
724
800
Makespan: 2413
Makespan of instance 'ta44.txt': 2413
instance=ta45.txt, known_jobs=15, arriving_jobs=15
Latest job arrives at 1057 and has total makespan 91
7
37
40
108
339
389
411
489
538
605
619
633
880
9