In [1]:
import pandas as pd
import numpy as np


# IEEE-ICCE-RL-JSP

In this paper, the authors use a Graph Neural Network to decide which heuristic to apply. The heuristics considered in the following experiment are FIFO, MOR, SPT, and MWKR.

## Model

I had to train my own models using the script ['train_dqn.py'](train_dqn.py) provided by the owner of the repository. The models were trained on random instances of size up to 10x10. To check whether my trained models are valid, I compared the experimental results presented in the paper with my results on the same instances. The results from the paper are:

|        | IEEE-ICCE-RL-JSP  | ScheduleNet | L2D   | MOR   | FIFO  | SPT   | MWKR  |
| ------ | ----- |:----------- |:----- |:----- |:----- |:----- |:----- |
| Gap(S) | 20.0% | 17.7%       | 30.7% | 22.7% | 29.0% | 31.2% | 22.7% |
| Gap(L) | 10.5% | 11.3%       | 20.8% | 14.8% | 19.4% | 21.3% | 14.5% |
| Time   | 1.55s | N/A         | 5.10s | 1.18s | 1.12s | 1.01s | 1.13s |

Results of my 3 trained models are (on average)

In [2]:
df = pd.read_csv('eval_dqn.csv')

# An instance is "large" when it has at least 50 jobs AND at least 15 machines;
# every other instance is "small". The mask is built once and negated as a
# whole: `~` binds tighter than `&`, so writing `~(a) & (b)` would negate only
# the first condition and silently drop every instance with fewer than 15 machines.
is_large = (df['J'] >= 50) & (df['M'] >= 15)
large = df[is_large]
small = df[~is_large]

# NOTE(review): the mean of `Time` is negated before printing - presumably the
# CSV stores the durations with a negative sign; confirm against the script
# that writes eval_dqn.csv.
print(f"Average time is {-df['Time'].mean().round(2)}")
# The `%` format spec multiplies by 100 and rounds in one step, which avoids
# float artefacts such as 20.890000000000001 from `round(4) * 100`.
print(f"Average gap of small instances is {small['Gap'].mean():.2%}")
print(f"Average gap of large instances is {large['Gap'].mean():.2%}")

Average time is 11.87
Average gap of small instances is 20.89%
Average gap of large instances is 11.44%


The time is much higher because I used `cpu` instead of `cuda`. The average gaps approximately match the results of the paper, so I consider my trained models valid.

In [33]:
from env.env import JSP_Env
from agent.DQN.agent import DQN_Agent
from datetime import datetime
from pprint import pprint

def eval_dqn(model, instance_path, args, arrival_times: list | None = None):
    '''Solve the JSSP instance in file instance_path using the given model.

      Args:
        model - path of the saved agent weights to load
        instance_path - path of the instance to solve
        args - args from argparser; forwarded to JSP_Env and DQN_Agent,
               args.device is also used when building the graph state
        arrival_times - optional arrival times forwarded unchanged to
               env.load_instance (presumably one entry per job, as produced
               by get_dynamic_jssp - confirm against JSP_Env); None by default

      Return:
        makespan - value of env.get_makespan() once the episode is done
    '''
    # load env
    env = JSP_Env(args)
    env.load_instance(instance_path, arrival_times)
    state = env.get_graph_data(args.device)

    # load agent; the network gets one output per entry of env.rules
    agent = DQN_Agent(args, out_dim=len(env.rules))
    agent.load(model)

    # run the model until the environment reports that the episode is finished
    done = False
    while not done:
        action = agent.select_action(state, random=False, test_only=True)
        state, _reward, done, _info, _action_idx = env.step(action)

    return env.get_makespan()

# makespan, start_times, plan = eval_dqn('agent/DQN/weight/DQN_ep1670', taillard_to_standard("../../../benchmarks/jssp/ft_instances/Taillard_specification/ft06.txt"), args)

## Preprocessing

This model processes instances given in the [Standard specification](http://jobshop.jjvh.nl/explanation.php#standard_def). I also have our benchmarks in the Standard specification, but for the sake of consistency I will consider only instances in the [Taillard specification](http://jobshop.jjvh.nl/explanation.php#taillard_def). Therefore I have to write a function that converts a problem in the Taillard specification to the Standard specification.

In [12]:
def parse_instance_taillard(filename):
    '''Read a JSSP instance stored in Taillard specification: http://jobshop.jjvh.nl/explanation.php

    The file is expected to hold a header line "J M", followed by J lines of
    processing times and then J lines of machine orders, M integers per line.

      Args:
        filename - file containing the instance in Taillard specification

      Returns:
        number of jobs,
        number of machines,
        J x M integer array of processing times,
        J x M integer array with the order for visiting the machines
    '''

    def _read_matrix(handle, n_rows, n_cols):
        # Consume the next n_rows lines of whitespace-separated integers.
        matrix = np.empty((n_rows, n_cols), dtype=int)
        for row in range(n_rows):
            matrix[row] = [int(token) for token in handle.readline().split()]
        return matrix

    with open(filename, 'r') as handle:
        # header: number of jobs J and number of machines M
        J, M = (int(token) for token in handle.readline().split())

        # the two J x M sections follow each other directly
        processor_times = _read_matrix(handle, J, M)
        orders_of_machines = _read_matrix(handle, J, M)

    return J, M, processor_times, orders_of_machines

def taillard_to_standard(taillard_instance):
    '''Convert an instance file from Taillard to Standard specification.

    The converted instance is written to "/tmp/standard_<basename>", where
    <basename> is the part of taillard_instance after the last "/".

      Args:
        taillard_instance - path of the instance in Taillard specification

      Returns:
        path of the written instance in Standard specification
    '''
    J, M, processor_times, orders_of_machines = parse_instance_taillard(taillard_instance)

    source_name = taillard_instance.split("/")[-1]
    standard_instance = "/tmp/standard_" + source_name

    with open(standard_instance, 'w') as out:
        # header: number of jobs and machines
        out.write(f"{J}\t{M}\n")

        # one line per job holding "<machine>\t<time> " pairs; the machine id is
        # written decremented by one (presumably 1-based -> 0-based numbering)
        for job in range(J):
            pairs = [
                f'{orders_of_machines[job][machine] - 1}\t{processor_times[job][machine]} '
                for machine in range(M)
            ]
            out.write(''.join(pairs))
            out.write('\n')

    return standard_instance

## Static experiment on our benchmarks

In [13]:
# import some stuff and define helper functions
import os, argparse, time

def get_all_instances_in_taillard_specification():
    '''Collect the paths of all benchmark files stored in Taillard specification.

    Walks "../../../benchmarks/jssp/" (relative to the working directory) and
    keeps every file whose path contains "Taillard_specification".
    '''
    root_dir = "../../../benchmarks/jssp/"
    target_string = "Taillard_specification"

    # every file below root_dir, in os.walk order
    all_paths = (
        os.path.join(directory, name)
        for directory, _subdirs, names in os.walk(root_dir)
        for name in names
    )
    return [path for path in all_paths if target_string in path]

# The model code expects an argparse namespace, so one is built here and
# filled with the defaults only (no real command line is parsed).
def _build_arg_parser():
    '''Create the argument parser holding the device and DQN settings.'''
    built = argparse.ArgumentParser(description=__doc__)
    built.add_argument('-d', '--device', default='cpu')

    # arguments for DQN: (flag, default, type)
    dqn_options = (
        ('--warmup', 10000, int),
        ('--episode', 100000, int),
        ('--capacity', 10000, int),
        ('--batch_size', 32, int),
        ('--lr', .01, float),
        ('--eps', 0.0, float),
        ('--eps_decay', .995, float),
        ('--eps_min', .01, float),
        ('--gamma', 1.0, float),
        ('--freq', 4, int),
        ('--target_freq', 1000, int),
    )
    for flag, default_value, value_type in dqn_options:
        built.add_argument(flag, default=default_value, type=value_type)

    built.add_argument('--double', action='store_true')
    built.add_argument(
        '--max_process_time',
        type=int,
        default=100,
        help='Maximum Process Time of an Operation')
    return built


parser = _build_arg_parser()
# an empty argument list keeps argparse away from the kernel's own sys.argv
args = parser.parse_args(args=[])

# load models and instances
MODELS_PATH = 'agent/DQN/weight'
# os.listdir returns entries in arbitrary order; sorting makes the model
# selection below (e.g. `models[:1]`) the same on every run and machine.
models = sorted(os.listdir(MODELS_PATH))
BENCHMARKS_PATH = "../../../benchmarks/jssp/ta_instances/Taillard_specification/"
# instances = sorted(get_all_instances_in_taillard_specification())
# file names are relative to BENCHMARKS_PATH
instances = [
    'ta41.txt',
    'ta42.txt',
    'ta43.txt',
    'ta44.txt',
    'ta45.txt',
    'ta46.txt',
    'ta47.txt',
    'ta48.txt',
    'ta49.txt',
    'ta50.txt',
]

# run experiments: evaluate the first listed model on every selected instance
for model in models[:1]:
    model_path = os.path.join(MODELS_PATH, model)
    for instance in instances:
        # the model reads Standard specification, so convert the benchmark first
        standard_instance = taillard_to_standard(os.path.join(BENCHMARKS_PATH, instance))
        # eval_dqn returns the makespan only, so there is nothing to unpack
        makespan = eval_dqn(model_path, standard_instance, args)
        print(f"Model: {model}, instance: {instance.split('/')[-1]}, makespan: {makespan}")

0 0
0 59
0 232
0 481
0 516
0 533
0 731
0 804
0 837
0 892
0 943
0 968
0 1036
0 1150
0 1195
0 1274
0 1373
0 1439
0 1567
0 1663
1 0
1 160
1 251
1 303
1 317
1 544
1 1307
1 1343
1 1385
1 1402
1 1470
1 1527
1 1554
1 1736
1 1831
1 1970
1 2043
1 2056
1 2079
1 2157
2 0
2 93
2 157
2 191
2 261
2 333
2 459
2 576
2 619
2 792
2 918
2 1060
2 1156
2 1309
2 1394
2 1437
2 1474
2 1535
2 1648
2 1746
3 0
3 31
3 288
3 364
3 482
3 828
3 884
3 966
3 995
3 1093
3 1175
3 1199
3 1456
3 1630
3 1692
3 1850
3 1900
3 1975
3 2519
3 2554
4 0
4 132
4 193
4 455
4 495
4 559
4 638
4 654
4 765
4 934
4 980
4 1178
4 1261
4 1398
4 1470
4 1518
4 1736
4 2026
4 2038
4 2095
5 0
5 9
5 47
5 448
5 672
5 936
5 1033
5 1074
5 1094
5 1120
5 1195
5 1374
5 1470
5 1482
5 1623
5 1705
5 2041
5 2102
5 2154
5 2227
6 0
6 20
6 96
6 99
6 237
6 332
6 338
6 469
6 601
6 683
6 926
6 958
6 1072
6 1186
6 1267
6 1531
6 1742
6 1882
6 1942
6 1954
7 0
7 41
7 207
7 274
7 363
7 568
7 674
7 882
7 950
7 988
7 1212
7 1261
7 1396
7 1473
7 1489
7 1532
7 1801
7 18

# Dynamic JSSP

In dynamic JSSP, only a subset of the jobs is known at the beginning. The rest of the jobs arrive dynamically online.

The following attempt to extend the model above to the dynamic setting is inspired by the paper [Large-scale Dynamic Scheduling for Flexible Job-shop with Random Arrivals of New Jobs by Hierarchical Reinforcement Learning](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=10114974), where the authors schedule newly incoming jobs and reschedule operations that have not been executed yet; operations that have already been executed cannot be rescheduled. During each rescheduling, they formulate a static FJSP and solve it. They use a cache for incoming jobs and an agent that chooses either to add jobs from the cache to the scheduling problem or to keep them in the cache. I will skip this agent and always add new jobs to the scheduling problem.

Similarly to the paper, I will model the arrival of new jobs as a Poisson process, i.e. the times between consecutive arrivals follow an exponential distribution.

In [23]:
from datetime import datetime

def get_dynamic_jssp(instance):
    '''Draws random arrival times that turn a static JSSP instance into a dynamic one

    A random half of the jobs (J // 2 of them) is picked to arrive later; the
    remaining jobs keep arrival time 0. The arriving jobs get strictly
    increasing arrival times whose gaps are drawn from an exponential
    distribution (truncated to int, plus 1). Uses the global numpy random
    state, so call np.random.seed beforehand for reproducible results.

      Args:
        instance - filename of static JSSP instance in Taillard specification

      Returns:
        list of J arrival times, indexed by job
    '''
    J, M, processor_times, _ = parse_instance_taillard(instance)

    indices = np.arange(J)
    np.random.shuffle(indices)

    # the first half of the shuffled indices are the jobs that arrive later
    arriving_job_indices = indices[:J//2]

    # calculate beta = 1/lambda
    # (each row of processor_times has M entries, so this equals the mean
    # processing time of a single operation up to float rounding)
    average_time_between_arrivals = (processor_times.mean() * len(processor_times[0])) / M

    arrival_times = np.zeros(J, dtype=int)
    t = 1
    for index in arriving_job_indices:
        # "+ 1" keeps consecutive arrivals at least one time unit apart
        t += int(np.random.exponential(scale=average_time_between_arrivals)) + 1
        arrival_times[index] = t

    return list(arrival_times)

In [34]:
# model weights used for the static vs. dynamic comparison
MODEL = 'agent/DQN/weight/DQN_ep1400'
# folder holding the Taillard benchmark files in Taillard specification
BENCHMARKS_PATH = "../../../benchmarks/jssp/ta_instances/Taillard_specification/"
# benchmark files ta41.txt ... ta50.txt
INSTANCES = [f'ta{number}.txt' for number in range(41, 51)]

# The arrival times are drawn from numpy's global random state; seeding it
# makes the dynamic makespans below reproducible under Restart & Run All.
np.random.seed(0)

for instance in INSTANCES:
    taillard_path = os.path.join(BENCHMARKS_PATH, instance)
    # the converted file is identical for both runs, so convert only once
    standard_path = taillard_to_standard(taillard_path)

    # static run: all jobs are available from the start
    static_makespan = eval_dqn(MODEL, standard_path, args)

    # dynamic run: same instance, but half of the jobs arrive later
    arrival_times = get_dynamic_jssp(taillard_path)
    dynamic_makespan = eval_dqn(MODEL, standard_path, args, arrival_times)

    print(f"instance={instance}, static_makespan={static_makespan}, dynamic_makespan={dynamic_makespan}")

instance=ta41.txt, static_makespan=2551, dynamic_makespan=3092
instance=ta42.txt, static_makespan=2363, dynamic_makespan=2797
instance=ta43.txt, static_makespan=2370, dynamic_makespan=2785
instance=ta44.txt, static_makespan=2572, dynamic_makespan=3043
instance=ta45.txt, static_makespan=2463, dynamic_makespan=2754
instance=ta46.txt, static_makespan=2595, dynamic_makespan=3396
instance=ta47.txt, static_makespan=2246, dynamic_makespan=3395
instance=ta48.txt, static_makespan=2340, dynamic_makespan=3056
instance=ta49.txt, static_makespan=2394, dynamic_makespan=2992
instance=ta50.txt, static_makespan=2353, dynamic_makespan=2884
