# DJSSP without training, without job interruption

In [1]:
import torch
# Choose hardware-dependent settings: the GPU branch uses a larger batch,
# more training data and a bigger network than the CPU fallback.
if not torch.cuda.is_available():
    accelerator, batch_size, train_data_size = "cpu", 3, 1_000
    embed_dim, num_encoder_layers = 64, 2
else:
    accelerator, batch_size, train_data_size = "gpu", 4, 2_000
    embed_dim, num_encoder_layers = 128, 4

In [2]:
import time

from rl4co.envs.scheduling.djssp.env import DJSSPEnv
from rl4co.models import L2DPolicy, L2DModel
from rl4co.utils import RL4COTrainer
import gc
from rl4co.envs import JSSPEnv
from rl4co.models.zoo.l2d.model import L2DPPOModel
from rl4co.models.zoo.l2d.policy import L2DPolicy4PPO
from torch.utils.data import DataLoader
import json
import os
%load_ext autoreload
%autoreload 2
# Settings handed to the DJSSP instance generator (8 jobs on 8 machines,
# processing times drawn between 1 and 99).
generator_params = {
    "num_jobs": 8,
    "num_machines": 8,
    "min_processing_time": 1,
    "max_processing_time": 99,
    "mtbf": 17,  # presumably mean time between failures — confirm against the DJSSP generator
    "mttr": 4,  # presumably mean time to repair — confirm against the DJSSP generator
}

# Environment used for the rollout visualisation and for training below.
env = DJSSPEnv(
    stepwise_reward=True,
    _torchrl_mode=True,
    generator_params=generator_params,
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Policy: neural network, in this case with encoder-decoder architecture
# Untrained L2D PPO policy used for the step-by-step rollout further down.
# NOTE(review): env_name is "jssp" here while the training cell builds its
# policy with "djssp" for the same DJSSPEnv — confirm which one is intended.
policy = L2DPolicy4PPO(
    env_name="jssp",
    het_emb=False,
    num_encoder_layers=num_encoder_layers,
    embed_dim=embed_dim,
)

  right=ast.Str(s=sentinel),
  return Constant(*args, **kwargs)


TypeError: 'NoneType' object is not callable

In [None]:
# Show the decoder that L2DPolicy4PPO created by default (its repr is the cell output).
policy.decoder

In [None]:
# Show the encoder that L2DPolicy4PPO created by default (its repr is the cell output).
policy.encoder

# Visualize solution construction

In [None]:
def make_step(td, decoder, env=None):
    """Advance the environment by one greedy decoding step.

    Equivalent to the FJSP ``make_step()``, adapted for JSSP: the hidden state
    comes from the decoder's own ``feature_extractor`` applied to the raw
    state instead of from a separate encoder pass.

    Args:
        td: TensorDict representing the current state of the environment.
            It is updated in place with the chosen ``"action"``.
        decoder: The L2DDecoder (or compatible object) exposing
            ``feature_extractor(td)`` and callable as
            ``decoder(td, num_starts=0, hidden=hidden)`` -> ``(logits, mask)``.
        env: Environment whose ``step`` is applied. When omitted, the
            notebook-level ``env`` is used, which is what this helper
            did implicitly before the parameter existed.

    Returns:
        The ``"next"`` entry of ``env.step(td)``, i.e. the state after the action.
    """
    if env is None:
        # The parameter shadows the notebook global, so look it up explicitly.
        env = globals()["env"]

    # Hidden features are computed directly from the raw state.
    hidden, _ = decoder.feature_extractor(td)
    logits, mask = decoder(td, num_starts=0, hidden=hidden)

    # Greedy choice: infeasible actions get -inf so argmax can never pick them.
    action = logits.masked_fill(~mask, -torch.inf).argmax(1)

    # The environment reads the selected action from the state itself.
    td["action"] = action

    return env.step(td)["next"]

In [None]:
# Start a fresh batch of `batch_size` episodes; `td` holds the state that the rollout loop advances.
td = env.reset(batch_size = batch_size)

In [None]:
from matplotlib import pyplot as plt
from IPython.core.display_functions import clear_output

# Draw the starting state, then step greedily with the policy's decoder and
# redraw after every action until all episodes in the batch report "done".
env.render(td, 0)
while True:
    if td["done"].all():
        break

    # Drop the previous frame so the output shows a single, updating plot
    clear_output(wait=True)

    td = make_step(decoder=policy.decoder, td=td)
    env.render(td, 0)
    # Show the figure that render() just drew
    display(plt.gcf())

    # Short pause so each frame stays visible
    time.sleep(0.4)

# Training model

In [None]:
# Policy: neural network, in this case with encoder-decoder architecture
# Fresh policy for training; this replaces the `policy` used in the rollout above.
policy = L2DPolicy4PPO(
    env_name="djssp",
    het_emb=False,
    num_encoder_layers=num_encoder_layers,
    embed_dim=embed_dim,
)

# PPO model wrapping the environment and policy; sizes come from the
# hardware-dependent settings chosen in the first cell.
model = L2DPPOModel(
    policy=policy,
    env=env,
    train_data_size=train_data_size,
    val_data_size=1,
    batch_size=batch_size,
    optimizer_kwargs={"lr": 1e-4},
)

In [None]:
# Inspect the "time" entry of the current rollout state `td` (displayed as the cell output).
td["time"]


In [None]:
# Checkpoint file name (not referenced again in the visible cells) and the
# torch device the model is moved to once training has finished.
CHECKPOINT_PATH = "last.ckpt"
device = "cpu" if not torch.cuda.is_available() else "cuda"

# One epoch on a single device, with no experiment logger attached.
trainer = RL4COTrainer(
    logger=None,
    devices=1,
    accelerator=accelerator,
    max_epochs=1,
)
trainer.fit(model)

# Later cells move their inputs with `.to(device)`, so place the model there too.
model = model.to(device)

In [None]:

# A second generator configuration (6 jobs, 10 machines, longer processing
# times) used to build a separate environment; note that this rebinds the
# `generator_params` name defined earlier in the notebook.
generator_params = {
    "num_jobs": 6,
    "num_machines": 10,
    "min_processing_time": 11,
    "max_processing_time": 230,
    "mtbf": 8,  # presumably mean time between failures — confirm against the DJSSP generator
    "mttr": 3,  # presumably mean time to repair — confirm against the DJSSP generator
}
new_env = DJSSPEnv(generator_params=generator_params)
 

In [None]:
# Roll the trained policy out on two freshly generated instances of the new
# environment, sampling 100 starts per instance and keeping the best one.
new_td = new_env.reset(batch_size=[2])
out = model.policy.generate(
    new_td,
    env=new_env,
    phase="test",
    decode_type="multistart_sampling",
    num_starts=100,
    select_best=True,
)
out

# Currently this part cannot be used since I don't have a FileGenerator


In [None]:
# Clone the JSPLIB benchmark repository into ./JSPLIB (relative to the notebook's working directory).
! git clone https://github.com/tamy0612/JSPLIB.git

In [None]:
import json

def prepare_taillard_data(nj, nm, jsplib_dir="JSPLIB", out_root="taillard"):
    """Copy the Taillard instances of one size out of a JSPLIB checkout.

    Reads ``<jsplib_dir>/instances.json``, keeps the entries whose name
    contains ``"ta"`` and whose ``jobs`` / ``machines`` equal ``nj`` / ``nm``,
    and copies each instance file to ``<out_root>/<nj>x<nm>/<name>.txt``.

    The index file and the instance files are resolved against the same
    ``jsplib_dir``. Previously the index was read from ``../JSPLIB`` while the
    instances were copied from ``JSPLIB``.

    Args:
        nj: Number of jobs of the wanted instances.
        nm: Number of machines of the wanted instances.
        jsplib_dir: Root of the JSPLIB checkout. Defaults to ``"JSPLIB"``.
        out_root: Directory under which the ``<nj>x<nm>`` folder is created.
            Defaults to ``"taillard"``.

    Returns:
        None. The call is a no-op when the target folder already holds files.

    Raises:
        FileNotFoundError: If ``instances.json`` or a listed instance file
            does not exist under ``jsplib_dir``.
    """
    import shutil  # local import: not part of the notebook's import cell

    fp = f"{out_root}/{nj}x{nm}"
    # Only an already populated folder counts as prepared; an empty one left
    # behind by a failed run must not turn every later call into a no-op.
    if os.path.isdir(fp) and os.listdir(fp):
        return

    # Read the index before creating anything, so a missing checkout does not
    # leave an empty target folder behind.
    with open(os.path.join(jsplib_dir, "instances.json"), "r") as file:
        data = json.load(file)

    instances = [
        x for x in data
        if "ta" in x["name"] and x["jobs"] == nj and x["machines"] == nm
    ]

    os.makedirs(fp, exist_ok=True)
    for instance in instances:
        # shutil.copy is portable and finishes before returning, unlike an
        # un-awaited shell "cp" started through os.popen.
        shutil.copy(
            os.path.join(jsplib_dir, instance["path"]),
            os.path.join(fp, f"{instance['name']}.txt"),
        )


In [None]:
# path to taillard instances
# FILE_PATH = "taillard/{nj}x{nm}"
import gc
from rl4co.envs import JSSPEnv
from rl4co.models.zoo.l2d.model import L2DPPOModel
from rl4co.models.zoo.l2d.policy import L2DPolicy4PPO
from torch.utils.data import DataLoader
import json
import os

# prepare_taillard_data() writes the instance files to "taillard/{nj}x{nm}",
# so the dataset has to be read from that same folder.
FILE_PATH = "taillard/{nj}x{nm}"
results = {}
instance_types = [(15, 15), (20, 15), (20, 20), (30, 15), (30, 20)]

for instance_type in instance_types:
    nj, nm = instance_type
    prepare_taillard_data(nj, nm)
    dataset = env.dataset(batch_size=[3], phase="test", filename=FILE_PATH.format(nj=nj, nm=nm))
    print(dataset)
    dl = DataLoader(dataset, batch_size=3, collate_fn=dataset.collate_fn)
    rewards = []

    for batch in dl:
        td = env.reset(batch).to(device)
        env.render(td, 0)
        # use policy.generate to avoid grad calculations which can lead to oom
        out = model.policy.generate(
            td,
            env=env,
            phase="test",
            decode_type="multistart_sampling",
            num_starts=100,
            select_best=True,
        )
        rewards.append(out["reward"])

    if not rewards:
        # torch.cat() raises on an empty list; report the gap instead of crashing.
        print("No instances evaluated for instance type %s, skipping" % (instance_type,))
        continue

    reward = torch.cat(rewards, dim=0).mean().item()
    results[instance_type] = reward

    print("Done evaluating instance type %s with reward %s" % (instance_type, reward))

    # avoid ooms due to cache not being cleared
    model.rb.empty()
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
import torch

# Report whether torch can see a CUDA device (the boolean is the cell output).
torch.cuda.is_available()
