In [None]:
import torch
from torch.utils.data import DataLoader

from rl4co.envs.scheduling.djssp.env import DJSSPEnv
from rl4co.envs.scheduling.djssp.render import render

# Hardware-dependent settings: a CUDA device gets the larger batch, dataset
# and network; otherwise fall back to a lighter CPU-friendly configuration.
accelerator, batch_size, train_data_size, embed_dim, num_encoder_layers = (
    ("gpu", 256, 2_000, 128, 4)
    if torch.cuda.is_available()
    else ("cpu", 32, 1_000, 64, 2)
)

In [None]:
# Settings forwarded to the environment's instance generator.
# NOTE(review): mtbf / mttr presumably stand for mean time between failures /
# mean time to repair of a machine — confirm meaning and units with the generator.
generator_params = dict(
    num_jobs=6,
    num_machines=6,
    min_processing_time=20,
    max_processing_time=99,
    mtbf=40,
    mttr=5,
)


from rl4co.envs import JSSPEnv



# Build the DJSSP environment from the generator settings, with
# stepwise_reward switched on.
env = DJSSPEnv(
    generator_params=generator_params,
    stepwise_reward=True,
)

In [None]:
import gc
from rl4co.models.zoo.l2d.model import L2DPPOModel
from rl4co.models.zoo.l2d.policy import L2DPolicy4PPO
from torch.utils.data import DataLoader
import json
import os
from rl4co.utils.trainer import RL4COTrainer
import torch

# L2D policy for PPO; embedding size and encoder depth come from the
# hardware-dependent settings chosen earlier in the notebook.
# NOTE(review): env_name is "djsp" while the environment module is named
# "djssp" — confirm this is the key the policy expects.
policy = L2DPolicy4PPO(
    env_name="djsp",
    embed_dim=embed_dim,
    num_encoder_layers=num_encoder_layers,
    het_emb=False,
)

# PPO model tying the environment and the policy together, along with the
# dataset sizes, batch size and optimizer learning rate.
model = L2DPPOModel(
    env=env,
    policy=policy,
    optimizer_kwargs={"lr": 1e-4},
    batch_size=batch_size,
    train_data_size=train_data_size,
    val_data_size=1_000,
)

In [None]:
# Reset the environment with batch_size=[2] and keep the returned state in `td`
# so the following cells can inspect it.
td = env.reset(batch_size=[2])


In [None]:
# Display the "machine_breakdowns" entry of the reset state.
# NOTE(review): presumably the breakdown data sampled from mtbf / mttr — confirm.
td["machine_breakdowns"]

In [None]:
# td["try"][1]

In [None]:
# Plain-text print of the same "machine_breakdowns" entry.
print(td["machine_breakdowns"])

In [None]:
# Index 1 along the leading dimension of "machine_breakdowns" — presumably the
# second instance of the batch (batch_size=[2]); confirm.
td["machine_breakdowns"][1]

In [None]:
# Index 0 along the leading dimension of "proc_times" — presumably the
# processing times of the first instance in the batch; confirm.
td["proc_times"][0]

In [None]:
# Index 0 along the leading dimension of "machine_breakdowns" — presumably the
# first instance of the batch; confirm.
td["machine_breakdowns"][0]

In [None]:
# Run the policy in test phase on a clone of `td` (the original state object is
# not passed in), using multistart sampling with 100 starts.
# NOTE(review): select_best=True presumably keeps only the best rollout per
# instance — confirm against the policy implementation.
out = model.policy.generate(
    td.clone(),
    env=env,
    phase="test",
    decode_type="multistart_sampling",
    num_starts=100,
    select_best=True,
)


In [None]:
# Pull the "td" entry out of the generate() result.
# NOTE(review): presumably the final state after decoding — confirm.
output = out["td"]

In [None]:
# Display the "start_times" entry of the state returned by generate().
output["start_times"]

In [None]:
# Display the "finish_times" entry of the state returned by generate().
output["finish_times"]