# Trained vs Untrained Using MatNet and POMO

In [None]:
import torch
from rl4co.envs.scheduling.djssp.env import DJSSPEnv
from rl4co.utils import RL4COTrainer

# Settings forwarded to the environment's instance generator: 8 jobs on
# 8 machines with processing-time bounds 20..99.
# NOTE(review): mtbf / mttr presumably stand for "mean time between failures" and
# "mean time to repair" of machine breakdowns — confirm against the generator.
generator_params = dict(
    num_jobs=8,
    num_machines=8,
    min_processing_time=20,
    max_processing_time=99,
    mtbf=30,
    mttr=10,
)

# Environment instance shared by every rollout, training run and rendering below.
env = DJSSPEnv(
    generator_params=generator_params,
    _torchrl_mode=True,
    stepwise_reward=True,
)

In [None]:
# Sanity check: shows whether PyTorch can see a CUDA device in this session.
torch.cuda.is_available()

# Device-Related Details

In [None]:
# Probe CUDA once and derive every device-dependent setting from that answer.
_cuda_ready = torch.cuda.is_available()

# Torch device string used for tensors/modules, and the matching Lightning accelerator.
device = "cuda" if _cuda_ready else "cpu"
accelerator = "gpu" if _cuda_ready else "cpu"

# Larger settings when a GPU is present, smaller ones for CPU-only runs.
batch_size, train_data_size, embed_dim, num_encoder_layers = (
    (256, 2_000, 128, 4) if _cuda_ready else (32, 1_000, 64, 2)
)

### WandB


In [None]:
import wandb
from lightning.pytorch.loggers import WandbLogger

# Start a Weights & Biases run in the "rl4co" project, annotated with a free-text note.
run = wandb.init(
    project = "rl4co",
    notes = "MatNET-POMO-Training"
)


# Lightning logger that is later passed to the trainer.
# NOTE(review): a wandb run is already active at this point; presumably the logger
# attaches to that run instead of opening a second one named "MatNET-POMO-Training" — confirm.
logger = WandbLogger(project="rl4co", name="MatNET-POMO-Training")

## Policy = L2DPolicy (with a MatNet encoder)

In [None]:
from rl4co.models.nn.env_embeddings.init import FJSPMatNetInitEmbedding
from rl4co.models.zoo.matnet.matnet_w_sa import Encoder
from rl4co.models import L2DPolicy, POMO

# Single embedding width shared by the init embedding, the encoder and the policy.
# NOTE(review): this value (and num_layers=4 below) is hard-coded and does not use the
# `embed_dim` / `num_encoder_layers` chosen in the device cell — confirm that is intended.
policy_embed_dim = 256

# Initial embedding fed into the MatNet encoder.
init_embedding = FJSPMatNetInitEmbedding(
    embed_dim=policy_embed_dim,
    scaling_factor=1000,
)

# MatNet encoder that replaces the policy's default encoder.
encoder = Encoder(
    embed_dim=policy_embed_dim,
    num_heads=8,
    num_layers=4,
    normalization="batch",
    init_embedding=init_embedding,
)

# L2D policy wired to the environment by name and driven by the encoder above.
policy = L2DPolicy(
    env_name=env.name,
    encoder=encoder,
    embed_dim=policy_embed_dim,
    stepwise_encoding=False,
    het_emb=True,
    scaling_factor=1000,
)



## Model = POMO

In [None]:
# Quantities tracked during the validation and test phases.
metrics = dict(
    val=["reward", "max_reward"],
    test=["reward", "max_reward"],
)

# POMO model wrapping the environment and the policy built above.
# NOTE(review): batch_size is fixed at 64 and no train_data_size is passed, so the
# `batch_size` / `train_data_size` values from the device cell are unused — confirm.
model = POMO(
    env=env,
    policy=policy,
    batch_size=64,
    num_starts=10,
    num_augment=0,
    baseline="shared",
    metrics=metrics,
)

# Scheduling of the Untrained Model

In [None]:
# Draw one fresh instance (batch of size 1) and place it on the target device right
# away; leaving it on another device than the policy raises an error later on.
td_init = env.reset(batch_size=[1]).to(device)

In [None]:
# The policy has to live on the same device as td_init, so move it there as well.
policy = model.policy.to(device=device)

### Greedy


In [None]:
# Roll out the untrained policy once on a copy of the initial state with greedy decoding.
out_untrained_greedy = policy(td_init.clone(), env, phase="test", decode_type="greedy")

# The multistart cells of this notebook report `-reward` as the makespan, so the raw
# reward is the negated makespan: flip the sign here instead of printing a negative
# "makespan".
print(f"Untrained greedy output with the makespan {-out_untrained_greedy['reward'][0].item():.2f}")

# for multiple batches use this:
# print(f"Greedy - Makespan: {[f'{-r.item():.2f}' for r in out_untrained_greedy['reward']]}")


In [None]:
# Draw the schedule reached by the untrained greedy rollout.
# NOTE(review): the second argument is presumably the batch index to render — confirm.
env.render(out_untrained_greedy["td"] , 0)

### Multistart Sampling

In [None]:
# Multistart sampling on a copy of the initial state: 100 starts, with
# select_best=True requested from the policy.
out_untrained_multistart = policy.generate(
    td_init.clone(),
    env=env,
    phase="test",
    decode_type="multistart_sampling",
    num_starts=100,
    select_best=True,
)

# Negated rewards, formatted to two decimals, one entry per instance in the batch.
untrained_makespans = [f"{-r.item():.2f}" for r in out_untrained_multistart["reward"]]
print(f"Untrained Multistart - Makespan: {untrained_makespans}")

In [None]:
# Draw the schedule reached by the untrained multistart rollout.
# NOTE(review): the second argument is presumably the batch index to render — confirm.
env.render(out_untrained_multistart["td"] , 0)

# Training the Model

### Checkpoint - Callback Setup

In [None]:
from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary

# Checkpointing: keep the single best model by validation reward plus the latest state.
# NOTE: with Lightning's default auto_insert_metric_name=True the filename template
# below expands to "epoch_epoch=XXX.ckpt" rather than "epoch_XXX.ckpt".
checkpoint_callback = ModelCheckpoint(
    dirpath="checkpoints",  # everything is written under checkpoints/
    filename="epoch_{epoch:03d}",
    save_top_k=1,  # keep only the best-scoring checkpoint
    save_last=True,  # also keep the most recent state (loaded later as last.ckpt)
    monitor="val/reward",  # metric that decides which checkpoint is "best"
    mode="max",  # a higher validation reward is better
)

# Model summary printed at the start of training, three levels deep.
rich_model_summary = RichModelSummary(max_depth=3)

# Callbacks handed to the trainer.
callbacks = [checkpoint_callback, rich_model_summary]



### Training

In [None]:
# Reuse the checkpoint left by a previous run when there is one; otherwise train now.
#CHECKPOINT_PATH = "last.ckpt"
CHECKPOINT_PATH = "checkpoints/last.ckpt"
try:
    model = POMO.load_from_checkpoint(CHECKPOINT_PATH)
except FileNotFoundError:

    # No checkpoint on disk yet: train the freshly built model for 10 epochs.
    trainer = RL4COTrainer(
    max_epochs=10,
    accelerator=accelerator,
    devices=1,
    logger=logger,
    callbacks=callbacks
    )

    trainer.fit(model)
finally:
    # Runs on both paths (and also while an error propagates): place the model on the
    # target device so the evaluation cells below can use it directly.
    model = model.to(device)


# Scheduling of the Trained Model

In [None]:
# Make sure the initial state is on the target device (it was already moved earlier;
# repeating the call is harmless).
td_init = td_init.to(device)
# Take the policy from the trained (or reloaded) model and put it on the same device.
policy = model.policy.to(device=device)

### Greedy


In [None]:
# Roll out the trained policy once on a copy of the initial state with greedy decoding.
out_trained_greedy = policy(td_init.clone(), env, phase="test", decode_type="greedy")

# The multistart cells of this notebook report `-reward` as the makespan, so the raw
# reward is the negated makespan: flip the sign here instead of printing a negative
# "makespan".
print(f"Trained greedy output with the makespan {-out_trained_greedy['reward'][0].item():.2f}")

In [None]:
# Draw the schedule reached by the trained greedy rollout.
# NOTE(review): the second argument is presumably the batch index to render — confirm.
env.render(out_trained_greedy["td"] , 0)

### Multistart


In [None]:
# Multistart sampling with the trained policy: 100 starts, select_best=True.
out_trained_multistart = policy.generate(td_init.clone(), env=env, phase="test", decode_type="multistart_sampling", num_starts=100, select_best=True)
# The printed values are negated rewards, which the untrained multistart cell labels
# as makespans; use the same label so both outputs can be compared directly.
print(f"Trained Multistart - Makespan: {[f'{-r.item():.2f}' for r in out_trained_multistart['reward']]}")


In [None]:
# Draw the schedule reached by the trained multistart rollout.
# NOTE(review): the second argument is presumably the batch index to render — confirm.
env.render(out_trained_multistart["td"] , 0)

# Test on Taillard Benchmark

In [None]:
# Fetch the JSPLIB benchmark repository into ./JSPLIB (IPython shell escape).
! git clone https://github.com/tamy0612/JSPLIB.git

In [None]:
import json
import os
import shutil


def prepare_taillard_data(nj, nm):
    """Copy the Taillard instances of size ``nj`` x ``nm`` into ``taillard/{nj}x{nm}``.

    The instance catalogue is read from ``JSPLIB/instances.json`` (relative to the
    current working directory). Every entry whose name starts with ``"ta"`` and whose
    ``jobs`` / ``machines`` fields equal ``nj`` / ``nm`` is copied from
    ``JSPLIB/<path>`` to ``taillard/{nj}x{nm}/<name>.txt``. Entries whose source file
    is missing are reported with a warning and skipped.

    Args:
        nj: Number of jobs of the wanted instances.
        nm: Number of machines of the wanted instances.

    Raises:
        FileNotFoundError: If the catalogue lists no matching instance, or if the
            target folder is still empty after the copy step.
    """
    # Target folder for Taillard instances; exist_ok avoids the check-then-create race.
    fp = f"taillard/{nj}x{nm}"
    os.makedirs(fp, exist_ok=True)

    # Load the JSON catalogue of all JSPLIB instances.
    with open(os.path.join("JSPLIB", "instances.json"), "r", encoding="utf-8") as file:
        data = json.load(file)

    # Keep Taillard instances only (prefix match, so an unrelated name that merely
    # contains "ta" is not picked up) with the requested dimensions.
    instances = [
        x
        for x in data
        if x["name"].startswith("ta") and x["jobs"] == nj and x["machines"] == nm
    ]
    print(f"Found {len(instances)} instances for {nj} jobs and {nm} machines")

    if not instances:
        raise FileNotFoundError(f"No matching Taillard instances found for {nj}x{nm}")

    # Copy files and validate
    for instance in instances:
        source_path = os.path.join("JSPLIB", instance['path'])
        target_path = os.path.join(fp, f"{instance['name']}.txt")

        if os.path.isfile(source_path):
            print(f"Copying {source_path} to {target_path}")
            # In-process copy: portable, safe for paths with spaces or shell
            # metacharacters, and failures surface as exceptions instead of an
            # ignored exit status.
            shutil.copyfile(source_path, target_path)
        else:
            print(f"Warning: Source file {source_path} does not exist")

    # Verify that the target folder really contains instance files. An explicit raise
    # is used because `assert` statements are stripped when Python runs with -O.
    files_in_target = os.listdir(fp)
    if not files_in_target:
        raise FileNotFoundError(f"No files copied to {fp}. Check source paths.")
    print(f"Successfully prepared {len(files_in_target)} files in {fp}")

In [None]:
import gc
from torch.utils.data import DataLoader

device = "cuda" if torch.cuda.is_available() else "cpu"

# path to taillard instances
FILE_PATH = "taillard/{nj}x{nm}"

# Mean reward per (num_jobs, num_machines) instance size.
results = {}
instance_types = [(15, 15), (20, 15), (20, 20), (30, 15), (30, 20)]

for instance_type in instance_types:
    print("------------")
    nj, nm = instance_type
    prepare_taillard_data(nj, nm)
    dataset = env.dataset(batch_size=[10], phase="test", filename=FILE_PATH.format(nj=nj, nm=nm))
    dl = DataLoader(dataset, batch_size=5, collate_fn=dataset.collate_fn)
    rewards = []

    # Evaluation only: disable autograd explicitly so no graph is ever retained,
    # whatever policy.generate does internally (retained graphs can lead to OOM).
    with torch.no_grad():
        for batch in dl:
            td = env.reset(batch).to(device)
            out = model.policy.generate(td, env=env, phase="test", decode_type="multistart_sampling", num_starts=100, select_best=True)
            rewards.append(out["reward"])

    # Average over all instances of this size.
    reward = torch.cat(rewards, dim=0).mean().item()
    results[instance_type] = reward

    print(f"Done evaluating instance type {instance_type} with reward {reward}")

    # avoid ooms due to cache not being cleared
    # Only models that keep a replay buffer expose `rb`; guard the call so a model
    # without one does not abort the benchmark with an AttributeError.
    replay_buffer = getattr(model, "rb", None)
    if replay_buffer is not None:
        replay_buffer.empty()
    gc.collect()
    torch.cuda.empty_cache()

In [None]:
# Close the Weights & Biases run opened at the top of the notebook.
wandb.finish()