# HGNN - PPO Training
- Model = L2DPPOModel

In [None]:
import torch
from torch_geometric.data import DataLoader

from rl4co.envs.scheduling.djssp.env import DJSSPEnv
from rl4co.models import L2DPPOModel
from rl4co.utils import RL4COTrainer

# Settings handed to the environment's instance generator: 8 jobs on 8 machines
# with processing-time bounds of 1 and 99.
# NOTE(review): "mtbf" / "mttr" presumably stand for mean time between failures
# and mean time to repair of the machines - confirm against the DJSSP generator.
generator_params = dict(
    num_jobs=8,
    num_machines=8,
    min_processing_time=1,
    max_processing_time=99,
    mtbf=20,
    mttr=4,
)

# Environment instance shared by training, the demo rollouts and the benchmark.
env = DJSSPEnv(
    generator_params=generator_params,
    _torchrl_mode=True,
    stepwise_reward=True,
)

In [None]:
# Report whether PyTorch can see a CUDA device (displayed as the cell output).
torch.cuda.is_available()

# Device-Related Details

In [None]:
# Decide on the compute device once and derive every device-dependent setting
# from that single decision.
device = "cuda" if torch.cuda.is_available() else "cpu"

if device == "cuda":
    # GPU available: larger batches, more training data, bigger network.
    accelerator = "gpu"
    batch_size, train_data_size = 32, 2_000
    embed_dim, num_encoder_layers = 128, 4
else:
    # CPU fallback: scaled-down settings.
    accelerator = "cpu"
    batch_size, train_data_size = 4, 1_000
    embed_dim, num_encoder_layers = 64, 2

## Model - Policy
- Heterogeneous embeddings (`het_emb=True`)


In [None]:
# Options passed to L2DPPOModel as `policy_kwargs`.
# NOTE(review): these values are hard-coded; the device-dependent `embed_dim` and
# `num_encoder_layers` selected in the device cell are not used here - confirm
# that this is intended.
policy_kwargs = dict(
    embed_dim=256,
    num_encoder_layers=3,
    scaling_factor=1000,
    ppo_epochs=2,
    het_emb=True,
    normalization="instance",
)

# PPO model built on the environment, with separate batch sizes for
# train / val / test and a PPO mini-batch size.
model = L2DPPOModel(
    env=env,
    policy_kwargs=policy_kwargs,
    batch_size=128,
    val_batch_size=512,
    test_batch_size=64,
    mini_batch_size=512,
)

# Scheduling of the untrained model

In [None]:
# Reset the environment with batch size 1; this state is cloned for the rollouts
# of both the untrained and the trained model so they start from the same point.
td = env.reset(batch_size=[1])

In [None]:
# Move the policy to the selected device. `Module.to` returns the module itself,
# so `policy` and `model.policy` are the same object.
policy = model.policy.to(device)

# The state has to be on the same device as the policy weights; without the
# explicit `.to(device)` the rollout fails with a device mismatch on GPU.
# Multistart sampling with 100 starts, keeping only the best one.
out = policy.generate(
    td.clone().to(device),
    env=env,
    phase="test",
    decode_type="multistart_sampling",
    num_starts=100,
    select_best=True,
)

# Finishing Time

In [None]:
# Show the reward of the untrained policy's rollout (displayed as the cell output).
out["reward"]

# Training the Model

In [None]:

# Short demo run: one epoch on a single device, no logger attached.
# Raise max_epochs (e.g. to 10) for a longer training run.
trainer = RL4COTrainer(
    devices=1,
    accelerator=accelerator,
    max_epochs=1,
    logger=None,
)
trainer.fit(model)

# Put the model on `device` for the rollouts that follow.
model = model.to(device)

# Scheduling of the Trained Model

In [None]:
# Roll out the trained policy from a clone of the same initial state. The state
# is moved to `device` so it sits next to the model weights; without that the
# call fails with a device mismatch when the model is on GPU.
out = model.policy.generate(
    td.clone().to(device),
    env=env,
    phase="test",
    decode_type="multistart_sampling",
    num_starts=100,
    select_best=True,
)

## Exact finishing time

In [None]:
# Show the reward of the trained policy's rollout (displayed as the cell output).
out["reward"]

# Test on Taillard Benchmark

In [None]:
# Fetch the JSPLIB repository into ./JSPLIB; the cell below reads
# JSPLIB/instances.json and the instance files it points to.
! git clone https://github.com/tamy0612/JSPLIB.git

In [None]:
import json
import os


def prepare_taillard_data(nj, nm):
    """Copy the Taillard instances of size ``nj`` x ``nm`` out of JSPLIB.

    Reads ``JSPLIB/instances.json``, selects the entries whose name starts with
    ``"ta"`` and whose ``jobs`` / ``machines`` fields equal ``nj`` / ``nm``, and
    copies each instance file to ``taillard/{nj}x{nm}/<name>.txt``. All paths
    are relative to the current working directory.

    Args:
        nj: Number of jobs of the wanted instances.
        nm: Number of machines of the wanted instances.

    Raises:
        FileNotFoundError: If the index lists no matching instance, or if the
            target folder is still empty after copying.
    """
    # Local import keeps the function self-contained within its notebook cell.
    import shutil

    # Target folder for Taillard instances (created if missing).
    fp = f"taillard/{nj}x{nm}"
    os.makedirs(fp, exist_ok=True)

    # Load the JSPLIB index.
    with open("JSPLIB/instances.json", "r", encoding="utf-8") as file:
        data = json.load(file)

    # Taillard instances are named "ta<NN>"; match on the prefix rather than on
    # any occurrence of "ta" inside the name.
    instances = [
        x
        for x in data
        if x["name"].startswith("ta") and x["jobs"] == nj and x["machines"] == nm
    ]
    print(f"Found {len(instances)} instances for {nj} jobs and {nm} machines")

    if not instances:
        raise FileNotFoundError(f"No matching Taillard instances found for {nj}x{nm}")

    for instance in instances:
        source_path = os.path.join("JSPLIB", instance["path"])
        target_path = os.path.join(fp, f"{instance['name']}.txt")

        if os.path.isfile(source_path):
            print(f"Copying {source_path} to {target_path}")
            # Copy in-process instead of shelling out to `cp`: works on any
            # platform, copes with spaces in paths and raises on failure.
            shutil.copyfile(source_path, target_path)
        else:
            print(f"Warning: Source file {source_path} does not exist")

    # Verify that the target folder actually contains files. An explicit raise
    # is used because `assert` statements are removed under `python -O`.
    files_in_target = os.listdir(fp)
    if not files_in_target:
        raise FileNotFoundError(f"No files copied to {fp}. Check source paths.")
    print(f"Successfully prepared {len(files_in_target)} files in {fp}")

In [None]:
import gc

# torch's own DataLoader is used for evaluation because it honours `collate_fn`;
# the torch_geometric DataLoader imported at the top of the notebook discards
# that argument and applies its own collater instead.
from torch.utils.data import DataLoader as TorchDataLoader

# Evaluate on CPU. The model is moved explicitly so that its weights and the
# environment states created below are on the same device.
device = "cpu"
model = model.to(device)

# path to taillard instances
FILE_PATH = "taillard/{nj}x{nm}"

# Maps (num_jobs, num_machines) -> mean reward over the instances of that size.
results = {}
instance_types = [(15, 15), (20, 15), (20, 20), (30, 15), (30, 20)]

for instance_type in instance_types:
    print("------------")
    nj, nm = instance_type
    prepare_taillard_data(nj, nm)
    dataset = env.dataset(batch_size=[10], phase="test", filename=FILE_PATH.format(nj=nj, nm=nm))
    dl = TorchDataLoader(dataset, batch_size=5, collate_fn=dataset.collate_fn)
    rewards = []

    for batch in dl:
        td = env.reset(batch).to(device)
        # use policy.generate to avoid grad calculations which can lead to oom
        out = model.policy.generate(
            td,
            env=env,
            phase="test",
            decode_type="multistart_sampling",
            num_starts=100,
            select_best=True,
        )
        rewards.append(out["reward"])

    # Average the per-instance rewards of this instance size.
    reward = torch.cat(rewards, dim=0).mean().item()
    results[instance_type] = reward

    print(f"Done evaluating instance type {instance_type} with reward {reward}")

    # avoid ooms due to cache not being cleared
    model.rb.empty()
    gc.collect()
    torch.cuda.empty_cache()