In [1]:
# runner.py
from SPARS.Gym import config  # monkey patching the gym config
from SPARS.Gym import utils as G
from SPARS.Gym.gym import HPCGymEnv
from SPARS.Simulator.Simulator import Simulator, run_simulation
from SPARS.Logger import (
    set_log_level,
    log_info,
    log_trace,
)


import os

import time


import torch as T

from SPARS.Utils import log_output, _load_config, _choose_device, _build_agent, get_action, log_config_summary

# Path of the configuration file handed to _load_config below.
DEFAULT_CFG_PATH = "simulator_config.yaml"

# Load the configuration once at module level; main() reads this global ``cfg``.
cfg = _load_config(DEFAULT_CFG_PATH)
# Apply the configured log level first so the summary below is emitted
# under the requested verbosity.
set_log_level(cfg["logging"]["level"])
log_config_summary(cfg)




[INFO] === Config Summary ===
[INFO] Paths: workload=workloads/nasa-20-back.json, platform=platforms/platform-nasa-128.json, output=results_test
[INFO] Run: algorithm=easy_psas, overrun_policy=terminate, start_time=0, timeout=None
[INFO] RL: ENABLED (type=discrete, dt=1800, device=cuda, epochs=1, num_nodes=128)
[INFO] RL: agent=spars, checkpoint=None
[INFO] Logging: level=TRACE, file=results/simulation.log


In [2]:
def _restore_checkpoint(model, model_opt, checkpoint, device):
    """Load model and optimizer state dicts from ``checkpoint``.

    The checkpoint is expected to contain the ``"model_state_dict"`` and
    ``"optimizer_state_dict"`` keys written by ``_save_checkpoint``.
    """
    ckpt = T.load(checkpoint, map_location=device)
    model.load_state_dict(ckpt["model_state_dict"])
    model_opt.load_state_dict(ckpt["optimizer_state_dict"])
    model.to(device)


def _save_checkpoint(model, model_opt, rl_config, epochs, output_path):
    """Write the agent checkpoint to ``<output_path>/agent_checkpoint.pt``."""
    os.makedirs(output_path, exist_ok=True)
    ckpt = {
        "agent_class": f"{model.__class__.__module__}:{model.__class__.__name__}",
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": model_opt.state_dict(),
        "rl_config": rl_config,
        # NOTE(review): this stores the *configured* epoch count, not the
        # number of epochs completed when the checkpoint was written.
        "epochs_trained": epochs,
    }
    ckpt_path = os.path.join(output_path, "agent_checkpoint.pt")
    T.save(ckpt, ckpt_path)
    log_trace(f"Saved agent checkpoint to: {ckpt_path}")


def _run_training_epoch(env, model, model_opt, device, rl_kwargs,
                        batch_timesteps_size=32):
    """Run one full simulation, calling ``G.learn`` after every rollout batch.

    A fresh simulator is built from the global ``cfg`` and installed into
    ``env``. Steps are collected in batches of at most
    ``batch_timesteps_size``; each batch is handed to ``G.learn`` as the
    tuple ``(actions, features, rewards)``.

    Returns:
        ``(reward_sum, action_sum, steps)`` accumulated over *every* batch of
        the epoch, so callers can compute true per-epoch averages. ``steps``
        is 0 when the simulator was not running after start-up.
    """
    simulator = Simulator.from_config(cfg, rl_kwargs=rl_kwargs)
    env.reset(simulator)
    env.simulator.start_simulator()
    observation = env.get_observation()

    reward_sum = 0.0
    action_sum = 0.0
    steps = 0

    while env.simulator.is_running:
        memory_features = []
        memory_actions = []
        memory_rewards = []

        # Roll out up to one batch of environment steps.
        for _ in range(batch_timesteps_size):
            rollout_start = time.time()
            # NOTE(review): the observation is used as a single tensor here,
            # while the inference loop unpacks it as (features, mask) --
            # confirm which shape env.get_observation()/env.step() return.
            features_ = observation.to(device)

            # NOTE(review): earlier commented-out code reshaped the features
            # to (1, num_nodes, 11) for the "thomas" agent; only the SPARS
            # input layout is handled here.
            action, _logprob = get_action(model, features_)
            next_observation, reward, done = env.step(action)

            log_trace(f"Step reward: {reward}")

            # Store experience detached from the autograd graph.
            memory_actions.append(action.detach())
            memory_features.append(features_.detach())
            memory_rewards.append(reward.detach() if isinstance(reward, T.Tensor)
                                  else T.tensor(float(reward)))

            observation = next_observation

            # Log before checking ``done`` so the final step is timed too.
            rollout_finish = time.time()
            log_info(
                f'Rollout duration: {rollout_finish - rollout_start}')
            if done:
                break

        G.learn(model, model_opt,
                (memory_actions, memory_features, memory_rewards))

        # Accumulate across batches; the per-batch lists are discarded on the
        # next pass of the while loop.
        reward_sum = reward_sum + sum(memory_rewards)
        action_sum = action_sum + sum(memory_actions)
        steps += len(memory_rewards)

    return reward_sum, action_sum, steps


def _run_inference(env, simulator, model, device):
    """Drive ``simulator`` to completion using actions chosen by ``model``."""
    env.reset(simulator)
    env.simulator.start_simulator()
    observation = env.get_observation()

    # Nothing is back-propagated during inference, so skip graph building.
    with T.no_grad():
        while env.simulator.is_running:
            # NOTE(review): unpacked as (features, mask) here but used as a
            # single tensor in the training loop -- confirm the real shape.
            features_, _mask = observation
            features_ = features_.to(device)
            action, _logprob = get_action(model, features_)
            observation, _reward, _done = env.step(action)


def main():
    """Run the simulation described by the module-level ``cfg``.

    * RL disabled: build the simulator and call ``run_simulation``.
    * RL enabled with ``rl.learn`` true: train for up to ``rl.epochs`` epochs,
      saving a checkpoint and the simulator output whenever the average
      epoch reward improves, and stopping after more than 50 consecutive
      epochs without improvement.
    * RL enabled with ``rl.learn`` false: run a single simulation with the
      agent choosing actions, then write the simulator output.

    Raises:
        RuntimeError: if ``rl.type`` is ``"discrete"`` but ``rl.dt`` is None.
    """
    output_path = cfg["paths"]["output"]
    rl_cfg = cfg["rl"]

    # Plain (non-RL) run: none of the RL-only settings are needed.
    if not bool(rl_cfg["enabled"]):
        simulator = Simulator.from_config(cfg)
        run_simulation(simulator, output_path)
        return

    rl_type = rl_cfg["type"]
    rl_dt = rl_cfg["dt"] if rl_type == "discrete" else None
    if rl_type == "discrete" and rl_dt is None:
        raise RuntimeError("Discrete RL requires rl.dt in the config file.")

    device = _choose_device(rl_cfg["device"])
    epochs = int(rl_cfg["epochs"])
    rl_kwargs = {"rl_type": rl_type, "rl_dt": rl_dt}

    # Select the agent per config structure: rl.agents + rl.assign
    # (e.g. "thomas" or "spars").
    agent_cfg = rl_cfg["agents"][rl_cfg["assign"]]
    checkpoint = rl_cfg["checkpoint"]
    learn = rl_cfg["learn"]

    # Build simulator from config (no CLI/args).
    simulator = Simulator.from_config(cfg, rl_kwargs=rl_kwargs)
    env = HPCGymEnv(simulator, device)

    # _build_agent is given a small shim in the shape it is called with here:
    # {'agent': <agent_cfg>, 'device': <rl.device>}.
    model, model_opt = _build_agent(
        {"agent": agent_cfg, "device": rl_cfg["device"]}, device)

    if checkpoint is not None:
        _restore_checkpoint(model, model_opt, checkpoint, device)

    # Set the mode explicitly so it does not depend on a checkpoint existing.
    if learn:
        model.train()
    else:
        model.eval()

    if not learn:
        _run_inference(env, simulator, model, device)
        log_output(env.simulator, output_path)
        return

    max_stale_epochs = 50
    best_avg_reward = float("-inf")
    no_improvement_count = 0

    for epoch in range(epochs):
        log_info(f'========== EPOCH {epoch} ==========')
        reward_sum, action_sum, steps = _run_training_epoch(
            env, model, model_opt, device, rl_kwargs)

        if steps == 0:
            # Without any step there is nothing to average or to learn from.
            log_info("No environment steps were taken in this epoch; "
                     "skipping epoch statistics.")
            continue

        avg_epoch_reward = reward_sum / steps
        avg_action = action_sum / steps

        if avg_epoch_reward > best_avg_reward:
            best_avg_reward = avg_epoch_reward
            no_improvement_count = 0
            _save_checkpoint(model, model_opt, cfg.get("rl", {}),
                             epochs, output_path)
            log_output(env.simulator, output_path)
        else:
            no_improvement_count += 1

        log_info(f"AVG ACTION: {avg_action}")
        log_info(f"AVG REWARD: {avg_epoch_reward}")
        log_info(f"MAX REWARD: {best_avg_reward}")
        log_info(f"NO IMPROVEMENT COUNT: {no_improvement_count}")

        if no_improvement_count > max_stale_epochs:
            break


# Run the experiment only when this file is executed as a script.
if __name__ == "__main__":
    main()

[INFO] Current Time: 0
[INFO] Action taken: tensor([0.7087])
[TRACE] Translated Actions: tensor([91.], device='cuda:0') active nodes
[TRACE] [Time=0.00] type=simulation_start
[TRACE] [Time=0.00] job_id=34036 type=arrival res=4 subtime=0 profile=100 user_id=0 reqtime=132 runtime=132
[TRACE] [Time=0.00] type=switch_off nodes=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]
[TRACE] [Time=0.00] job_id=34036 type=execution_start subtime=0 reqtime=132 runtime=132 res=4 nodes=[37, 38, 39, 40]
[TRACE] [Time=132.00] job_id=34036 type=execution_finished res=4 nodes=[37, 38, 39, 40] start_time=0.0 subtime=0 reqtime=132 req_finish_time=132.0 runtime=132 actual_finish_time=132.0
[TRACE] [Time=256.00] job_id=34039 type=arrival res=1 subtime=256 profile=100 user_id=0 reqtime=111 runtime=111
[TRACE] [Time=256.00] job_id=34039 type=execution_start subtime=256 reqtime=111 runtime=111 res=1 nodes=[37]
[TRACE] [Time=