# Demo

In [None]:
%load_ext autoreload
%autoreload 2

from stable_baselines3.ppo import PPO
from stable_baselines3.ppo.policies import MlpPolicy

from gym_pcgrl.wrappers import SwapFullWrapper
from utils import generate_preset_diff

In [None]:
# Balancing value used throughout the notebook: it is passed to the
# environment config as "balancing" and selects the model checkpoint file name.
BALANCING = 0.5

In [None]:
# define environment
def create_env(env="nmmodiff", size=6):
    """Build a ``SwapFullWrapper`` around a square ``<env>-swap-v0`` level.

    Args:
        env: Prefix of the environment id; the id becomes ``"<env>-swap-v0"``.
        size: Used for both the ``width`` and the ``height`` of the level.

    Returns:
        The ``SwapFullWrapper`` instance created for that environment id.
    """
    env_id = f"{env}-swap-v0"
    wrapper_kwargs = dict(
        sim_runs=14,
        init_random_map=generate_preset_diff,
        width=size,
        height=size,
        b_method=0,
        balancing=BALANCING,  # module-level balancing value
    )
    return SwapFullWrapper(env_id, **wrapper_kwargs)

In [None]:
# Create the environment with the default settings and reset it once.
# reset() returns an (observation, stats) pair; the stats dict exposes the
# "balancing" value of the freshly generated level.
env = create_env()
obs, stats = env.reset()
print("Initial balancing of random level:", stats["balancing"])

In [None]:
# Render the environment in its current (just reset) state.
env.render()

In [None]:
# Load the trained model whose checkpoint matches BALANCING
# (e.g. 0.5 -> "balancing_0_5.zip").
path = f"models/wide_swap/balancing_{str(BALANCING).replace('.', '_')}.zip"
# PPO.load is a classmethod that builds the model from the checkpoint, so
# there is no need to construct (and immediately discard) a fresh PPO first.
model = PPO.load(path, env=env)

In [None]:
def inference(model_, env, max_steps=100):
    """Run one episode with a trained model and report the balancing change.

    The environment is reset, then ``model_`` is queried for an action and
    the environment is stepped until the episode is done, is truncated, or
    ``max_steps`` steps have been taken. The balancing value before and
    after the episode is printed.

    Args:
        model_: Object with a ``predict(obs)`` method returning a tuple whose
            first element is the action.
        env: Environment whose ``reset()`` returns ``(obs, info)``, whose
            ``step(action)`` returns ``(obs, reward, done, trunc, info)`` and
            whose ``unwrapped`` object offers ``get_rep_stats()`` and
            ``get_map()``.
        max_steps: Upper bound on the number of steps taken (default 100).

    Returns:
        A tuple ``(env, info, reward_total)``. ``info`` is the dict from the
        last step, extended with ``"init-balancing"``, ``"init-map"`` and
        ``"map"``; ``reward_total`` is the sum of all step rewards.
    """
    obs, _ = env.reset()
    core = env.unwrapped

    # Capture the starting values as plain values right away so they stay
    # correct even if the environment reuses/mutates its stats dict.
    init_balancing = core.get_rep_stats()["balancing"]
    init_map = core.get_map()

    print("Balancing start", round(init_balancing, 1))

    reward_total = 0
    info = {}  # stays bound even when max_steps is 0
    for _ in range(max_steps):
        action, _state = model_.predict(obs)
        obs, reward, done, trunc, info = env.step(action)
        reward_total += reward
        # A truncated episode is over as well; do not keep stepping it.
        if done or trunc:
            break

    # Query the stats again: the original printed the *initial* value here.
    final_balancing = core.get_rep_stats()["balancing"]
    print("Balancing after", round(final_balancing, 1))

    info["init-balancing"] = init_balancing
    info["init-map"] = init_map
    info["map"] = core.get_map()
    return env, info, reward_total

In [None]:
# Do balancing via the trained model. inference() hands back the environment,
# the info dict of the last step (extended with the initial/final maps and
# the initial balancing) and the summed reward.
env, info, reward = inference(model, env)