In [13]:
import gym
from stable_baselines3 import PPO
from envs import ALGEnv, ALGDemoWrapper, AgentDemoWrapper, SokobanEnv
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.utils import set_random_seed
import os
from IPython.display import Image, display
import numpy as np
from demo_utils import evaluate_agents

# 1. Initialization

In [14]:
# --- Level / training configuration ---------------------------------------
num_boxes = 1
dim_room = (7, 7)
train_mode = 'mlp'

# --- Checkpoint locations ---------------------------------------------------
alg_path = '../demo_checkpoints/alg_v1b_v'
agent_lb_path = '../demo_checkpoints/agent_v1b_0'  # lower-bound (lb) agent
agent_ub_path = '../demo_checkpoints/agent_v1b_1'  # upper-bound (ub) agent

# --- Probabilities of randomly placing specific tiles -----------------------
# All three placement probabilities share the same value.
init_box_target_prob = init_box_prob = init_player_prob = 0.5
init_probs = [init_box_target_prob, init_box_prob, init_player_prob]

# --- Shared Sokoban environment and the two agent demo wrappers -------------
soko_env = SokobanEnv(
    dim_room=dim_room,
    max_steps=50,
    num_boxes=num_boxes,
    train_mode=train_mode,
    log_train_info=False,
)
# NOTE(review): both wrappers are built on the same soko_env instance --
# confirm the wrappers do not rely on exclusive ownership of the environment.
agent_demo_lb = AgentDemoWrapper(soko_env, agent_path=agent_lb_path)
agent_demo_ub = AgentDemoWrapper(soko_env, agent_path=agent_ub_path)

# 2. Generate sample levels
Generate sample levels with three versions of the generator:<br>
v0: generated levels are basically playable (the player can reach every box and box-target tile, and no box is stuck in a corner, where it would be unpushable)<br>
<span style="color:red">cannot ensure solvability</span><br>
v1: generated levels are solvable by the upper-bound agent<br>
<span style="color:red">ensures solvability, but cannot ensure difficulty</span><br>
v2: generated levels are solvable by the upper-bound agent but not by the lower-bound agent<br>
<span style="color:red">ensures solvability, ensures difficulty</span><br>

### v0 generation and agents solving attempts

In [15]:
# Generate one level with the v0 generator and show the generation episode.
alg_version = 0
alg_env = ALGEnv(
    dim_room=dim_room,
    num_boxes=num_boxes,
    train_mode=train_mode,
    alg_version=alg_version,
    agent_lb_path=agent_lb_path,
    agent_ub_path=agent_ub_path,
    init_probs=init_probs,
)
alg_demo = ALGDemoWrapper(alg_env, alg_path=alg_path, alg_version=alg_version)

# generate_episode_gif() returns a success flag together with the generated
# map; `generated_map` is consumed by the following agent-attempt cell.
succeeded, generated_map = alg_demo.generate_episode_gif()
if succeeded:
    display(Image(filename='temp/alg_episode_v{}.gif'.format(alg_version), width=200, height=200))
else:
    # Report the failure instead of silently showing nothing, so the reader
    # knows the following agent cell is not running on a successful level.
    print('ALG v{} generation did not succeed; skipping GIF display.'.format(alg_version))

<IPython.core.display.Image object>

In [16]:
# Let both agents attempt the level generated by the previous cell:
# upper-bound agent first, then lower-bound agent.
for agent_demo in (agent_demo_ub, agent_demo_lb):
    agent_demo.generate_episode_gif(generated_map)
    # The GIF path contains no placeholder, so no .format() call is needed.
    # Both agents use the same path (presumably rewritten by each
    # generate_episode_gif call -- confirm), so display it before the next run.
    display(Image(filename='temp/agent_episode.gif', width=200, height=200))

<IPython.core.display.Image object>

<IPython.core.display.Image object>

### v1 generation and agents solving attempts

In [17]:
# Generate one level with the v1 generator and show the generation episode.
alg_version = 1
alg_env = ALGEnv(
    dim_room=dim_room,
    num_boxes=num_boxes,
    train_mode=train_mode,
    alg_version=alg_version,
    agent_lb_path=agent_lb_path,
    agent_ub_path=agent_ub_path,
    init_probs=init_probs,
)
alg_demo = ALGDemoWrapper(alg_env, alg_path=alg_path, alg_version=alg_version)

# generate_episode_gif() returns a success flag together with the generated
# map; `generated_map` is consumed by the following agent-attempt cell.
succeeded, generated_map = alg_demo.generate_episode_gif()
if succeeded:
    display(Image(filename='temp/alg_episode_v{}.gif'.format(alg_version), width=200, height=200))
else:
    # Report the failure instead of silently showing nothing, so the reader
    # knows the following agent cell is not running on a successful level.
    print('ALG v{} generation did not succeed; skipping GIF display.'.format(alg_version))

loaded ../demo_checkpoints/agent_v1b_1 as ub


<IPython.core.display.Image object>

In [18]:
# Let both agents attempt the level generated by the previous cell:
# upper-bound agent first, then lower-bound agent.
for agent_demo in (agent_demo_ub, agent_demo_lb):
    agent_demo.generate_episode_gif(generated_map)
    # The GIF path contains no placeholder, so no .format() call is needed.
    # Both agents use the same path (presumably rewritten by each
    # generate_episode_gif call -- confirm), so display it before the next run.
    display(Image(filename='temp/agent_episode.gif', width=200, height=200))

<IPython.core.display.Image object>

<IPython.core.display.Image object>

### v2 generation and agents solving attempts

In [19]:
# Generate one level with the v2 generator and show the generation episode.
alg_version = 2
alg_env = ALGEnv(
    dim_room=dim_room,
    num_boxes=num_boxes,
    train_mode=train_mode,
    alg_version=alg_version,
    agent_lb_path=agent_lb_path,
    agent_ub_path=agent_ub_path,
    init_probs=init_probs,
)
alg_demo = ALGDemoWrapper(alg_env, alg_path=alg_path, alg_version=alg_version)

# generate_episode_gif() returns a success flag together with the generated
# map; `generated_map` is consumed by the following agent-attempt cell.
succeeded, generated_map = alg_demo.generate_episode_gif()
if succeeded:
    display(Image(filename='temp/alg_episode_v{}.gif'.format(alg_version), width=200, height=200))
else:
    # Report the failure instead of silently showing nothing, so the reader
    # knows the following agent cell is not running on a successful level.
    print('ALG v{} generation did not succeed; skipping GIF display.'.format(alg_version))

loaded ../demo_checkpoints/agent_v1b_1 as ub
loaded ../demo_checkpoints/agent_v1b_0 as lb


<IPython.core.display.Image object>

In [20]:
# Let both agents attempt the level generated by the previous cell:
# upper-bound agent first, then lower-bound agent.
for agent_demo in (agent_demo_ub, agent_demo_lb):
    agent_demo.generate_episode_gif(generated_map)
    # The GIF path contains no placeholder, so no .format() call is needed.
    # Both agents use the same path (presumably rewritten by each
    # generate_episode_gif call -- confirm), so display it before the next run.
    display(Image(filename='temp/agent_episode.gif', width=200, height=200))

<IPython.core.display.Image object>

<IPython.core.display.Image object>

# 3. Evaluate agents
Compute the number of levels each agent can solve and the number of levels each agent solves uniquely (i.e., levels that only that agent can solve).

In [21]:
# Evaluate the two agent checkpoints (no ALG checkpoint is passed here).
evaluate_agents(
    version_li=['1b_0', '1b_1'],
    num_tests=1000,
    load_dir='../demo_checkpoints',
)

  0%|          | 2/1000 [00:00<01:11, 14.06it/s]

created soko env


100%|██████████| 1000/1000 [00:58<00:00, 17.06it/s]

1b_0 solved 371, uniquely solved 65
1b_1 solved 460, uniquely solved 154





# 4. Evaluate different versions of ALGs
Compute the generation success rate for each version of the ALG checkpoints

In [22]:
# Same agent evaluation, this time passing the ALG checkpoint with alg_version=0.
evaluate_agents(
    version_li=['1b_0', '1b_1'],
    num_tests=1000,
    load_dir='../demo_checkpoints',
    alg_path=alg_path,
    alg_version=0,
)

  0%|          | 2/1000 [00:00<01:10, 14.09it/s]

created soko env


100%|██████████| 1000/1000 [00:51<00:00, 19.52it/s]

1b_0 solved 24, uniquely solved 22
1b_1 solved 2, uniquely solved 0





In [25]:
# Same agent evaluation, this time passing the ALG checkpoint with alg_version=1.
evaluate_agents(
    version_li=['1b_0', '1b_1'],
    num_tests=1000,
    load_dir='../demo_checkpoints',
    alg_path=alg_path,
    alg_version=1,
)

  0%|          | 2/1000 [00:00<01:09, 14.29it/s]

created soko env


100%|██████████| 1000/1000 [00:46<00:00, 21.39it/s]

1b_0 solved 13, uniquely solved 2
1b_1 solved 177, uniquely solved 166





In [26]:
# Same agent evaluation, this time passing the ALG checkpoint with alg_version=2.
evaluate_agents(
    version_li=['1b_0', '1b_1'],
    num_tests=1000,
    load_dir='../demo_checkpoints',
    alg_path=alg_path,
    alg_version=2,
)

  0%|          | 1/1000 [00:00<01:43,  9.68it/s]

created soko env


100%|██████████| 1000/1000 [01:07<00:00, 14.73it/s]

1b_0 solved 9, uniquely solved 6
1b_1 solved 170, uniquely solved 167



