In [26]:
import gymnasium as gym

In [27]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper

In [28]:
from gymcts.logger import log

In [29]:
# 20 is the numeric value of logging.INFO in the standard library.
# Assuming `log` is a standard logging.Logger (TODO confirm against gymcts.logger),
# this shows INFO and above and suppresses log.debug(...) messages.
log.setLevel(20)

In [30]:
if __name__ == '__main__':
    # End-to-end example: solve the deterministic 4x4 FrozenLake with MCTS,
    # then replay the found action sequence while recording a video.
    log.debug("Starting example")

    # 0. create the environment
    env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=False, render_mode="rgb_array")
    env.reset()

    # 1. wrap the environment with the deep-copy wrapper or a custom gymcts wrapper
    env = DeepCopyMCTSGymEnvWrapper(env)

    # 2. create the agent
    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        number_of_simulations_per_step=200,
        exclude_unvisited_nodes_from_render=True
    )

    # 3. solve the environment
    actions = agent.solve()

    # 4. render the environment solution
    # episode_trigger always returns True, so every episode is recorded.
    env = gym.wrappers.RecordVideo(
        env,
        video_folder="./videos",
        episode_trigger=lambda episode_id: True,
        name_prefix="frozenlake_4x4"
    )

    # Start from an empty info dict so step 5 still works when `actions` is empty.
    info = {}
    try:
        env.reset()
        for a in actions:
            obs, rew, term, trun, info = env.step(a)
            if term or trun:
                # Stop at the end of the episode: stepping further would replace
                # the `info` that carries the episode statistics.
                break
    finally:
        # Always close the recorder, even if reset/step raised.
        env.close()

    # 5. print the solution
    # The statistics are read from info["episode"]; presumably this key is added by a
    # RecordEpisodeStatistics wrapper inside DeepCopyMCTSGymEnvWrapper (verify in gymcts).
    # It is only expected on the step that ends the episode, so it may be missing.
    episode_stats = info.get("episode")

    if episode_stats is None:
        print("Episode did not finish: no episode statistics were reported.")
    else:
        episode_length = episode_stats["l"]
        episode_return = episode_stats["r"]

        if episode_return == 1.0:
            print(f"Environment solved in {episode_length} steps.")
        else:
            print(f"Environment not solved in {episode_length} steps.")

([33mN[0m=200, [33mQ_v[0m=0.02, [33mbest[0m=1.00)
├── ([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m44[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.25[0m)
│   [38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m11[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.41[0m)
│   [38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.63[0m)
│   [38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.63[0m)
│   [38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;128;254;179m2[0m, [33mN[0m=[96m2[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0

([33ma[0m=[38;2;128;254;179m2[0m, [33mN[0m=[96m267[0m, [33mQ_v[0m=[38;2;184;135;140m0.36[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.47[0m)
[38;2;128;254;179m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m41[0m, [33mQ_v[0m=[38;2;250;102;128m0.02[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.29[0m)
[38;2;128;254;179m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m10[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.43[0m)
[38;2;128;254;179m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.62[0m)
[38;2;128;254;179m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m2[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m,

([33ma[0m=[38;2;128;254;179m2[0m, [33mN[0m=[96m357[0m, [33mQ_v[0m=[38;2;100;177;154m0.80[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.90[0m)
[38;2;128;254;179m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m11[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.52[0m)
[38;2;128;254;179m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.63[0m)
[38;2;128;254;179m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.74[0m)
[38;2;128;254;179m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m└── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, 

([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m523[0m, [33mQ_v[0m=[38;2;76;190;158m0.93[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.01[0m)
[38;2;0;180;235m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m4[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.88[0m)
[38;2;0;180;235m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m510[0m, [33mQ_v[0m=[38;2;72;192;159m0.95[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.03[0m)
[38;2;0;180;235m│   [0m[38;2;0;180;235m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m5[0m, [33mQ_v[0m=[38;2;216;119;134m0.20[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.99[0m)
[38;2;0;180;235m│   [0m[38;2;0;180;235m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;63;197;161m1.00[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.90[0m)
[38;2;0;180;235m│   [0m[38;2;0;

([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m710[0m, [33mQ_v[0m=[38;2;69;193;159m0.96[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.03[0m)
[38;2;0;180;235m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m5[0m, [33mQ_v[0m=[38;2;216;119;134m0.20[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.01[0m)
[38;2;0;180;235m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;63;197;161m1.00[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.90[0m)
[38;2;0;180;235m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.90[0m)
[38;2;0;180;235m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;128;254;179m2[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.90[0m)
[38;2;0;180;235m│   [0m[38;2;1

([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m896[0m, [33mQ_v[0m=[38;2;66;195;160m0.98[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.04[0m)
[38;2;0;180;235m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m1.06[0m)
[38;2;0;180;235m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.74[0m)
[38;2;0;180;235m│   [0m[38;2;127;0;255m└── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.74[0m)
[38;2;0;180;235m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m303[0m, [33mQ_v[0m=[38;2;68;194;160m0.97[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.08[0m)
[38;2;0;180;235m│   [0m[38;2;0;180;235m├── [0m([33ma

Environment solved in 6 steps.
