In [6]:
import gymnasium as gym

In [7]:
from gymcts.gymcts_agent import GymctsAgent
from gymcts.gymcts_deepcopy_wrapper import DeepCopyMCTSGymEnvWrapper

In [8]:
from gymcts.logger import log

In [9]:
# Verbosity of the gymcts logger, given as a numeric level:
#   20 -> INFO
#   10 -> DEBUG (shows more detailed information)
GYMCTS_LOG_LEVEL = 20
log.setLevel(GYMCTS_LOG_LEVEL)

In [10]:
if __name__ == '__main__':
    # 0. create the environment
    #    is_slippery=True makes the transitions stochastic; render_mode="ansi"
    #    is used so that the result of env.render() can be printed below.
    env = gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True, render_mode="ansi")
    env.reset()

    # 1. wrap the environment with the deep copy wrapper or a custom gymcts wrapper
    env = DeepCopyMCTSGymEnvWrapper(env)

    # 2. create the agent
    agent = GymctsAgent(
        env=env,
        clear_mcts_tree_after_step=False,
        render_tree_after_step=True,
        number_of_simulations_per_step=50,
        exclude_unvisited_nodes_from_render=True
    )

    # 3. solve the environment
    actions = agent.solve()

    # 4. render the environment solution in the terminal
    print(env.render())
    # Pre-bind `info` so the summary below does not raise a NameError when
    # `actions` is empty.
    info = {}
    for a in actions:
        obs, rew, term, trun, info = env.step(a)
        print(env.render())
        if term or trun:
            # The episode is over; stepping a finished episode without a reset
            # is not meaningful, and this step's `info` is the one to report.
            break

    # 5. print the solution
    # read the solution from the "episode" entry of the last step's info
    # NOTE(review): this entry is presumably added by a RecordEpisodeStatistics
    # wrapper that DeepCopyMCTSGymEnvWrapper applies internally - confirm.
    episode_stats = info.get("episode")

    if episode_stats is None:
        # No finished episode was reported (no actions replayed, or the replay
        # did not end in a terminal/truncated state).
        print("No finished episode was recorded; cannot report whether the environment was solved.")
    else:
        episode_length = episode_stats["l"]
        episode_return = episode_stats["r"]

        if episode_return == 1.0:
            print(f"Environment solved in {episode_length} steps.")
        else:
            print(f"Environment not solved in {episode_length} steps.")

([33mN[0m=50, [33mQ_v[0m=0.00, [33mbest[0m=0.00)
├── ([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m13[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.39[0m)
│   [38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.65[0m)
│   [38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.74[0m)
│   [38;2;127;0;255m│   [0m[38;2;127;0;255m└── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.74[0m)
│   [38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.65[0m)
│   [38;2;127;0;255m│   [0m[38;2;0;180;235m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0

([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m63[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.19[0m)
[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m16[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.36[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m4[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.59[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.83[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=0.00, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.83[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m└── [0m([33ma[0m=[38;2;255;

([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m66[0m, [33mQ_v[0m=[38;2;237;108;130m0.09[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.28[0m)
[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m22[0m, [33mQ_v[0m=[38;2;229;112;132m0.14[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.44[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m4[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.62[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.83[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0

([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m72[0m, [33mQ_v[0m=[38;2;239;107;130m0.08[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.26[0m)
[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m12[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.42[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.64[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.74[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m└── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0

([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m85[0m, [33mQ_v[0m=[38;2;203;126;136m0.27[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.44[0m)
[38;2;0;180;235m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m17[0m, [33mQ_v[0m=[38;2;232;111;131m0.12[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.48[0m)
[38;2;0;180;235m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m4[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.60[0m)
[38;2;0;180;235m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.83[0m)
[38;2;0;180;235m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0

([33ma[0m=[38;2;128;254;179m2[0m, [33mN[0m=[96m97[0m, [33mQ_v[0m=[38;2;122;166;150m0.69[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.85[0m)
[38;2;128;254;179m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m4[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.76[0m)
[38;2;128;254;179m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m9[0m, [33mQ_v[0m=[38;2;212;121;135m0.22[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.73[0m)
[38;2;128;254;179m│   [0m[38;2;0;180;235m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m2[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.74[0m)
[38;2;128;254;179m│   [0m[38;2;0;180;235m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m3[0m, [33mQ_v[0m=[38;2;191;132;139m0.33[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m0.94[0m)
[38;2;128;254;179m│   [0m[38;2;0;180;235m│   

([33ma[0m=[38;2;255;178;96m3[0m, [33mN[0m=[96m126[0m, [33mQ_v[0m=[38;2;87;184;156m0.87[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.01[0m)
[38;2;255;178;96m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m116[0m, [33mQ_v[0m=[38;2;74;191;159m0.94[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.08[0m)
[38;2;255;178;96m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m2[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m1.09[0m)
[38;2;255;178;96m│   [0m[38;2;127;0;255m│   [0m[38;2;127;0;255m└── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.59[0m)
[38;2;255;178;96m│   [0m[38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m2[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m1.09[0m)
[3

([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m166[0m, [33mQ_v[0m=[38;2;72;192;159m0.95[0m, [33mbest[0m=[96m1.00[0m, [33mubc[0m=[96m1.07[0m)
[38;2;127;0;255m├── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m2[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m1.13[0m)
[38;2;127;0;255m│   [0m[38;2;127;0;255m└── [0m([33ma[0m=[38;2;127;0;255m0[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.59[0m)
[38;2;127;0;255m├── [0m([33ma[0m=[38;2;0;180;235m1[0m, [33mN[0m=[96m2[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m1.13[0m)
[38;2;127;0;255m│   [0m[38;2;0;180;235m└── [0m([33ma[0m=[38;2;255;178;96m3[0m, [33mN[0m=[96m1[0m, [33mQ_v[0m=[38;2;255;100;128m0.00[0m, [33mbest[0m=[31m0.00[0m, [33mubc[0m=[96m0.59[0m)
[38;2;127;0;255m├── [0m([33ma[0m=[38;2;128;254;179m2


[41mS[0mFFF
FHFH
FFFH
HFFG

  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG

  (Left)
[41mS[0mFFF
FHFH
FFFH
HFFG

  (Left)
SFFF
[41mF[0mHFH
FFFH
HFFG

  (Down)
SFFF
FHFH
[41mF[0mFFH
HFFG

  (Right)
SFFF
FHFH
F[41mF[0mFH
HFFG

  (Up)
SFFF
FHFH
FF[41mF[0mH
HFFG

  (Left)
SFFF
FHFH
FFFH
HF[41mF[0mG

  (Up)
SFFF
FHFH
FFFH
HFF[41mG[0m

Environment solved in 8 steps.
