# In [None]:
def EnvironmentSetup():
    """Create the CliffWalking environment, reset it, and print a summary.

    Prints the environment id, the initial state, the action and observation
    spaces, the action legend, and the reward scheme.

    Returns:
        tuple: ``(env, state)`` where ``env`` is the object returned by
        ``gym.make`` and ``state`` is the first element returned by
        ``env.reset()``.
    """
    import gymnasium as gym
    import numpy as np

    # Compatibility shim: recreate the ``np.bool8`` alias when this NumPy
    # build does not provide it, so code that still refers to it keeps working.
    if not hasattr(np, "bool8"):
        np.bool8 = np.bool_

    # Single source of truth for the environment id, so the printed banner
    # cannot disagree with the environment that is actually created.
    env_id = 'CliffWalking-v1'
    env = gym.make(env_id)
    state, _ = env.reset()

    print(f"=== {env_id} ===")
    print("Initial State:", state)
    print("Action Space:", env.action_space)        # Discrete(4)
    print("Observation Space:", env.observation_space)  # Discrete(48)
    print("Actions: 0=Up, 1=Right, 2=Down, 3=Left")
    # Stepping into the cliff costs -100 and sends the agent back to the
    # start; only reaching the goal terminates the episode.
    print("Reward Scheme: step=-1, cliff=-100 (back to start), goal=-1 (done)")

    return env, state

# Build the environment once; the calls below reuse `env`.
env, state = EnvironmentSetup()

def InteractionLoop(env, num_episodes=2):
    """Play ``num_episodes`` episodes with randomly sampled actions, logging each step.

    Every step prints the state, the sampled action, the reward, the next
    state, and whether the episode has ended (terminated or truncated).
    After each episode the accumulated reward is printed.

    Args:
        env: Object providing ``reset()``, ``step(action)`` and
            ``action_space.sample()``.
        num_episodes: Number of episodes to run.
    """
    for episode in range(num_episodes):
        state, _ = env.reset()
        step_count = 0
        total_reward = 0
        while True:
            action = env.action_space.sample()
            next_state, reward, terminated, truncated, _ = env.step(action)
            # Fold truncation into the flag so either condition ends the episode.
            terminated = terminated or truncated
            total_reward += reward
            print(f"Step {step_count}: State={state}, Action={action}, Reward={reward}, Next State={next_state}, Terminated={terminated}")
            state = next_state
            step_count += 1
            if terminated:
                break
        print(f"Episode {episode+1} ended with total reward: {total_reward}\n")

# Log two random-policy episodes step by step.
InteractionLoop(env, 2)

def VisualizePathRun(env, rows=4, cols=12):
    """Run one random-policy episode and print the visited cells as a grid.

    Each visited cell is labelled with the index of the step at which it was
    (last) visited; unvisited cells show ``'-'``. A state is mapped to a cell
    with ``divmod(state, cols)``.

    Args:
        env: Object providing ``reset()``, ``step(action)`` and
            ``action_space.sample()``.
        rows: Number of grid rows.
        cols: Number of grid columns.

    Returns:
        list: The visited states in order, starting with the state returned
        by ``env.reset()``.
    """
    import numpy as np

    def visualize_path(path, rows=4, cols=12):
        """Print a ``rows`` x ``cols`` grid labelling visited cells by step index."""
        # Size the string dtype for the widest label: the default one-character
        # dtype would silently truncate step 10 and beyond to a single digit.
        width = max(1, len(str(len(path) - 1)))
        grid = np.full((rows, cols), '-', dtype=f'<U{width}')
        for step, state in enumerate(path):
            r, c = divmod(state, cols)
            grid[r, c] = str(step)  # a revisited cell keeps its latest step
        print(grid)

    state, _ = env.reset()
    path = [state]
    done = False
    while not done:
        action = env.action_space.sample()
        state, _, terminated, truncated, _ = env.step(action)
        # Either termination or truncation ends the episode.
        done = terminated or truncated
        path.append(state)
    visualize_path(path, rows, cols)
    # Hand the path back so callers that store the result get real data.
    return path

# Run one random episode, print its path grid, and keep the function's result.
path_return = VisualizePathRun(env)

def TrackCumulativeRewards(env, n_episodes=10, plot=True):
    """Run random-policy episodes and collect the total reward of each one.

    Args:
        env: Object providing ``reset()``, ``step(action)`` and
            ``action_space.sample()``.
        n_episodes: Number of episodes to run.
        plot: When true, show a matplotlib line plot of reward per episode.

    Returns:
        list: One total reward per episode, in episode order.
    """
    rewards = []
    for _ in range(n_episodes):
        env.reset()
        total_reward = 0
        done = False
        while not done:
            action = env.action_space.sample()
            _, reward, terminated, truncated, _ = env.step(action)
            # Either termination or truncation ends the episode.
            done = terminated or truncated
            total_reward += reward
        rewards.append(total_reward)

    if plot:
        # Imported lazily so that plot=False works without matplotlib.
        import matplotlib.pyplot as plt

        plt.figure()
        plt.plot(rewards)
        plt.xlabel("Episode")
        plt.ylabel("Total Reward")
        plt.title("Random Policy Reward per Episode (CliffWalking)")
        plt.show()
    return rewards

# Collect and plot total rewards for ten random-policy episodes.
rewards = TrackCumulativeRewards(env, n_episodes=10, plot=True)
