# In [None]:
def EnvironmentSetup():
    """Create the non-slippery FrozenLake-v1 environment and print a summary.

    Prints the value returned by ``env.reset()``, the action space, the
    observation space and a legend for the four action codes.

    Returns:
        A ``(env, state)`` pair, where ``state`` is whatever ``env.reset()``
        returned, passed through untouched.
    """
    import gymnasium as gym
    import numpy as np

    # Restore the ``np.bool8`` alias when this NumPy build lacks it
    # (presumably for libraries that still reference the old name).
    try:
        np.bool8
    except AttributeError:
        np.bool8 = np.bool_

    # Not referenced below; the import itself is kept as-is.
    # NOTE(review): confirm whether it can be dropped.
    import matplotlib.pyplot as plt  # noqa: F401

    environment = gym.make('FrozenLake-v1', is_slippery=False)  # deterministic environment
    initial = environment.reset()

    summary = (
        ("Initial State:", initial),
        ("Action Space:", environment.action_space),
        ("Observation Space:", environment.observation_space),
    )
    for label, value in summary:
        print(label, value)
    print("Actions: 0=Left, 1=Down, 2=Right, 3=Up")

    return environment, initial
# Build the environment once; the calls further down all receive this ``env``.
env,state=EnvironmentSetup()

def InteractionLoop(env,num_episodes=5):
    """Play episodes with randomly sampled actions and log every transition.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` (returning a
            5-tuple) and ``action_space.sample()``.
        num_episodes: Number of episodes to run.

    Prints one line per step and a total-reward line per episode.
    Returns nothing.
    """
    for episode in range(num_episodes):
        state = env.reset()
        # reset() may hand back (observation, info); keep only the observation
        # so the first logged "State=" matches the later ones.
        if isinstance(state, tuple):
            state = state[0]
        terminated = False
        total_reward = 0
        step_count = 0
        while not terminated:
            action = env.action_space.sample()
            next_state, reward, terminated, truncated, info = env.step(action)
            # Treat a time-limit truncation as the end of the episode too.
            terminated = terminated or truncated
            total_reward += reward
            print(f"Step {step_count}: State={state}, Action={action}, Reward={reward}, Next State={next_state}, Terminated={terminated}")
            state = next_state
            step_count += 1
        print(f"Episode {episode+1} ended with total reward: {total_reward}\n")
# Run five random-action episodes, printing every step.
InteractionLoop(env,5)

############
def VisualizePathComparison(env,size=4,num_episodes=3):
    """Run random-action episodes, print each path, and compare their lengths.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` (returning a
            5-tuple) and ``action_space.sample()``; states are flat indices
            into a ``size`` x ``size`` grid.
        size: Side length of the square grid used to lay out the states.
        num_episodes: Number of episodes to run.

    For every episode this prints a grid holding the last step index at which
    each cell was visited, a visit-count grid and the most visited cell; it
    then prints the step count of every episode and the shortest one.
    Returns nothing.
    """
    import numpy as np

    def visualize_path(path, size=4):
        # Size the string dtype to the widest step label: the dtype inferred
        # from '-' alone is one character and would cut "12" down to "1".
        width = len(str(max(len(path) - 1, 0)))
        grid = np.full((size, size), '-', dtype=f"<U{width}")
        visit_count = np.zeros((size, size), dtype=int)
        for step, state in enumerate(path):
            row, col = divmod(state, size)
            grid[row, col] = str(step)
            visit_count[row, col] += 1
        print("Path Grid (Step Order):")
        print(grid)
        print("\nVisit Count Grid:")
        print(visit_count)
        # Plain ints keep the printed cell as "(r, c)" on every NumPy version.
        most_visited = tuple(
            int(i) for i in np.unravel_index(np.argmax(visit_count), (size, size))
        )
        print(f"\nMost visited cell: {most_visited} visited {visit_count[most_visited]} times")

    paths = []
    steps_list = []

    for ep in range(num_episodes):
        state = env.reset()
        if isinstance(state, tuple):
            state = state[0]
        done = False
        path = [state]
        step_count = 0
        while not done:
            action = env.action_space.sample()
            state, reward, terminated, truncated, info = env.step(action)
            # A truncated episode ends the loop just like a terminated one.
            done = terminated or truncated
            path.append(state)
            step_count += 1
        print(f"\nEpisode {ep+1} finished in {step_count} steps")
        # Forward the caller's grid size instead of relying on the 4x4 default.
        visualize_path(path, size)
        paths.append(path)
        steps_list.append(step_count)

    print("\n--- Path Efficiency Comparison ---")
    if not steps_list:
        # argmin of an empty sequence raises; there is nothing to compare.
        print("No episodes to compare.")
        return
    for i, steps in enumerate(steps_list):
        print(f"Episode {i+1}: {steps} steps")
    best_ep = int(np.argmin(steps_list))
    print(f"\nMost efficient path: Episode {best_ep+1} with {steps_list[best_ep]} steps")

# Compare three random-action episodes laid out on a 4x4 grid.
VisualizePathComparison(env,size=4,num_episodes=3)
#####

def VisualizePathRun(env,size=4):
    """Run one random-action episode and print the path it took on the grid.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` (returning a
            5-tuple) and ``action_space.sample()``; states are flat indices
            into a ``size`` x ``size`` grid.
        size: Side length of the square grid used to lay out the states.

    Returns:
        The list of visited states, starting with the state after ``reset()``.
    """
    import numpy as np

    def visualize_path(path, size=4):
        # Size the string dtype to the widest step label: the dtype inferred
        # from '-' alone is one character and would cut "12" down to "1".
        width = len(str(max(len(path) - 1, 0)))
        grid = np.full((size, size), '-', dtype=f"<U{width}")
        for step, state in enumerate(path):
            row, col = divmod(state, size)
            grid[row, col] = str(step)
        print(grid)

    state = env.reset()
    if isinstance(state, tuple):
        state = state[0]
    done = False
    path = [state]
    while not done:
        action = env.action_space.sample()
        # Record the state reached by this step, not the stale starting state.
        state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        path.append(state)
    # Forward the caller's grid size instead of relying on the 4x4 default.
    visualize_path(path, size)
    return path
# Run one random-action episode, print its path grid, and keep the function's return value.
path_return=VisualizePathRun(env)

def TrackCumulativeRewards(env,n_episodes=10,plot=True):
    """Collect the total reward of each random-action episode, optionally plotting it.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` (returning a
            5-tuple) and ``action_space.sample()``.
        n_episodes: Number of episodes to run.
        plot: When true, show a matplotlib line plot of reward per episode.

    Returns:
        A list with one total reward per episode, in episode order.
    """
    rewards = []
    for _ in range(n_episodes):
        env.reset()
        done = False
        total_reward = 0
        while not done:
            action = env.action_space.sample()
            _, reward, terminated, truncated, _ = env.step(action)
            # A truncated episode ends the loop just like a terminated one.
            done = terminated or truncated
            total_reward += reward
        rewards.append(total_reward)
    if plot:
        # Imported only here so plot=False works without matplotlib installed.
        import matplotlib.pyplot as plt

        plt.figure()
        plt.plot(rewards)
        plt.xlabel("Episode")
        plt.ylabel("Total Reward")
        plt.title("Random Policy Reward per Episode")
        plt.show()
    return rewards
# Collect and plot the total reward of ten random-action episodes.
rewards=TrackCumulativeRewards(env,n_episodes=10,plot=True)
