# In [None]:  (notebook cell prompt left over from the export)
def EnvironmentSetup():
    """Create a Taxi-v3 environment, reset it once, and print a summary.

    The summary lists the initial observation, the action and observation
    spaces, the meaning of each action index, and the reward scheme.

    Returns:
        tuple: ``(env, state)`` where ``env`` is the object returned by
        ``gym.make('Taxi-v3')`` and ``state`` is the observation returned
        by its first ``reset()``.
    """
    import gymnasium as gym
    import numpy as np

    # Re-create the ``np.bool8`` alias when the installed NumPy lacks it
    # (presumably for dependencies that still reference the old name --
    # TODO confirm which one needs it).
    if not hasattr(np, "bool8"):
        np.bool8 = np.bool_

    # Not used inside this function; import kept so behaviour is unchanged.
    import matplotlib.pyplot as plt  # noqa: F401

    env = gym.make('Taxi-v3')
    # gymnasium's reset() returns (observation, info); info is not needed.
    state, _info = env.reset()

    # Each tuple is one print() call, emitted in order.
    summary_lines = (
        ("=== Taxi-v3 ===",),
        ("Initial State:", state),
        ("Action Space:", env.action_space),            # Discrete(6)
        ("Observation Space:", env.observation_space),  # Discrete(500)
        ("Actions: 0=South, 1=North, 2=East, 3=West, 4=Pickup, 5=Dropoff",),
        ("Reward Scheme: move=-1, correct_dropoff=+20 (done), illegal_pick/drop=-10",),
    )
    for parts in summary_lines:
        print(*parts)

    return env, state

# Build the Taxi-v3 environment once; `env` is passed to the calls below.
env,state=EnvironmentSetup()

def InteractionLoop(env, num_episodes=2):
    """Run episodes with uniformly sampled actions, logging every step.

    For each step a line with the current state, the sampled action, the
    reward, the next state and the end-of-episode flag is printed; after
    each episode the accumulated reward is printed.

    Args:
        env: Environment exposing ``reset() -> (obs, info)``,
            ``step(action) -> (obs, reward, terminated, truncated, info)``
            and ``action_space.sample()``.
        num_episodes: Number of episodes to run.
    """
    for episode_number in range(1, num_episodes + 1):
        current_obs, _ = env.reset()
        episode_return = 0
        step_idx = 0

        while True:
            chosen_action = env.action_space.sample()
            following_obs, step_reward, ended, truncated, _ = env.step(chosen_action)
            # An episode stops on either natural termination or truncation.
            finished = ended or truncated
            episode_return += step_reward
            print(f"Step {step_idx}: State={current_obs}, Action={chosen_action}, Reward={step_reward}, Next State={following_obs}, Terminated={finished}")
            current_obs = following_obs
            step_idx += 1
            if finished:
                break

        print(f"Episode {episode_number} ended with total reward: {episode_return}\n")

# Run two random-action episodes, printing every step.
InteractionLoop(env,2)

def VisualizePathRun(env, rows=5, cols=5):
    """Roll out one random-action episode and print the taxi's path on a grid.

    Every visited cell is labelled with the index of the step at which the
    taxi was there; a cell visited more than once shows the latest step.
    Unvisited cells show ``'-'``.

    Args:
        env: Environment exposing ``reset()``, ``step(action)``,
            ``action_space.sample()`` and ``unwrapped.decode(state)``; the
            first two values decoded from a state are used as the taxi's
            (row, col).
        rows: Number of grid rows to draw.
        cols: Number of grid columns to draw.

    Returns:
        list[tuple]: The visited ``(row, col)`` cells in order, starting
        with the cell the taxi occupied right after ``reset()``.
    """
    import numpy as np

    def visualize_path(path, rows=5, cols=5):
        """Print a rows x cols grid with each visited cell's step index."""
        # Size the string dtype to the longest label. Letting NumPy infer
        # the dtype from '-' gives one-character cells, which silently
        # truncates labels such as "10" to "1".
        label_width = len(str(max(len(path) - 1, 0)))
        grid = np.full((rows, cols), '-', dtype=f'<U{label_width}')
        for step, (r, c) in enumerate(path):
            grid[r, c] = str(step)
        print(grid)

    def taxi_cell(state):
        """Return the taxi's (row, col) decoded from an encoded state."""
        taxi_row, taxi_col, _pass_loc, _dest = env.unwrapped.decode(state)
        return (taxi_row, taxi_col)

    state, _info = env.reset()
    path = [taxi_cell(state)]
    done = False
    while not done:
        action = env.action_space.sample()
        state, _reward, terminated, truncated, _info = env.step(action)
        # Stop on natural termination as well as on truncation.
        done = terminated or truncated
        path.append(taxi_cell(state))

    visualize_path(path, rows, cols)
    # Hand the path back so callers that store the result get real data.
    return path

# Roll out one random episode and print the grid of visited cells;
# the call's result is kept in `path_return`.
path_return=VisualizePathRun(env)

def TrackCumulativeRewards(env, n_episodes=10, plot=True):
    """Run random-action episodes and collect each episode's total reward.

    Args:
        env: Environment exposing ``reset()``,
            ``step(action) -> (obs, reward, terminated, truncated, info)``
            and ``action_space.sample()``.
        n_episodes: Number of episodes to run.
        plot: When true, show a matplotlib line plot of the totals.

    Returns:
        list: One summed reward per episode, in episode order (empty when
        ``n_episodes`` is 0).
    """
    rewards = []
    for _ in range(n_episodes):
        env.reset()
        episode_total = 0
        done = False
        while not done:
            action = env.action_space.sample()
            _obs, reward, terminated, truncated, _info = env.step(action)
            # Stop on natural termination as well as on truncation.
            done = terminated or truncated
            episode_total += reward
        rewards.append(episode_total)

    if plot:
        # Imported only when plotting, so collecting rewards with
        # plot=False works in environments without matplotlib.
        import matplotlib.pyplot as plt

        plt.figure()
        plt.plot(rewards)
        plt.xlabel("Episode")
        plt.ylabel("Total Reward")
        plt.title("Random Policy Reward per Episode (Taxi-v3)")
        plt.show()
    return rewards

# Collect total rewards over 10 random-policy episodes and plot them.
rewards=TrackCumulativeRewards(env,n_episodes=10,plot=True)
