In [None]:
import gymnasium as gym

from stable_baselines3 import A2C

env = gym.make("CartPole-v1", render_mode="rgb_array")

model = A2C("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10_000)

vec_env = model.get_env()
obs = vec_env.reset()
for i in range(1000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, done, info = vec_env.step(action)
    vec_env.render("human")
    # VecEnv resets automatically
    # if done:
    obs = vec_env.reset()

RuntimeError: module compiled against API version 0xf but this version of numpy is 0xe

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 34.5     |
|    ep_rew_mean        | 34.5     |
| time/                 |          |
|    fps                | 567      |
|    iterations         | 100      |
|    time_elapsed       | 0        |
|    total_timesteps    | 500      |
| train/                |          |
|    entropy_loss       | -0.68    |
|    explained_variance | -0.53    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 2.05     |
|    value_loss         | 13.9     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 31.5     |
|    ep_rew_mean        | 31.5     |
| time/                 |          |
|    fps                | 590      |
|    iterations         | 200      |
|    time_elapsed 

------------------------------------
| rollout/              |          |
|    ep_len_mean        | 62.2     |
|    ep_rew_mean        | 62.2     |
| time/                 |          |
|    fps                | 610      |
|    iterations         | 1400     |
|    time_elapsed       | 11       |
|    total_timesteps    | 7000     |
| train/                |          |
|    entropy_loss       | -0.458   |
|    explained_variance | -7.5e-05 |
|    learning_rate      | 0.0007   |
|    n_updates          | 1399     |
|    policy_loss        | 0.397    |
|    value_loss         | 1.36     |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 65.2     |
|    ep_rew_mean        | 65.2     |
| time/                 |          |
|    fps                | 612      |
|    iterations         | 1500     |
|    time_elapsed       | 12       |
|    total_timesteps    | 7500     |
| train/                |          |
|

In [None]:
pip install pygame

In [None]:
import gymnasium as gym

from stable_baselines3 import DQN

env = gym.make("CartPole-v1", render_mode="human")

model = DQN("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000, log_interval=4)
model.save("dqn_cartpole")

del model # remove to demonstrate saving and loading

model = DQN.load("dqn_cartpole")

obs, info = env.reset()
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        obs, info = env.reset()

In [None]:
import gymnasium as gym
env = gym.make("LunarLander-v2", render_mode="human")
observation, info = env.reset()

for _ in range(1000):
    action = env.action_space.sample()  # agent policy that uses the observation and info
    print(f"action: {action}")
    observation, reward, terminated, truncated, info = env.step(action)
    print(f"observation: {observation}, reward: {reward}, Terminated: {terminated}, Truncated: {truncated}, info: {info} ")

    if terminated or truncated:
        observation, info = env.reset()

env.close()

In [None]:
env.action_space  # agent policy that uses the observation and info


In [None]:
%lsmagic

In [None]:
%timeit

In [None]:
import gymnasium as gym

In [None]:
gym.envs.registry.keys()

In [None]:
gym.envs.registry.keys()

In [None]:
gym.envs.registry

In [None]:
gym.envs.registry.keys()

In [None]:
gym.envs.registry.keys().make

In [None]:
env = gym.make("LunarLander-v2", render_mode="human")

In [None]:
env.observation_space.sample()

# Making Custom Environment

In [1]:
import gymnasium as gym

In [2]:
from gymnasium import spaces

In [3]:
import numpy as np

In [4]:
import pygame

In [None]:
class MazeGameEnv(gym.Env):
    def __init__(self,maze):
        self.maze=np.array(maze) #maze represented as a 2D numpy array
        print(self.maze)
        self.start_pos=np.where(self.maze=='S')
        print(self.start_pos)
        self.goal_pos=np.where(self.maze=='G') # Goal position
        print(self.goal_pos)
        self.current_pos=self.start_pos # Starting position is the current position initially
        print(self.current_pos)
        self.num_rows,self.num_cols=self.maze.shape
        print(f"num rows: {self.num_rows} and num columns: {self.num_cols}")
        #4 possible actions, 0=up, 1=down,2=left,3=right
        self.action_space=spaces.discrete(4)
        print(f"Current Action: {self.action_space}")
        
        # observation_space is a grid of size: rowsxcolumns
        self.observation_space=spaces.Tuple((spaces.Discrete(self.num_rows),spaces.Discrete(self.num_cols)))
        print(f"Observation Space: {self.observation_space}")
        
        #initialize the pygame
        pygame.init()
        self.cell_size=125
        
        #setting Display Size
        self.screen=pygame.display.set_mode((self.num_cols*self.cell_size, self.num_rows*self.cell_size))
    
    
    def reset(self):
        self.current_pos=self.start_pos
        return self.current_pos
    
    def step(self,action):
        #Move the agent based on the selected action
        new_pos=np.array(self.current_pos)
        
        if action==0: #Up
            new_pos[0] -=1
            print(f"new position after UP: {new_pos}")
        elif action==1:
            new_pos[0] +=1
            print(f"new position after down: {new_pos}")
            
        
        elif action==2: # Left
            new_pos[1]-=1
            print(f"new position after Left: {new_pos}")
            
        
        elif action==3: #Right
            new_pos[1]+=1
            print(f"new position after Right: {new_pos[0]}")
            
        
        #Check if the new position is valid
        if self.is_valid_position(new_pos):
            self.current_pos=new_pos
            
        #Reward Function
        if np.array_equal(self.current_pos,self.goal_pos):
            reward=1.0
            done=True
        else:
            reward=0.0
            done=False
        return self.current_pos,reward,done, {}
    
    def is_valid_position(self, pos):
        row,col=pos
        # if agent goes out of the grid
        if row<0 or col<0 or row>=self.num_rows or col>=self.num_cols:
            return false
        
        #if the agent hits an obstacle
        if self.maze[row,col]=='#':
            return False
        
        return True
    
    def render(self):
        #clear the screen
        self.screen.fill((255,255,255))
        
        # Draw env elements one cell at a time
        for row in range(self.num_rows):
            for col in range(self.num_cols):
                cell_left=col*self.cell_size
                cell_top=row*self.cell_size
                
                try:
                    print(np.array(self.current_pos)==np.array([row,col]).reshape(-1,1))
                except Exception as e:
                    print('Initial State')
                    
                if self.maze[row,col]=='#': #Obstacle
                    pygame.draw.rect(self.screen,(0,0,0),(cell_left,cell_top,self.cell_size,self.cell_size))
                
                elif self.maze[row,col]=='S': # Starting postion
                    pygame.draw.rect(self.screen,(0,255,0),(cell_left,cell_top,self.cell_size,self.cell_size))
                
                elif self.maze[row,col]=='G': # Goal postion
                    pygame.draw.rect(self.screen,(0,0,255),(cell_left,cell_top,self.cell_size,self.cell_size))
        pygame.display.update() #update the display

# Register the environment

In [5]:
gym.register(id='MazeGame-v0',entry_point='mazegame:MazeGameEnv',kwargs={'maze':None})

# Load the environment

In [6]:
maze= [
    ['S','','.','.'],
    ['.','#','.','#'],
    ['.','.','.','.'],
    ['#',".","#",'G']
]

# Test the environment

In [7]:
env=gym.make('MazeGame-v0',maze=maze)

[['S' '' '.' '.']
 ['.' '#' '.' '#']
 ['.' '.' '.' '.']
 ['#' '.' '#' 'G']]
(array([0]), array([0]))
(array([3]), array([3]))
(array([0]), array([0]))
num rows: 4 and num columns: 4
Current Action: Discrete(4)
Observation Space: Tuple(Discrete(4), Discrete(4))
