# PufferLib Docs

In [1]:
# Build the environment factory for Atari Breakout. `env_creator` is the
# callable handed to pufferlib.vector.make in the cells that follow.
from pufferlib.environments import atari
env_creator = atari.env_creator('breakout')

In [2]:
import pufferlib.vector

# Fully spelled-out call: every argument of interest is passed explicitly.
vecenv = pufferlib.vector.make(
    env_creator,  # callable (class or function) that returns an env
    num_envs=1,  # total number of environments to create
    backend=pufferlib.vector.Serial,  # one of pufferlib.vector.[Serial|Multiprocessing|Native|Ray]
    env_args=None,  # list of positional arguments passed to each environment
    env_kwargs=None,  # keyword arguments for each environment (list of dicts; a plain dict is also used later in this notebook)
)

A.L.E: Arcade Learning Environment (version 0.9.0+750d7f9)
[Powered by Stella]


Make 4 copies of Breakout on the current process

In [3]:
# Serial backend: the four Breakout copies live in the current process.
vecenv = pufferlib.vector.make(
    env_creator,
    backend=pufferlib.vector.Serial,
    num_envs=4,
)

Make 4 copies of Breakout, each on a separate process

In [4]:
# Multiprocessing backend: four Breakout copies, each on a separate process.
vecenv = pufferlib.vector.make(env_creator, num_envs=4,
    backend=pufferlib.vector.Multiprocessing)
# This vecenv is only a construction demo and the next cell rebinds the name.
# Close it so its worker processes are not left running in the background.
vecenv.close()

Make 4 copies of Breakout, 2 on each of 2 processes

In [5]:
# Multiprocessing backend with two workers: the four Breakout copies are
# split two per process.
vecenv = pufferlib.vector.make(env_creator, num_envs=4,
    backend=pufferlib.vector.Multiprocessing, num_workers=2)
# This vecenv is only a construction demo and the next cell rebinds the name.
# Close it so its worker processes are not left running in the background.
vecenv.close()

Make 4 copies of Breakout, 2 on each of 2 processes,
but only get two observations per step

In [6]:
# Same two-worker layout as before, but batch_size=2 asks for only two
# observations per step instead of all four.
vecenv = pufferlib.vector.make(env_creator, num_envs=4,
    backend=pufferlib.vector.Multiprocessing, num_workers=2,
    batch_size=2)
# This vecenv is only a construction demo and the next cell rebinds the name.
# Close it so its worker processes are not left running in the background.
vecenv.close()

Make 1024 instances of Ocean breakout on the current process

In [7]:
from pufferlib.ocean import Breakout

# The 1024 instances are requested through Breakout's own `num_envs` keyword;
# no vector-level num_envs is passed to make().
vecenv = pufferlib.vector.make(
    Breakout,
    env_kwargs=dict(num_envs=1024),
    backend=pufferlib.vector.Serial,
)

Notice that Native envs handle multiple instances internally.
You can still multiprocess/async, but don't make multiple external
copies per process.

In [8]:
# Ocean Breakout across processes: two envs, one delivered per batch.
# NOTE(review): the recorded output of this cell shows the worker processes
# raising AttributeError ('Breakout' object has no attribute 'tick') inside step.
vecenv = pufferlib.vector.make(
    Breakout,
    batch_size=1,
    backend=pufferlib.vector.Multiprocessing,
    num_envs=2,
)

Process Process-10:
Traceback (most recent call last):
  File "/home/fitti/.conda/envs/puffer/lib/python3.11/site-packages/pufferlib/ocean/breakout/breakout.py", line 44, in step
    if self.tick % self.report_interval == 0:
       ^^^^^^^^^
  File "/home/fitti/.conda/envs/puffer/lib/python3.11/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/home/fitti/.conda/envs/puffer/lib/python3.11/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/fitti/.conda/envs/puffer/lib/python3.11/site-packages/pufferlib/vector.py", line 185, in _worker_process
    _, _, _, _, infos = envs.step(atn_arr)
                        ^^^^^^^^^^^^^^^^^^
AttributeError: 'Breakout' object has no attribute 'tick'
Process Process-9:
  File "/home/fitti/.conda/envs/puffer/lib/python3.11/site-packages/pufferlib/vector.py", line 185, in _worker_process
    _, _, _, _, infos = envs.step(atn_arr)
                        ^^^^^^^^^^^^^^^^^^
Tra

## FPS Comparison — Synchronous API vs. Async API

### Synchronous API - reset/step

In [9]:
import time

# Benchmark the synchronous reset/step API for roughly TIMEOUT seconds.
TIMEOUT = 3
vecenv = pufferlib.vector.make(Breakout, num_envs=2,
    backend=pufferlib.vector.Multiprocessing)
try:
    vecenv.reset()
    steps = 0
    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    while time.perf_counter() - start < TIMEOUT:
        vecenv.step(vecenv.action_space.sample())
        steps += 1
    # The final step always ends after the deadline, so divide by the time
    # actually spent rather than by the nominal TIMEOUT.
    elapsed = time.perf_counter() - start
finally:
    # Shut the vecenv down even if reset/step raised.
    vecenv.close()

print('Puffer FPS: {:.2f}'.format(steps*vecenv.num_envs/elapsed))

Puffer FPS: 212672.00


### Async API - async_reset, send/recv

Call your model between recv() and send()

In [10]:
# Benchmark the async API (async_reset, then recv/send) for roughly TIMEOUT
# seconds. Relies on `time` having been imported by the synchronous benchmark cell.
TIMEOUT = 3
vecenv = pufferlib.vector.make(Breakout, num_envs=2,
    backend=pufferlib.vector.Multiprocessing, batch_size=1)
try:
    vecenv.async_reset()
    steps = 0
    # perf_counter is monotonic, so the interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time.perf_counter()
    while time.perf_counter() - start < TIMEOUT:
        vecenv.recv()
        # A model would choose the actions here, between recv() and send().
        vecenv.send(vecenv.action_space.sample())
        steps += 1
    # The final iteration always ends after the deadline, so divide by the
    # time actually spent rather than by the nominal TIMEOUT.
    elapsed = time.perf_counter() - start
finally:
    # Shut the vecenv down even if recv/send raised.
    vecenv.close()

print('Puffer Async FPS: {:.2f}'.format(steps*vecenv.num_envs/elapsed))

Puffer Async FPS: 215312.67


## My own experimentation

### Breakout

In [11]:
# Four Atari Breakout envs on the Multiprocessing backend; this vecenv is the
# one inspected by the following cells of this section.
vecenv = pufferlib.vector.make(
    env_creator,
    backend=pufferlib.vector.Multiprocessing,
    num_envs=4,
)

In [12]:
# Reset the vecenv and list the type of each element it returns.
reset_return = vecenv.reset()
for element_type in map(type, reset_return):
    print(element_type)

<class 'numpy.ndarray'>
<class 'list'>


In [13]:
# Split the two-element reset return: an ndarray and a list (see the types
# printed above). Presumably (observations, infos) — TODO confirm.
first, second = reset_return

In [14]:
# Shape of the ndarray returned by reset; the leading axis matches num_envs=4.
first.shape

(4, 1, 105, 80)

In [15]:
# Second element of the reset return; presumably the per-env info dicts — TODO confirm.
second

[{}, {}, {}, {}]

In [16]:
# IPython introspection: show the signature and source file of vecenv.reset.
vecenv.reset?

[0;31mSignature:[0m [0mvecenv[0m[0;34m.[0m[0mreset[0m[0;34m([0m[0mseed[0m[0;34m=[0m[0;36m42[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mFile:[0m      ~/.conda/envs/puffer/lib/python3.11/site-packages/pufferlib/vector.py
[0;31mType:[0m      method

Interesting to note: `reset` has a default seed (`seed=42`), so calling it without arguments is seeded.

In [17]:
# Take one step with randomly sampled actions and list the type of each
# element of the result.
step_return = vecenv.step(vecenv.action_space.sample())
for element_type in map(type, step_return):
    print(element_type)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'list'>


In [18]:
# Unpack the five-element step return (four ndarrays and a list).
# NOTE(review): the names assume Gymnasium-style ordering
# (obs, reward, terminated, truncated, info) — confirm against pufferlib.vector.
next_states, rewards, dones, truncateds, infos = step_return

In [19]:
# Shapes of the four arrays from the step; `infos` is a list, so it is shown as-is.
next_states.shape, rewards.shape, dones.shape, truncateds.shape, infos

((4, 1, 105, 80), (4,), (4,), (4,), [])

In [20]:
# Actual per-env values returned by this single step.
rewards, dones, truncateds

(array([0., 0., 0., 0.], dtype=float32),
 array([False, False, False, False]),
 array([False, False, False, False]))

### Crafter

In [21]:
from pufferlib.environments import crafter

In [22]:
# IPython introspection: summary of the imported crafter module.
crafter?

[0;31mType:[0m        module
[0;31mString form:[0m <module 'pufferlib.environments.crafter' from '/home/fitti/.conda/envs/puffer/lib/python3.11/site-packages/pufferlib/environments/crafter/__init__.py'>
[0;31mFile:[0m        ~/.conda/envs/puffer/lib/python3.11/site-packages/pufferlib/environments/crafter/__init__.py
[0;31mDocstring:[0m   <no docstring>

In [23]:
# Print the package help to see which submodules and data crafter exposes.
help(crafter)

Help on package pufferlib.environments.crafter in pufferlib.environments:

NAME
    pufferlib.environments.crafter

PACKAGE CONTENTS
    environment
    torch

DATA
    Recurrent = None

FILE
    /home/fitti/.conda/envs/puffer/lib/python3.11/site-packages/pufferlib/environments/crafter/__init__.py




In [24]:
# Check the signature of the factory function before calling it.
help(crafter.environment.env_creator)

Help on function env_creator in module pufferlib.environments.crafter.environment:

env_creator(name='crafter')



In [25]:
# Build the Crafter env factory, passing the documented default name explicitly.
# Note that this rebinds `env_creator`, which previously held the Atari Breakout factory.
env_creator = crafter.environment.env_creator(name='crafter')

In [26]:
# IPython introspection: see what kind of object the factory call returned.
env_creator?

[0;31mSignature:[0m      [0menv_creator[0m[0;34m([0m[0mbuf[0m[0;34m=[0m[0;32mNone[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mCall signature:[0m [0menv_creator[0m[0;34m([0m[0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mType:[0m           partial
[0;31mString form:[0m    functools.partial(<function make at 0x7fe8aa3e0e00>, 'crafter')
[0;31mFile:[0m           ~/.conda/envs/puffer/lib/python3.11/functools.py
[0;31mDocstring:[0m     
partial(func, *args, **keywords) - new function with partial application
of the given arguments and keywords.

In [27]:
# `vecenv` still refers to the Breakout Multiprocessing vecenv from the previous
# section. Close it before rebinding the name so its worker processes are not
# left running.
vecenv.close()
# Four Crafter envs on the Multiprocessing backend.
vecenv = pufferlib.vector.make(env_creator, num_envs=4,
    backend=pufferlib.vector.Multiprocessing)

In [28]:
# Reset the Crafter vecenv and list the type of each element it returns.
reset_return = vecenv.reset()
for element_type in map(type, reset_return):
    print(element_type)

<class 'numpy.ndarray'>
<class 'list'>


In [29]:
# Split the two-element reset return: an ndarray and a list (see the types
# printed above). Presumably (observations, infos) — TODO confirm.
first, second = reset_return

In [30]:
# Shape of the ndarray returned by reset; the leading axis matches num_envs=4.
first.shape

(4, 64, 64, 3)

In [31]:
# Second element of the reset return; presumably the per-env info dicts — TODO confirm.
second

[{}, {}, {}, {}]

In [32]:
env.reset?

Object `env.reset` not found.


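Caveat: the "not found" message above comes from querying `env`, a name that is not defined at this point, so it says nothing about whether a default seed is set. The object to inspect is `vecenv`; it uses the same `pufferlib.vector.Multiprocessing` backend as in the Breakout section, where `vecenv.reset` showed a default of `seed=42`.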

In [33]:
# Take one step with randomly sampled actions and list the type of each
# element of the result.
step_return = vecenv.step(vecenv.action_space.sample())
for element_type in map(type, step_return):
    print(element_type)

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'list'>


In [34]:
# Unpack the five-element step return (four ndarrays and a list).
# NOTE(review): the names assume Gymnasium-style ordering
# (obs, reward, terminated, truncated, info) — confirm against pufferlib.vector.
next_states, rewards, dones, truncateds, infos = step_return

In [35]:
# Shapes of the four arrays from the step; `infos` is a list, so it is shown as-is.
next_states.shape, rewards.shape, dones.shape, truncateds.shape, infos

((4, 64, 64, 3), (4,), (4,), (4,), [])

In [36]:
# Actual per-env values returned by this single step.
rewards, dones, truncateds

(array([0., 0., 0., 0.], dtype=float32),
 array([False, False, False, False]),
 array([False, False, False, False]))

### Random stuff

In [37]:
import gymnasium as gym
import pufferlib.emulation as emulation

# Create a standard Gymnasium environment
base_env = gym.make("CartPole-v1")

# Wrap it with PufferLib's Gymnasium emulation layer
env = emulation.GymnasiumPufferEnv(env=base_env)

# reset() follows the Gymnasium API and returns (observation, info), so unpack
# both; binding the whole return to `obs` would leave `obs` holding a tuple.
obs, info = env.reset()
print(env.observation_space, env.action_space)  # still standard spaces

Box([-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38], [4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38], (4,), float32) Discrete(2)


In [41]:
from pufferlib import wrappers

In [42]:
# List the wrapper classes that pufferlib.wrappers provides.
help(wrappers)

Help on module pufferlib.wrappers in pufferlib:

NAME
    pufferlib.wrappers

CLASSES
    builtins.object
        GymToGymnasium
        PettingZooTruncatedWrapper
    
    class GymToGymnasium(builtins.object)
     |  GymToGymnasium(env)
     |  
     |  Methods defined here:
     |  
     |  __init__(self, env)
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  
     |  close(self)
     |  
     |  reset(self, seed=None, options=None)
     |  
     |  step(self, action)
     |  
     |  ----------------------------------------------------------------------
     |  Data descriptors defined here:
     |  
     |  __dict__
     |      dictionary for instance variables
     |  
     |  __weakref__
     |      list of weak references to the object
    
    class PettingZooTruncatedWrapper(builtins.object)
     |  PettingZooTruncatedWrapper(env)
     |  
     |  Methods defined here:
     |  
     |  __init__(self, env)
     |      Initialize self.  See help