# About this notebook
The scope of this notebook is to have a better understanding of how AI-Economist environment works and how to interact with it.

## Utils and libraries

In [1]:
from ai_economist import foundation
import os, sys
import numpy as np
import gym

# Same environment as economic_simulation_basia.ipynb

# Define the configuration of the environment that will be built

env_config = {'env_config_dict' : {
    
    'scenario_name': 'layout_from_file/simple_wood_and_stone',
    
    'components': [
        # (1) Building houses
        ('Build', {'skill_dist': "pareto", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 5}),
        # (3) Movement and resource collection
        ('Gather', {}),
    ],
    
    'env_layout_file': 'quadrant_25x25_20each_30clump.txt',
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,
    
    'n_agents': 2,          # Number of non-planner agents (must be > 1)
    'world_size': [25, 25], # [Height, Width] of the env world
    'episode_length': 1000, # Number of timesteps per episode
    
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': True,
    
    'flatten_observations': True,

    'flatten_masks': True,
    }
}

env_config1 = {
    
    'scenario_name': 'layout_from_file/simple_wood_and_stone',
    
    'components': [
        # (1) Building houses
        ('Build', {'skill_dist': "pareto", 'payment_max_skill_multiplier': 3}),
        # (2) Trading collectible resources
        ('ContinuousDoubleAuction', {'max_num_orders': 5}),
        # (3) Movement and resource collection
        ('Gather', {}),
    ],
    
    'env_layout_file': 'quadrant_25x25_20each_30clump.txt',
    'starting_agent_coin': 10,
    'fixed_four_skill_and_loc': True,
    
    'n_agents': 2,          # Number of non-planner agents (must be > 1)
    'world_size': [25, 25], # [Height, Width] of the env world
    'episode_length': 1000, # Number of timesteps per episode
    
    'multi_action_mode_agents': False,
    'multi_action_mode_planner': True,
    
    'flatten_observations': True,

    'flatten_masks': True,
    }


Notes about how to use dictionaries:
clear()	Removes all the elements from the dictionary  
copy()	Returns a copy of the dictionary  
fromkeys()	Returns a dictionary with the specified keys and value  
get()	Returns the value of the specified key  
items()	Returns a list containing a tuple for each key value pair  
keys()	Returns a list containing the dictionary's keys  
pop()	Removes the element with the specified key  
popitem()	Removes the last inserted key-value pair  
setdefault()	Returns the value of the specified key. If the key does not exist: insert the key, with the specified value   
update()	Updates the dictionary with the specified key-value pairs  
values()	Returns a list of all the values in the dictionary  

# 1. Environment wrapping with 'tutorials.rllib.env_wrapper'

In [2]:
from tutorials.rllib.env_wrapper import RLlibEnvWrapper
wrapped_env = RLlibEnvWrapper(env_config)

In [3]:
wrapped_env.n_agents

2

In [4]:
wrapped_env.reset()
obs,rew,done,_ = wrapped_env.step({
'0':1,
'1':1
})
print(f"Type of obs: {type(obs)}, Type of rew: {type(rew)}, Type of done: {type(done)}")

Type of obs: <class 'dict'>, Type of rew: <class 'dict'>, Type of done: <class 'dict'>


In [5]:
# print(obs.get('0'))
type(obs['0'].get('world-map'))

numpy.ndarray

In [7]:
obs['0']

{'world-map': array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.]],
 
        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0.],
         [0., 0., 0., 0., 0., 1., 1., 0., 1., 0.,

so we know that  
#### action_space is DISCRETE  
#### observation_space is DICT
this means we can do tests with stablebaselines3

# ⚠
obs, rew, done are numpy datatype formatted

# 2. Environment wrapping with 'env_wrapper.py'
Basically it's 'tutorials.rllib.env_wrapper' without RLLib dependencies

In [7]:
from env_wrapped_tmp import EnvWrapper
env_wrapped_tmp = EnvWrapper(env_config)

In [8]:
env_wrapped_tmp.reset()
obs,rew,done,_ = env_wrapped_tmp.step({
'0':1,
'1':1
})
print(f"Type of obs: {type(obs)}, Type of rew: {type(rew)}, Type of done: {type(done)}")

Type of obs: <class 'dict'>, Type of rew: <class 'dict'>, Type of done: <class 'dict'>


In [9]:
env_wrapped_tmp.n_agents

2

SyntaxError: invalid syntax (<ipython-input-10-71d77311f1b3>, line 1)

In [17]:
type(obs['0'].get('world-map'))
# obs

numpy.ndarray

In [18]:
print(f"{type(env_wrapped_tmp.action_space)},{type(env_wrapped_tmp.observation_space)}")

<class 'gym.spaces.discrete.Discrete'>,<class 'gym.spaces.dict.Dict'>


In [19]:
env_wrapped_tmp.observation_space

Dict(Build-build_payment:Box([-1.e+20], [1.e+20], (1,), float64), Build-build_skill:Box([-1.e+20], [1.e+20], (1,), float64), ContinuousDoubleAuction-available_asks-Stone:Box([-1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20
 -1.e+20 -1.e+20], [1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20
 1.e+20], (11,), float64), ContinuousDoubleAuction-available_asks-Wood:Box([-1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20
 -1.e+20 -1.e+20], [1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20
 1.e+20], (11,), float64), ContinuousDoubleAuction-available_bids-Stone:Box([-1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20
 -1.e+20 -1.e+20], [1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20 1.e+20
 1.e+20], (11,), float64), ContinuousDoubleAuction-available_bids-Wood:Box([-1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20 -1.e+20
 -1.e+20 -1.e+20], [1.e+20 1.e+20 1.e+

In [None]:
## https://github.com/openai/gym/issues/1830
gym.spaces.flatdim(env_wrapped_tmp.observation_space)
# 1260
print(env_wrapped_tmp.observation_space)

# 3. pure env

In [12]:
env = foundation.make_env_instance(**env_config1)

In [13]:
env.reset()
obs,rew,done,_ = env.step({
'0':1,
'1':1
})
print(f"Type of obs: {type(obs)}, Type of rew: {type(rew)}, Type of done: {type(done)}")

Type of obs: <class 'dict'>, Type of rew: <class 'dict'>, Type of done: <class 'dict'>


In [14]:
obs

{'0': {'world-map': array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],
  
         [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
          [0., 0., 1., 0.,

# 4. Algo TESTs

In [32]:
import gym

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Parallel environments

model = PPO("MultiInputPolicy", wrapped_env, verbose=1)
model.learn(total_timesteps=25000)
# model.save("ppo_cartpole")

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


KeyError: 'action_mask'