In [17]:
import gym
from gym.spaces import Discrete, Box
import numpy as np

In [18]:
def reset(n_agents, ):
    state = np.zeros(n_agents+1)
    return state

In [95]:
n_agents = 1000
action_space = Discrete(2)
state_space = Box(
    low=0, high=3, shape=(n_agents,), dtype=int
) 

In [96]:
actions_sample = [action_space.sample() for i in range(n_agents)]
state_sample = state_space.sample()

In [97]:
transition_matrix = np.zeros(shape=(4, 4))
transition_matrix

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [98]:
state_sample

array([2, 0, 2, 2, 0, 0, 1, 3, 1, 2, 0, 1, 3, 1, 1, 2, 3, 1, 1, 1, 2, 3,
       0, 1, 0, 0, 0, 0, 3, 0, 0, 3, 1, 1, 1, 3, 1, 3, 1, 0, 1, 2, 3, 0,
       0, 3, 3, 0, 3, 0, 0, 0, 0, 1, 3, 3, 0, 3, 0, 1, 1, 3, 3, 1, 3, 2,
       1, 3, 1, 2, 3, 3, 1, 3, 2, 2, 0, 0, 0, 2, 0, 1, 1, 3, 3, 2, 2, 0,
       1, 1, 0, 2, 3, 2, 1, 0, 0, 2, 1, 2, 0, 3, 0, 0, 1, 0, 1, 2, 0, 2,
       3, 0, 0, 0, 2, 2, 2, 2, 2, 2, 3, 0, 0, 3, 0, 0, 3, 1, 3, 0, 2, 1,
       3, 1, 3, 3, 1, 3, 0, 2, 3, 3, 1, 0, 0, 3, 3, 0, 1, 2, 0, 3, 0, 2,
       1, 2, 0, 0, 1, 0, 1, 1, 0, 2, 2, 2, 3, 0, 1, 3, 3, 0, 1, 1, 2, 2,
       0, 1, 0, 0, 3, 2, 0, 0, 2, 2, 3, 3, 1, 3, 2, 0, 1, 3, 1, 3, 3, 1,
       3, 1, 1, 3, 1, 1, 3, 0, 3, 1, 2, 0, 1, 1, 0, 1, 0, 3, 2, 0, 3, 1,
       1, 3, 1, 3, 0, 1, 3, 1, 3, 0, 1, 1, 3, 3, 3, 3, 3, 2, 2, 0, 2, 3,
       3, 2, 1, 1, 1, 3, 2, 2, 3, 2, 0, 2, 0, 0, 1, 0, 0, 3, 0, 3, 2, 0,
       3, 3, 3, 1, 1, 1, 3, 0, 3, 2, 0, 3, 2, 2, 2, 1, 1, 3, 0, 3, 1, 1,
       0, 2, 3, 1, 2, 0, 1, 0, 1, 1, 2, 0, 2, 1, 1,

In [102]:
# Find a more pythonic way to do this
transition_matrix[0, 1] = sum([1 for x in zip(state_sample, actions_sample) if x[0]==0 and x[1]==0])
transition_matrix[0, 2] = sum([1 for x in zip(state_sample, actions_sample) if x[0]==0 and x[1]==1])
transition_matrix[1, 3] = sum([1 for x in zip(state_sample, actions_sample) if x[0]==1 and x[1]==0])
transition_matrix[1, 2] = sum([1 for x in zip(state_sample, actions_sample) if x[1]==1 and x[1]==1])
transition_matrix[2, 3] = sum([1 for x in zip(state_sample, actions_sample) if x[1]==2 and x[1]==1])

In [104]:
transition_matrix

array([[  0., 140., 111.,   0.],
       [  0.,   0., 515., 117.],
       [  0.,   0.,   0.,   0.],
       [  0.,   0.,   0.,   0.]])

In [126]:
class BraessGymEnv(gym.Env):
    
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, n_agents=4000, social_welfare_type='utilitarian'):
        
        super(BraessGymEnv, self).__init__()
        
        # Game
        self.n_agents = n_agents
        self.cost_params  = {'c1': -n_agents*45/4000, 'c2': -n_agents/40}
        self.social_welfare_type = social_welfare_type
        
        # State
        self.reset()
        
        # Environment
        self.action_space = Discrete(2) # A or B
        self.state_space = Box(
            low=0, high=3, shape=(self.n_agents,), dtype=int
        )  
        
    
    def reset(self):
        self.done = False
        self.done = False
        self.state = np.zeros(self.n_agents)
    
    def step(self, actions):
        
        #info = {}
        # Find a more pythonic way to do this
        transition_matrix = np.zeros(shape=(4, 4))
        transition_matrix[0, 1] = sum([1 for x in zip(self.state, actions) if x[0]==0 and x[1]==0])
        transition_matrix[0, 2] = sum([1 for x in zip(self.state, actions) if x[0]==0 and x[1]==1])
        transition_matrix[1, 3] = sum([1 for x in zip(self.state, actions) if x[0]==1 and x[1]==0])
        transition_matrix[1, 2] = sum([1 for x in zip(self.state, actions) if x[1]==1 and x[1]==1])
        transition_matrix[2, 3] = sum([1 for x in zip(self.state, actions) if x[1]==2 and x[1]==1])
        
        rewards = np.zeros(self.n_agents)
        info = {'transitions_matrix': transition_matrix}
        
        return self.state, rewards, self.done, info

    
    def social_welfare(self, rewards):

        # Utilitarian Social Welfare
        if self.social_welfare_type == 'utilitarian':
            rewards = [sum(rewards)/(self.n_agents)]*(self.n_agents)
        # Rawls Social Welfare
        elif self.social_welfare_type == 'rawlsian':
            rewards = [min(rewards)]*(self.n_agents)

        return rewards

    def render(self, mode='human', close=False):
        
        # Render the environment to the screen
        pass

In [127]:
env = BraessGymEnv()

In [136]:
env = BraessGymEnv()
i = 0
done = False
while not done:
    actions = [action_space.sample() for i in range(n_agents)]
    state, rewards, done, info = env.step(actions)
    if i == 10: done=True
    i+=1

In [137]:
action_space.sample()

0

In [138]:
info

{'transitions_matrix': array([[  0., 497., 503.,   0.],
        [  0.,   0., 503.,   0.],
        [  0.,   0.,   0.,   0.],
        [  0.,   0.,   0.,   0.]])}