# In [None]:

# RoadNetEnv

import gymnasium
from gymnasium.spaces import Box, Dict, Discrete
from gymnasium.spaces.utils import flatten_space
from gymnasium.utils import EzPickle
import numpy as np

from pettingzoo import AECEnv
from pettingzoo.utils import agent_selector, wrappers
from pettingzoo.utils.conversions import parallel_wrapper_fn


# from NguyenNetwork import nguyenNetwork, traffic, latency

# Public names of this module; lets callers write, e.g.,
# "from NguyenNetworkEnv import parallel_env".
# NOTE(review): "ManualPolicy" is listed here but is not defined in the part of
# the file visible to this review; if it is not defined elsewhere,
# `from <module> import *` will raise AttributeError. Confirm and drop the
# entry if it is stale.
__all__ = ["ManualPolicy", "env", "parallel_env", "raw_env"]

# environment wrapper
def env(**kwargs):
    """Build a ``raw_env`` and return it wrapped in ``OrderEnforcingWrapper``.

    All keyword arguments are forwarded unchanged to the ``raw_env``
    constructor.
    """
    return wrappers.OrderEnforcingWrapper(raw_env(**kwargs))


# Parallel-API factory derived from the AEC factory ``env``.
parallel_env = parallel_wrapper_fn(env)
class raw_env(AECEnv):
    """AEC traffic-assignment environment on a directed road network.

    Each agent starts at an origin node and, on its turn, picks one of (at
    most) two outgoing links of the node it is standing on.  Taking a link
    yields a reward of minus the link's current ``latency`` and makes the agent
    idle for that many of its own subsequent turns.  Whenever the last agent of
    a cycle acts, every link's latency is recomputed from the number of agents
    standing on the link's start node.

    ``terminations`` and ``truncations`` are only ever initialised to ``False``
    (in ``reset``); arrival and dead-end states are tracked in the separate
    ``terminations_check`` / ``truncations_check`` dictionaries, so finished
    agents keep taking (no-op) turns and the episode length is controlled by
    the caller.
    """

    metadata = {
        "name": "NguyenNet",
        "is_parallelizable": True
    }

    # Nodes that end an agent's trip with a penalty when they are not the
    # agent's destination (hard-coded for the Nguyen network).
    _DEAD_END_NODES = ("2", "3")

    def __init__(
        self,
        net=None,
        traffic=None,
        render_mode=None
    ):
        """Set up agents, spaces and bookkeeping.

        Args:
            net: Road network graph whose edges carry a ``"latency"``
                attribute.  When ``None`` a fresh ``nguyenNetwork()`` is built,
                so instances no longer share (and mutate) one default graph.
            traffic: Dict with ``"agents"``, ``"origins"`` and
                ``"destinations"`` lists of equal length.  When ``None``,
                ``traffic_f(high=False)`` is used.
            render_mode: Stored only; no rendering is implemented.
        """
        super().__init__()

        # Defaults are resolved here rather than in the signature: defaults in
        # a signature are evaluated once, when the class body is executed,
        # which would read the CSV files at import time and require the loader
        # functions to be defined before this class.
        if net is None:
            net = nguyenNetwork()
        if traffic is None:
            traffic = traffic_f(high=False)

        # no rendering at the moment
        self.render_mode = render_mode

        self.road_network = net
        self.traffic = traffic

        # agents and turn order
        self.agents = self.traffic["agents"]
        self.possible_agents = self.agents[:]
        self.agent_name_mapping = dict(zip(self.agents, list(range(len(self.agents)))))
        self._agent_selector = agent_selector(self.agents)

        # arrival / dead-end bookkeeping (the *_trigger flags make the final
        # reward or penalty a one-off)
        self.terminations_check = self._new_flags()
        self.truncations_check = self._new_flags()
        self.termination_trigger = self._new_flags()
        self.truncation_trigger = self._new_flags()

        # per-agent origin, destination and location; these lists are indexed
        # by the integer in ``agent_name_mapping``
        self.agent_origins = self.traffic["origins"].copy()
        self.agent_origin_backup = self.traffic["origins"].copy()
        self.agent_locations = self.traffic["origins"].copy()
        self.agent_destinations = self.traffic["destinations"]
        self.agent_path_histories = {
            agent: [location]
            for agent, location in zip(self.agents, self.agent_locations)
        }
        self.agent_wait_time = {agent: 0 for agent in self.agents}
        self.agent_travel_time = {agent: 0 for agent in self.agents}

        # structured observation space; flatten_space orders the keys
        # alphabetically: latencies, location, observation
        self.unflattened_observation_spaces = {
            agent: Dict({
                "observation": Box(low=-1, high=12, shape=(2, 1), dtype=int),
                "latencies": Box(low=0, high=30, shape=(2, 1), dtype=int),
                "location": Box(low=0, high=12, shape=(1, 1), dtype=int),
            }) for agent in self.agents
        }

        # flattened observation space actually exposed to learners
        self.observation_spaces = {
            agent: flatten_space(space)
            for agent, space in self.unflattened_observation_spaces.items()
        }

        # two choices per agent; one space object per agent so that they do
        # not share a single instance
        self.action_spaces = {
            agent: gymnasium.spaces.Discrete(2) for agent in self.agents
        }

        # environment-wide flags (not read anywhere in this class)
        self.terminate = False
        self.truncate = False

    def _new_flags(self):
        """Return a fresh ``{agent: False}`` dict for the current agents."""
        return {agent: False for agent in self.agents}

    def observation_space(self, agent):
        """Return the flattened observation space of ``agent``."""
        return self.observation_spaces[agent]

    def action_space(self, agent):
        """Return the action space of ``agent``."""
        return self.action_spaces[agent]

    def observe(self, agent):
        """Return the observation of ``agent`` as a 1-D array.

        Layout: ``[latency_a, latency_b, position, neighbor_a, neighbor_b]``
        where node names are encoded as ``int(name) - 1``.  A node with a
        single successor has that successor (and its latency) duplicated; a
        node with no successor yields latencies ``[0, 0]`` and neighbors
        ``[-1, -1]``.  A node with more than two successors produces a longer
        array, since no truncation is applied.
        """
        agent_position = self.agent_locations[self.agent_name_mapping[agent]]
        neighbors = list(self.road_network.neighbors(agent_position))

        position = [int(agent_position) - 1]
        node_encoded = [int(node) - 1 for node in neighbors]

        # current "latency" edge attribute of every outgoing link
        neighboring_nodes_latencies = [
            self.road_network.get_edge_data(agent_position, node)["latency"]
            for node in neighbors
        ]

        if len(neighbors) == 1:
            node_encoded = node_encoded * 2
            neighboring_nodes_latencies = neighboring_nodes_latencies * 2
        if len(neighbors) == 0:
            node_encoded = [-1, -1]
            neighboring_nodes_latencies = [0, 0]

        return np.array(neighboring_nodes_latencies + position + node_encoded)

    def state(self) -> tuple:
        """Return ``(arrived_flags, travel_times)`` for logging.

        Both elements are plain lists in agent order: the values of
        ``termination_trigger`` and of ``agent_travel_time``.
        """
        return list(self.termination_trigger.values()), list(self.agent_travel_time.values())

    def _update_link_latencies(self):
        """Recompute the ``"latency"`` attribute of every edge.

        An agent standing on a node is counted once on each outgoing link of
        that node; the count is passed as the flow to the module-level
        ``latency`` function.
        """
        agents_on_link = {edge: 0 for edge in self.road_network.edges()}

        for position in self.agent_locations:
            for node in self.road_network.neighbors(position):
                agents_on_link[(position, node)] += 1

        for (start, end), flow in agents_on_link.items():
            self.road_network[start][end]["latency"] = latency(
                flow, np.int64(start), np.int64(end)
            )

    def _finish_turn(self):
        """Hand the turn to the next agent and fold rewards into the totals."""
        self.agent_selection = self._agent_selector.next()
        self._accumulate_rewards()

    def step(self, action):
        """Advance the currently selected agent by one turn.

        Args:
            action: Index (0 or 1) of the outgoing link to take.  It is only
                read when the agent is free to move; it is ignored while the
                agent is still travelling or has already finished.
        """
        agent = self.agent_selection
        agent_idx = self.agent_name_mapping[agent]

        self._cumulative_rewards[agent] = 0

        # latencies are refreshed once per cycle, on the last agent's turn
        if self._agent_selector.is_last():
            self._update_link_latencies()
        self._clear_rewards()

        # still travelling along a link: burn one time step
        if self.agent_wait_time[agent] != 0:
            self.agent_wait_time[agent] -= 1
            self._finish_turn()
            return

        location = self.agent_locations[agent_idx]

        # arrived at the destination: +100 once, then 0 on later turns
        if location == self.agent_destinations[agent_idx]:
            self.terminations_check[agent] = True
            self.rewards[agent] = 0 if self.termination_trigger[agent] else 100
            self.termination_trigger[agent] = True
            self._finish_turn()
            return

        # stranded at the wrong end node: -100 once, then 0 on later turns
        if location in self._DEAD_END_NODES:
            self.truncations_check[agent] = True
            self.rewards[agent] = 0 if self.truncation_trigger[agent] else -100
            self.truncation_trigger[agent] = True
            self._finish_turn()
            return

        choices = list(self.road_network.neighbors(location))
        if len(choices) == 1:
            # both actions lead to the only available link
            choices = [choices[0], choices[0]]

        # .item() accepts Python ints, NumPy scalars and size-1 arrays alike
        chosen_route = choices[int(np.asarray(action).item())]

        # cost of the chosen link; the learner maximises the negative latency
        link_latency = self.road_network.get_edge_data(location, chosen_route)["latency"]
        self.rewards[agent] = -1 * link_latency

        self.agent_wait_time[agent] += link_latency
        self.agent_travel_time[agent] += link_latency

        self.agent_locations[agent_idx] = chosen_route
        self.agent_path_histories[agent].append(chosen_route)

        self._finish_turn()

    def reset(self, seed=None, options=None):
        """Restore every agent to its origin and clear all bookkeeping.

        ``seed`` and ``options`` are accepted for API compatibility and are not
        used.  Every edge's ``"latency"`` is set to 0, which overwrites the
        value the network was built with.  Returns ``None``.
        """
        self.agents = self.possible_agents[:]

        self.agent_origins = self.agent_origin_backup.copy()
        self.agent_locations = self.agent_origin_backup.copy()
        self.agent_path_histories = {
            agent: [origin]
            for agent, origin in zip(self.agents, self.agent_origins)
        }
        self.agent_wait_time = {agent: 0 for agent in self.agents}
        self.agent_travel_time = {agent: 0 for agent in self.agents}

        self._agent_selector.reinit(self.agents)
        self.agent_selection = self._agent_selector.next()
        self.terminate = False
        self.truncate = False
        self.rewards = {agent: 0 for agent in self.agents}
        self._cumulative_rewards = {agent: 0 for agent in self.agents}
        self.terminations = self._new_flags()
        self.truncations = self._new_flags()
        self.infos = {agent: {} for agent in self.agents}

        self.terminations_check = self._new_flags()
        self.truncations_check = self._new_flags()
        self.termination_trigger = self._new_flags()
        self.truncation_trigger = self._new_flags()

        for start_node, end_node in self.road_network.edges():
            self.road_network[start_node][end_node]["latency"] = 0

# train_agileRL
# Training script: MADDPG (AgileRL) on the parallel road-network environment.
# import RoadNetEnv
import os

import pandas as pd
import torch
import numpy as np
from agilerl.components.multi_agent_replay_buffer import MultiAgentReplayBuffer
from agilerl.algorithms.maddpg import MADDPG

# instantiate env and torch device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# env = RoadNetEnv.parallel_env()
env = parallel_env()
env.reset()

# configure algo input parameters
# Spaces exposing ``.n`` are treated as discrete; anything else falls back to
# ``.shape``.  Only the missing-attribute case is caught so that unrelated
# errors are not hidden.
try:
    state_dim = [env.observation_space(agent).n for agent in env.agents]
    one_hot = True
except AttributeError:
    state_dim = [env.observation_space(agent).shape for agent in env.agents]
    one_hot = False
try:
    action_dim = [env.action_space(agent).n for agent in env.agents]
    discrete_actions = True
    max_action = None
    min_action = None
except AttributeError:
    action_dim = [env.action_space(agent).shape[0] for agent in env.agents]
    discrete_actions = False
    max_action = [env.action_space(agent).high for agent in env.agents]
    min_action = [env.action_space(agent).low for agent in env.agents]

n_agents = env.num_agents
agent_ids = [agent_id for agent_id in env.agents]
done = {agent_id: False for agent_id in env.agents}
field_names = ["state", "action", "reward", "next_state", "done"]
memory = MultiAgentReplayBuffer(
    memory_size=1000000,
    field_names=field_names,
    agent_ids=agent_ids,
    device=device
)

with open('device_info.txt', 'w') as file:
    file.write(f"Device: {device}")

NET_CONFIG = {
    "arch": "mlp",
    "h_size": [128, 128]
}

agent = MADDPG(
    state_dims=state_dim,
    action_dims=action_dim,
    one_hot=one_hot,
    n_agents=n_agents,
    agent_ids=agent_ids,
    max_action=max_action,
    min_action=min_action,
    # use the value detected from the action space instead of a constant
    discrete_actions=discrete_actions,
    device=device,
    net_config=NET_CONFIG,
    lr=1e-2,
    batch_size=8
)

# load checkpoint
# agent.loadCheckpoint("./checkpoint/checkpoint_50.pt")

episodes = 400
max_steps = 50
epsilon = 1.0
eps_end = 0.1
eps_decay = 0.995

episode_travel_times = []

log_interval = 10
# Offset added to the episode index in output file names.
# NOTE(review): 51 looks like "continue after checkpoint_50", but the
# checkpoint load above is commented out -- confirm the intended value.
start_ep = 51

# The logging step below writes into these folders; create them up front so a
# long run does not fail at its first save.
os.makedirs("./results", exist_ok=True)
os.makedirs("./checkpoint", exist_ok=True)

for ep in range(episodes):
    state, info = env.reset()
    agent_reward = {agent_id: 0 for agent_id in env.agents}

    for _ in range(max_steps):
        agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
        env_defined_actions = (
            info["env_defined_actions"]
            if "env_defined_actions" in info.keys()
            else None
        )

        # get next action from agent
        cont_actions, discrete_action = agent.getAction(
            state, epsilon, agent_mask, env_defined_actions
        )
        if agent.discrete_actions:
            action = discrete_action
        else:
            action = cont_actions

        # act in environment
        next_state, reward, termination, truncation, info = env.step(
            action
        )

        # An agent is done once the environment reports it as terminated or
        # truncated; previously the initial all-False dict was stored forever.
        done = {
            agent_id: bool(
                termination.get(agent_id, False) or truncation.get(agent_id, False)
            )
            for agent_id in agent_ids
        }

        # save experience to replay buffer
        memory.save2memory(state, cont_actions, reward, next_state, done)

        for agent_id, r in reward.items():
            agent_reward[agent_id] += r

        # learn according to learning frequency
        if (memory.counter % agent.learn_step == 0) and (len(memory) >= agent.batch_size):
            experiences = memory.sample(agent.batch_size)
            agent.learn(experiences)

        # update state
        state = next_state

    # metric logging: env.state() returns (arrival flags, travel times)
    finishers, travel_time = env.state()
    episode_travel_times.append(travel_time)  # export to csv

    # save the total episode reward
    score = sum(agent_reward.values())
    agent.scores.append(score)

    # update epsilon for exploration
    epsilon = max(eps_end, epsilon * eps_decay)

    if (ep+start_ep) % log_interval == 0 or ep == episodes - 1:
        reward_scores = pd.DataFrame(agent.scores)
        reward_scores.to_csv(f"./results/reward_{ep+start_ep}.csv")
        travel_times = pd.DataFrame(episode_travel_times)
        travel_times.to_csv(f"./results/travel_times_{ep+start_ep}.csv")
        agent.saveCheckpoint(f"./checkpoint/checkpoint_{ep+start_ep}.pt")
    



# NguyenNetwork

# In [2]:
import networkx as nx
import pandas as pd

def nguyenNetwork(high=True):
    """Build the 13-node Nguyen road network as a ``networkx.DiGraph``.

    Links are read from a CSV under ``./network/``: the high-demand file when
    ``high == True``, the low-demand file otherwise.  Each edge carries the
    attributes ``ffs``, ``capacity``, ``alpha``, ``beta``, ``latency`` (all
    taken from the CSV) and ``flow`` (initialised to 0).  Each node carries a
    ``pos`` coordinate pair.
    """
    if high == True:
        csv_path = "./network/NguyenLinksHighDemand200.csv"
    else:
        csv_path = "./network/NguyenLinksLowDemand50.csv"

    # node names are read as text so they match the node keys below
    links = pd.read_csv(csv_path, dtype={"start node": str, "end node": str})
    links['flow'] = 0

    # start from an empty directed graph
    network = nx.DiGraph()

    # intersections and their coordinates
    positions = {
        "1": (1, 3),
        "2": (4, 1),
        "3": (3, 0),
        "4": (0, 2),
        "5": (1, 2),
        "6": (2, 2),
        "7": (3, 2),
        "8": (4, 2),
        "9": (1, 1),
        "10": (2, 1),
        "11": (3, 1),
        "12": (2, 3),
        "13": (2, 0),
    }
    for name, xy in positions.items():
        network.add_node(name, pos=xy)

    # one directed edge per CSV row
    for _, link in links.iterrows():
        network.add_edge(
            link["start node"],
            link["end node"],
            ffs=link["free flow speed"],
            capacity=link["capacity"],
            alpha=link["alpha"],
            beta=link["beta"],
            latency=link["latency"],
            flow=link["flow"],
        )

    return network

def latency(flow, start_node, end_node, network=None):
    """Return the integer BPR travel time of a link for a given flow.

    Computes ``t_0 * (1 + alpha * (flow / capacity) ** beta)`` from the edge
    attributes ``ffs`` (used as ``t_0``), ``capacity``, ``alpha`` and ``beta``
    and rounds the result.  For background on BPR functions see p. 358 of
    Sheffi's book.

    Args:
        flow: Total number of vehicles on the link.
        start_node: Start node of the link; converted with ``str()`` before
            the lookup, so integers are accepted.
        end_node: End node of the link; converted like ``start_node``.
        network: Graph to read the edge attributes from.  When ``None`` the
            default ``nguyenNetwork()`` is used; it is built on the first call
            and reused afterwards instead of re-reading the CSV on every call.

    Raises:
        KeyError: If the network has no edge from ``start_node`` to
            ``end_node``.
    """
    if network is None:
        network = getattr(latency, "_default_network", None)
        if network is None:
            # cache on the function object; only read, never mutated here
            network = latency._default_network = nguyenNetwork()

    edge_data = network.get_edge_data(str(start_node), str(end_node))
    if edge_data is None:
        raise KeyError(f"no link from {start_node!r} to {end_node!r}")

    c = edge_data['capacity']
    t_0 = edge_data['ffs']
    a = edge_data['alpha']
    b = edge_data['beta']
    t_link = t_0 * (1 + (a * ((flow/c) ** b)))
    return round(t_link)  # this function only returns integer values


def traffic_f(high=True, demand_file=None):
    """Expand an origin-destination demand table into one entry per agent.

    Args:
        high: Selects the default demand file when ``demand_file`` is not
            given: the high-demand CSV if truthy, the low-demand CSV otherwise.
        demand_file: Optional path to a demand CSV with the columns
            ``Origin``, ``Destination`` and ``OD demand``.  Overrides ``high``.
            NOTE(review): the default file names are relative to the current
            working directory, whereas ``nguyenNetwork`` reads from
            ``./network/`` -- confirm where the demand files actually live.

    Returns:
        Dict with three parallel lists: ``"agents"`` (``agent_1``,
        ``agent_2``, ...), ``"origins"`` and ``"destinations"`` (node names as
        strings).  Each CSV row contributes ``OD demand`` agents.
    """
    if demand_file is None:
        demand_file = (
            "NguyenDemandHighDemand200.csv" if high else "NguyenLowDemand50.csv"
        )

    # Node names are read as text, as in nguyenNetwork, so they never pick up
    # a float suffix such as "1.0".
    demand = pd.read_csv(demand_file, dtype={"Origin": str, "Destination": str})

    agents = []
    origins = []
    destinations = []

    # Iterate column-wise: row-wise iteration would coerce every value of a
    # row to one common dtype.
    rows = zip(demand["Origin"], demand["Destination"], demand["OD demand"])
    for origin, destination, count in rows:
        for _ in range(int(count)):
            agents.append(f"agent_{len(agents) + 1}")
            origins.append(str(origin))
            destinations.append(str(destination))

    return {
        "agents": agents,
        "origins": origins,
        "destinations": destinations
    }



# Manual check when the file is run as a script: load the low-demand traffic
# table and print the resulting agents/origins/destinations dict.
if __name__ == "__main__":
    traffic = traffic_f(high=False)
    print(traffic)
# RoadNetEnv

# FileNotFoundError: [Errno 2] No such file or directory: 'NguyenLowDemand50.csv'