### Import library

In [2]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
import time
from env import Environment

### Environment

In [None]:
class Robot:
    """A single robot on the grid.

    Attributes:
        position: The robot's current cell, stored exactly as supplied.
        carrying: Identifier of the package being carried; 0 means the
            robot is not carrying anything.
    """

    def __init__(self, position):
        # A freshly created robot never holds a package.
        self.position, self.carrying = position, 0

class Package:
    """Record describing one delivery request.

    Attributes:
        start: Cell where the package appears.
        start_time: Time step at which the package becomes available.
        target: Cell where the package has to be dropped.
        deadline: Time step used to decide whether a delivery is on time.
        package_id: Identifier of the package.
        status: Life-cycle marker; starts as the string 'None'.
    """

    def __init__(self, start, start_time, target, deadline, package_id):
        self.start, self.start_time = start, start_time
        self.target, self.deadline = target, deadline
        self.package_id = package_id
        # Other statuses used by the environment:
        # 'waiting', 'in_transit', 'delivered'.
        self.status = 'None'

class Environment: 

    def __init__(self, map_file, max_time_steps = 100, n_robots = 5, n_packages=20,
             move_cost=-0.01, delivery_reward=10., delay_reward=1., 
             seed=2025): 
        """ Initializes the simulation environment. :param map_file: Path to the map text file. :param move_cost: Cost incurred when a robot moves (LRUD). :param delivery_reward: Reward for delivering a package on time. """ 
        self.map_file = map_file
        self.grid = self.load_map()
        self.n_rows = len(self.grid)
        self.n_cols = len(self.grid[0]) if self.grid else 0 
        self.move_cost = move_cost 
        self.delivery_reward = delivery_reward 
        self.delay_reward = delay_reward
        self.t = 0 
        self.robots = [] # List of Robot objects.
        self.packages = [] # List of Package objects.
        self.total_reward = 0

        self.n_robots = n_robots
        self.max_time_steps = max_time_steps
        self.n_packages = n_packages

        self.rng = np.random.RandomState(seed)
        self.reset()
        self.done = False
        self.state = None

    def load_map(self):
        """
        Reads the map file and returns a 2D grid.
        Assumes that each line in the file contains numbers separated by space.
        0 indicates free cell and 1 indicates an obstacle.
        """
        grid = []
        with open(self.map_file, 'r') as f:
            for line in f:
                # Strip line breaks and split into numbers
                row = [int(x) for x in line.strip().split(' ')]
                grid.append(row)
        return grid
    
    def is_free_cell(self, position):
        """
        Checks if the cell at the given position is free (0) or occupied (1).
        :param position: Tuple (row, column) to check.
        :return: True if the cell is free, False otherwise.
        """
        r, c = position
        if r < 0 or r >= self.n_rows or c < 0 or c >= self.n_cols:
            return False
        return self.grid[r][c] == 0

    def add_robot(self, position):
        """
        Adds a robot at the given position if the cell is free.
        :param position: Tuple (row, column) for the robot's starting location.
        """
        if self.is_free_cell(position):
            robot = Robot(position)
            self.robots.append(robot)
        else:
            raise ValueError("Invalid robot position: must be on a free cell not occupied by an obstacle or another robot.")

    def reset(self):
        """
        Resets the environment to its initial state.
        Clears all robots and packages, and reinitializes the grid.
        """
        self.t = 0
        self.robots = []
        self.packages = []
        self.total_reward = 0
        self.done = False
        self.state = None

        # Reinitialize the grid
        #self.grid = self.load_map(sel)
        # Add robots and packages
        tmp_grid = np.array(self.grid)
        for i in range(self.n_robots):
            # Randomly select a free cell for the robot
            position, tmp_grid = self.get_random_free_cell(tmp_grid)
            self.add_robot(position)
        
        N = self.n_rows
        list_packages = []
        for i in range(self.n_packages):
            # Randomly select a free cell for the package
            start = self.get_random_free_cell_p()
            while True:
                target = self.get_random_free_cell_p()
                if start != target:
                    break
            
            to_deadline = 10 + self.rng.randint(N/2, 3*N)
            if i <= min(self.n_robots, 20):
                start_time = 0
            else:
                start_time = self.rng.randint(1, self.max_time_steps)
            list_packages.append((start_time, start, target, start_time + to_deadline ))

        list_packages.sort(key=lambda x: x[0])
        for i in range(self.n_packages):
            start_time, start, target, deadline = list_packages[i]
            package_id = i+1
            self.packages.append(Package(start, start_time, target, deadline, package_id))

        return self.get_state()
    
    def get_state(self):
        """
        Returns the current state of the environment.
        The state includes the positions of robots and packages.
        :return: State representation.
        """
        selected_packages = []
        for i in range(len(self.packages)):
            if self.packages[i].start_time == self.t:
                selected_packages.append(self.packages[i])
                self.packages[i].status = 'waiting'

        state = {
            'time_step': self.t,
            'map': self.grid,
            'robots': [(robot.position[0] + 1, robot.position[1] + 1,
                        robot.carrying) for robot in self.robots],
            'packages': [(package.package_id, package.start[0] + 1, package.start[1] + 1, 
                          package.target[0] + 1, package.target[1] + 1, package.start_time, package.deadline) for package in selected_packages]
        }
        return state
        

    def get_random_free_cell_p(self):
        """
        Returns a random free cell in the grid.
        :return: Tuple (row, col) of a free cell.
        """
        free_cells = [(i, j) for i in range(self.n_rows) for j in range(self.n_cols) \
                      if self.grid[i][j] == 0]
        i = self.rng.randint(0, len(free_cells))
        return free_cells[i]


    def get_random_free_cell(self, new_grid):
        """
        Returns a random free cell in the grid.
        :return: Tuple (row, col) of a free cell.
        """
        free_cells = [(i, j) for i in range(self.n_rows) for j in range(self.n_cols) \
                      if new_grid[i][j] == 0]
        i = self.rng.randint(0, len(free_cells))
        new_grid[free_cells[i][0]][free_cells[i][1]] = 1
        return free_cells[i], new_grid

    
    def step(self, actions):
        """
        Advances the simulation by one timestep.

        Movement is resolved for all robots first, package actions are then
        applied at the robots' resulting positions, and finally the time
        step counter is incremented.

        :param actions: A list where each element is a tuple (move_action, package_action) for a robot.
            move_action: one of 'S', 'L', 'R', 'U', 'D'.
            package_action: '1' (pickup), '2' (drop), or '0' (do nothing).
        :return: Tuple ``(state, r, done, infos)`` where ``state`` is the
            result of ``get_state()`` after the time increment, ``r`` is the
            reward collected during this step only, ``done`` tells whether
            ``check_terminate()`` returned True, and ``infos`` is empty
            unless ``done`` (then it holds 'total_reward' and
            'total_time_steps').
        :raises ValueError: If the number of actions differs from the number of robots.
        """
        r = 0
        if len(actions) != len(self.robots):
            raise ValueError("The number of actions must match the number of robots.")

        # -------- Process Movement --------
        proposed_positions = []
        # For each robot, compute the new position based on the movement action.
        # old_pos maps a robot's current cell to its index.
        # NOTE: next_pos is filled here but never read in this method.
        old_pos = {}
        next_pos = {}
        for i, robot in enumerate(self.robots):
            move, pkg_act = actions[i]
            new_pos = self.compute_new_position(robot.position, move)
            # Check if the new position is valid (inside bounds and not an obstacle).
            if not self.valid_position(new_pos):
                new_pos = robot.position  # Invalid moves result in no change.
            proposed_positions.append(new_pos)
            old_pos[robot.position] = i
            next_pos[new_pos] = i

        # Conflict resolution. Each pass of the while-loop settles exactly one
        # robot and then rescans from robot 0. A robot can be settled when its
        # proposed cell is not the current cell of any robot, is its own
        # current cell, or is the current cell of a robot that is already
        # settled. The loop ends when a full scan settles nobody.
        # NOTE: moved_robots is written below but never read afterwards.
        moved_robots = [0 for _ in range(len(self.robots))]
        computed_moved = [0 for _ in range(len(self.robots))]
        final_positions = [None] * len(self.robots)
        occupied = {}  # Cells already claimed as a final position -> robot index.
        while True:
            updated = False
            for i in range(len(self.robots)):
            
                if computed_moved[i] != 0: 
                    continue

                pos = self.robots[i].position
                new_pos = proposed_positions[i]
                can_move = False
                if new_pos not in old_pos:
                    can_move = True
                else:
                    j = old_pos[new_pos]
                    if (j != i) and (computed_moved[j] == 0): # We must wait for the conflict resolve
                        continue
                    # We can decide where the robot can go now
                    can_move = True

                if can_move:
                    if new_pos not in occupied:
                        # The proposed cell is still unclaimed: take it.
                        occupied[new_pos] = i
                        final_positions[i] = new_pos
                        computed_moved[i] = 1
                        moved_robots[i] = 1
                        updated = True
                    else:
                        # Another robot already claimed the cell: stay put
                        # and claim the current cell instead.
                        new_pos = pos
                        occupied[new_pos] = i
                        final_positions[i] = pos
                        computed_moved[i] = 1
                        moved_robots[i] = 0
                        updated = True

                if updated:
                    break

            if not updated:
                break
        # Robots that could never be settled (they were all waiting on each
        # other) keep their current position.
        for i in range(len(self.robots)):
            if computed_moved[i] == 0:
                final_positions[i] = self.robots[i].position 
        
        # Update robot positions and apply movement cost when applicable.
        # The cost is only charged when the position really changes.
        for i, robot in enumerate(self.robots):
            move, pkg_act = actions[i]
            if move in ['L', 'R', 'U', 'D'] and final_positions[i] != robot.position:
                r += self.move_cost
            robot.position = final_positions[i]

        # -------- Process Package Actions --------
        for i, robot in enumerate(self.robots):
            move, pkg_act = actions[i]
            # Pick up action.
            if pkg_act == '1':
                if robot.carrying == 0:
                    # Check for available packages at the current cell.
                    for j in range(len(self.packages)):
                        if self.packages[j].status == 'waiting' and self.packages[j].start == robot.position and self.packages[j].start_time <= self.t:
                            # First match in list order is taken.
                            package_id = self.packages[j].package_id
                            robot.carrying = package_id
                            self.packages[j].status = 'in_transit'
                            break

            # Drop action.
            elif pkg_act == '2':
                if robot.carrying != 0:
                    package_id = robot.carrying
                    # Looks the package up by list index, i.e. relies on
                    # packages[k].package_id == k + 1.
                    target = self.packages[package_id - 1].target
                    # Check if the robot is at the target position.
                    if robot.position == target:
                        # Update package status to delivered.
                        pkg = self.packages[package_id - 1]
                        pkg.status = 'delivered'
                        # Apply reward based on whether the delivery is on time.
                        if self.t <= pkg.deadline:
                            r += self.delivery_reward
                        else:
                            # Example: a reduced reward for late delivery.
                            r += self.delay_reward
                        robot.carrying = 0  
        
        # Increment the simulation timestep.
        self.t += 1

        self.total_reward += r

        # Only the local flag is set here; self.done is not updated.
        done = False
        infos = {}
        if self.check_terminate():
            done = True
            infos['total_reward'] = self.total_reward
            infos['total_time_steps'] = self.t

        return self.get_state(), r, done, infos
    
    def check_terminate(self):
        if self.t == self.max_time_steps:
            return True
        
        for p in self.packages:
            if p.status != 'delivered':
                return False
            
        return True

    def compute_new_position(self, position, move):
        """
        Computes the intended new position for a robot given its current position and move command.
        """
        r, c = position
        if move == 'S':
            return (r, c)
        elif move == 'L':
            return (r, c - 1)
        elif move == 'R':
            return (r, c + 1)
        elif move == 'U':
            return (r - 1, c)
        elif move == 'D':
            return (r + 1, c)
        else:
            return (r, c)

    def valid_position(self, pos):
        """
        Checks if the new position is within the grid and not an obstacle.
        """
        r, c = pos
        if r < 0 or r >= self.n_rows or c < 0 or c >= self.n_cols:
            return False
        if self.grid[r][c] == 1:
            return False
        return True

    def render(self):
        """
        A simple text-based rendering of the map showing obstacles and robot positions.
        Obstacles are represented by 1, free cells by 0, and robots by 'R'.
        """
        # Make a deep copy of the grid
        grid_copy = [row[:] for row in self.grid]
        for i, robot in enumerate(self.robots):
            r, c = robot.position
            grid_copy[r][c] = 'R%i'%i
        for row in grid_copy:
            print('\t'.join(str(cell) for cell in row))

### Package storage

In [3]:
# Agent-side package store, keyed by package_id.
# state_to_agent() fills each entry with a dict holding the keys:
#   'x', 'y'               - start cell of the package
#   'target_x', 'target_y' - destination cell of the package
#   'start_time', 'deadline'
#   'status'
package_dict = {}

In [4]:
def state_to_agent(package_dict, state):
    """
    Merge an environment state into the agent-side package store.

    Steps:
      1. At time step 0 a fresh, empty store replaces the one passed in
         (the caller's dict is left untouched in that case).
      2. Every package listed in ``state['packages']`` is stored or
         overwritten with status 'waiting'.
      3. Every package id carried by a robot (third field of each robot
         tuple, 0 meaning none) is marked 'carrying'.
      4. Every stored package still marked 'carrying' that no robot
         carries any more is marked 'Delivered'.

    Args:
        package_dict: Store keyed by package id; modified in place unless
            the time step is 0.
        state: Dict with keys 'time_step', 'map', 'robots' and 'packages'.

    Returns:
        Tuple ``(agent_state, package_dict)``. ``agent_state`` copies
        'robots', 'map' and 'time_step' from ``state`` and lists every
        stored package as (id, x, y, target_x, target_y, start_time,
        deadline, status).

    Raises:
        KeyError: If a robot carries a package id missing from the store.
    """
    if state['time_step'] == 0:
        package_dict = {}

    # Newly announced packages always start out as 'waiting'.
    for pkg_id, x, y, target_x, target_y, start_time, deadline in state['packages']:
        package_dict[pkg_id] = {
            'x': x,
            'y': y,
            'target_x': target_x,
            'target_y': target_y,
            'start_time': start_time,
            'deadline': deadline,
            'status': 'waiting',
        }

    carried_now = set()
    for robot in state['robots']:
        carried_id = robot[2]
        if carried_id == 0:
            continue
        package_dict[carried_id]['status'] = 'carrying'
        carried_now.add(carried_id)

    # A package that was being carried and has left every robot's hands is
    # considered delivered.
    for pkg_id, info in package_dict.items():
        if info['status'] == 'carrying' and pkg_id not in carried_now:
            info['status'] = 'Delivered'

    package_rows = []
    for pkg_id, info in package_dict.items():
        package_rows.append((
            pkg_id,
            info['x'], info['y'],
            info['target_x'], info['target_y'],
            info['start_time'], info['deadline'],
            info['status'],
        ))

    state = {
        'robots': state['robots'],
        'packages': package_rows,
        'map': state['map'],
        'time_step': state['time_step'],
    }
    return state, package_dict

### Critic observation

In [5]:
def convert_global_state_to_critic_input(global_state_dict: dict, all_packages_data: dict, map_dims: int = 20, max_time_steps: int = None):
    """
    Chuyển đổi global state và thông tin package đầy đủ thành input tensor cho Critic.

    Args:
        global_state_dict (dict): Output từ state to agent.
        all_packages_data (dict): Dictionary chứa thông tin đầy đủ của tất cả các package
                                  (ví dụ: agents_object.packages).
                                  Format: {pkg_id: {'x': int, 'y': int, 'target_x': int,
                                                    'target_y': int, 'deadline': int,
                                                    'status': str, 'start_time': int}}
        map_dims (tuple): (MAP_HEIGHT, MAP_WIDTH).
        max_time_steps (int, optional): Tổng số bước thời gian tối đa trong một episode.
                                        Cần thiết nếu use_time_channel=True.
        use_time_channel (bool): True nếu muốn thêm kênh thời gian còn lại.

    Returns:
        np.ndarray: Input tensor cho Critic.
    """
    MAP_HEIGHT, MAP_WIDTH = map_dims
    num_channels = 7

    critic_input = np.zeros((num_channels, MAP_HEIGHT, MAP_WIDTH), dtype=np.float32)
    current_time = global_state_dict['time_step']

    # --- Channel 0: Map Obstacles ---
    grid_map = np.array(global_state_dict['map'])
    critic_input[0, :, :] = grid_map[:MAP_HEIGHT, :MAP_WIDTH]

    # --- Lấy thông tin robots ---
    robots_info = global_state_dict['robots']  # list of (x, y, carrying_package_id)

    # --- Channel 1: Vị trí của tất cả Robots ---
    # --- Channel 2: Trạng thái mang hàng của Robots ---
    for r_data in robots_info:
        # Tọa độ từ state là 1-based, chuyển về 0-based cho array
        r_x_1based, r_y_1based, carrying_package_id = r_data
        r_row, r_col = r_x_1based - 1, r_y_1based - 1

        if 0 <= r_row < MAP_HEIGHT and 0 <= r_col < MAP_WIDTH:
            critic_input[1, r_row, r_col] = 1.0  # Đánh dấu vị trí robot
            if carrying_package_id != 0:
                critic_input[2, r_row, r_col] = 1.0  # Đánh dấu robot đang mang hàng


    for pkg_id, pkg_details in all_packages_data.items():
        # Tọa độ từ package_details là 1-based
        p_x_1based, p_y_1based = pkg_details['x'], pkg_details['y']
        pt_x_1based, pt_y_1based = pkg_details['target_x'], pkg_details['target_y']

        p_row, p_col = p_x_1based - 1, p_y_1based - 1
        pt_row, pt_col = pt_x_1based - 1, pt_y_1based - 1

        status = pkg_details['status']
        deadline = pkg_details['deadline']

        # Tính toán urgency (giống như trong state_to_agent của bạn nhưng có chuẩn hóa)
        time_to_deadline = max(0, deadline - current_time) # Tránh số âm nếu đã qua deadline
        # epsilon để tránh chia cho 0, và một giá trị cơ sở nhỏ để urgency không quá lớn
        urgency_val = 1.0 / (time_to_deadline + 1e-5 + 0.1) # Thêm 0.1 để giảm độ lớn của urgency

        normalized_urgency = min(1.0, urgency_val / 10.0) # Giả sử max urgency sau khi +0.1 là khoảng 10

        if status == 'waiting':
            # --- Channel 3: Vị trí các Package đang chờ (Waiting) ---
            if 0 <= p_row < MAP_HEIGHT and 0 <= p_col < MAP_WIDTH:
                critic_input[3, p_row, p_col] = 1.0

            # --- Channel 4: Vị trí đích của các Package đang chờ (Waiting) ---
            if 0 <= pt_row < MAP_WIDTH and 0 <= pt_col < MAP_WIDTH: # Sửa: pt_row < MAP_HEIGHT
                 critic_input[4, pt_row, pt_col] = 1.0

            # --- Channel 6: Mức độ khẩn cấp (đặt tại vị trí đích) ---
            if 0 <= pt_row < MAP_HEIGHT and 0 <= pt_col < MAP_WIDTH:
                critic_input[6, pt_row, pt_col] = max(critic_input[6, pt_row, pt_col], normalized_urgency)

        elif status == 'carrying': # Hoặc 'in_transit' tùy theo định nghĩa của bạn
            # Package 'carrying' không được đánh dấu ở Channel 3 hoặc 4
            # Vị trí hiện tại của nó là vị trí robot (đã có ở Channel 1 & 2)

            # --- Channel 5: Vị trí đích của các Package đang vận chuyển (Carrying) ---
            if 0 <= pt_row < MAP_HEIGHT and 0 <= pt_col < MAP_WIDTH:
                critic_input[5, pt_row, pt_col] = 1.0

            # --- Channel 6: Mức độ khẩn cấp (đặt tại vị trí đích) ---
            if 0 <= pt_row < MAP_HEIGHT and 0 <= pt_col < MAP_WIDTH:
                critic_input[6, pt_row, pt_col] = max(critic_input[6, pt_row, pt_col], normalized_urgency)
    return critic_input

### Actor observation

In [6]:
def convert_state_to_actor_input(global_state_dict, all_packages_data, current_agent_idx, map_dims):
    """
    Build the per-agent observation tensor fed to the Actor.

    Output channels (each of shape MAP_HEIGHT x MAP_WIDTH):
        0: map obstacles (copied from ``global_state_dict['map']``)
        1: position of the current agent
        2: positions of the other agents
        3: positions of packages with status 'waiting'
        4: target cell of the package carried by the current agent
        5: urgency - at the agent's own cell for the package it carries,
           and, only when the agent carries nothing, at the cell of every
           'waiting' package (the maximum is kept per cell)

    Coordinates in the state and package store are 1-based and converted to
    0-based indices; cells outside the map dimensions are ignored.

    Args:
        global_state_dict: Dict providing 'time_step', 'map' and 'robots'
            (list of (x, y, carrying_package_id)).
        all_packages_data: Dict {pkg_id: details} where details provides
            'x', 'y', 'target_x', 'target_y', 'deadline' and 'status'.
        current_agent_idx: Index into the robots list of the observing agent.
        map_dims: Tuple (MAP_HEIGHT, MAP_WIDTH).

    Returns:
        np.ndarray: float32 array of shape (6, MAP_HEIGHT, MAP_WIDTH).
    """
    height, width = map_dims
    obs = np.zeros((6, height, width), dtype=np.float32)
    now = global_state_dict['time_step']

    def inside(row, col):
        return 0 <= row < height and 0 <= col < width

    def urgency_of(deadline):
        # Same raw urgency as the critic, squashed with u / (u + 4) and
        # clamped to [0, 1].
        remaining = max(0, deadline - now)
        raw = 1.0 / (remaining + 1e-5 + 0.1)
        squashed = raw / (raw + 4.0)
        return min(1.0, max(0.0, squashed))

    # --- Channel 0: map obstacles ---
    obs[0, :, :] = np.array(global_state_dict['map'])[:height, :width]

    robots = global_state_dict['robots']
    own_x, own_y, own_pkg_id = robots[current_agent_idx]
    own_r, own_c = own_x - 1, own_y - 1
    own_visible = inside(own_r, own_c)

    # --- Channel 1: the current agent ---
    if own_visible:
        obs[1, own_r, own_c] = 1.0

    # --- Channel 2: every other agent ---
    for idx, (other_x, other_y, _) in enumerate(robots):
        if idx == current_agent_idx:
            continue
        row, col = other_x - 1, other_y - 1
        if inside(row, col):
            obs[2, row, col] = 1.0

    # --- Channels 4 and 5: the package carried by the current agent ---
    carrying = own_pkg_id != 0
    if carrying and own_pkg_id in all_packages_data:
        carried = all_packages_data[own_pkg_id]
        goal_r, goal_c = carried['target_x'] - 1, carried['target_y'] - 1
        if inside(goal_r, goal_c):
            obs[4, goal_r, goal_c] = 1.0
        carried_urgency = urgency_of(carried['deadline'])
        if own_visible:
            obs[5, own_r, own_c] = max(obs[5, own_r, own_c], carried_urgency)

    # --- Channels 3 and 5: waiting packages ---
    for details in all_packages_data.values():
        if details['status'] != 'waiting':
            continue
        row, col = details['x'] - 1, details['y'] - 1
        visible = inside(row, col)
        if visible:
            obs[3, row, col] = 1.0
        # Pick-up urgency is only shown to an agent with free hands.
        if not carrying:
            waiting_urgency = urgency_of(details['deadline'])
            if visible:
                obs[5, row, col] = max(obs[5, row, col], waiting_urgency)
    return obs

### Critic Network

In [7]:
import torch.nn as nn
import torch.nn.functional as F

class CriticCNN_V3(nn.Module):
    """Centralized critic: four conv blocks, global pooling and a value head.

    Each conv block is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d ->
    ReLU with 32, 64, 128 and 256 filters. Adaptive average pooling to 1x1
    makes the head independent of the spatial size of the input.

    Args:
        input_channels (int): Number of channels of the global observation.
    """

    def __init__(self, input_channels=7):
        super().__init__()
        self.input_channels = input_channels

        same_size = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(input_channels, 32, **same_size)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, **same_size)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, **same_size)
        self.bn3 = nn.BatchNorm2d(128)
        self.conv4 = nn.Conv2d(128, 256, **same_size)
        self.bn4 = nn.BatchNorm2d(256)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))

        # Head: 256 pooled features -> 128 -> scalar state value.
        self.fc1 = nn.Linear(256, 128)
        self.fc_critic_head = nn.Linear(128, 1)

    def forward(self, global_observation):
        """Return the state value for a batch of global observations.

        Args:
            global_observation (torch.Tensor): Batch of observations, passed
                straight to the first Conv2d layer.

        Returns:
            torch.Tensor: Output of the value head, one value per batch entry.
        """
        features = global_observation
        blocks = (
            (self.conv1, self.bn1),
            (self.conv2, self.bn2),
            (self.conv3, self.bn3),
            (self.conv4, self.bn4),
        )
        for conv, norm in blocks:
            features = F.relu(norm(conv(features)))

        pooled = self.pool(features)
        flat = pooled.view(pooled.size(0), -1)
        hidden = F.relu(self.fc1(flat))
        return self.fc_critic_head(hidden)

### Actor Network

In [8]:
class ActorCNN(nn.Module):
    """Actor network: a CNN over the local observation producing action logits.

    Three conv blocks (Conv2d 3x3, stride 1, padding 1 -> BatchNorm2d ->
    ReLU) with 32, 64 and 128 filters, global average pooling, then two
    linear layers.

    The 15 discrete actions combine 5 moves ('S', 'L', 'R', 'U', 'D') with
    3 package operations ('0' nothing, '1' pick up, '2' drop):
    ``index = move_index * 3 + package_index``.
    """

    def __init__(self, input_channels=6, num_actions=15):
        """
        Args:
            input_channels (int): Number of channels of the local observation (e.g. 6).
            num_actions (int): Total number of actions an agent can take
                (e.g. 5 moves * 3 package operations = 15).
        """
        super().__init__()
        self.input_channels = input_channels
        self.num_actions = num_actions

        # Feature extractor; the spatial size is preserved by padding=1.
        same_size = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(input_channels, 32, **same_size)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, **same_size)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(64, 128, **same_size)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))  # Global average pooling.

        # After pooling and flattening there are 128 features per sample.
        self.fc1 = nn.Linear(128, 128)
        self.fc_actor_head = nn.Linear(128, num_actions)  # Action logits.

    def forward(self, local_observation):
        """
        Forward pass of the Actor network.

        Args:
            local_observation (torch.Tensor): Local observation of the agent,
                shape (batch_size, input_channels, height, width).

        Returns:
            torch.Tensor: Action logits, shape (batch_size, num_actions).
        """
        features = local_observation
        for conv, norm in ((self.conv1, self.bn1),
                           (self.conv2, self.bn2),
                           (self.conv3, self.bn3)):
            features = F.relu(norm(conv(features)))

        pooled = self.pool(features)            # (batch_size, 128, 1, 1)
        flat = pooled.view(pooled.size(0), -1)  # (batch_size, 128)
        hidden = F.relu(self.fc1(flat))
        return self.fc_actor_head(hidden)       # (batch_size, num_actions)

    @staticmethod
    def decode_action(action_index):
        """
        Convert an action index (0-14) into (move_action_str, package_action_str).
        Example: 0 -> ('S', '0')
        """
        if action_index < 0 or action_index >= 15:
            raise ValueError("Action index phải nằm trong khoảng [0, 14]")

        moves = ['S', 'L', 'R', 'U', 'D']   # Stay, Left, Right, Up, Down
        package_ops = ['0', '1', '2']       # Do nothing, Pickup, Drop

        n_ops = len(package_ops)
        move_idx = action_index // n_ops     # 0..4
        package_idx = action_index % n_ops   # 0..2
        return moves[move_idx], package_ops[package_idx]

    @staticmethod
    def encode_action(move_action_str, package_action_str):
        """
        Convert (move_action_str, package_action_str) into an action index (0-14).
        Example: ('S', '0') -> 0
        """
        moves = ['S', 'L', 'R', 'U', 'D']
        package_ops = ['0', '1', '2']

        try:
            move_idx = moves.index(move_action_str)
            package_idx = package_ops.index(package_action_str)
        except ValueError:
            raise ValueError(f"Hành động không hợp lệ: ({move_action_str}, {package_action_str})")

        return package_idx + move_idx * len(package_ops)

In [9]:
class MAPPORolloutBuffer:
    """
    Rollout Buffer for Centralized Critic / Decentralized Actor (CC/DA) MAPPO.
    Stores experience for a fixed-size rollout and provides minibatches for training.
    Assumes per-agent rewards and a single value estimate from the Centralized Critic
    for the global state.

    Designed to store data matching the output of your environment and observation
    conversion functions (convert_global_state_to_critic_input,
    convert_state_to_actor_input, compute_shaped_rewards).
    """
    def __init__(self, buffer_size: int, num_agents: int, map_dims: tuple[int, int],
                 critic_obs_channels: int, actor_obs_channels: int,
                 gamma: float = 0.99, gae_lambda: float = 0.95, device: str | torch.device = 'cpu'):
        """
        Initializes the MAPPO Rollout Buffer.

        Args:
            buffer_size (int): The maximum number of environment steps to store
                               in the buffer (rollout length per training epoch).
            num_agents (int): The number of agents (robots) in the environment.
            map_dims (tuple): The dimensions of the map (MAP_HEIGHT, MAP_WIDTH).
            critic_obs_channels (int): Number of channels in the global observation
                                       for the Critic (based on convert_global_state_to_critic_input).
            actor_obs_channels (int): Number of channels in the local observation
                                      for each Actor (based on convert_state_to_actor_input).
            gamma (float): Discount factor for rewards.
            gae_lambda (float): Lambda parameter for Generalized Advantage Estimation (GAE).
            device (str or torch.device): The device ('cpu' or 'cuda') to move tensors to
                                          when sampling for training.
        """
        self.buffer_size = buffer_size
        self.num_agents = num_agents
        self.map_height, self.map_width = map_dims
        self.critic_obs_channels = critic_obs_channels
        self.actor_obs_channels = actor_obs_channels
        self.gamma = gamma
        self.gae_lambda = gae_lambda
        self.device = device

        self._allocate_storage()
        self.reset()

    def _allocate_storage(self):
        """
        Allocates NumPy arrays for storing the rollout data.
        Each array is sized to hold `buffer_size` steps.
        """
        # Global state observation for the Centralized Critic at time step t
        # Shape: (buffer_size, C_critic, H, W)
        self.global_states = np.zeros(
            (self.buffer_size, self.critic_obs_channels, self.map_height, self.map_width),
            dtype=np.float32
        )

        # Local state observation for *each* Decentralized Actor at time step t
        # Shape: (buffer_size, num_agents, C_actor, H, W)
        self.agent_local_states = np.zeros(
            (self.buffer_size, self.num_agents, self.actor_obs_channels, self.map_height, self.map_width),
            dtype=np.float32
        )

        # Action taken by each Agent at time step t (stored as integer index)
        # Shape: (buffer_size, num_agents)
        self.actions = np.zeros(
            (self.buffer_size, self.num_agents),
            dtype=np.int64
        )

        # Shaped Reward received by *each* Agent after taking action at time step t
        # Shape: (buffer_size, num_agents)
        self.rewards = np.zeros(
            (self.buffer_size, self.num_agents),
            dtype=np.float32
        )

        # Terminal flag indicating if the episode ended after time step t
        # Shape: (buffer_size,)
        self.dones = np.zeros(
            self.buffer_size,
            dtype=np.bool_
        )

        # Log probability of the action taken by each Agent at time step t
        # (from the policy used to select the action)
        # Shape: (buffer_size, num_agents)
        self.log_probs = np.zeros(
            (self.buffer_size, self.num_agents),
            dtype=np.float32
        )

        # Value estimate V(s_t) from the Centralized Critic for the global state at time step t
        # Shape: (buffer_size,) - storing a single scalar value per step
        self.values = np.zeros(
            self.buffer_size,
            dtype=np.float32
        )

        # Placeholders for Advantages and Returns (calculated AFTER data collection)
        # These are calculated PER AGENT based on the global value estimate and per-agent rewards.
        # Shape: (buffer_size, num_agents)
        self.advantages = np.zeros(
            (self.buffer_size, self.num_agents),
            dtype=np.float32
        )
        self.returns = np.zeros(
            (self.buffer_size, self.num_agents),
            dtype=np.float32
        )

    def reset(self):
        """
        Resets the buffer pointers and effectively clears the collected data
        for the next rollout. The underlying NumPy arrays are kept to avoid
        reallocation overhead, but the `ptr` indicates the currently active data range.
        """
        self.ptr = 0  # Pointer to the next empty slot to add data
        self.path_start_idx = 0  # Index marking the start of the current episode segment

    def add(self, global_obs: np.ndarray, agent_obs_list: list[np.ndarray],
            action_list: list[int], reward_list: list[float], done: bool,
            log_prob_list: list[float], value: float):
        """
        Adds one step of experience (from t to t+1) to the buffer.

        Args:
            global_obs (np.ndarray): Global observation for the Critic at time t.
                                     Shape (C_critic, H, W).
            agent_obs_list (list[np.ndarray]): List of local observations for each agent
                                             at time t. Each np.ndarray shape (C_actor, H, W).
            action_list (list[int]): List of action indices taken by each agent at time t.
                                     Length num_agents.
            reward_list (list[float]): List of shaped rewards received by each agent
                                       after action at time t. Length num_agents.
            done (bool): True if the episode terminates after time step t.
            log_prob_list (list[float]): List of log probabilities for the actions taken
                                         at time t. Length num_agents.
            value (float): Value estimate V(s_t) from the Centralized Critic for global_obs.
        """
        if self.ptr >= self.buffer_size:
            # In a standard rollout buffer, this signals the end of data collection for the epoch.
            # No more data is added until after training and reset.
            # print("Warning: Buffer is full. Cannot add more steps.") # Optional warning
            return

        # Store data at the current pointer position
        self.global_states[self.ptr] = global_obs
        # Stack the list of agent observations into a single numpy array
        self.agent_local_states[self.ptr] = np.stack(agent_obs_list, axis=0) # Shape becomes (num_agents, C_actor, H, W)
        self.actions[self.ptr] = np.array(action_list, dtype=np.int64)
        self.rewards[self.ptr] = np.array(reward_list, dtype=np.float32)
        self.dones[self.ptr] = done
        self.log_probs[self.ptr] = np.array(log_prob_list, dtype=np.float32)
        self.values[self.ptr] = value # Store scalar V(s_t)

        # If the episode finished at this step, calculate GAE/Returns for the completed segment
        if done:
            # Calculate GAE and Returns for the segment from path_start_idx up to (and including) ptr
            # The last_value for a terminal state is 0.0.
            self._compute_returns_and_advantages_segment(
                path_start_idx=self.path_start_idx,
                path_end_idx=self.ptr + 1, # Segment ends *after* the step at ptr
                last_value=0.0 # V(s_{T+1}) = 0 for a terminal state s_T
            )
            # Start index for the next segment/episode is the step after the current one
            self.path_start_idx = self.ptr + 1

        # Increment the pointer for the next `add` call
        self.ptr += 1

    def _compute_returns_and_advantages_segment(self, path_start_idx: int, path_end_idx: int, last_value: float):
        """
        Internal helper to compute Returns and Generalized Advantage Estimation (GAE)
        for a specific segment of the buffer ([path_start_idx, path_end_idx - 1]).

        Args:
            path_start_idx (int): The starting index (inclusive) in the buffer.
            path_end_idx (int): The ending index (exclusive) in the buffer.
                                This is one step BEYOND the last step in the segment.
            last_value (float): The estimated value of the state *after* the last
                                step in this segment (V(s_T) if T is the last step
                                in the segment). Used for bootstrapping the returns/GAE.
        """
        # Extract data for the current segment
        rewards_segment = self.rewards[path_start_idx : path_end_idx] # Shape: (segment_len, num_agents)
        values_segment = self.values[path_start_idx : path_end_idx]   # Shape: (segment_len,)
        dones_segment = self.dones[path_start_idx : path_end_idx]     # Shape: (segment_len,)

        segment_len = rewards_segment.shape[0]

        # Initialize GAE for the current segment. It represents the advantage
        # *from* the state *after* the segment ends.
        current_gae = np.zeros(self.num_agents, dtype=np.float32)

        # Iterate backwards through the segment to calculate GAE and Returns
        # t is the index within the segment [0, segment_len - 1]
        # buffer_t is the corresponding index in the full buffer [path_start_idx, path_end_idx - 1]
        for t in reversed(range(segment_len)):
            buffer_t = path_start_idx + t

            # Determine V(s_{t+1}) and the non-terminal factor for the next state
            if t == segment_len - 1:
                # This is the last step of the segment. The "next state" is after the segment.
                # Its value is last_value, and its non-terminal factor depends on whether
                # the state at 'buffer_t' was terminal (dones_segment[t]).
                next_non_terminal = 1.0 - dones_segment[t]
                next_value = last_value
            else:
                # Not the last step of the segment. The "next state" is at buffer_t + 1.
                # Its value is values_segment[t+1], and its non-terminal factor depends on
                # whether the state at 'buffer_t + 1' was terminal (dones_segment[t+1]).
                next_non_terminal = 1.0 - dones_segment[t+1]
                next_value = values_segment[t+1] # Scalar value

            # delta_t (Temporal Difference error) for each agent:
            # delta_t = r_t + gamma * V(s_{t+1}) * (1 - done_{t+1}) - V(s_t)
            # rewards_segment[t] is (num_agents,), values_segment[t] is scalar.
            # Broadcasting works correctly.
            delta = rewards_segment[t] + self.gamma * next_value * next_non_terminal - values_segment[t]

            # GAE_t (Generalized Advantage Estimation) for each agent:
            # GAE_t = delta_t + gamma * lambda * GAE_{t+1} * (1 - done_{t+1})
            # current_gae holds GAE_{t+1} from the previous iteration.
            current_gae = delta + self.gamma * self.gae_lambda * current_gae * next_non_terminal

            # Store the calculated GAE for the step at buffer_t
            self.advantages[buffer_t] = current_gae

        # Returns_t = Advantages_t + Values_t
        # Returns are simply GAE + the value prediction V(s_t)
        # values_segment is (segment_len,), self.advantages[...] is (segment_len, num_agents)
        # Need to add a new axis to values_segment for correct broadcasting before addition
        self.returns[path_start_idx : path_end_idx] = self.advantages[path_start_idx : path_end_idx] + values_segment[:, np.newaxis]


    def compute_final_returns_and_advantages(self, last_global_state_obs: np.ndarray | None, last_value_fn: callable):
        """
        Computes the Returns and Advantages for the final segment of the buffer
        after all steps have been added. This method should be called once per rollout
        after the main data collection loop finishes (either buffer full or episode done).

        Args:
            last_global_state_obs (np.ndarray or None): The global observation of the
                                                       state *after* the last step
                                                       added to the buffer (s_T), but
                                                       *only if* the episode did NOT
                                                       terminate at the last step (dones[ptr-1] is False).
                                                       Shape (C_critic, H, W). Is None if the
                                                       last step added had done=True.
            last_value_fn (callable): A function (typically the Critic's forward method)
                                      that takes a global observation tensor and returns
                                      the value estimate tensor. Used to get V(s_T) if the
                                      rollout ends mid-episode (`last_global_state_obs` is not None).
                                      Should accept a batch (e.g., (1, C, H, W)).
        """
        # If the buffer is empty, there's nothing to compute
        if self.ptr == 0:
            return

        # Determine the value of the state *after* the last step added to the buffer (s_T).
        # This value is used to bootstrap the GAE/Returns calculation for the final segment.
        if self.dones[self.ptr - 1]:
            # The last state added (s_{ptr-1}) was terminal, so the value of the next state (s_T) is 0.
            final_bootstrap_value = 0.0
        else:
            # The rollout ended mid-episode (buffer filled or MAX_TIME_STEPS reached).
            # We need to predict the value of the state *after* the last step added (s_T) using the Critic.
            if last_global_state_obs is None:
                 raise ValueError("last_global_state_obs must be provided to compute the final bootstrap value when the rollout doesn't end with done=True.")

            # Convert the last global state observation to a tensor and predict its value using the Critic
            last_obs_tensor = torch.tensor(last_global_state_obs[np.newaxis, ...], dtype=torch.float32).to(self.device)
            with torch.no_grad():
                # Assuming the critic model takes a batch and returns a batch of scalar values
                final_bootstrap_value = last_value_fn(last_obs_tensor).item() # .item() extracts scalar from 1-element tensor

        # Compute GAE and Returns for the final segment.
        # This segment includes all steps from the last episode's start index (path_start_idx)
        # up to the last step added to the buffer (ptr - 1).
        self._compute_returns_and_advantages_segment(
            path_start_idx=self.path_start_idx, # Start of the segment
            path_end_idx=self.ptr,              # End of the segment (exclusive)
            last_value=final_bootstrap_value    # Bootstrap value for the state after this segment
        )


    def sample_minibatches(self, num_minibatches: int, normalize_advantages: bool = True):
        """
        Generates minibatches from the collected rollout data.
        This method should only be called AFTER compute_final_returns_and_advantages
        has been called to ensure advantages and returns are computed.

        Data is flattened such that each (timestep, agent) pair becomes a single
        sample, which is then used for random minibatching.

        Args:
            num_minibatches (int): The number of minibatches to divide the collected
                                   data into. The size of each minibatch will be
                                   (total_steps * num_agents) // num_minibatches.
            normalize_advantages (bool): Whether to normalize advantages across
                                         all collected samples. Standard practice in PPO.

        Yields:
            tuple: A minibatch of tensors on the specified device, containing:
                   (global_states_batch, agent_local_states_batch, actions_batch,
                    old_log_probs_batch, old_values_batch, returns_batch, advantages_batch)
                   Each tensor has shape (minibatch_size, ...).
        """
        # Total number of valid steps collected in this rollout
        num_steps_collected = self.ptr

        if num_steps_collected == 0:
            print("Warning: Buffer is empty, cannot sample minibatches.")
            return # Nothing to yield

        # Flatten the data across time steps and agents
        # This treats each (timestep, agent) combination as a single sample for batching
        total_samples = num_steps_collected * self.num_agents

        # Reshape global state and value to match the size of agent data
        # (buffer_size, C, H, W) -> (buffer_size * num_agents, C, H, W) by repeating per agent
        flat_global_states = np.repeat(self.global_states[:num_steps_collected, np.newaxis, ...],
                                       self.num_agents, axis=1).reshape(-1, self.critic_obs_channels, self.map_height, self.map_width)

        # (buffer_size, num_agents, C_actor, H, W) -> (buffer_size * num_agents, C_actor, H, W)
        flat_agent_local_states = self.agent_local_states[:num_steps_collected].reshape(-1, self.actor_obs_channels, self.map_height, self.map_width)

        # (buffer_size, num_agents) -> (buffer_size * num_agents,)
        flat_actions = self.actions[:num_steps_collected].reshape(-1)
        flat_log_probs = self.log_probs[:num_steps_collected].reshape(-1)
        flat_returns = self.returns[:num_steps_collected].reshape(-1)
        flat_advantages = self.advantages[:num_steps_collected].reshape(-1)

        # (buffer_size,) -> (buffer_size * num_agents,) by repeating the scalar value per agent
        flat_values = np.repeat(self.values[:num_steps_collected, np.newaxis],
                                self.num_agents, axis=1).reshape(-1)

        # Convert NumPy arrays to PyTorch Tensors and move to device
        flat_global_states_tensor = torch.tensor(flat_global_states, dtype=torch.float32).to(self.device)
        flat_agent_local_states_tensor = torch.tensor(flat_agent_local_states, dtype=torch.float32).to(self.device)
        flat_actions_tensor = torch.tensor(flat_actions, dtype=torch.int64).to(self.device)
        flat_log_probs_tensor = torch.tensor(flat_log_probs, dtype=torch.float32).to(self.device)
        flat_values_tensor = torch.tensor(flat_values, dtype=torch.float32).to(self.device)
        flat_returns_tensor = torch.tensor(flat_returns, dtype=torch.float32).to(self.device)
        flat_advantages_tensor = torch.tensor(flat_advantages, dtype=torch.float32).to(self.device)

        # Normalize advantages if requested
        if normalize_advantages:
            # Add a small epsilon to avoid division by zero if std is 0 (highly unlikely with actual data)
            advantages_mean = flat_advantages_tensor.mean()
            advantages_std = flat_advantages_tensor.std()
            flat_advantages_tensor = (flat_advantages_tensor - advantages_mean) / (advantages_std + 1e-8)

        # Create a sampler to yield indices for minibatches
        # We sample random indices from the flattened range [0, total_samples - 1]
        minibatch_size = total_samples // num_minibatches
        if minibatch_size == 0:
             raise ValueError(f"Minibatch size is 0. Increase buffer_size or decrease num_minibatches. total_samples={total_samples}, num_minibatches={num_minibatches}")

        sampler = BatchSampler(
            SubsetRandomSampler(range(total_samples)), # Samples indices randomly
            minibatch_size,                           # Size of each batch of indices
            drop_last=True                            # Drop the last batch if it's smaller than minibatch_size
        )

        # Yield minibatches using the sampled indices
        for indices in sampler:
            yield (
                flat_global_states_tensor[indices],
                flat_agent_local_states_tensor[indices],
                flat_actions_tensor[indices],
                flat_log_probs_tensor[indices],
                flat_values_tensor[indices],
                flat_returns_tensor[indices],
                flat_advantages_tensor[indices]
            )

    def ready_to_sample(self):
        """
        Checks if the buffer has collected enough steps to form a full rollout.
        (i.e., reached the buffer_size).
        """
        return self.ptr >= self.buffer_size

### Training setup



In [10]:
import collections

def run_bfs_distance(grid_map, start, goal):
    """
    Return the length of the shortest 4-connected path between two cells.

    The search is a breadth-first search that expands outward from `goal`
    and stops as soon as `start` is reached. Moves are up/down/left/right
    and every move costs 1, so the distance is the same in both directions.

    :param grid_map: 2D sequence representing the map (0: free, 1: obstacle).
                     Assumed to be rectangular.
    :param start: (row, column) of the starting position (0-indexed).
    :param goal: (row, column) of the goal position (0-indexed).
    :return: 0 if `start` equals `goal` (and the cell is free); otherwise the
             shortest distance as an integer. Returns float('inf') if either
             position is outside the map, if either position is an obstacle,
             or if the goal cannot be reached from the start.
    """
    n_rows = len(grid_map)
    n_cols = len(grid_map[0]) if n_rows > 0 else 0

    # Positions are used as dict keys below, so make sure they are hashable
    # even when the caller passes lists.
    start = tuple(start)
    goal = tuple(goal)

    def in_bounds(pos):
        return 0 <= pos[0] < n_rows and 0 <= pos[1] < n_cols

    if not (in_bounds(start) and in_bounds(goal)):
        return float('inf')

    if grid_map[start[0]][start[1]] == 1 or grid_map[goal[0]][goal[1]] == 1:
        # Start or goal position is an obstacle
        return float('inf')

    if start == goal:
        return 0

    # distance[pos] is the shortest distance from the goal to pos; it doubles
    # as the visited set.
    distance = {goal: 0}
    queue = collections.deque([goal])
    moves = ((-1, 0), (1, 0), (0, -1), (0, 1))  # Up, Down, Left, Right

    while queue:
        current_pos = queue.popleft()
        next_dist = distance[current_pos] + 1

        for dr, dc in moves:
            neighbor_pos = (current_pos[0] + dr, current_pos[1] + dc)

            if not in_bounds(neighbor_pos):
                continue
            if grid_map[neighbor_pos[0]][neighbor_pos[1]] == 1:
                continue
            if neighbor_pos in distance:
                continue

            # BFS discovers cells in order of distance, so the first time the
            # start is seen its distance is already final.
            if neighbor_pos == start:
                return next_dist

            distance[neighbor_pos] = next_dist
            queue.append(neighbor_pos)

    # The queue drained without ever reaching the start.
    return float('inf')

In [11]:
def compute_shaped_rewards(
    global_reward,
    prev_env_state_dict,
    current_env_state_dict,
    actions_taken_for_all_agents,
    persistent_packages_at_prev_state,
    num_agents,
):
    """
    Computes a shaped team reward for one environment transition.

    A shaping term is accumulated for every agent (pickup / delivery bonuses,
    wasted-action, collision, idle and distance-progress terms). The terms of
    all agents are then summed and added to `global_reward`.

    Args:
        global_reward: Reward reported by the environment for this step.
        prev_env_state_dict: State before the step. Reads 'robots' (one
            (row, col, carried_package_id) triple per agent), 'time_step'
            and 'map'.
        current_env_state_dict: State after the step. Reads 'robots' and
            'time_step'.
        actions_taken_for_all_agents: Per agent, a (move_str, package_op_str)
            pair; move_str 'S' means stay, package_op_str '1' is pickup and
            '2' is drop.
        persistent_packages_at_prev_state: Mapping package_id -> dict with the
            keys 'status', 'start_time', 'x', 'y', 'target_x', 'target_y'
            and 'deadline', describing the packages before the step.
        num_agents: Number of agents to evaluate.

    Returns:
        A single scalar: ``global_reward`` plus the sum of all agents' shaping
        terms. The per-agent breakdown is NOT returned.

    Notes:
        Robot coordinates are shifted by -1 before use, while package
        coordinates are used as stored.
        NOTE(review): this presumably means robots are 1-indexed and packages
        0-indexed in the state dicts - confirm against the environment.
    """
    # --- Shaping Constants ---
    SHAPING_SUCCESSFUL_PICKUP = 4
    SHAPING_SUCCESSFUL_DELIVERY_ON_TIME = 200
    SHAPING_SUCCESSFUL_DELIVERY_LATE = 50
    SHAPING_MOVED_CLOSER_TO_TARGET = 1
    SHAPING_WASTED_PICKUP_ATTEMPT = 0
    SHAPING_WASTED_DROP_ATTEMPT = 0
    SHAPING_COLLISION_OR_STUCK = -0.5
    SHAPING_IDLE_WITH_AVAILABLE_TASKS = -0.5
    SHAPING_MOVED_AWAY_FROM_TARGET = -1

    packages = persistent_packages_at_prev_state
    shaped_rewards = np.zeros(num_agents, dtype=np.float32)
    current_time = int(current_env_state_dict['time_step'])

    def is_available(pkg_info):
        # A package could be picked up before this step if it was waiting and
        # had already appeared on the map.
        return (pkg_info['status'] == 'waiting'
                and pkg_info['start_time'] <= prev_env_state_dict['time_step'])

    for agent_idx in range(num_agents):
        prev_r, prev_c, prev_pkg = [int(x) for x in prev_env_state_dict['robots'][agent_idx]]
        curr_r, curr_c, curr_pkg = [int(x) for x in current_env_state_dict['robots'][agent_idx]]
        prev_r -= 1; prev_c -= 1; curr_r -= 1; curr_c -= 1
        move_str, pkg_op_str = actions_taken_for_all_agents[agent_idx]
        pkg_op = int(pkg_op_str)

        # 1. Successful pickup / delivery, detected from the change of the
        #    carried-package id.
        if prev_pkg == 0 and curr_pkg != 0:
            shaped_rewards[agent_idx] += SHAPING_SUCCESSFUL_PICKUP
        elif prev_pkg != 0 and curr_pkg == 0:
            if prev_pkg in packages:
                if current_time <= packages[prev_pkg]['deadline']:
                    shaped_rewards[agent_idx] += SHAPING_SUCCESSFUL_DELIVERY_ON_TIME
                else:
                    shaped_rewards[agent_idx] += SHAPING_SUCCESSFUL_DELIVERY_LATE

        # 2. Wasted package operations
        if pkg_op == 1:  # Pickup
            if prev_pkg != 0:
                # Tried to pick up while already carrying something.
                shaped_rewards[agent_idx] += SHAPING_WASTED_PICKUP_ATTEMPT
            elif curr_pkg == 0:
                # Pickup failed; it was wasted if nothing was available on
                # the cell the agent ended up on.
                can_pickup = any(
                    is_available(info) and (info['x'], info['y']) == (curr_r, curr_c)
                    for info in packages.values()
                )
                if not can_pickup:
                    shaped_rewards[agent_idx] += SHAPING_WASTED_PICKUP_ATTEMPT
        elif pkg_op == 2 and prev_pkg == 0:  # Drop with nothing to drop
            shaped_rewards[agent_idx] += SHAPING_WASTED_DROP_ATTEMPT

        # 3. Movement
        moved = (prev_r, prev_c) != (curr_r, curr_c)
        if move_str != 'S' and not moved:
            shaped_rewards[agent_idx] += SHAPING_COLLISION_OR_STUCK

        # Progress towards the current objective only matters when the agent
        # actually changed cell, so the (BFS-heavy) target search is skipped
        # otherwise.
        if moved:
            grid_map = prev_env_state_dict['map']
            prev_pos = (prev_r, prev_c)
            target_pos = None
            dist_before = None

            if prev_pkg != 0 and prev_pkg in packages:
                # Carrying a package: the objective is its drop-off cell.
                target_pos = (packages[prev_pkg]['target_x'], packages[prev_pkg]['target_y'])
                dist_before = run_bfs_distance(grid_map, prev_pos, target_pos)
            else:
                # Otherwise the objective is the nearest available package
                # (first one wins on ties).
                candidates = [(info['x'], info['y']) for info in packages.values() if is_available(info)]
                if candidates:
                    dist_before, target_pos = min(
                        ((run_bfs_distance(grid_map, prev_pos, pos), pos) for pos in candidates),
                        key=lambda pair: pair[0]
                    )

            if target_pos is not None:
                dist_after = run_bfs_distance(grid_map, (curr_r, curr_c), target_pos)
                if dist_after < dist_before:
                    shaped_rewards[agent_idx] += SHAPING_MOVED_CLOSER_TO_TARGET
                elif dist_after > dist_before:
                    shaped_rewards[agent_idx] += SHAPING_MOVED_AWAY_FROM_TARGET

        # 4. Deliberately standing still, empty-handed, while an available
        #    package is within 3 steps.
        if not moved and move_str == 'S' and prev_pkg == 0:
            idle_nearby = any(
                is_available(info) and
                run_bfs_distance(prev_env_state_dict['map'], (prev_r, prev_c), (info['x'], info['y'])) <= 3
                for info in packages.values()
            )
            if idle_nearby:
                shaped_rewards[agent_idx] += SHAPING_IDLE_WITH_AVAILABLE_TASKS

    return global_reward + shaped_rewards.sum()

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.distributions import Categorical
import time
import os # Import os for creating directories
from env import Environment # Assuming env.py is in the same directory or importable
from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
import collections # For package_dict management and episode stats deque
from tqdm import tqdm # For progress bar

# ActorCNN, CriticCNN_V3, MAPPORolloutBuffer, state_to_agent,
# convert_global_state_to_critic_input, convert_state_to_actor_input and
# compute_shaped_rewards must already be defined (earlier in this file)
# before this code block runs.

# --- Shaped rewards ---
# NOTE: the training loop below stores the value returned by
# compute_shaped_rewards in the rollout buffer as `reward_list` and treats it
# as a numpy array of shape (num_agents,). Verify that the definition of
# compute_shaped_rewards actually returns per-agent values in that shape.


# --- Hyperparameters ---
# Environment configuration passed to Environment(...) below.
MAP_FILE = 'map1.txt'
MAX_TIME_STEPS = 500
NUM_ROBOTS = 5
NUM_PACKAGES = 20
ENV_SEED = 2025

# Environment rewards (less important with shaping, but still contribute)
MOVE_COST = -0.01
DELIVERY_REWARD = 10
DELAY_REWARD = 1

# Training parameters
TOTAL_TIMESTEPS = 1_000_000 # Total environment steps to run training
BUFFER_SIZE = 500         # Number of environment steps to collect per rollout
NUM_EPOCHS = 10             # Number of PPO update epochs per rollout
NUM_MINIBATCHES = 64        # Number of minibatches to split rollout data into
# The previous values (5e-15 for the actor, 1e-10 for the critic) are far below
# float32 resolution for typical weight magnitudes, so every optimizer step was
# a no-op and the networks never changed. 5e-4 is the default used by the
# reference MAPPO implementation; tune from here if training is unstable.
LEARNING_RATE_ACTOR = 5e-4
LEARNING_RATE_CRITIC = 5e-4
GAMMA = 0.99                # Discount factor
GAE_LAMBDA = 0.95           # GAE parameter
CLIP_EPSILON = 0.2          # PPO clipping parameter
ENTROPY_COEF = 0.01         # Coefficient for entropy bonus
VALUE_LOSS_COEF = 0.5       # Coefficient for value loss

# Saving parameters
SAVE_INTERVAL_EPISODES = 10 # Save model every X completed episodes
CHECKPOINT_DIR = 'mappo_checkpoints_map1' # Directory to save checkpoints

# Observation dimensions (must match the channel counts produced by the
# converter functions that build the critic / actor inputs)
CRITIC_OBS_CHANNELS = 7
ACTOR_OBS_CHANNELS = 6

# Map dimensions are placeholders here; they are overwritten once the map is loaded
MAP_HEIGHT = -1
MAP_WIDTH = -1

# Device setting
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# --- Create Checkpoint Directory ---
# torch.save does not create missing parent directories, so make sure the
# target directory exists before the first checkpoint is written.
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# --- Initialize Environment ---
# Build the environment from the hyperparameters above and read the map size
# from its grid. Any failure is reported and aborts the run.
try:
    env = Environment(MAP_FILE, max_time_steps=MAX_TIME_STEPS, n_robots=NUM_ROBOTS, n_packages=NUM_PACKAGES,
                      move_cost=MOVE_COST, delivery_reward=DELIVERY_REWARD, delay_reward=DELAY_REWARD,
                      seed=ENV_SEED)
    initial_state = env.reset()
    MAP_HEIGHT = len(env.grid)
    MAP_WIDTH = len(env.grid[0]) if MAP_HEIGHT > 0 else 0
    if MAP_WIDTH == 0:
        raise ValueError("Map file is empty or invalid.")
    MAP_DIMS = (MAP_HEIGHT, MAP_WIDTH)
    print(f"Environment initialized. Map dimensions: {MAP_DIMS}")
except Exception as e:
    print(f"Error initializing environment: {e}")
    # Stop with a non-zero status and keep the original exception chained.
    # The interactive `exit()` helper reports success (status 0) and is only
    # available when the `site` module has installed it.
    raise SystemExit(1) from e

# --- Initialize Networks ---
# The critic is built before the actor; both are moved to DEVICE and switched
# to training mode.
critic = CriticCNN_V3(input_channels=CRITIC_OBS_CHANNELS)
critic = critic.to(DEVICE)
critic.train()

actor = ActorCNN(input_channels=ACTOR_OBS_CHANNELS, num_actions=15)
actor = actor.to(DEVICE)
actor.train()

# --- Initialize Optimizers ---
# One Adam optimizer per network, each with its own learning rate.
actor_optimizer = optim.Adam(actor.parameters(), lr=LEARNING_RATE_ACTOR)
critic_optimizer = optim.Adam(critic.parameters(), lr=LEARNING_RATE_CRITIC)

# --- Initialize Rollout Buffer ---
# Collect the buffer configuration first, then construct the buffer from it.
rollout_buffer_config = dict(
    buffer_size=BUFFER_SIZE,
    num_agents=NUM_ROBOTS,
    map_dims=MAP_DIMS,
    critic_obs_channels=CRITIC_OBS_CHANNELS,
    actor_obs_channels=ACTOR_OBS_CHANNELS,
    gamma=GAMMA,
    gae_lambda=GAE_LAMBDA,
    device=DEVICE,
)
buffer = MAPPORolloutBuffer(**rollout_buffer_config)

# --- Training Loop ---
def _save_training_checkpoint(checkpoint_path):
    """Serialize the current training state to ``checkpoint_path`` via ``torch.save``.

    The payload holds the step/episode counters, the actor and critic weights,
    both optimizer states and the rolling episode statistics (the deques are
    stored as plain lists).
    """
    torch.save({
        'total_steps_trained': total_steps_trained,
        'total_episodes_finished': total_episodes_finished,
        'actor_state_dict': actor.state_dict(),
        'critic_state_dict': critic.state_dict(),
        'actor_optimizer_state_dict': actor_optimizer.state_dict(),
        'critic_optimizer_state_dict': critic_optimizer.state_dict(),
        'episode_rewards': list(episode_rewards),
        'episode_durations': list(episode_durations),
        'episode_delivered_packages': list(episode_delivered_packages),
    }, checkpoint_path)


# Best episode reward seen so far. Starting at 0 means a "new best" checkpoint
# is only written for episodes whose environment reward is non-negative.
max_env_reward = 0
total_steps_trained = 0
total_episodes_finished = 0 # Counter for completed episodes

episode_rewards = collections.deque(maxlen=100) # Track rewards for last 100 episodes
episode_durations = collections.deque(maxlen=100)
episode_delivered_packages = collections.deque(maxlen=100)
global_package_dict = {} # Persistent storage for package info across steps

current_env_state = env.get_state() # Get initial state
prev_env_state = current_env_state # State observed one step earlier

print(f"Starting training (approx {TOTAL_TIMESTEPS} env steps)...")

# Loop on the number of environment steps taken rather than on a fixed number
# of iterations, so training stops at (approximately) the requested budget.
pbar = tqdm(total=TOTAL_TIMESTEPS, desc="Total Steps Trained", unit="step")

while total_steps_trained < TOTAL_TIMESTEPS:

    # --- Data Collection (Rollout) ---
    buffer.reset() # Reset buffer for a new rollout
    steps_in_rollout = 0

    while buffer.ptr < BUFFER_SIZE and total_steps_trained < TOTAL_TIMESTEPS:
        current_state_for_agents, global_package_dict = state_to_agent(global_package_dict, current_env_state)

        global_obs_critic = convert_global_state_to_critic_input(
            current_state_for_agents, global_package_dict, map_dims=MAP_DIMS, max_time_steps=MAX_TIME_STEPS
        )

        # One actor observation per agent, in agent-index order.
        agent_obs_actors = [
            convert_state_to_actor_input(
                current_state_for_agents, global_package_dict, agent_idx, map_dims=MAP_DIMS
            )
            for agent_idx in range(NUM_ROBOTS)
        ]

        # --- Get Actions and Values from Networks ---
        with torch.no_grad():
            # Value estimate for the global observation
            global_obs_critic_tensor = torch.tensor(global_obs_critic, dtype=torch.float32).unsqueeze(0).to(DEVICE)
            state_value = critic(global_obs_critic_tensor).squeeze(0).item()

            actions_list = [] # Sampled action index per agent
            log_probs_list = [] # Log-probability of each sampled action
            agent_actions_for_env = [] # (move_str, pkg_op_str) per agent for env.step

            for agent_obs in agent_obs_actors:
                local_obs_actor_tensor = torch.tensor(agent_obs, dtype=torch.float32).unsqueeze(0).to(DEVICE)

                # The same actor network is evaluated for every agent.
                action_logits = actor(local_obs_actor_tensor)

                # Sample an action and record its log-probability for the PPO ratio.
                action_distribution = Categorical(logits=action_logits)
                action_tensor = action_distribution.sample()
                action_index = action_tensor.item()
                log_prob = action_distribution.log_prob(action_tensor).item()

                actions_list.append(action_index)
                log_probs_list.append(log_prob)

                # Decode the action index into the pair expected by env.step.
                move_str, pkg_op_str = ActorCNN.decode_action(action_index)
                agent_actions_for_env.append((move_str, pkg_op_str))

        # --- Step the Environment ---
        # Snapshot the package info as it was before the step, for reward shaping.
        packages_for_shaping = global_package_dict.copy()

        next_env_state, global_reward, done, infos = env.step(agent_actions_for_env)

        # --- Compute Shaped Rewards ---
        # Shaping compares the state before the step with the state after it,
        # using the actions taken and the pre-step package info.
        # NOTE(review): the buffer is given this value as `reward_list`; confirm
        # that compute_shaped_rewards returns one reward per agent.
        per_agent_shaped_rewards = compute_shaped_rewards(
            global_reward=global_reward,
            prev_env_state_dict=current_env_state,      # State *before* the step
            current_env_state_dict=next_env_state,      # State *after* the step
            actions_taken_for_all_agents=agent_actions_for_env,
            persistent_packages_at_prev_state=packages_for_shaping, # Package info *before* the step
            num_agents=NUM_ROBOTS,
        )

        # --- Add Experience to Buffer ---
        buffer.add(
            global_obs=global_obs_critic,
            agent_obs_list=agent_obs_actors,
            action_list=actions_list,
            reward_list=per_agent_shaped_rewards,
            done=done,
            log_prob_list=log_probs_list,
            value=state_value
        )

        # --- Update State and Metrics ---
        prev_env_state = current_env_state
        current_env_state = next_env_state
        steps_in_rollout += 1
        total_steps_trained += 1
        pbar.update(1)

        # --- Episode Handling ---
        if done:
            total_episodes_finished += 1

            # The environment tracks the episode totals itself; read them
            # before the reset below clears them.
            episode_rewards.append(env.total_reward)
            episode_durations.append(env.t)
            delivered_in_episode = sum(1 for pkg in env.packages if pkg.status == 'delivered')
            episode_delivered_packages.append(delivered_in_episode)

            print(f"\nEpisode {total_episodes_finished} finished at step {env.t} ({total_steps_trained} total). Env Reward: {env.total_reward:.2f}, Steps: {env.t}, Delivered: {delivered_in_episode}")
            max_env_reward = max(max_env_reward, env.total_reward)

            # --- Save Model Checkpoint ---
            # Save every SAVE_INTERVAL_EPISODES episodes, and also whenever this
            # episode reaches the best reward seen so far. The episode counter
            # was just incremented, so it is always positive here.
            is_periodic_save = total_episodes_finished % SAVE_INTERVAL_EPISODES == 0
            is_best_so_far = env.total_reward >= max_env_reward
            if is_periodic_save or is_best_so_far:
                checkpoint_path = os.path.join(CHECKPOINT_DIR, f"mappo_checkpoint_ep_{total_episodes_finished:06d}_map1.pth")
                _save_training_checkpoint(checkpoint_path)
                print(f"Saved checkpoint to {checkpoint_path}")

            # Reset environment for the next episode within the same rollout
            current_env_state = env.reset()
            prev_env_state = current_env_state
            global_package_dict = {} # Clear package dict for new episode

    # --- Bootstrap observation for GAE ---
    # If the rollout stopped mid-episode (buffer full or step budget reached),
    # the state after the last stored step is not terminal, so hand its
    # observation to the buffer for bootstrapping. The observation is built
    # from the package dict returned by state_to_agent, exactly as in the
    # rollout loop above.
    if not done:
        bootstrap_state_for_agents, bootstrap_package_dict = state_to_agent(global_package_dict, current_env_state)
        last_global_state_obs_for_gae = convert_global_state_to_critic_input(
            bootstrap_state_for_agents, bootstrap_package_dict, map_dims=MAP_DIMS, max_time_steps=MAX_TIME_STEPS
        )
    else:
        # Last stored step ended an episode: no bootstrap observation is passed
        # (presumably the buffer then uses a bootstrap value of 0).
        last_global_state_obs_for_gae = None

    # --- Compute Returns and Advantages ---
    if buffer.ptr == 0:
        print("Buffer is empty, skipping GAE computation and PPO update.")
        continue

    buffer.compute_final_returns_and_advantages(
        last_global_state_obs=last_global_state_obs_for_gae,
        last_value_fn=critic
    )

    # --- PPO Update Phase ---
    update_start_time = time.time()
    actor_loss_sum = 0
    critic_loss_sum = 0
    entropy_sum = 0
    num_minibatches_processed = 0

    for epoch in range(NUM_EPOCHS):
        # Draw a fresh set of minibatches for every epoch.
        minibatch_generator_this_epoch = buffer.sample_minibatches(NUM_MINIBATCHES, normalize_advantages=True)

        for minibatch in minibatch_generator_this_epoch:
            (
                global_states_mb,
                agent_local_states_mb,
                actions_mb,
                old_log_probs_mb,
                old_values_mb,
                returns_mb,
                advantages_mb
            ) = minibatch

            # --- Critic loss: regression of predicted values onto returns ---
            critic_optimizer.zero_grad()
            predicted_values = critic(global_states_mb).squeeze(-1)
            value_loss = F.mse_loss(predicted_values, returns_mb)

            # --- Actor loss: PPO clipped surrogate objective ---
            actor_optimizer.zero_grad()
            action_logits = actor(agent_local_states_mb)
            action_distribution = Categorical(logits=action_logits)
            new_log_probs = action_distribution.log_prob(actions_mb)

            ratio = torch.exp(new_log_probs - old_log_probs_mb)
            term1 = ratio * advantages_mb
            clipped_ratio = torch.clamp(ratio, 1.0 - CLIP_EPSILON, 1.0 + CLIP_EPSILON)
            term2 = clipped_ratio * advantages_mb
            actor_loss = -torch.min(term1, term2).mean()

            entropy = action_distribution.entropy().mean()
            entropy_sum += entropy.item()

            # --- Combined Loss and Optimization ---
            # One backward pass over the summed loss, then one step per optimizer.
            total_loss = actor_loss + VALUE_LOSS_COEF * value_loss - ENTROPY_COEF * entropy

            total_loss.backward()
            actor_optimizer.step()
            critic_optimizer.step()

            actor_loss_sum += actor_loss.item()
            critic_loss_sum += value_loss.item()
            num_minibatches_processed += 1

    update_end_time = time.time()

    # --- Logging ---
    # Averages are taken over all minibatches processed in this update phase.
    avg_actor_loss = actor_loss_sum / max(1, num_minibatches_processed)
    avg_critic_loss = critic_loss_sum / max(1, num_minibatches_processed)
    avg_entropy = entropy_sum / max(1, num_minibatches_processed)

    # Print summary after update phase for this rollout
    print(f"\n--- Rollout Summary (Steps {total_steps_trained - steps_in_rollout + 1} to {total_steps_trained}) ---")
    print(f"Update Duration: {update_end_time - update_start_time:.2f}s")
    if episode_rewards:
        print(f"Avg Episode Reward (last {len(episode_rewards)}): {np.mean(episode_rewards):.2f}")
        print(f"Avg Episode Duration (last {len(episode_durations)}): {np.mean(episode_durations):.2f}")
        print(f"Avg Delivered Packages (last {len(episode_delivered_packages)}): {np.mean(episode_delivered_packages):.2f}")
    print(f"Avg Actor Loss (per minibatch): {avg_actor_loss:.4f}")
    print(f"Avg Critic Loss (per minibatch): {avg_critic_loss:.4f}")
    print(f"Avg Entropy (per minibatch): {avg_entropy:.4f}")
    print("-" * 30)


pbar.close() # Close tqdm progress bar

print("Training finished.")

# Save the final model under a name that records the episode and step counts.
final_checkpoint_path = os.path.join(CHECKPOINT_DIR, f"mappo_final_ep_{total_episodes_finished:06d}_steps_{total_steps_trained}.pth")
_save_training_checkpoint(final_checkpoint_path)
print(f"Saved final checkpoint to {final_checkpoint_path}")

# --- You can add evaluation code here after training ---

Using device: cuda
Environment initialized. Map dimensions: (10, 10)
Starting training (approx 1000000 env steps)...


Total Steps Trained:   0%|          | 488/1000000 [00:05<2:18:31, 120.25step/s]


Episode 1 finished at step 500 (500 total). Env Reward: -9.44, Steps: 500, Delivered: 5


Total Steps Trained:   0%|          | 512/1000000 [00:10<26:08:12, 10.62step/s]


--- Rollout Summary (Steps 1 to 500) ---
Update Duration: 4.98s
Avg Episode Reward (last 1): -9.44
Avg Episode Duration (last 1): 500.00
Avg Delivered Packages (last 1): 5.00
Avg Actor Loss (per minibatch): -0.0006
Avg Critic Loss (per minibatch): 478.5330
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   0%|          | 1000/1000000 [00:14<2:25:46, 114.21step/s]


Episode 2 finished at step 500 (1000 total). Env Reward: -10.03, Steps: 500, Delivered: 5


Total Steps Trained:   0%|          | 1012/1000000 [00:19<33:49:44,  8.20step/s]


--- Rollout Summary (Steps 501 to 1000) ---
Update Duration: 4.52s
Avg Episode Reward (last 2): -9.73
Avg Episode Duration (last 2): 500.00
Avg Delivered Packages (last 2): 5.00
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 483.6759
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   0%|          | 1494/1000000 [00:23<2:31:27, 109.88step/s]


Episode 3 finished at step 500 (1500 total). Env Reward: 13.93, Steps: 500, Delivered: 11
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000003_map1.pth


Total Steps Trained:   0%|          | 1515/1000000 [00:28<27:21:14, 10.14step/s]


--- Rollout Summary (Steps 1001 to 1500) ---
Update Duration: 4.70s
Avg Episode Reward (last 3): -1.85
Avg Episode Duration (last 3): 500.00
Avg Delivered Packages (last 3): 7.00
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 1612.4592
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:   0%|          | 1989/1000000 [00:32<2:27:03, 113.10step/s]


Episode 4 finished at step 500 (2000 total). Env Reward: -7.33, Steps: 500, Delivered: 8


Total Steps Trained:   0%|          | 2012/1000000 [00:37<24:26:57, 11.34step/s]


--- Rollout Summary (Steps 1501 to 2000) ---
Update Duration: 4.45s
Avg Episode Reward (last 4): -3.22
Avg Episode Duration (last 4): 500.00
Avg Delivered Packages (last 4): 7.25
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 519.1102
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   0%|          | 2495/1000000 [00:41<2:24:40, 114.91step/s]


Episode 5 finished at step 500 (2500 total). Env Reward: -7.94, Steps: 500, Delivered: 7


Total Steps Trained:   0%|          | 2519/1000000 [00:46<24:12:10, 11.45step/s]


--- Rollout Summary (Steps 2001 to 2500) ---
Update Duration: 4.47s
Avg Episode Reward (last 5): -4.16
Avg Episode Duration (last 5): 500.00
Avg Delivered Packages (last 5): 7.20
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 476.0020
Avg Entropy (per minibatch): 2.5579
------------------------------


Total Steps Trained:   0%|          | 2988/1000000 [00:50<2:18:06, 120.32step/s]


Episode 6 finished at step 500 (3000 total). Env Reward: 0.83, Steps: 500, Delivered: 6


Total Steps Trained:   0%|          | 3012/1000000 [00:55<23:45:23, 11.66step/s]


--- Rollout Summary (Steps 2501 to 3000) ---
Update Duration: 4.46s
Avg Episode Reward (last 6): -3.33
Avg Episode Duration (last 6): 500.00
Avg Delivered Packages (last 6): 7.00
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 649.1066
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   0%|          | 3490/1000000 [00:59<2:18:48, 119.64step/s]


Episode 7 finished at step 500 (3500 total). Env Reward: -12.18, Steps: 500, Delivered: 2


Total Steps Trained:   0%|          | 3512/1000000 [01:03<24:29:19, 11.30step/s]


--- Rollout Summary (Steps 3001 to 3500) ---
Update Duration: 4.46s
Avg Episode Reward (last 7): -4.59
Avg Episode Duration (last 7): 500.00
Avg Delivered Packages (last 7): 6.29
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 568.8085
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   0%|          | 3997/1000000 [01:08<2:28:29, 111.79step/s]


Episode 8 finished at step 500 (4000 total). Env Reward: -1.26, Steps: 500, Delivered: 4


Total Steps Trained:   0%|          | 4020/1000000 [01:13<24:52:24, 11.12step/s]


--- Rollout Summary (Steps 3501 to 4000) ---
Update Duration: 4.51s
Avg Episode Reward (last 8): -4.18
Avg Episode Duration (last 8): 500.00
Avg Delivered Packages (last 8): 6.00
Avg Actor Loss (per minibatch): 0.0005
Avg Critic Loss (per minibatch): 740.7956
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:   0%|          | 4498/1000000 [01:17<2:29:03, 111.31step/s]


Episode 9 finished at step 500 (4500 total). Env Reward: -7.79, Steps: 500, Delivered: 7


Total Steps Trained:   0%|          | 4510/1000000 [01:21<33:50:58,  8.17step/s]


--- Rollout Summary (Steps 4001 to 4500) ---
Update Duration: 4.53s
Avg Episode Reward (last 9): -4.58
Avg Episode Duration (last 9): 500.00
Avg Delivered Packages (last 9): 6.11
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 664.0867
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:   0%|          | 5000/1000000 [01:26<2:22:26, 116.42step/s]


Episode 10 finished at step 500 (5000 total). Env Reward: 2.14, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000010_map1.pth


Total Steps Trained:   1%|          | 5012/1000000 [01:31<33:52:45,  8.16step/s]


--- Rollout Summary (Steps 4501 to 5000) ---
Update Duration: 4.67s
Avg Episode Reward (last 10): -3.91
Avg Episode Duration (last 10): 500.00
Avg Delivered Packages (last 10): 6.30
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 1455.3193
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   1%|          | 5497/1000000 [01:35<2:19:26, 118.87step/s]


Episode 11 finished at step 500 (5500 total). Env Reward: 2.14, Steps: 500, Delivered: 8


Total Steps Trained:   1%|          | 5520/1000000 [01:40<24:53:51, 11.10step/s]


--- Rollout Summary (Steps 5001 to 5500) ---
Update Duration: 4.55s
Avg Episode Reward (last 11): -3.36
Avg Episode Duration (last 11): 500.00
Avg Delivered Packages (last 11): 6.45
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 986.7280
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:   1%|          | 5990/1000000 [01:44<2:22:50, 115.98step/s]


Episode 12 finished at step 500 (6000 total). Env Reward: -10.30, Steps: 500, Delivered: 5


Total Steps Trained:   1%|          | 6013/1000000 [01:48<24:46:38, 11.14step/s]


--- Rollout Summary (Steps 5501 to 6000) ---
Update Duration: 4.50s
Avg Episode Reward (last 12): -3.94
Avg Episode Duration (last 12): 500.00
Avg Delivered Packages (last 12): 6.33
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 619.5383
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   1%|          | 6499/1000000 [01:52<2:18:11, 119.82step/s]


Episode 13 finished at step 500 (6500 total). Env Reward: -9.88, Steps: 500, Delivered: 4


Total Steps Trained:   1%|          | 6511/1000000 [01:57<31:57:52,  8.63step/s]


--- Rollout Summary (Steps 6001 to 6500) ---
Update Duration: 4.53s
Avg Episode Reward (last 13): -4.39
Avg Episode Duration (last 13): 500.00
Avg Delivered Packages (last 13): 6.15
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 601.6076
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   1%|          | 6997/1000000 [02:01<2:24:29, 114.54step/s]


Episode 14 finished at step 500 (7000 total). Env Reward: -6.63, Steps: 500, Delivered: 8


Total Steps Trained:   1%|          | 7020/1000000 [02:06<24:43:34, 11.16step/s]


--- Rollout Summary (Steps 6501 to 7000) ---
Update Duration: 4.49s
Avg Episode Reward (last 14): -4.55
Avg Episode Duration (last 14): 500.00
Avg Delivered Packages (last 14): 6.29
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 537.4056
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   1%|          | 7496/1000000 [02:10<2:24:03, 114.82step/s]


Episode 15 finished at step 500 (7500 total). Env Reward: 1.25, Steps: 500, Delivered: 7


Total Steps Trained:   1%|          | 7520/1000000 [02:15<24:15:35, 11.36step/s]


--- Rollout Summary (Steps 7001 to 7500) ---
Update Duration: 4.48s
Avg Episode Reward (last 15): -4.17
Avg Episode Duration (last 15): 500.00
Avg Delivered Packages (last 15): 6.33
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 778.6452
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:   1%|          | 7997/1000000 [02:19<2:17:11, 120.51step/s]


Episode 16 finished at step 500 (8000 total). Env Reward: -12.53, Steps: 500, Delivered: 2


Total Steps Trained:   1%|          | 8022/1000000 [02:24<22:59:17, 11.99step/s]


--- Rollout Summary (Steps 7501 to 8000) ---
Update Duration: 4.52s
Avg Episode Reward (last 16): -4.69
Avg Episode Duration (last 16): 500.00
Avg Delivered Packages (last 16): 6.06
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 699.4321
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   1%|          | 8489/1000000 [02:28<2:24:42, 114.20step/s]


Episode 17 finished at step 500 (8500 total). Env Reward: -1.29, Steps: 500, Delivered: 4


Total Steps Trained:   1%|          | 8512/1000000 [02:33<25:02:06, 11.00step/s]


--- Rollout Summary (Steps 8001 to 8500) ---
Update Duration: 4.66s
Avg Episode Reward (last 17): -4.49
Avg Episode Duration (last 17): 500.00
Avg Delivered Packages (last 17): 5.94
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 723.3328
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   1%|          | 8993/1000000 [02:37<2:13:45, 123.48step/s]


Episode 18 finished at step 500 (9000 total). Env Reward: -6.50, Steps: 500, Delivered: 8


Total Steps Trained:   1%|          | 9018/1000000 [02:41<22:55:25, 12.01step/s]


--- Rollout Summary (Steps 8501 to 9000) ---
Update Duration: 4.50s
Avg Episode Reward (last 18): -4.60
Avg Episode Duration (last 18): 500.00
Avg Delivered Packages (last 18): 6.06
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 358.6821
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:   1%|          | 9499/1000000 [02:45<2:18:05, 119.55step/s]


Episode 19 finished at step 500 (9500 total). Env Reward: -8.96, Steps: 500, Delivered: 6


Total Steps Trained:   1%|          | 9511/1000000 [02:50<32:02:45,  8.59step/s]


--- Rollout Summary (Steps 9001 to 9500) ---
Update Duration: 4.50s
Avg Episode Reward (last 19): -4.83
Avg Episode Duration (last 19): 500.00
Avg Delivered Packages (last 19): 6.05
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 511.7822
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   1%|          | 9990/1000000 [02:54<2:15:28, 121.80step/s]


Episode 20 finished at step 500 (10000 total). Env Reward: -10.64, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000020_map1.pth


Total Steps Trained:   1%|          | 10014/1000000 [02:59<23:45:41, 11.57step/s]


--- Rollout Summary (Steps 9501 to 10000) ---
Update Duration: 4.55s
Avg Episode Reward (last 20): -5.12
Avg Episode Duration (last 20): 500.00
Avg Delivered Packages (last 20): 5.95
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 581.6746
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   1%|          | 10499/1000000 [03:03<2:26:35, 112.50step/s]


Episode 21 finished at step 500 (10500 total). Env Reward: -6.75, Steps: 500, Delivered: 8


Total Steps Trained:   1%|          | 10511/1000000 [03:08<33:37:53,  8.17step/s]


--- Rollout Summary (Steps 10001 to 10500) ---
Update Duration: 4.53s
Avg Episode Reward (last 21): -5.20
Avg Episode Duration (last 21): 500.00
Avg Delivered Packages (last 21): 6.05
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 615.3992
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   1%|          | 10990/1000000 [03:12<2:18:51, 118.70step/s]


Episode 22 finished at step 500 (11000 total). Env Reward: 3.38, Steps: 500, Delivered: 9


Total Steps Trained:   1%|          | 11013/1000000 [03:17<23:54:56, 11.49step/s]


--- Rollout Summary (Steps 10501 to 11000) ---
Update Duration: 4.52s
Avg Episode Reward (last 22): -4.81
Avg Episode Duration (last 22): 500.00
Avg Delivered Packages (last 22): 6.18
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 780.3336
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:   1%|          | 11491/1000000 [03:21<2:17:08, 120.13step/s]


Episode 23 finished at step 500 (11500 total). Env Reward: -9.06, Steps: 500, Delivered: 6


Total Steps Trained:   1%|          | 11515/1000000 [03:26<24:23:39, 11.26step/s]


--- Rollout Summary (Steps 11001 to 11500) ---
Update Duration: 4.66s
Avg Episode Reward (last 23): -4.99
Avg Episode Duration (last 23): 500.00
Avg Delivered Packages (last 23): 6.17
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 471.3119
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   1%|          | 11989/1000000 [03:30<2:25:52, 112.88step/s]


Episode 24 finished at step 500 (12000 total). Env Reward: -8.99, Steps: 500, Delivered: 6


Total Steps Trained:   1%|          | 12012/1000000 [03:34<24:54:49, 11.02step/s]


--- Rollout Summary (Steps 11501 to 12000) ---
Update Duration: 4.56s
Avg Episode Reward (last 24): -5.16
Avg Episode Duration (last 24): 500.00
Avg Delivered Packages (last 24): 6.17
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 347.8640
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:   1%|          | 12492/1000000 [03:39<2:19:24, 118.05step/s]


Episode 25 finished at step 500 (12500 total). Env Reward: -11.13, Steps: 500, Delivered: 4


Total Steps Trained:   1%|▏         | 12515/1000000 [03:43<24:31:35, 11.18step/s]


--- Rollout Summary (Steps 12001 to 12500) ---
Update Duration: 4.49s
Avg Episode Reward (last 25): -5.40
Avg Episode Duration (last 25): 500.00
Avg Delivered Packages (last 25): 6.08
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 478.6241
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   1%|▏         | 12996/1000000 [03:47<2:19:03, 118.30step/s]


Episode 26 finished at step 500 (13000 total). Env Reward: -10.76, Steps: 500, Delivered: 4


Total Steps Trained:   1%|▏         | 13019/1000000 [03:52<24:26:40, 11.22step/s]


--- Rollout Summary (Steps 12501 to 13000) ---
Update Duration: 4.50s
Avg Episode Reward (last 26): -5.60
Avg Episode Duration (last 26): 500.00
Avg Delivered Packages (last 26): 6.00
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 700.1774
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:   1%|▏         | 13498/1000000 [03:56<2:39:17, 103.21step/s]


Episode 27 finished at step 500 (13500 total). Env Reward: -6.44, Steps: 500, Delivered: 8


Total Steps Trained:   1%|▏         | 13520/1000000 [04:01<25:56:11, 10.57step/s]


--- Rollout Summary (Steps 13001 to 13500) ---
Update Duration: 4.54s
Avg Episode Reward (last 27): -5.64
Avg Episode Duration (last 27): 500.00
Avg Delivered Packages (last 27): 6.07
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 427.9006
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   1%|▏         | 13995/1000000 [04:05<2:12:08, 124.37step/s]


Episode 28 finished at step 500 (14000 total). Env Reward: -10.64, Steps: 500, Delivered: 4


Total Steps Trained:   1%|▏         | 14019/1000000 [04:10<23:56:19, 11.44step/s]


--- Rollout Summary (Steps 13501 to 14000) ---
Update Duration: 4.64s
Avg Episode Reward (last 28): -5.81
Avg Episode Duration (last 28): 500.00
Avg Delivered Packages (last 28): 6.00
Avg Actor Loss (per minibatch): -0.0014
Avg Critic Loss (per minibatch): 560.6939
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   1%|▏         | 14492/1000000 [04:14<2:32:41, 107.57step/s]


Episode 29 finished at step 500 (14500 total). Env Reward: -10.86, Steps: 500, Delivered: 4


Total Steps Trained:   1%|▏         | 14514/1000000 [04:19<25:34:23, 10.70step/s]


--- Rollout Summary (Steps 14001 to 14500) ---
Update Duration: 4.49s
Avg Episode Reward (last 29): -5.99
Avg Episode Duration (last 29): 500.00
Avg Delivered Packages (last 29): 5.93
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 623.7636
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:   1%|▏         | 14998/1000000 [04:23<2:43:45, 100.24step/s]


Episode 30 finished at step 500 (15000 total). Env Reward: -5.47, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000030_map1.pth


Total Steps Trained:   2%|▏         | 15009/1000000 [04:28<36:15:44,  7.55step/s]


--- Rollout Summary (Steps 14501 to 15000) ---
Update Duration: 4.54s
Avg Episode Reward (last 30): -5.97
Avg Episode Duration (last 30): 500.00
Avg Delivered Packages (last 30): 6.03
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 606.1541
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:   2%|▏         | 15492/1000000 [04:32<2:20:47, 116.55step/s]


Episode 31 finished at step 500 (15500 total). Env Reward: -8.06, Steps: 500, Delivered: 7


Total Steps Trained:   2%|▏         | 15515/1000000 [04:37<24:36:54, 11.11step/s]


--- Rollout Summary (Steps 15001 to 15500) ---
Update Duration: 4.51s
Avg Episode Reward (last 31): -6.04
Avg Episode Duration (last 31): 500.00
Avg Delivered Packages (last 31): 6.06
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 543.8595
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   2%|▏         | 15998/1000000 [04:41<2:22:08, 115.37step/s]


Episode 32 finished at step 500 (16000 total). Env Reward: -4.66, Steps: 500, Delivered: 10


Total Steps Trained:   2%|▏         | 16021/1000000 [04:46<24:29:32, 11.16step/s]


--- Rollout Summary (Steps 15501 to 16000) ---
Update Duration: 4.50s
Avg Episode Reward (last 32): -6.00
Avg Episode Duration (last 32): 500.00
Avg Delivered Packages (last 32): 6.19
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 603.2174
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   2%|▏         | 16490/1000000 [04:50<2:19:20, 117.64step/s]


Episode 33 finished at step 500 (16500 total). Env Reward: -10.92, Steps: 500, Delivered: 4


Total Steps Trained:   2%|▏         | 16512/1000000 [04:55<24:14:41, 11.27step/s]


--- Rollout Summary (Steps 16001 to 16500) ---
Update Duration: 4.49s
Avg Episode Reward (last 33): -6.14
Avg Episode Duration (last 33): 500.00
Avg Delivered Packages (last 33): 6.12
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 561.7291
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:   2%|▏         | 16994/1000000 [04:59<2:19:01, 117.85step/s]


Episode 34 finished at step 500 (17000 total). Env Reward: -10.50, Steps: 500, Delivered: 4


Total Steps Trained:   2%|▏         | 17017/1000000 [05:03<23:50:30, 11.45step/s]


--- Rollout Summary (Steps 16501 to 17000) ---
Update Duration: 4.48s
Avg Episode Reward (last 34): -6.27
Avg Episode Duration (last 34): 500.00
Avg Delivered Packages (last 34): 6.06
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 596.5063
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:   2%|▏         | 17498/1000000 [05:07<2:16:28, 119.98step/s]


Episode 35 finished at step 500 (17500 total). Env Reward: -12.67, Steps: 500, Delivered: 2


Total Steps Trained:   2%|▏         | 17521/1000000 [05:12<24:57:45, 10.93step/s]


--- Rollout Summary (Steps 17001 to 17500) ---
Update Duration: 4.74s
Avg Episode Reward (last 35): -6.46
Avg Episode Duration (last 35): 500.00
Avg Delivered Packages (last 35): 5.94
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 572.4833
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   2%|▏         | 17994/1000000 [05:16<2:26:44, 111.54step/s]


Episode 36 finished at step 500 (18000 total). Env Reward: -6.38, Steps: 500, Delivered: 8


Total Steps Trained:   2%|▏         | 18017/1000000 [05:21<24:42:51, 11.04step/s]


--- Rollout Summary (Steps 17501 to 18000) ---
Update Duration: 4.53s
Avg Episode Reward (last 36): -6.45
Avg Episode Duration (last 36): 500.00
Avg Delivered Packages (last 36): 6.00
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 495.0487
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   2%|▏         | 18499/1000000 [05:25<2:17:49, 118.69step/s]


Episode 37 finished at step 500 (18500 total). Env Reward: -11.17, Steps: 500, Delivered: 4


Total Steps Trained:   2%|▏         | 18511/1000000 [05:30<31:51:48,  8.56step/s]


--- Rollout Summary (Steps 18001 to 18500) ---
Update Duration: 4.57s
Avg Episode Reward (last 37): -6.58
Avg Episode Duration (last 37): 500.00
Avg Delivered Packages (last 37): 5.95
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 380.3092
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:   2%|▏         | 18998/1000000 [05:34<2:17:40, 118.76step/s]


Episode 38 finished at step 500 (19000 total). Env Reward: -8.26, Steps: 500, Delivered: 6


Total Steps Trained:   2%|▏         | 19010/1000000 [05:39<32:53:11,  8.29step/s]


--- Rollout Summary (Steps 18501 to 19000) ---
Update Duration: 4.55s
Avg Episode Reward (last 38): -6.62
Avg Episode Duration (last 38): 500.00
Avg Delivered Packages (last 38): 5.95
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 544.5601
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:   2%|▏         | 19489/1000000 [05:43<2:25:00, 112.70step/s]


Episode 39 finished at step 500 (19500 total). Env Reward: -10.14, Steps: 500, Delivered: 4


Total Steps Trained:   2%|▏         | 19513/1000000 [05:48<23:52:08, 11.41step/s]


--- Rollout Summary (Steps 19001 to 19500) ---
Update Duration: 4.50s
Avg Episode Reward (last 39): -6.72
Avg Episode Duration (last 39): 500.00
Avg Delivered Packages (last 39): 5.90
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 613.1965
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   2%|▏         | 19989/1000000 [05:52<2:14:05, 121.81step/s]


Episode 40 finished at step 500 (20000 total). Env Reward: -9.31, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000040_map1.pth


Total Steps Trained:   2%|▏         | 20013/1000000 [05:56<23:08:28, 11.76step/s]


--- Rollout Summary (Steps 19501 to 20000) ---
Update Duration: 4.49s
Avg Episode Reward (last 40): -6.78
Avg Episode Duration (last 40): 500.00
Avg Delivered Packages (last 40): 5.88
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 492.0933
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   2%|▏         | 20497/1000000 [06:01<2:21:43, 115.19step/s]


Episode 41 finished at step 500 (20500 total). Env Reward: -9.31, Steps: 500, Delivered: 5


Total Steps Trained:   2%|▏         | 20520/1000000 [06:05<24:16:32, 11.21step/s]


--- Rollout Summary (Steps 20001 to 20500) ---
Update Duration: 4.51s
Avg Episode Reward (last 41): -6.84
Avg Episode Duration (last 41): 500.00
Avg Delivered Packages (last 41): 5.85
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 456.0094
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:   2%|▏         | 20996/1000000 [06:09<2:19:31, 116.95step/s]


Episode 42 finished at step 500 (21000 total). Env Reward: -11.91, Steps: 500, Delivered: 2


Total Steps Trained:   2%|▏         | 21019/1000000 [06:14<25:14:00, 10.78step/s]


--- Rollout Summary (Steps 20501 to 21000) ---
Update Duration: 4.72s
Avg Episode Reward (last 42): -6.96
Avg Episode Duration (last 42): 500.00
Avg Delivered Packages (last 42): 5.76
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 682.6067
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:   2%|▏         | 21491/1000000 [06:19<2:18:07, 118.07step/s]


Episode 43 finished at step 500 (21500 total). Env Reward: -10.69, Steps: 500, Delivered: 4


Total Steps Trained:   2%|▏         | 21512/1000000 [06:23<25:03:29, 10.85step/s]


--- Rollout Summary (Steps 21001 to 21500) ---
Update Duration: 4.51s
Avg Episode Reward (last 43): -7.05
Avg Episode Duration (last 43): 500.00
Avg Delivered Packages (last 43): 5.72
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 536.7331
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   2%|▏         | 22000/1000000 [06:28<2:21:11, 115.45step/s]


Episode 44 finished at step 500 (22000 total). Env Reward: -1.55, Steps: 500, Delivered: 4


Total Steps Trained:   2%|▏         | 22012/1000000 [06:32<31:47:43,  8.54step/s]


--- Rollout Summary (Steps 21501 to 22000) ---
Update Duration: 4.47s
Avg Episode Reward (last 44): -6.92
Avg Episode Duration (last 44): 500.00
Avg Delivered Packages (last 44): 5.68
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 949.7781
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   2%|▏         | 22491/1000000 [06:37<2:32:57, 106.51step/s]


Episode 45 finished at step 500 (22500 total). Env Reward: -8.69, Steps: 500, Delivered: 6


Total Steps Trained:   2%|▏         | 22513/1000000 [06:41<25:24:45, 10.68step/s]


--- Rollout Summary (Steps 22001 to 22500) ---
Update Duration: 4.52s
Avg Episode Reward (last 45): -6.96
Avg Episode Duration (last 45): 500.00
Avg Delivered Packages (last 45): 5.69
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 530.1193
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   2%|▏         | 22989/1000000 [06:45<2:19:38, 116.61step/s]


Episode 46 finished at step 500 (23000 total). Env Reward: -7.44, Steps: 500, Delivered: 7


Total Steps Trained:   2%|▏         | 23012/1000000 [06:50<24:31:18, 11.07step/s]


--- Rollout Summary (Steps 22501 to 23000) ---
Update Duration: 4.55s
Avg Episode Reward (last 46): -6.97
Avg Episode Duration (last 46): 500.00
Avg Delivered Packages (last 46): 5.72
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 608.1795
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   2%|▏         | 23499/1000000 [06:54<2:16:10, 119.52step/s]


Episode 47 finished at step 500 (23500 total). Env Reward: -10.32, Steps: 500, Delivered: 4


Total Steps Trained:   2%|▏         | 23523/1000000 [06:59<23:07:12, 11.73step/s]


--- Rollout Summary (Steps 23001 to 23500) ---
Update Duration: 4.53s
Avg Episode Reward (last 47): -7.04
Avg Episode Duration (last 47): 500.00
Avg Delivered Packages (last 47): 5.68
Avg Actor Loss (per minibatch): -0.0099
Avg Critic Loss (per minibatch): 871.8695
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   2%|▏         | 23992/1000000 [07:03<2:22:05, 114.48step/s]


Episode 48 finished at step 500 (24000 total). Env Reward: 2.77, Steps: 500, Delivered: 8


Total Steps Trained:   2%|▏         | 24014/1000000 [07:08<25:05:57, 10.80step/s]


--- Rollout Summary (Steps 23501 to 24000) ---
Update Duration: 4.52s
Avg Episode Reward (last 48): -6.84
Avg Episode Duration (last 48): 500.00
Avg Delivered Packages (last 48): 5.73
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 787.2823
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:   2%|▏         | 24500/1000000 [07:12<2:19:42, 116.37step/s]


Episode 49 finished at step 500 (24500 total). Env Reward: -1.11, Steps: 500, Delivered: 5


Total Steps Trained:   2%|▏         | 24512/1000000 [07:17<33:23:10,  8.12step/s]


--- Rollout Summary (Steps 24001 to 24500) ---
Update Duration: 4.58s
Avg Episode Reward (last 49): -6.72
Avg Episode Duration (last 49): 500.00
Avg Delivered Packages (last 49): 5.71
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 760.7182
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   2%|▏         | 24990/1000000 [07:21<2:27:34, 110.12step/s]


Episode 50 finished at step 500 (25000 total). Env Reward: -5.27, Steps: 500, Delivered: 10
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000050_map1.pth


Total Steps Trained:   3%|▎         | 25012/1000000 [07:26<25:28:38, 10.63step/s]


--- Rollout Summary (Steps 24501 to 25000) ---
Update Duration: 4.52s
Avg Episode Reward (last 50): -6.69
Avg Episode Duration (last 50): 500.00
Avg Delivered Packages (last 50): 5.80
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 423.6533
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   3%|▎         | 25500/1000000 [07:30<2:33:01, 106.14step/s]


Episode 51 finished at step 500 (25500 total). Env Reward: 2.17, Steps: 500, Delivered: 8


Total Steps Trained:   3%|▎         | 25511/1000000 [07:35<34:46:21,  7.78step/s]


--- Rollout Summary (Steps 25001 to 25500) ---
Update Duration: 4.52s
Avg Episode Reward (last 51): -6.52
Avg Episode Duration (last 51): 500.00
Avg Delivered Packages (last 51): 5.84
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 1326.8423
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   3%|▎         | 26000/1000000 [07:39<2:25:09, 111.83step/s]


Episode 52 finished at step 500 (26000 total). Env Reward: -6.72, Steps: 500, Delivered: 8


Total Steps Trained:   3%|▎         | 26012/1000000 [07:44<32:44:59,  8.26step/s]


--- Rollout Summary (Steps 25501 to 26000) ---
Update Duration: 4.48s
Avg Episode Reward (last 52): -6.52
Avg Episode Duration (last 52): 500.00
Avg Delivered Packages (last 52): 5.88
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 714.2158
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   3%|▎         | 26493/1000000 [07:48<2:29:09, 108.78step/s]


Episode 53 finished at step 500 (26500 total). Env Reward: -10.09, Steps: 500, Delivered: 5


Total Steps Trained:   3%|▎         | 26514/1000000 [07:53<25:26:10, 10.63step/s]


--- Rollout Summary (Steps 26001 to 26500) ---
Update Duration: 4.48s
Avg Episode Reward (last 53): -6.59
Avg Episode Duration (last 53): 500.00
Avg Delivered Packages (last 53): 5.87
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 439.3851
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:   3%|▎         | 26990/1000000 [07:57<2:15:55, 119.31step/s]


Episode 54 finished at step 500 (27000 total). Env Reward: -13.08, Steps: 500, Delivered: 2


Total Steps Trained:   3%|▎         | 27012/1000000 [08:02<24:05:44, 11.22step/s]


--- Rollout Summary (Steps 26501 to 27000) ---
Update Duration: 4.54s
Avg Episode Reward (last 54): -6.71
Avg Episode Duration (last 54): 500.00
Avg Delivered Packages (last 54): 5.80
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 455.2065
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   3%|▎         | 27495/1000000 [08:06<2:17:56, 117.50step/s]


Episode 55 finished at step 500 (27500 total). Env Reward: 1.73, Steps: 500, Delivered: 7


Total Steps Trained:   3%|▎         | 27518/1000000 [08:11<23:56:16, 11.28step/s]


--- Rollout Summary (Steps 27001 to 27500) ---
Update Duration: 4.52s
Avg Episode Reward (last 55): -6.56
Avg Episode Duration (last 55): 500.00
Avg Delivered Packages (last 55): 5.82
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 1203.9479
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:   3%|▎         | 28000/1000000 [08:15<2:18:11, 117.23step/s]


Episode 56 finished at step 500 (28000 total). Env Reward: -10.79, Steps: 500, Delivered: 4


Total Steps Trained:   3%|▎         | 28012/1000000 [08:19<32:04:46,  8.42step/s]


--- Rollout Summary (Steps 27501 to 28000) ---
Update Duration: 4.63s
Avg Episode Reward (last 56): -6.63
Avg Episode Duration (last 56): 500.00
Avg Delivered Packages (last 56): 5.79
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 481.5635
Avg Entropy (per minibatch): 2.5615
------------------------------


Total Steps Trained:   3%|▎         | 28493/1000000 [08:24<2:28:43, 108.87step/s]


Episode 57 finished at step 500 (28500 total). Env Reward: -9.52, Steps: 500, Delivered: 5


Total Steps Trained:   3%|▎         | 28515/1000000 [08:28<25:22:37, 10.63step/s]


--- Rollout Summary (Steps 28001 to 28500) ---
Update Duration: 4.53s
Avg Episode Reward (last 57): -6.68
Avg Episode Duration (last 57): 500.00
Avg Delivered Packages (last 57): 5.77
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 668.5308
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   3%|▎         | 29000/1000000 [08:33<2:22:11, 113.82step/s]


Episode 58 finished at step 500 (29000 total). Env Reward: -8.36, Steps: 500, Delivered: 6


Total Steps Trained:   3%|▎         | 29012/1000000 [08:37<32:44:43,  8.24step/s]


--- Rollout Summary (Steps 28501 to 29000) ---
Update Duration: 4.49s
Avg Episode Reward (last 58): -6.71
Avg Episode Duration (last 58): 500.00
Avg Delivered Packages (last 58): 5.78
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 579.8985
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:   3%|▎         | 29492/1000000 [08:42<2:17:42, 117.47step/s]


Episode 59 finished at step 500 (29500 total). Env Reward: -9.64, Steps: 500, Delivered: 5


Total Steps Trained:   3%|▎         | 29516/1000000 [08:46<23:05:20, 11.68step/s]


--- Rollout Summary (Steps 29001 to 29500) ---
Update Duration: 4.50s
Avg Episode Reward (last 59): -6.76
Avg Episode Duration (last 59): 500.00
Avg Delivered Packages (last 59): 5.76
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 535.8525
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   3%|▎         | 30000/1000000 [08:50<2:33:20, 105.43step/s]


Episode 60 finished at step 500 (30000 total). Env Reward: -10.30, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000060_map1.pth


Total Steps Trained:   3%|▎         | 30011/1000000 [08:55<34:53:56,  7.72step/s]


--- Rollout Summary (Steps 29501 to 30000) ---
Update Duration: 4.61s
Avg Episode Reward (last 60): -6.82
Avg Episode Duration (last 60): 500.00
Avg Delivered Packages (last 60): 5.75
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 429.6280
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   3%|▎         | 30493/1000000 [09:00<2:23:24, 112.67step/s]


Episode 61 finished at step 500 (30500 total). Env Reward: 3.90, Steps: 500, Delivered: 9


Total Steps Trained:   3%|▎         | 30516/1000000 [09:04<24:10:40, 11.14step/s]


--- Rollout Summary (Steps 30001 to 30500) ---
Update Duration: 4.49s
Avg Episode Reward (last 61): -6.65
Avg Episode Duration (last 61): 500.00
Avg Delivered Packages (last 61): 5.80
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 861.5248
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:   3%|▎         | 30995/1000000 [09:08<2:21:46, 113.91step/s]


Episode 62 finished at step 500 (31000 total). Env Reward: 1.53, Steps: 500, Delivered: 7


Total Steps Trained:   3%|▎         | 31018/1000000 [09:13<24:56:15, 10.79step/s]


--- Rollout Summary (Steps 30501 to 31000) ---
Update Duration: 4.64s
Avg Episode Reward (last 62): -6.51
Avg Episode Duration (last 62): 500.00
Avg Delivered Packages (last 62): 5.82
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 864.7652
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:   3%|▎         | 31494/1000000 [09:17<2:12:49, 121.52step/s]


Episode 63 finished at step 500 (31500 total). Env Reward: -8.44, Steps: 500, Delivered: 6


Total Steps Trained:   3%|▎         | 31518/1000000 [09:22<23:25:04, 11.49step/s]


--- Rollout Summary (Steps 31001 to 31500) ---
Update Duration: 4.59s
Avg Episode Reward (last 63): -6.54
Avg Episode Duration (last 63): 500.00
Avg Delivered Packages (last 63): 5.83
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 526.6378
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   3%|▎         | 31992/1000000 [09:26<2:21:45, 113.81step/s]


Episode 64 finished at step 500 (32000 total). Env Reward: 0.64, Steps: 500, Delivered: 6


Total Steps Trained:   3%|▎         | 32015/1000000 [09:31<24:29:39, 10.98step/s]


--- Rollout Summary (Steps 31501 to 32000) ---
Update Duration: 4.56s
Avg Episode Reward (last 64): -6.43
Avg Episode Duration (last 64): 500.00
Avg Delivered Packages (last 64): 5.83
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 1150.7898
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   3%|▎         | 32493/1000000 [09:35<2:15:14, 119.23step/s]


Episode 65 finished at step 500 (32500 total). Env Reward: -11.85, Steps: 500, Delivered: 3


Total Steps Trained:   3%|▎         | 32515/1000000 [09:40<24:32:24, 10.95step/s]


--- Rollout Summary (Steps 32001 to 32500) ---
Update Duration: 4.54s
Avg Episode Reward (last 65): -6.52
Avg Episode Duration (last 65): 500.00
Avg Delivered Packages (last 65): 5.78
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 509.1318
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   3%|▎         | 32993/1000000 [09:44<2:12:39, 121.50step/s]


Episode 66 finished at step 500 (33000 total). Env Reward: -5.55, Steps: 500, Delivered: 9


Total Steps Trained:   3%|▎         | 33016/1000000 [09:49<23:35:48, 11.38step/s]


--- Rollout Summary (Steps 32501 to 33000) ---
Update Duration: 4.52s
Avg Episode Reward (last 66): -6.50
Avg Episode Duration (last 66): 500.00
Avg Delivered Packages (last 66): 5.83
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 790.2273
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   3%|▎         | 33498/1000000 [09:53<2:17:14, 117.37step/s]


Episode 67 finished at step 500 (33500 total). Env Reward: -10.55, Steps: 500, Delivered: 4


Total Steps Trained:   3%|▎         | 33510/1000000 [09:58<32:34:18,  8.24step/s]


--- Rollout Summary (Steps 33001 to 33500) ---
Update Duration: 4.58s
Avg Episode Reward (last 67): -6.56
Avg Episode Duration (last 67): 500.00
Avg Delivered Packages (last 67): 5.81
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 841.6922
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:   3%|▎         | 33991/1000000 [10:02<2:13:06, 120.95step/s]


Episode 68 finished at step 500 (34000 total). Env Reward: -12.09, Steps: 500, Delivered: 3


Total Steps Trained:   3%|▎         | 34013/1000000 [10:07<24:03:18, 11.15step/s]


--- Rollout Summary (Steps 33501 to 34000) ---
Update Duration: 4.47s
Avg Episode Reward (last 68): -6.64
Avg Episode Duration (last 68): 500.00
Avg Delivered Packages (last 68): 5.76
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 686.2620
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:   3%|▎         | 34499/1000000 [10:11<2:11:01, 122.82step/s]


Episode 69 finished at step 500 (34500 total). Env Reward: -9.45, Steps: 500, Delivered: 5


Total Steps Trained:   3%|▎         | 34512/1000000 [10:16<30:08:50,  8.90step/s]


--- Rollout Summary (Steps 34001 to 34500) ---
Update Duration: 4.49s
Avg Episode Reward (last 69): -6.68
Avg Episode Duration (last 69): 500.00
Avg Delivered Packages (last 69): 5.75
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 454.5318
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   3%|▎         | 34995/1000000 [10:20<2:18:21, 116.25step/s]


Episode 70 finished at step 500 (35000 total). Env Reward: -11.47, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000070_map1.pth


Total Steps Trained:   4%|▎         | 35018/1000000 [10:24<23:58:28, 11.18step/s]


--- Rollout Summary (Steps 34501 to 35000) ---
Update Duration: 4.51s
Avg Episode Reward (last 70): -6.75
Avg Episode Duration (last 70): 500.00
Avg Delivered Packages (last 70): 5.71
Avg Actor Loss (per minibatch): -0.0075
Avg Critic Loss (per minibatch): 526.5692
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   4%|▎         | 35494/1000000 [10:29<2:14:53, 119.17step/s]


Episode 71 finished at step 500 (35500 total). Env Reward: -9.97, Steps: 500, Delivered: 5


Total Steps Trained:   4%|▎         | 35517/1000000 [10:33<23:23:13, 11.46step/s]


--- Rollout Summary (Steps 35001 to 35500) ---
Update Duration: 4.49s
Avg Episode Reward (last 71): -6.80
Avg Episode Duration (last 71): 500.00
Avg Delivered Packages (last 71): 5.70
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 479.2188
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   4%|▎         | 35998/1000000 [10:37<2:26:31, 109.65step/s]


Episode 72 finished at step 500 (36000 total). Env Reward: -6.26, Steps: 500, Delivered: 8


Total Steps Trained:   4%|▎         | 36020/1000000 [10:42<25:02:06, 10.70step/s]


--- Rollout Summary (Steps 35501 to 36000) ---
Update Duration: 4.53s
Avg Episode Reward (last 72): -6.79
Avg Episode Duration (last 72): 500.00
Avg Delivered Packages (last 72): 5.74
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 454.2853
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:   4%|▎         | 36494/1000000 [10:46<2:12:21, 121.32step/s]


Episode 73 finished at step 500 (36500 total). Env Reward: 2.65, Steps: 500, Delivered: 9


Total Steps Trained:   4%|▎         | 36520/1000000 [10:51<21:46:50, 12.29step/s]


--- Rollout Summary (Steps 36001 to 36500) ---
Update Duration: 4.50s
Avg Episode Reward (last 73): -6.66
Avg Episode Duration (last 73): 500.00
Avg Delivered Packages (last 73): 5.78
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 947.2683
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   4%|▎         | 36993/1000000 [10:55<2:13:43, 120.03step/s]


Episode 74 finished at step 500 (37000 total). Env Reward: -0.33, Steps: 500, Delivered: 5


Total Steps Trained:   4%|▎         | 37017/1000000 [11:00<23:33:50, 11.35step/s]


--- Rollout Summary (Steps 36501 to 37000) ---
Update Duration: 4.59s
Avg Episode Reward (last 74): -6.58
Avg Episode Duration (last 74): 500.00
Avg Delivered Packages (last 74): 5.77
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 405.6595
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:   4%|▎         | 37495/1000000 [11:04<2:12:55, 120.69step/s]


Episode 75 finished at step 500 (37500 total). Env Reward: -9.95, Steps: 500, Delivered: 5


Total Steps Trained:   4%|▍         | 37520/1000000 [11:09<22:18:58, 11.98step/s]


--- Rollout Summary (Steps 37001 to 37500) ---
Update Duration: 4.52s
Avg Episode Reward (last 75): -6.62
Avg Episode Duration (last 75): 500.00
Avg Delivered Packages (last 75): 5.76
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 583.9540
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:   4%|▍         | 37989/1000000 [11:13<2:22:14, 112.73step/s]


Episode 76 finished at step 500 (38000 total). Env Reward: -6.97, Steps: 500, Delivered: 8


Total Steps Trained:   4%|▍         | 38013/1000000 [11:17<23:17:19, 11.47step/s]


--- Rollout Summary (Steps 37501 to 38000) ---
Update Duration: 4.46s
Avg Episode Reward (last 76): -6.62
Avg Episode Duration (last 76): 500.00
Avg Delivered Packages (last 76): 5.79
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 491.1135
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   4%|▍         | 38491/1000000 [11:21<2:11:46, 121.61step/s]


Episode 77 finished at step 500 (38500 total). Env Reward: 8.31, Steps: 500, Delivered: 5


Total Steps Trained:   4%|▍         | 38515/1000000 [11:26<22:41:24, 11.77step/s]


--- Rollout Summary (Steps 38001 to 38500) ---
Update Duration: 4.49s
Avg Episode Reward (last 77): -6.43
Avg Episode Duration (last 77): 500.00
Avg Delivered Packages (last 77): 5.78
Avg Actor Loss (per minibatch): -0.0002
Avg Critic Loss (per minibatch): 1903.5154
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   4%|▍         | 38999/1000000 [11:30<2:13:13, 120.23step/s]


Episode 78 finished at step 500 (39000 total). Env Reward: -0.39, Steps: 500, Delivered: 5


Total Steps Trained:   4%|▍         | 39012/1000000 [11:35<30:07:02,  8.86step/s]


--- Rollout Summary (Steps 38501 to 39000) ---
Update Duration: 4.49s
Avg Episode Reward (last 78): -6.35
Avg Episode Duration (last 78): 500.00
Avg Delivered Packages (last 78): 5.77
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 978.4547
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   4%|▍         | 39490/1000000 [11:39<2:12:03, 121.22step/s]


Episode 79 finished at step 500 (39500 total). Env Reward: -10.38, Steps: 500, Delivered: 4


Total Steps Trained:   4%|▍         | 39514/1000000 [11:44<22:39:45, 11.77step/s]


--- Rollout Summary (Steps 39001 to 39500) ---
Update Duration: 4.50s
Avg Episode Reward (last 79): -6.40
Avg Episode Duration (last 79): 500.00
Avg Delivered Packages (last 79): 5.75
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 611.6977
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   4%|▍         | 39990/1000000 [11:48<2:24:22, 110.83step/s]


Episode 80 finished at step 500 (40000 total). Env Reward: -8.13, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000080_map1.pth


Total Steps Trained:   4%|▍         | 40013/1000000 [11:53<23:54:13, 11.16step/s]


--- Rollout Summary (Steps 39501 to 40000) ---
Update Duration: 4.47s
Avg Episode Reward (last 80): -6.43
Avg Episode Duration (last 80): 500.00
Avg Delivered Packages (last 80): 5.76
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 535.5916
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   4%|▍         | 40494/1000000 [11:57<2:12:12, 120.96step/s]


Episode 81 finished at step 500 (40500 total). Env Reward: -11.66, Steps: 500, Delivered: 3


Total Steps Trained:   4%|▍         | 40518/1000000 [12:01<23:11:18, 11.49step/s]


--- Rollout Summary (Steps 40001 to 40500) ---
Update Duration: 4.61s
Avg Episode Reward (last 81): -6.49
Avg Episode Duration (last 81): 500.00
Avg Delivered Packages (last 81): 5.73
Avg Actor Loss (per minibatch): -0.0083
Avg Critic Loss (per minibatch): 427.8864
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   4%|▍         | 40994/1000000 [12:05<2:13:17, 119.91step/s]


Episode 82 finished at step 500 (41000 total). Env Reward: -10.26, Steps: 500, Delivered: 5


Total Steps Trained:   4%|▍         | 41017/1000000 [12:10<23:39:02, 11.26step/s]


--- Rollout Summary (Steps 40501 to 41000) ---
Update Duration: 4.62s
Avg Episode Reward (last 82): -6.54
Avg Episode Duration (last 82): 500.00
Avg Delivered Packages (last 82): 5.72
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 472.2602
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:   4%|▍         | 41497/1000000 [12:14<2:13:46, 119.41step/s]


Episode 83 finished at step 500 (41500 total). Env Reward: -9.18, Steps: 500, Delivered: 5


Total Steps Trained:   4%|▍         | 41521/1000000 [12:19<22:49:57, 11.66step/s]


--- Rollout Summary (Steps 41001 to 41500) ---
Update Duration: 4.53s
Avg Episode Reward (last 83): -6.57
Avg Episode Duration (last 83): 500.00
Avg Delivered Packages (last 83): 5.71
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 521.1362
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   4%|▍         | 41997/1000000 [12:23<2:11:08, 121.75step/s]


Episode 84 finished at step 500 (42000 total). Env Reward: -13.39, Steps: 500, Delivered: 1


Total Steps Trained:   4%|▍         | 42022/1000000 [12:28<22:33:19, 11.80step/s]


--- Rollout Summary (Steps 41501 to 42000) ---
Update Duration: 4.57s
Avg Episode Reward (last 84): -6.65
Avg Episode Duration (last 84): 500.00
Avg Delivered Packages (last 84): 5.65
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 546.2378
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   4%|▍         | 42493/1000000 [12:32<2:19:53, 114.08step/s]


Episode 85 finished at step 500 (42500 total). Env Reward: -7.70, Steps: 500, Delivered: 7


Total Steps Trained:   4%|▍         | 42517/1000000 [12:37<23:24:57, 11.36step/s]


--- Rollout Summary (Steps 42001 to 42500) ---
Update Duration: 4.51s
Avg Episode Reward (last 85): -6.66
Avg Episode Duration (last 85): 500.00
Avg Delivered Packages (last 85): 5.67
Avg Actor Loss (per minibatch): -0.0083
Avg Critic Loss (per minibatch): 490.2426
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   4%|▍         | 42995/1000000 [12:41<2:12:21, 120.51step/s]


Episode 86 finished at step 500 (43000 total). Env Reward: 1.35, Steps: 500, Delivered: 7


Total Steps Trained:   4%|▍         | 43018/1000000 [12:46<23:19:16, 11.40step/s]


--- Rollout Summary (Steps 42501 to 43000) ---
Update Duration: 4.49s
Avg Episode Reward (last 86): -6.57
Avg Episode Duration (last 86): 500.00
Avg Delivered Packages (last 86): 5.69
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 719.2600
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:   4%|▍         | 43492/1000000 [12:50<2:13:45, 119.18step/s]


Episode 87 finished at step 500 (43500 total). Env Reward: -12.05, Steps: 500, Delivered: 2


Total Steps Trained:   4%|▍         | 43515/1000000 [12:54<22:54:26, 11.60step/s]


--- Rollout Summary (Steps 43001 to 43500) ---
Update Duration: 4.49s
Avg Episode Reward (last 87): -6.63
Avg Episode Duration (last 87): 500.00
Avg Delivered Packages (last 87): 5.64
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 467.3368
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   4%|▍         | 43998/1000000 [12:58<2:16:39, 116.60step/s]


Episode 88 finished at step 500 (44000 total). Env Reward: 8.46, Steps: 500, Delivered: 5


Total Steps Trained:   4%|▍         | 44020/1000000 [13:03<24:15:10, 10.95step/s]


--- Rollout Summary (Steps 43501 to 44000) ---
Update Duration: 4.52s
Avg Episode Reward (last 88): -6.46
Avg Episode Duration (last 88): 500.00
Avg Delivered Packages (last 88): 5.64
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 2399.0320
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   4%|▍         | 44500/1000000 [13:08<2:16:12, 116.92step/s]


Episode 89 finished at step 500 (44500 total). Env Reward: -8.72, Steps: 500, Delivered: 6


Total Steps Trained:   4%|▍         | 44512/1000000 [13:12<31:56:36,  8.31step/s]


--- Rollout Summary (Steps 44001 to 44500) ---
Update Duration: 4.48s
Avg Episode Reward (last 89): -6.49
Avg Episode Duration (last 89): 500.00
Avg Delivered Packages (last 89): 5.64
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 412.1083
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:   4%|▍         | 44999/1000000 [13:16<2:16:59, 116.18step/s]


Episode 90 finished at step 500 (45000 total). Env Reward: -9.69, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000090_map1.pth


Total Steps Trained:   5%|▍         | 45011/1000000 [13:21<31:37:46,  8.39step/s]


--- Rollout Summary (Steps 44501 to 45000) ---
Update Duration: 4.51s
Avg Episode Reward (last 90): -6.52
Avg Episode Duration (last 90): 500.00
Avg Delivered Packages (last 90): 5.64
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 512.4961
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   5%|▍         | 45491/1000000 [13:25<2:13:22, 119.28step/s]


Episode 91 finished at step 500 (45500 total). Env Reward: -8.34, Steps: 500, Delivered: 6


Total Steps Trained:   5%|▍         | 45514/1000000 [13:30<23:20:50, 11.36step/s]


--- Rollout Summary (Steps 45001 to 45500) ---
Update Duration: 4.50s
Avg Episode Reward (last 91): -6.54
Avg Episode Duration (last 91): 500.00
Avg Delivered Packages (last 91): 5.65
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 531.6427
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   5%|▍         | 45994/1000000 [13:34<2:12:08, 120.32step/s]


Episode 92 finished at step 500 (46000 total). Env Reward: -9.40, Steps: 500, Delivered: 5


Total Steps Trained:   5%|▍         | 46019/1000000 [13:39<23:06:48, 11.46step/s]


--- Rollout Summary (Steps 45501 to 46000) ---
Update Duration: 4.67s
Avg Episode Reward (last 92): -6.57
Avg Episode Duration (last 92): 500.00
Avg Delivered Packages (last 92): 5.64
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 504.2290
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   5%|▍         | 46495/1000000 [13:43<2:11:14, 121.08step/s]


Episode 93 finished at step 500 (46500 total). Env Reward: -0.42, Steps: 500, Delivered: 5


Total Steps Trained:   5%|▍         | 46519/1000000 [13:48<22:27:37, 11.79step/s]


--- Rollout Summary (Steps 46001 to 46500) ---
Update Duration: 4.47s
Avg Episode Reward (last 93): -6.51
Avg Episode Duration (last 93): 500.00
Avg Delivered Packages (last 93): 5.63
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 1179.4182
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   5%|▍         | 46990/1000000 [13:52<2:10:35, 121.63step/s]


Episode 94 finished at step 500 (47000 total). Env Reward: -12.29, Steps: 500, Delivered: 2


Total Steps Trained:   5%|▍         | 47014/1000000 [13:56<22:39:14, 11.69step/s]


--- Rollout Summary (Steps 46501 to 47000) ---
Update Duration: 4.51s
Avg Episode Reward (last 94): -6.57
Avg Episode Duration (last 94): 500.00
Avg Delivered Packages (last 94): 5.60
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 499.5593
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:   5%|▍         | 47489/1000000 [14:00<2:16:05, 116.65step/s]


Episode 95 finished at step 500 (47500 total). Env Reward: -8.41, Steps: 500, Delivered: 6


Total Steps Trained:   5%|▍         | 47512/1000000 [14:05<23:35:17, 11.22step/s]


--- Rollout Summary (Steps 47001 to 47500) ---
Update Duration: 4.49s
Avg Episode Reward (last 95): -6.59
Avg Episode Duration (last 95): 500.00
Avg Delivered Packages (last 95): 5.60
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 529.4491
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   5%|▍         | 47999/1000000 [14:09<2:09:11, 122.82step/s]


Episode 96 finished at step 500 (48000 total). Env Reward: -9.62, Steps: 500, Delivered: 5


Total Steps Trained:   5%|▍         | 48012/1000000 [14:14<29:49:59,  8.86step/s]


--- Rollout Summary (Steps 47501 to 48000) ---
Update Duration: 4.51s
Avg Episode Reward (last 96): -6.62
Avg Episode Duration (last 96): 500.00
Avg Delivered Packages (last 96): 5.59
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 575.7020
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   5%|▍         | 48498/1000000 [14:18<2:21:15, 112.27step/s]


Episode 97 finished at step 500 (48500 total). Env Reward: -10.72, Steps: 500, Delivered: 4


Total Steps Trained:   5%|▍         | 48510/1000000 [14:23<32:00:29,  8.26step/s]


--- Rollout Summary (Steps 48001 to 48500) ---
Update Duration: 4.51s
Avg Episode Reward (last 97): -6.66
Avg Episode Duration (last 97): 500.00
Avg Delivered Packages (last 97): 5.58
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 461.9708
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   5%|▍         | 48991/1000000 [14:27<2:23:52, 110.17step/s]


Episode 98 finished at step 500 (49000 total). Env Reward: -4.20, Steps: 500, Delivered: 11


Total Steps Trained:   5%|▍         | 49015/1000000 [14:32<23:47:32, 11.10step/s]


--- Rollout Summary (Steps 48501 to 49000) ---
Update Duration: 4.57s
Avg Episode Reward (last 98): -6.64
Avg Episode Duration (last 98): 500.00
Avg Delivered Packages (last 98): 5.63
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 625.4959
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   5%|▍         | 49494/1000000 [14:36<2:12:17, 119.76step/s]


Episode 99 finished at step 500 (49500 total). Env Reward: -10.41, Steps: 500, Delivered: 4


Total Steps Trained:   5%|▍         | 49517/1000000 [14:41<23:50:37, 11.07step/s]


--- Rollout Summary (Steps 49001 to 49500) ---
Update Duration: 4.63s
Avg Episode Reward (last 99): -6.67
Avg Episode Duration (last 99): 500.00
Avg Delivered Packages (last 99): 5.62
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 575.9793
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:   5%|▍         | 49995/1000000 [14:45<2:16:44, 115.79step/s]


Episode 100 finished at step 500 (50000 total). Env Reward: -12.16, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000100_map1.pth


Total Steps Trained:   5%|▌         | 50019/1000000 [14:50<22:52:04, 11.54step/s]


--- Rollout Summary (Steps 49501 to 50000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 593.7023
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   5%|▌         | 50489/1000000 [14:54<2:11:41, 120.17step/s]


Episode 101 finished at step 500 (50500 total). Env Reward: -11.68, Steps: 500, Delivered: 3


Total Steps Trained:   5%|▌         | 50512/1000000 [14:58<23:12:04, 11.37step/s]


--- Rollout Summary (Steps 50001 to 50500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 542.3405
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:   5%|▌         | 50994/1000000 [15:02<2:12:58, 118.94step/s]


Episode 102 finished at step 500 (51000 total). Env Reward: -9.80, Steps: 500, Delivered: 5


Total Steps Trained:   5%|▌         | 51017/1000000 [15:07<23:49:53, 11.06step/s]


--- Rollout Summary (Steps 50501 to 51000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -6.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 520.6439
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   5%|▌         | 51490/1000000 [15:11<2:16:48, 115.55step/s]


Episode 103 finished at step 500 (51500 total). Env Reward: -4.65, Steps: 500, Delivered: 10


Total Steps Trained:   5%|▌         | 51513/1000000 [15:16<23:30:41, 11.21step/s]


--- Rollout Summary (Steps 51001 to 51500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 700.7217
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   5%|▌         | 51991/1000000 [15:20<2:13:22, 118.47step/s]


Episode 104 finished at step 500 (52000 total). Env Reward: 1.43, Steps: 500, Delivered: 7


Total Steps Trained:   5%|▌         | 52014/1000000 [15:25<23:18:12, 11.30step/s]


--- Rollout Summary (Steps 51501 to 52000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 681.9061
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   5%|▌         | 52491/1000000 [15:29<2:12:24, 119.27step/s]


Episode 105 finished at step 500 (52500 total). Env Reward: -13.21, Steps: 500, Delivered: 2


Total Steps Trained:   5%|▌         | 52514/1000000 [15:34<23:00:02, 11.44step/s]


--- Rollout Summary (Steps 52001 to 52500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0092
Avg Critic Loss (per minibatch): 475.7180
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   5%|▌         | 52991/1000000 [15:38<2:24:11, 109.47step/s]


Episode 106 finished at step 500 (53000 total). Env Reward: -8.07, Steps: 500, Delivered: 6


Total Steps Trained:   5%|▌         | 53013/1000000 [15:43<24:33:34, 10.71step/s]


--- Rollout Summary (Steps 52501 to 53000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 503.1545
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:   5%|▌         | 53489/1000000 [15:47<2:10:14, 121.13step/s]


Episode 107 finished at step 500 (53500 total). Env Reward: -11.53, Steps: 500, Delivered: 3


Total Steps Trained:   5%|▌         | 53513/1000000 [15:51<22:32:32, 11.66step/s]


--- Rollout Summary (Steps 53001 to 53500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 474.7295
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:   5%|▌         | 53994/1000000 [15:56<2:13:18, 118.28step/s]


Episode 108 finished at step 500 (54000 total). Env Reward: -11.77, Steps: 500, Delivered: 3


Total Steps Trained:   5%|▌         | 54017/1000000 [16:00<23:13:41, 11.31step/s]


--- Rollout Summary (Steps 53501 to 54000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 523.4359
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:   5%|▌         | 54489/1000000 [16:04<2:11:29, 119.85step/s]


Episode 109 finished at step 500 (54500 total). Env Reward: -8.20, Steps: 500, Delivered: 6


Total Steps Trained:   5%|▌         | 54512/1000000 [16:09<23:12:14, 11.32step/s]


--- Rollout Summary (Steps 54001 to 54500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 523.5654
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   5%|▌         | 54989/1000000 [16:13<2:16:34, 115.33step/s]


Episode 110 finished at step 500 (55000 total). Env Reward: -7.91, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000110_map1.pth


Total Steps Trained:   6%|▌         | 55012/1000000 [16:18<23:30:17, 11.17step/s]


--- Rollout Summary (Steps 54501 to 55000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0075
Avg Critic Loss (per minibatch): 581.1759
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:   6%|▌         | 55493/1000000 [16:22<2:11:09, 120.03step/s]


Episode 111 finished at step 500 (55500 total). Env Reward: -8.60, Steps: 500, Delivered: 6


Total Steps Trained:   6%|▌         | 55516/1000000 [16:27<22:50:02, 11.49step/s]


--- Rollout Summary (Steps 55001 to 55500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 522.2048
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   6%|▌         | 55992/1000000 [16:31<2:11:58, 119.21step/s]


Episode 112 finished at step 500 (56000 total). Env Reward: -9.65, Steps: 500, Delivered: 5


Total Steps Trained:   6%|▌         | 56015/1000000 [16:36<23:00:47, 11.39step/s]


--- Rollout Summary (Steps 55501 to 56000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 668.8694
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   6%|▌         | 56499/1000000 [16:40<2:11:11, 119.86step/s]


Episode 113 finished at step 500 (56500 total). Env Reward: 7.23, Steps: 500, Delivered: 4


Total Steps Trained:   6%|▌         | 56511/1000000 [16:45<31:23:53,  8.35step/s]


--- Rollout Summary (Steps 56001 to 56500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 1752.4226
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   6%|▌         | 56994/1000000 [16:49<2:22:12, 110.52step/s]


Episode 114 finished at step 500 (57000 total). Env Reward: -6.82, Steps: 500, Delivered: 8


Total Steps Trained:   6%|▌         | 57017/1000000 [16:54<23:27:46, 11.16step/s]


--- Rollout Summary (Steps 56501 to 57000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 442.2682
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   6%|▌         | 57495/1000000 [16:58<2:24:07, 108.99step/s]


Episode 115 finished at step 500 (57500 total). Env Reward: -10.84, Steps: 500, Delivered: 4


Total Steps Trained:   6%|▌         | 57517/1000000 [17:03<24:11:38, 10.82step/s]


--- Rollout Summary (Steps 57001 to 57500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 557.5903
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   6%|▌         | 57994/1000000 [17:07<2:14:13, 116.96step/s]


Episode 116 finished at step 500 (58000 total). Env Reward: -8.93, Steps: 500, Delivered: 6


Total Steps Trained:   6%|▌         | 58017/1000000 [17:11<23:35:26, 11.09step/s]


--- Rollout Summary (Steps 57501 to 58000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 441.0347
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:   6%|▌         | 58500/1000000 [17:16<2:30:59, 103.92step/s]


Episode 117 finished at step 500 (58500 total). Env Reward: 4.52, Steps: 500, Delivered: 10


Total Steps Trained:   6%|▌         | 58511/1000000 [17:20<33:21:03,  7.84step/s]


--- Rollout Summary (Steps 58001 to 58500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 879.8244
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:   6%|▌         | 58992/1000000 [17:24<2:15:47, 115.49step/s]


Episode 118 finished at step 500 (59000 total). Env Reward: -9.65, Steps: 500, Delivered: 5


Total Steps Trained:   6%|▌         | 59015/1000000 [17:29<23:17:50, 11.22step/s]


--- Rollout Summary (Steps 58501 to 59000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 609.4838
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:   6%|▌         | 59489/1000000 [17:33<2:10:09, 120.43step/s]


Episode 119 finished at step 500 (59500 total). Env Reward: -11.46, Steps: 500, Delivered: 3


Total Steps Trained:   6%|▌         | 59511/1000000 [17:38<23:22:01, 11.18step/s]


--- Rollout Summary (Steps 59001 to 59500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 605.3440
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:   6%|▌         | 59995/1000000 [17:42<2:10:09, 120.37step/s]


Episode 120 finished at step 500 (60000 total). Env Reward: -12.98, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000120_map1.pth


Total Steps Trained:   6%|▌         | 60019/1000000 [17:47<22:35:35, 11.56step/s]


--- Rollout Summary (Steps 59501 to 60000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0004
Avg Critic Loss (per minibatch): 635.2801
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   6%|▌         | 60495/1000000 [17:51<2:08:31, 121.83step/s]


Episode 121 finished at step 500 (60500 total). Env Reward: -8.52, Steps: 500, Delivered: 6


Total Steps Trained:   6%|▌         | 60519/1000000 [17:56<22:14:54, 11.73step/s]


--- Rollout Summary (Steps 60001 to 60500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 447.0874
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   6%|▌         | 60989/1000000 [18:00<2:09:59, 120.40step/s]


Episode 122 finished at step 500 (61000 total). Env Reward: -9.73, Steps: 500, Delivered: 5


Total Steps Trained:   6%|▌         | 61013/1000000 [18:05<22:56:51, 11.37step/s]


--- Rollout Summary (Steps 60501 to 61000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 683.2824
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   6%|▌         | 61497/1000000 [18:09<2:09:26, 120.83step/s]


Episode 123 finished at step 500 (61500 total). Env Reward: -8.75, Steps: 500, Delivered: 6


Total Steps Trained:   6%|▌         | 61510/1000000 [18:13<29:19:20,  8.89step/s]


--- Rollout Summary (Steps 61001 to 61500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 734.1293
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   6%|▌         | 61997/1000000 [18:18<2:23:07, 109.23step/s]


Episode 124 finished at step 500 (62000 total). Env Reward: -10.37, Steps: 500, Delivered: 5


Total Steps Trained:   6%|▌         | 62019/1000000 [18:22<24:36:26, 10.59step/s]


--- Rollout Summary (Steps 61501 to 62000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 533.6364
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   6%|▌         | 62492/1000000 [18:26<2:13:03, 117.43step/s]


Episode 125 finished at step 500 (62500 total). Env Reward: -9.87, Steps: 500, Delivered: 5


Total Steps Trained:   6%|▋         | 62515/1000000 [18:31<23:03:53, 11.29step/s]


--- Rollout Summary (Steps 62001 to 62500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 538.2156
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:   6%|▋         | 62995/1000000 [18:36<2:29:54, 104.17step/s]


Episode 126 finished at step 500 (63000 total). Env Reward: -8.24, Steps: 500, Delivered: 6


Total Steps Trained:   6%|▋         | 63018/1000000 [18:40<24:12:00, 10.75step/s]


--- Rollout Summary (Steps 62501 to 63000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 485.8340
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   6%|▋         | 63495/1000000 [18:44<2:07:14, 122.67step/s]


Episode 127 finished at step 500 (63500 total). Env Reward: -11.91, Steps: 500, Delivered: 3


Total Steps Trained:   6%|▋         | 63519/1000000 [18:49<22:09:51, 11.74step/s]


--- Rollout Summary (Steps 63001 to 63500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 374.3313
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   6%|▋         | 63998/1000000 [18:53<2:15:28, 115.14step/s]


Episode 128 finished at step 500 (64000 total). Env Reward: -6.96, Steps: 500, Delivered: 7


Total Steps Trained:   6%|▋         | 64010/1000000 [18:58<31:32:06,  8.24step/s]


--- Rollout Summary (Steps 63501 to 64000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 485.7244
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:   6%|▋         | 64492/1000000 [19:02<2:15:57, 114.68step/s]


Episode 129 finished at step 500 (64500 total). Env Reward: -11.00, Steps: 500, Delivered: 4


Total Steps Trained:   6%|▋         | 64514/1000000 [19:07<23:59:45, 10.83step/s]


--- Rollout Summary (Steps 64001 to 64500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 473.9291
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   6%|▋         | 64998/1000000 [19:11<2:18:13, 112.74step/s]


Episode 130 finished at step 500 (65000 total). Env Reward: -9.84, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000130_map1.pth


Total Steps Trained:   7%|▋         | 65010/1000000 [19:16<31:40:00,  8.20step/s]


--- Rollout Summary (Steps 64501 to 65000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 765.8558
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   7%|▋         | 65494/1000000 [19:20<2:11:09, 118.75step/s]


Episode 131 finished at step 500 (65500 total). Env Reward: -9.59, Steps: 500, Delivered: 5


Total Steps Trained:   7%|▋         | 65518/1000000 [19:25<22:39:13, 11.46step/s]


--- Rollout Summary (Steps 65001 to 65500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 588.2261
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:   7%|▋         | 65989/1000000 [19:29<2:19:16, 111.77step/s]


Episode 132 finished at step 500 (66000 total). Env Reward: -5.54, Steps: 500, Delivered: 9


Total Steps Trained:   7%|▋         | 66011/1000000 [19:34<24:17:33, 10.68step/s]


--- Rollout Summary (Steps 65501 to 66000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0086
Avg Critic Loss (per minibatch): 429.4756
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   7%|▋         | 66495/1000000 [19:38<2:13:51, 116.23step/s]


Episode 133 finished at step 500 (66500 total). Env Reward: -11.45, Steps: 500, Delivered: 3


Total Steps Trained:   7%|▋         | 66519/1000000 [19:42<22:17:14, 11.63step/s]


--- Rollout Summary (Steps 66001 to 66500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 408.8226
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   7%|▋         | 66989/1000000 [19:47<2:12:03, 117.75step/s]


Episode 134 finished at step 500 (67000 total). Env Reward: -8.29, Steps: 500, Delivered: 6


Total Steps Trained:   7%|▋         | 67012/1000000 [19:51<22:43:13, 11.41step/s]


--- Rollout Summary (Steps 66501 to 67000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0088
Avg Critic Loss (per minibatch): 590.0978
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   7%|▋         | 67496/1000000 [19:55<2:12:56, 116.90step/s]


Episode 135 finished at step 500 (67500 total). Env Reward: -11.11, Steps: 500, Delivered: 4


Total Steps Trained:   7%|▋         | 67519/1000000 [20:00<23:10:54, 11.17step/s]


--- Rollout Summary (Steps 67001 to 67500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 745.0558
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   7%|▋         | 67993/1000000 [20:04<2:11:53, 117.77step/s]


Episode 136 finished at step 500 (68000 total). Env Reward: -7.63, Steps: 500, Delivered: 7


Total Steps Trained:   7%|▋         | 68016/1000000 [20:09<23:06:25, 11.20step/s]


--- Rollout Summary (Steps 67501 to 68000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 510.2140
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   7%|▋         | 68491/1000000 [20:13<2:17:24, 112.99step/s]


Episode 137 finished at step 500 (68500 total). Env Reward: -7.73, Steps: 500, Delivered: 7


Total Steps Trained:   7%|▋         | 68514/1000000 [20:18<23:33:26, 10.98step/s]


--- Rollout Summary (Steps 68001 to 68500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 615.1427
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   7%|▋         | 69000/1000000 [20:22<2:09:53, 119.46step/s]


Episode 138 finished at step 500 (69000 total). Env Reward: -2.02, Steps: 500, Delivered: 4


Total Steps Trained:   7%|▋         | 69012/1000000 [20:27<30:28:35,  8.49step/s]


--- Rollout Summary (Steps 68501 to 69000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 1244.1559
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   7%|▋         | 69493/1000000 [20:31<2:14:03, 115.68step/s]


Episode 139 finished at step 500 (69500 total). Env Reward: -11.85, Steps: 500, Delivered: 3


Total Steps Trained:   7%|▋         | 69516/1000000 [20:36<22:44:37, 11.36step/s]


--- Rollout Summary (Steps 69001 to 69500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 596.7321
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   7%|▋         | 70000/1000000 [20:40<2:13:05, 116.47step/s]


Episode 140 finished at step 500 (70000 total). Env Reward: -2.86, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000140_map1.pth


Total Steps Trained:   7%|▋         | 70012/1000000 [20:44<30:45:19,  8.40step/s]


--- Rollout Summary (Steps 69501 to 70000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 611.2150
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   7%|▋         | 70490/1000000 [20:49<2:13:02, 116.45step/s]


Episode 141 finished at step 500 (70500 total). Env Reward: 10.04, Steps: 500, Delivered: 7


Total Steps Trained:   7%|▋         | 70513/1000000 [20:53<22:55:26, 11.26step/s]


--- Rollout Summary (Steps 70001 to 70500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 1577.8283
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:   7%|▋         | 70992/1000000 [20:58<2:11:35, 117.66step/s]


Episode 142 finished at step 500 (71000 total). Env Reward: -10.52, Steps: 500, Delivered: 4


Total Steps Trained:   7%|▋         | 71015/1000000 [21:02<23:18:56, 11.07step/s]


--- Rollout Summary (Steps 70501 to 71000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 510.4426
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:   7%|▋         | 71497/1000000 [21:07<2:07:10, 121.68step/s]


Episode 143 finished at step 500 (71500 total). Env Reward: -10.58, Steps: 500, Delivered: 4


Total Steps Trained:   7%|▋         | 71510/1000000 [21:11<29:20:55,  8.79step/s]


--- Rollout Summary (Steps 71001 to 71500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 688.2261
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   7%|▋         | 71996/1000000 [21:15<2:05:41, 123.05step/s]


Episode 144 finished at step 500 (72000 total). Env Reward: -9.70, Steps: 500, Delivered: 5


Total Steps Trained:   7%|▋         | 72020/1000000 [21:20<22:11:58, 11.61step/s]


--- Rollout Summary (Steps 71501 to 72000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 480.7463
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:   7%|▋         | 72496/1000000 [21:24<2:14:11, 115.20step/s]


Episode 145 finished at step 500 (72500 total). Env Reward: -11.40, Steps: 500, Delivered: 3


Total Steps Trained:   7%|▋         | 72519/1000000 [21:29<23:53:05, 10.79step/s]


--- Rollout Summary (Steps 72001 to 72500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 645.3867
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:   7%|▋         | 72995/1000000 [21:33<2:07:58, 120.72step/s]


Episode 146 finished at step 500 (73000 total). Env Reward: -7.13, Steps: 500, Delivered: 7


Total Steps Trained:   7%|▋         | 73020/1000000 [21:38<21:43:56, 11.85step/s]


--- Rollout Summary (Steps 72501 to 73000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.17
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 358.2382
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:   7%|▋         | 73489/1000000 [21:42<2:15:44, 113.76step/s]


Episode 147 finished at step 500 (73500 total). Env Reward: -7.19, Steps: 500, Delivered: 7


Total Steps Trained:   7%|▋         | 73511/1000000 [21:47<23:46:32, 10.82step/s]


--- Rollout Summary (Steps 73001 to 73500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 473.1101
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:   7%|▋         | 73990/1000000 [21:51<2:13:19, 115.75step/s]


Episode 148 finished at step 500 (74000 total). Env Reward: -1.08, Steps: 500, Delivered: 5


Total Steps Trained:   7%|▋         | 74013/1000000 [21:56<23:14:46, 11.07step/s]


--- Rollout Summary (Steps 73501 to 74000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 708.2333
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:   7%|▋         | 74494/1000000 [22:00<2:20:13, 110.01step/s]


Episode 149 finished at step 500 (74500 total). Env Reward: -8.86, Steps: 500, Delivered: 6


Total Steps Trained:   7%|▋         | 74518/1000000 [22:05<22:51:19, 11.25step/s]


--- Rollout Summary (Steps 74001 to 74500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 473.1459
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   7%|▋         | 74991/1000000 [22:09<2:15:33, 113.72step/s]


Episode 150 finished at step 500 (75000 total). Env Reward: -9.70, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000150_map1.pth


Total Steps Trained:   8%|▊         | 75014/1000000 [22:14<23:29:43, 10.94step/s]


--- Rollout Summary (Steps 74501 to 75000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 432.7483
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   8%|▊         | 75496/1000000 [22:18<2:17:43, 111.88step/s]


Episode 151 finished at step 500 (75500 total). Env Reward: 4.88, Steps: 500, Delivered: 11


Total Steps Trained:   8%|▊         | 75520/1000000 [22:23<22:50:03, 11.25step/s]


--- Rollout Summary (Steps 75001 to 75500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 902.8015
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   8%|▊         | 75999/1000000 [22:27<2:09:55, 118.52step/s]


Episode 152 finished at step 500 (76000 total). Env Reward: -7.51, Steps: 500, Delivered: 7


Total Steps Trained:   8%|▊         | 76011/1000000 [22:32<31:28:10,  8.16step/s]


--- Rollout Summary (Steps 75501 to 76000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 509.2082
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:   8%|▊         | 76489/1000000 [22:36<2:10:07, 118.28step/s]


Episode 153 finished at step 500 (76500 total). Env Reward: 0.41, Steps: 500, Delivered: 6


Total Steps Trained:   8%|▊         | 76512/1000000 [22:41<23:09:09, 11.08step/s]


--- Rollout Summary (Steps 76001 to 76500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 783.0443
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:   8%|▊         | 76994/1000000 [22:45<2:09:23, 118.88step/s]


Episode 154 finished at step 500 (77000 total). Env Reward: -10.27, Steps: 500, Delivered: 4


Total Steps Trained:   8%|▊         | 77016/1000000 [22:50<23:05:53, 11.10step/s]


--- Rollout Summary (Steps 76501 to 77000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 587.5503
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   8%|▊         | 77494/1000000 [22:54<2:09:17, 118.92step/s]


Episode 155 finished at step 500 (77500 total). Env Reward: -9.18, Steps: 500, Delivered: 5


Total Steps Trained:   8%|▊         | 77518/1000000 [22:59<22:21:47, 11.46step/s]


--- Rollout Summary (Steps 77001 to 77500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0088
Avg Critic Loss (per minibatch): 491.9090
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:   8%|▊         | 77997/1000000 [23:03<2:07:07, 120.88step/s]


Episode 156 finished at step 500 (78000 total). Env Reward: -12.39, Steps: 500, Delivered: 2


Total Steps Trained:   8%|▊         | 78010/1000000 [23:07<29:30:46,  8.68step/s]


--- Rollout Summary (Steps 77501 to 78000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 466.8487
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:   8%|▊         | 78490/1000000 [23:11<2:11:57, 116.38step/s]


Episode 157 finished at step 500 (78500 total). Env Reward: -7.56, Steps: 500, Delivered: 7


Total Steps Trained:   8%|▊         | 78512/1000000 [23:16<23:28:19, 10.91step/s]


--- Rollout Summary (Steps 78001 to 78500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 432.5051
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   8%|▊         | 78993/1000000 [23:20<2:14:03, 114.50step/s]


Episode 158 finished at step 500 (79000 total). Env Reward: -10.22, Steps: 500, Delivered: 4


Total Steps Trained:   8%|▊         | 79016/1000000 [23:25<22:51:22, 11.19step/s]


--- Rollout Summary (Steps 78501 to 79000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 493.9897
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:   8%|▊         | 79497/1000000 [23:29<2:09:13, 118.72step/s]


Episode 159 finished at step 500 (79500 total). Env Reward: -9.69, Steps: 500, Delivered: 5


Total Steps Trained:   8%|▊         | 79520/1000000 [23:34<22:31:20, 11.35step/s]


--- Rollout Summary (Steps 79001 to 79500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 380.1940
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:   8%|▊         | 79995/1000000 [23:38<2:18:46, 110.50step/s]


Episode 160 finished at step 500 (80000 total). Env Reward: -7.72, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000160_map1.pth


Total Steps Trained:   8%|▊         | 80018/1000000 [23:43<23:18:26, 10.96step/s]


--- Rollout Summary (Steps 79501 to 80000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 598.4441
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   8%|▊         | 80497/1000000 [23:47<2:08:42, 119.07step/s]


Episode 161 finished at step 500 (80500 total). Env Reward: -11.73, Steps: 500, Delivered: 3


Total Steps Trained:   8%|▊         | 80520/1000000 [23:52<23:08:39, 11.04step/s]


--- Rollout Summary (Steps 80001 to 80500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 551.6612
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   8%|▊         | 80998/1000000 [23:56<2:09:52, 117.94step/s]


Episode 162 finished at step 500 (81000 total). Env Reward: -0.53, Steps: 500, Delivered: 5


Total Steps Trained:   8%|▊         | 81022/1000000 [24:01<22:15:14, 11.47step/s]


--- Rollout Summary (Steps 80501 to 81000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 738.4812
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:   8%|▊         | 81491/1000000 [24:05<2:05:49, 121.66step/s]


Episode 163 finished at step 500 (81500 total). Env Reward: -8.59, Steps: 500, Delivered: 6


Total Steps Trained:   8%|▊         | 81514/1000000 [24:10<23:00:01, 11.09step/s]


--- Rollout Summary (Steps 81001 to 81500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 643.1144
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   8%|▊         | 81990/1000000 [24:14<2:04:50, 122.56step/s]


Episode 164 finished at step 500 (82000 total). Env Reward: -11.47, Steps: 500, Delivered: 3


Total Steps Trained:   8%|▊         | 82014/1000000 [24:19<21:47:51, 11.70step/s]


--- Rollout Summary (Steps 81501 to 82000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 477.3454
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   8%|▊         | 82499/1000000 [24:23<2:13:24, 114.63step/s]


Episode 165 finished at step 500 (82500 total). Env Reward: -9.30, Steps: 500, Delivered: 5


Total Steps Trained:   8%|▊         | 82511/1000000 [24:28<31:10:55,  8.17step/s]


--- Rollout Summary (Steps 82001 to 82500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0089
Avg Critic Loss (per minibatch): 750.6121
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:   8%|▊         | 82988/1000000 [24:32<2:07:01, 120.32step/s]


Episode 166 finished at step 500 (83000 total). Env Reward: -6.83, Steps: 500, Delivered: 8


Total Steps Trained:   8%|▊         | 83011/1000000 [24:36<22:26:31, 11.35step/s]


--- Rollout Summary (Steps 82501 to 83000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 546.6234
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   8%|▊         | 83491/1000000 [24:41<2:12:32, 115.25step/s]


Episode 167 finished at step 500 (83500 total). Env Reward: -5.87, Steps: 500, Delivered: 9


Total Steps Trained:   8%|▊         | 83514/1000000 [24:45<22:49:47, 11.15step/s]


--- Rollout Summary (Steps 83001 to 83500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 592.1245
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:   8%|▊         | 83993/1000000 [24:50<2:11:11, 116.37step/s]


Episode 168 finished at step 500 (84000 total). Env Reward: -9.91, Steps: 500, Delivered: 5


Total Steps Trained:   8%|▊         | 84015/1000000 [24:54<23:25:48, 10.86step/s]


--- Rollout Summary (Steps 83501 to 84000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): 0.0003
Avg Critic Loss (per minibatch): 471.9660
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   8%|▊         | 84489/1000000 [24:59<2:10:59, 116.48step/s]


Episode 169 finished at step 500 (84500 total). Env Reward: 1.62, Steps: 500, Delivered: 7


Total Steps Trained:   8%|▊         | 84512/1000000 [25:03<22:48:11, 11.15step/s]


--- Rollout Summary (Steps 84001 to 84500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 814.4136
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:   8%|▊         | 84996/1000000 [25:08<2:25:19, 104.93step/s]


Episode 170 finished at step 500 (85000 total). Env Reward: -8.42, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000170_map1.pth


Total Steps Trained:   9%|▊         | 85018/1000000 [25:12<24:53:36, 10.21step/s]


--- Rollout Summary (Steps 84501 to 85000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 562.5107
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   9%|▊         | 85499/1000000 [25:17<2:08:50, 118.30step/s]


Episode 171 finished at step 500 (85500 total). Env Reward: -0.84, Steps: 500, Delivered: 5


Total Steps Trained:   9%|▊         | 85523/1000000 [25:21<21:59:52, 11.55step/s]


--- Rollout Summary (Steps 85001 to 85500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 756.0099
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:   9%|▊         | 85988/1000000 [25:25<2:06:09, 120.75step/s]


Episode 172 finished at step 500 (86000 total). Env Reward: -10.04, Steps: 500, Delivered: 4


Total Steps Trained:   9%|▊         | 86012/1000000 [25:30<21:41:59, 11.70step/s]


--- Rollout Summary (Steps 85501 to 86000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 340.5927
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:   9%|▊         | 86493/1000000 [25:34<2:06:44, 120.13step/s]


Episode 173 finished at step 500 (86500 total). Env Reward: -10.87, Steps: 500, Delivered: 4


Total Steps Trained:   9%|▊         | 86517/1000000 [25:39<21:49:32, 11.63step/s]


--- Rollout Summary (Steps 86001 to 86500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0003
Avg Critic Loss (per minibatch): 433.8191
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:   9%|▊         | 86999/1000000 [25:43<2:10:02, 117.02step/s]


Episode 174 finished at step 500 (87000 total). Env Reward: -10.68, Steps: 500, Delivered: 4


Total Steps Trained:   9%|▊         | 87011/1000000 [25:48<29:32:28,  8.58step/s]


--- Rollout Summary (Steps 86501 to 87000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 583.0672
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:   9%|▊         | 87494/1000000 [25:52<2:06:39, 120.07step/s]


Episode 175 finished at step 500 (87500 total). Env Reward: -12.03, Steps: 500, Delivered: 3


Total Steps Trained:   9%|▉         | 87517/1000000 [25:56<21:57:10, 11.55step/s]


--- Rollout Summary (Steps 87001 to 87500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 515.8053
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   9%|▉         | 87992/1000000 [26:01<2:16:22, 111.46step/s]


Episode 176 finished at step 500 (88000 total). Env Reward: -9.15, Steps: 500, Delivered: 6


Total Steps Trained:   9%|▉         | 88015/1000000 [26:05<22:38:17, 11.19step/s]


--- Rollout Summary (Steps 87501 to 88000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 506.7591
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:   9%|▉         | 88497/1000000 [26:09<2:13:00, 114.21step/s]


Episode 177 finished at step 500 (88500 total). Env Reward: -10.09, Steps: 500, Delivered: 5


Total Steps Trained:   9%|▉         | 88520/1000000 [26:14<23:42:22, 10.68step/s]


--- Rollout Summary (Steps 88001 to 88500) ---
Update Duration: 4.74s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 511.8331
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   9%|▉         | 88997/1000000 [26:19<2:19:49, 108.59step/s]


Episode 178 finished at step 500 (89000 total). Env Reward: -8.94, Steps: 500, Delivered: 6


Total Steps Trained:   9%|▉         | 89020/1000000 [26:23<22:39:20, 11.17step/s]


--- Rollout Summary (Steps 88501 to 89000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 746.6537
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:   9%|▉         | 89495/1000000 [26:27<2:16:55, 110.83step/s]


Episode 179 finished at step 500 (89500 total). Env Reward: -7.07, Steps: 500, Delivered: 8


Total Steps Trained:   9%|▉         | 89520/1000000 [26:32<21:40:46, 11.67step/s]


--- Rollout Summary (Steps 89001 to 89500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 360.5925
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:   9%|▉         | 89991/1000000 [26:36<2:03:02, 123.26step/s]


Episode 180 finished at step 500 (90000 total). Env Reward: -9.08, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000180_map1.pth


Total Steps Trained:   9%|▉         | 90015/1000000 [26:41<21:42:36, 11.64step/s]


--- Rollout Summary (Steps 89501 to 90000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 610.1453
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   9%|▉         | 90494/1000000 [26:45<2:10:33, 116.11step/s]


Episode 181 finished at step 500 (90500 total). Env Reward: -0.08, Steps: 500, Delivered: 6


Total Steps Trained:   9%|▉         | 90517/1000000 [26:50<22:35:06, 11.19step/s]


--- Rollout Summary (Steps 90001 to 90500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 814.6037
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:   9%|▉         | 90995/1000000 [26:54<2:10:27, 116.12step/s]


Episode 182 finished at step 500 (91000 total). Env Reward: 0.32, Steps: 500, Delivered: 6


Total Steps Trained:   9%|▉         | 91018/1000000 [26:59<23:07:44, 10.92step/s]


--- Rollout Summary (Steps 90501 to 91000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 848.2795
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:   9%|▉         | 91493/1000000 [27:03<2:12:21, 114.40step/s]


Episode 183 finished at step 500 (91500 total). Env Reward: -10.36, Steps: 500, Delivered: 5


Total Steps Trained:   9%|▉         | 91516/1000000 [27:08<22:37:07, 11.16step/s]


--- Rollout Summary (Steps 91001 to 91500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 480.4481
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:   9%|▉         | 91993/1000000 [27:12<2:07:25, 118.76step/s]


Episode 184 finished at step 500 (92000 total). Env Reward: -11.05, Steps: 500, Delivered: 4


Total Steps Trained:   9%|▉         | 92016/1000000 [27:17<22:38:26, 11.14step/s]


--- Rollout Summary (Steps 91501 to 92000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 402.3409
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:   9%|▉         | 92500/1000000 [27:21<2:18:02, 109.56step/s]


Episode 185 finished at step 500 (92500 total). Env Reward: -7.63, Steps: 500, Delivered: 7


Total Steps Trained:   9%|▉         | 92511/1000000 [27:26<31:31:17,  8.00step/s]


--- Rollout Summary (Steps 92001 to 92500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 544.6339
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:   9%|▉         | 92999/1000000 [27:30<2:12:07, 114.41step/s]


Episode 186 finished at step 500 (93000 total). Env Reward: -10.02, Steps: 500, Delivered: 5


Total Steps Trained:   9%|▉         | 93011/1000000 [27:35<31:04:20,  8.11step/s]


--- Rollout Summary (Steps 92501 to 93000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 503.3923
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   9%|▉         | 93495/1000000 [27:39<2:10:59, 115.34step/s]


Episode 187 finished at step 500 (93500 total). Env Reward: -12.62, Steps: 500, Delivered: 2


Total Steps Trained:   9%|▉         | 93519/1000000 [27:43<22:01:31, 11.43step/s]


--- Rollout Summary (Steps 93001 to 93500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 534.5857
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:   9%|▉         | 93990/1000000 [27:47<2:04:54, 120.89step/s]


Episode 188 finished at step 500 (94000 total). Env Reward: -10.05, Steps: 500, Delivered: 4


Total Steps Trained:   9%|▉         | 94015/1000000 [27:52<21:15:59, 11.83step/s]


--- Rollout Summary (Steps 93501 to 94000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 485.6748
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:   9%|▉         | 94495/1000000 [27:56<2:12:22, 114.00step/s]


Episode 189 finished at step 500 (94500 total). Env Reward: -7.72, Steps: 500, Delivered: 7


Total Steps Trained:   9%|▉         | 94519/1000000 [28:01<21:48:54, 11.53step/s]


--- Rollout Summary (Steps 94001 to 94500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0083
Avg Critic Loss (per minibatch): 436.6902
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  10%|▉         | 95000/1000000 [28:05<2:12:36, 113.74step/s]


Episode 190 finished at step 500 (95000 total). Env Reward: -10.93, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000190_map1.pth


Total Steps Trained:  10%|▉         | 95012/1000000 [28:10<30:22:53,  8.27step/s]


--- Rollout Summary (Steps 94501 to 95000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 501.8165
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  10%|▉         | 95491/1000000 [28:14<2:07:02, 118.67step/s]


Episode 191 finished at step 500 (95500 total). Env Reward: -9.42, Steps: 500, Delivered: 5


Total Steps Trained:  10%|▉         | 95514/1000000 [28:19<22:15:44, 11.29step/s]


--- Rollout Summary (Steps 95001 to 95500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 393.0932
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  10%|▉         | 95999/1000000 [28:23<2:16:26, 110.43step/s]


Episode 192 finished at step 500 (96000 total). Env Reward: 4.19, Steps: 500, Delivered: 10


Total Steps Trained:  10%|▉         | 96011/1000000 [28:28<30:50:19,  8.14step/s]


--- Rollout Summary (Steps 95501 to 96000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 1174.2826
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  10%|▉         | 96500/1000000 [28:32<2:07:22, 118.22step/s]


Episode 193 finished at step 500 (96500 total). Env Reward: -6.15, Steps: 500, Delivered: 8


Total Steps Trained:  10%|▉         | 96512/1000000 [28:36<29:06:21,  8.62step/s]


--- Rollout Summary (Steps 96001 to 96500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 472.8661
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  10%|▉         | 96992/1000000 [28:41<2:05:57, 119.49step/s]


Episode 194 finished at step 500 (97000 total). Env Reward: -11.70, Steps: 500, Delivered: 3


Total Steps Trained:  10%|▉         | 97014/1000000 [28:45<22:52:22, 10.97step/s]


--- Rollout Summary (Steps 96501 to 97000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 561.9529
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  10%|▉         | 97491/1000000 [28:49<2:10:05, 115.63step/s]


Episode 195 finished at step 500 (97500 total). Env Reward: -10.00, Steps: 500, Delivered: 5


Total Steps Trained:  10%|▉         | 97514/1000000 [28:54<22:54:19, 10.94step/s]


--- Rollout Summary (Steps 97001 to 97500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 600.4672
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  10%|▉         | 97996/1000000 [28:58<2:07:08, 118.25step/s]


Episode 196 finished at step 500 (98000 total). Env Reward: -11.40, Steps: 500, Delivered: 3


Total Steps Trained:  10%|▉         | 98019/1000000 [29:03<22:23:25, 11.19step/s]


--- Rollout Summary (Steps 97501 to 98000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 502.0245
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  10%|▉         | 98498/1000000 [29:07<2:02:35, 122.57step/s]


Episode 197 finished at step 500 (98500 total). Env Reward: -12.16, Steps: 500, Delivered: 3


Total Steps Trained:  10%|▉         | 98511/1000000 [29:12<28:18:22,  8.85step/s]


--- Rollout Summary (Steps 98001 to 98500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 467.5615
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  10%|▉         | 98992/1000000 [29:16<2:04:54, 120.22step/s]


Episode 198 finished at step 500 (99000 total). Env Reward: -10.65, Steps: 500, Delivered: 4


Total Steps Trained:  10%|▉         | 99016/1000000 [29:21<21:34:32, 11.60step/s]


--- Rollout Summary (Steps 98501 to 99000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 526.3349
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  10%|▉         | 99497/1000000 [29:25<2:04:20, 120.70step/s]


Episode 199 finished at step 500 (99500 total). Env Reward: -10.28, Steps: 500, Delivered: 4


Total Steps Trained:  10%|▉         | 99521/1000000 [29:30<21:30:22, 11.63step/s]


--- Rollout Summary (Steps 99001 to 99500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 549.9692
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  10%|▉         | 99989/1000000 [29:34<2:12:32, 113.17step/s]


Episode 200 finished at step 500 (100000 total). Env Reward: -4.86, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000200_map1.pth


Total Steps Trained:  10%|█         | 100012/1000000 [29:38<22:41:56, 11.01step/s]


--- Rollout Summary (Steps 99501 to 100000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0078
Avg Critic Loss (per minibatch): 751.9697
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  10%|█         | 100497/1000000 [29:43<2:09:23, 115.87step/s]


Episode 201 finished at step 500 (100500 total). Env Reward: 0.39, Steps: 500, Delivered: 6


Total Steps Trained:  10%|█         | 100520/1000000 [29:47<22:32:19, 11.09step/s]


--- Rollout Summary (Steps 100001 to 100500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 721.9620
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  10%|█         | 100997/1000000 [29:51<2:09:28, 115.72step/s]


Episode 202 finished at step 500 (101000 total). Env Reward: 1.32, Steps: 500, Delivered: 6


Total Steps Trained:  10%|█         | 101021/1000000 [29:56<22:25:44, 11.13step/s]


--- Rollout Summary (Steps 100501 to 101000) ---
Update Duration: 4.73s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 675.9169
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  10%|█         | 101497/1000000 [30:01<2:12:47, 112.78step/s]


Episode 203 finished at step 500 (101500 total). Env Reward: 1.83, Steps: 500, Delivered: 8


Total Steps Trained:  10%|█         | 101520/1000000 [30:05<22:16:12, 11.21step/s]


--- Rollout Summary (Steps 101001 to 101500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 449.3243
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  10%|█         | 101991/1000000 [30:09<2:04:30, 120.20step/s]


Episode 204 finished at step 500 (102000 total). Env Reward: -11.19, Steps: 500, Delivered: 3


Total Steps Trained:  10%|█         | 102016/1000000 [30:14<20:48:32, 11.99step/s]


--- Rollout Summary (Steps 101501 to 102000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 501.3996
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  10%|█         | 102495/1000000 [30:18<2:11:51, 113.44step/s]


Episode 205 finished at step 500 (102500 total). Env Reward: -8.73, Steps: 500, Delivered: 6


Total Steps Trained:  10%|█         | 102518/1000000 [30:23<22:06:52, 11.27step/s]


--- Rollout Summary (Steps 102001 to 102500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 344.0718
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  10%|█         | 102993/1000000 [30:27<2:02:33, 121.99step/s]


Episode 206 finished at step 500 (103000 total). Env Reward: -9.46, Steps: 500, Delivered: 5


Total Steps Trained:  10%|█         | 103017/1000000 [30:32<21:20:32, 11.67step/s]


--- Rollout Summary (Steps 102501 to 103000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 398.5038
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  10%|█         | 103488/1000000 [30:36<2:04:10, 120.33step/s]


Episode 207 finished at step 500 (103500 total). Env Reward: -1.31, Steps: 500, Delivered: 5


Total Steps Trained:  10%|█         | 103511/1000000 [30:40<21:54:26, 11.37step/s]


--- Rollout Summary (Steps 103001 to 103500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 695.5611
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  10%|█         | 103992/1000000 [30:45<2:14:11, 111.29step/s]


Episode 208 finished at step 500 (104000 total). Env Reward: -9.61, Steps: 500, Delivered: 5


Total Steps Trained:  10%|█         | 104015/1000000 [30:49<22:27:33, 11.08step/s]


--- Rollout Summary (Steps 103501 to 104000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 620.1848
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  10%|█         | 104489/1000000 [30:53<2:09:53, 114.91step/s]


Episode 209 finished at step 500 (104500 total). Env Reward: -7.02, Steps: 500, Delivered: 8


Total Steps Trained:  10%|█         | 104512/1000000 [30:58<23:01:36, 10.80step/s]


--- Rollout Summary (Steps 104001 to 104500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 606.7185
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  10%|█         | 104999/1000000 [31:02<2:10:11, 114.57step/s]


Episode 210 finished at step 500 (105000 total). Env Reward: -10.05, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000210_map1.pth


Total Steps Trained:  11%|█         | 105011/1000000 [31:07<29:40:22,  8.38step/s]


--- Rollout Summary (Steps 104501 to 105000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 521.5455
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  11%|█         | 105494/1000000 [31:11<2:14:31, 110.82step/s]


Episode 211 finished at step 500 (105500 total). Env Reward: -10.56, Steps: 500, Delivered: 4


Total Steps Trained:  11%|█         | 105517/1000000 [31:16<21:55:27, 11.33step/s]


--- Rollout Summary (Steps 105001 to 105500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 753.9672
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  11%|█         | 105992/1000000 [31:20<2:14:02, 111.17step/s]


Episode 212 finished at step 500 (106000 total). Env Reward: -9.38, Steps: 500, Delivered: 5


Total Steps Trained:  11%|█         | 106016/1000000 [31:25<21:38:52, 11.47step/s]


--- Rollout Summary (Steps 105501 to 106000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 706.5995
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  11%|█         | 106494/1000000 [31:29<2:15:38, 109.79step/s]


Episode 213 finished at step 500 (106500 total). Env Reward: -0.55, Steps: 500, Delivered: 6


Total Steps Trained:  11%|█         | 106517/1000000 [31:34<22:23:17, 11.09step/s]


--- Rollout Summary (Steps 106001 to 106500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 676.2702
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  11%|█         | 106999/1000000 [31:38<2:16:53, 108.72step/s]


Episode 214 finished at step 500 (107000 total). Env Reward: -9.70, Steps: 500, Delivered: 5


Total Steps Trained:  11%|█         | 107010/1000000 [31:43<31:11:19,  7.95step/s]


--- Rollout Summary (Steps 106501 to 107000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 528.5517
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  11%|█         | 107494/1000000 [31:47<2:06:10, 117.89step/s]


Episode 215 finished at step 500 (107500 total). Env Reward: -4.54, Steps: 500, Delivered: 10


Total Steps Trained:  11%|█         | 107517/1000000 [31:52<22:16:32, 11.13step/s]


--- Rollout Summary (Steps 107001 to 107500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 481.7168
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  11%|█         | 107991/1000000 [31:56<2:12:22, 112.31step/s]


Episode 216 finished at step 500 (108000 total). Env Reward: -9.11, Steps: 500, Delivered: 5


Total Steps Trained:  11%|█         | 108013/1000000 [32:01<23:12:20, 10.68step/s]


--- Rollout Summary (Steps 107501 to 108000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 387.9597
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  11%|█         | 108494/1000000 [32:05<2:10:43, 113.67step/s]


Episode 217 finished at step 500 (108500 total). Env Reward: -5.28, Steps: 500, Delivered: 9


Total Steps Trained:  11%|█         | 108518/1000000 [32:09<21:40:30, 11.42step/s]


--- Rollout Summary (Steps 108001 to 108500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0101
Avg Critic Loss (per minibatch): 687.3998
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  11%|█         | 108998/1000000 [32:14<2:05:39, 118.18step/s]


Episode 218 finished at step 500 (109000 total). Env Reward: -7.78, Steps: 500, Delivered: 6


Total Steps Trained:  11%|█         | 109022/1000000 [32:18<21:19:22, 11.61step/s]


--- Rollout Summary (Steps 108501 to 109000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 563.3122
Avg Entropy (per minibatch): 2.5615
------------------------------


Total Steps Trained:  11%|█         | 109490/1000000 [32:22<2:01:16, 122.38step/s]


Episode 219 finished at step 500 (109500 total). Env Reward: -9.02, Steps: 500, Delivered: 5


Total Steps Trained:  11%|█         | 109515/1000000 [32:27<20:32:57, 12.04step/s]


--- Rollout Summary (Steps 109001 to 109500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 474.9103
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  11%|█         | 109999/1000000 [32:31<2:08:04, 115.82step/s]


Episode 220 finished at step 500 (110000 total). Env Reward: 1.76, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000220_map1.pth


Total Steps Trained:  11%|█         | 110011/1000000 [32:36<31:06:18,  7.95step/s]


--- Rollout Summary (Steps 109501 to 110000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 747.8032
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  11%|█         | 110496/1000000 [32:40<2:11:45, 112.52step/s]


Episode 221 finished at step 500 (110500 total). Env Reward: 0.61, Steps: 500, Delivered: 6


Total Steps Trained:  11%|█         | 110520/1000000 [32:45<21:34:12, 11.45step/s]


--- Rollout Summary (Steps 110001 to 110500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 715.4520
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  11%|█         | 110989/1000000 [32:49<2:05:20, 118.22step/s]


Episode 222 finished at step 500 (111000 total). Env Reward: -8.61, Steps: 500, Delivered: 6


Total Steps Trained:  11%|█         | 111012/1000000 [32:54<22:22:52, 11.03step/s]


--- Rollout Summary (Steps 110501 to 111000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 640.2134
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  11%|█         | 111499/1000000 [32:58<2:06:49, 116.76step/s]


Episode 223 finished at step 500 (111500 total). Env Reward: -11.02, Steps: 500, Delivered: 4


Total Steps Trained:  11%|█         | 111511/1000000 [33:02<29:41:28,  8.31step/s]


--- Rollout Summary (Steps 111001 to 111500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 431.6908
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  11%|█         | 111999/1000000 [33:07<2:09:43, 114.08step/s]


Episode 224 finished at step 500 (112000 total). Env Reward: -8.93, Steps: 500, Delivered: 6


Total Steps Trained:  11%|█         | 112011/1000000 [33:11<30:03:11,  8.21step/s]


--- Rollout Summary (Steps 111501 to 112000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 547.1395
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  11%|█         | 112490/1000000 [33:15<2:04:35, 118.72step/s]


Episode 225 finished at step 500 (112500 total). Env Reward: -7.27, Steps: 500, Delivered: 7


Total Steps Trained:  11%|█▏        | 112513/1000000 [33:20<21:41:09, 11.37step/s]


--- Rollout Summary (Steps 112001 to 112500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 534.6931
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  11%|█▏        | 112999/1000000 [33:24<2:03:10, 120.02step/s]


Episode 226 finished at step 500 (113000 total). Env Reward: 1.47, Steps: 500, Delivered: 7


Total Steps Trained:  11%|█▏        | 113012/1000000 [33:29<27:59:44,  8.80step/s]


--- Rollout Summary (Steps 112501 to 113000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 798.9571
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  11%|█▏        | 113492/1000000 [33:33<2:01:28, 121.63step/s]


Episode 227 finished at step 500 (113500 total). Env Reward: -2.26, Steps: 500, Delivered: 3


Total Steps Trained:  11%|█▏        | 113515/1000000 [33:38<22:16:10, 11.06step/s]


--- Rollout Summary (Steps 113001 to 113500) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): 0.0005
Avg Critic Loss (per minibatch): 795.2576
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  11%|█▏        | 113993/1000000 [33:42<2:08:10, 115.21step/s]


Episode 228 finished at step 500 (114000 total). Env Reward: -6.52, Steps: 500, Delivered: 8


Total Steps Trained:  11%|█▏        | 114016/1000000 [33:47<22:08:27, 11.12step/s]


--- Rollout Summary (Steps 113501 to 114000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.17
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 569.7838
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  11%|█▏        | 114499/1000000 [33:51<2:04:24, 118.63step/s]


Episode 229 finished at step 500 (114500 total). Env Reward: 2.77, Steps: 500, Delivered: 9


Total Steps Trained:  11%|█▏        | 114511/1000000 [33:56<29:10:01,  8.43step/s]


--- Rollout Summary (Steps 114001 to 114500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 1457.4665
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  11%|█▏        | 114994/1000000 [34:00<2:08:59, 114.35step/s]


Episode 230 finished at step 500 (115000 total). Env Reward: -10.71, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000230_map1.pth


Total Steps Trained:  12%|█▏        | 115017/1000000 [34:05<22:11:09, 11.08step/s]


--- Rollout Summary (Steps 114501 to 115000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 633.5746
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  12%|█▏        | 115500/1000000 [34:09<2:05:40, 117.30step/s]


Episode 231 finished at step 500 (115500 total). Env Reward: -8.43, Steps: 500, Delivered: 6


Total Steps Trained:  12%|█▏        | 115512/1000000 [34:13<29:11:28,  8.42step/s]


--- Rollout Summary (Steps 115001 to 115500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 571.8672
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  12%|█▏        | 115998/1000000 [34:18<2:07:27, 115.59step/s]


Episode 232 finished at step 500 (116000 total). Env Reward: -9.04, Steps: 500, Delivered: 6


Total Steps Trained:  12%|█▏        | 116010/1000000 [34:22<29:16:26,  8.39step/s]


--- Rollout Summary (Steps 115501 to 116000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 458.0271
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  12%|█▏        | 116492/1000000 [34:26<2:05:33, 117.28step/s]


Episode 233 finished at step 500 (116500 total). Env Reward: -11.39, Steps: 500, Delivered: 3


Total Steps Trained:  12%|█▏        | 116515/1000000 [34:31<21:54:24, 11.20step/s]


--- Rollout Summary (Steps 116001 to 116500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 602.8229
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  12%|█▏        | 117000/1000000 [34:35<2:02:28, 120.15step/s]


Episode 234 finished at step 500 (117000 total). Env Reward: -11.41, Steps: 500, Delivered: 3


Total Steps Trained:  12%|█▏        | 117013/1000000 [34:40<28:41:47,  8.55step/s]


--- Rollout Summary (Steps 116501 to 117000) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -7.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 485.3224
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  12%|█▏        | 117491/1000000 [34:44<2:06:07, 116.62step/s]


Episode 235 finished at step 500 (117500 total). Env Reward: -0.93, Steps: 500, Delivered: 5


Total Steps Trained:  12%|█▏        | 117515/1000000 [34:49<21:23:26, 11.46step/s]


--- Rollout Summary (Steps 117001 to 117500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 465.1973
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  12%|█▏        | 117990/1000000 [34:53<2:05:53, 116.77step/s]


Episode 236 finished at step 500 (118000 total). Env Reward: -7.29, Steps: 500, Delivered: 7


Total Steps Trained:  12%|█▏        | 118012/1000000 [34:58<21:49:46, 11.22step/s]


--- Rollout Summary (Steps 117501 to 118000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 447.8574
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  12%|█▏        | 118491/1000000 [35:02<2:06:00, 116.59step/s]


Episode 237 finished at step 500 (118500 total). Env Reward: -10.32, Steps: 500, Delivered: 4


Total Steps Trained:  12%|█▏        | 118514/1000000 [35:06<21:23:11, 11.45step/s]


--- Rollout Summary (Steps 118001 to 118500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 599.8001
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  12%|█▏        | 118993/1000000 [35:11<2:30:32, 97.54step/s] 


Episode 238 finished at step 500 (119000 total). Env Reward: -7.73, Steps: 500, Delivered: 7


Total Steps Trained:  12%|█▏        | 119015/1000000 [35:15<22:55:44, 10.67step/s]


--- Rollout Summary (Steps 118501 to 119000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 688.0563
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  12%|█▏        | 119491/1000000 [35:20<1:59:44, 122.56step/s]


Episode 239 finished at step 500 (119500 total). Env Reward: -8.30, Steps: 500, Delivered: 6


Total Steps Trained:  12%|█▏        | 119515/1000000 [35:24<20:46:25, 11.77step/s]


--- Rollout Summary (Steps 119001 to 119500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 525.8586
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  12%|█▏        | 119995/1000000 [35:28<2:04:31, 117.78step/s]


Episode 240 finished at step 500 (120000 total). Env Reward: -7.10, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000240_map1.pth


Total Steps Trained:  12%|█▏        | 120019/1000000 [35:33<21:04:50, 11.60step/s]


--- Rollout Summary (Steps 119501 to 120000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 667.4145
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  12%|█▏        | 120498/1000000 [35:37<2:13:23, 109.88step/s]


Episode 241 finished at step 500 (120500 total). Env Reward: -10.94, Steps: 500, Delivered: 4


Total Steps Trained:  12%|█▏        | 120518/1000000 [35:42<24:05:31, 10.14step/s]


--- Rollout Summary (Steps 120001 to 120500) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 469.4574
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  12%|█▏        | 120989/1000000 [35:46<2:05:45, 116.50step/s]


Episode 242 finished at step 500 (121000 total). Env Reward: -9.93, Steps: 500, Delivered: 4


Total Steps Trained:  12%|█▏        | 121011/1000000 [35:51<22:32:28, 10.83step/s]


--- Rollout Summary (Steps 120501 to 121000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 498.1329
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  12%|█▏        | 121493/1000000 [35:55<2:05:26, 116.73step/s]


Episode 243 finished at step 500 (121500 total). Env Reward: -8.73, Steps: 500, Delivered: 6


Total Steps Trained:  12%|█▏        | 121516/1000000 [36:00<21:41:42, 11.25step/s]


--- Rollout Summary (Steps 121001 to 121500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): 0.0004
Avg Critic Loss (per minibatch): 605.4102
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  12%|█▏        | 121988/1000000 [36:04<1:58:05, 123.92step/s]


Episode 244 finished at step 500 (122000 total). Env Reward: -12.69, Steps: 500, Delivered: 2


Total Steps Trained:  12%|█▏        | 122011/1000000 [36:09<21:19:27, 11.44step/s]


--- Rollout Summary (Steps 121501 to 122000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 535.3590
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  12%|█▏        | 122497/1000000 [36:13<2:25:11, 100.73step/s]


Episode 245 finished at step 500 (122500 total). Env Reward: -8.16, Steps: 500, Delivered: 7


Total Steps Trained:  12%|█▏        | 122516/1000000 [36:18<24:53:25,  9.79step/s]


--- Rollout Summary (Steps 122001 to 122500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 494.5895
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  12%|█▏        | 122997/1000000 [36:22<2:21:31, 103.28step/s]


Episode 246 finished at step 500 (123000 total). Env Reward: -8.76, Steps: 500, Delivered: 6


Total Steps Trained:  12%|█▏        | 123019/1000000 [36:27<23:11:25, 10.50step/s]


--- Rollout Summary (Steps 122501 to 123000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 558.7967
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  12%|█▏        | 123498/1000000 [36:31<1:59:56, 121.80step/s]


Episode 247 finished at step 500 (123500 total). Env Reward: -8.00, Steps: 500, Delivered: 7


Total Steps Trained:  12%|█▏        | 123511/1000000 [36:36<27:30:29,  8.85step/s]


--- Rollout Summary (Steps 123001 to 123500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 641.4628
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  12%|█▏        | 123992/1000000 [36:40<2:03:08, 118.56step/s]


Episode 248 finished at step 500 (124000 total). Env Reward: -9.59, Steps: 500, Delivered: 5


Total Steps Trained:  12%|█▏        | 124016/1000000 [36:45<20:41:51, 11.76step/s]


--- Rollout Summary (Steps 123501 to 124000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 544.7607
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  12%|█▏        | 124497/1000000 [36:49<2:02:04, 119.53step/s]


Episode 249 finished at step 500 (124500 total). Env Reward: -9.81, Steps: 500, Delivered: 5


Total Steps Trained:  12%|█▏        | 124521/1000000 [36:53<20:46:34, 11.71step/s]


--- Rollout Summary (Steps 124001 to 124500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 406.5367
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  12%|█▏        | 124992/1000000 [36:58<2:02:21, 119.19step/s]


Episode 250 finished at step 500 (125000 total). Env Reward: -8.95, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000250_map1.pth


Total Steps Trained:  13%|█▎        | 125015/1000000 [37:02<21:41:01, 11.21step/s]


--- Rollout Summary (Steps 124501 to 125000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 495.2990
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  13%|█▎        | 125500/1000000 [37:07<2:04:16, 117.27step/s]


Episode 251 finished at step 500 (125500 total). Env Reward: -8.08, Steps: 500, Delivered: 6


Total Steps Trained:  13%|█▎        | 125512/1000000 [37:11<29:19:55,  8.28step/s]


--- Rollout Summary (Steps 125001 to 125500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 850.9929
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  13%|█▎        | 125995/1000000 [37:15<2:10:29, 111.64step/s]


Episode 252 finished at step 500 (126000 total). Env Reward: -9.26, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 126018/1000000 [37:20<22:39:37, 10.71step/s]


--- Rollout Summary (Steps 125501 to 126000) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 464.9591
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  13%|█▎        | 126494/1000000 [37:24<2:01:18, 120.02step/s]


Episode 253 finished at step 500 (126500 total). Env Reward: -9.60, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 126517/1000000 [37:29<21:25:01, 11.33step/s]


--- Rollout Summary (Steps 126001 to 126500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 431.5643
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  13%|█▎        | 126998/1000000 [37:34<2:08:28, 113.25step/s]


Episode 254 finished at step 500 (127000 total). Env Reward: 0.93, Steps: 500, Delivered: 6


Total Steps Trained:  13%|█▎        | 127021/1000000 [37:38<21:56:48, 11.05step/s]


--- Rollout Summary (Steps 126501 to 127000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): 0.0009
Avg Critic Loss (per minibatch): 733.0486
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  13%|█▎        | 127498/1000000 [37:42<2:00:24, 120.77step/s]


Episode 255 finished at step 500 (127500 total). Env Reward: -9.08, Steps: 500, Delivered: 6


Total Steps Trained:  13%|█▎        | 127511/1000000 [37:47<27:14:20,  8.90step/s]


--- Rollout Summary (Steps 127001 to 127500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 672.9648
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  13%|█▎        | 127997/1000000 [37:51<2:08:33, 113.04step/s]


Episode 256 finished at step 500 (128000 total). Env Reward: -10.24, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 128020/1000000 [37:56<21:50:15, 11.09step/s]


--- Rollout Summary (Steps 127501 to 128000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 701.1890
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  13%|█▎        | 128489/1000000 [38:00<2:03:26, 117.67step/s]


Episode 257 finished at step 500 (128500 total). Env Reward: -8.71, Steps: 500, Delivered: 6


Total Steps Trained:  13%|█▎        | 128511/1000000 [38:05<21:56:32, 11.03step/s]


--- Rollout Summary (Steps 128001 to 128500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 637.5610
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  13%|█▎        | 129000/1000000 [38:09<2:03:30, 117.54step/s]


Episode 258 finished at step 500 (129000 total). Env Reward: -10.19, Steps: 500, Delivered: 4


Total Steps Trained:  13%|█▎        | 129012/1000000 [38:14<29:01:45,  8.33step/s]


--- Rollout Summary (Steps 128501 to 129000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 517.1641
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  13%|█▎        | 129493/1000000 [38:18<2:15:19, 107.21step/s]


Episode 259 finished at step 500 (129500 total). Env Reward: -10.16, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 129515/1000000 [38:23<22:14:22, 10.87step/s]


--- Rollout Summary (Steps 129001 to 129500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 465.5050
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  13%|█▎        | 129997/1000000 [38:27<2:01:55, 118.93step/s]


Episode 260 finished at step 500 (130000 total). Env Reward: 1.23, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000260_map1.pth


Total Steps Trained:  13%|█▎        | 130020/1000000 [38:31<21:18:20, 11.34step/s]


--- Rollout Summary (Steps 129501 to 130000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 754.3753
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  13%|█▎        | 130494/1000000 [38:36<2:03:10, 117.65step/s]


Episode 261 finished at step 500 (130500 total). Env Reward: -9.82, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 130516/1000000 [38:40<22:08:15, 10.91step/s]


--- Rollout Summary (Steps 130001 to 130500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 495.3711
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  13%|█▎        | 130992/1000000 [38:45<2:04:02, 116.77step/s]


Episode 262 finished at step 500 (131000 total). Env Reward: -8.51, Steps: 500, Delivered: 6


Total Steps Trained:  13%|█▎        | 131015/1000000 [38:49<21:47:33, 11.08step/s]


--- Rollout Summary (Steps 130501 to 131000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 741.4073
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  13%|█▎        | 131496/1000000 [38:54<2:01:54, 118.74step/s]


Episode 263 finished at step 500 (131500 total). Env Reward: -10.28, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 131518/1000000 [38:58<21:46:14, 11.08step/s]


--- Rollout Summary (Steps 131001 to 131500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 417.4771
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  13%|█▎        | 131993/1000000 [39:02<2:00:18, 120.24step/s]


Episode 264 finished at step 500 (132000 total). Env Reward: -12.38, Steps: 500, Delivered: 2


Total Steps Trained:  13%|█▎        | 132016/1000000 [39:07<21:13:10, 11.36step/s]


--- Rollout Summary (Steps 131501 to 132000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 565.8295
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  13%|█▎        | 132498/1000000 [39:11<2:00:05, 120.39step/s]


Episode 265 finished at step 500 (132500 total). Env Reward: -11.97, Steps: 500, Delivered: 3


Total Steps Trained:  13%|█▎        | 132511/1000000 [39:16<27:05:51,  8.89step/s]


--- Rollout Summary (Steps 132001 to 132500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 663.8494
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  13%|█▎        | 132992/1000000 [39:20<1:59:50, 120.57step/s]


Episode 266 finished at step 500 (133000 total). Env Reward: -9.35, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 133017/1000000 [39:25<20:33:05, 11.72step/s]


--- Rollout Summary (Steps 132501 to 133000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 487.1160
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  13%|█▎        | 133495/1000000 [39:29<2:01:10, 119.18step/s]


Episode 267 finished at step 500 (133500 total). Env Reward: -10.76, Steps: 500, Delivered: 4


Total Steps Trained:  13%|█▎        | 133517/1000000 [39:34<21:35:18, 11.15step/s]


--- Rollout Summary (Steps 133001 to 133500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 487.6345
Avg Entropy (per minibatch): 2.5582
------------------------------


Total Steps Trained:  13%|█▎        | 134000/1000000 [39:38<2:14:25, 107.37step/s]


Episode 268 finished at step 500 (134000 total). Env Reward: -9.74, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 134011/1000000 [39:42<28:52:24,  8.33step/s]


--- Rollout Summary (Steps 133501 to 134000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 521.5119
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  13%|█▎        | 134500/1000000 [39:47<2:04:53, 115.49step/s]


Episode 269 finished at step 500 (134500 total). Env Reward: -8.82, Steps: 500, Delivered: 5


Total Steps Trained:  13%|█▎        | 134512/1000000 [39:51<29:05:55,  8.26step/s]


--- Rollout Summary (Steps 134001 to 134500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 711.8134
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  13%|█▎        | 134997/1000000 [39:55<2:01:17, 118.86step/s]


Episode 270 finished at step 500 (135000 total). Env Reward: -6.40, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000270_map1.pth


Total Steps Trained:  14%|█▎        | 135020/1000000 [40:00<21:28:09, 11.19step/s]


--- Rollout Summary (Steps 134501 to 135000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 564.9450
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  14%|█▎        | 135498/1000000 [40:04<2:02:02, 118.07step/s]


Episode 271 finished at step 500 (135500 total). Env Reward: -9.83, Steps: 500, Delivered: 5


Total Steps Trained:  14%|█▎        | 135522/1000000 [40:09<20:51:24, 11.51step/s]


--- Rollout Summary (Steps 135001 to 135500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 453.8228
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  14%|█▎        | 135998/1000000 [40:13<2:03:27, 116.64step/s]


Episode 272 finished at step 500 (136000 total). Env Reward: -7.98, Steps: 500, Delivered: 7


Total Steps Trained:  14%|█▎        | 136010/1000000 [40:18<28:57:15,  8.29step/s]


--- Rollout Summary (Steps 135501 to 136000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 603.9836
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  14%|█▎        | 136492/1000000 [40:22<2:00:33, 119.38step/s]


Episode 273 finished at step 500 (136500 total). Env Reward: -9.86, Steps: 500, Delivered: 4


Total Steps Trained:  14%|█▎        | 136514/1000000 [40:27<21:44:40, 11.03step/s]


--- Rollout Summary (Steps 136001 to 136500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 509.1094
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  14%|█▎        | 136993/1000000 [40:31<2:03:20, 116.61step/s]


Episode 274 finished at step 500 (137000 total). Env Reward: -9.73, Steps: 500, Delivered: 5


Total Steps Trained:  14%|█▎        | 137016/1000000 [40:36<21:27:16, 11.17step/s]


--- Rollout Summary (Steps 136501 to 137000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 470.7258
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  14%|█▎        | 137496/1000000 [40:40<2:12:04, 108.83step/s]


Episode 275 finished at step 500 (137500 total). Env Reward: -7.63, Steps: 500, Delivered: 7


Total Steps Trained:  14%|█▍        | 137519/1000000 [40:45<21:52:25, 10.95step/s]


--- Rollout Summary (Steps 137001 to 137500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 769.8228
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  14%|█▍        | 137997/1000000 [40:49<2:00:37, 119.10step/s]


Episode 276 finished at step 500 (138000 total). Env Reward: -9.09, Steps: 500, Delivered: 6


Total Steps Trained:  14%|█▍        | 138020/1000000 [40:54<20:52:22, 11.47step/s]


--- Rollout Summary (Steps 137501 to 138000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 471.9106
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  14%|█▍        | 138499/1000000 [40:58<2:14:47, 106.53step/s]


Episode 277 finished at step 500 (138500 total). Env Reward: -7.90, Steps: 500, Delivered: 7


Total Steps Trained:  14%|█▍        | 138522/1000000 [41:03<21:18:13, 11.23step/s]


--- Rollout Summary (Steps 138001 to 138500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 521.7206
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  14%|█▍        | 138999/1000000 [41:07<1:57:19, 122.31step/s]


Episode 278 finished at step 500 (139000 total). Env Reward: -10.87, Steps: 500, Delivered: 4


Total Steps Trained:  14%|█▍        | 139012/1000000 [41:11<26:53:28,  8.89step/s]


--- Rollout Summary (Steps 138501 to 139000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 405.9416
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  14%|█▍        | 139495/1000000 [41:15<2:08:02, 112.01step/s]


Episode 279 finished at step 500 (139500 total). Env Reward: -9.37, Steps: 500, Delivered: 5


Total Steps Trained:  14%|█▍        | 139519/1000000 [41:20<21:05:54, 11.33step/s]


--- Rollout Summary (Steps 139001 to 139500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 480.9161
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  14%|█▍        | 139996/1000000 [41:24<1:59:56, 119.51step/s]


Episode 280 finished at step 500 (140000 total). Env Reward: -8.96, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000280_map1.pth


Total Steps Trained:  14%|█▍        | 140008/1000000 [41:29<28:32:43,  8.37step/s]


--- Rollout Summary (Steps 139501 to 140000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0105
Avg Critic Loss (per minibatch): 592.3762
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  14%|█▍        | 140489/1000000 [41:33<2:01:16, 118.12step/s]


Episode 281 finished at step 500 (140500 total). Env Reward: 9.71, Steps: 500, Delivered: 6


Total Steps Trained:  14%|█▍        | 140511/1000000 [41:38<21:46:54, 10.96step/s]


--- Rollout Summary (Steps 140001 to 140500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 1424.0665
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  14%|█▍        | 140991/1000000 [41:42<2:10:42, 109.53step/s]


Episode 282 finished at step 500 (141000 total). Env Reward: -4.79, Steps: 500, Delivered: 10


Total Steps Trained:  14%|█▍        | 141013/1000000 [41:47<22:47:20, 10.47step/s]


--- Rollout Summary (Steps 140501 to 141000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 656.3329
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  14%|█▍        | 141496/1000000 [41:51<2:12:33, 107.93step/s]


Episode 283 finished at step 500 (141500 total). Env Reward: -6.08, Steps: 500, Delivered: 8


Total Steps Trained:  14%|█▍        | 141517/1000000 [41:56<22:45:17, 10.48step/s]


--- Rollout Summary (Steps 141001 to 141500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 678.6947
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  14%|█▍        | 141992/1000000 [42:00<1:59:29, 119.68step/s]


Episode 284 finished at step 500 (142000 total). Env Reward: -10.14, Steps: 500, Delivered: 4


Total Steps Trained:  14%|█▍        | 142015/1000000 [42:05<21:27:06, 11.11step/s]


--- Rollout Summary (Steps 141501 to 142000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 546.6440
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  14%|█▍        | 142497/1000000 [42:09<2:03:18, 115.91step/s]


Episode 285 finished at step 500 (142500 total). Env Reward: -8.31, Steps: 500, Delivered: 6


Total Steps Trained:  14%|█▍        | 142520/1000000 [42:14<21:05:51, 11.29step/s]


--- Rollout Summary (Steps 142001 to 142500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 434.2424
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  14%|█▍        | 142991/1000000 [42:18<2:04:45, 114.49step/s]


Episode 286 finished at step 500 (143000 total). Env Reward: -11.24, Steps: 500, Delivered: 4


Total Steps Trained:  14%|█▍        | 143014/1000000 [42:22<20:58:40, 11.35step/s]


--- Rollout Summary (Steps 142501 to 143000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0003
Avg Critic Loss (per minibatch): 472.4902
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  14%|█▍        | 143491/1000000 [42:27<1:58:46, 120.19step/s]


Episode 287 finished at step 500 (143500 total). Env Reward: -10.30, Steps: 500, Delivered: 4


Total Steps Trained:  14%|█▍        | 143516/1000000 [42:31<20:12:25, 11.77step/s]


--- Rollout Summary (Steps 143001 to 143500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 454.0748
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  14%|█▍        | 143993/1000000 [42:36<2:03:20, 115.66step/s]


Episode 288 finished at step 500 (144000 total). Env Reward: -8.25, Steps: 500, Delivered: 6


Total Steps Trained:  14%|█▍        | 144017/1000000 [42:40<20:39:04, 11.51step/s]


--- Rollout Summary (Steps 143501 to 144000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 539.4498
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  14%|█▍        | 144490/1000000 [42:44<2:00:20, 118.49step/s]


Episode 289 finished at step 500 (144500 total). Env Reward: 2.33, Steps: 500, Delivered: 8


Total Steps Trained:  14%|█▍        | 144513/1000000 [42:49<20:53:58, 11.37step/s]


--- Rollout Summary (Steps 144001 to 144500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 891.7981
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  14%|█▍        | 144994/1000000 [42:53<2:05:29, 113.55step/s]


Episode 290 finished at step 500 (145000 total). Env Reward: -9.80, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000290_map1.pth


Total Steps Trained:  15%|█▍        | 145017/1000000 [42:58<21:24:05, 11.10step/s]


--- Rollout Summary (Steps 144501 to 145000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 430.4965
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  15%|█▍        | 145497/1000000 [43:02<2:10:25, 109.19step/s]


Episode 291 finished at step 500 (145500 total). Env Reward: -8.27, Steps: 500, Delivered: 6


Total Steps Trained:  15%|█▍        | 145518/1000000 [43:07<22:49:33, 10.40step/s]


--- Rollout Summary (Steps 145001 to 145500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 596.9806
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  15%|█▍        | 145989/1000000 [43:11<2:04:58, 113.88step/s]


Episode 292 finished at step 500 (146000 total). Env Reward: -7.47, Steps: 500, Delivered: 7


Total Steps Trained:  15%|█▍        | 146012/1000000 [43:16<21:15:33, 11.16step/s]


--- Rollout Summary (Steps 145501 to 146000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 439.7521
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  15%|█▍        | 146496/1000000 [43:20<1:59:31, 119.01step/s]


Episode 293 finished at step 500 (146500 total). Env Reward: -11.95, Steps: 500, Delivered: 3


Total Steps Trained:  15%|█▍        | 146519/1000000 [43:25<20:24:22, 11.62step/s]


--- Rollout Summary (Steps 146001 to 146500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 501.0890
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  15%|█▍        | 146995/1000000 [43:29<1:58:26, 120.03step/s]


Episode 294 finished at step 500 (147000 total). Env Reward: -7.63, Steps: 500, Delivered: 7


Total Steps Trained:  15%|█▍        | 147019/1000000 [43:33<20:04:05, 11.81step/s]


--- Rollout Summary (Steps 146501 to 147000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0089
Avg Critic Loss (per minibatch): 475.9140
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  15%|█▍        | 147489/1000000 [43:37<2:04:30, 114.11step/s]


Episode 295 finished at step 500 (147500 total). Env Reward: -2.11, Steps: 500, Delivered: 3


Total Steps Trained:  15%|█▍        | 147511/1000000 [43:42<21:39:18, 10.94step/s]


--- Rollout Summary (Steps 147001 to 147500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 701.2576
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  15%|█▍        | 147992/1000000 [43:46<1:54:54, 123.57step/s]


Episode 296 finished at step 500 (148000 total). Env Reward: -10.86, Steps: 500, Delivered: 4


Total Steps Trained:  15%|█▍        | 148015/1000000 [43:51<20:31:42, 11.53step/s]


--- Rollout Summary (Steps 147501 to 148000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 457.5050
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  15%|█▍        | 148492/1000000 [43:55<1:56:17, 122.03step/s]


Episode 297 finished at step 500 (148500 total). Env Reward: -10.93, Steps: 500, Delivered: 4


Total Steps Trained:  15%|█▍        | 148517/1000000 [44:00<19:48:35, 11.94step/s]


--- Rollout Summary (Steps 148001 to 148500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 615.5680
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  15%|█▍        | 148996/1000000 [44:04<2:11:40, 107.72step/s]


Episode 298 finished at step 500 (149000 total). Env Reward: 1.18, Steps: 500, Delivered: 7


Total Steps Trained:  15%|█▍        | 149019/1000000 [44:09<22:25:01, 10.54step/s]


--- Rollout Summary (Steps 148501 to 149000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 781.1558
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  15%|█▍        | 149492/1000000 [44:13<1:57:59, 120.13step/s]


Episode 299 finished at step 500 (149500 total). Env Reward: -10.02, Steps: 500, Delivered: 4


Total Steps Trained:  15%|█▍        | 149514/1000000 [44:18<21:16:02, 11.11step/s]


--- Rollout Summary (Steps 149001 to 149500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 505.0965
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  15%|█▍        | 149992/1000000 [44:22<1:59:35, 118.46step/s]


Episode 300 finished at step 500 (150000 total). Env Reward: -4.37, Steps: 500, Delivered: 10
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000300_map1.pth


Total Steps Trained:  15%|█▌        | 150014/1000000 [44:27<21:34:40, 10.94step/s]


--- Rollout Summary (Steps 149501 to 150000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 514.9672
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  15%|█▌        | 150496/1000000 [44:31<2:01:03, 116.95step/s]


Episode 301 finished at step 500 (150500 total). Env Reward: -9.48, Steps: 500, Delivered: 5


Total Steps Trained:  15%|█▌        | 150520/1000000 [44:36<20:19:23, 11.61step/s]


--- Rollout Summary (Steps 150001 to 150500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 571.3298
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  15%|█▌        | 150998/1000000 [44:40<1:59:22, 118.53step/s]


Episode 302 finished at step 500 (151000 total). Env Reward: -9.48, Steps: 500, Delivered: 5


Total Steps Trained:  15%|█▌        | 151010/1000000 [44:45<28:39:09,  8.23step/s]


--- Rollout Summary (Steps 150501 to 151000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 637.5594
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  15%|█▌        | 151491/1000000 [44:49<2:05:47, 112.42step/s]


Episode 303 finished at step 500 (151500 total). Env Reward: -6.42, Steps: 500, Delivered: 8


Total Steps Trained:  15%|█▌        | 151514/1000000 [44:53<21:07:50, 11.15step/s]


--- Rollout Summary (Steps 151001 to 151500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 447.6530
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  15%|█▌        | 151997/1000000 [44:58<1:59:16, 118.49step/s]


Episode 304 finished at step 500 (152000 total). Env Reward: -10.58, Steps: 500, Delivered: 4


Total Steps Trained:  15%|█▌        | 152020/1000000 [45:02<20:57:44, 11.24step/s]


--- Rollout Summary (Steps 151501 to 152000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 436.6559
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  15%|█▌        | 152497/1000000 [45:06<1:59:21, 118.35step/s]


Episode 305 finished at step 500 (152500 total). Env Reward: -0.87, Steps: 500, Delivered: 5


Total Steps Trained:  15%|█▌        | 152520/1000000 [45:11<21:31:19, 10.94step/s]


--- Rollout Summary (Steps 152001 to 152500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 675.0011
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  15%|█▌        | 153000/1000000 [45:15<1:56:33, 121.11step/s]


Episode 306 finished at step 500 (153000 total). Env Reward: -11.49, Steps: 500, Delivered: 3


Total Steps Trained:  15%|█▌        | 153013/1000000 [45:20<26:37:28,  8.84step/s]


--- Rollout Summary (Steps 152501 to 153000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 594.2641
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  15%|█▌        | 153500/1000000 [45:24<2:00:45, 116.84step/s]


Episode 307 finished at step 500 (153500 total). Env Reward: 0.58, Steps: 500, Delivered: 6


Total Steps Trained:  15%|█▌        | 153512/1000000 [45:29<27:52:57,  8.43step/s]


--- Rollout Summary (Steps 153001 to 153500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 642.0040
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  15%|█▌        | 153997/1000000 [45:33<2:00:46, 116.75step/s]


Episode 308 finished at step 500 (154000 total). Env Reward: 10.78, Steps: 500, Delivered: 7


Total Steps Trained:  15%|█▌        | 154009/1000000 [45:38<28:07:04,  8.36step/s]


--- Rollout Summary (Steps 153501 to 154000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 1686.4322
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  15%|█▌        | 154491/1000000 [45:42<1:58:58, 118.44step/s]


Episode 309 finished at step 500 (154500 total). Env Reward: -12.66, Steps: 500, Delivered: 2


Total Steps Trained:  15%|█▌        | 154514/1000000 [45:47<20:39:29, 11.37step/s]


--- Rollout Summary (Steps 154001 to 154500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 509.3158
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  15%|█▌        | 154997/1000000 [45:51<1:55:24, 122.04step/s]


Episode 310 finished at step 500 (155000 total). Env Reward: -6.81, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000310_map1.pth


Total Steps Trained:  16%|█▌        | 155021/1000000 [45:56<19:52:00, 11.81step/s]


--- Rollout Summary (Steps 154501 to 155000) ---
Update Duration: 4.43s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 837.4920
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  16%|█▌        | 155498/1000000 [46:00<2:01:53, 115.48step/s]


Episode 311 finished at step 500 (155500 total). Env Reward: -10.46, Steps: 500, Delivered: 4


Total Steps Trained:  16%|█▌        | 155510/1000000 [46:04<27:59:28,  8.38step/s]


--- Rollout Summary (Steps 155001 to 155500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 479.1095
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  16%|█▌        | 155996/1000000 [46:08<1:59:05, 118.12step/s]


Episode 312 finished at step 500 (156000 total). Env Reward: -4.67, Steps: 500, Delivered: 10


Total Steps Trained:  16%|█▌        | 156020/1000000 [46:13<20:12:46, 11.60step/s]


--- Rollout Summary (Steps 155501 to 156000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0083
Avg Critic Loss (per minibatch): 410.6462
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  16%|█▌        | 156500/1000000 [46:17<1:59:39, 117.49step/s]


Episode 313 finished at step 500 (156500 total). Env Reward: -10.39, Steps: 500, Delivered: 4


Total Steps Trained:  16%|█▌        | 156512/1000000 [46:22<27:38:51,  8.47step/s]


--- Rollout Summary (Steps 156001 to 156500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 526.1321
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  16%|█▌        | 156993/1000000 [46:26<2:06:29, 111.08step/s]


Episode 314 finished at step 500 (157000 total). Env Reward: 3.51, Steps: 500, Delivered: 9


Total Steps Trained:  16%|█▌        | 157016/1000000 [46:31<21:04:19, 11.11step/s]


--- Rollout Summary (Steps 156501 to 157000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 857.5155
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  16%|█▌        | 157496/1000000 [46:35<2:02:48, 114.35step/s]


Episode 315 finished at step 500 (157500 total). Env Reward: -9.54, Steps: 500, Delivered: 5


Total Steps Trained:  16%|█▌        | 157520/1000000 [46:40<20:36:43, 11.35step/s]


--- Rollout Summary (Steps 157001 to 157500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 498.8457
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  16%|█▌        | 157994/1000000 [46:44<2:01:37, 115.39step/s]


Episode 316 finished at step 500 (158000 total). Env Reward: -8.41, Steps: 500, Delivered: 6


Total Steps Trained:  16%|█▌        | 158017/1000000 [46:48<21:17:06, 10.99step/s]


--- Rollout Summary (Steps 157501 to 158000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 377.2316
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  16%|█▌        | 158497/1000000 [46:52<1:58:59, 117.86step/s]


Episode 317 finished at step 500 (158500 total). Env Reward: -7.50, Steps: 500, Delivered: 7


Total Steps Trained:  16%|█▌        | 158520/1000000 [46:57<20:51:23, 11.21step/s]


--- Rollout Summary (Steps 158001 to 158500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 558.7410
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  16%|█▌        | 158991/1000000 [47:01<2:02:19, 114.59step/s]


Episode 318 finished at step 500 (159000 total). Env Reward: -7.81, Steps: 500, Delivered: 7


Total Steps Trained:  16%|█▌        | 159015/1000000 [47:06<20:21:55, 11.47step/s]


--- Rollout Summary (Steps 158501 to 159000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 618.2787
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  16%|█▌        | 159499/1000000 [47:10<1:59:24, 117.32step/s]


Episode 319 finished at step 500 (159500 total). Env Reward: 1.30, Steps: 500, Delivered: 7


Total Steps Trained:  16%|█▌        | 159511/1000000 [47:15<28:00:52,  8.33step/s]


--- Rollout Summary (Steps 159001 to 159500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 836.4244
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  16%|█▌        | 159990/1000000 [47:19<2:08:56, 108.57step/s]


Episode 320 finished at step 500 (160000 total). Env Reward: -9.67, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000320_map1.pth


Total Steps Trained:  16%|█▌        | 160012/1000000 [47:24<21:21:24, 10.93step/s]


--- Rollout Summary (Steps 159501 to 160000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 503.0006
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  16%|█▌        | 160494/1000000 [47:28<2:13:31, 104.79step/s]


Episode 321 finished at step 500 (160500 total). Env Reward: -11.21, Steps: 500, Delivered: 3


Total Steps Trained:  16%|█▌        | 160515/1000000 [47:32<21:40:54, 10.76step/s]


--- Rollout Summary (Steps 160001 to 160500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 469.2133
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  16%|█▌        | 160996/1000000 [47:37<1:56:57, 119.55step/s]


Episode 322 finished at step 500 (161000 total). Env Reward: -13.09, Steps: 500, Delivered: 2


Total Steps Trained:  16%|█▌        | 161018/1000000 [47:41<21:25:49, 10.87step/s]


--- Rollout Summary (Steps 160501 to 161000) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 569.1605
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  16%|█▌        | 161492/1000000 [47:46<1:54:59, 121.53step/s]


Episode 323 finished at step 500 (161500 total). Env Reward: -10.35, Steps: 500, Delivered: 4


Total Steps Trained:  16%|█▌        | 161516/1000000 [47:50<20:40:43, 11.26step/s]


--- Rollout Summary (Steps 161001 to 161500) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 483.3739
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  16%|█▌        | 161998/1000000 [47:55<1:57:19, 119.05step/s]


Episode 324 finished at step 500 (162000 total). Env Reward: -8.37, Steps: 500, Delivered: 6


Total Steps Trained:  16%|█▌        | 162010/1000000 [47:59<27:59:38,  8.32step/s]


--- Rollout Summary (Steps 161501 to 162000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 615.1298
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  16%|█▌        | 162497/1000000 [48:03<2:01:30, 114.88step/s]


Episode 325 finished at step 500 (162500 total). Env Reward: -9.24, Steps: 500, Delivered: 6


Total Steps Trained:  16%|█▋        | 162520/1000000 [48:08<20:51:36, 11.15step/s]


--- Rollout Summary (Steps 162001 to 162500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0085
Avg Critic Loss (per minibatch): 557.6818
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  16%|█▋        | 163000/1000000 [48:12<1:57:58, 118.25step/s]


Episode 326 finished at step 500 (163000 total). Env Reward: 1.15, Steps: 500, Delivered: 6


Total Steps Trained:  16%|█▋        | 163012/1000000 [48:17<26:37:51,  8.73step/s]


--- Rollout Summary (Steps 162501 to 163000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0005
Avg Critic Loss (per minibatch): 410.0361
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  16%|█▋        | 163496/1000000 [48:21<1:55:02, 121.18step/s]


Episode 327 finished at step 500 (163500 total). Env Reward: -10.06, Steps: 500, Delivered: 5


Total Steps Trained:  16%|█▋        | 163520/1000000 [48:26<19:54:55, 11.67step/s]


--- Rollout Summary (Steps 163001 to 163500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 466.9224
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  16%|█▋        | 163988/1000000 [48:30<1:54:35, 121.60step/s]


Episode 328 finished at step 500 (164000 total). Env Reward: -9.30, Steps: 500, Delivered: 5


Total Steps Trained:  16%|█▋        | 164012/1000000 [48:34<19:48:45, 11.72step/s]


--- Rollout Summary (Steps 163501 to 164000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 699.0162
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  16%|█▋        | 164491/1000000 [48:38<1:57:09, 118.85step/s]


Episode 329 finished at step 500 (164500 total). Env Reward: 12.71, Steps: 500, Delivered: 9


Total Steps Trained:  16%|█▋        | 164515/1000000 [48:43<20:17:24, 11.44step/s]


--- Rollout Summary (Steps 164001 to 164500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 1525.4837
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  16%|█▋        | 164999/1000000 [48:47<1:54:29, 121.54step/s]


Episode 330 finished at step 500 (165000 total). Env Reward: -10.49, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000330_map1.pth


Total Steps Trained:  17%|█▋        | 165012/1000000 [48:52<26:54:28,  8.62step/s]


--- Rollout Summary (Steps 164501 to 165000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 503.0010
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  17%|█▋        | 165490/1000000 [48:56<2:08:11, 108.50step/s]


Episode 331 finished at step 500 (165500 total). Env Reward: -8.91, Steps: 500, Delivered: 6


Total Steps Trained:  17%|█▋        | 165511/1000000 [49:01<22:24:32, 10.34step/s]


--- Rollout Summary (Steps 165001 to 165500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0086
Avg Critic Loss (per minibatch): 493.7286
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  17%|█▋        | 165992/1000000 [49:05<1:56:08, 119.69step/s]


Episode 332 finished at step 500 (166000 total). Env Reward: -0.53, Steps: 500, Delivered: 5


Total Steps Trained:  17%|█▋        | 166014/1000000 [49:10<20:50:21, 11.12step/s]


--- Rollout Summary (Steps 165501 to 166000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 402.6496
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  17%|█▋        | 166493/1000000 [49:14<1:57:54, 117.81step/s]


Episode 333 finished at step 500 (166500 total). Env Reward: -9.65, Steps: 500, Delivered: 4


Total Steps Trained:  17%|█▋        | 166516/1000000 [49:19<20:35:06, 11.25step/s]


--- Rollout Summary (Steps 166001 to 166500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 597.0437
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  17%|█▋        | 167000/1000000 [49:23<1:57:59, 117.67step/s]


Episode 334 finished at step 500 (167000 total). Env Reward: -6.49, Steps: 500, Delivered: 8


Total Steps Trained:  17%|█▋        | 167012/1000000 [49:27<27:49:16,  8.32step/s]


--- Rollout Summary (Steps 166501 to 167000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 544.2879
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  17%|█▋        | 167500/1000000 [49:32<1:56:52, 118.72step/s]


Episode 335 finished at step 500 (167500 total). Env Reward: 3.12, Steps: 500, Delivered: 9


Total Steps Trained:  17%|█▋        | 167512/1000000 [49:36<27:21:25,  8.45step/s]


--- Rollout Summary (Steps 167001 to 167500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 809.8274
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  17%|█▋        | 167990/1000000 [49:40<2:13:40, 103.73step/s]


Episode 336 finished at step 500 (168000 total). Env Reward: -0.87, Steps: 500, Delivered: 5


Total Steps Trained:  17%|█▋        | 168013/1000000 [49:45<20:53:50, 11.06step/s]


--- Rollout Summary (Steps 167501 to 168000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 747.6476
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  17%|█▋        | 168488/1000000 [49:49<1:54:52, 120.64step/s]


Episode 337 finished at step 500 (168500 total). Env Reward: -7.59, Steps: 500, Delivered: 7


Total Steps Trained:  17%|█▋        | 168511/1000000 [49:54<20:03:53, 11.51step/s]


--- Rollout Summary (Steps 168001 to 168500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 390.8065
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  17%|█▋        | 168996/1000000 [49:58<1:59:12, 116.18step/s]


Episode 338 finished at step 500 (169000 total). Env Reward: -9.78, Steps: 500, Delivered: 5


Total Steps Trained:  17%|█▋        | 169019/1000000 [50:03<20:17:08, 11.38step/s]


--- Rollout Summary (Steps 168501 to 169000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 609.3703
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  17%|█▋        | 169497/1000000 [50:07<1:57:59, 117.32step/s]


Episode 339 finished at step 500 (169500 total). Env Reward: -7.50, Steps: 500, Delivered: 7


Total Steps Trained:  17%|█▋        | 169521/1000000 [50:12<20:22:28, 11.32step/s]


--- Rollout Summary (Steps 169001 to 169500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0005
Avg Critic Loss (per minibatch): 565.8811
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  17%|█▋        | 169989/1000000 [50:16<1:57:20, 117.90step/s]


Episode 340 finished at step 500 (170000 total). Env Reward: -6.55, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000340_map1.pth


Total Steps Trained:  17%|█▋        | 170012/1000000 [50:20<20:20:04, 11.34step/s]


--- Rollout Summary (Steps 169501 to 170000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 618.1639
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  17%|█▋        | 170492/1000000 [50:24<2:03:49, 111.65step/s]


Episode 341 finished at step 500 (170500 total). Env Reward: -8.34, Steps: 500, Delivered: 6


Total Steps Trained:  17%|█▋        | 170515/1000000 [50:29<21:17:41, 10.82step/s]


--- Rollout Summary (Steps 170001 to 170500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 597.9316
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  17%|█▋        | 170994/1000000 [50:33<1:54:21, 120.82step/s]


Episode 342 finished at step 500 (171000 total). Env Reward: -12.40, Steps: 500, Delivered: 3


Total Steps Trained:  17%|█▋        | 171018/1000000 [50:38<20:18:53, 11.34step/s]


--- Rollout Summary (Steps 170501 to 171000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 476.7564
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  17%|█▋        | 171492/1000000 [50:42<1:53:17, 121.88step/s]


Episode 343 finished at step 500 (171500 total). Env Reward: -8.79, Steps: 500, Delivered: 6


Total Steps Trained:  17%|█▋        | 171516/1000000 [50:47<19:49:34, 11.61step/s]


--- Rollout Summary (Steps 171001 to 171500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 369.1995
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  17%|█▋        | 171992/1000000 [50:51<1:54:15, 120.77step/s]


Episode 344 finished at step 500 (172000 total). Env Reward: -8.51, Steps: 500, Delivered: 6


Total Steps Trained:  17%|█▋        | 172016/1000000 [50:56<19:38:57, 11.70step/s]


--- Rollout Summary (Steps 171501 to 172000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 411.6584
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  17%|█▋        | 172496/1000000 [51:00<1:52:32, 122.55step/s]


Episode 345 finished at step 500 (172500 total). Env Reward: -12.05, Steps: 500, Delivered: 2


Total Steps Trained:  17%|█▋        | 172520/1000000 [51:05<19:31:24, 11.77step/s]


--- Rollout Summary (Steps 172001 to 172500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 500.4404
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  17%|█▋        | 172993/1000000 [51:09<1:54:52, 119.98step/s]


Episode 346 finished at step 500 (173000 total). Env Reward: 9.42, Steps: 500, Delivered: 6


Total Steps Trained:  17%|█▋        | 173017/1000000 [51:14<19:36:25, 11.72step/s]


--- Rollout Summary (Steps 172501 to 173000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 943.2791
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  17%|█▋        | 173494/1000000 [51:18<1:56:09, 118.58step/s]


Episode 347 finished at step 500 (173500 total). Env Reward: 0.15, Steps: 500, Delivered: 6


Total Steps Trained:  17%|█▋        | 173516/1000000 [51:22<20:57:14, 10.96step/s]


--- Rollout Summary (Steps 173001 to 173500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 1115.0993
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  17%|█▋        | 173994/1000000 [51:26<1:51:50, 123.09step/s]


Episode 348 finished at step 500 (174000 total). Env Reward: -11.92, Steps: 500, Delivered: 2


Total Steps Trained:  17%|█▋        | 174017/1000000 [51:31<20:24:37, 11.24step/s]


--- Rollout Summary (Steps 173501 to 174000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 478.4769
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  17%|█▋        | 174494/1000000 [51:36<1:59:36, 115.03step/s]


Episode 349 finished at step 500 (174500 total). Env Reward: -8.47, Steps: 500, Delivered: 6


Total Steps Trained:  17%|█▋        | 174517/1000000 [51:40<20:43:34, 11.06step/s]


--- Rollout Summary (Steps 174001 to 174500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 508.9397
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  17%|█▋        | 174994/1000000 [51:44<2:01:47, 112.90step/s]


Episode 350 finished at step 500 (175000 total). Env Reward: -7.76, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000350_map1.pth


Total Steps Trained:  18%|█▊        | 175017/1000000 [51:49<20:33:52, 11.14step/s]


--- Rollout Summary (Steps 174501 to 175000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 399.7968
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  18%|█▊        | 175496/1000000 [51:53<1:56:11, 118.26step/s]


Episode 351 finished at step 500 (175500 total). Env Reward: -9.05, Steps: 500, Delivered: 6


Total Steps Trained:  18%|█▊        | 175517/1000000 [51:58<21:08:11, 10.84step/s]


--- Rollout Summary (Steps 175001 to 175500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 579.4625
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  18%|█▊        | 175995/1000000 [52:02<1:58:34, 115.83step/s]


Episode 352 finished at step 500 (176000 total). Env Reward: -7.37, Steps: 500, Delivered: 7


Total Steps Trained:  18%|█▊        | 176019/1000000 [52:07<19:45:06, 11.59step/s]


--- Rollout Summary (Steps 175501 to 176000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 488.1028
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  18%|█▊        | 176496/1000000 [52:11<1:57:01, 117.28step/s]


Episode 353 finished at step 500 (176500 total). Env Reward: -8.78, Steps: 500, Delivered: 6


Total Steps Trained:  18%|█▊        | 176519/1000000 [52:16<20:01:34, 11.42step/s]


--- Rollout Summary (Steps 176001 to 176500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 468.3876
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  18%|█▊        | 176993/1000000 [52:20<1:53:04, 121.31step/s]


Episode 354 finished at step 500 (177000 total). Env Reward: -9.88, Steps: 500, Delivered: 5


Total Steps Trained:  18%|█▊        | 177017/1000000 [52:24<19:27:00, 11.75step/s]


--- Rollout Summary (Steps 176501 to 177000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 430.3383
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  18%|█▊        | 177496/1000000 [52:28<1:56:02, 118.13step/s]


Episode 355 finished at step 500 (177500 total). Env Reward: -7.68, Steps: 500, Delivered: 7


Total Steps Trained:  18%|█▊        | 177519/1000000 [52:33<20:09:16, 11.34step/s]


--- Rollout Summary (Steps 177001 to 177500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 410.3677
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  18%|█▊        | 177998/1000000 [52:37<1:55:38, 118.47step/s]


Episode 356 finished at step 500 (178000 total). Env Reward: -7.80, Steps: 500, Delivered: 7


Total Steps Trained:  18%|█▊        | 178021/1000000 [52:42<19:50:02, 11.51step/s]


--- Rollout Summary (Steps 177501 to 178000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 636.5250
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  18%|█▊        | 178499/1000000 [52:46<1:51:04, 123.26step/s]


Episode 357 finished at step 500 (178500 total). Env Reward: -12.28, Steps: 500, Delivered: 2


Total Steps Trained:  18%|█▊        | 178512/1000000 [52:51<25:36:21,  8.91step/s]


--- Rollout Summary (Steps 178001 to 178500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 568.5322
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  18%|█▊        | 178991/1000000 [52:55<1:55:29, 118.47step/s]


Episode 358 finished at step 500 (179000 total). Env Reward: -9.77, Steps: 500, Delivered: 5


Total Steps Trained:  18%|█▊        | 179013/1000000 [52:59<20:46:31, 10.98step/s]


--- Rollout Summary (Steps 178501 to 179000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 391.6965
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  18%|█▊        | 179495/1000000 [53:04<1:53:03, 120.96step/s]


Episode 359 finished at step 500 (179500 total). Env Reward: -7.81, Steps: 500, Delivered: 7


Total Steps Trained:  18%|█▊        | 179519/1000000 [53:08<19:41:10, 11.58step/s]


--- Rollout Summary (Steps 179001 to 179500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 696.9935
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  18%|█▊        | 179993/1000000 [53:13<1:58:15, 115.57step/s]


Episode 360 finished at step 500 (180000 total). Env Reward: -8.94, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000360_map1.pth


Total Steps Trained:  18%|█▊        | 180016/1000000 [53:17<20:19:26, 11.21step/s]


--- Rollout Summary (Steps 179501 to 180000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0083
Avg Critic Loss (per minibatch): 461.7433
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  18%|█▊        | 180496/1000000 [53:22<1:52:26, 121.47step/s]


Episode 361 finished at step 500 (180500 total). Env Reward: -3.36, Steps: 500, Delivered: 3


Total Steps Trained:  18%|█▊        | 180520/1000000 [53:26<19:19:48, 11.78step/s]


--- Rollout Summary (Steps 180001 to 180500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 791.1565
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  18%|█▊        | 180995/1000000 [53:30<1:53:14, 120.55step/s]


Episode 362 finished at step 500 (181000 total). Env Reward: -10.96, Steps: 500, Delivered: 4


Total Steps Trained:  18%|█▊        | 181020/1000000 [53:35<19:20:46, 11.76step/s]


--- Rollout Summary (Steps 180501 to 181000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 492.3197
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  18%|█▊        | 181499/1000000 [53:40<1:54:01, 119.64step/s]


Episode 363 finished at step 500 (181500 total). Env Reward: 2.61, Steps: 500, Delivered: 8


Total Steps Trained:  18%|█▊        | 181511/1000000 [53:44<26:26:36,  8.60step/s]


--- Rollout Summary (Steps 181001 to 181500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 1223.5814
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  18%|█▊        | 181989/1000000 [53:48<1:58:50, 114.71step/s]


Episode 364 finished at step 500 (182000 total). Env Reward: -8.78, Steps: 500, Delivered: 6


Total Steps Trained:  18%|█▊        | 182012/1000000 [53:53<20:27:55, 11.10step/s]


--- Rollout Summary (Steps 181501 to 182000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 513.1204
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  18%|█▊        | 182498/1000000 [53:57<1:51:50, 121.82step/s]


Episode 365 finished at step 500 (182500 total). Env Reward: -10.85, Steps: 500, Delivered: 4


Total Steps Trained:  18%|█▊        | 182511/1000000 [54:02<25:57:10,  8.75step/s]


--- Rollout Summary (Steps 182001 to 182500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 610.6067
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  18%|█▊        | 183000/1000000 [54:06<1:58:29, 114.91step/s]


Episode 366 finished at step 500 (183000 total). Env Reward: 2.47, Steps: 500, Delivered: 8


Total Steps Trained:  18%|█▊        | 183012/1000000 [54:11<28:26:16,  7.98step/s]


--- Rollout Summary (Steps 182501 to 183000) ---
Update Duration: 4.72s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 798.7935
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  18%|█▊        | 183495/1000000 [54:15<1:54:13, 119.14step/s]


Episode 367 finished at step 500 (183500 total). Env Reward: -9.32, Steps: 500, Delivered: 5


Total Steps Trained:  18%|█▊        | 183518/1000000 [54:20<20:12:01, 11.23step/s]


--- Rollout Summary (Steps 183001 to 183500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 731.7802
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  18%|█▊        | 184000/1000000 [54:24<1:56:14, 116.99step/s]


Episode 368 finished at step 500 (184000 total). Env Reward: -7.87, Steps: 500, Delivered: 7


Total Steps Trained:  18%|█▊        | 184012/1000000 [54:29<27:24:14,  8.27step/s]


--- Rollout Summary (Steps 183501 to 184000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 531.4614
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  18%|█▊        | 184494/1000000 [54:33<1:53:01, 120.26step/s]


Episode 369 finished at step 500 (184500 total). Env Reward: -9.65, Steps: 500, Delivered: 5


Total Steps Trained:  18%|█▊        | 184518/1000000 [54:37<19:23:05, 11.69step/s]


--- Rollout Summary (Steps 184001 to 184500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 468.7882
Avg Entropy (per minibatch): 2.5585
------------------------------


Total Steps Trained:  18%|█▊        | 184989/1000000 [54:41<1:51:44, 121.57step/s]


Episode 370 finished at step 500 (185000 total). Env Reward: -6.56, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000370_map1.pth


Total Steps Trained:  19%|█▊        | 185013/1000000 [54:46<19:13:39, 11.77step/s]


--- Rollout Summary (Steps 184501 to 185000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 495.1795
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  19%|█▊        | 185494/1000000 [54:50<2:00:54, 112.28step/s]


Episode 371 finished at step 500 (185500 total). Env Reward: -7.65, Steps: 500, Delivered: 7


Total Steps Trained:  19%|█▊        | 185518/1000000 [54:55<20:18:11, 11.14step/s]


--- Rollout Summary (Steps 185001 to 185500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.71
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 432.3733
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  19%|█▊        | 185996/1000000 [54:59<2:00:00, 113.05step/s]


Episode 372 finished at step 500 (186000 total). Env Reward: -8.32, Steps: 500, Delivered: 6


Total Steps Trained:  19%|█▊        | 186018/1000000 [55:04<20:52:45, 10.83step/s]


--- Rollout Summary (Steps 185501 to 186000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.70
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 493.4904
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  19%|█▊        | 186495/1000000 [55:08<1:55:23, 117.49step/s]


Episode 373 finished at step 500 (186500 total). Env Reward: -9.51, Steps: 500, Delivered: 5


Total Steps Trained:  19%|█▊        | 186518/1000000 [55:13<20:09:04, 11.21step/s]


--- Rollout Summary (Steps 186001 to 186500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.71
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 442.9178
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  19%|█▊        | 186993/1000000 [55:17<1:51:20, 121.71step/s]


Episode 374 finished at step 500 (187000 total). Env Reward: -8.45, Steps: 500, Delivered: 6


Total Steps Trained:  19%|█▊        | 187017/1000000 [55:21<19:19:52, 11.68step/s]


--- Rollout Summary (Steps 186501 to 187000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.72
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 530.8458
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  19%|█▊        | 187496/1000000 [55:25<1:50:13, 122.86step/s]


Episode 375 finished at step 500 (187500 total). Env Reward: -11.27, Steps: 500, Delivered: 3


Total Steps Trained:  19%|█▉        | 187521/1000000 [55:30<18:44:14, 12.04step/s]


--- Rollout Summary (Steps 187001 to 187500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 463.5549
Avg Entropy (per minibatch): 2.5617
------------------------------


Total Steps Trained:  19%|█▉        | 187999/1000000 [55:34<1:59:06, 113.62step/s]


Episode 376 finished at step 500 (188000 total). Env Reward: -7.56, Steps: 500, Delivered: 7


Total Steps Trained:  19%|█▉        | 188011/1000000 [55:39<27:47:21,  8.12step/s]


--- Rollout Summary (Steps 187501 to 188000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 520.3760
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  19%|█▉        | 188497/1000000 [55:43<1:57:26, 115.17step/s]


Episode 377 finished at step 500 (188500 total). Env Reward: -8.92, Steps: 500, Delivered: 6


Total Steps Trained:  19%|█▉        | 188519/1000000 [55:48<20:47:06, 10.84step/s]


--- Rollout Summary (Steps 188001 to 188500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 463.0298
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  19%|█▉        | 189000/1000000 [55:52<1:54:36, 117.94step/s]


Episode 378 finished at step 500 (189000 total). Env Reward: -12.02, Steps: 500, Delivered: 3


Total Steps Trained:  19%|█▉        | 189012/1000000 [55:57<26:20:04,  8.55step/s]


--- Rollout Summary (Steps 188501 to 189000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 482.5129
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  19%|█▉        | 189494/1000000 [56:01<1:57:31, 114.95step/s]


Episode 379 finished at step 500 (189500 total). Env Reward: -10.51, Steps: 500, Delivered: 4


Total Steps Trained:  19%|█▉        | 189516/1000000 [56:05<20:44:25, 10.85step/s]


--- Rollout Summary (Steps 189001 to 189500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 553.5497
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  19%|█▉        | 189999/1000000 [56:09<1:53:06, 119.36step/s]


Episode 380 finished at step 500 (190000 total). Env Reward: -8.04, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000380_map1.pth


Total Steps Trained:  19%|█▉        | 190011/1000000 [56:14<26:34:44,  8.47step/s]


--- Rollout Summary (Steps 189501 to 190000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 492.5401
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  19%|█▉        | 190500/1000000 [56:18<1:55:38, 116.66step/s]


Episode 381 finished at step 500 (190500 total). Env Reward: -10.02, Steps: 500, Delivered: 4


Total Steps Trained:  19%|█▉        | 190512/1000000 [56:23<26:26:13,  8.51step/s]


--- Rollout Summary (Steps 190001 to 190500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0001
Avg Critic Loss (per minibatch): 453.3732
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  19%|█▉        | 191000/1000000 [56:27<1:58:49, 113.48step/s]


Episode 382 finished at step 500 (191000 total). Env Reward: -9.07, Steps: 500, Delivered: 6


Total Steps Trained:  19%|█▉        | 191012/1000000 [56:32<28:00:26,  8.02step/s]


--- Rollout Summary (Steps 190501 to 191000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 611.3698
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  19%|█▉        | 191499/1000000 [56:36<1:54:20, 117.85step/s]


Episode 383 finished at step 500 (191500 total). Env Reward: -10.09, Steps: 500, Delivered: 5


Total Steps Trained:  19%|█▉        | 191511/1000000 [56:41<26:46:33,  8.39step/s]


--- Rollout Summary (Steps 191001 to 191500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 614.9638
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  19%|█▉        | 191998/1000000 [56:45<1:57:18, 114.80step/s]


Episode 384 finished at step 500 (192000 total). Env Reward: -5.62, Steps: 500, Delivered: 9


Total Steps Trained:  19%|█▉        | 192010/1000000 [56:49<27:36:30,  8.13step/s]


--- Rollout Summary (Steps 191501 to 192000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 501.4025
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  19%|█▉        | 192491/1000000 [56:54<1:57:41, 114.35step/s]


Episode 385 finished at step 500 (192500 total). Env Reward: 2.72, Steps: 500, Delivered: 8


Total Steps Trained:  19%|█▉        | 192514/1000000 [56:58<20:09:11, 11.13step/s]


--- Rollout Summary (Steps 192001 to 192500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0078
Avg Critic Loss (per minibatch): 1102.8525
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  19%|█▉        | 192996/1000000 [57:03<1:57:17, 114.68step/s]


Episode 386 finished at step 500 (193000 total). Env Reward: -9.48, Steps: 500, Delivered: 5


Total Steps Trained:  19%|█▉        | 193018/1000000 [57:07<20:32:18, 10.91step/s]


--- Rollout Summary (Steps 192501 to 193000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 593.6586
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  19%|█▉        | 193498/1000000 [57:12<1:58:03, 113.85step/s]


Episode 387 finished at step 500 (193500 total). Env Reward: -7.46, Steps: 500, Delivered: 7


Total Steps Trained:  19%|█▉        | 193510/1000000 [57:16<27:21:55,  8.19step/s]


--- Rollout Summary (Steps 193001 to 193500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 361.9443
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  19%|█▉        | 193999/1000000 [57:21<1:50:33, 121.50step/s]


Episode 388 finished at step 500 (194000 total). Env Reward: -8.42, Steps: 500, Delivered: 6


Total Steps Trained:  19%|█▉        | 194012/1000000 [57:25<25:09:37,  8.90step/s]


--- Rollout Summary (Steps 193501 to 194000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 424.2455
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  19%|█▉        | 194498/1000000 [57:30<1:51:10, 120.76step/s]


Episode 389 finished at step 500 (194500 total). Env Reward: -1.71, Steps: 500, Delivered: 4


Total Steps Trained:  19%|█▉        | 194511/1000000 [57:34<25:19:27,  8.84step/s]


--- Rollout Summary (Steps 194001 to 194500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 735.4511
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  19%|█▉        | 194990/1000000 [57:38<1:49:57, 122.02step/s]


Episode 390 finished at step 500 (195000 total). Env Reward: -7.32, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000390_map1.pth


Total Steps Trained:  20%|█▉        | 195014/1000000 [57:43<18:58:48, 11.78step/s]


--- Rollout Summary (Steps 194501 to 195000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -6.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 650.6084
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  20%|█▉        | 195492/1000000 [57:47<1:49:09, 122.84step/s]


Episode 391 finished at step 500 (195500 total). Env Reward: -11.63, Steps: 500, Delivered: 3


Total Steps Trained:  20%|█▉        | 195515/1000000 [57:52<20:18:14, 11.01step/s]


--- Rollout Summary (Steps 195001 to 195500) ---
Update Duration: 4.72s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): 0.0025
Avg Critic Loss (per minibatch): 513.7740
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  20%|█▉        | 195996/1000000 [57:56<1:52:50, 118.75step/s]


Episode 392 finished at step 500 (196000 total). Env Reward: -8.98, Steps: 500, Delivered: 6


Total Steps Trained:  20%|█▉        | 196019/1000000 [58:01<19:42:45, 11.33step/s]


--- Rollout Summary (Steps 195501 to 196000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0078
Avg Critic Loss (per minibatch): 473.8615
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  20%|█▉        | 196492/1000000 [58:05<1:54:03, 117.42step/s]


Episode 393 finished at step 500 (196500 total). Env Reward: -9.51, Steps: 500, Delivered: 5


Total Steps Trained:  20%|█▉        | 196515/1000000 [58:10<19:42:11, 11.33step/s]


--- Rollout Summary (Steps 196001 to 196500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 448.5415
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  20%|█▉        | 196999/1000000 [58:14<1:51:36, 119.90step/s]


Episode 394 finished at step 500 (197000 total). Env Reward: -8.88, Steps: 500, Delivered: 6


Total Steps Trained:  20%|█▉        | 197011/1000000 [58:19<26:15:52,  8.49step/s]


--- Rollout Summary (Steps 196501 to 197000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0006
Avg Critic Loss (per minibatch): 619.2610
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  20%|█▉        | 197490/1000000 [58:23<1:52:10, 119.24step/s]


Episode 395 finished at step 500 (197500 total). Env Reward: -13.41, Steps: 500, Delivered: 1


Total Steps Trained:  20%|█▉        | 197512/1000000 [58:27<19:55:27, 11.19step/s]


--- Rollout Summary (Steps 197001 to 197500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 557.9269
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  20%|█▉        | 197995/1000000 [58:32<2:06:15, 105.86step/s]


Episode 396 finished at step 500 (198000 total). Env Reward: -8.40, Steps: 500, Delivered: 6


Total Steps Trained:  20%|█▉        | 198017/1000000 [58:37<20:56:54, 10.63step/s]


--- Rollout Summary (Steps 197501 to 198000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 591.6668
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  20%|█▉        | 198491/1000000 [58:41<1:49:45, 121.71step/s]


Episode 397 finished at step 500 (198500 total). Env Reward: -5.46, Steps: 500, Delivered: 9


Total Steps Trained:  20%|█▉        | 198515/1000000 [58:45<18:59:16, 11.73step/s]


--- Rollout Summary (Steps 198001 to 198500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 749.2075
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  20%|█▉        | 198991/1000000 [58:49<1:52:11, 118.99step/s]


Episode 398 finished at step 500 (199000 total). Env Reward: -8.51, Steps: 500, Delivered: 6


Total Steps Trained:  20%|█▉        | 199015/1000000 [58:54<19:23:44, 11.47step/s]


--- Rollout Summary (Steps 198501 to 199000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 348.7270
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  20%|█▉        | 199492/1000000 [58:58<2:01:30, 109.80step/s]


Episode 399 finished at step 500 (199500 total). Env Reward: 9.66, Steps: 500, Delivered: 6


Total Steps Trained:  20%|█▉        | 199514/1000000 [59:03<20:19:58, 10.94step/s]


--- Rollout Summary (Steps 199001 to 199500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 2763.5385
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  20%|█▉        | 199991/1000000 [59:07<1:50:18, 120.88step/s]


Episode 400 finished at step 500 (200000 total). Env Reward: -11.74, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000400_map1.pth


Total Steps Trained:  20%|██        | 200014/1000000 [59:12<19:23:49, 11.46step/s]


--- Rollout Summary (Steps 199501 to 200000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 443.3017
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  20%|██        | 200496/1000000 [59:16<1:57:18, 113.60step/s]


Episode 401 finished at step 500 (200500 total). Env Reward: -10.02, Steps: 500, Delivered: 5


Total Steps Trained:  20%|██        | 200520/1000000 [59:21<19:38:13, 11.31step/s]


--- Rollout Summary (Steps 200001 to 200500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 773.0316
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  20%|██        | 200994/1000000 [59:25<1:53:12, 117.64step/s]


Episode 402 finished at step 500 (201000 total). Env Reward: -10.57, Steps: 500, Delivered: 4


Total Steps Trained:  20%|██        | 201016/1000000 [59:30<20:54:07, 10.62step/s]


--- Rollout Summary (Steps 200501 to 201000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 452.4146
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  20%|██        | 201499/1000000 [59:34<1:50:35, 120.34step/s]


Episode 403 finished at step 500 (201500 total). Env Reward: -8.63, Steps: 500, Delivered: 6


Total Steps Trained:  20%|██        | 201512/1000000 [59:39<25:30:22,  8.70step/s]


--- Rollout Summary (Steps 201001 to 201500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 532.8964
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  20%|██        | 201998/1000000 [59:43<1:48:17, 122.82step/s]


Episode 404 finished at step 500 (202000 total). Env Reward: -12.61, Steps: 500, Delivered: 2


Total Steps Trained:  20%|██        | 202011/1000000 [59:47<24:38:12,  9.00step/s]


--- Rollout Summary (Steps 201501 to 202000) ---
Update Duration: 4.43s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 623.2038
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  20%|██        | 202498/1000000 [59:52<1:51:04, 119.67step/s]


Episode 405 finished at step 500 (202500 total). Env Reward: -9.52, Steps: 500, Delivered: 5


Total Steps Trained:  20%|██        | 202510/1000000 [59:56<25:42:44,  8.62step/s]


--- Rollout Summary (Steps 202001 to 202500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 568.1664
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  20%|██        | 202998/1000000 [1:00:00<1:55:15, 115.25step/s]


Episode 406 finished at step 500 (203000 total). Env Reward: -4.41, Steps: 500, Delivered: 10


Total Steps Trained:  20%|██        | 203010/1000000 [1:00:05<26:59:53,  8.20step/s]


--- Rollout Summary (Steps 202501 to 203000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 568.6350
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  20%|██        | 203491/1000000 [1:00:09<1:53:42, 116.74step/s]


Episode 407 finished at step 500 (203500 total). Env Reward: -9.95, Steps: 500, Delivered: 5


Total Steps Trained:  20%|██        | 203513/1000000 [1:00:14<20:09:28, 10.98step/s]


--- Rollout Summary (Steps 203001 to 203500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.17
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0086
Avg Critic Loss (per minibatch): 482.1869
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  20%|██        | 204000/1000000 [1:00:18<1:57:36, 112.80step/s]


Episode 408 finished at step 500 (204000 total). Env Reward: -8.32, Steps: 500, Delivered: 6


Total Steps Trained:  20%|██        | 204012/1000000 [1:00:23<26:30:31,  8.34step/s]


--- Rollout Summary (Steps 203501 to 204000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 519.3229
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  20%|██        | 204495/1000000 [1:00:27<1:56:59, 113.32step/s]


Episode 409 finished at step 500 (204500 total). Env Reward: -0.87, Steps: 500, Delivered: 5


Total Steps Trained:  20%|██        | 204517/1000000 [1:00:32<20:25:18, 10.82step/s]


--- Rollout Summary (Steps 204001 to 204500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 753.5419
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  20%|██        | 204996/1000000 [1:00:36<1:56:58, 113.27step/s]


Episode 410 finished at step 500 (205000 total). Env Reward: -7.22, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000410_map1.pth


Total Steps Trained:  21%|██        | 205019/1000000 [1:00:40<20:00:32, 11.04step/s]


--- Rollout Summary (Steps 204501 to 205000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 480.6237
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  21%|██        | 205494/1000000 [1:00:44<1:50:35, 119.74step/s]


Episode 411 finished at step 500 (205500 total). Env Reward: -12.85, Steps: 500, Delivered: 2


Total Steps Trained:  21%|██        | 205517/1000000 [1:00:49<19:09:54, 11.52step/s]


--- Rollout Summary (Steps 205001 to 205500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0076
Avg Critic Loss (per minibatch): 440.9111
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  21%|██        | 205996/1000000 [1:00:53<2:02:11, 108.30step/s]


Episode 412 finished at step 500 (206000 total). Env Reward: -8.66, Steps: 500, Delivered: 6


Total Steps Trained:  21%|██        | 206018/1000000 [1:00:58<21:01:34, 10.49step/s]


--- Rollout Summary (Steps 205501 to 206000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 497.3036
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  21%|██        | 206499/1000000 [1:01:02<1:48:33, 121.82step/s]


Episode 413 finished at step 500 (206500 total). Env Reward: 3.18, Steps: 500, Delivered: 9


Total Steps Trained:  21%|██        | 206512/1000000 [1:01:07<24:43:21,  8.92step/s]


--- Rollout Summary (Steps 206001 to 206500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.17
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 982.8011
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  21%|██        | 206997/1000000 [1:01:11<1:49:31, 120.68step/s]


Episode 414 finished at step 500 (207000 total). Env Reward: -8.72, Steps: 500, Delivered: 5


Total Steps Trained:  21%|██        | 207021/1000000 [1:01:16<18:46:27, 11.73step/s]


--- Rollout Summary (Steps 206501 to 207000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 444.3381
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  21%|██        | 207500/1000000 [1:01:20<1:55:38, 114.22step/s]


Episode 415 finished at step 500 (207500 total). Env Reward: 14.40, Steps: 500, Delivered: 11
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000415_map1.pth


Total Steps Trained:  21%|██        | 207512/1000000 [1:01:25<26:54:45,  8.18step/s]


--- Rollout Summary (Steps 207001 to 207500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 2327.5454
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  21%|██        | 207990/1000000 [1:01:29<2:01:25, 108.71step/s]


Episode 416 finished at step 500 (208000 total). Env Reward: -0.66, Steps: 500, Delivered: 5


Total Steps Trained:  21%|██        | 208010/1000000 [1:01:33<22:03:45,  9.97step/s]


--- Rollout Summary (Steps 207501 to 208000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0090
Avg Critic Loss (per minibatch): 804.1146
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  21%|██        | 208499/1000000 [1:01:38<2:04:26, 106.01step/s]


Episode 417 finished at step 500 (208500 total). Env Reward: -1.61, Steps: 500, Delivered: 4


Total Steps Trained:  21%|██        | 208510/1000000 [1:01:42<27:51:02,  7.89step/s]


--- Rollout Summary (Steps 208001 to 208500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 1027.9349
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  21%|██        | 208995/1000000 [1:01:46<1:52:23, 117.29step/s]


Episode 418 finished at step 500 (209000 total). Env Reward: -8.75, Steps: 500, Delivered: 6


Total Steps Trained:  21%|██        | 209019/1000000 [1:01:51<19:16:15, 11.40step/s]


--- Rollout Summary (Steps 208501 to 209000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 610.8004
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  21%|██        | 209496/1000000 [1:01:55<1:47:01, 123.10step/s]


Episode 419 finished at step 500 (209500 total). Env Reward: -10.35, Steps: 500, Delivered: 5


Total Steps Trained:  21%|██        | 209520/1000000 [1:02:00<18:47:11, 11.69step/s]


--- Rollout Summary (Steps 209001 to 209500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 480.5479
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  21%|██        | 209989/1000000 [1:02:04<1:56:25, 113.10step/s]


Episode 420 finished at step 500 (210000 total). Env Reward: 5.23, Steps: 500, Delivered: 11
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000420_map1.pth


Total Steps Trained:  21%|██        | 210012/1000000 [1:02:09<19:49:02, 11.07step/s]


--- Rollout Summary (Steps 209501 to 210000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 1145.7409
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  21%|██        | 210498/1000000 [1:02:13<1:51:25, 118.08step/s]


Episode 421 finished at step 500 (210500 total). Env Reward: -6.86, Steps: 500, Delivered: 8


Total Steps Trained:  21%|██        | 210521/1000000 [1:02:18<19:21:21, 11.33step/s]


--- Rollout Summary (Steps 210001 to 210500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 380.1166
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  21%|██        | 210989/1000000 [1:02:22<1:47:31, 122.29step/s]


Episode 422 finished at step 500 (211000 total). Env Reward: -10.49, Steps: 500, Delivered: 4


Total Steps Trained:  21%|██        | 211013/1000000 [1:02:26<19:00:32, 11.53step/s]


--- Rollout Summary (Steps 210501 to 211000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -6.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.71
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 471.9717
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  21%|██        | 211493/1000000 [1:02:30<1:48:00, 121.67step/s]


Episode 423 finished at step 500 (211500 total). Env Reward: -9.29, Steps: 500, Delivered: 5


Total Steps Trained:  21%|██        | 211516/1000000 [1:02:35<19:33:00, 11.20step/s]


--- Rollout Summary (Steps 211001 to 211500) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.72
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 587.1690
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  21%|██        | 211990/1000000 [1:02:39<1:51:13, 118.09step/s]


Episode 424 finished at step 500 (212000 total). Env Reward: -8.04, Steps: 500, Delivered: 7


Total Steps Trained:  21%|██        | 212012/1000000 [1:02:44<19:52:30, 11.01step/s]


--- Rollout Summary (Steps 211501 to 212000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.73
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 454.1656
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  21%|██        | 212497/1000000 [1:02:48<1:52:37, 116.55step/s]


Episode 425 finished at step 500 (212500 total). Env Reward: -8.10, Steps: 500, Delivered: 7


Total Steps Trained:  21%|██▏       | 212520/1000000 [1:02:53<19:18:00, 11.33step/s]


--- Rollout Summary (Steps 212001 to 212500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.74
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 364.4531
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  21%|██▏       | 212989/1000000 [1:02:57<1:56:43, 112.38step/s]


Episode 426 finished at step 500 (213000 total). Env Reward: -7.86, Steps: 500, Delivered: 7


Total Steps Trained:  21%|██▏       | 213012/1000000 [1:03:02<19:38:57, 11.13step/s]


--- Rollout Summary (Steps 212501 to 213000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.75
Avg Actor Loss (per minibatch): 0.0000
Avg Critic Loss (per minibatch): 430.4417
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  21%|██▏       | 213496/1000000 [1:03:06<1:52:25, 116.59step/s]


Episode 427 finished at step 500 (213500 total). Env Reward: 0.71, Steps: 500, Delivered: 6


Total Steps Trained:  21%|██▏       | 213519/1000000 [1:03:11<19:42:25, 11.09step/s]


--- Rollout Summary (Steps 213001 to 213500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -6.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.76
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 1505.9119
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  21%|██▏       | 213999/1000000 [1:03:15<1:49:40, 119.45step/s]


Episode 428 finished at step 500 (214000 total). Env Reward: -1.70, Steps: 500, Delivered: 4


Total Steps Trained:  21%|██▏       | 214011/1000000 [1:03:20<25:56:53,  8.41step/s]


--- Rollout Summary (Steps 213501 to 214000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.75
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 979.5654
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  21%|██▏       | 214497/1000000 [1:03:24<1:53:54, 114.93step/s]


Episode 429 finished at step 500 (214500 total). Env Reward: -5.28, Steps: 500, Delivered: 9


Total Steps Trained:  21%|██▏       | 214519/1000000 [1:03:28<20:03:58, 10.87step/s]


--- Rollout Summary (Steps 214001 to 214500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.75
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 414.8537
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  21%|██▏       | 214998/1000000 [1:03:33<1:52:54, 115.88step/s]


Episode 430 finished at step 500 (215000 total). Env Reward: -9.87, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000430_map1.pth


Total Steps Trained:  22%|██▏       | 215021/1000000 [1:03:38<20:22:58, 10.70step/s]


--- Rollout Summary (Steps 214501 to 215000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.76
Avg Actor Loss (per minibatch): -0.0098
Avg Critic Loss (per minibatch): 389.1895
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  22%|██▏       | 215498/1000000 [1:03:42<1:52:48, 115.90step/s]


Episode 431 finished at step 500 (215500 total). Env Reward: -0.60, Steps: 500, Delivered: 5


Total Steps Trained:  22%|██▏       | 215510/1000000 [1:03:47<26:18:50,  8.28step/s]


--- Rollout Summary (Steps 215001 to 215500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.75
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 903.3657
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  22%|██▏       | 215992/1000000 [1:03:51<1:46:47, 122.36step/s]


Episode 432 finished at step 500 (216000 total). Env Reward: -9.02, Steps: 500, Delivered: 6


Total Steps Trained:  22%|██▏       | 216015/1000000 [1:03:55<19:10:10, 11.36step/s]


--- Rollout Summary (Steps 215501 to 216000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.76
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 536.7777
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  22%|██▏       | 216496/1000000 [1:04:00<1:47:46, 121.17step/s]


Episode 433 finished at step 500 (216500 total). Env Reward: -11.71, Steps: 500, Delivered: 3


Total Steps Trained:  22%|██▏       | 216521/1000000 [1:04:04<18:04:27, 12.04step/s]


--- Rollout Summary (Steps 216001 to 216500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.75
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 586.4251
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  22%|██▏       | 216999/1000000 [1:04:08<1:53:52, 114.60step/s]


Episode 434 finished at step 500 (217000 total). Env Reward: -9.97, Steps: 500, Delivered: 5


Total Steps Trained:  22%|██▏       | 217011/1000000 [1:04:13<26:55:52,  8.08step/s]


--- Rollout Summary (Steps 216501 to 217000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.72
Avg Actor Loss (per minibatch): -0.0088
Avg Critic Loss (per minibatch): 540.6878
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  22%|██▏       | 217498/1000000 [1:04:17<1:55:53, 112.54step/s]


Episode 435 finished at step 500 (217500 total). Env Reward: -10.71, Steps: 500, Delivered: 4


Total Steps Trained:  22%|██▏       | 217521/1000000 [1:04:22<19:49:28, 10.96step/s]


--- Rollout Summary (Steps 217001 to 217500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 423.9951
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  22%|██▏       | 217995/1000000 [1:04:26<1:50:27, 117.99step/s]


Episode 436 finished at step 500 (218000 total). Env Reward: 0.85, Steps: 500, Delivered: 6


Total Steps Trained:  22%|██▏       | 218018/1000000 [1:04:31<19:46:12, 10.99step/s]


--- Rollout Summary (Steps 217501 to 218000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 722.3187
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  22%|██▏       | 218493/1000000 [1:04:35<1:49:32, 118.90step/s]


Episode 437 finished at step 500 (218500 total). Env Reward: -9.53, Steps: 500, Delivered: 5


Total Steps Trained:  22%|██▏       | 218514/1000000 [1:04:40<20:50:01, 10.42step/s]


--- Rollout Summary (Steps 218001 to 218500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): 0.0009
Avg Critic Loss (per minibatch): 663.2405
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  22%|██▏       | 218997/1000000 [1:04:44<1:55:31, 112.68step/s]


Episode 438 finished at step 500 (219000 total). Env Reward: -7.73, Steps: 500, Delivered: 7


Total Steps Trained:  22%|██▏       | 219019/1000000 [1:04:49<20:12:40, 10.73step/s]


--- Rollout Summary (Steps 218501 to 219000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 490.9826
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  22%|██▏       | 219491/1000000 [1:04:53<1:51:57, 116.18step/s]


Episode 439 finished at step 500 (219500 total). Env Reward: -4.36, Steps: 500, Delivered: 10


Total Steps Trained:  22%|██▏       | 219514/1000000 [1:04:58<19:24:05, 11.17step/s]


--- Rollout Summary (Steps 219001 to 219500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.71
Avg Actor Loss (per minibatch): -0.0086
Avg Critic Loss (per minibatch): 521.4850
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  22%|██▏       | 219990/1000000 [1:05:02<1:47:37, 120.80step/s]


Episode 440 finished at step 500 (220000 total). Env Reward: -10.97, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000440_map1.pth


Total Steps Trained:  22%|██▏       | 220014/1000000 [1:05:07<18:24:24, 11.77step/s]


--- Rollout Summary (Steps 219501 to 220000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 520.7941
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  22%|██▏       | 220499/1000000 [1:05:11<2:21:26, 91.85step/s]


Episode 441 finished at step 500 (220500 total). Env Reward: -8.53, Steps: 500, Delivered: 6


Total Steps Trained:  22%|██▏       | 220520/1000000 [1:05:16<22:04:43,  9.81step/s]


--- Rollout Summary (Steps 220001 to 220500) ---
Update Duration: 4.74s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 663.1821
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  22%|██▏       | 220993/1000000 [1:05:21<1:50:40, 117.32step/s]


Episode 442 finished at step 500 (221000 total). Env Reward: -0.22, Steps: 500, Delivered: 5


Total Steps Trained:  22%|██▏       | 221016/1000000 [1:05:25<19:38:19, 11.02step/s]


--- Rollout Summary (Steps 220501 to 221000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -6.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 1044.4104
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  22%|██▏       | 221492/1000000 [1:05:30<1:57:14, 110.67step/s]


Episode 443 finished at step 500 (221500 total). Env Reward: -0.57, Steps: 500, Delivered: 5


Total Steps Trained:  22%|██▏       | 221515/1000000 [1:05:34<19:33:42, 11.05step/s]


--- Rollout Summary (Steps 221001 to 221500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 778.0619
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  22%|██▏       | 221993/1000000 [1:05:39<1:48:09, 119.88step/s]


Episode 444 finished at step 500 (222000 total). Env Reward: -10.88, Steps: 500, Delivered: 4


Total Steps Trained:  22%|██▏       | 222005/1000000 [1:05:43<26:01:50,  8.30step/s]


--- Rollout Summary (Steps 221501 to 222000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -6.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 543.4015
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  22%|██▏       | 222496/1000000 [1:05:48<1:59:50, 108.13step/s]


Episode 445 finished at step 500 (222500 total). Env Reward: -8.52, Steps: 500, Delivered: 6


Total Steps Trained:  22%|██▏       | 222519/1000000 [1:05:53<19:17:45, 11.19step/s]


--- Rollout Summary (Steps 222001 to 222500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.70
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 629.0470
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  22%|██▏       | 222994/1000000 [1:05:57<1:53:31, 114.07step/s]


Episode 446 finished at step 500 (223000 total). Env Reward: 3.18, Steps: 500, Delivered: 9


Total Steps Trained:  22%|██▏       | 223017/1000000 [1:06:02<19:29:50, 11.07step/s]


--- Rollout Summary (Steps 222501 to 223000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.73
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 1246.3102
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  22%|██▏       | 223492/1000000 [1:06:06<1:46:15, 121.80step/s]


Episode 447 finished at step 500 (223500 total). Env Reward: -1.04, Steps: 500, Delivered: 4


Total Steps Trained:  22%|██▏       | 223516/1000000 [1:06:10<18:36:24, 11.59step/s]


--- Rollout Summary (Steps 223001 to 223500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.71
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 864.8409
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  22%|██▏       | 223996/1000000 [1:06:14<2:02:08, 105.89step/s]


Episode 448 finished at step 500 (224000 total). Env Reward: -11.94, Steps: 500, Delivered: 3


Total Steps Trained:  22%|██▏       | 224018/1000000 [1:06:19<20:43:54, 10.40step/s]


--- Rollout Summary (Steps 223501 to 224000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.72
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 485.5457
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  22%|██▏       | 224492/1000000 [1:06:23<1:47:30, 120.22step/s]


Episode 449 finished at step 500 (224500 total). Env Reward: -11.54, Steps: 500, Delivered: 3


Total Steps Trained:  22%|██▏       | 224517/1000000 [1:06:28<17:51:41, 12.06step/s]


--- Rollout Summary (Steps 224001 to 224500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 577.1513
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  22%|██▏       | 224993/1000000 [1:06:32<1:50:00, 117.41step/s]


Episode 450 finished at step 500 (225000 total). Env Reward: -10.97, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000450_map1.pth


Total Steps Trained:  23%|██▎       | 225016/1000000 [1:06:37<19:05:29, 11.28step/s]


--- Rollout Summary (Steps 224501 to 225000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 592.4906
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  23%|██▎       | 225489/1000000 [1:06:41<1:47:26, 120.14step/s]


Episode 451 finished at step 500 (225500 total). Env Reward: -10.63, Steps: 500, Delivered: 4


Total Steps Trained:  23%|██▎       | 225512/1000000 [1:06:46<18:56:45, 11.36step/s]


--- Rollout Summary (Steps 225001 to 225500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 490.0149
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  23%|██▎       | 225997/1000000 [1:06:50<1:49:51, 117.43step/s]


Episode 452 finished at step 500 (226000 total). Env Reward: -10.69, Steps: 500, Delivered: 4


Total Steps Trained:  23%|██▎       | 226021/1000000 [1:06:55<18:29:05, 11.63step/s]


--- Rollout Summary (Steps 225501 to 226000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 554.0911
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  23%|██▎       | 226495/1000000 [1:06:59<1:44:47, 123.02step/s]


Episode 453 finished at step 500 (226500 total). Env Reward: 0.17, Steps: 500, Delivered: 6


Total Steps Trained:  23%|██▎       | 226519/1000000 [1:07:04<18:11:00, 11.82step/s]


--- Rollout Summary (Steps 226001 to 226500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 1338.2536
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  23%|██▎       | 227000/1000000 [1:07:08<1:58:57, 108.30step/s]


Episode 454 finished at step 500 (227000 total). Env Reward: -2.39, Steps: 500, Delivered: 4


Total Steps Trained:  23%|██▎       | 227011/1000000 [1:07:12<26:30:50,  8.10step/s]


--- Rollout Summary (Steps 226501 to 227000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 870.4261
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  23%|██▎       | 227490/1000000 [1:07:17<1:50:48, 116.19step/s]


Episode 455 finished at step 500 (227500 total). Env Reward: -9.02, Steps: 500, Delivered: 6


Total Steps Trained:  23%|██▎       | 227512/1000000 [1:07:22<20:19:30, 10.56step/s]


--- Rollout Summary (Steps 227001 to 227500) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -6.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 631.5292
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  23%|██▎       | 227994/1000000 [1:07:26<1:45:34, 121.87step/s]


Episode 456 finished at step 500 (228000 total). Env Reward: -11.77, Steps: 500, Delivered: 3


Total Steps Trained:  23%|██▎       | 228019/1000000 [1:07:30<17:57:28, 11.94step/s]


--- Rollout Summary (Steps 227501 to 228000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 563.7019
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  23%|██▎       | 228495/1000000 [1:07:34<1:47:34, 119.53step/s]


Episode 457 finished at step 500 (228500 total). Env Reward: -12.78, Steps: 500, Delivered: 2


Total Steps Trained:  23%|██▎       | 228519/1000000 [1:07:39<18:21:06, 11.68step/s]


--- Rollout Summary (Steps 228001 to 228500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 601.4753
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  23%|██▎       | 229000/1000000 [1:07:43<1:54:40, 112.06step/s]


Episode 458 finished at step 500 (229000 total). Env Reward: -13.03, Steps: 500, Delivered: 2


Total Steps Trained:  23%|██▎       | 229012/1000000 [1:07:48<26:08:38,  8.19step/s]


--- Rollout Summary (Steps 228501 to 229000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 530.3285
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  23%|██▎       | 229499/1000000 [1:07:52<1:50:11, 116.55step/s]


Episode 459 finished at step 500 (229500 total). Env Reward: -7.54, Steps: 500, Delivered: 7


Total Steps Trained:  23%|██▎       | 229511/1000000 [1:07:57<25:26:56,  8.41step/s]


--- Rollout Summary (Steps 229001 to 229500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 423.1743
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  23%|██▎       | 229994/1000000 [1:08:01<1:51:35, 115.00step/s]


Episode 460 finished at step 500 (230000 total). Env Reward: -8.14, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000460_map1.pth


Total Steps Trained:  23%|██▎       | 230017/1000000 [1:08:06<19:04:04, 11.22step/s]


--- Rollout Summary (Steps 229501 to 230000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 481.6672
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  23%|██▎       | 230495/1000000 [1:08:10<1:48:13, 118.51step/s]


Episode 461 finished at step 500 (230500 total). Env Reward: -9.40, Steps: 500, Delivered: 5


Total Steps Trained:  23%|██▎       | 230518/1000000 [1:08:14<18:33:19, 11.52step/s]


--- Rollout Summary (Steps 230001 to 230500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 600.0236
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  23%|██▎       | 230990/1000000 [1:08:19<1:46:31, 120.32step/s]


Episode 462 finished at step 500 (231000 total). Env Reward: -8.44, Steps: 500, Delivered: 7


Total Steps Trained:  23%|██▎       | 231014/1000000 [1:08:24<19:24:01, 11.01step/s]


--- Rollout Summary (Steps 230501 to 231000) ---
Update Duration: 4.74s
Avg Episode Reward (last 100): -7.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0085
Avg Critic Loss (per minibatch): 595.3253
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  23%|██▎       | 231493/1000000 [1:08:28<2:02:24, 104.64step/s]


Episode 463 finished at step 500 (231500 total). Env Reward: 1.07, Steps: 500, Delivered: 6


Total Steps Trained:  23%|██▎       | 231516/1000000 [1:08:32<19:38:55, 10.86step/s]


--- Rollout Summary (Steps 231001 to 231500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 826.2591
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  23%|██▎       | 231997/1000000 [1:08:36<1:42:25, 124.98step/s]


Episode 464 finished at step 500 (232000 total). Env Reward: -9.93, Steps: 500, Delivered: 5


Total Steps Trained:  23%|██▎       | 232022/1000000 [1:08:41<17:45:33, 12.01step/s]


--- Rollout Summary (Steps 231501 to 232000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 668.1900
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  23%|██▎       | 232494/1000000 [1:08:45<1:49:49, 116.48step/s]


Episode 465 finished at step 500 (232500 total). Env Reward: -2.82, Steps: 500, Delivered: 12


Total Steps Trained:  23%|██▎       | 232517/1000000 [1:08:50<19:21:54, 11.01step/s]


--- Rollout Summary (Steps 232001 to 232500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0073
Avg Critic Loss (per minibatch): 753.0438
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  23%|██▎       | 232999/1000000 [1:08:54<1:47:58, 118.40step/s]


Episode 466 finished at step 500 (233000 total). Env Reward: -11.60, Steps: 500, Delivered: 3


Total Steps Trained:  23%|██▎       | 233011/1000000 [1:08:59<25:47:49,  8.26step/s]


--- Rollout Summary (Steps 232501 to 233000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 482.5665
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  23%|██▎       | 233490/1000000 [1:09:03<1:46:44, 119.69step/s]


Episode 467 finished at step 500 (233500 total). Env Reward: -11.23, Steps: 500, Delivered: 3


Total Steps Trained:  23%|██▎       | 233512/1000000 [1:09:08<18:41:03, 11.40step/s]


--- Rollout Summary (Steps 233001 to 233500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 527.6745
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  23%|██▎       | 233998/1000000 [1:09:12<1:47:03, 119.25step/s]


Episode 468 finished at step 500 (234000 total). Env Reward: -9.97, Steps: 500, Delivered: 5


Total Steps Trained:  23%|██▎       | 234010/1000000 [1:09:16<25:24:02,  8.38step/s]


--- Rollout Summary (Steps 233501 to 234000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 521.8711
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  23%|██▎       | 234493/1000000 [1:09:21<1:47:53, 118.25step/s]


Episode 469 finished at step 500 (234500 total). Env Reward: -11.20, Steps: 500, Delivered: 3


Total Steps Trained:  23%|██▎       | 234515/1000000 [1:09:26<19:57:50, 10.65step/s]


--- Rollout Summary (Steps 234001 to 234500) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 466.4799
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  23%|██▎       | 234993/1000000 [1:09:30<1:47:57, 118.10step/s]


Episode 470 finished at step 500 (235000 total). Env Reward: -1.74, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000470_map1.pth


Total Steps Trained:  24%|██▎       | 235018/1000000 [1:09:34<17:53:00, 11.88step/s]


--- Rollout Summary (Steps 234501 to 235000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 915.4143
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  24%|██▎       | 235490/1000000 [1:09:38<1:53:55, 111.85step/s]


Episode 471 finished at step 500 (235500 total). Env Reward: -3.25, Steps: 500, Delivered: 3


Total Steps Trained:  24%|██▎       | 235513/1000000 [1:09:43<18:53:52, 11.24step/s]


--- Rollout Summary (Steps 235001 to 235500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 728.4633
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  24%|██▎       | 235989/1000000 [1:09:47<1:44:37, 121.71step/s]


Episode 472 finished at step 500 (236000 total). Env Reward: -10.39, Steps: 500, Delivered: 4


Total Steps Trained:  24%|██▎       | 236013/1000000 [1:09:52<18:12:10, 11.66step/s]


--- Rollout Summary (Steps 235501 to 236000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 510.7756
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  24%|██▎       | 236495/1000000 [1:09:56<1:58:11, 107.67step/s]


Episode 473 finished at step 500 (236500 total). Env Reward: -5.71, Steps: 500, Delivered: 9


Total Steps Trained:  24%|██▎       | 236517/1000000 [1:10:01<19:46:45, 10.72step/s]


--- Rollout Summary (Steps 236001 to 236500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0073
Avg Critic Loss (per minibatch): 796.2963
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  24%|██▎       | 236990/1000000 [1:10:05<1:41:37, 125.14step/s]


Episode 474 finished at step 500 (237000 total). Env Reward: -9.54, Steps: 500, Delivered: 5


Total Steps Trained:  24%|██▎       | 237013/1000000 [1:10:09<18:31:15, 11.44step/s]


--- Rollout Summary (Steps 236501 to 237000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 394.8270
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  24%|██▎       | 237489/1000000 [1:10:13<1:45:51, 120.05step/s]


Episode 475 finished at step 500 (237500 total). Env Reward: -10.07, Steps: 500, Delivered: 5


Total Steps Trained:  24%|██▍       | 237513/1000000 [1:10:18<18:08:26, 11.68step/s]


--- Rollout Summary (Steps 237001 to 237500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 554.0819
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  24%|██▍       | 237997/1000000 [1:10:23<1:54:28, 110.94step/s]


Episode 476 finished at step 500 (238000 total). Env Reward: -4.07, Steps: 500, Delivered: 11


Total Steps Trained:  24%|██▍       | 238018/1000000 [1:10:27<20:09:08, 10.50step/s]


--- Rollout Summary (Steps 237501 to 238000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 895.3615
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  24%|██▍       | 238494/1000000 [1:10:31<1:45:41, 120.09step/s]


Episode 477 finished at step 500 (238500 total). Env Reward: -10.72, Steps: 500, Delivered: 4


Total Steps Trained:  24%|██▍       | 238517/1000000 [1:10:36<18:39:38, 11.34step/s]


--- Rollout Summary (Steps 238001 to 238500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 717.7139
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  24%|██▍       | 238999/1000000 [1:10:40<1:45:05, 120.69step/s]


Episode 478 finished at step 500 (239000 total). Env Reward: 1.79, Steps: 500, Delivered: 8


Total Steps Trained:  24%|██▍       | 239012/1000000 [1:10:45<23:33:29,  8.97step/s]


--- Rollout Summary (Steps 238501 to 239000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 771.0924
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  24%|██▍       | 239493/1000000 [1:10:49<1:46:51, 118.61step/s]


Episode 479 finished at step 500 (239500 total). Env Reward: -11.43, Steps: 500, Delivered: 3


Total Steps Trained:  24%|██▍       | 239517/1000000 [1:10:54<17:59:55, 11.74step/s]


--- Rollout Summary (Steps 239001 to 239500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 419.3830
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  24%|██▍       | 239998/1000000 [1:10:58<1:53:07, 111.96step/s]


Episode 480 finished at step 500 (240000 total). Env Reward: -9.75, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000480_map1.pth


Total Steps Trained:  24%|██▍       | 240022/1000000 [1:11:03<18:42:04, 11.29step/s]


--- Rollout Summary (Steps 239501 to 240000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 454.1611
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  24%|██▍       | 240500/1000000 [1:11:07<1:50:52, 114.17step/s]


Episode 481 finished at step 500 (240500 total). Env Reward: -6.90, Steps: 500, Delivered: 8


Total Steps Trained:  24%|██▍       | 240512/1000000 [1:11:11<25:06:20,  8.40step/s]


--- Rollout Summary (Steps 240001 to 240500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 396.0608
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  24%|██▍       | 240998/1000000 [1:11:16<1:51:02, 113.92step/s]


Episode 482 finished at step 500 (241000 total). Env Reward: -8.66, Steps: 500, Delivered: 6


Total Steps Trained:  24%|██▍       | 241021/1000000 [1:11:20<19:16:17, 10.94step/s]


--- Rollout Summary (Steps 240501 to 241000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0073
Avg Critic Loss (per minibatch): 518.3495
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  24%|██▍       | 241498/1000000 [1:11:25<1:50:21, 114.55step/s]


Episode 483 finished at step 500 (241500 total). Env Reward: -7.89, Steps: 500, Delivered: 7


Total Steps Trained:  24%|██▍       | 241510/1000000 [1:11:29<25:23:57,  8.30step/s]


--- Rollout Summary (Steps 241001 to 241500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 514.3439
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  24%|██▍       | 241993/1000000 [1:11:34<1:47:14, 117.79step/s]


Episode 484 finished at step 500 (242000 total). Env Reward: -9.39, Steps: 500, Delivered: 5


Total Steps Trained:  24%|██▍       | 242016/1000000 [1:11:38<18:59:14, 11.09step/s]


--- Rollout Summary (Steps 241501 to 242000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 714.1537
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  24%|██▍       | 242492/1000000 [1:11:42<1:44:10, 121.19step/s]


Episode 485 finished at step 500 (242500 total). Env Reward: 1.11, Steps: 500, Delivered: 7


Total Steps Trained:  24%|██▍       | 242517/1000000 [1:11:47<17:26:57, 12.06step/s]


--- Rollout Summary (Steps 242001 to 242500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0005
Avg Critic Loss (per minibatch): 1176.5106
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  24%|██▍       | 243000/1000000 [1:11:51<1:45:29, 119.61step/s]


Episode 486 finished at step 500 (243000 total). Env Reward: -2.70, Steps: 500, Delivered: 3


Total Steps Trained:  24%|██▍       | 243012/1000000 [1:11:56<24:10:24,  8.70step/s]


--- Rollout Summary (Steps 242501 to 243000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 808.6486
Avg Entropy (per minibatch): 2.5617
------------------------------


Total Steps Trained:  24%|██▍       | 243496/1000000 [1:12:00<1:48:44, 115.94step/s]


Episode 487 finished at step 500 (243500 total). Env Reward: -8.54, Steps: 500, Delivered: 6


Total Steps Trained:  24%|██▍       | 243520/1000000 [1:12:05<18:29:59, 11.36step/s]


--- Rollout Summary (Steps 243001 to 243500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 473.1835
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  24%|██▍       | 243989/1000000 [1:12:09<1:50:35, 113.94step/s]


Episode 488 finished at step 500 (244000 total). Env Reward: -8.83, Steps: 500, Delivered: 6


Total Steps Trained:  24%|██▍       | 244012/1000000 [1:12:14<18:55:37, 11.10step/s]


--- Rollout Summary (Steps 243501 to 244000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 568.2534
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  24%|██▍       | 244489/1000000 [1:12:18<1:46:09, 118.61step/s]


Episode 489 finished at step 500 (244500 total). Env Reward: -11.41, Steps: 500, Delivered: 3


Total Steps Trained:  24%|██▍       | 244513/1000000 [1:12:22<18:09:16, 11.56step/s]


--- Rollout Summary (Steps 244001 to 244500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 487.5848
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  24%|██▍       | 244996/1000000 [1:12:27<1:53:56, 110.44step/s]


Episode 490 finished at step 500 (245000 total). Env Reward: -7.76, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000490_map1.pth


Total Steps Trained:  25%|██▍       | 245018/1000000 [1:12:31<19:31:30, 10.74step/s]


--- Rollout Summary (Steps 244501 to 245000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 459.2706
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  25%|██▍       | 245494/1000000 [1:12:36<1:45:49, 118.82step/s]


Episode 491 finished at step 500 (245500 total). Env Reward: -8.73, Steps: 500, Delivered: 6


Total Steps Trained:  25%|██▍       | 245517/1000000 [1:12:41<18:35:24, 11.27step/s]


--- Rollout Summary (Steps 245001 to 245500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 648.6646
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  25%|██▍       | 245998/1000000 [1:12:45<1:59:01, 105.58step/s]


Episode 492 finished at step 500 (246000 total). Env Reward: 3.50, Steps: 500, Delivered: 9


Total Steps Trained:  25%|██▍       | 246020/1000000 [1:12:50<19:56:38, 10.50step/s]


--- Rollout Summary (Steps 245501 to 246000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -6.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 1290.2066
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  25%|██▍       | 246492/1000000 [1:12:54<1:49:47, 114.39step/s]


Episode 493 finished at step 500 (246500 total). Env Reward: -12.51, Steps: 500, Delivered: 2


Total Steps Trained:  25%|██▍       | 246514/1000000 [1:12:58<19:08:03, 10.94step/s]


--- Rollout Summary (Steps 246001 to 246500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 497.6582
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  25%|██▍       | 246996/1000000 [1:13:02<1:42:06, 122.91step/s]


Episode 494 finished at step 500 (247000 total). Env Reward: -12.78, Steps: 500, Delivered: 2


Total Steps Trained:  25%|██▍       | 247020/1000000 [1:13:07<18:04:40, 11.57step/s]


--- Rollout Summary (Steps 246501 to 247000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 423.1716
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  25%|██▍       | 247496/1000000 [1:13:11<1:41:26, 123.64step/s]


Episode 495 finished at step 500 (247500 total). Env Reward: -9.14, Steps: 500, Delivered: 5


Total Steps Trained:  25%|██▍       | 247520/1000000 [1:13:16<17:42:41, 11.80step/s]


--- Rollout Summary (Steps 247001 to 247500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 463.5233
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  25%|██▍       | 247997/1000000 [1:13:20<1:49:54, 114.04step/s]


Episode 496 finished at step 500 (248000 total). Env Reward: -8.13, Steps: 500, Delivered: 6


Total Steps Trained:  25%|██▍       | 248019/1000000 [1:13:25<18:58:50, 11.01step/s]


--- Rollout Summary (Steps 247501 to 248000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -6.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 512.5158
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  25%|██▍       | 248499/1000000 [1:13:29<1:42:51, 121.76step/s]


Episode 497 finished at step 500 (248500 total). Env Reward: -8.97, Steps: 500, Delivered: 5


Total Steps Trained:  25%|██▍       | 248512/1000000 [1:13:34<23:35:33,  8.85step/s]


--- Rollout Summary (Steps 248001 to 248500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 421.7176
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  25%|██▍       | 248991/1000000 [1:13:38<2:01:31, 103.00step/s]


Episode 498 finished at step 500 (249000 total). Env Reward: 2.43, Steps: 500, Delivered: 8


Total Steps Trained:  25%|██▍       | 249013/1000000 [1:13:43<19:46:40, 10.55step/s]


--- Rollout Summary (Steps 248501 to 249000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 946.2016
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  25%|██▍       | 249499/1000000 [1:13:47<1:42:03, 122.57step/s]


Episode 499 finished at step 500 (249500 total). Env Reward: -10.84, Steps: 500, Delivered: 4


Total Steps Trained:  25%|██▍       | 249512/1000000 [1:13:52<23:41:18,  8.80step/s]


--- Rollout Summary (Steps 249001 to 249500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 631.6535
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  25%|██▍       | 249999/1000000 [1:13:56<1:41:40, 122.94step/s]


Episode 500 finished at step 500 (250000 total). Env Reward: -10.53, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000500_map1.pth


Total Steps Trained:  25%|██▌       | 250012/1000000 [1:14:00<23:39:53,  8.80step/s]


--- Rollout Summary (Steps 249501 to 250000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 531.5608
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  25%|██▌       | 250498/1000000 [1:14:05<1:40:59, 123.69step/s]


Episode 501 finished at step 500 (250500 total). Env Reward: 0.63, Steps: 500, Delivered: 6


Total Steps Trained:  25%|██▌       | 250511/1000000 [1:14:09<23:52:24,  8.72step/s]


--- Rollout Summary (Steps 250001 to 250500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -6.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 798.8620
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  25%|██▌       | 250992/1000000 [1:14:14<1:44:16, 119.72step/s]


Episode 502 finished at step 500 (251000 total). Env Reward: -10.50, Steps: 500, Delivered: 4


Total Steps Trained:  25%|██▌       | 251016/1000000 [1:14:19<18:07:15, 11.48step/s]


--- Rollout Summary (Steps 250501 to 251000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -6.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 546.4657
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  25%|██▌       | 251491/1000000 [1:14:23<1:51:48, 111.58step/s]


Episode 503 finished at step 500 (251500 total). Env Reward: -8.82, Steps: 500, Delivered: 6


Total Steps Trained:  25%|██▌       | 251513/1000000 [1:14:27<19:03:51, 10.91step/s]


--- Rollout Summary (Steps 251001 to 251500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 475.7141
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  25%|██▌       | 251996/1000000 [1:14:32<1:50:24, 112.91step/s]


Episode 504 finished at step 500 (252000 total). Env Reward: -11.72, Steps: 500, Delivered: 3


Total Steps Trained:  25%|██▌       | 252019/1000000 [1:14:36<18:39:46, 11.13step/s]


--- Rollout Summary (Steps 251501 to 252000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 457.0260
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  25%|██▌       | 252495/1000000 [1:14:40<1:38:06, 126.98step/s]


Episode 505 finished at step 500 (252500 total). Env Reward: -13.65, Steps: 500, Delivered: 1


Total Steps Trained:  25%|██▌       | 252520/1000000 [1:14:45<17:33:21, 11.83step/s]


--- Rollout Summary (Steps 252001 to 252500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 461.1443
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  25%|██▌       | 252994/1000000 [1:14:49<1:51:21, 111.81step/s]


Episode 506 finished at step 500 (253000 total). Env Reward: -8.34, Steps: 500, Delivered: 6


Total Steps Trained:  25%|██▌       | 253018/1000000 [1:14:54<18:03:48, 11.49step/s]


--- Rollout Summary (Steps 252501 to 253000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 481.9082
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  25%|██▌       | 253492/1000000 [1:14:58<1:42:29, 121.40step/s]


Episode 507 finished at step 500 (253500 total). Env Reward: -10.39, Steps: 500, Delivered: 4


Total Steps Trained:  25%|██▌       | 253517/1000000 [1:15:02<17:13:49, 12.03step/s]


--- Rollout Summary (Steps 253001 to 253500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 401.0947
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  25%|██▌       | 254000/1000000 [1:15:06<1:45:35, 117.76step/s]


Episode 508 finished at step 500 (254000 total). Env Reward: -8.04, Steps: 500, Delivered: 7


Total Steps Trained:  25%|██▌       | 254012/1000000 [1:15:11<24:31:02,  8.45step/s]


--- Rollout Summary (Steps 253501 to 254000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 400.6336
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  25%|██▌       | 254493/1000000 [1:15:15<1:42:39, 121.03step/s]


Episode 509 finished at step 500 (254500 total). Env Reward: 8.90, Steps: 500, Delivered: 5


Total Steps Trained:  25%|██▌       | 254517/1000000 [1:15:20<17:45:06, 11.67step/s]


--- Rollout Summary (Steps 254001 to 254500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 1416.0664
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  25%|██▌       | 254993/1000000 [1:15:24<1:42:10, 121.52step/s]


Episode 510 finished at step 500 (255000 total). Env Reward: -6.11, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000510_map1.pth


Total Steps Trained:  26%|██▌       | 255017/1000000 [1:15:29<17:41:05, 11.70step/s]


--- Rollout Summary (Steps 254501 to 255000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 466.1193
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  26%|██▌       | 255492/1000000 [1:15:33<1:51:10, 111.61step/s]


Episode 511 finished at step 500 (255500 total). Env Reward: -6.16, Steps: 500, Delivered: 8


Total Steps Trained:  26%|██▌       | 255514/1000000 [1:15:37<18:56:49, 10.91step/s]


--- Rollout Summary (Steps 255001 to 255500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 518.3875
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  26%|██▌       | 255992/1000000 [1:15:42<1:44:42, 118.43step/s]


Episode 512 finished at step 500 (256000 total). Env Reward: -11.65, Steps: 500, Delivered: 3


Total Steps Trained:  26%|██▌       | 256014/1000000 [1:15:47<19:12:38, 10.76step/s]


--- Rollout Summary (Steps 255501 to 256000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -6.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 735.9814
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  26%|██▌       | 256490/1000000 [1:15:51<1:48:31, 114.19step/s]


Episode 513 finished at step 500 (256500 total). Env Reward: -9.30, Steps: 500, Delivered: 5


Total Steps Trained:  26%|██▌       | 256514/1000000 [1:15:55<18:06:11, 11.41step/s]


--- Rollout Summary (Steps 256001 to 256500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 714.1311
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  26%|██▌       | 256992/1000000 [1:15:59<1:39:33, 124.38step/s]


Episode 514 finished at step 500 (257000 total). Env Reward: 0.53, Steps: 500, Delivered: 6


Total Steps Trained:  26%|██▌       | 257016/1000000 [1:16:04<17:33:31, 11.75step/s]


--- Rollout Summary (Steps 256501 to 257000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 1289.9765
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  26%|██▌       | 257496/1000000 [1:16:08<1:47:44, 114.87step/s]


Episode 515 finished at step 500 (257500 total). Env Reward: -11.91, Steps: 500, Delivered: 2


Total Steps Trained:  26%|██▌       | 257520/1000000 [1:16:13<17:48:44, 11.58step/s]


--- Rollout Summary (Steps 257001 to 257500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0094
Avg Critic Loss (per minibatch): 544.2906
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  26%|██▌       | 257995/1000000 [1:16:17<1:49:10, 113.27step/s]


Episode 516 finished at step 500 (258000 total). Env Reward: -8.55, Steps: 500, Delivered: 6


Total Steps Trained:  26%|██▌       | 258018/1000000 [1:16:22<18:41:17, 11.03step/s]


--- Rollout Summary (Steps 257501 to 258000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 585.8467
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  26%|██▌       | 258500/1000000 [1:16:26<1:50:27, 111.89step/s]


Episode 517 finished at step 500 (258500 total). Env Reward: -9.08, Steps: 500, Delivered: 6


Total Steps Trained:  26%|██▌       | 258512/1000000 [1:16:31<25:09:27,  8.19step/s]


--- Rollout Summary (Steps 258001 to 258500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 456.2015
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  26%|██▌       | 258989/1000000 [1:16:35<1:43:49, 118.96step/s]


Episode 518 finished at step 500 (259000 total). Env Reward: -0.39, Steps: 500, Delivered: 6


Total Steps Trained:  26%|██▌       | 259012/1000000 [1:16:39<18:00:07, 11.43step/s]


--- Rollout Summary (Steps 258501 to 259000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 683.5797
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  26%|██▌       | 259496/1000000 [1:16:44<1:47:03, 115.28step/s]


Episode 519 finished at step 500 (259500 total). Env Reward: -6.73, Steps: 500, Delivered: 8


Total Steps Trained:  26%|██▌       | 259519/1000000 [1:16:48<18:41:57, 11.00step/s]


--- Rollout Summary (Steps 259001 to 259500) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 475.3515
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  26%|██▌       | 259996/1000000 [1:16:52<1:49:24, 112.73step/s]


Episode 520 finished at step 500 (260000 total). Env Reward: -7.77, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000520_map1.pth


Total Steps Trained:  26%|██▌       | 260019/1000000 [1:16:57<18:22:48, 11.18step/s]


--- Rollout Summary (Steps 259501 to 260000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 546.3490
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  26%|██▌       | 260499/1000000 [1:17:01<1:41:30, 121.42step/s]


Episode 521 finished at step 500 (260500 total). Env Reward: 1.08, Steps: 500, Delivered: 7


Total Steps Trained:  26%|██▌       | 260512/1000000 [1:17:06<23:05:16,  8.90step/s]


--- Rollout Summary (Steps 260001 to 260500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 1126.6146
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  26%|██▌       | 260998/1000000 [1:17:10<1:43:14, 119.30step/s]


Episode 522 finished at step 500 (261000 total). Env Reward: -13.61, Steps: 500, Delivered: 1


Total Steps Trained:  26%|██▌       | 261021/1000000 [1:17:15<18:30:17, 11.09step/s]


--- Rollout Summary (Steps 260501 to 261000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.17
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 534.5412
Avg Entropy (per minibatch): 2.5588
------------------------------


Total Steps Trained:  26%|██▌       | 261490/1000000 [1:17:19<1:54:57, 107.07step/s]


Episode 523 finished at step 500 (261500 total). Env Reward: -12.62, Steps: 500, Delivered: 2


Total Steps Trained:  26%|██▌       | 261511/1000000 [1:17:24<19:44:39, 10.39step/s]


--- Rollout Summary (Steps 261001 to 261500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 547.1323
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  26%|██▌       | 261990/1000000 [1:17:28<1:45:29, 116.59step/s]


Episode 524 finished at step 500 (262000 total). Env Reward: -10.30, Steps: 500, Delivered: 4


Total Steps Trained:  26%|██▌       | 262012/1000000 [1:17:32<18:25:40, 11.12step/s]


--- Rollout Summary (Steps 261501 to 262000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 466.1592
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  26%|██▌       | 262489/1000000 [1:17:37<1:45:15, 116.78step/s]


Episode 525 finished at step 500 (262500 total). Env Reward: -11.03, Steps: 500, Delivered: 3


Total Steps Trained:  26%|██▋       | 262511/1000000 [1:17:41<18:30:56, 11.06step/s]


--- Rollout Summary (Steps 262001 to 262500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 476.8158
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  26%|██▋       | 262990/1000000 [1:17:45<1:46:09, 115.70step/s]


Episode 526 finished at step 500 (263000 total). Env Reward: -10.69, Steps: 500, Delivered: 4


Total Steps Trained:  26%|██▋       | 263012/1000000 [1:17:50<19:04:37, 10.73step/s]


--- Rollout Summary (Steps 262501 to 263000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 696.0471
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  26%|██▋       | 263491/1000000 [1:17:54<1:43:31, 118.58step/s]


Episode 527 finished at step 500 (263500 total). Env Reward: 1.21, Steps: 500, Delivered: 7


Total Steps Trained:  26%|██▋       | 263514/1000000 [1:17:59<17:53:25, 11.44step/s]


--- Rollout Summary (Steps 263001 to 263500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 925.3283
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  26%|██▋       | 263994/1000000 [1:18:03<1:44:44, 117.12step/s]


Episode 528 finished at step 500 (264000 total). Env Reward: -5.87, Steps: 500, Delivered: 9


Total Steps Trained:  26%|██▋       | 264017/1000000 [1:18:08<18:08:06, 11.27step/s]


--- Rollout Summary (Steps 263501 to 264000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 606.1773
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  26%|██▋       | 264498/1000000 [1:18:12<1:41:47, 120.44step/s]


Episode 529 finished at step 500 (264500 total). Env Reward: -9.55, Steps: 500, Delivered: 5


Total Steps Trained:  26%|██▋       | 264511/1000000 [1:18:17<23:21:09,  8.75step/s]


--- Rollout Summary (Steps 264001 to 264500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 558.4939
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  26%|██▋       | 264991/1000000 [1:18:21<1:42:24, 119.63step/s]


Episode 530 finished at step 500 (265000 total). Env Reward: -2.15, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000530_map1.pth


Total Steps Trained:  27%|██▋       | 265014/1000000 [1:18:26<18:18:07, 11.16step/s]


--- Rollout Summary (Steps 264501 to 265000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 448.6852
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  27%|██▋       | 265491/1000000 [1:18:30<1:42:06, 119.90step/s]


Episode 531 finished at step 500 (265500 total). Env Reward: -10.75, Steps: 500, Delivered: 4


Total Steps Trained:  27%|██▋       | 265513/1000000 [1:18:34<18:04:30, 11.29step/s]


--- Rollout Summary (Steps 265001 to 265500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 552.5748
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  27%|██▋       | 265990/1000000 [1:18:39<1:41:48, 120.16step/s]


Episode 532 finished at step 500 (266000 total). Env Reward: -9.29, Steps: 500, Delivered: 5


Total Steps Trained:  27%|██▋       | 266014/1000000 [1:18:43<17:17:24, 11.79step/s]


--- Rollout Summary (Steps 265501 to 266000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 507.6691
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  27%|██▋       | 266495/1000000 [1:18:48<1:45:47, 115.57step/s]


Episode 533 finished at step 500 (266500 total). Env Reward: 10.65, Steps: 500, Delivered: 7


Total Steps Trained:  27%|██▋       | 266517/1000000 [1:18:52<18:39:13, 10.92step/s]


--- Rollout Summary (Steps 266001 to 266500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 1473.3993
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  27%|██▋       | 266992/1000000 [1:18:57<1:43:17, 118.28step/s]


Episode 534 finished at step 500 (267000 total). Env Reward: -12.65, Steps: 500, Delivered: 2


Total Steps Trained:  27%|██▋       | 267015/1000000 [1:19:01<18:15:37, 11.15step/s]


--- Rollout Summary (Steps 266501 to 267000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0078
Avg Critic Loss (per minibatch): 726.0282
Avg Entropy (per minibatch): 2.5584
------------------------------


Total Steps Trained:  27%|██▋       | 267494/1000000 [1:19:05<1:40:56, 120.95step/s]


Episode 535 finished at step 500 (267500 total). Env Reward: -9.62, Steps: 500, Delivered: 5


Total Steps Trained:  27%|██▋       | 267517/1000000 [1:19:10<18:22:45, 11.07step/s]


--- Rollout Summary (Steps 267001 to 267500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 451.1306
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  27%|██▋       | 267990/1000000 [1:19:15<1:42:08, 119.45step/s]


Episode 536 finished at step 500 (268000 total). Env Reward: 0.64, Steps: 500, Delivered: 7


Total Steps Trained:  27%|██▋       | 268013/1000000 [1:19:19<18:06:06, 11.23step/s]


--- Rollout Summary (Steps 267501 to 268000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 870.9984
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  27%|██▋       | 268492/1000000 [1:19:23<1:43:11, 118.15step/s]


Episode 537 finished at step 500 (268500 total). Env Reward: -12.19, Steps: 500, Delivered: 3


Total Steps Trained:  27%|██▋       | 268515/1000000 [1:19:28<18:24:28, 11.04step/s]


--- Rollout Summary (Steps 268001 to 268500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0000
Avg Critic Loss (per minibatch): 528.7852
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  27%|██▋       | 269000/1000000 [1:19:32<1:44:04, 117.07step/s]


Episode 538 finished at step 500 (269000 total). Env Reward: 1.05, Steps: 500, Delivered: 7


Total Steps Trained:  27%|██▋       | 269012/1000000 [1:19:37<23:43:26,  8.56step/s]


--- Rollout Summary (Steps 268501 to 269000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 1221.3889
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  27%|██▋       | 269498/1000000 [1:19:41<1:45:45, 115.12step/s]


Episode 539 finished at step 500 (269500 total). Env Reward: -8.67, Steps: 500, Delivered: 6


Total Steps Trained:  27%|██▋       | 269521/1000000 [1:19:46<18:17:47, 11.09step/s]


--- Rollout Summary (Steps 269001 to 269500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 527.6676
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  27%|██▋       | 269992/1000000 [1:19:50<1:43:14, 117.86step/s]


Episode 540 finished at step 500 (270000 total). Env Reward: -6.10, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000540_map1.pth


Total Steps Trained:  27%|██▋       | 270015/1000000 [1:19:55<17:40:35, 11.47step/s]


--- Rollout Summary (Steps 269501 to 270000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 463.8353
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  27%|██▋       | 270490/1000000 [1:19:59<1:42:52, 118.18step/s]


Episode 541 finished at step 500 (270500 total). Env Reward: -10.22, Steps: 500, Delivered: 4


Total Steps Trained:  27%|██▋       | 270512/1000000 [1:20:04<18:24:52, 11.00step/s]


--- Rollout Summary (Steps 270001 to 270500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 460.2307
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  27%|██▋       | 271000/1000000 [1:20:08<1:56:24, 104.37step/s]


Episode 542 finished at step 500 (271000 total). Env Reward: -9.75, Steps: 500, Delivered: 5


Total Steps Trained:  27%|██▋       | 271011/1000000 [1:20:13<26:19:13,  7.69step/s]


--- Rollout Summary (Steps 270501 to 271000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 481.9240
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  27%|██▋       | 271494/1000000 [1:20:17<1:40:51, 120.38step/s]


Episode 543 finished at step 500 (271500 total). Env Reward: 9.52, Steps: 500, Delivered: 6


Total Steps Trained:  27%|██▋       | 271519/1000000 [1:20:21<16:57:22, 11.93step/s]


--- Rollout Summary (Steps 271001 to 271500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 1308.9286
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  27%|██▋       | 271996/1000000 [1:20:26<1:46:05, 114.36step/s]


Episode 544 finished at step 500 (272000 total). Env Reward: -8.07, Steps: 500, Delivered: 7


Total Steps Trained:  27%|██▋       | 272019/1000000 [1:20:30<18:46:06, 10.77step/s]


--- Rollout Summary (Steps 271501 to 272000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0078
Avg Critic Loss (per minibatch): 984.7658
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  27%|██▋       | 272496/1000000 [1:20:34<1:43:57, 116.63step/s]


Episode 545 finished at step 500 (272500 total). Env Reward: 1.16, Steps: 500, Delivered: 7


Total Steps Trained:  27%|██▋       | 272520/1000000 [1:20:39<17:47:36, 11.36step/s]


--- Rollout Summary (Steps 272001 to 272500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 1013.9189
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  27%|██▋       | 272990/1000000 [1:20:43<1:43:23, 117.19step/s]


Episode 546 finished at step 500 (273000 total). Env Reward: -8.75, Steps: 500, Delivered: 6


Total Steps Trained:  27%|██▋       | 273012/1000000 [1:20:48<18:03:33, 11.18step/s]


--- Rollout Summary (Steps 272501 to 273000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 462.1322
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  27%|██▋       | 273496/1000000 [1:20:52<1:41:07, 119.73step/s]


Episode 547 finished at step 500 (273500 total). Env Reward: -11.69, Steps: 500, Delivered: 3


Total Steps Trained:  27%|██▋       | 273519/1000000 [1:20:57<17:40:05, 11.42step/s]


--- Rollout Summary (Steps 273001 to 273500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 526.0902
Avg Entropy (per minibatch): 2.5580
------------------------------


Total Steps Trained:  27%|██▋       | 273991/1000000 [1:21:01<1:43:56, 116.42step/s]


Episode 548 finished at step 500 (274000 total). Env Reward: -11.38, Steps: 500, Delivered: 3


Total Steps Trained:  27%|██▋       | 274013/1000000 [1:21:05<18:22:40, 10.97step/s]


--- Rollout Summary (Steps 273501 to 274000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 404.9107
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  27%|██▋       | 274491/1000000 [1:21:10<1:41:44, 118.84step/s]


Episode 549 finished at step 500 (274500 total). Env Reward: -10.65, Steps: 500, Delivered: 4


Total Steps Trained:  27%|██▋       | 274513/1000000 [1:21:15<18:19:06, 11.00step/s]


--- Rollout Summary (Steps 274001 to 274500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 634.2457
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  27%|██▋       | 274990/1000000 [1:21:19<1:41:16, 119.31step/s]


Episode 550 finished at step 500 (275000 total). Env Reward: -9.77, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000550_map1.pth


Total Steps Trained:  28%|██▊       | 275013/1000000 [1:21:23<17:45:22, 11.34step/s]


--- Rollout Summary (Steps 274501 to 275000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 461.4365
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  28%|██▊       | 275496/1000000 [1:21:28<1:42:38, 117.65step/s]


Episode 551 finished at step 500 (275500 total). Env Reward: -7.54, Steps: 500, Delivered: 7


Total Steps Trained:  28%|██▊       | 275518/1000000 [1:21:32<18:40:44, 10.77step/s]


--- Rollout Summary (Steps 275001 to 275500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.17
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 505.2846
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  28%|██▊       | 275989/1000000 [1:21:36<1:48:03, 111.67step/s]


Episode 552 finished at step 500 (276000 total). Env Reward: -11.73, Steps: 500, Delivered: 3


Total Steps Trained:  28%|██▊       | 276012/1000000 [1:21:41<17:46:38, 11.31step/s]


--- Rollout Summary (Steps 275501 to 276000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 574.5678
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  28%|██▊       | 276499/1000000 [1:21:45<1:39:34, 121.10step/s]


Episode 553 finished at step 500 (276500 total). Env Reward: -5.97, Steps: 500, Delivered: 8


Total Steps Trained:  28%|██▊       | 276512/1000000 [1:21:50<22:55:58,  8.76step/s]


--- Rollout Summary (Steps 276001 to 276500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 648.4006
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  28%|██▊       | 276994/1000000 [1:21:54<1:44:14, 115.59step/s]


Episode 554 finished at step 500 (277000 total). Env Reward: -3.32, Steps: 500, Delivered: 11


Total Steps Trained:  28%|██▊       | 277016/1000000 [1:21:59<18:30:07, 10.85step/s]


--- Rollout Summary (Steps 276501 to 277000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 774.9831
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  28%|██▊       | 277491/1000000 [1:22:03<1:49:19, 110.15step/s]


Episode 555 finished at step 500 (277500 total). Env Reward: -12.19, Steps: 500, Delivered: 3


Total Steps Trained:  28%|██▊       | 277511/1000000 [1:22:08<19:48:28, 10.13step/s]


--- Rollout Summary (Steps 277001 to 277500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 522.0649
Avg Entropy (per minibatch): 2.5588
------------------------------


Total Steps Trained:  28%|██▊       | 277992/1000000 [1:22:12<1:42:48, 117.05step/s]


Episode 556 finished at step 500 (278000 total). Env Reward: 0.18, Steps: 500, Delivered: 6


Total Steps Trained:  28%|██▊       | 278016/1000000 [1:22:17<17:10:08, 11.68step/s]


--- Rollout Summary (Steps 277501 to 278000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): 0.0005
Avg Critic Loss (per minibatch): 876.2148
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  28%|██▊       | 278493/1000000 [1:22:21<1:41:06, 118.94step/s]


Episode 557 finished at step 500 (278500 total). Env Reward: -8.12, Steps: 500, Delivered: 6


Total Steps Trained:  28%|██▊       | 278516/1000000 [1:22:26<18:00:25, 11.13step/s]


--- Rollout Summary (Steps 278001 to 278500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 556.2785
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  28%|██▊       | 278998/1000000 [1:22:30<1:39:54, 120.28step/s]


Episode 558 finished at step 500 (279000 total). Env Reward: -8.55, Steps: 500, Delivered: 6


Total Steps Trained:  28%|██▊       | 279011/1000000 [1:22:35<23:19:47,  8.58step/s]


--- Rollout Summary (Steps 278501 to 279000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 447.0696
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  28%|██▊       | 279492/1000000 [1:22:39<1:39:16, 120.95step/s]


Episode 559 finished at step 500 (279500 total). Env Reward: -11.11, Steps: 500, Delivered: 4


Total Steps Trained:  28%|██▊       | 279516/1000000 [1:22:43<17:02:12, 11.75step/s]


--- Rollout Summary (Steps 279001 to 279500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 482.5994
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  28%|██▊       | 279990/1000000 [1:22:48<1:42:08, 117.48step/s]


Episode 560 finished at step 500 (280000 total). Env Reward: -8.69, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000560_map1.pth


Total Steps Trained:  28%|██▊       | 280013/1000000 [1:22:52<17:57:29, 11.14step/s]


--- Rollout Summary (Steps 279501 to 280000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 508.4177
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  28%|██▊       | 280493/1000000 [1:22:56<1:45:39, 113.49step/s]


Episode 561 finished at step 500 (280500 total). Env Reward: -10.67, Steps: 500, Delivered: 4


Total Steps Trained:  28%|██▊       | 280515/1000000 [1:23:01<18:03:05, 11.07step/s]


--- Rollout Summary (Steps 280001 to 280500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 651.7398
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  28%|██▊       | 280999/1000000 [1:23:05<1:42:43, 116.66step/s]


Episode 562 finished at step 500 (281000 total). Env Reward: -6.78, Steps: 500, Delivered: 8


Total Steps Trained:  28%|██▊       | 281011/1000000 [1:23:10<25:38:27,  7.79step/s]


--- Rollout Summary (Steps 280501 to 281000) ---
Update Duration: 4.79s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 595.0322
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  28%|██▊       | 281494/1000000 [1:23:14<1:39:34, 120.25step/s]


Episode 563 finished at step 500 (281500 total). Env Reward: -8.74, Steps: 500, Delivered: 6


Total Steps Trained:  28%|██▊       | 281518/1000000 [1:23:19<17:00:17, 11.74step/s]


--- Rollout Summary (Steps 281001 to 281500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0003
Avg Critic Loss (per minibatch): 457.3385
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  28%|██▊       | 281991/1000000 [1:23:23<1:38:52, 121.02step/s]


Episode 564 finished at step 500 (282000 total). Env Reward: -10.28, Steps: 500, Delivered: 4


Total Steps Trained:  28%|██▊       | 282016/1000000 [1:23:28<17:06:55, 11.65step/s]


--- Rollout Summary (Steps 281501 to 282000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 568.0714
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  28%|██▊       | 282498/1000000 [1:23:32<1:38:30, 121.39step/s]


Episode 565 finished at step 500 (282500 total). Env Reward: -10.90, Steps: 500, Delivered: 4


Total Steps Trained:  28%|██▊       | 282511/1000000 [1:23:37<22:22:03,  8.91step/s]


--- Rollout Summary (Steps 282001 to 282500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 489.2513
Avg Entropy (per minibatch): 2.5588
------------------------------


Total Steps Trained:  28%|██▊       | 282998/1000000 [1:23:41<1:39:05, 120.59step/s]


Episode 566 finished at step 500 (283000 total). Env Reward: -9.07, Steps: 500, Delivered: 6


Total Steps Trained:  28%|██▊       | 283011/1000000 [1:23:45<22:20:47,  8.91step/s]


--- Rollout Summary (Steps 282501 to 283000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 415.6907
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  28%|██▊       | 283489/1000000 [1:23:50<1:48:00, 110.57step/s]


Episode 567 finished at step 500 (283500 total). Env Reward: -12.99, Steps: 500, Delivered: 2


Total Steps Trained:  28%|██▊       | 283512/1000000 [1:23:54<17:54:48, 11.11step/s]


--- Rollout Summary (Steps 283001 to 283500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 619.9983
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  28%|██▊       | 283991/1000000 [1:23:59<1:40:05, 119.22step/s]


Episode 568 finished at step 500 (284000 total). Env Reward: -13.71, Steps: 500, Delivered: 1


Total Steps Trained:  28%|██▊       | 284014/1000000 [1:24:03<17:22:49, 11.44step/s]


--- Rollout Summary (Steps 283501 to 284000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 471.9690
Avg Entropy (per minibatch): 2.5585
------------------------------


Total Steps Trained:  28%|██▊       | 284490/1000000 [1:24:07<1:42:05, 116.80step/s]


Episode 569 finished at step 500 (284500 total). Env Reward: -5.73, Steps: 500, Delivered: 9


Total Steps Trained:  28%|██▊       | 284513/1000000 [1:24:12<18:05:11, 10.99step/s]


--- Rollout Summary (Steps 284001 to 284500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 549.2786
Avg Entropy (per minibatch): 2.5618
------------------------------


Total Steps Trained:  28%|██▊       | 284993/1000000 [1:24:16<1:43:21, 115.29step/s]


Episode 570 finished at step 500 (285000 total). Env Reward: -11.93, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000570_map1.pth


Total Steps Trained:  29%|██▊       | 285016/1000000 [1:24:21<17:42:40, 11.21step/s]


--- Rollout Summary (Steps 284501 to 285000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 536.6614
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  29%|██▊       | 285499/1000000 [1:24:25<1:39:47, 119.32step/s]


Episode 571 finished at step 500 (285500 total). Env Reward: -8.38, Steps: 500, Delivered: 6


Total Steps Trained:  29%|██▊       | 285511/1000000 [1:24:30<23:25:17,  8.47step/s]


--- Rollout Summary (Steps 285001 to 285500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 492.5335
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  29%|██▊       | 285990/1000000 [1:24:34<1:43:03, 115.46step/s]


Episode 572 finished at step 500 (286000 total). Env Reward: -10.43, Steps: 500, Delivered: 4


Total Steps Trained:  29%|██▊       | 286014/1000000 [1:24:39<17:18:41, 11.46step/s]


--- Rollout Summary (Steps 285501 to 286000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 500.8057
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  29%|██▊       | 286491/1000000 [1:24:43<1:39:13, 119.84step/s]


Episode 573 finished at step 500 (286500 total). Env Reward: -11.57, Steps: 500, Delivered: 3


Total Steps Trained:  29%|██▊       | 286513/1000000 [1:24:47<17:33:18, 11.29step/s]


--- Rollout Summary (Steps 286001 to 286500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 547.7856
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  29%|██▊       | 286991/1000000 [1:24:52<1:43:42, 114.58step/s]


Episode 574 finished at step 500 (287000 total). Env Reward: -7.50, Steps: 500, Delivered: 7


Total Steps Trained:  29%|██▊       | 287014/1000000 [1:24:56<17:40:10, 11.21step/s]


--- Rollout Summary (Steps 286501 to 287000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 565.9211
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  29%|██▊       | 287499/1000000 [1:25:01<1:44:42, 113.41step/s]


Episode 575 finished at step 500 (287500 total). Env Reward: -8.15, Steps: 500, Delivered: 6


Total Steps Trained:  29%|██▉       | 287511/1000000 [1:25:05<23:45:24,  8.33step/s]


--- Rollout Summary (Steps 287001 to 287500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 726.6408
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  29%|██▉       | 287999/1000000 [1:25:09<1:39:21, 119.43step/s]


Episode 576 finished at step 500 (288000 total). Env Reward: -10.52, Steps: 500, Delivered: 4


Total Steps Trained:  29%|██▉       | 288011/1000000 [1:25:14<23:48:52,  8.30step/s]


--- Rollout Summary (Steps 287501 to 288000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 454.3620
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  29%|██▉       | 288494/1000000 [1:25:18<1:41:03, 117.35step/s]


Episode 577 finished at step 500 (288500 total). Env Reward: -8.98, Steps: 500, Delivered: 5


Total Steps Trained:  29%|██▉       | 288517/1000000 [1:25:23<17:08:33, 11.53step/s]


--- Rollout Summary (Steps 288001 to 288500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 585.8529
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  29%|██▉       | 288990/1000000 [1:25:27<1:45:45, 112.04step/s]


Episode 578 finished at step 500 (289000 total). Env Reward: -6.98, Steps: 500, Delivered: 7


Total Steps Trained:  29%|██▉       | 289013/1000000 [1:25:32<17:50:34, 11.07step/s]


--- Rollout Summary (Steps 288501 to 289000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 951.0313
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  29%|██▉       | 289495/1000000 [1:25:36<1:37:08, 121.91step/s]


Episode 579 finished at step 500 (289500 total). Env Reward: -8.72, Steps: 500, Delivered: 6


Total Steps Trained:  29%|██▉       | 289519/1000000 [1:25:41<17:03:02, 11.57step/s]


--- Rollout Summary (Steps 289001 to 289500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 524.5432
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  29%|██▉       | 289998/1000000 [1:25:45<1:40:39, 117.56step/s]


Episode 580 finished at step 500 (290000 total). Env Reward: -12.45, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000580_map1.pth


Total Steps Trained:  29%|██▉       | 290022/1000000 [1:25:50<17:16:33, 11.42step/s]


--- Rollout Summary (Steps 289501 to 290000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 573.8459
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  29%|██▉       | 290492/1000000 [1:25:54<1:45:45, 111.82step/s]


Episode 581 finished at step 500 (290500 total). Env Reward: -6.53, Steps: 500, Delivered: 8


Total Steps Trained:  29%|██▉       | 290515/1000000 [1:25:58<17:39:09, 11.16step/s]


--- Rollout Summary (Steps 290001 to 290500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 620.6770
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  29%|██▉       | 290998/1000000 [1:26:03<1:37:49, 120.79step/s]


Episode 582 finished at step 500 (291000 total). Env Reward: 2.80, Steps: 500, Delivered: 8


Total Steps Trained:  29%|██▉       | 291011/1000000 [1:26:07<22:36:47,  8.71step/s]


--- Rollout Summary (Steps 290501 to 291000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 931.8968
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  29%|██▉       | 291499/1000000 [1:26:12<1:40:12, 117.83step/s]


Episode 583 finished at step 500 (291500 total). Env Reward: -10.20, Steps: 500, Delivered: 4


Total Steps Trained:  29%|██▉       | 291511/1000000 [1:26:16<23:49:33,  8.26step/s]


--- Rollout Summary (Steps 291001 to 291500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 536.9506
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  29%|██▉       | 291992/1000000 [1:26:20<1:43:15, 114.28step/s]


Episode 584 finished at step 500 (292000 total). Env Reward: -10.48, Steps: 500, Delivered: 4


Total Steps Trained:  29%|██▉       | 292014/1000000 [1:26:25<17:52:07, 11.01step/s]


--- Rollout Summary (Steps 291501 to 292000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 555.4692
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  29%|██▉       | 292499/1000000 [1:26:29<1:38:44, 119.43step/s]


Episode 585 finished at step 500 (292500 total). Env Reward: -10.14, Steps: 500, Delivered: 5


Total Steps Trained:  29%|██▉       | 292511/1000000 [1:26:34<22:58:07,  8.56step/s]


--- Rollout Summary (Steps 292001 to 292500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 499.7213
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  29%|██▉       | 292991/1000000 [1:26:38<1:40:05, 117.74step/s]


Episode 586 finished at step 500 (293000 total). Env Reward: -10.93, Steps: 500, Delivered: 4


Total Steps Trained:  29%|██▉       | 293013/1000000 [1:26:43<17:31:16, 11.21step/s]


--- Rollout Summary (Steps 292501 to 293000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 427.4991
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  29%|██▉       | 293489/1000000 [1:26:47<1:38:13, 119.87step/s]


Episode 587 finished at step 500 (293500 total). Env Reward: -0.83, Steps: 500, Delivered: 5


Total Steps Trained:  29%|██▉       | 293512/1000000 [1:26:52<17:54:32, 10.96step/s]


--- Rollout Summary (Steps 293001 to 293500) ---
Update Duration: 4.71s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): 0.0008
Avg Critic Loss (per minibatch): 807.5962
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  29%|██▉       | 293997/1000000 [1:26:56<1:36:14, 122.27step/s]


Episode 588 finished at step 500 (294000 total). Env Reward: -9.80, Steps: 500, Delivered: 5


Total Steps Trained:  29%|██▉       | 294021/1000000 [1:27:01<16:44:25, 11.71step/s]


--- Rollout Summary (Steps 293501 to 294000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 505.5339
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  29%|██▉       | 294495/1000000 [1:27:05<1:49:03, 107.82step/s]


Episode 589 finished at step 500 (294500 total). Env Reward: -5.78, Steps: 500, Delivered: 9


Total Steps Trained:  29%|██▉       | 294516/1000000 [1:27:09<18:47:03, 10.43step/s]


--- Rollout Summary (Steps 294001 to 294500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 690.0303
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  29%|██▉       | 294988/1000000 [1:27:13<1:36:51, 121.31step/s]


Episode 590 finished at step 500 (295000 total). Env Reward: -4.30, Steps: 500, Delivered: 10
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000590_map1.pth


Total Steps Trained:  30%|██▉       | 295011/1000000 [1:27:18<17:32:26, 11.16step/s]


--- Rollout Summary (Steps 294501 to 295000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 985.2359
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  30%|██▉       | 295491/1000000 [1:27:22<1:40:45, 116.53step/s]


Episode 591 finished at step 500 (295500 total). Env Reward: -10.46, Steps: 500, Delivered: 4


Total Steps Trained:  30%|██▉       | 295514/1000000 [1:27:27<17:21:50, 11.27step/s]


--- Rollout Summary (Steps 295001 to 295500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 368.0104
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  30%|██▉       | 295996/1000000 [1:27:31<1:41:33, 115.54step/s]


Episode 592 finished at step 500 (296000 total). Env Reward: 1.12, Steps: 500, Delivered: 7


Total Steps Trained:  30%|██▉       | 296019/1000000 [1:27:36<17:38:37, 11.08step/s]


--- Rollout Summary (Steps 295501 to 296000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0077
Avg Critic Loss (per minibatch): 446.0946
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  30%|██▉       | 296495/1000000 [1:27:40<1:35:59, 122.15step/s]


Episode 593 finished at step 500 (296500 total). Env Reward: 2.72, Steps: 500, Delivered: 8


Total Steps Trained:  30%|██▉       | 296519/1000000 [1:27:45<16:30:26, 11.84step/s]


--- Rollout Summary (Steps 296001 to 296500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 1110.0138
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  30%|██▉       | 296990/1000000 [1:27:49<1:43:03, 113.69step/s]


Episode 594 finished at step 500 (297000 total). Env Reward: -12.18, Steps: 500, Delivered: 2


Total Steps Trained:  30%|██▉       | 297014/1000000 [1:27:54<17:22:06, 11.24step/s]


--- Rollout Summary (Steps 296501 to 297000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 577.5993
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  30%|██▉       | 297499/1000000 [1:27:58<1:41:35, 115.24step/s]


Episode 595 finished at step 500 (297500 total). Env Reward: -0.70, Steps: 500, Delivered: 5


Total Steps Trained:  30%|██▉       | 297511/1000000 [1:28:02<22:37:10,  8.63step/s]


--- Rollout Summary (Steps 297001 to 297500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 910.4806
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  30%|██▉       | 297994/1000000 [1:28:07<1:40:10, 116.79step/s]


Episode 596 finished at step 500 (298000 total). Env Reward: -6.43, Steps: 500, Delivered: 8


Total Steps Trained:  30%|██▉       | 298017/1000000 [1:28:11<17:25:40, 11.19step/s]


--- Rollout Summary (Steps 297501 to 298000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 596.8379
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  30%|██▉       | 298489/1000000 [1:28:15<1:35:56, 121.86step/s]


Episode 597 finished at step 500 (298500 total). Env Reward: -8.34, Steps: 500, Delivered: 6


Total Steps Trained:  30%|██▉       | 298512/1000000 [1:28:20<17:06:56, 11.38step/s]


--- Rollout Summary (Steps 298001 to 298500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0077
Avg Critic Loss (per minibatch): 694.6184
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  30%|██▉       | 298989/1000000 [1:28:24<1:36:28, 121.10step/s]


Episode 598 finished at step 500 (299000 total). Env Reward: -11.98, Steps: 500, Delivered: 3


Total Steps Trained:  30%|██▉       | 299013/1000000 [1:28:29<16:40:12, 11.68step/s]


--- Rollout Summary (Steps 298501 to 299000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 514.3585
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  30%|██▉       | 299498/1000000 [1:28:33<1:38:04, 119.04step/s]


Episode 599 finished at step 500 (299500 total). Env Reward: -8.73, Steps: 500, Delivered: 6


Total Steps Trained:  30%|██▉       | 299510/1000000 [1:28:38<23:07:50,  8.41step/s]


--- Rollout Summary (Steps 299001 to 299500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 433.8021
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  30%|██▉       | 299990/1000000 [1:28:42<1:36:14, 121.23step/s]


Episode 600 finished at step 500 (300000 total). Env Reward: -8.50, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000600_map1.pth


Total Steps Trained:  30%|███       | 300014/1000000 [1:28:47<16:37:34, 11.69step/s]


--- Rollout Summary (Steps 299501 to 300000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 474.4008
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  30%|███       | 300489/1000000 [1:28:51<1:42:51, 113.34step/s]


Episode 601 finished at step 500 (300500 total). Env Reward: -0.75, Steps: 500, Delivered: 5


Total Steps Trained:  30%|███       | 300511/1000000 [1:28:55<18:01:28, 10.78step/s]


--- Rollout Summary (Steps 300001 to 300500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 648.0879
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  30%|███       | 300999/1000000 [1:29:00<1:43:25, 112.64step/s]


Episode 602 finished at step 500 (301000 total). Env Reward: -1.92, Steps: 500, Delivered: 4


Total Steps Trained:  30%|███       | 301011/1000000 [1:29:05<24:03:16,  8.07step/s]


--- Rollout Summary (Steps 300501 to 301000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): 0.0003
Avg Critic Loss (per minibatch): 654.0686
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  30%|███       | 301493/1000000 [1:29:09<1:35:12, 122.28step/s]


Episode 603 finished at step 500 (301500 total). Env Reward: -9.75, Steps: 500, Delivered: 5


Total Steps Trained:  30%|███       | 301518/1000000 [1:29:13<16:12:54, 11.97step/s]


--- Rollout Summary (Steps 301001 to 301500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 637.3663
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  30%|███       | 301999/1000000 [1:29:18<1:38:15, 118.40step/s]


Episode 604 finished at step 500 (302000 total). Env Reward: -7.21, Steps: 500, Delivered: 8


Total Steps Trained:  30%|███       | 302023/1000000 [1:29:22<16:42:35, 11.60step/s]


--- Rollout Summary (Steps 301501 to 302000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0085
Avg Critic Loss (per minibatch): 461.7029
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  30%|███       | 302490/1000000 [1:29:26<1:38:15, 118.31step/s]


Episode 605 finished at step 500 (302500 total). Env Reward: -13.17, Steps: 500, Delivered: 1


Total Steps Trained:  30%|███       | 302512/1000000 [1:29:31<18:22:53, 10.54step/s]


--- Rollout Summary (Steps 302001 to 302500) ---
Update Duration: 4.77s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 461.6130
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  30%|███       | 302999/1000000 [1:29:35<1:42:26, 113.40step/s]


Episode 606 finished at step 500 (303000 total). Env Reward: -9.20, Steps: 500, Delivered: 5


Total Steps Trained:  30%|███       | 303011/1000000 [1:29:40<23:43:14,  8.16step/s]


--- Rollout Summary (Steps 302501 to 303000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 533.0017
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  30%|███       | 303494/1000000 [1:29:44<1:35:32, 121.50step/s]


Episode 607 finished at step 500 (303500 total). Env Reward: -9.81, Steps: 500, Delivered: 5


Total Steps Trained:  30%|███       | 303517/1000000 [1:29:49<16:52:49, 11.46step/s]


--- Rollout Summary (Steps 303001 to 303500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 512.7771
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  30%|███       | 303994/1000000 [1:29:53<1:35:53, 120.96step/s]


Episode 608 finished at step 500 (304000 total). Env Reward: 3.09, Steps: 500, Delivered: 9


Total Steps Trained:  30%|███       | 304016/1000000 [1:29:58<17:35:55, 10.99step/s]


--- Rollout Summary (Steps 303501 to 304000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 1200.7208
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  30%|███       | 304500/1000000 [1:30:02<1:37:29, 118.91step/s]


Episode 609 finished at step 500 (304500 total). Env Reward: -10.45, Steps: 500, Delivered: 4


Total Steps Trained:  30%|███       | 304512/1000000 [1:30:07<22:18:28,  8.66step/s]


--- Rollout Summary (Steps 304001 to 304500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0014
Avg Critic Loss (per minibatch): 449.7509
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  30%|███       | 304999/1000000 [1:30:11<1:36:17, 120.29step/s]


Episode 610 finished at step 500 (305000 total). Env Reward: -13.69, Steps: 500, Delivered: 1
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000610_map1.pth


Total Steps Trained:  31%|███       | 305012/1000000 [1:30:15<21:54:13,  8.81step/s]


--- Rollout Summary (Steps 304501 to 305000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 507.8136
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  31%|███       | 305499/1000000 [1:30:19<1:39:16, 116.60step/s]


Episode 611 finished at step 500 (305500 total). Env Reward: -12.77, Steps: 500, Delivered: 2


Total Steps Trained:  31%|███       | 305511/1000000 [1:30:24<22:42:10,  8.50step/s]


--- Rollout Summary (Steps 305001 to 305500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 713.0459
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  31%|███       | 305998/1000000 [1:30:28<1:36:52, 119.41step/s]


Episode 612 finished at step 500 (306000 total). Env Reward: -12.79, Steps: 500, Delivered: 2


Total Steps Trained:  31%|███       | 306019/1000000 [1:30:33<17:47:21, 10.84step/s]


--- Rollout Summary (Steps 305501 to 306000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 549.3302
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  31%|███       | 306500/1000000 [1:30:37<1:35:26, 121.11step/s]


Episode 613 finished at step 500 (306500 total). Env Reward: -11.52, Steps: 500, Delivered: 3


Total Steps Trained:  31%|███       | 306513/1000000 [1:30:42<21:45:10,  8.86step/s]


--- Rollout Summary (Steps 306001 to 306500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 417.5804
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  31%|███       | 306989/1000000 [1:30:46<1:34:24, 122.35step/s]


Episode 614 finished at step 500 (307000 total). Env Reward: -12.87, Steps: 500, Delivered: 2


Total Steps Trained:  31%|███       | 307013/1000000 [1:30:50<16:29:28, 11.67step/s]


--- Rollout Summary (Steps 306501 to 307000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 414.9298
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  31%|███       | 307500/1000000 [1:30:55<1:37:24, 118.49step/s]


Episode 615 finished at step 500 (307500 total). Env Reward: -9.74, Steps: 500, Delivered: 5


Total Steps Trained:  31%|███       | 307512/1000000 [1:30:59<22:47:03,  8.44step/s]


--- Rollout Summary (Steps 307001 to 307500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 407.8296
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  31%|███       | 307994/1000000 [1:31:03<1:36:42, 119.27step/s]


Episode 616 finished at step 500 (308000 total). Env Reward: -10.06, Steps: 500, Delivered: 4


Total Steps Trained:  31%|███       | 308017/1000000 [1:31:08<16:54:02, 11.37step/s]


--- Rollout Summary (Steps 307501 to 308000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 462.0021
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  31%|███       | 308498/1000000 [1:31:12<1:34:17, 122.22step/s]


Episode 617 finished at step 500 (308500 total). Env Reward: 1.27, Steps: 500, Delivered: 7


Total Steps Trained:  31%|███       | 308511/1000000 [1:31:17<21:39:53,  8.87step/s]


--- Rollout Summary (Steps 308001 to 308500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 1086.0830
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  31%|███       | 308996/1000000 [1:31:21<1:49:12, 105.46step/s]


Episode 618 finished at step 500 (309000 total). Env Reward: -9.44, Steps: 500, Delivered: 5


Total Steps Trained:  31%|███       | 309018/1000000 [1:31:26<18:16:56, 10.50step/s]


--- Rollout Summary (Steps 308501 to 309000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 538.2905
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  31%|███       | 309498/1000000 [1:31:30<1:35:40, 120.29step/s]


Episode 619 finished at step 500 (309500 total). Env Reward: -10.58, Steps: 500, Delivered: 4


Total Steps Trained:  31%|███       | 309511/1000000 [1:31:35<22:35:35,  8.49step/s]


--- Rollout Summary (Steps 309001 to 309500) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 588.1799
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  31%|███       | 309991/1000000 [1:31:39<1:48:01, 106.46step/s]


Episode 620 finished at step 500 (310000 total). Env Reward: 3.12, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000620_map1.pth


Total Steps Trained:  31%|███       | 310013/1000000 [1:31:44<18:34:33, 10.32step/s]


--- Rollout Summary (Steps 309501 to 310000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 1258.4048
Avg Entropy (per minibatch): 2.5615
------------------------------


Total Steps Trained:  31%|███       | 310498/1000000 [1:31:48<1:41:39, 113.03step/s]


Episode 621 finished at step 500 (310500 total). Env Reward: -11.41, Steps: 500, Delivered: 3


Total Steps Trained:  31%|███       | 310521/1000000 [1:31:52<16:41:35, 11.47step/s]


--- Rollout Summary (Steps 310001 to 310500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 549.2400
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  31%|███       | 311000/1000000 [1:31:57<1:37:30, 117.77step/s]


Episode 622 finished at step 500 (311000 total). Env Reward: -8.75, Steps: 500, Delivered: 6


Total Steps Trained:  31%|███       | 311012/1000000 [1:32:01<23:07:20,  8.28step/s]


--- Rollout Summary (Steps 310501 to 311000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 423.9860
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  31%|███       | 311497/1000000 [1:32:06<1:36:28, 118.94step/s]


Episode 623 finished at step 500 (311500 total). Env Reward: -11.24, Steps: 500, Delivered: 3


Total Steps Trained:  31%|███       | 311521/1000000 [1:32:10<16:34:05, 11.54step/s]


--- Rollout Summary (Steps 311001 to 311500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0014
Avg Critic Loss (per minibatch): 583.1930
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  31%|███       | 311993/1000000 [1:32:14<1:36:33, 118.75step/s]


Episode 624 finished at step 500 (312000 total). Env Reward: -9.68, Steps: 500, Delivered: 5


Total Steps Trained:  31%|███       | 312016/1000000 [1:32:19<16:47:45, 11.38step/s]


--- Rollout Summary (Steps 311501 to 312000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0116
Avg Critic Loss (per minibatch): 498.2375
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  31%|███       | 312493/1000000 [1:32:23<1:35:31, 119.96step/s]


Episode 625 finished at step 500 (312500 total). Env Reward: -12.83, Steps: 500, Delivered: 2


Total Steps Trained:  31%|███▏      | 312514/1000000 [1:32:28<17:28:26, 10.93step/s]


--- Rollout Summary (Steps 312001 to 312500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 535.8995
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  31%|███▏      | 312991/1000000 [1:32:32<1:34:19, 121.39step/s]


Episode 626 finished at step 500 (313000 total). Env Reward: -11.81, Steps: 500, Delivered: 3


Total Steps Trained:  31%|███▏      | 313015/1000000 [1:32:37<16:45:58, 11.38step/s]


--- Rollout Summary (Steps 312501 to 313000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 498.2062
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  31%|███▏      | 313499/1000000 [1:32:41<1:36:05, 119.07step/s]


Episode 627 finished at step 500 (313500 total). Env Reward: -9.71, Steps: 500, Delivered: 4


Total Steps Trained:  31%|███▏      | 313511/1000000 [1:32:46<22:08:11,  8.61step/s]


--- Rollout Summary (Steps 313001 to 313500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 715.3153
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  31%|███▏      | 313997/1000000 [1:32:50<1:34:56, 120.43step/s]


Episode 628 finished at step 500 (314000 total). Env Reward: -8.52, Steps: 500, Delivered: 6


Total Steps Trained:  31%|███▏      | 314010/1000000 [1:32:55<21:32:32,  8.85step/s]


--- Rollout Summary (Steps 313501 to 314000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 644.5477
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  31%|███▏      | 314489/1000000 [1:32:59<1:34:52, 120.41step/s]


Episode 629 finished at step 500 (314500 total). Env Reward: -8.53, Steps: 500, Delivered: 6


Total Steps Trained:  31%|███▏      | 314513/1000000 [1:33:04<16:15:52, 11.71step/s]


--- Rollout Summary (Steps 314001 to 314500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 521.5895
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  31%|███▏      | 314990/1000000 [1:33:08<1:36:48, 117.93step/s]


Episode 630 finished at step 500 (315000 total). Env Reward: -8.88, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000630_map1.pth


Total Steps Trained:  32%|███▏      | 315014/1000000 [1:33:12<16:10:29, 11.76step/s]


--- Rollout Summary (Steps 314501 to 315000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 378.9535
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  32%|███▏      | 315494/1000000 [1:33:16<1:44:51, 108.79step/s]


Episode 631 finished at step 500 (315500 total). Env Reward: 2.82, Steps: 500, Delivered: 8


Total Steps Trained:  32%|███▏      | 315516/1000000 [1:33:21<17:35:52, 10.80step/s]


--- Rollout Summary (Steps 315001 to 315500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0077
Avg Critic Loss (per minibatch): 991.0407
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  32%|███▏      | 316000/1000000 [1:33:25<1:39:24, 114.69step/s]


Episode 632 finished at step 500 (316000 total). Env Reward: -2.41, Steps: 500, Delivered: 3


Total Steps Trained:  32%|███▏      | 316012/1000000 [1:33:30<22:36:27,  8.40step/s]


--- Rollout Summary (Steps 315501 to 316000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0092
Avg Critic Loss (per minibatch): 774.3211
Avg Entropy (per minibatch): 2.5587
------------------------------


Total Steps Trained:  32%|███▏      | 316500/1000000 [1:33:34<1:34:58, 119.94step/s]


Episode 633 finished at step 500 (316500 total). Env Reward: -10.11, Steps: 500, Delivered: 4


Total Steps Trained:  32%|███▏      | 316513/1000000 [1:33:39<22:02:40,  8.61step/s]


--- Rollout Summary (Steps 316001 to 316500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 653.0393
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  32%|███▏      | 316992/1000000 [1:33:43<1:36:30, 117.96step/s]


Episode 634 finished at step 500 (317000 total). Env Reward: -9.18, Steps: 500, Delivered: 6


Total Steps Trained:  32%|███▏      | 317016/1000000 [1:33:48<16:17:50, 11.64step/s]


--- Rollout Summary (Steps 316501 to 317000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 529.7761
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  32%|███▏      | 317498/1000000 [1:33:52<1:40:53, 112.75step/s]


Episode 635 finished at step 500 (317500 total). Env Reward: 2.91, Steps: 500, Delivered: 8


Total Steps Trained:  32%|███▏      | 317510/1000000 [1:33:56<23:01:55,  8.23step/s]


--- Rollout Summary (Steps 317001 to 317500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 1164.1921
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  32%|███▏      | 317998/1000000 [1:34:01<1:37:32, 116.53step/s]


Episode 636 finished at step 500 (318000 total). Env Reward: -5.22, Steps: 500, Delivered: 9


Total Steps Trained:  32%|███▏      | 318021/1000000 [1:34:05<16:54:50, 11.20step/s]


--- Rollout Summary (Steps 317501 to 318000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 619.3800
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  32%|███▏      | 318492/1000000 [1:34:09<1:37:46, 116.17step/s]


Episode 637 finished at step 500 (318500 total). Env Reward: -12.05, Steps: 500, Delivered: 3


Total Steps Trained:  32%|███▏      | 318515/1000000 [1:34:14<16:54:37, 11.19step/s]


--- Rollout Summary (Steps 318001 to 318500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 516.7720
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  32%|███▏      | 318998/1000000 [1:34:18<1:39:09, 114.46step/s]


Episode 638 finished at step 500 (319000 total). Env Reward: -11.04, Steps: 500, Delivered: 4


Total Steps Trained:  32%|███▏      | 319010/1000000 [1:34:23<22:36:31,  8.37step/s]


--- Rollout Summary (Steps 318501 to 319000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 609.8945
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  32%|███▏      | 319490/1000000 [1:34:27<1:31:33, 123.87step/s]


Episode 639 finished at step 500 (319500 total). Env Reward: -11.50, Steps: 500, Delivered: 3


Total Steps Trained:  32%|███▏      | 319514/1000000 [1:34:32<15:59:05, 11.83step/s]


--- Rollout Summary (Steps 319001 to 319500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 602.7808
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  32%|███▏      | 319991/1000000 [1:34:36<1:37:10, 116.63step/s]


Episode 640 finished at step 500 (320000 total). Env Reward: -7.36, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000640_map1.pth


Total Steps Trained:  32%|███▏      | 320014/1000000 [1:34:40<17:06:40, 11.04step/s]


--- Rollout Summary (Steps 319501 to 320000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 460.5296
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  32%|███▏      | 320497/1000000 [1:34:45<1:32:13, 122.80step/s]


Episode 641 finished at step 500 (320500 total). Env Reward: -11.46, Steps: 500, Delivered: 3


Total Steps Trained:  32%|███▏      | 320522/1000000 [1:34:50<15:49:18, 11.93step/s]


--- Rollout Summary (Steps 320001 to 320500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 649.4592
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  32%|███▏      | 320992/1000000 [1:34:54<1:38:23, 115.01step/s]


Episode 642 finished at step 500 (321000 total). Env Reward: -8.59, Steps: 500, Delivered: 6


Total Steps Trained:  32%|███▏      | 321016/1000000 [1:34:58<16:55:26, 11.14step/s]


--- Rollout Summary (Steps 320501 to 321000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 654.0342
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  32%|███▏      | 321490/1000000 [1:35:02<1:34:16, 119.96step/s]


Episode 643 finished at step 500 (321500 total). Env Reward: -11.71, Steps: 500, Delivered: 3


Total Steps Trained:  32%|███▏      | 321513/1000000 [1:35:07<16:44:49, 11.25step/s]


--- Rollout Summary (Steps 321001 to 321500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -8.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 445.6066
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  32%|███▏      | 321994/1000000 [1:35:12<1:35:21, 118.51step/s]


Episode 644 finished at step 500 (322000 total). Env Reward: -6.97, Steps: 500, Delivered: 7


Total Steps Trained:  32%|███▏      | 322017/1000000 [1:35:16<16:39:06, 11.31step/s]


--- Rollout Summary (Steps 321501 to 322000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -8.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 550.4210
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  32%|███▏      | 322492/1000000 [1:35:20<1:37:36, 115.68step/s]


Episode 645 finished at step 500 (322500 total). Env Reward: -8.08, Steps: 500, Delivered: 6


Total Steps Trained:  32%|███▏      | 322515/1000000 [1:35:25<16:39:23, 11.30step/s]


--- Rollout Summary (Steps 322001 to 322500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 634.8029
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  32%|███▏      | 322988/1000000 [1:35:29<1:32:54, 121.45step/s]


Episode 646 finished at step 500 (323000 total). Env Reward: -11.50, Steps: 500, Delivered: 3


Total Steps Trained:  32%|███▏      | 323012/1000000 [1:35:34<16:01:30, 11.73step/s]


--- Rollout Summary (Steps 322501 to 323000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 406.4078
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  32%|███▏      | 323498/1000000 [1:35:38<1:39:01, 113.86step/s]


Episode 647 finished at step 500 (323500 total). Env Reward: -8.49, Steps: 500, Delivered: 6


Total Steps Trained:  32%|███▏      | 323522/1000000 [1:35:43<16:05:22, 11.68step/s]


--- Rollout Summary (Steps 323001 to 323500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 495.9519
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  32%|███▏      | 323996/1000000 [1:35:47<1:31:23, 123.27step/s]


Episode 648 finished at step 500 (324000 total). Env Reward: -9.56, Steps: 500, Delivered: 5


Total Steps Trained:  32%|███▏      | 324020/1000000 [1:35:51<16:01:34, 11.72step/s]


--- Rollout Summary (Steps 323501 to 324000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 316.9616
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  32%|███▏      | 324488/1000000 [1:35:56<1:32:09, 122.17step/s]


Episode 649 finished at step 500 (324500 total). Env Reward: -11.46, Steps: 500, Delivered: 3


Total Steps Trained:  32%|███▏      | 324512/1000000 [1:36:00<15:57:27, 11.76step/s]


--- Rollout Summary (Steps 324001 to 324500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0006
Avg Critic Loss (per minibatch): 603.0026
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  32%|███▏      | 324992/1000000 [1:36:04<1:36:09, 117.00step/s]


Episode 650 finished at step 500 (325000 total). Env Reward: -7.56, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000650_map1.pth


Total Steps Trained:  33%|███▎      | 325015/1000000 [1:36:09<16:33:04, 11.33step/s]


--- Rollout Summary (Steps 324501 to 325000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -8.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 600.3960
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  33%|███▎      | 325495/1000000 [1:36:13<1:33:44, 119.92step/s]


Episode 651 finished at step 500 (325500 total). Env Reward: -7.99, Steps: 500, Delivered: 6


Total Steps Trained:  33%|███▎      | 325517/1000000 [1:36:18<16:44:27, 11.19step/s]


--- Rollout Summary (Steps 325001 to 325500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0113
Avg Critic Loss (per minibatch): 761.3248
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  33%|███▎      | 325990/1000000 [1:36:22<1:30:53, 123.60step/s]


Episode 652 finished at step 500 (326000 total). Env Reward: -0.36, Steps: 500, Delivered: 5


Total Steps Trained:  33%|███▎      | 326013/1000000 [1:36:27<16:09:43, 11.58step/s]


--- Rollout Summary (Steps 325501 to 326000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 862.0577
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  33%|███▎      | 326500/1000000 [1:36:31<1:43:35, 108.35step/s]


Episode 653 finished at step 500 (326500 total). Env Reward: 0.37, Steps: 500, Delivered: 6


Total Steps Trained:  33%|███▎      | 326511/1000000 [1:36:35<23:08:01,  8.09step/s]


--- Rollout Summary (Steps 326001 to 326500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -8.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 1124.3697
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  33%|███▎      | 326999/1000000 [1:36:40<1:33:07, 120.46step/s]


Episode 654 finished at step 500 (327000 total). Env Reward: -10.08, Steps: 500, Delivered: 4


Total Steps Trained:  33%|███▎      | 327012/1000000 [1:36:44<21:03:19,  8.88step/s]


--- Rollout Summary (Steps 326501 to 327000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.04
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 436.5243
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  33%|███▎      | 327489/1000000 [1:36:48<1:37:09, 115.35step/s]


Episode 655 finished at step 500 (327500 total). Env Reward: -10.79, Steps: 500, Delivered: 4


Total Steps Trained:  33%|███▎      | 327512/1000000 [1:36:53<16:17:30, 11.47step/s]


--- Rollout Summary (Steps 327001 to 327500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -8.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): 0.0010
Avg Critic Loss (per minibatch): 565.4459
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  33%|███▎      | 327995/1000000 [1:36:57<1:37:54, 114.39step/s]


Episode 656 finished at step 500 (328000 total). Env Reward: -7.45, Steps: 500, Delivered: 7


Total Steps Trained:  33%|███▎      | 328018/1000000 [1:37:02<16:46:57, 11.12step/s]


--- Rollout Summary (Steps 327501 to 328000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -8.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0098
Avg Critic Loss (per minibatch): 481.1116
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  33%|███▎      | 328495/1000000 [1:37:06<1:30:32, 123.61step/s]


Episode 657 finished at step 500 (328500 total). Env Reward: 0.08, Steps: 500, Delivered: 6


Total Steps Trained:  33%|███▎      | 328519/1000000 [1:37:11<15:52:28, 11.75step/s]


--- Rollout Summary (Steps 328001 to 328500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): 0.0003
Avg Critic Loss (per minibatch): 535.1199
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  33%|███▎      | 328999/1000000 [1:37:15<1:34:30, 118.32step/s]


Episode 658 finished at step 500 (329000 total). Env Reward: -9.83, Steps: 500, Delivered: 5


Total Steps Trained:  33%|███▎      | 329011/1000000 [1:37:20<22:52:32,  8.15step/s]


--- Rollout Summary (Steps 328501 to 329000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -8.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 491.7697
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  33%|███▎      | 329499/1000000 [1:37:24<1:36:30, 115.80step/s]


Episode 659 finished at step 500 (329500 total). Env Reward: 13.11, Steps: 500, Delivered: 10


Total Steps Trained:  33%|███▎      | 329511/1000000 [1:37:29<22:42:48,  8.20step/s]


--- Rollout Summary (Steps 329001 to 329500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 556.5929
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  33%|███▎      | 329990/1000000 [1:37:33<1:32:50, 120.27step/s]


Episode 660 finished at step 500 (330000 total). Env Reward: -2.22, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000660_map1.pth


Total Steps Trained:  33%|███▎      | 330015/1000000 [1:37:38<15:46:04, 11.80step/s]


--- Rollout Summary (Steps 329501 to 330000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 723.3112
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  33%|███▎      | 330493/1000000 [1:37:42<1:32:54, 120.11step/s]


Episode 661 finished at step 500 (330500 total). Env Reward: 0.20, Steps: 500, Delivered: 6


Total Steps Trained:  33%|███▎      | 330517/1000000 [1:37:46<15:46:17, 11.79step/s]


--- Rollout Summary (Steps 330001 to 330500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 1300.7721
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  33%|███▎      | 330992/1000000 [1:37:50<1:35:47, 116.40step/s]


Episode 662 finished at step 500 (331000 total). Env Reward: -7.60, Steps: 500, Delivered: 7


Total Steps Trained:  33%|███▎      | 331015/1000000 [1:37:55<16:58:53, 10.94step/s]


--- Rollout Summary (Steps 330501 to 331000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 543.8747
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  33%|███▎      | 331489/1000000 [1:37:59<1:32:00, 121.10step/s]


Episode 663 finished at step 500 (331500 total). Env Reward: -9.20, Steps: 500, Delivered: 5


Total Steps Trained:  33%|███▎      | 331513/1000000 [1:38:04<15:48:31, 11.75step/s]


--- Rollout Summary (Steps 331001 to 331500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 426.3487
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  33%|███▎      | 331999/1000000 [1:38:08<1:37:09, 114.59step/s]


Episode 664 finished at step 500 (332000 total). Env Reward: -10.23, Steps: 500, Delivered: 4


Total Steps Trained:  33%|███▎      | 332011/1000000 [1:38:13<22:27:48,  8.26step/s]


--- Rollout Summary (Steps 331501 to 332000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 483.4190
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  33%|███▎      | 332493/1000000 [1:38:17<1:34:18, 117.97step/s]


Episode 665 finished at step 500 (332500 total). Env Reward: -9.20, Steps: 500, Delivered: 5


Total Steps Trained:  33%|███▎      | 332516/1000000 [1:38:22<16:17:10, 11.38step/s]


--- Rollout Summary (Steps 332001 to 332500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 482.8987
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  33%|███▎      | 332992/1000000 [1:38:26<1:35:57, 115.85step/s]


Episode 666 finished at step 500 (333000 total). Env Reward: -6.68, Steps: 500, Delivered: 8


Total Steps Trained:  33%|███▎      | 333016/1000000 [1:38:31<16:06:11, 11.51step/s]


--- Rollout Summary (Steps 332501 to 333000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 463.9514
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  33%|███▎      | 333499/1000000 [1:38:35<1:38:19, 112.98step/s]


Episode 667 finished at step 500 (333500 total). Env Reward: -9.81, Steps: 500, Delivered: 5


Total Steps Trained:  33%|███▎      | 333511/1000000 [1:38:39<22:08:01,  8.36step/s]


--- Rollout Summary (Steps 333001 to 333500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): 0.0005
Avg Critic Loss (per minibatch): 454.0721
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  33%|███▎      | 333991/1000000 [1:38:43<1:38:12, 113.02step/s]


Episode 668 finished at step 500 (334000 total). Env Reward: -9.91, Steps: 500, Delivered: 5


Total Steps Trained:  33%|███▎      | 334014/1000000 [1:38:48<16:24:07, 11.28step/s]


--- Rollout Summary (Steps 333501 to 334000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 411.6427
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  33%|███▎      | 334498/1000000 [1:38:52<1:31:16, 121.51step/s]


Episode 669 finished at step 500 (334500 total). Env Reward: -10.79, Steps: 500, Delivered: 4


Total Steps Trained:  33%|███▎      | 334511/1000000 [1:38:57<20:53:02,  8.85step/s]


--- Rollout Summary (Steps 334001 to 334500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 490.1358
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  33%|███▎      | 334999/1000000 [1:39:01<1:29:45, 123.47step/s]


Episode 670 finished at step 500 (335000 total). Env Reward: -10.29, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000670_map1.pth


Total Steps Trained:  34%|███▎      | 335012/1000000 [1:39:06<20:50:21,  8.86step/s]


--- Rollout Summary (Steps 334501 to 335000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 506.6730
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  34%|███▎      | 335491/1000000 [1:39:10<1:30:36, 122.24step/s]


Episode 671 finished at step 500 (335500 total). Env Reward: 10.57, Steps: 500, Delivered: 7


Total Steps Trained:  34%|███▎      | 335515/1000000 [1:39:14<15:38:33, 11.80step/s]


--- Rollout Summary (Steps 335001 to 335500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 1153.5787
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  34%|███▎      | 335990/1000000 [1:39:19<1:31:42, 120.68step/s]


Episode 672 finished at step 500 (336000 total). Env Reward: -7.61, Steps: 500, Delivered: 7


Total Steps Trained:  34%|███▎      | 336013/1000000 [1:39:23<16:04:30, 11.47step/s]


--- Rollout Summary (Steps 335501 to 336000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0101
Avg Critic Loss (per minibatch): 738.0354
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  34%|███▎      | 336489/1000000 [1:39:27<1:31:58, 120.24step/s]


Episode 673 finished at step 500 (336500 total). Env Reward: -11.79, Steps: 500, Delivered: 3


Total Steps Trained:  34%|███▎      | 336513/1000000 [1:39:32<15:40:22, 11.76step/s]


--- Rollout Summary (Steps 336001 to 336500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 407.8784
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  34%|███▎      | 336994/1000000 [1:39:36<1:32:47, 119.09step/s]


Episode 674 finished at step 500 (337000 total). Env Reward: -11.65, Steps: 500, Delivered: 3


Total Steps Trained:  34%|███▎      | 337018/1000000 [1:39:41<15:36:15, 11.80step/s]


--- Rollout Summary (Steps 336501 to 337000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 418.2366
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  34%|███▎      | 337491/1000000 [1:39:45<1:30:31, 121.97step/s]


Episode 675 finished at step 500 (337500 total). Env Reward: -11.65, Steps: 500, Delivered: 3


Total Steps Trained:  34%|███▍      | 337516/1000000 [1:39:50<15:22:23, 11.97step/s]


--- Rollout Summary (Steps 337001 to 337500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 582.6111
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  34%|███▍      | 337997/1000000 [1:39:54<1:31:23, 120.73step/s]


Episode 676 finished at step 500 (338000 total). Env Reward: -11.96, Steps: 500, Delivered: 3


Total Steps Trained:  34%|███▍      | 338021/1000000 [1:39:58<16:05:33, 11.43step/s]


--- Rollout Summary (Steps 337501 to 338000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 513.0858
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  34%|███▍      | 338493/1000000 [1:40:03<1:34:48, 116.28step/s]


Episode 677 finished at step 500 (338500 total). Env Reward: 4.03, Steps: 500, Delivered: 10


Total Steps Trained:  34%|███▍      | 338518/1000000 [1:40:07<15:32:46, 11.82step/s]


--- Rollout Summary (Steps 338001 to 338500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 983.2761
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  34%|███▍      | 338989/1000000 [1:40:11<1:31:22, 120.57step/s]


Episode 678 finished at step 500 (339000 total). Env Reward: -11.19, Steps: 500, Delivered: 3


Total Steps Trained:  34%|███▍      | 339012/1000000 [1:40:16<15:51:33, 11.58step/s]


--- Rollout Summary (Steps 338501 to 339000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 555.8201
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  34%|███▍      | 339489/1000000 [1:40:20<1:31:05, 120.85step/s]


Episode 679 finished at step 500 (339500 total). Env Reward: -9.96, Steps: 500, Delivered: 4


Total Steps Trained:  34%|███▍      | 339513/1000000 [1:40:25<15:35:52, 11.76step/s]


--- Rollout Summary (Steps 339001 to 339500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 547.4031
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  34%|███▍      | 339997/1000000 [1:40:29<1:34:07, 116.87step/s]


Episode 680 finished at step 500 (340000 total). Env Reward: -6.11, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000680_map1.pth


Total Steps Trained:  34%|███▍      | 340021/1000000 [1:40:34<15:50:52, 11.57step/s]


--- Rollout Summary (Steps 339501 to 340000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 462.1318
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  34%|███▍      | 340490/1000000 [1:40:38<1:37:50, 112.35step/s]


Episode 681 finished at step 500 (340500 total). Env Reward: -5.57, Steps: 500, Delivered: 9


Total Steps Trained:  34%|███▍      | 340514/1000000 [1:40:43<16:01:39, 11.43step/s]


--- Rollout Summary (Steps 340001 to 340500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 750.8241
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  34%|███▍      | 341000/1000000 [1:40:47<1:41:25, 108.29step/s]


Episode 682 finished at step 500 (341000 total). Env Reward: -11.33, Steps: 500, Delivered: 3


Total Steps Trained:  34%|███▍      | 341011/1000000 [1:40:52<24:01:48,  7.62step/s]


--- Rollout Summary (Steps 340501 to 341000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 589.9944
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  34%|███▍      | 341489/1000000 [1:40:56<1:30:56, 120.67step/s]


Episode 683 finished at step 500 (341500 total). Env Reward: -9.07, Steps: 500, Delivered: 6


Total Steps Trained:  34%|███▍      | 341511/1000000 [1:41:00<16:24:33, 11.15step/s]


--- Rollout Summary (Steps 341001 to 341500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0093
Avg Critic Loss (per minibatch): 535.7582
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  34%|███▍      | 341994/1000000 [1:41:05<1:29:00, 123.20step/s]


Episode 684 finished at step 500 (342000 total). Env Reward: -11.76, Steps: 500, Delivered: 3


Total Steps Trained:  34%|███▍      | 342018/1000000 [1:41:09<15:30:12, 11.79step/s]


--- Rollout Summary (Steps 341501 to 342000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 578.0350
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  34%|███▍      | 342499/1000000 [1:41:14<1:29:52, 121.92step/s]


Episode 685 finished at step 500 (342500 total). Env Reward: -10.86, Steps: 500, Delivered: 4


Total Steps Trained:  34%|███▍      | 342512/1000000 [1:41:18<20:32:48,  8.89step/s]


--- Rollout Summary (Steps 342001 to 342500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 500.0859
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  34%|███▍      | 342998/1000000 [1:41:22<1:30:50, 120.54step/s]


Episode 686 finished at step 500 (343000 total). Env Reward: -10.48, Steps: 500, Delivered: 4


Total Steps Trained:  34%|███▍      | 343011/1000000 [1:41:27<20:21:49,  8.96step/s]


--- Rollout Summary (Steps 342501 to 343000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 539.0825
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  34%|███▍      | 343489/1000000 [1:41:31<1:29:51, 121.76step/s]


Episode 687 finished at step 500 (343500 total). Env Reward: 0.59, Steps: 500, Delivered: 6


Total Steps Trained:  34%|███▍      | 343513/1000000 [1:41:36<15:58:59, 11.41step/s]


--- Rollout Summary (Steps 343001 to 343500) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 659.9341
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  34%|███▍      | 343996/1000000 [1:41:40<1:31:30, 119.49step/s]


Episode 688 finished at step 500 (344000 total). Env Reward: -8.78, Steps: 500, Delivered: 6


Total Steps Trained:  34%|███▍      | 344021/1000000 [1:41:45<14:55:59, 12.20step/s]


--- Rollout Summary (Steps 343501 to 344000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 441.3687
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  34%|███▍      | 344499/1000000 [1:41:49<1:31:39, 119.18step/s]


Episode 689 finished at step 500 (344500 total). Env Reward: -8.26, Steps: 500, Delivered: 6


Total Steps Trained:  34%|███▍      | 344511/1000000 [1:41:53<21:17:53,  8.55step/s]


--- Rollout Summary (Steps 344001 to 344500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 492.2840
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  34%|███▍      | 344992/1000000 [1:41:57<1:29:11, 122.40step/s]


Episode 690 finished at step 500 (345000 total). Env Reward: -11.57, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000690_map1.pth


Total Steps Trained:  35%|███▍      | 345016/1000000 [1:42:02<15:36:07, 11.66step/s]


--- Rollout Summary (Steps 344501 to 345000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.04
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 464.6196
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  35%|███▍      | 345492/1000000 [1:42:06<1:34:18, 115.67step/s]


Episode 691 finished at step 500 (345500 total). Env Reward: -4.56, Steps: 500, Delivered: 10


Total Steps Trained:  35%|███▍      | 345515/1000000 [1:42:11<16:23:15, 11.09step/s]


--- Rollout Summary (Steps 345001 to 345500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 574.4156
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  35%|███▍      | 345993/1000000 [1:42:15<1:33:15, 116.87step/s]


Episode 692 finished at step 500 (346000 total). Env Reward: -6.93, Steps: 500, Delivered: 8


Total Steps Trained:  35%|███▍      | 346016/1000000 [1:42:20<16:13:22, 11.20step/s]


--- Rollout Summary (Steps 345501 to 346000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 449.7168
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  35%|███▍      | 346491/1000000 [1:42:24<1:29:04, 122.29step/s]


Episode 693 finished at step 500 (346500 total). Env Reward: -7.78, Steps: 500, Delivered: 7


Total Steps Trained:  35%|███▍      | 346516/1000000 [1:42:29<14:59:42, 12.11step/s]


--- Rollout Summary (Steps 346001 to 346500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 409.6770
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  35%|███▍      | 346996/1000000 [1:42:33<1:30:02, 120.87step/s]


Episode 694 finished at step 500 (347000 total). Env Reward: -1.70, Steps: 500, Delivered: 4


Total Steps Trained:  35%|███▍      | 347021/1000000 [1:42:37<15:12:26, 11.93step/s]


--- Rollout Summary (Steps 346501 to 347000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 704.7491
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  35%|███▍      | 347490/1000000 [1:42:41<1:31:08, 119.31step/s]


Episode 695 finished at step 500 (347500 total). Env Reward: -11.18, Steps: 500, Delivered: 4


Total Steps Trained:  35%|███▍      | 347514/1000000 [1:42:46<15:31:37, 11.67step/s]


--- Rollout Summary (Steps 347001 to 347500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 532.8445
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  35%|███▍      | 347993/1000000 [1:42:50<1:35:09, 114.19step/s]


Episode 696 finished at step 500 (348000 total). Env Reward: -9.40, Steps: 500, Delivered: 5


Total Steps Trained:  35%|███▍      | 348016/1000000 [1:42:55<16:14:02, 11.16step/s]


--- Rollout Summary (Steps 347501 to 348000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 514.6811
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  35%|███▍      | 348495/1000000 [1:42:59<1:31:32, 118.62step/s]


Episode 697 finished at step 500 (348500 total). Env Reward: -8.45, Steps: 500, Delivered: 6


Total Steps Trained:  35%|███▍      | 348519/1000000 [1:43:04<15:22:53, 11.77step/s]


--- Rollout Summary (Steps 348001 to 348500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 559.9999
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  35%|███▍      | 348997/1000000 [1:43:08<1:45:56, 102.41step/s]


Episode 698 finished at step 500 (349000 total). Env Reward: -10.95, Steps: 500, Delivered: 3


Total Steps Trained:  35%|███▍      | 349019/1000000 [1:43:13<16:49:24, 10.75step/s]


--- Rollout Summary (Steps 348501 to 349000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 555.7033
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  35%|███▍      | 349492/1000000 [1:43:17<1:32:57, 116.63step/s]


Episode 699 finished at step 500 (349500 total). Env Reward: -11.01, Steps: 500, Delivered: 4


Total Steps Trained:  35%|███▍      | 349516/1000000 [1:43:21<15:45:02, 11.47step/s]


--- Rollout Summary (Steps 349001 to 349500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 492.4751
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  35%|███▍      | 349990/1000000 [1:43:25<1:28:48, 122.00step/s]


Episode 700 finished at step 500 (350000 total). Env Reward: 0.44, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000700_map1.pth


Total Steps Trained:  35%|███▌      | 350014/1000000 [1:43:30<15:26:35, 11.69step/s]


--- Rollout Summary (Steps 349501 to 350000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 521.3887
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  35%|███▌      | 350494/1000000 [1:43:34<1:32:13, 117.38step/s]


Episode 701 finished at step 500 (350500 total). Env Reward: -6.63, Steps: 500, Delivered: 8


Total Steps Trained:  35%|███▌      | 350517/1000000 [1:43:39<16:27:20, 10.96step/s]


--- Rollout Summary (Steps 350001 to 350500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 450.3811
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  35%|███▌      | 350992/1000000 [1:43:43<1:28:42, 121.94step/s]


Episode 702 finished at step 500 (351000 total). Env Reward: -8.82, Steps: 500, Delivered: 6


Total Steps Trained:  35%|███▌      | 351016/1000000 [1:43:48<15:41:48, 11.48step/s]


--- Rollout Summary (Steps 350501 to 351000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 460.1973
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  35%|███▌      | 351500/1000000 [1:43:52<1:37:27, 110.90step/s]


Episode 703 finished at step 500 (351500 total). Env Reward: -8.72, Steps: 500, Delivered: 6


Total Steps Trained:  35%|███▌      | 351512/1000000 [1:43:57<21:49:55,  8.25step/s]


--- Rollout Summary (Steps 351001 to 351500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 547.8034
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  35%|███▌      | 352000/1000000 [1:44:01<1:38:13, 109.95step/s]


Episode 704 finished at step 500 (352000 total). Env Reward: -6.03, Steps: 500, Delivered: 8


Total Steps Trained:  35%|███▌      | 352012/1000000 [1:44:06<21:34:29,  8.34step/s]


--- Rollout Summary (Steps 351501 to 352000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 458.2770
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  35%|███▌      | 352493/1000000 [1:44:10<1:43:29, 104.28step/s]


Episode 705 finished at step 500 (352500 total). Env Reward: -9.14, Steps: 500, Delivered: 5


Total Steps Trained:  35%|███▌      | 352514/1000000 [1:44:15<17:36:02, 10.22step/s]


--- Rollout Summary (Steps 352001 to 352500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 486.0614
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  35%|███▌      | 352997/1000000 [1:44:19<1:31:14, 118.18step/s]


Episode 706 finished at step 500 (353000 total). Env Reward: -8.60, Steps: 500, Delivered: 6


Total Steps Trained:  35%|███▌      | 353020/1000000 [1:44:24<15:45:44, 11.40step/s]


--- Rollout Summary (Steps 352501 to 353000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 431.8530
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  35%|███▌      | 353498/1000000 [1:44:28<1:27:53, 122.60step/s]


Episode 707 finished at step 500 (353500 total). Env Reward: -10.73, Steps: 500, Delivered: 3


Total Steps Trained:  35%|███▌      | 353511/1000000 [1:44:32<20:09:29,  8.91step/s]


--- Rollout Summary (Steps 353001 to 353500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 659.6842
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  35%|███▌      | 353996/1000000 [1:44:36<1:32:59, 115.78step/s]


Episode 708 finished at step 500 (354000 total). Env Reward: -8.98, Steps: 500, Delivered: 6


Total Steps Trained:  35%|███▌      | 354019/1000000 [1:44:41<16:10:38, 11.09step/s]


--- Rollout Summary (Steps 353501 to 354000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 702.6366
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  35%|███▌      | 354499/1000000 [1:44:45<1:28:19, 121.80step/s]


Episode 709 finished at step 500 (354500 total). Env Reward: -0.63, Steps: 500, Delivered: 5


Total Steps Trained:  35%|███▌      | 354512/1000000 [1:44:50<20:14:06,  8.86step/s]


--- Rollout Summary (Steps 354001 to 354500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 654.6251
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  35%|███▌      | 354999/1000000 [1:44:54<1:30:12, 119.17step/s]


Episode 710 finished at step 500 (355000 total). Env Reward: -9.05, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000710_map1.pth


Total Steps Trained:  36%|███▌      | 355011/1000000 [1:44:59<21:20:32,  8.39step/s]


--- Rollout Summary (Steps 354501 to 355000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 365.0752
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  36%|███▌      | 355496/1000000 [1:45:03<1:29:45, 119.67step/s]


Episode 711 finished at step 500 (355500 total). Env Reward: 1.65, Steps: 500, Delivered: 7


Total Steps Trained:  36%|███▌      | 355519/1000000 [1:45:08<15:38:11, 11.45step/s]


--- Rollout Summary (Steps 355001 to 355500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 838.6275
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  36%|███▌      | 355992/1000000 [1:45:11<1:28:20, 121.49step/s]


Episode 712 finished at step 500 (356000 total). Env Reward: -11.34, Steps: 500, Delivered: 3


Total Steps Trained:  36%|███▌      | 356016/1000000 [1:45:16<15:17:35, 11.70step/s]


--- Rollout Summary (Steps 355501 to 356000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 453.4319
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  36%|███▌      | 356494/1000000 [1:45:20<1:28:50, 120.72step/s]


Episode 713 finished at step 500 (356500 total). Env Reward: 2.43, Steps: 500, Delivered: 8


Total Steps Trained:  36%|███▌      | 356517/1000000 [1:45:25<15:34:07, 11.48step/s]


--- Rollout Summary (Steps 356001 to 356500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 786.6605
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  36%|███▌      | 357000/1000000 [1:45:29<1:28:56, 120.48step/s]


Episode 714 finished at step 500 (357000 total). Env Reward: -10.75, Steps: 500, Delivered: 4


Total Steps Trained:  36%|███▌      | 357013/1000000 [1:45:34<20:03:18,  8.91step/s]


--- Rollout Summary (Steps 356501 to 357000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 529.0908
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  36%|███▌      | 357495/1000000 [1:45:38<1:41:34, 105.42step/s]


Episode 715 finished at step 500 (357500 total). Env Reward: 1.28, Steps: 500, Delivered: 7


Total Steps Trained:  36%|███▌      | 357517/1000000 [1:45:43<16:17:38, 10.95step/s]


--- Rollout Summary (Steps 357001 to 357500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 642.9765
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  36%|███▌      | 357989/1000000 [1:45:47<1:29:02, 120.18step/s]


Episode 716 finished at step 500 (358000 total). Env Reward: -8.81, Steps: 500, Delivered: 6


Total Steps Trained:  36%|███▌      | 358014/1000000 [1:45:51<14:56:29, 11.94step/s]


--- Rollout Summary (Steps 357501 to 358000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 480.5925
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  36%|███▌      | 358494/1000000 [1:45:55<1:27:22, 122.36step/s]


Episode 717 finished at step 500 (358500 total). Env Reward: -2.76, Steps: 500, Delivered: 3


Total Steps Trained:  36%|███▌      | 358518/1000000 [1:46:00<15:18:30, 11.64step/s]


--- Rollout Summary (Steps 358001 to 358500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 695.2104
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  36%|███▌      | 359000/1000000 [1:46:04<1:33:15, 114.57step/s]


Episode 718 finished at step 500 (359000 total). Env Reward: -9.22, Steps: 500, Delivered: 5


Total Steps Trained:  36%|███▌      | 359012/1000000 [1:46:09<21:25:45,  8.31step/s]


--- Rollout Summary (Steps 358501 to 359000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 495.0003
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  36%|███▌      | 359497/1000000 [1:46:13<1:31:03, 117.23step/s]


Episode 719 finished at step 500 (359500 total). Env Reward: -10.33, Steps: 500, Delivered: 4


Total Steps Trained:  36%|███▌      | 359509/1000000 [1:46:18<21:50:02,  8.15step/s]


--- Rollout Summary (Steps 359001 to 359500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 631.4717
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  36%|███▌      | 359999/1000000 [1:46:22<1:32:55, 114.80step/s]


Episode 720 finished at step 500 (360000 total). Env Reward: 2.68, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000720_map1.pth


Total Steps Trained:  36%|███▌      | 360011/1000000 [1:46:27<21:08:56,  8.41step/s]


--- Rollout Summary (Steps 359501 to 360000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 1115.1105
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  36%|███▌      | 360496/1000000 [1:46:31<1:27:41, 121.55step/s]


Episode 721 finished at step 500 (360500 total). Env Reward: -6.76, Steps: 500, Delivered: 8


Total Steps Trained:  36%|███▌      | 360520/1000000 [1:46:36<15:05:58, 11.76step/s]


--- Rollout Summary (Steps 360001 to 360500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 702.8510
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  36%|███▌      | 361000/1000000 [1:46:40<1:35:27, 111.56step/s]


Episode 722 finished at step 500 (361000 total). Env Reward: -7.33, Steps: 500, Delivered: 7


Total Steps Trained:  36%|███▌      | 361012/1000000 [1:46:44<22:02:15,  8.05step/s]


--- Rollout Summary (Steps 360501 to 361000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 349.7020
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  36%|███▌      | 361493/1000000 [1:46:49<1:37:10, 109.50step/s]


Episode 723 finished at step 500 (361500 total). Env Reward: -7.90, Steps: 500, Delivered: 7


Total Steps Trained:  36%|███▌      | 361515/1000000 [1:46:54<16:11:12, 10.96step/s]


--- Rollout Summary (Steps 361001 to 361500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 518.1996
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  36%|███▌      | 361990/1000000 [1:46:58<1:31:51, 115.76step/s]


Episode 724 finished at step 500 (362000 total). Env Reward: 4.91, Steps: 500, Delivered: 11


Total Steps Trained:  36%|███▌      | 362014/1000000 [1:47:02<15:31:25, 11.42step/s]


--- Rollout Summary (Steps 361501 to 362000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 1240.5310
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  36%|███▌      | 362499/1000000 [1:47:07<1:29:04, 119.29step/s]


Episode 725 finished at step 500 (362500 total). Env Reward: 2.92, Steps: 500, Delivered: 9


Total Steps Trained:  36%|███▋      | 362511/1000000 [1:47:11<20:52:26,  8.48step/s]


--- Rollout Summary (Steps 362001 to 362500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 878.0111
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  36%|███▋      | 362989/1000000 [1:47:15<1:27:12, 121.74step/s]


Episode 726 finished at step 500 (363000 total). Env Reward: -11.64, Steps: 500, Delivered: 3


Total Steps Trained:  36%|███▋      | 363012/1000000 [1:47:20<15:49:34, 11.18step/s]


--- Rollout Summary (Steps 362501 to 363000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 501.2665
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  36%|███▋      | 363497/1000000 [1:47:24<1:30:52, 116.73step/s]


Episode 727 finished at step 500 (363500 total). Env Reward: -9.59, Steps: 500, Delivered: 5


Total Steps Trained:  36%|███▋      | 363520/1000000 [1:47:29<15:44:19, 11.23step/s]


--- Rollout Summary (Steps 363001 to 363500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 505.3892
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  36%|███▋      | 364000/1000000 [1:47:33<1:30:35, 117.00step/s]


Episode 728 finished at step 500 (364000 total). Env Reward: -9.57, Steps: 500, Delivered: 5


Total Steps Trained:  36%|███▋      | 364012/1000000 [1:47:38<21:30:38,  8.21step/s]


--- Rollout Summary (Steps 363501 to 364000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 448.8553
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  36%|███▋      | 364488/1000000 [1:47:42<1:26:38, 122.24step/s]


Episode 729 finished at step 500 (364500 total). Env Reward: -10.95, Steps: 500, Delivered: 4


Total Steps Trained:  36%|███▋      | 364511/1000000 [1:47:47<15:28:43, 11.40step/s]


--- Rollout Summary (Steps 364001 to 364500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 628.3086
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  36%|███▋      | 364994/1000000 [1:47:51<1:35:09, 111.21step/s]


Episode 730 finished at step 500 (365000 total). Env Reward: -8.58, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000730_map1.pth


Total Steps Trained:  37%|███▋      | 365017/1000000 [1:47:56<15:55:58, 11.07step/s]


--- Rollout Summary (Steps 364501 to 365000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 385.2940
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  37%|███▋      | 365494/1000000 [1:48:00<1:28:03, 120.09step/s]


Episode 731 finished at step 500 (365500 total). Env Reward: -9.19, Steps: 500, Delivered: 5


Total Steps Trained:  37%|███▋      | 365518/1000000 [1:48:05<15:05:02, 11.68step/s]


--- Rollout Summary (Steps 365001 to 365500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 479.1034
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  37%|███▋      | 365991/1000000 [1:48:09<1:31:09, 115.92step/s]


Episode 732 finished at step 500 (366000 total). Env Reward: -11.68, Steps: 500, Delivered: 3


Total Steps Trained:  37%|███▋      | 366013/1000000 [1:48:13<15:56:48, 11.04step/s]


--- Rollout Summary (Steps 365501 to 366000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 396.8950
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  37%|███▋      | 366500/1000000 [1:48:17<1:33:10, 113.32step/s]


Episode 733 finished at step 500 (366500 total). Env Reward: -7.04, Steps: 500, Delivered: 8


Total Steps Trained:  37%|███▋      | 366512/1000000 [1:48:22<21:05:29,  8.34step/s]


--- Rollout Summary (Steps 366001 to 366500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 631.3405
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  37%|███▋      | 366990/1000000 [1:48:26<1:29:29, 117.89step/s]


Episode 734 finished at step 500 (367000 total). Env Reward: -10.78, Steps: 500, Delivered: 4


Total Steps Trained:  37%|███▋      | 367013/1000000 [1:48:31<15:33:01, 11.31step/s]


--- Rollout Summary (Steps 366501 to 367000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 515.4809
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  37%|███▋      | 367493/1000000 [1:48:35<1:29:33, 117.70step/s]


Episode 735 finished at step 500 (367500 total). Env Reward: -10.54, Steps: 500, Delivered: 4


Total Steps Trained:  37%|███▋      | 367516/1000000 [1:48:40<15:36:17, 11.26step/s]


--- Rollout Summary (Steps 367001 to 367500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 510.3559
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  37%|███▋      | 367991/1000000 [1:48:44<1:35:50, 109.90step/s]


Episode 736 finished at step 500 (368000 total). Env Reward: -10.24, Steps: 500, Delivered: 4


Total Steps Trained:  37%|███▋      | 368013/1000000 [1:48:49<16:09:21, 10.87step/s]


--- Rollout Summary (Steps 367501 to 368000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 691.8944
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  37%|███▋      | 368494/1000000 [1:48:53<1:28:18, 119.18step/s]


Episode 737 finished at step 500 (368500 total). Env Reward: -9.29, Steps: 500, Delivered: 5


Total Steps Trained:  37%|███▋      | 368518/1000000 [1:48:58<15:27:35, 11.35step/s]


--- Rollout Summary (Steps 368001 to 368500) ---
Update Duration: 4.71s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 618.9140
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  37%|███▋      | 368992/1000000 [1:49:02<1:30:27, 116.25step/s]


Episode 738 finished at step 500 (369000 total). Env Reward: 0.80, Steps: 500, Delivered: 6


Total Steps Trained:  37%|███▋      | 369014/1000000 [1:49:07<16:02:33, 10.93step/s]


--- Rollout Summary (Steps 368501 to 369000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 844.5875
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  37%|███▋      | 369490/1000000 [1:49:11<1:27:36, 119.95step/s]


Episode 739 finished at step 500 (369500 total). Env Reward: -6.24, Steps: 500, Delivered: 9


Total Steps Trained:  37%|███▋      | 369513/1000000 [1:49:16<15:26:45, 11.34step/s]


--- Rollout Summary (Steps 369001 to 369500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 766.5455
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  37%|███▋      | 369994/1000000 [1:49:20<1:27:40, 119.77step/s]


Episode 740 finished at step 500 (370000 total). Env Reward: 1.09, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000740_map1.pth


Total Steps Trained:  37%|███▋      | 370017/1000000 [1:49:24<15:16:03, 11.46step/s]


--- Rollout Summary (Steps 369501 to 370000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 1021.3239
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  37%|███▋      | 370497/1000000 [1:49:28<1:27:07, 120.42step/s]


Episode 741 finished at step 500 (370500 total). Env Reward: -7.59, Steps: 500, Delivered: 7


Total Steps Trained:  37%|███▋      | 370521/1000000 [1:49:33<14:52:25, 11.76step/s]


--- Rollout Summary (Steps 370001 to 370500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 478.5978
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  37%|███▋      | 370998/1000000 [1:49:37<1:27:14, 120.17step/s]


Episode 742 finished at step 500 (371000 total). Env Reward: 1.69, Steps: 500, Delivered: 7


Total Steps Trained:  37%|███▋      | 371011/1000000 [1:49:42<20:17:35,  8.61step/s]


--- Rollout Summary (Steps 370501 to 371000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 893.0946
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  37%|███▋      | 371493/1000000 [1:49:46<1:28:31, 118.34step/s]


Episode 743 finished at step 500 (371500 total). Env Reward: -10.72, Steps: 500, Delivered: 4


Total Steps Trained:  37%|███▋      | 371517/1000000 [1:49:51<15:04:45, 11.58step/s]


--- Rollout Summary (Steps 371001 to 371500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 571.3316
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  37%|███▋      | 371997/1000000 [1:49:55<1:26:22, 121.18step/s]


Episode 744 finished at step 500 (372000 total). Env Reward: -0.67, Steps: 500, Delivered: 5


Total Steps Trained:  37%|███▋      | 372021/1000000 [1:50:00<15:19:09, 11.39step/s]


--- Rollout Summary (Steps 371501 to 372000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -6.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 819.8966
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  37%|███▋      | 372490/1000000 [1:50:04<1:28:01, 118.81step/s]


Episode 745 finished at step 500 (372500 total). Env Reward: -4.44, Steps: 500, Delivered: 10


Total Steps Trained:  37%|███▋      | 372511/1000000 [1:50:09<16:05:24, 10.83step/s]


--- Rollout Summary (Steps 372001 to 372500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 459.9298
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  37%|███▋      | 372998/1000000 [1:50:13<1:29:22, 116.93step/s]


Episode 746 finished at step 500 (373000 total). Env Reward: -11.30, Steps: 500, Delivered: 3


Total Steps Trained:  37%|███▋      | 373010/1000000 [1:50:17<20:38:17,  8.44step/s]


--- Rollout Summary (Steps 372501 to 373000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -6.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 550.9941
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  37%|███▋      | 373490/1000000 [1:50:21<1:26:45, 120.36step/s]


Episode 747 finished at step 500 (373500 total). Env Reward: -10.67, Steps: 500, Delivered: 4


Total Steps Trained:  37%|███▋      | 373515/1000000 [1:50:26<14:19:44, 12.14step/s]


--- Rollout Summary (Steps 373001 to 373500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 508.4279
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  37%|███▋      | 373992/1000000 [1:50:30<1:30:10, 115.71step/s]


Episode 748 finished at step 500 (374000 total). Env Reward: -7.26, Steps: 500, Delivered: 7


Total Steps Trained:  37%|███▋      | 374014/1000000 [1:50:35<15:48:07, 11.00step/s]


--- Rollout Summary (Steps 373501 to 374000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 570.1092
Avg Entropy (per minibatch): 2.5584
------------------------------


Total Steps Trained:  37%|███▋      | 374491/1000000 [1:50:39<1:31:29, 113.95step/s]


Episode 749 finished at step 500 (374500 total). Env Reward: -5.72, Steps: 500, Delivered: 9


Total Steps Trained:  37%|███▋      | 374513/1000000 [1:50:44<15:49:24, 10.98step/s]


--- Rollout Summary (Steps 374001 to 374500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -6.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 518.0581
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  37%|███▋      | 374995/1000000 [1:50:48<1:34:08, 110.65step/s]


Episode 750 finished at step 500 (375000 total). Env Reward: -6.78, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000750_map1.pth


Total Steps Trained:  38%|███▊      | 375018/1000000 [1:50:53<15:29:36, 11.21step/s]


--- Rollout Summary (Steps 374501 to 375000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 705.6211
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  38%|███▊      | 375495/1000000 [1:50:57<1:25:00, 122.43step/s]


Episode 751 finished at step 500 (375500 total). Env Reward: -9.83, Steps: 500, Delivered: 5


Total Steps Trained:  38%|███▊      | 375519/1000000 [1:51:01<14:53:16, 11.65step/s]


--- Rollout Summary (Steps 375001 to 375500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 465.8873
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  38%|███▊      | 375991/1000000 [1:51:06<1:28:00, 118.18step/s]


Episode 752 finished at step 500 (376000 total). Env Reward: -11.79, Steps: 500, Delivered: 3


Total Steps Trained:  38%|███▊      | 376014/1000000 [1:51:10<15:23:22, 11.26step/s]


--- Rollout Summary (Steps 375501 to 376000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0083
Avg Critic Loss (per minibatch): 621.3652
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  38%|███▊      | 376493/1000000 [1:51:14<1:27:12, 119.16step/s]


Episode 753 finished at step 500 (376500 total). Env Reward: -4.56, Steps: 500, Delivered: 10


Total Steps Trained:  38%|███▊      | 376515/1000000 [1:51:19<15:27:41, 11.20step/s]


--- Rollout Summary (Steps 376001 to 376500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 557.9565
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  38%|███▊      | 376992/1000000 [1:51:23<1:26:07, 120.57step/s]


Episode 754 finished at step 500 (377000 total). Env Reward: -11.78, Steps: 500, Delivered: 3


Total Steps Trained:  38%|███▊      | 377014/1000000 [1:51:28<15:22:26, 11.26step/s]


--- Rollout Summary (Steps 376501 to 377000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 543.4148
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  38%|███▊      | 377495/1000000 [1:51:32<1:24:54, 122.19step/s]


Episode 755 finished at step 500 (377500 total). Env Reward: -10.05, Steps: 500, Delivered: 5


Total Steps Trained:  38%|███▊      | 377521/1000000 [1:51:37<14:08:52, 12.22step/s]


--- Rollout Summary (Steps 377001 to 377500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 546.9691
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  38%|███▊      | 377989/1000000 [1:51:41<1:29:20, 116.03step/s]


Episode 756 finished at step 500 (378000 total). Env Reward: -9.30, Steps: 500, Delivered: 5


Total Steps Trained:  38%|███▊      | 378012/1000000 [1:51:45<15:07:40, 11.42step/s]


--- Rollout Summary (Steps 377501 to 378000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -6.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): 0.0030
Avg Critic Loss (per minibatch): 431.8712
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  38%|███▊      | 378496/1000000 [1:51:49<1:25:45, 120.78step/s]


Episode 757 finished at step 500 (378500 total). Env Reward: -9.92, Steps: 500, Delivered: 5


Total Steps Trained:  38%|███▊      | 378521/1000000 [1:51:54<14:14:45, 12.12step/s]


--- Rollout Summary (Steps 378001 to 378500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 546.9616
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  38%|███▊      | 379000/1000000 [1:51:58<1:29:20, 115.85step/s]


Episode 758 finished at step 500 (379000 total). Env Reward: -9.82, Steps: 500, Delivered: 5


Total Steps Trained:  38%|███▊      | 379012/1000000 [1:52:03<20:31:46,  8.40step/s]


--- Rollout Summary (Steps 378501 to 379000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 571.3360
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  38%|███▊      | 379488/1000000 [1:52:07<1:25:52, 120.43step/s]


Episode 759 finished at step 500 (379500 total). Env Reward: -10.64, Steps: 500, Delivered: 4


Total Steps Trained:  38%|███▊      | 379512/1000000 [1:52:12<14:38:32, 11.77step/s]


--- Rollout Summary (Steps 379001 to 379500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 398.7490
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  38%|███▊      | 379997/1000000 [1:52:16<1:26:18, 119.72step/s]


Episode 760 finished at step 500 (380000 total). Env Reward: -11.70, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000760_map1.pth


Total Steps Trained:  38%|███▊      | 380021/1000000 [1:52:21<14:31:40, 11.85step/s]


--- Rollout Summary (Steps 379501 to 380000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 519.6484
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  38%|███▊      | 380491/1000000 [1:52:25<1:23:37, 123.47step/s]


Episode 761 finished at step 500 (380500 total). Env Reward: -12.87, Steps: 500, Delivered: 2


Total Steps Trained:  38%|███▊      | 380515/1000000 [1:52:29<14:41:22, 11.71step/s]


--- Rollout Summary (Steps 380001 to 380500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 666.4194
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  38%|███▊      | 380999/1000000 [1:52:34<1:24:34, 121.98step/s]


Episode 762 finished at step 500 (381000 total). Env Reward: -6.39, Steps: 500, Delivered: 8


Total Steps Trained:  38%|███▊      | 381012/1000000 [1:52:38<20:32:43,  8.37step/s]


--- Rollout Summary (Steps 380501 to 381000) ---
Update Duration: 4.79s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 435.0199
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  38%|███▊      | 381491/1000000 [1:52:43<1:29:11, 115.57step/s]


Episode 763 finished at step 500 (381500 total). Env Reward: -10.63, Steps: 500, Delivered: 4


Total Steps Trained:  38%|███▊      | 381514/1000000 [1:52:47<15:14:02, 11.28step/s]


--- Rollout Summary (Steps 381001 to 381500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 452.3044
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  38%|███▊      | 381991/1000000 [1:52:51<1:26:07, 119.60step/s]


Episode 764 finished at step 500 (382000 total). Env Reward: -9.71, Steps: 500, Delivered: 5


Total Steps Trained:  38%|███▊      | 382014/1000000 [1:52:56<15:02:05, 11.42step/s]


--- Rollout Summary (Steps 381501 to 382000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 448.1711
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  38%|███▊      | 382497/1000000 [1:53:00<1:27:43, 117.32step/s]


Episode 765 finished at step 500 (382500 total). Env Reward: -12.05, Steps: 500, Delivered: 2


Total Steps Trained:  38%|███▊      | 382519/1000000 [1:53:05<15:28:37, 11.08step/s]


--- Rollout Summary (Steps 382001 to 382500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): 0.0014
Avg Critic Loss (per minibatch): 665.5480
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  38%|███▊      | 383000/1000000 [1:53:10<1:25:53, 119.73step/s]


Episode 766 finished at step 500 (383000 total). Env Reward: -9.52, Steps: 500, Delivered: 5


Total Steps Trained:  38%|███▊      | 383012/1000000 [1:53:14<19:41:25,  8.70step/s]


--- Rollout Summary (Steps 382501 to 383000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 497.9620
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  38%|███▊      | 383490/1000000 [1:53:18<1:27:37, 117.26step/s]


Episode 767 finished at step 500 (383500 total). Env Reward: -8.84, Steps: 500, Delivered: 5


Total Steps Trained:  38%|███▊      | 383513/1000000 [1:53:23<15:17:31, 11.20step/s]


--- Rollout Summary (Steps 383001 to 383500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 588.3344
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  38%|███▊      | 383995/1000000 [1:53:27<1:26:13, 119.06step/s]


Episode 768 finished at step 500 (384000 total). Env Reward: -7.88, Steps: 500, Delivered: 7


Total Steps Trained:  38%|███▊      | 384018/1000000 [1:53:32<14:46:23, 11.58step/s]


--- Rollout Summary (Steps 383501 to 384000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 459.5765
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  38%|███▊      | 384493/1000000 [1:53:36<1:28:59, 115.26step/s]


Episode 769 finished at step 500 (384500 total). Env Reward: -8.62, Steps: 500, Delivered: 6


Total Steps Trained:  38%|███▊      | 384515/1000000 [1:53:41<16:01:46, 10.67step/s]


--- Rollout Summary (Steps 384001 to 384500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 416.7868
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  38%|███▊      | 384997/1000000 [1:53:45<1:30:47, 112.90step/s]


Episode 770 finished at step 500 (385000 total). Env Reward: -9.60, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000770_map1.pth


Total Steps Trained:  39%|███▊      | 385020/1000000 [1:53:50<15:13:54, 11.22step/s]


--- Rollout Summary (Steps 384501 to 385000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 452.0438
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  39%|███▊      | 385492/1000000 [1:53:54<1:24:29, 121.21step/s]


Episode 771 finished at step 500 (385500 total). Env Reward: -11.41, Steps: 500, Delivered: 3


Total Steps Trained:  39%|███▊      | 385517/1000000 [1:53:58<14:10:25, 12.04step/s]


--- Rollout Summary (Steps 385001 to 385500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 472.2537
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  39%|███▊      | 385995/1000000 [1:54:02<1:28:33, 115.56step/s]


Episode 772 finished at step 500 (386000 total). Env Reward: -8.75, Steps: 500, Delivered: 6


Total Steps Trained:  39%|███▊      | 386019/1000000 [1:54:07<14:49:51, 11.50step/s]


--- Rollout Summary (Steps 385501 to 386000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 480.1404
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  39%|███▊      | 386496/1000000 [1:54:11<1:32:58, 109.98step/s]


Episode 773 finished at step 500 (386500 total). Env Reward: -2.76, Steps: 500, Delivered: 3


Total Steps Trained:  39%|███▊      | 386519/1000000 [1:54:16<15:30:17, 10.99step/s]


--- Rollout Summary (Steps 386001 to 386500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 739.5329
Avg Entropy (per minibatch): 2.5617
------------------------------


Total Steps Trained:  39%|███▊      | 386999/1000000 [1:54:20<1:23:55, 121.73step/s]


Episode 774 finished at step 500 (387000 total). Env Reward: -7.57, Steps: 500, Delivered: 7


Total Steps Trained:  39%|███▊      | 387012/1000000 [1:54:25<19:09:45,  8.89step/s]


--- Rollout Summary (Steps 386501 to 387000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 604.2527
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  39%|███▊      | 387497/1000000 [1:54:29<1:26:49, 117.58step/s]


Episode 775 finished at step 500 (387500 total). Env Reward: -11.07, Steps: 500, Delivered: 3


Total Steps Trained:  39%|███▉      | 387520/1000000 [1:54:33<14:56:10, 11.39step/s]


--- Rollout Summary (Steps 387001 to 387500) ---
Update Duration: 4.43s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0003
Avg Critic Loss (per minibatch): 567.4791
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  39%|███▉      | 387990/1000000 [1:54:38<1:26:16, 118.23step/s]


Episode 776 finished at step 500 (388000 total). Env Reward: -12.75, Steps: 500, Delivered: 2


Total Steps Trained:  39%|███▉      | 388011/1000000 [1:54:42<15:50:01, 10.74step/s]


--- Rollout Summary (Steps 387501 to 388000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0014
Avg Critic Loss (per minibatch): 457.4465
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  39%|███▉      | 388495/1000000 [1:54:46<1:28:12, 115.54step/s]


Episode 777 finished at step 500 (388500 total). Env Reward: -10.53, Steps: 500, Delivered: 4


Total Steps Trained:  39%|███▉      | 388518/1000000 [1:54:51<15:15:16, 11.13step/s]


--- Rollout Summary (Steps 388001 to 388500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 422.8114
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  39%|███▉      | 388999/1000000 [1:54:55<1:23:17, 122.25step/s]


Episode 778 finished at step 500 (389000 total). Env Reward: -7.70, Steps: 500, Delivered: 7


Total Steps Trained:  39%|███▉      | 389012/1000000 [1:55:00<19:06:28,  8.88step/s]


--- Rollout Summary (Steps 388501 to 389000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 439.8855
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  39%|███▉      | 389496/1000000 [1:55:04<1:31:10, 111.60step/s]


Episode 779 finished at step 500 (389500 total). Env Reward: -5.80, Steps: 500, Delivered: 9


Total Steps Trained:  39%|███▉      | 389519/1000000 [1:55:09<15:06:55, 11.22step/s]


--- Rollout Summary (Steps 389001 to 389500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 681.8407
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  39%|███▉      | 389994/1000000 [1:55:13<1:28:46, 114.53step/s]


Episode 780 finished at step 500 (390000 total). Env Reward: -8.74, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000780_map1.pth


Total Steps Trained:  39%|███▉      | 390017/1000000 [1:55:18<15:27:22, 10.96step/s]


--- Rollout Summary (Steps 389501 to 390000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0085
Avg Critic Loss (per minibatch): 658.4262
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  39%|███▉      | 390494/1000000 [1:55:22<1:27:15, 116.43step/s]


Episode 781 finished at step 500 (390500 total). Env Reward: -12.92, Steps: 500, Delivered: 2


Total Steps Trained:  39%|███▉      | 390517/1000000 [1:55:26<15:04:53, 11.23step/s]


--- Rollout Summary (Steps 390001 to 390500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 456.9228
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  39%|███▉      | 390989/1000000 [1:55:31<1:24:52, 119.59step/s]


Episode 782 finished at step 500 (391000 total). Env Reward: 3.08, Steps: 500, Delivered: 9


Total Steps Trained:  39%|███▉      | 391011/1000000 [1:55:35<15:46:53, 10.72step/s]


--- Rollout Summary (Steps 390501 to 391000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 1181.3488
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  39%|███▉      | 391496/1000000 [1:55:40<1:30:23, 112.19step/s]


Episode 783 finished at step 500 (391500 total). Env Reward: 0.33, Steps: 500, Delivered: 6


Total Steps Trained:  39%|███▉      | 391519/1000000 [1:55:44<15:14:48, 11.09step/s]


--- Rollout Summary (Steps 391001 to 391500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 691.8464
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  39%|███▉      | 392000/1000000 [1:55:49<1:25:30, 118.51step/s]


Episode 784 finished at step 500 (392000 total). Env Reward: -11.99, Steps: 500, Delivered: 2


Total Steps Trained:  39%|███▉      | 392012/1000000 [1:55:53<19:43:08,  8.56step/s]


--- Rollout Summary (Steps 391501 to 392000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 681.6456
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  39%|███▉      | 392495/1000000 [1:55:57<1:23:26, 121.34step/s]


Episode 785 finished at step 500 (392500 total). Env Reward: -5.47, Steps: 500, Delivered: 9


Total Steps Trained:  39%|███▉      | 392518/1000000 [1:56:02<14:36:23, 11.55step/s]


--- Rollout Summary (Steps 392001 to 392500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 624.4279
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  39%|███▉      | 392996/1000000 [1:56:06<1:25:02, 118.96step/s]


Episode 786 finished at step 500 (393000 total). Env Reward: -0.60, Steps: 500, Delivered: 5


Total Steps Trained:  39%|███▉      | 393019/1000000 [1:56:11<14:51:59, 11.34step/s]


--- Rollout Summary (Steps 392501 to 393000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 694.8372
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  39%|███▉      | 393493/1000000 [1:56:15<1:27:30, 115.51step/s]


Episode 787 finished at step 500 (393500 total). Env Reward: 2.09, Steps: 500, Delivered: 8


Total Steps Trained:  39%|███▉      | 393516/1000000 [1:56:20<15:29:07, 10.88step/s]


--- Rollout Summary (Steps 393001 to 393500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 808.7674
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  39%|███▉      | 393988/1000000 [1:56:24<1:23:50, 120.46step/s]


Episode 788 finished at step 500 (394000 total). Env Reward: -8.70, Steps: 500, Delivered: 6


Total Steps Trained:  39%|███▉      | 394012/1000000 [1:56:28<14:26:04, 11.66step/s]


--- Rollout Summary (Steps 393501 to 394000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 424.2856
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  39%|███▉      | 394490/1000000 [1:56:33<1:23:04, 121.47step/s]


Episode 789 finished at step 500 (394500 total). Env Reward: 0.95, Steps: 500, Delivered: 7


Total Steps Trained:  39%|███▉      | 394514/1000000 [1:56:37<14:26:50, 11.64step/s]


--- Rollout Summary (Steps 394001 to 394500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 799.4547
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  39%|███▉      | 394995/1000000 [1:56:42<1:34:37, 106.56step/s]


Episode 790 finished at step 500 (395000 total). Env Reward: 3.49, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000790_map1.pth


Total Steps Trained:  40%|███▉      | 395018/1000000 [1:56:46<15:34:01, 10.80step/s]


--- Rollout Summary (Steps 394501 to 395000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 854.7967
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  40%|███▉      | 395496/1000000 [1:56:50<1:27:06, 115.66step/s]


Episode 791 finished at step 500 (395500 total). Env Reward: -9.20, Steps: 500, Delivered: 6


Total Steps Trained:  40%|███▉      | 395520/1000000 [1:56:55<14:36:55, 11.49step/s]


--- Rollout Summary (Steps 395001 to 395500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 405.7559
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  40%|███▉      | 395991/1000000 [1:56:59<1:23:02, 121.22step/s]


Episode 792 finished at step 500 (396000 total). Env Reward: -10.99, Steps: 500, Delivered: 4


Total Steps Trained:  40%|███▉      | 396015/1000000 [1:57:04<14:14:06, 11.79step/s]


--- Rollout Summary (Steps 395501 to 396000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 368.6072
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  40%|███▉      | 396500/1000000 [1:57:08<1:27:22, 115.12step/s]


Episode 793 finished at step 500 (396500 total). Env Reward: -1.68, Steps: 500, Delivered: 4


Total Steps Trained:  40%|███▉      | 396512/1000000 [1:57:13<19:36:33,  8.55step/s]


--- Rollout Summary (Steps 396001 to 396500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 794.5022
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  40%|███▉      | 396992/1000000 [1:57:17<1:24:49, 118.47step/s]


Episode 794 finished at step 500 (397000 total). Env Reward: 1.82, Steps: 500, Delivered: 7


Total Steps Trained:  40%|███▉      | 397016/1000000 [1:57:22<14:41:23, 11.40step/s]


--- Rollout Summary (Steps 396501 to 397000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 1103.5711
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  40%|███▉      | 397491/1000000 [1:57:26<1:27:22, 114.93step/s]


Episode 795 finished at step 500 (397500 total). Env Reward: 0.63, Steps: 500, Delivered: 6


Total Steps Trained:  40%|███▉      | 397515/1000000 [1:57:30<14:22:20, 11.64step/s]


--- Rollout Summary (Steps 397001 to 397500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 867.8223
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  40%|███▉      | 398000/1000000 [1:57:35<1:29:01, 112.69step/s]


Episode 796 finished at step 500 (398000 total). Env Reward: -11.04, Steps: 500, Delivered: 3


Total Steps Trained:  40%|███▉      | 398012/1000000 [1:57:39<20:42:48,  8.07step/s]


--- Rollout Summary (Steps 397501 to 398000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 527.8077
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  40%|███▉      | 398500/1000000 [1:57:44<1:33:27, 107.26step/s]


Episode 797 finished at step 500 (398500 total). Env Reward: 2.44, Steps: 500, Delivered: 8


Total Steps Trained:  40%|███▉      | 398511/1000000 [1:57:48<21:25:39,  7.80step/s]


--- Rollout Summary (Steps 398001 to 398500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 702.6454
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  40%|███▉      | 398995/1000000 [1:57:52<1:30:02, 111.25step/s]


Episode 798 finished at step 500 (399000 total). Env Reward: 2.43, Steps: 500, Delivered: 8


Total Steps Trained:  40%|███▉      | 399018/1000000 [1:57:57<14:54:13, 11.20step/s]


--- Rollout Summary (Steps 398501 to 399000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 1042.8972
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  40%|███▉      | 399493/1000000 [1:58:01<1:30:07, 111.05step/s]


Episode 799 finished at step 500 (399500 total). Env Reward: -6.65, Steps: 500, Delivered: 8


Total Steps Trained:  40%|███▉      | 399516/1000000 [1:58:06<14:55:29, 11.18step/s]


--- Rollout Summary (Steps 399001 to 399500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 495.5793
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  40%|███▉      | 399994/1000000 [1:58:10<1:25:52, 116.45step/s]


Episode 800 finished at step 500 (400000 total). Env Reward: 0.03, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000800_map1.pth


Total Steps Trained:  40%|████      | 400017/1000000 [1:58:15<14:53:01, 11.20step/s]


--- Rollout Summary (Steps 399501 to 400000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 437.6066
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  40%|████      | 400495/1000000 [1:58:19<1:24:25, 118.36step/s]


Episode 801 finished at step 500 (400500 total). Env Reward: 0.56, Steps: 500, Delivered: 6


Total Steps Trained:  40%|████      | 400507/1000000 [1:58:24<19:47:22,  8.41step/s]


--- Rollout Summary (Steps 400001 to 400500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -6.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 968.1633
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  40%|████      | 400989/1000000 [1:58:28<1:24:26, 118.24step/s]


Episode 802 finished at step 500 (401000 total). Env Reward: -9.79, Steps: 500, Delivered: 5


Total Steps Trained:  40%|████      | 401012/1000000 [1:58:33<14:47:29, 11.25step/s]


--- Rollout Summary (Steps 400501 to 401000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -6.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 485.0502
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  40%|████      | 401499/1000000 [1:58:37<1:24:40, 117.81step/s]


Episode 803 finished at step 500 (401500 total). Env Reward: 3.30, Steps: 500, Delivered: 9


Total Steps Trained:  40%|████      | 401511/1000000 [1:58:42<19:55:33,  8.34step/s]


--- Rollout Summary (Steps 401001 to 401500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 420.7548
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  40%|████      | 401994/1000000 [1:58:46<1:22:01, 121.50step/s]


Episode 804 finished at step 500 (402000 total). Env Reward: -13.24, Steps: 500, Delivered: 1


Total Steps Trained:  40%|████      | 402018/1000000 [1:58:50<14:10:39, 11.72step/s]


--- Rollout Summary (Steps 401501 to 402000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 474.4161
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  40%|████      | 402498/1000000 [1:58:55<1:22:50, 120.20step/s]


Episode 805 finished at step 500 (402500 total). Env Reward: -12.10, Steps: 500, Delivered: 3


Total Steps Trained:  40%|████      | 402511/1000000 [1:58:59<19:12:50,  8.64step/s]


--- Rollout Summary (Steps 402001 to 402500) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -6.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 501.1988
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  40%|████      | 402995/1000000 [1:59:04<1:23:46, 118.78step/s]


Episode 806 finished at step 500 (403000 total). Env Reward: -9.36, Steps: 500, Delivered: 5


Total Steps Trained:  40%|████      | 403019/1000000 [1:59:08<14:07:07, 11.75step/s]


--- Rollout Summary (Steps 402501 to 403000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 655.6839
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  40%|████      | 403491/1000000 [1:59:12<1:23:31, 119.04step/s]


Episode 807 finished at step 500 (403500 total). Env Reward: -9.86, Steps: 500, Delivered: 5


Total Steps Trained:  40%|████      | 403514/1000000 [1:59:17<14:33:44, 11.38step/s]


--- Rollout Summary (Steps 403001 to 403500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 496.5902
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  40%|████      | 403997/1000000 [1:59:21<1:21:25, 122.00step/s]


Episode 808 finished at step 500 (404000 total). Env Reward: -8.31, Steps: 500, Delivered: 6


Total Steps Trained:  40%|████      | 404010/1000000 [1:59:26<18:32:15,  8.93step/s]


--- Rollout Summary (Steps 403501 to 404000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 494.5769
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  40%|████      | 404500/1000000 [1:59:30<1:31:05, 108.96step/s]


Episode 809 finished at step 500 (404500 total). Env Reward: -8.92, Steps: 500, Delivered: 5


Total Steps Trained:  40%|████      | 404511/1000000 [1:59:35<20:19:17,  8.14step/s]


--- Rollout Summary (Steps 404001 to 404500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 530.5459
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  40%|████      | 404995/1000000 [1:59:39<1:22:44, 119.85step/s]


Episode 810 finished at step 500 (405000 total). Env Reward: -4.01, Steps: 500, Delivered: 10
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000810_map1.pth


Total Steps Trained:  41%|████      | 405019/1000000 [1:59:44<14:03:55, 11.75step/s]


--- Rollout Summary (Steps 404501 to 405000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 506.8765
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  41%|████      | 405498/1000000 [1:59:48<1:33:21, 106.14step/s]


Episode 811 finished at step 500 (405500 total). Env Reward: -8.57, Steps: 500, Delivered: 6


Total Steps Trained:  41%|████      | 405522/1000000 [1:59:53<14:42:26, 11.23step/s]


--- Rollout Summary (Steps 405001 to 405500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0077
Avg Critic Loss (per minibatch): 501.9260
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  41%|████      | 405990/1000000 [1:59:56<1:20:22, 123.18step/s]


Episode 812 finished at step 500 (406000 total). Env Reward: -12.55, Steps: 500, Delivered: 2


Total Steps Trained:  41%|████      | 406014/1000000 [2:00:01<14:11:08, 11.63step/s]


--- Rollout Summary (Steps 405501 to 406000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -6.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 513.6554
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  41%|████      | 406498/1000000 [2:00:05<1:22:02, 120.57step/s]


Episode 813 finished at step 500 (406500 total). Env Reward: -0.98, Steps: 500, Delivered: 5


Total Steps Trained:  41%|████      | 406511/1000000 [2:00:10<18:41:41,  8.82step/s]


--- Rollout Summary (Steps 406001 to 406500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 808.4671
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  41%|████      | 406989/1000000 [2:00:14<1:20:01, 123.50step/s]


Episode 814 finished at step 500 (407000 total). Env Reward: -10.66, Steps: 500, Delivered: 4


Total Steps Trained:  41%|████      | 407014/1000000 [2:00:19<13:41:32, 12.03step/s]


--- Rollout Summary (Steps 406501 to 407000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 382.4880
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  41%|████      | 407497/1000000 [2:00:23<1:23:38, 118.07step/s]


Episode 815 finished at step 500 (407500 total). Env Reward: -9.33, Steps: 500, Delivered: 5


Total Steps Trained:  41%|████      | 407521/1000000 [2:00:27<14:11:20, 11.60step/s]


--- Rollout Summary (Steps 407001 to 407500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 577.5454
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  41%|████      | 407994/1000000 [2:00:32<1:26:14, 114.41step/s]


Episode 816 finished at step 500 (408000 total). Env Reward: -8.61, Steps: 500, Delivered: 6


Total Steps Trained:  41%|████      | 408017/1000000 [2:00:36<14:42:19, 11.18step/s]


--- Rollout Summary (Steps 407501 to 408000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 536.3231
Avg Entropy (per minibatch): 2.5621
------------------------------


Total Steps Trained:  41%|████      | 408499/1000000 [2:00:40<1:24:50, 116.19step/s]


Episode 817 finished at step 500 (408500 total). Env Reward: -1.30, Steps: 500, Delivered: 5


Total Steps Trained:  41%|████      | 408511/1000000 [2:00:45<19:25:24,  8.46step/s]


--- Rollout Summary (Steps 408001 to 408500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 682.3784
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  41%|████      | 408994/1000000 [2:00:49<1:21:06, 121.46step/s]


Episode 818 finished at step 500 (409000 total). Env Reward: -11.83, Steps: 500, Delivered: 3


Total Steps Trained:  41%|████      | 409017/1000000 [2:00:54<14:18:54, 11.47step/s]


--- Rollout Summary (Steps 408501 to 409000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 601.2629
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  41%|████      | 409495/1000000 [2:00:58<1:22:08, 119.82step/s]


Episode 819 finished at step 500 (409500 total). Env Reward: -0.18, Steps: 500, Delivered: 5


Total Steps Trained:  41%|████      | 409519/1000000 [2:01:03<14:14:52, 11.51step/s]


--- Rollout Summary (Steps 409001 to 409500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -6.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 848.2527
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  41%|████      | 409991/1000000 [2:01:07<1:27:51, 111.93step/s]


Episode 820 finished at step 500 (410000 total). Env Reward: -9.66, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000820_map1.pth


Total Steps Trained:  41%|████      | 410014/1000000 [2:01:12<14:45:56, 11.10step/s]


--- Rollout Summary (Steps 409501 to 410000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 589.8561
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  41%|████      | 410492/1000000 [2:01:16<1:24:42, 115.99step/s]


Episode 821 finished at step 500 (410500 total). Env Reward: -6.73, Steps: 500, Delivered: 8


Total Steps Trained:  41%|████      | 410516/1000000 [2:01:21<14:19:55, 11.43step/s]


--- Rollout Summary (Steps 410001 to 410500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 626.2308
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  41%|████      | 410989/1000000 [2:01:25<1:21:15, 120.80step/s]


Episode 822 finished at step 500 (411000 total). Env Reward: -10.00, Steps: 500, Delivered: 5


Total Steps Trained:  41%|████      | 411013/1000000 [2:01:30<14:15:03, 11.48step/s]


--- Rollout Summary (Steps 410501 to 411000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 491.1094
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  41%|████      | 411498/1000000 [2:01:34<1:23:58, 116.81step/s]


Episode 823 finished at step 500 (411500 total). Env Reward: -11.29, Steps: 500, Delivered: 3


Total Steps Trained:  41%|████      | 411510/1000000 [2:01:38<19:48:50,  8.25step/s]


--- Rollout Summary (Steps 411001 to 411500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 526.4920
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  41%|████      | 411993/1000000 [2:01:42<1:21:10, 120.73step/s]


Episode 824 finished at step 500 (412000 total). Env Reward: -9.02, Steps: 500, Delivered: 5


Total Steps Trained:  41%|████      | 412018/1000000 [2:01:47<13:24:32, 12.18step/s]


--- Rollout Summary (Steps 411501 to 412000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 695.8103
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  41%|████      | 412491/1000000 [2:01:51<1:23:01, 117.95step/s]


Episode 825 finished at step 500 (412500 total). Env Reward: 2.68, Steps: 500, Delivered: 8


Total Steps Trained:  41%|████▏     | 412515/1000000 [2:01:56<14:15:33, 11.44step/s]


--- Rollout Summary (Steps 412001 to 412500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 596.1060
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  41%|████▏     | 412995/1000000 [2:02:00<1:19:57, 122.36step/s]


Episode 826 finished at step 500 (413000 total). Env Reward: -9.34, Steps: 500, Delivered: 5


Total Steps Trained:  41%|████▏     | 413018/1000000 [2:02:05<14:20:04, 11.37step/s]


--- Rollout Summary (Steps 412501 to 413000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 428.0772
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  41%|████▏     | 413496/1000000 [2:02:09<1:28:05, 110.97step/s]


Episode 827 finished at step 500 (413500 total). Env Reward: -8.74, Steps: 500, Delivered: 6


Total Steps Trained:  41%|████▏     | 413519/1000000 [2:02:14<14:37:08, 11.14step/s]


--- Rollout Summary (Steps 413001 to 413500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 544.3171
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  41%|████▏     | 413997/1000000 [2:02:18<1:24:27, 115.64step/s]


Episode 828 finished at step 500 (414000 total). Env Reward: -8.18, Steps: 500, Delivered: 7


Total Steps Trained:  41%|████▏     | 414020/1000000 [2:02:22<14:25:21, 11.29step/s]


--- Rollout Summary (Steps 413501 to 414000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 610.7630
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  41%|████▏     | 414497/1000000 [2:02:27<1:23:03, 117.50step/s]


Episode 829 finished at step 500 (414500 total). Env Reward: -7.24, Steps: 500, Delivered: 7


Total Steps Trained:  41%|████▏     | 414520/1000000 [2:02:31<14:24:17, 11.29step/s]


--- Rollout Summary (Steps 414001 to 414500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 415.8289
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  42%|████▏     | 415000/1000000 [2:02:35<1:23:40, 116.52step/s]


Episode 830 finished at step 500 (415000 total). Env Reward: -8.88, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000830_map1.pth


Total Steps Trained:  42%|████▏     | 415012/1000000 [2:02:40<19:34:30,  8.30step/s]


--- Rollout Summary (Steps 414501 to 415000) ---
Update Duration: 4.71s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 418.1152
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  42%|████▏     | 415500/1000000 [2:02:44<1:27:39, 111.13step/s]


Episode 831 finished at step 500 (415500 total). Env Reward: -4.69, Steps: 500, Delivered: 10


Total Steps Trained:  42%|████▏     | 415512/1000000 [2:02:49<19:48:26,  8.20step/s]


--- Rollout Summary (Steps 415001 to 415500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 414.1276
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  42%|████▏     | 415990/1000000 [2:02:53<1:20:55, 120.29step/s]


Episode 832 finished at step 500 (416000 total). Env Reward: -10.51, Steps: 500, Delivered: 4


Total Steps Trained:  42%|████▏     | 416013/1000000 [2:02:58<14:14:37, 11.39step/s]


--- Rollout Summary (Steps 415501 to 416000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 484.9176
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  42%|████▏     | 416500/1000000 [2:03:02<1:25:29, 113.75step/s]


Episode 833 finished at step 500 (416500 total). Env Reward: -9.22, Steps: 500, Delivered: 6


Total Steps Trained:  42%|████▏     | 416512/1000000 [2:03:06<19:32:22,  8.30step/s]


--- Rollout Summary (Steps 416001 to 416500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 422.7535
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  42%|████▏     | 416998/1000000 [2:03:11<1:20:23, 120.88step/s]


Episode 834 finished at step 500 (417000 total). Env Reward: -11.77, Steps: 500, Delivered: 3


Total Steps Trained:  42%|████▏     | 417011/1000000 [2:03:15<18:10:48,  8.91step/s]


--- Rollout Summary (Steps 416501 to 417000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 465.8801
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  42%|████▏     | 417488/1000000 [2:03:19<1:19:32, 122.05step/s]


Episode 835 finished at step 500 (417500 total). Env Reward: -8.90, Steps: 500, Delivered: 6


Total Steps Trained:  42%|████▏     | 417511/1000000 [2:03:24<13:54:32, 11.63step/s]


--- Rollout Summary (Steps 417001 to 417500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 514.9006
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  42%|████▏     | 417998/1000000 [2:03:28<1:24:38, 114.60step/s]


Episode 836 finished at step 500 (418000 total). Env Reward: -8.56, Steps: 500, Delivered: 6


Total Steps Trained:  42%|████▏     | 418021/1000000 [2:03:33<14:11:21, 11.39step/s]


--- Rollout Summary (Steps 417501 to 418000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 503.3180
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  42%|████▏     | 418500/1000000 [2:03:37<1:23:46, 115.68step/s]


Episode 837 finished at step 500 (418500 total). Env Reward: -8.63, Steps: 500, Delivered: 6


Total Steps Trained:  42%|████▏     | 418512/1000000 [2:03:42<20:02:06,  8.06step/s]


--- Rollout Summary (Steps 418001 to 418500) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 455.7151
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  42%|████▏     | 418993/1000000 [2:03:46<1:24:29, 114.60step/s]


Episode 838 finished at step 500 (419000 total). Env Reward: -3.28, Steps: 500, Delivered: 2


Total Steps Trained:  42%|████▏     | 419016/1000000 [2:03:51<14:19:56, 11.26step/s]


--- Rollout Summary (Steps 418501 to 419000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 733.2195
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  42%|████▏     | 419491/1000000 [2:03:55<1:24:35, 114.37step/s]


Episode 839 finished at step 500 (419500 total). Env Reward: -8.60, Steps: 500, Delivered: 6


Total Steps Trained:  42%|████▏     | 419514/1000000 [2:04:00<14:24:29, 11.19step/s]


--- Rollout Summary (Steps 419001 to 419500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 592.5537
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  42%|████▏     | 420000/1000000 [2:04:04<1:21:18, 118.88step/s]


Episode 840 finished at step 500 (420000 total). Env Reward: -2.23, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000840_map1.pth


Total Steps Trained:  42%|████▏     | 420012/1000000 [2:04:08<18:44:04,  8.60step/s]


--- Rollout Summary (Steps 419501 to 420000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 777.9417
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  42%|████▏     | 420490/1000000 [2:04:13<1:27:36, 110.25step/s]


Episode 841 finished at step 500 (420500 total). Env Reward: -7.97, Steps: 500, Delivered: 7


Total Steps Trained:  42%|████▏     | 420513/1000000 [2:04:17<14:25:07, 11.16step/s]


--- Rollout Summary (Steps 420001 to 420500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 402.3833
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  42%|████▏     | 420997/1000000 [2:04:21<1:21:20, 118.63step/s]


Episode 842 finished at step 500 (421000 total). Env Reward: -0.54, Steps: 500, Delivered: 5


Total Steps Trained:  42%|████▏     | 421020/1000000 [2:04:26<14:43:46, 10.92step/s]


--- Rollout Summary (Steps 420501 to 421000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 720.5097
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  42%|████▏     | 421497/1000000 [2:04:30<1:20:41, 119.50step/s]


Episode 843 finished at step 500 (421500 total). Env Reward: -11.93, Steps: 500, Delivered: 3


Total Steps Trained:  42%|████▏     | 421521/1000000 [2:04:35<13:34:32, 11.84step/s]


--- Rollout Summary (Steps 421001 to 421500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 540.0300
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  42%|████▏     | 421997/1000000 [2:04:39<1:20:43, 119.33step/s]


Episode 844 finished at step 500 (422000 total). Env Reward: -10.96, Steps: 500, Delivered: 4


Total Steps Trained:  42%|████▏     | 422020/1000000 [2:04:44<14:37:49, 10.97step/s]


--- Rollout Summary (Steps 421501 to 422000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 550.8191
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  42%|████▏     | 422498/1000000 [2:04:48<1:30:24, 106.46step/s]


Episode 845 finished at step 500 (422500 total). Env Reward: -7.18, Steps: 500, Delivered: 7


Total Steps Trained:  42%|████▏     | 422520/1000000 [2:04:53<14:52:41, 10.78step/s]


--- Rollout Summary (Steps 422001 to 422500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 595.7373
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  42%|████▏     | 422989/1000000 [2:04:57<1:19:43, 120.63step/s]


Episode 846 finished at step 500 (423000 total). Env Reward: -10.41, Steps: 500, Delivered: 4


Total Steps Trained:  42%|████▏     | 423013/1000000 [2:05:01<13:43:18, 11.68step/s]


--- Rollout Summary (Steps 422501 to 423000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 452.8024
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  42%|████▏     | 423496/1000000 [2:05:05<1:18:53, 121.79step/s]


Episode 847 finished at step 500 (423500 total). Env Reward: -10.13, Steps: 500, Delivered: 5


Total Steps Trained:  42%|████▏     | 423520/1000000 [2:05:10<13:41:19, 11.70step/s]


--- Rollout Summary (Steps 423001 to 423500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 479.2104
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  42%|████▏     | 423994/1000000 [2:05:14<1:25:58, 111.67step/s]


Episode 848 finished at step 500 (424000 total). Env Reward: -9.57, Steps: 500, Delivered: 5


Total Steps Trained:  42%|████▏     | 424018/1000000 [2:05:19<14:22:29, 11.13step/s]


--- Rollout Summary (Steps 423501 to 424000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 571.1515
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  42%|████▏     | 424491/1000000 [2:05:23<1:19:02, 121.36step/s]


Episode 849 finished at step 500 (424500 total). Env Reward: -11.04, Steps: 500, Delivered: 4


Total Steps Trained:  42%|████▏     | 424514/1000000 [2:05:28<14:01:19, 11.40step/s]


--- Rollout Summary (Steps 424001 to 424500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 563.0264
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  42%|████▎     | 425000/1000000 [2:05:32<1:21:53, 117.03step/s]


Episode 850 finished at step 500 (425000 total). Env Reward: -10.80, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000850_map1.pth


Total Steps Trained:  43%|████▎     | 425012/1000000 [2:05:37<19:13:27,  8.31step/s]


--- Rollout Summary (Steps 424501 to 425000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 547.5387
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  43%|████▎     | 425500/1000000 [2:05:41<1:23:23, 114.81step/s]


Episode 851 finished at step 500 (425500 total). Env Reward: -0.78, Steps: 500, Delivered: 5


Total Steps Trained:  43%|████▎     | 425512/1000000 [2:05:46<19:12:01,  8.31step/s]


--- Rollout Summary (Steps 425001 to 425500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 715.0126
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  43%|████▎     | 425993/1000000 [2:05:50<1:20:52, 118.29step/s]


Episode 852 finished at step 500 (426000 total). Env Reward: -12.07, Steps: 500, Delivered: 2


Total Steps Trained:  43%|████▎     | 426014/1000000 [2:05:54<14:38:29, 10.89step/s]


--- Rollout Summary (Steps 425501 to 426000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 473.3683
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  43%|████▎     | 426500/1000000 [2:05:59<1:20:54, 118.15step/s]


Episode 853 finished at step 500 (426500 total). Env Reward: -10.58, Steps: 500, Delivered: 4


Total Steps Trained:  43%|████▎     | 426512/1000000 [2:06:03<18:31:48,  8.60step/s]


--- Rollout Summary (Steps 426001 to 426500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 593.2086
Avg Entropy (per minibatch): 2.5587
------------------------------


Total Steps Trained:  43%|████▎     | 426997/1000000 [2:06:07<1:17:58, 122.48step/s]


Episode 854 finished at step 500 (427000 total). Env Reward: -11.58, Steps: 500, Delivered: 3


Total Steps Trained:  43%|████▎     | 427021/1000000 [2:06:12<13:37:54, 11.68step/s]


--- Rollout Summary (Steps 426501 to 427000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 465.8446
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  43%|████▎     | 427499/1000000 [2:06:16<1:20:31, 118.50step/s]


Episode 855 finished at step 500 (427500 total). Env Reward: -0.93, Steps: 500, Delivered: 5


Total Steps Trained:  43%|████▎     | 427511/1000000 [2:06:21<19:04:42,  8.34step/s]


--- Rollout Summary (Steps 427001 to 427500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 694.6761
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  43%|████▎     | 427988/1000000 [2:06:25<1:17:30, 123.00step/s]


Episode 856 finished at step 500 (428000 total). Env Reward: -12.48, Steps: 500, Delivered: 2


Total Steps Trained:  43%|████▎     | 428012/1000000 [2:06:30<13:31:14, 11.75step/s]


--- Rollout Summary (Steps 427501 to 428000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 527.5969
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  43%|████▎     | 428495/1000000 [2:06:34<1:20:37, 118.15step/s]


Episode 857 finished at step 500 (428500 total). Env Reward: 9.52, Steps: 500, Delivered: 6


Total Steps Trained:  43%|████▎     | 428517/1000000 [2:06:39<14:32:02, 10.92step/s]


--- Rollout Summary (Steps 428001 to 428500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0005
Avg Critic Loss (per minibatch): 1300.7843
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  43%|████▎     | 428991/1000000 [2:06:43<1:18:59, 120.49step/s]


Episode 858 finished at step 500 (429000 total). Env Reward: -12.74, Steps: 500, Delivered: 2


Total Steps Trained:  43%|████▎     | 429015/1000000 [2:06:47<13:34:35, 11.68step/s]


--- Rollout Summary (Steps 428501 to 429000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): 0.0021
Avg Critic Loss (per minibatch): 466.5562
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  43%|████▎     | 429493/1000000 [2:06:52<1:19:26, 119.68step/s]


Episode 859 finished at step 500 (429500 total). Env Reward: -10.76, Steps: 500, Delivered: 4


Total Steps Trained:  43%|████▎     | 429516/1000000 [2:06:56<14:04:07, 11.26step/s]


--- Rollout Summary (Steps 429001 to 429500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 609.6592
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  43%|████▎     | 430000/1000000 [2:07:01<1:19:11, 119.97step/s]


Episode 860 finished at step 500 (430000 total). Env Reward: -11.87, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000860_map1.pth


Total Steps Trained:  43%|████▎     | 430013/1000000 [2:07:05<17:55:00,  8.84step/s]


--- Rollout Summary (Steps 429501 to 430000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 453.3991
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  43%|████▎     | 430499/1000000 [2:07:09<1:21:17, 116.77step/s]


Episode 861 finished at step 500 (430500 total). Env Reward: -7.00, Steps: 500, Delivered: 8


Total Steps Trained:  43%|████▎     | 430511/1000000 [2:07:14<18:57:59,  8.34step/s]


--- Rollout Summary (Steps 430001 to 430500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 541.9657
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  43%|████▎     | 430989/1000000 [2:07:18<1:24:44, 111.91step/s]


Episode 862 finished at step 500 (431000 total). Env Reward: -6.34, Steps: 500, Delivered: 8


Total Steps Trained:  43%|████▎     | 431011/1000000 [2:07:23<15:21:21, 10.29step/s]


--- Rollout Summary (Steps 430501 to 431000) ---
Update Duration: 4.77s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 663.4071
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  43%|████▎     | 431491/1000000 [2:07:28<1:20:30, 117.69step/s]


Episode 863 finished at step 500 (431500 total). Env Reward: -9.63, Steps: 500, Delivered: 5


Total Steps Trained:  43%|████▎     | 431514/1000000 [2:07:32<13:56:30, 11.33step/s]


--- Rollout Summary (Steps 431001 to 431500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 678.5060
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  43%|████▎     | 431996/1000000 [2:07:36<1:19:43, 118.75step/s]


Episode 864 finished at step 500 (432000 total). Env Reward: -8.96, Steps: 500, Delivered: 6


Total Steps Trained:  43%|████▎     | 432018/1000000 [2:07:41<14:24:38, 10.95step/s]


--- Rollout Summary (Steps 431501 to 432000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 665.0529
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  43%|████▎     | 432490/1000000 [2:07:45<1:19:32, 118.90step/s]


Episode 865 finished at step 500 (432500 total). Env Reward: -0.52, Steps: 500, Delivered: 5


Total Steps Trained:  43%|████▎     | 432513/1000000 [2:07:50<14:17:54, 11.02step/s]


--- Rollout Summary (Steps 432001 to 432500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -6.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 819.3872
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  43%|████▎     | 432996/1000000 [2:07:54<1:18:03, 121.06step/s]


Episode 866 finished at step 500 (433000 total). Env Reward: -11.35, Steps: 500, Delivered: 3


Total Steps Trained:  43%|████▎     | 433020/1000000 [2:07:59<13:36:00, 11.58step/s]


--- Rollout Summary (Steps 432501 to 433000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 464.7651
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  43%|████▎     | 433497/1000000 [2:08:03<1:18:15, 120.65step/s]


Episode 867 finished at step 500 (433500 total). Env Reward: -9.61, Steps: 500, Delivered: 5


Total Steps Trained:  43%|████▎     | 433510/1000000 [2:08:08<17:57:10,  8.77step/s]


--- Rollout Summary (Steps 433001 to 433500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 641.3266
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  43%|████▎     | 433989/1000000 [2:08:12<1:19:43, 118.33step/s]


Episode 868 finished at step 500 (434000 total). Env Reward: -0.63, Steps: 500, Delivered: 5


Total Steps Trained:  43%|████▎     | 434011/1000000 [2:08:17<14:23:03, 10.93step/s]


--- Rollout Summary (Steps 433501 to 434000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 993.6159
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  43%|████▎     | 434494/1000000 [2:08:21<1:19:00, 119.29step/s]


Episode 869 finished at step 500 (434500 total). Env Reward: -8.53, Steps: 500, Delivered: 6


Total Steps Trained:  43%|████▎     | 434517/1000000 [2:08:26<14:11:52, 11.06step/s]


--- Rollout Summary (Steps 434001 to 434500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 643.7140
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  43%|████▎     | 434993/1000000 [2:08:30<1:19:20, 118.68step/s]


Episode 870 finished at step 500 (435000 total). Env Reward: -8.73, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000870_map1.pth


Total Steps Trained:  44%|████▎     | 435016/1000000 [2:08:35<13:49:26, 11.35step/s]


--- Rollout Summary (Steps 434501 to 435000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 508.9657
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  44%|████▎     | 435493/1000000 [2:08:39<1:20:53, 116.31step/s]


Episode 871 finished at step 500 (435500 total). Env Reward: -8.75, Steps: 500, Delivered: 6


Total Steps Trained:  44%|████▎     | 435516/1000000 [2:08:43<13:59:55, 11.20step/s]


--- Rollout Summary (Steps 435001 to 435500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0091
Avg Critic Loss (per minibatch): 374.7948
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  44%|████▎     | 435999/1000000 [2:08:48<1:20:17, 117.08step/s]


Episode 872 finished at step 500 (436000 total). Env Reward: -11.97, Steps: 500, Delivered: 3


Total Steps Trained:  44%|████▎     | 436011/1000000 [2:08:52<18:26:07,  8.50step/s]


--- Rollout Summary (Steps 435501 to 436000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 518.2078
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  44%|████▎     | 436489/1000000 [2:08:56<1:20:22, 116.84step/s]


Episode 873 finished at step 500 (436500 total). Env Reward: -10.75, Steps: 500, Delivered: 4


Total Steps Trained:  44%|████▎     | 436512/1000000 [2:09:01<14:00:56, 11.17step/s]


--- Rollout Summary (Steps 436001 to 436500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 484.6427
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  44%|████▎     | 436989/1000000 [2:09:05<1:17:24, 121.22step/s]


Episode 874 finished at step 500 (437000 total). Env Reward: -7.43, Steps: 500, Delivered: 7


Total Steps Trained:  44%|████▎     | 437013/1000000 [2:09:10<13:19:03, 11.74step/s]


--- Rollout Summary (Steps 436501 to 437000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 484.5573
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  44%|████▎     | 437493/1000000 [2:09:14<1:20:16, 116.78step/s]


Episode 875 finished at step 500 (437500 total). Env Reward: -9.36, Steps: 500, Delivered: 5


Total Steps Trained:  44%|████▍     | 437515/1000000 [2:09:19<14:18:41, 10.92step/s]


--- Rollout Summary (Steps 437001 to 437500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 683.6272
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  44%|████▍     | 438000/1000000 [2:09:23<1:28:01, 106.40step/s]


Episode 876 finished at step 500 (438000 total). Env Reward: -9.03, Steps: 500, Delivered: 6


Total Steps Trained:  44%|████▍     | 438011/1000000 [2:09:28<19:55:52,  7.83step/s]


--- Rollout Summary (Steps 437501 to 438000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 560.3701
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  44%|████▍     | 438490/1000000 [2:09:32<1:20:40, 116.00step/s]


Episode 877 finished at step 500 (438500 total). Env Reward: -6.28, Steps: 500, Delivered: 8


Total Steps Trained:  44%|████▍     | 438513/1000000 [2:09:37<13:54:51, 11.21step/s]


--- Rollout Summary (Steps 438001 to 438500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 515.7671
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  44%|████▍     | 438991/1000000 [2:09:41<1:18:02, 119.81step/s]


Episode 878 finished at step 500 (439000 total). Env Reward: -6.22, Steps: 500, Delivered: 8


Total Steps Trained:  44%|████▍     | 439014/1000000 [2:09:45<13:33:39, 11.49step/s]


--- Rollout Summary (Steps 438501 to 439000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 549.0460
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  44%|████▍     | 439498/1000000 [2:09:50<1:22:45, 112.88step/s]


Episode 879 finished at step 500 (439500 total). Env Reward: -6.26, Steps: 500, Delivered: 8


Total Steps Trained:  44%|████▍     | 439510/1000000 [2:09:54<18:45:23,  8.30step/s]


--- Rollout Summary (Steps 439001 to 439500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 507.9103
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  44%|████▍     | 439998/1000000 [2:09:59<1:32:07, 101.31step/s]


Episode 880 finished at step 500 (440000 total). Env Reward: -7.24, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000880_map1.pth


Total Steps Trained:  44%|████▍     | 440020/1000000 [2:10:03<15:02:59, 10.34step/s]


--- Rollout Summary (Steps 439501 to 440000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 783.8249
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  44%|████▍     | 440500/1000000 [2:10:08<1:18:29, 118.79step/s]


Episode 881 finished at step 500 (440500 total). Env Reward: -13.40, Steps: 500, Delivered: 1


Total Steps Trained:  44%|████▍     | 440512/1000000 [2:10:12<18:13:25,  8.53step/s]


--- Rollout Summary (Steps 440001 to 440500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): 0.0011
Avg Critic Loss (per minibatch): 593.6254
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  44%|████▍     | 441000/1000000 [2:10:16<1:19:04, 117.83step/s]


Episode 882 finished at step 500 (441000 total). Env Reward: -10.82, Steps: 500, Delivered: 4


Total Steps Trained:  44%|████▍     | 441012/1000000 [2:10:21<18:27:58,  8.41step/s]


--- Rollout Summary (Steps 440501 to 441000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0081
Avg Critic Loss (per minibatch): 452.3217
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  44%|████▍     | 441496/1000000 [2:10:25<1:20:12, 116.06step/s]


Episode 883 finished at step 500 (441500 total). Env Reward: -7.64, Steps: 500, Delivered: 7


Total Steps Trained:  44%|████▍     | 441520/1000000 [2:10:30<13:36:15, 11.40step/s]


--- Rollout Summary (Steps 441001 to 441500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 561.6315
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  44%|████▍     | 441992/1000000 [2:10:34<1:19:44, 116.62step/s]


Episode 884 finished at step 500 (442000 total). Env Reward: 2.84, Steps: 500, Delivered: 9


Total Steps Trained:  44%|████▍     | 442015/1000000 [2:10:39<13:56:33, 11.12step/s]


--- Rollout Summary (Steps 441501 to 442000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 589.7883
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  44%|████▍     | 442498/1000000 [2:10:43<1:16:12, 121.92step/s]


Episode 885 finished at step 500 (442500 total). Env Reward: -11.18, Steps: 500, Delivered: 4


Total Steps Trained:  44%|████▍     | 442511/1000000 [2:10:47<17:25:50,  8.88step/s]


--- Rollout Summary (Steps 442001 to 442500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 395.8173
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  44%|████▍     | 442991/1000000 [2:10:52<1:16:56, 120.65step/s]


Episode 886 finished at step 500 (443000 total). Env Reward: -9.15, Steps: 500, Delivered: 5


Total Steps Trained:  44%|████▍     | 443015/1000000 [2:10:56<13:08:31, 11.77step/s]


--- Rollout Summary (Steps 442501 to 443000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 399.6501
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  44%|████▍     | 443500/1000000 [2:11:00<1:17:41, 119.37step/s]


Episode 887 finished at step 500 (443500 total). Env Reward: -10.21, Steps: 500, Delivered: 4


Total Steps Trained:  44%|████▍     | 443512/1000000 [2:11:05<18:19:27,  8.44step/s]


--- Rollout Summary (Steps 443001 to 443500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 418.4976
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  44%|████▍     | 443989/1000000 [2:11:09<1:23:07, 111.47step/s]


Episode 888 finished at step 500 (444000 total). Env Reward: -8.27, Steps: 500, Delivered: 6


Total Steps Trained:  44%|████▍     | 444013/1000000 [2:11:14<13:26:42, 11.49step/s]


--- Rollout Summary (Steps 443501 to 444000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): 0.0004
Avg Critic Loss (per minibatch): 426.5863
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  44%|████▍     | 444491/1000000 [2:11:18<1:25:56, 107.72step/s]


Episode 889 finished at step 500 (444500 total). Env Reward: -10.99, Steps: 500, Delivered: 4


Total Steps Trained:  44%|████▍     | 444513/1000000 [2:11:23<14:18:14, 10.79step/s]


--- Rollout Summary (Steps 444001 to 444500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 534.2496
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  44%|████▍     | 444989/1000000 [2:11:27<1:16:47, 120.47step/s]


Episode 890 finished at step 500 (445000 total). Env Reward: -10.86, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000890_map1.pth


Total Steps Trained:  45%|████▍     | 445013/1000000 [2:11:32<13:11:22, 11.69step/s]


--- Rollout Summary (Steps 444501 to 445000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0003
Avg Critic Loss (per minibatch): 559.4514
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  45%|████▍     | 445499/1000000 [2:11:36<1:28:18, 104.66step/s]


Episode 891 finished at step 500 (445500 total). Env Reward: 1.57, Steps: 500, Delivered: 7


Total Steps Trained:  45%|████▍     | 445522/1000000 [2:11:41<14:13:31, 10.83step/s]


--- Rollout Summary (Steps 445001 to 445500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 788.8034
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  45%|████▍     | 445990/1000000 [2:11:45<1:18:54, 117.02step/s]


Episode 892 finished at step 500 (446000 total). Env Reward: -10.50, Steps: 500, Delivered: 4


Total Steps Trained:  45%|████▍     | 446012/1000000 [2:11:50<14:07:10, 10.90step/s]


--- Rollout Summary (Steps 445501 to 446000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 636.9176
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  45%|████▍     | 446496/1000000 [2:11:54<1:19:52, 115.50step/s]


Episode 893 finished at step 500 (446500 total). Env Reward: -1.55, Steps: 500, Delivered: 4


Total Steps Trained:  45%|████▍     | 446519/1000000 [2:11:58<13:45:33, 11.17step/s]


--- Rollout Summary (Steps 446001 to 446500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 883.2372
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  45%|████▍     | 446997/1000000 [2:12:03<1:16:38, 120.27step/s]


Episode 894 finished at step 500 (447000 total). Env Reward: -11.92, Steps: 500, Delivered: 3


Total Steps Trained:  45%|████▍     | 447020/1000000 [2:12:07<13:34:55, 11.31step/s]


--- Rollout Summary (Steps 446501 to 447000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 531.9489
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  45%|████▍     | 447497/1000000 [2:12:11<1:19:45, 115.46step/s]


Episode 895 finished at step 500 (447500 total). Env Reward: -9.18, Steps: 500, Delivered: 6


Total Steps Trained:  45%|████▍     | 447520/1000000 [2:12:16<13:41:27, 11.21step/s]


--- Rollout Summary (Steps 447001 to 447500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 573.9674
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  45%|████▍     | 447992/1000000 [2:12:20<1:18:15, 117.57step/s]


Episode 896 finished at step 500 (448000 total). Env Reward: -5.20, Steps: 500, Delivered: 9


Total Steps Trained:  45%|████▍     | 448016/1000000 [2:12:25<13:09:52, 11.65step/s]


--- Rollout Summary (Steps 447501 to 448000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 356.4851
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  45%|████▍     | 448495/1000000 [2:12:29<1:16:12, 120.61step/s]


Episode 897 finished at step 500 (448500 total). Env Reward: -10.12, Steps: 500, Delivered: 5


Total Steps Trained:  45%|████▍     | 448519/1000000 [2:12:34<13:01:49, 11.76step/s]


--- Rollout Summary (Steps 448001 to 448500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 585.8193
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  45%|████▍     | 448999/1000000 [2:12:38<1:28:15, 104.06step/s]


Episode 898 finished at step 500 (449000 total). Env Reward: 0.57, Steps: 500, Delivered: 6


Total Steps Trained:  45%|████▍     | 449021/1000000 [2:12:43<14:12:13, 10.78step/s]


--- Rollout Summary (Steps 448501 to 449000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 574.1746
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  45%|████▍     | 449498/1000000 [2:12:47<1:20:21, 114.19step/s]


Episode 899 finished at step 500 (449500 total). Env Reward: -8.87, Steps: 500, Delivered: 6


Total Steps Trained:  45%|████▍     | 449521/1000000 [2:12:51<13:42:03, 11.16step/s]


--- Rollout Summary (Steps 449001 to 449500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 594.8122
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  45%|████▍     | 449993/1000000 [2:12:55<1:21:46, 112.10step/s]


Episode 900 finished at step 500 (450000 total). Env Reward: -9.84, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000900_map1.pth


Total Steps Trained:  45%|████▌     | 450016/1000000 [2:13:00<13:39:07, 11.19step/s]


--- Rollout Summary (Steps 449501 to 450000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 486.7279
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  45%|████▌     | 450497/1000000 [2:13:04<1:18:44, 116.32step/s]


Episode 901 finished at step 500 (450500 total). Env Reward: -6.36, Steps: 500, Delivered: 8


Total Steps Trained:  45%|████▌     | 450520/1000000 [2:13:09<13:51:33, 11.01step/s]


--- Rollout Summary (Steps 450001 to 450500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 412.4456
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  45%|████▌     | 450995/1000000 [2:13:13<1:16:52, 119.04step/s]


Episode 902 finished at step 500 (451000 total). Env Reward: -12.53, Steps: 500, Delivered: 2


Total Steps Trained:  45%|████▌     | 451018/1000000 [2:13:18<13:48:10, 11.05step/s]


--- Rollout Summary (Steps 450501 to 451000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 765.5755
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  45%|████▌     | 451493/1000000 [2:13:22<1:24:39, 107.99step/s]


Episode 903 finished at step 500 (451500 total). Env Reward: -9.57, Steps: 500, Delivered: 5


Total Steps Trained:  45%|████▌     | 451515/1000000 [2:13:27<13:52:04, 10.99step/s]


--- Rollout Summary (Steps 451001 to 451500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0006
Avg Critic Loss (per minibatch): 443.5120
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  45%|████▌     | 451999/1000000 [2:13:31<1:19:48, 114.43step/s]


Episode 904 finished at step 500 (452000 total). Env Reward: -6.87, Steps: 500, Delivered: 8


Total Steps Trained:  45%|████▌     | 452011/1000000 [2:13:36<18:27:11,  8.25step/s]


--- Rollout Summary (Steps 451501 to 452000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 564.0168
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  45%|████▌     | 452495/1000000 [2:13:40<1:14:47, 122.00step/s]


Episode 905 finished at step 500 (452500 total). Env Reward: -11.78, Steps: 500, Delivered: 3


Total Steps Trained:  45%|████▌     | 452520/1000000 [2:13:45<12:59:11, 11.71step/s]


--- Rollout Summary (Steps 452001 to 452500) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 551.5388
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  45%|████▌     | 452991/1000000 [2:13:49<1:16:23, 119.35step/s]


Episode 906 finished at step 500 (453000 total). Env Reward: -8.82, Steps: 500, Delivered: 6


Total Steps Trained:  45%|████▌     | 453014/1000000 [2:13:53<13:07:25, 11.58step/s]


--- Rollout Summary (Steps 452501 to 453000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 445.2506
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  45%|████▌     | 453497/1000000 [2:13:57<1:15:59, 119.85step/s]


Episode 907 finished at step 500 (453500 total). Env Reward: -9.50, Steps: 500, Delivered: 5


Total Steps Trained:  45%|████▌     | 453520/1000000 [2:14:02<13:05:37, 11.59step/s]


--- Rollout Summary (Steps 453001 to 453500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 418.6281
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  45%|████▌     | 453993/1000000 [2:14:06<1:16:23, 119.14step/s]


Episode 908 finished at step 500 (454000 total). Env Reward: -10.39, Steps: 500, Delivered: 5


Total Steps Trained:  45%|████▌     | 454016/1000000 [2:14:11<13:03:15, 11.62step/s]


--- Rollout Summary (Steps 453501 to 454000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 500.7204
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  45%|████▌     | 454498/1000000 [2:14:15<1:19:23, 114.51step/s]


Episode 909 finished at step 500 (454500 total). Env Reward: -5.66, Steps: 500, Delivered: 9


Total Steps Trained:  45%|████▌     | 454510/1000000 [2:14:20<18:31:36,  8.18step/s]


--- Rollout Summary (Steps 454001 to 454500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 695.1032
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  45%|████▌     | 454994/1000000 [2:14:24<1:13:11, 124.10step/s]


Episode 910 finished at step 500 (455000 total). Env Reward: -10.62, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000910_map1.pth


Total Steps Trained:  46%|████▌     | 455018/1000000 [2:14:29<12:53:54, 11.74step/s]


--- Rollout Summary (Steps 454501 to 455000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 423.8182
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  46%|████▌     | 455500/1000000 [2:14:33<1:16:49, 118.12step/s]


Episode 911 finished at step 500 (455500 total). Env Reward: -10.43, Steps: 500, Delivered: 4


Total Steps Trained:  46%|████▌     | 455512/1000000 [2:14:37<17:52:17,  8.46step/s]


--- Rollout Summary (Steps 455001 to 455500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 489.4436
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  46%|████▌     | 455990/1000000 [2:14:41<1:15:01, 120.86step/s]


Episode 912 finished at step 500 (456000 total). Env Reward: -8.70, Steps: 500, Delivered: 6


Total Steps Trained:  46%|████▌     | 456014/1000000 [2:14:46<13:11:35, 11.45step/s]


--- Rollout Summary (Steps 455501 to 456000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 608.3639
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  46%|████▌     | 456498/1000000 [2:14:51<1:20:09, 113.02step/s]


Episode 913 finished at step 500 (456500 total). Env Reward: -9.30, Steps: 500, Delivered: 5


Total Steps Trained:  46%|████▌     | 456510/1000000 [2:14:55<18:05:14,  8.35step/s]


--- Rollout Summary (Steps 456001 to 456500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 782.9183
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  46%|████▌     | 456998/1000000 [2:15:00<1:16:31, 118.25step/s]


Episode 914 finished at step 500 (457000 total). Env Reward: -4.42, Steps: 500, Delivered: 10


Total Steps Trained:  46%|████▌     | 457010/1000000 [2:15:04<17:51:01,  8.45step/s]


--- Rollout Summary (Steps 456501 to 457000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0075
Avg Critic Loss (per minibatch): 710.6060
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  46%|████▌     | 457492/1000000 [2:15:08<1:17:51, 116.14step/s]


Episode 915 finished at step 500 (457500 total). Env Reward: -8.07, Steps: 500, Delivered: 6


Total Steps Trained:  46%|████▌     | 457515/1000000 [2:15:13<13:20:30, 11.29step/s]


--- Rollout Summary (Steps 457001 to 457500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 617.1311
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  46%|████▌     | 457997/1000000 [2:15:17<1:17:13, 116.97step/s]


Episode 916 finished at step 500 (458000 total). Env Reward: -8.88, Steps: 500, Delivered: 5


Total Steps Trained:  46%|████▌     | 458020/1000000 [2:15:22<13:26:43, 11.20step/s]


--- Rollout Summary (Steps 457501 to 458000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 677.3175
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  46%|████▌     | 458494/1000000 [2:15:26<1:13:06, 123.44step/s]


Episode 917 finished at step 500 (458500 total). Env Reward: -8.47, Steps: 500, Delivered: 6


Total Steps Trained:  46%|████▌     | 458518/1000000 [2:15:31<12:50:36, 11.71step/s]


--- Rollout Summary (Steps 458001 to 458500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 479.9166
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  46%|████▌     | 458990/1000000 [2:15:35<1:18:13, 115.27step/s]


Episode 918 finished at step 500 (459000 total). Env Reward: -11.72, Steps: 500, Delivered: 3


Total Steps Trained:  46%|████▌     | 459013/1000000 [2:15:39<13:32:26, 11.10step/s]


--- Rollout Summary (Steps 458501 to 459000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 533.0914
Avg Entropy (per minibatch): 2.5585
------------------------------


Total Steps Trained:  46%|████▌     | 459494/1000000 [2:15:44<1:18:22, 114.94step/s]


Episode 919 finished at step 500 (459500 total). Env Reward: -7.87, Steps: 500, Delivered: 7


Total Steps Trained:  46%|████▌     | 459517/1000000 [2:15:48<13:41:21, 10.97step/s]


--- Rollout Summary (Steps 459001 to 459500) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -8.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 613.5166
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  46%|████▌     | 459996/1000000 [2:15:53<1:18:48, 114.21step/s]


Episode 920 finished at step 500 (460000 total). Env Reward: -5.26, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000920_map1.pth


Total Steps Trained:  46%|████▌     | 460020/1000000 [2:15:57<13:07:10, 11.43step/s]


--- Rollout Summary (Steps 459501 to 460000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 555.8702
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  46%|████▌     | 460498/1000000 [2:16:01<1:13:52, 121.73step/s]


Episode 921 finished at step 500 (460500 total). Env Reward: -5.32, Steps: 500, Delivered: 9


Total Steps Trained:  46%|████▌     | 460511/1000000 [2:16:06<16:47:59,  8.92step/s]


--- Rollout Summary (Steps 460001 to 460500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -8.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 911.5362
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  46%|████▌     | 460995/1000000 [2:16:10<1:21:06, 110.77step/s]


Episode 922 finished at step 500 (461000 total). Env Reward: -8.26, Steps: 500, Delivered: 6


Total Steps Trained:  46%|████▌     | 461019/1000000 [2:16:15<13:30:32, 11.08step/s]


--- Rollout Summary (Steps 460501 to 461000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 539.5643
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  46%|████▌     | 461500/1000000 [2:16:19<1:19:43, 112.57step/s]


Episode 923 finished at step 500 (461500 total). Env Reward: -8.49, Steps: 500, Delivered: 6


Total Steps Trained:  46%|████▌     | 461512/1000000 [2:16:24<17:47:43,  8.41step/s]


--- Rollout Summary (Steps 461001 to 461500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 578.2194
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  46%|████▌     | 461992/1000000 [2:16:28<1:21:34, 109.93step/s]


Episode 924 finished at step 500 (462000 total). Env Reward: -6.71, Steps: 500, Delivered: 8


Total Steps Trained:  46%|████▌     | 462014/1000000 [2:16:33<13:48:53, 10.82step/s]


--- Rollout Summary (Steps 461501 to 462000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 551.7804
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  46%|████▌     | 462497/1000000 [2:16:37<1:13:27, 121.94step/s]


Episode 925 finished at step 500 (462500 total). Env Reward: -11.82, Steps: 500, Delivered: 3


Total Steps Trained:  46%|████▋     | 462510/1000000 [2:16:41<16:52:30,  8.85step/s]


--- Rollout Summary (Steps 462001 to 462500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): 0.0011
Avg Critic Loss (per minibatch): 382.9722
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  46%|████▋     | 462999/1000000 [2:16:46<1:13:53, 121.11step/s]


Episode 926 finished at step 500 (463000 total). Env Reward: -9.84, Steps: 500, Delivered: 5


Total Steps Trained:  46%|████▋     | 463012/1000000 [2:16:50<17:10:06,  8.69step/s]


--- Rollout Summary (Steps 462501 to 463000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -8.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 499.8098
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  46%|████▋     | 463489/1000000 [2:16:55<1:15:29, 118.46step/s]


Episode 927 finished at step 500 (463500 total). Env Reward: -9.38, Steps: 500, Delivered: 6


Total Steps Trained:  46%|████▋     | 463511/1000000 [2:16:59<13:27:24, 11.07step/s]


--- Rollout Summary (Steps 463001 to 463500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -8.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 798.0016
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  46%|████▋     | 464000/1000000 [2:17:03<1:16:55, 116.13step/s]


Episode 928 finished at step 500 (464000 total). Env Reward: -5.18, Steps: 500, Delivered: 9


Total Steps Trained:  46%|████▋     | 464012/1000000 [2:17:08<18:05:53,  8.23step/s]


--- Rollout Summary (Steps 463501 to 464000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 464.3669
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  46%|████▋     | 464500/1000000 [2:17:12<1:13:12, 121.92step/s]


Episode 929 finished at step 500 (464500 total). Env Reward: -9.41, Steps: 500, Delivered: 5


Total Steps Trained:  46%|████▋     | 464513/1000000 [2:17:17<16:40:06,  8.92step/s]


--- Rollout Summary (Steps 464001 to 464500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 691.2735
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  46%|████▋     | 464990/1000000 [2:17:21<1:14:11, 120.20step/s]


Episode 930 finished at step 500 (465000 total). Env Reward: -9.03, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000930_map1.pth


Total Steps Trained:  47%|████▋     | 465014/1000000 [2:17:26<13:05:45, 11.35step/s]


--- Rollout Summary (Steps 464501 to 465000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -8.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 415.6903
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  47%|████▋     | 465493/1000000 [2:17:30<1:14:02, 120.31step/s]


Episode 931 finished at step 500 (465500 total). Env Reward: -10.20, Steps: 500, Delivered: 4


Total Steps Trained:  47%|████▋     | 465516/1000000 [2:17:34<13:03:07, 11.37step/s]


--- Rollout Summary (Steps 465001 to 465500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 552.9847
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  47%|████▋     | 465988/1000000 [2:17:39<1:13:27, 121.15step/s]


Episode 932 finished at step 500 (466000 total). Env Reward: -9.65, Steps: 500, Delivered: 5


Total Steps Trained:  47%|████▋     | 466012/1000000 [2:17:43<12:38:05, 11.74step/s]


--- Rollout Summary (Steps 465501 to 466000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -8.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 930.5243
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  47%|████▋     | 466497/1000000 [2:17:48<1:19:18, 112.12step/s]


Episode 933 finished at step 500 (466500 total). Env Reward: -8.60, Steps: 500, Delivered: 6


Total Steps Trained:  47%|████▋     | 466520/1000000 [2:17:52<13:23:02, 11.07step/s]


--- Rollout Summary (Steps 466001 to 466500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 502.0829
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  47%|████▋     | 466993/1000000 [2:17:56<1:14:48, 118.76step/s]


Episode 934 finished at step 500 (467000 total). Env Reward: -12.12, Steps: 500, Delivered: 2


Total Steps Trained:  47%|████▋     | 467016/1000000 [2:18:01<12:58:28, 11.41step/s]


--- Rollout Summary (Steps 466501 to 467000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -8.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 603.6510
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  47%|████▋     | 467492/1000000 [2:18:05<1:15:54, 116.92step/s]


Episode 935 finished at step 500 (467500 total). Env Reward: -7.60, Steps: 500, Delivered: 7


Total Steps Trained:  47%|████▋     | 467515/1000000 [2:18:10<13:09:16, 11.24step/s]


--- Rollout Summary (Steps 467001 to 467500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 691.1375
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  47%|████▋     | 467996/1000000 [2:18:14<1:17:14, 114.78step/s]


Episode 936 finished at step 500 (468000 total). Env Reward: -9.48, Steps: 500, Delivered: 5


Total Steps Trained:  47%|████▋     | 468020/1000000 [2:18:19<12:59:00, 11.38step/s]


--- Rollout Summary (Steps 467501 to 468000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 448.7487
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  47%|████▋     | 468498/1000000 [2:18:23<1:14:08, 119.47step/s]


Episode 937 finished at step 500 (468500 total). Env Reward: -9.08, Steps: 500, Delivered: 5


Total Steps Trained:  47%|████▋     | 468510/1000000 [2:18:28<17:23:40,  8.49step/s]


--- Rollout Summary (Steps 468001 to 468500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -8.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 459.3124
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  47%|████▋     | 468996/1000000 [2:18:32<1:23:14, 106.32step/s]


Episode 938 finished at step 500 (469000 total). Env Reward: -9.55, Steps: 500, Delivered: 5


Total Steps Trained:  47%|████▋     | 469019/1000000 [2:18:36<13:37:09, 10.83step/s]


--- Rollout Summary (Steps 468501 to 469000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0081
Avg Critic Loss (per minibatch): 573.9152
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  47%|████▋     | 469489/1000000 [2:18:41<1:13:53, 119.66step/s]


Episode 939 finished at step 500 (469500 total). Env Reward: -12.54, Steps: 500, Delivered: 2


Total Steps Trained:  47%|████▋     | 469512/1000000 [2:18:45<12:47:35, 11.52step/s]


--- Rollout Summary (Steps 469001 to 469500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 689.5355
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  47%|████▋     | 469990/1000000 [2:18:49<1:11:48, 123.02step/s]


Episode 940 finished at step 500 (470000 total). Env Reward: -13.62, Steps: 500, Delivered: 1
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000940_map1.pth


Total Steps Trained:  47%|████▋     | 470013/1000000 [2:18:54<12:43:44, 11.57step/s]


--- Rollout Summary (Steps 469501 to 470000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -8.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 591.0458
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  47%|████▋     | 470498/1000000 [2:18:58<1:17:26, 113.97step/s]


Episode 941 finished at step 500 (470500 total). Env Reward: -5.78, Steps: 500, Delivered: 9


Total Steps Trained:  47%|████▋     | 470510/1000000 [2:19:03<17:36:29,  8.35step/s]


--- Rollout Summary (Steps 470001 to 470500) ---
Update Duration: 4.43s
Avg Episode Reward (last 100): -8.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 504.4241
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  47%|████▋     | 470993/1000000 [2:19:07<1:13:43, 119.59step/s]


Episode 942 finished at step 500 (471000 total). Env Reward: -5.54, Steps: 500, Delivered: 9


Total Steps Trained:  47%|████▋     | 471017/1000000 [2:19:12<12:54:22, 11.39step/s]


--- Rollout Summary (Steps 470501 to 471000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -8.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 487.3660
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  47%|████▋     | 471497/1000000 [2:19:16<1:17:07, 114.21step/s]


Episode 943 finished at step 500 (471500 total). Env Reward: -6.73, Steps: 500, Delivered: 8


Total Steps Trained:  47%|████▋     | 471521/1000000 [2:19:21<12:52:44, 11.40step/s]


--- Rollout Summary (Steps 471001 to 471500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -8.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 516.8733
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  47%|████▋     | 471989/1000000 [2:19:25<1:18:44, 111.75step/s]


Episode 944 finished at step 500 (472000 total). Env Reward: -8.45, Steps: 500, Delivered: 6


Total Steps Trained:  47%|████▋     | 472011/1000000 [2:19:29<13:52:17, 10.57step/s]


--- Rollout Summary (Steps 471501 to 472000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -8.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 331.1723
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  47%|████▋     | 472491/1000000 [2:19:34<1:16:15, 115.28step/s]


Episode 945 finished at step 500 (472500 total). Env Reward: -9.83, Steps: 500, Delivered: 5


Total Steps Trained:  47%|████▋     | 472513/1000000 [2:19:38<13:33:15, 10.81step/s]


--- Rollout Summary (Steps 472001 to 472500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -8.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 576.9684
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  47%|████▋     | 473000/1000000 [2:19:42<1:20:23, 109.25step/s]


Episode 946 finished at step 500 (473000 total). Env Reward: -7.28, Steps: 500, Delivered: 7


Total Steps Trained:  47%|████▋     | 473011/1000000 [2:19:47<17:56:47,  8.16step/s]


--- Rollout Summary (Steps 472501 to 473000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 351.2622
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  47%|████▋     | 473493/1000000 [2:19:51<1:19:59, 109.71step/s]


Episode 947 finished at step 500 (473500 total). Env Reward: -11.02, Steps: 500, Delivered: 4


Total Steps Trained:  47%|████▋     | 473516/1000000 [2:19:56<13:09:51, 11.11step/s]


--- Rollout Summary (Steps 473001 to 473500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -8.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 623.0743
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  47%|████▋     | 473993/1000000 [2:20:00<1:19:28, 110.31step/s]


Episode 948 finished at step 500 (474000 total). Env Reward: -9.36, Steps: 500, Delivered: 5


Total Steps Trained:  47%|████▋     | 474015/1000000 [2:20:05<13:27:55, 10.85step/s]


--- Rollout Summary (Steps 473501 to 474000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 477.0597
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  47%|████▋     | 474492/1000000 [2:20:09<1:20:02, 109.41step/s]


Episode 949 finished at step 500 (474500 total). Env Reward: -9.09, Steps: 500, Delivered: 5


Total Steps Trained:  47%|████▋     | 474514/1000000 [2:20:14<13:14:39, 11.02step/s]


--- Rollout Summary (Steps 474001 to 474500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 714.3835
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  47%|████▋     | 474996/1000000 [2:20:18<1:15:45, 115.51step/s]


Episode 950 finished at step 500 (475000 total). Env Reward: -9.31, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000950_map1.pth


Total Steps Trained:  48%|████▊     | 475019/1000000 [2:20:23<12:59:10, 11.23step/s]


--- Rollout Summary (Steps 474501 to 475000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 497.0821
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  48%|████▊     | 475498/1000000 [2:20:27<1:11:34, 122.13step/s]


Episode 951 finished at step 500 (475500 total). Env Reward: -2.55, Steps: 500, Delivered: 3


Total Steps Trained:  48%|████▊     | 475511/1000000 [2:20:31<16:32:50,  8.80step/s]


--- Rollout Summary (Steps 475001 to 475500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): 0.0003
Avg Critic Loss (per minibatch): 806.0248
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  48%|████▊     | 475994/1000000 [2:20:35<1:12:50, 119.88step/s]


Episode 952 finished at step 500 (476000 total). Env Reward: -9.90, Steps: 500, Delivered: 5


Total Steps Trained:  48%|████▊     | 476017/1000000 [2:20:40<12:43:53, 11.43step/s]


--- Rollout Summary (Steps 475501 to 476000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 753.4406
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  48%|████▊     | 476496/1000000 [2:20:44<1:11:21, 122.29step/s]


Episode 953 finished at step 500 (476500 total). Env Reward: -12.42, Steps: 500, Delivered: 2


Total Steps Trained:  48%|████▊     | 476521/1000000 [2:20:49<12:03:40, 12.06step/s]


--- Rollout Summary (Steps 476001 to 476500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -8.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 480.1869
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  48%|████▊     | 476994/1000000 [2:20:53<1:13:27, 118.66step/s]


Episode 954 finished at step 500 (477000 total). Env Reward: -6.49, Steps: 500, Delivered: 8


Total Steps Trained:  48%|████▊     | 477017/1000000 [2:20:57<12:33:46, 11.56step/s]


--- Rollout Summary (Steps 476501 to 477000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 611.5556
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  48%|████▊     | 477492/1000000 [2:21:02<1:14:35, 116.74step/s]


Episode 955 finished at step 500 (477500 total). Env Reward: -9.85, Steps: 500, Delivered: 5


Total Steps Trained:  48%|████▊     | 477514/1000000 [2:21:07<13:33:35, 10.70step/s]


--- Rollout Summary (Steps 477001 to 477500) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -8.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 518.2044
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  48%|████▊     | 477990/1000000 [2:21:11<1:17:02, 112.94step/s]


Episode 956 finished at step 500 (478000 total). Env Reward: -9.43, Steps: 500, Delivered: 5


Total Steps Trained:  48%|████▊     | 478013/1000000 [2:21:15<13:05:47, 11.07step/s]


--- Rollout Summary (Steps 477501 to 478000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 497.9595
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  48%|████▊     | 478494/1000000 [2:21:20<1:12:23, 120.07step/s]


Episode 957 finished at step 500 (478500 total). Env Reward: -7.87, Steps: 500, Delivered: 7


Total Steps Trained:  48%|████▊     | 478518/1000000 [2:21:24<12:33:56, 11.53step/s]


--- Rollout Summary (Steps 478001 to 478500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -8.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 644.3753
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  48%|████▊     | 478993/1000000 [2:21:29<1:17:03, 112.68step/s]


Episode 958 finished at step 500 (479000 total). Env Reward: -7.36, Steps: 500, Delivered: 7


Total Steps Trained:  48%|████▊     | 479017/1000000 [2:21:33<12:38:05, 11.45step/s]


--- Rollout Summary (Steps 478501 to 479000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -8.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 462.3062
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  48%|████▊     | 479489/1000000 [2:21:37<1:14:44, 116.08step/s]


Episode 959 finished at step 500 (479500 total). Env Reward: -8.68, Steps: 500, Delivered: 6


Total Steps Trained:  48%|████▊     | 479512/1000000 [2:21:42<12:40:28, 11.41step/s]


--- Rollout Summary (Steps 479001 to 479500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 412.2462
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  48%|████▊     | 480000/1000000 [2:21:46<1:14:15, 116.72step/s]


Episode 960 finished at step 500 (480000 total). Env Reward: -4.61, Steps: 500, Delivered: 10
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000960_map1.pth


Total Steps Trained:  48%|████▊     | 480012/1000000 [2:21:51<17:25:17,  8.29step/s]


--- Rollout Summary (Steps 479501 to 480000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 374.7559
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  48%|████▊     | 480493/1000000 [2:21:55<1:11:05, 121.79step/s]


Episode 961 finished at step 500 (480500 total). Env Reward: -6.22, Steps: 500, Delivered: 8


Total Steps Trained:  48%|████▊     | 480516/1000000 [2:22:00<12:36:49, 11.44step/s]


--- Rollout Summary (Steps 480001 to 480500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 528.5965
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  48%|████▊     | 480998/1000000 [2:22:04<1:11:39, 120.71step/s]


Episode 962 finished at step 500 (481000 total). Env Reward: -12.42, Steps: 500, Delivered: 2


Total Steps Trained:  48%|████▊     | 481011/1000000 [2:22:08<17:05:06,  8.44step/s]


--- Rollout Summary (Steps 480501 to 481000) ---
Update Duration: 4.72s
Avg Episode Reward (last 100): -8.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 693.4997
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  48%|████▊     | 481490/1000000 [2:22:12<1:10:38, 122.32step/s]


Episode 963 finished at step 500 (481500 total). Env Reward: 1.12, Steps: 500, Delivered: 7


Total Steps Trained:  48%|████▊     | 481514/1000000 [2:22:17<12:09:26, 11.85step/s]


--- Rollout Summary (Steps 481001 to 481500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -8.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 947.3588
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  48%|████▊     | 481999/1000000 [2:22:21<1:11:04, 121.46step/s]


Episode 964 finished at step 500 (482000 total). Env Reward: -8.26, Steps: 500, Delivered: 6


Total Steps Trained:  48%|████▊     | 482012/1000000 [2:22:26<16:06:45,  8.93step/s]


--- Rollout Summary (Steps 481501 to 482000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 366.7480
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  48%|████▊     | 482496/1000000 [2:22:30<1:13:52, 116.74step/s]


Episode 965 finished at step 500 (482500 total). Env Reward: -7.23, Steps: 500, Delivered: 7


Total Steps Trained:  48%|████▊     | 482519/1000000 [2:22:35<12:47:42, 11.23step/s]


--- Rollout Summary (Steps 482001 to 482500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 526.2782
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  48%|████▊     | 482996/1000000 [2:22:39<1:15:27, 114.18step/s]


Episode 966 finished at step 500 (483000 total). Env Reward: -12.68, Steps: 500, Delivered: 2


Total Steps Trained:  48%|████▊     | 483018/1000000 [2:22:44<13:06:13, 10.96step/s]


--- Rollout Summary (Steps 482501 to 483000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 494.7188
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  48%|████▊     | 483500/1000000 [2:22:48<1:21:02, 106.22step/s]


Episode 967 finished at step 500 (483500 total). Env Reward: -5.39, Steps: 500, Delivered: 9


Total Steps Trained:  48%|████▊     | 483511/1000000 [2:22:53<18:40:07,  7.68step/s]


--- Rollout Summary (Steps 483001 to 483500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 579.5077
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  48%|████▊     | 483998/1000000 [2:22:57<1:13:18, 117.32step/s]


Episode 968 finished at step 500 (484000 total). Env Reward: -6.73, Steps: 500, Delivered: 8


Total Steps Trained:  48%|████▊     | 484010/1000000 [2:23:01<16:26:37,  8.72step/s]


--- Rollout Summary (Steps 483501 to 484000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 492.9282
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  48%|████▊     | 484490/1000000 [2:23:06<1:11:24, 120.31step/s]


Episode 969 finished at step 500 (484500 total). Env Reward: -10.59, Steps: 500, Delivered: 4


Total Steps Trained:  48%|████▊     | 484514/1000000 [2:23:10<12:49:10, 11.17step/s]


--- Rollout Summary (Steps 484001 to 484500) ---
Update Duration: 4.71s
Avg Episode Reward (last 100): -8.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 435.6232
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  48%|████▊     | 484990/1000000 [2:23:15<1:12:24, 118.54step/s]


Episode 970 finished at step 500 (485000 total). Env Reward: -9.81, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000970_map1.pth


Total Steps Trained:  49%|████▊     | 485014/1000000 [2:23:19<12:31:11, 11.43step/s]


--- Rollout Summary (Steps 484501 to 485000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -8.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 465.5037
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  49%|████▊     | 485489/1000000 [2:23:23<1:11:32, 119.86step/s]


Episode 971 finished at step 500 (485500 total). Env Reward: -11.17, Steps: 500, Delivered: 3


Total Steps Trained:  49%|████▊     | 485512/1000000 [2:23:28<12:27:06, 11.48step/s]


--- Rollout Summary (Steps 485001 to 485500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 588.6371
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  49%|████▊     | 485992/1000000 [2:23:32<1:10:00, 122.38step/s]


Episode 972 finished at step 500 (486000 total). Env Reward: -9.37, Steps: 500, Delivered: 5


Total Steps Trained:  49%|████▊     | 486016/1000000 [2:23:37<12:09:26, 11.74step/s]


--- Rollout Summary (Steps 485501 to 486000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 473.4433
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  49%|████▊     | 486493/1000000 [2:23:41<1:15:37, 113.16step/s]


Episode 973 finished at step 500 (486500 total). Env Reward: 6.53, Steps: 500, Delivered: 3


Total Steps Trained:  49%|████▊     | 486516/1000000 [2:23:46<12:45:04, 11.19step/s]


--- Rollout Summary (Steps 486001 to 486500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 772.6863
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  49%|████▊     | 486997/1000000 [2:23:50<1:11:54, 118.91step/s]


Episode 974 finished at step 500 (487000 total). Env Reward: -11.86, Steps: 500, Delivered: 3


Total Steps Trained:  49%|████▊     | 487020/1000000 [2:23:54<12:36:00, 11.31step/s]


--- Rollout Summary (Steps 486501 to 487000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 609.7685
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  49%|████▉     | 487500/1000000 [2:23:59<1:12:19, 118.11step/s]


Episode 975 finished at step 500 (487500 total). Env Reward: -10.55, Steps: 500, Delivered: 4


Total Steps Trained:  49%|████▉     | 487512/1000000 [2:24:03<16:37:28,  8.56step/s]


--- Rollout Summary (Steps 487001 to 487500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -8.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 553.5531
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  49%|████▉     | 487997/1000000 [2:24:07<1:07:54, 125.65step/s]


Episode 976 finished at step 500 (488000 total). Env Reward: 0.18, Steps: 500, Delivered: 6


Total Steps Trained:  49%|████▉     | 488022/1000000 [2:24:12<11:50:23, 12.01step/s]


--- Rollout Summary (Steps 487501 to 488000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 1183.1139
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  49%|████▉     | 488494/1000000 [2:24:16<1:17:02, 110.67step/s]


Episode 977 finished at step 500 (488500 total). Env Reward: -7.51, Steps: 500, Delivered: 7


Total Steps Trained:  49%|████▉     | 488517/1000000 [2:24:21<12:45:43, 11.13step/s]


--- Rollout Summary (Steps 488001 to 488500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 534.9932
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  49%|████▉     | 488993/1000000 [2:24:25<1:08:29, 124.35step/s]


Episode 978 finished at step 500 (489000 total). Env Reward: -12.34, Steps: 500, Delivered: 2


Total Steps Trained:  49%|████▉     | 489017/1000000 [2:24:30<12:04:25, 11.76step/s]


--- Rollout Summary (Steps 488501 to 489000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 743.8839
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  49%|████▉     | 489500/1000000 [2:24:34<1:11:58, 118.21step/s]


Episode 979 finished at step 500 (489500 total). Env Reward: -12.52, Steps: 500, Delivered: 2


Total Steps Trained:  49%|████▉     | 489512/1000000 [2:24:38<16:55:32,  8.38step/s]


--- Rollout Summary (Steps 489001 to 489500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0076
Avg Critic Loss (per minibatch): 505.5520
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  49%|████▉     | 489999/1000000 [2:24:43<1:14:26, 114.17step/s]


Episode 980 finished at step 500 (490000 total). Env Reward: -6.62, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000980_map1.pth


Total Steps Trained:  49%|████▉     | 490011/1000000 [2:24:47<17:30:28,  8.09step/s]


--- Rollout Summary (Steps 489501 to 490000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -8.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 392.5609
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  49%|████▉     | 490494/1000000 [2:24:52<1:11:18, 119.08step/s]


Episode 981 finished at step 500 (490500 total). Env Reward: -1.15, Steps: 500, Delivered: 5


Total Steps Trained:  49%|████▉     | 490518/1000000 [2:24:56<12:12:22, 11.59step/s]


--- Rollout Summary (Steps 490001 to 490500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 1089.0654
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  49%|████▉     | 490992/1000000 [2:25:00<1:11:23, 118.83step/s]


Episode 982 finished at step 500 (491000 total). Env Reward: -11.59, Steps: 500, Delivered: 3


Total Steps Trained:  49%|████▉     | 491014/1000000 [2:25:05<12:47:08, 11.06step/s]


--- Rollout Summary (Steps 490501 to 491000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -8.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 535.6626
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  49%|████▉     | 491491/1000000 [2:25:09<1:10:04, 120.94step/s]


Episode 983 finished at step 500 (491500 total). Env Reward: -11.33, Steps: 500, Delivered: 3


Total Steps Trained:  49%|████▉     | 491515/1000000 [2:25:14<11:56:41, 11.82step/s]


--- Rollout Summary (Steps 491001 to 491500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 461.3292
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  49%|████▉     | 492000/1000000 [2:25:18<1:13:11, 115.67step/s]


Episode 984 finished at step 500 (492000 total). Env Reward: -13.28, Steps: 500, Delivered: 2


Total Steps Trained:  49%|████▉     | 492012/1000000 [2:25:23<16:59:44,  8.30step/s]


--- Rollout Summary (Steps 491501 to 492000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 545.4779
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  49%|████▉     | 492491/1000000 [2:25:27<1:11:26, 118.39step/s]


Episode 985 finished at step 500 (492500 total). Env Reward: -9.68, Steps: 500, Delivered: 5


Total Steps Trained:  49%|████▉     | 492513/1000000 [2:25:32<12:51:17, 10.97step/s]


--- Rollout Summary (Steps 492001 to 492500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -8.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 588.8255
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  49%|████▉     | 492988/1000000 [2:25:36<1:09:43, 121.19step/s]


Episode 986 finished at step 500 (493000 total). Env Reward: -10.56, Steps: 500, Delivered: 4


Total Steps Trained:  49%|████▉     | 493012/1000000 [2:25:41<12:01:01, 11.72step/s]


--- Rollout Summary (Steps 492501 to 493000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 469.5883
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  49%|████▉     | 493499/1000000 [2:25:45<1:12:24, 116.59step/s]


Episode 987 finished at step 500 (493500 total). Env Reward: 2.12, Steps: 500, Delivered: 8


Total Steps Trained:  49%|████▉     | 493511/1000000 [2:25:49<17:34:20,  8.01step/s]


--- Rollout Summary (Steps 493001 to 493500) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -8.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 766.6536
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  49%|████▉     | 493995/1000000 [2:25:54<1:13:36, 114.57step/s]


Episode 988 finished at step 500 (494000 total). Env Reward: -9.94, Steps: 500, Delivered: 5


Total Steps Trained:  49%|████▉     | 494018/1000000 [2:25:58<12:33:44, 11.19step/s]


--- Rollout Summary (Steps 493501 to 494000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 459.5681
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  49%|████▉     | 494500/1000000 [2:26:03<1:14:19, 113.36step/s]


Episode 989 finished at step 500 (494500 total). Env Reward: -11.77, Steps: 500, Delivered: 3


Total Steps Trained:  49%|████▉     | 494512/1000000 [2:26:07<17:00:28,  8.26step/s]


--- Rollout Summary (Steps 494001 to 494500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 442.5493
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  49%|████▉     | 494997/1000000 [2:26:11<1:16:54, 109.44step/s]


Episode 990 finished at step 500 (495000 total). Env Reward: -7.46, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_000990_map1.pth


Total Steps Trained:  50%|████▉     | 495019/1000000 [2:26:16<12:48:42, 10.95step/s]


--- Rollout Summary (Steps 494501 to 495000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 451.8070
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  50%|████▉     | 495498/1000000 [2:26:20<1:14:27, 112.93step/s]


Episode 991 finished at step 500 (495500 total). Env Reward: -9.42, Steps: 500, Delivered: 5


Total Steps Trained:  50%|████▉     | 495510/1000000 [2:26:25<17:06:33,  8.19step/s]


--- Rollout Summary (Steps 495001 to 495500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 590.0301
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  50%|████▉     | 495990/1000000 [2:26:29<1:10:46, 118.70step/s]


Episode 992 finished at step 500 (496000 total). Env Reward: 3.77, Steps: 500, Delivered: 10


Total Steps Trained:  50%|████▉     | 496013/1000000 [2:26:34<12:25:11, 11.27step/s]


--- Rollout Summary (Steps 495501 to 496000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -8.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 854.8638
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  50%|████▉     | 496497/1000000 [2:26:38<1:13:23, 114.35step/s]


Episode 993 finished at step 500 (496500 total). Env Reward: -13.00, Steps: 500, Delivered: 2


Total Steps Trained:  50%|████▉     | 496521/1000000 [2:26:43<12:08:28, 11.52step/s]


--- Rollout Summary (Steps 496001 to 496500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -8.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 420.0789
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  50%|████▉     | 496994/1000000 [2:26:47<1:11:04, 117.96step/s]


Episode 994 finished at step 500 (497000 total). Env Reward: -5.36, Steps: 500, Delivered: 9


Total Steps Trained:  50%|████▉     | 497017/1000000 [2:26:52<12:35:37, 11.09step/s]


--- Rollout Summary (Steps 496501 to 497000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -8.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 758.1228
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  50%|████▉     | 497497/1000000 [2:26:56<1:16:40, 109.24step/s]


Episode 995 finished at step 500 (497500 total). Env Reward: -6.54, Steps: 500, Delivered: 8


Total Steps Trained:  50%|████▉     | 497519/1000000 [2:27:00<12:58:35, 10.76step/s]


--- Rollout Summary (Steps 497001 to 497500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 580.8672
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  50%|████▉     | 497997/1000000 [2:27:05<1:09:10, 120.96step/s]


Episode 996 finished at step 500 (498000 total). Env Reward: -7.41, Steps: 500, Delivered: 7


Total Steps Trained:  50%|████▉     | 498010/1000000 [2:27:09<15:47:37,  8.83step/s]


--- Rollout Summary (Steps 497501 to 498000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 533.1357
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  50%|████▉     | 498500/1000000 [2:27:13<1:10:57, 117.80step/s]


Episode 997 finished at step 500 (498500 total). Env Reward: -7.34, Steps: 500, Delivered: 7


Total Steps Trained:  50%|████▉     | 498512/1000000 [2:27:18<16:41:17,  8.35step/s]


--- Rollout Summary (Steps 498001 to 498500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -8.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 459.9075
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  50%|████▉     | 498990/1000000 [2:27:22<1:08:24, 122.05step/s]


Episode 998 finished at step 500 (499000 total). Env Reward: -6.59, Steps: 500, Delivered: 8


Total Steps Trained:  50%|████▉     | 499015/1000000 [2:27:27<11:51:27, 11.74step/s]


--- Rollout Summary (Steps 498501 to 499000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -8.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0117
Avg Critic Loss (per minibatch): 459.1575
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  50%|████▉     | 499488/1000000 [2:27:31<1:08:37, 121.56step/s]


Episode 999 finished at step 500 (499500 total). Env Reward: -8.29, Steps: 500, Delivered: 7


Total Steps Trained:  50%|████▉     | 499512/1000000 [2:27:36<11:52:59, 11.70step/s]


--- Rollout Summary (Steps 499001 to 499500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 492.9419
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  50%|████▉     | 499997/1000000 [2:27:40<1:17:17, 107.81step/s]


Episode 1000 finished at step 500 (500000 total). Env Reward: 0.54, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001000_map1.pth


Total Steps Trained:  50%|█████     | 500019/1000000 [2:27:45<12:55:34, 10.74step/s]


--- Rollout Summary (Steps 499501 to 500000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 1075.9740
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  50%|█████     | 500490/1000000 [2:27:49<1:09:16, 120.17step/s]


Episode 1001 finished at step 500 (500500 total). Env Reward: 1.30, Steps: 500, Delivered: 7


Total Steps Trained:  50%|█████     | 500514/1000000 [2:27:54<11:54:03, 11.66step/s]


--- Rollout Summary (Steps 500001 to 500500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -8.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 940.8233
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  50%|█████     | 501000/1000000 [2:27:58<1:17:15, 107.65step/s]


Episode 1002 finished at step 500 (501000 total). Env Reward: -7.57, Steps: 500, Delivered: 7


Total Steps Trained:  50%|█████     | 501011/1000000 [2:28:03<17:57:54,  7.72step/s]


--- Rollout Summary (Steps 500501 to 501000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -8.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 506.0264
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  50%|█████     | 501493/1000000 [2:28:07<1:09:21, 119.78step/s]


Episode 1003 finished at step 500 (501500 total). Env Reward: 2.25, Steps: 500, Delivered: 8


Total Steps Trained:  50%|█████     | 501517/1000000 [2:28:12<12:02:32, 11.50step/s]


--- Rollout Summary (Steps 501001 to 501500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 1321.6229
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  50%|█████     | 501999/1000000 [2:28:16<1:10:44, 117.33step/s]


Episode 1004 finished at step 500 (502000 total). Env Reward: -7.10, Steps: 500, Delivered: 7


Total Steps Trained:  50%|█████     | 502011/1000000 [2:28:20<16:22:59,  8.44step/s]


--- Rollout Summary (Steps 501501 to 502000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 468.4067
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  50%|█████     | 502494/1000000 [2:28:24<1:11:34, 115.85step/s]


Episode 1005 finished at step 500 (502500 total). Env Reward: -9.69, Steps: 500, Delivered: 5


Total Steps Trained:  50%|█████     | 502516/1000000 [2:28:29<13:06:37, 10.54step/s]


--- Rollout Summary (Steps 502001 to 502500) ---
Update Duration: 4.73s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 570.2502
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  50%|█████     | 502995/1000000 [2:28:34<1:12:33, 114.17step/s]


Episode 1006 finished at step 500 (503000 total). Env Reward: -6.44, Steps: 500, Delivered: 8


Total Steps Trained:  50%|█████     | 503019/1000000 [2:28:38<12:03:50, 11.44step/s]


--- Rollout Summary (Steps 502501 to 503000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 449.1942
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  50%|█████     | 503489/1000000 [2:28:42<1:08:22, 121.02step/s]


Episode 1007 finished at step 500 (503500 total). Env Reward: -13.35, Steps: 500, Delivered: 2


Total Steps Trained:  50%|█████     | 503513/1000000 [2:28:47<11:41:46, 11.79step/s]


--- Rollout Summary (Steps 503001 to 503500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 522.7081
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  50%|█████     | 503997/1000000 [2:28:51<1:08:22, 120.90step/s]


Episode 1008 finished at step 500 (504000 total). Env Reward: 17.94, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001008_map1.pth


Total Steps Trained:  50%|█████     | 504010/1000000 [2:28:56<15:39:08,  8.80step/s]


--- Rollout Summary (Steps 503501 to 504000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 3406.1771
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  50%|█████     | 504489/1000000 [2:29:00<1:09:25, 118.97step/s]


Episode 1009 finished at step 500 (504500 total). Env Reward: -9.63, Steps: 500, Delivered: 5


Total Steps Trained:  50%|█████     | 504511/1000000 [2:29:05<12:16:58, 11.21step/s]


--- Rollout Summary (Steps 504001 to 504500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 504.8247
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  50%|█████     | 504996/1000000 [2:29:09<1:07:28, 122.28step/s]


Episode 1010 finished at step 500 (505000 total). Env Reward: -10.21, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001010_map1.pth


Total Steps Trained:  51%|█████     | 505019/1000000 [2:29:14<12:00:21, 11.45step/s]


--- Rollout Summary (Steps 504501 to 505000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 553.5438
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  51%|█████     | 505494/1000000 [2:29:18<1:08:52, 119.67step/s]


Episode 1011 finished at step 500 (505500 total). Env Reward: -11.55, Steps: 500, Delivered: 3


Total Steps Trained:  51%|█████     | 505517/1000000 [2:29:22<11:52:20, 11.57step/s]


--- Rollout Summary (Steps 505001 to 505500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 478.3029
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  51%|█████     | 505994/1000000 [2:29:26<1:09:03, 119.22step/s]


Episode 1012 finished at step 500 (506000 total). Env Reward: 0.92, Steps: 500, Delivered: 6


Total Steps Trained:  51%|█████     | 506016/1000000 [2:29:31<12:56:55, 10.60step/s]


--- Rollout Summary (Steps 505501 to 506000) ---
Update Duration: 4.71s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 743.9736
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  51%|█████     | 506494/1000000 [2:29:35<1:08:32, 120.01step/s]


Episode 1013 finished at step 500 (506500 total). Env Reward: -11.92, Steps: 500, Delivered: 3


Total Steps Trained:  51%|█████     | 506518/1000000 [2:29:40<11:49:01, 11.60step/s]


--- Rollout Summary (Steps 506001 to 506500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 623.5804
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  51%|█████     | 506990/1000000 [2:29:44<1:07:20, 122.03step/s]


Episode 1014 finished at step 500 (507000 total). Env Reward: -7.35, Steps: 500, Delivered: 7


Total Steps Trained:  51%|█████     | 507014/1000000 [2:29:49<11:40:01, 11.74step/s]


--- Rollout Summary (Steps 506501 to 507000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0091
Avg Critic Loss (per minibatch): 551.2502
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  51%|█████     | 507490/1000000 [2:29:53<1:08:18, 120.18step/s]


Episode 1015 finished at step 500 (507500 total). Env Reward: -9.82, Steps: 500, Delivered: 5


Total Steps Trained:  51%|█████     | 507512/1000000 [2:29:58<12:20:10, 11.09step/s]


--- Rollout Summary (Steps 507001 to 507500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0086
Avg Critic Loss (per minibatch): 516.7775
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  51%|█████     | 507996/1000000 [2:30:02<1:09:35, 117.82step/s]


Episode 1016 finished at step 500 (508000 total). Env Reward: -10.01, Steps: 500, Delivered: 5


Total Steps Trained:  51%|█████     | 508019/1000000 [2:30:07<11:59:43, 11.39step/s]


--- Rollout Summary (Steps 507501 to 508000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 558.1853
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  51%|█████     | 508500/1000000 [2:30:11<1:09:01, 118.69step/s]


Episode 1017 finished at step 500 (508500 total). Env Reward: 2.38, Steps: 500, Delivered: 8


Total Steps Trained:  51%|█████     | 508512/1000000 [2:30:15<15:53:24,  8.59step/s]


--- Rollout Summary (Steps 508001 to 508500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 1157.3607
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  51%|█████     | 508994/1000000 [2:30:20<1:08:17, 119.83step/s]


Episode 1018 finished at step 500 (509000 total). Env Reward: -8.73, Steps: 500, Delivered: 6


Total Steps Trained:  51%|█████     | 509016/1000000 [2:30:24<12:08:37, 11.23step/s]


--- Rollout Summary (Steps 508501 to 509000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 411.7576
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  51%|█████     | 509492/1000000 [2:30:28<1:08:09, 119.95step/s]


Episode 1019 finished at step 500 (509500 total). Env Reward: -11.23, Steps: 500, Delivered: 3


Total Steps Trained:  51%|█████     | 509515/1000000 [2:30:33<11:59:21, 11.36step/s]


--- Rollout Summary (Steps 509001 to 509500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 576.9597
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  51%|█████     | 509999/1000000 [2:30:37<1:16:51, 106.25step/s]


Episode 1020 finished at step 500 (510000 total). Env Reward: -8.60, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001020_map1.pth


Total Steps Trained:  51%|█████     | 510010/1000000 [2:30:42<17:40:18,  7.70step/s]


--- Rollout Summary (Steps 509501 to 510000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 1001.6153
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  51%|█████     | 510499/1000000 [2:30:46<1:06:48, 122.10step/s]


Episode 1021 finished at step 500 (510500 total). Env Reward: -10.44, Steps: 500, Delivered: 4


Total Steps Trained:  51%|█████     | 510512/1000000 [2:30:51<15:35:08,  8.72step/s]


--- Rollout Summary (Steps 510001 to 510500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 541.6037
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  51%|█████     | 510995/1000000 [2:30:55<1:05:55, 123.63step/s]


Episode 1022 finished at step 500 (511000 total). Env Reward: -11.29, Steps: 500, Delivered: 3


Total Steps Trained:  51%|█████     | 511020/1000000 [2:31:00<11:14:38, 12.08step/s]


--- Rollout Summary (Steps 510501 to 511000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 533.5598
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  51%|█████     | 511496/1000000 [2:31:04<1:09:02, 117.91step/s]


Episode 1023 finished at step 500 (511500 total). Env Reward: -10.98, Steps: 500, Delivered: 4


Total Steps Trained:  51%|█████     | 511519/1000000 [2:31:09<12:10:57, 11.14step/s]


--- Rollout Summary (Steps 511001 to 511500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 445.7626
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  51%|█████     | 511993/1000000 [2:31:13<1:06:02, 123.17step/s]


Episode 1024 finished at step 500 (512000 total). Env Reward: 2.76, Steps: 500, Delivered: 8


Total Steps Trained:  51%|█████     | 512017/1000000 [2:31:17<11:30:44, 11.77step/s]


--- Rollout Summary (Steps 511501 to 512000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 1001.9947
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  51%|█████     | 512496/1000000 [2:31:22<1:07:17, 120.75step/s]


Episode 1025 finished at step 500 (512500 total). Env Reward: -9.19, Steps: 500, Delivered: 5


Total Steps Trained:  51%|█████▏    | 512520/1000000 [2:31:26<11:28:36, 11.80step/s]


--- Rollout Summary (Steps 512001 to 512500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 455.2938
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  51%|█████▏    | 512990/1000000 [2:31:30<1:06:56, 121.26step/s]


Episode 1026 finished at step 500 (513000 total). Env Reward: -10.65, Steps: 500, Delivered: 4


Total Steps Trained:  51%|█████▏    | 513014/1000000 [2:31:35<11:30:23, 11.76step/s]


--- Rollout Summary (Steps 512501 to 513000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 459.5212
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  51%|█████▏    | 513498/1000000 [2:31:39<1:07:38, 119.87step/s]


Episode 1027 finished at step 500 (513500 total). Env Reward: 2.22, Steps: 500, Delivered: 8


Total Steps Trained:  51%|█████▏    | 513510/1000000 [2:31:44<16:02:13,  8.43step/s]


--- Rollout Summary (Steps 513001 to 513500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 764.3668
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  51%|█████▏    | 513995/1000000 [2:31:48<1:13:29, 110.21step/s]


Episode 1028 finished at step 500 (514000 total). Env Reward: -9.58, Steps: 500, Delivered: 5


Total Steps Trained:  51%|█████▏    | 514017/1000000 [2:31:53<12:26:22, 10.85step/s]


--- Rollout Summary (Steps 513501 to 514000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 466.9418
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  51%|█████▏    | 514500/1000000 [2:31:57<1:09:16, 116.79step/s]


Episode 1029 finished at step 500 (514500 total). Env Reward: -7.64, Steps: 500, Delivered: 7


Total Steps Trained:  51%|█████▏    | 514512/1000000 [2:32:02<16:02:08,  8.41step/s]


--- Rollout Summary (Steps 514001 to 514500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 778.4381
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  51%|█████▏    | 514993/1000000 [2:32:06<1:06:38, 121.30step/s]


Episode 1030 finished at step 500 (515000 total). Env Reward: 8.12, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001030_map1.pth


Total Steps Trained:  52%|█████▏    | 515016/1000000 [2:32:11<12:22:25, 10.89step/s]


--- Rollout Summary (Steps 514501 to 515000) ---
Update Duration: 4.70s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 1579.1780
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  52%|█████▏    | 515496/1000000 [2:32:15<1:17:01, 104.84step/s]


Episode 1031 finished at step 500 (515500 total). Env Reward: -7.25, Steps: 500, Delivered: 7


Total Steps Trained:  52%|█████▏    | 515519/1000000 [2:32:20<12:25:35, 10.83step/s]


--- Rollout Summary (Steps 515001 to 515500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 418.2378
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  52%|█████▏    | 515991/1000000 [2:32:23<1:07:33, 119.40step/s]


Episode 1032 finished at step 500 (516000 total). Env Reward: -3.58, Steps: 500, Delivered: 2


Total Steps Trained:  52%|█████▏    | 516014/1000000 [2:32:28<11:52:31, 11.32step/s]


--- Rollout Summary (Steps 515501 to 516000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 788.4596
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  52%|█████▏    | 516498/1000000 [2:32:32<1:06:28, 121.23step/s]


Episode 1033 finished at step 500 (516500 total). Env Reward: -8.65, Steps: 500, Delivered: 6


Total Steps Trained:  52%|█████▏    | 516511/1000000 [2:32:37<15:10:56,  8.85step/s]


--- Rollout Summary (Steps 516001 to 516500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 418.3631
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  52%|█████▏    | 516996/1000000 [2:32:41<1:09:33, 115.72step/s]


Episode 1034 finished at step 500 (517000 total). Env Reward: -7.77, Steps: 500, Delivered: 7


Total Steps Trained:  52%|█████▏    | 517020/1000000 [2:32:46<11:38:40, 11.52step/s]


--- Rollout Summary (Steps 516501 to 517000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 534.8652
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  52%|█████▏    | 517498/1000000 [2:32:50<1:10:02, 114.80step/s]


Episode 1035 finished at step 500 (517500 total). Env Reward: -8.11, Steps: 500, Delivered: 7


Total Steps Trained:  52%|█████▏    | 517510/1000000 [2:32:55<16:06:32,  8.32step/s]


--- Rollout Summary (Steps 517001 to 517500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 651.4021
Avg Entropy (per minibatch): 2.5586
------------------------------


Total Steps Trained:  52%|█████▏    | 517988/1000000 [2:32:59<1:05:49, 122.04step/s]


Episode 1036 finished at step 500 (518000 total). Env Reward: 1.04, Steps: 500, Delivered: 7


Total Steps Trained:  52%|█████▏    | 518012/1000000 [2:33:04<11:20:39, 11.80step/s]


--- Rollout Summary (Steps 517501 to 518000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 1109.0246
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  52%|█████▏    | 518500/1000000 [2:33:08<1:11:08, 112.81step/s]


Episode 1037 finished at step 500 (518500 total). Env Reward: -10.96, Steps: 500, Delivered: 3


Total Steps Trained:  52%|█████▏    | 518512/1000000 [2:33:13<16:16:30,  8.22step/s]


--- Rollout Summary (Steps 518001 to 518500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 540.5722
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  52%|█████▏    | 518996/1000000 [2:33:17<1:07:06, 119.46step/s]


Episode 1038 finished at step 500 (519000 total). Env Reward: -10.99, Steps: 500, Delivered: 4


Total Steps Trained:  52%|█████▏    | 519019/1000000 [2:33:22<11:52:52, 11.25step/s]


--- Rollout Summary (Steps 518501 to 519000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.17
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 516.1120
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  52%|█████▏    | 519496/1000000 [2:33:26<1:11:14, 112.41step/s]


Episode 1039 finished at step 500 (519500 total). Env Reward: -7.98, Steps: 500, Delivered: 7


Total Steps Trained:  52%|█████▏    | 519519/1000000 [2:33:31<12:07:57, 11.00step/s]


--- Rollout Summary (Steps 519001 to 519500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.12
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 633.5902
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  52%|█████▏    | 519993/1000000 [2:33:35<1:06:20, 120.58step/s]


Episode 1040 finished at step 500 (520000 total). Env Reward: -8.22, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001040_map1.pth


Total Steps Trained:  52%|█████▏    | 520017/1000000 [2:33:39<11:32:28, 11.55step/s]


--- Rollout Summary (Steps 519501 to 520000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 554.4366
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  52%|█████▏    | 520490/1000000 [2:33:44<1:06:34, 120.05step/s]


Episode 1041 finished at step 500 (520500 total). Env Reward: -2.59, Steps: 500, Delivered: 3


Total Steps Trained:  52%|█████▏    | 520514/1000000 [2:33:48<11:27:00, 11.63step/s]


--- Rollout Summary (Steps 520001 to 520500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 666.4114
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  52%|█████▏    | 520995/1000000 [2:33:53<1:11:21, 111.87step/s]


Episode 1042 finished at step 500 (521000 total). Env Reward: -3.60, Steps: 500, Delivered: 11


Total Steps Trained:  52%|█████▏    | 521018/1000000 [2:33:57<12:10:06, 10.93step/s]


--- Rollout Summary (Steps 520501 to 521000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 630.2516
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  52%|█████▏    | 521490/1000000 [2:34:01<1:06:03, 120.72step/s]


Episode 1043 finished at step 500 (521500 total). Env Reward: 2.33, Steps: 500, Delivered: 8


Total Steps Trained:  52%|█████▏    | 521514/1000000 [2:34:06<11:22:25, 11.69step/s]


--- Rollout Summary (Steps 521001 to 521500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 881.5180
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  52%|█████▏    | 521989/1000000 [2:34:10<1:06:47, 119.28step/s]


Episode 1044 finished at step 500 (522000 total). Env Reward: -6.36, Steps: 500, Delivered: 8


Total Steps Trained:  52%|█████▏    | 522012/1000000 [2:34:15<12:02:57, 11.02step/s]


--- Rollout Summary (Steps 521501 to 522000) ---
Update Duration: 4.75s
Avg Episode Reward (last 100): -6.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 603.0710
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  52%|█████▏    | 522496/1000000 [2:34:19<1:08:37, 115.97step/s]


Episode 1045 finished at step 500 (522500 total). Env Reward: -10.09, Steps: 500, Delivered: 5


Total Steps Trained:  52%|█████▏    | 522518/1000000 [2:34:24<12:01:26, 11.03step/s]


--- Rollout Summary (Steps 522001 to 522500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 415.4098
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  52%|█████▏    | 522997/1000000 [2:34:28<1:08:07, 116.71step/s]


Episode 1046 finished at step 500 (523000 total). Env Reward: -12.38, Steps: 500, Delivered: 2


Total Steps Trained:  52%|█████▏    | 523021/1000000 [2:34:33<11:21:26, 11.67step/s]


--- Rollout Summary (Steps 522501 to 523000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0078
Avg Critic Loss (per minibatch): 653.9379
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  52%|█████▏    | 523500/1000000 [2:34:37<1:12:22, 109.74step/s]


Episode 1047 finished at step 500 (523500 total). Env Reward: -9.04, Steps: 500, Delivered: 6


Total Steps Trained:  52%|█████▏    | 523511/1000000 [2:34:42<16:42:53,  7.92step/s]


--- Rollout Summary (Steps 523001 to 523500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 715.6784
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  52%|█████▏    | 523989/1000000 [2:34:46<1:10:14, 112.94step/s]


Episode 1048 finished at step 500 (524000 total). Env Reward: -9.22, Steps: 500, Delivered: 5


Total Steps Trained:  52%|█████▏    | 524012/1000000 [2:34:51<11:58:17, 11.04step/s]


--- Rollout Summary (Steps 523501 to 524000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 648.2808
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  52%|█████▏    | 524494/1000000 [2:34:55<1:03:55, 123.98step/s]


Episode 1049 finished at step 500 (524500 total). Env Reward: -8.73, Steps: 500, Delivered: 6


Total Steps Trained:  52%|█████▏    | 524519/1000000 [2:35:00<11:09:20, 11.84step/s]


--- Rollout Summary (Steps 524001 to 524500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0001
Avg Critic Loss (per minibatch): 482.0416
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  52%|█████▏    | 524988/1000000 [2:35:03<1:04:32, 122.67step/s]


Episode 1050 finished at step 500 (525000 total). Env Reward: -12.69, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001050_map1.pth


Total Steps Trained:  53%|█████▎    | 525012/1000000 [2:35:08<11:22:35, 11.60step/s]


--- Rollout Summary (Steps 524501 to 525000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 497.3744
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  53%|█████▎    | 525496/1000000 [2:35:13<1:11:09, 111.14step/s]


Episode 1051 finished at step 500 (525500 total). Env Reward: -6.67, Steps: 500, Delivered: 8


Total Steps Trained:  53%|█████▎    | 525517/1000000 [2:35:17<12:43:24, 10.36step/s]


--- Rollout Summary (Steps 525001 to 525500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 817.8516
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  53%|█████▎    | 525998/1000000 [2:35:22<1:11:04, 111.14step/s]


Episode 1052 finished at step 500 (526000 total). Env Reward: -6.42, Steps: 500, Delivered: 8


Total Steps Trained:  53%|█████▎    | 526021/1000000 [2:35:26<11:54:58, 11.05step/s]


--- Rollout Summary (Steps 525501 to 526000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 578.3479
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  53%|█████▎    | 526499/1000000 [2:35:30<1:08:18, 115.52step/s]


Episode 1053 finished at step 500 (526500 total). Env Reward: -7.42, Steps: 500, Delivered: 7


Total Steps Trained:  53%|█████▎    | 526511/1000000 [2:35:35<16:05:47,  8.17step/s]


--- Rollout Summary (Steps 526001 to 526500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 527.4288
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  53%|█████▎    | 526996/1000000 [2:35:39<1:06:47, 118.02step/s]


Episode 1054 finished at step 500 (527000 total). Env Reward: -10.71, Steps: 500, Delivered: 4


Total Steps Trained:  53%|█████▎    | 527020/1000000 [2:35:44<11:32:47, 11.38step/s]


--- Rollout Summary (Steps 526501 to 527000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 517.7242
Avg Entropy (per minibatch): 2.5588
------------------------------


Total Steps Trained:  53%|█████▎    | 527496/1000000 [2:35:48<1:06:09, 119.04step/s]


Episode 1055 finished at step 500 (527500 total). Env Reward: -9.47, Steps: 500, Delivered: 5


Total Steps Trained:  53%|█████▎    | 527519/1000000 [2:35:53<11:47:29, 11.13step/s]


--- Rollout Summary (Steps 527001 to 527500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 562.8674
Avg Entropy (per minibatch): 2.5622
------------------------------


Total Steps Trained:  53%|█████▎    | 527993/1000000 [2:35:57<1:06:34, 118.16step/s]


Episode 1056 finished at step 500 (528000 total). Env Reward: -9.20, Steps: 500, Delivered: 6


Total Steps Trained:  53%|█████▎    | 528016/1000000 [2:36:02<11:38:15, 11.27step/s]


--- Rollout Summary (Steps 527501 to 528000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 480.7588
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  53%|█████▎    | 528496/1000000 [2:36:06<1:05:30, 119.97step/s]


Episode 1057 finished at step 500 (528500 total). Env Reward: -5.70, Steps: 500, Delivered: 9


Total Steps Trained:  53%|█████▎    | 528521/1000000 [2:36:11<10:57:08, 11.96step/s]


--- Rollout Summary (Steps 528001 to 528500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 527.8600
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  53%|█████▎    | 528995/1000000 [2:36:15<1:06:05, 118.78step/s]


Episode 1058 finished at step 500 (529000 total). Env Reward: -9.47, Steps: 500, Delivered: 5


Total Steps Trained:  53%|█████▎    | 529018/1000000 [2:36:19<11:28:32, 11.40step/s]


--- Rollout Summary (Steps 528501 to 529000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 499.5144
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  53%|█████▎    | 529492/1000000 [2:36:24<1:11:01, 110.42step/s]


Episode 1059 finished at step 500 (529500 total). Env Reward: -8.38, Steps: 500, Delivered: 6


Total Steps Trained:  53%|█████▎    | 529515/1000000 [2:36:29<12:01:08, 10.87step/s]


--- Rollout Summary (Steps 529001 to 529500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -6.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0085
Avg Critic Loss (per minibatch): 727.3418
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  53%|█████▎    | 529996/1000000 [2:36:33<1:06:10, 118.38step/s]


Episode 1060 finished at step 500 (530000 total). Env Reward: -12.85, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001060_map1.pth


Total Steps Trained:  53%|█████▎    | 530019/1000000 [2:36:38<11:43:41, 11.13step/s]


--- Rollout Summary (Steps 529501 to 530000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 468.9112
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  53%|█████▎    | 530494/1000000 [2:36:42<1:03:51, 122.55step/s]


Episode 1061 finished at step 500 (530500 total). Env Reward: 0.20, Steps: 500, Delivered: 6


Total Steps Trained:  53%|█████▎    | 530518/1000000 [2:36:46<11:16:02, 11.57step/s]


--- Rollout Summary (Steps 530001 to 530500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 929.6217
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  53%|█████▎    | 530991/1000000 [2:36:51<1:08:48, 113.59step/s]


Episode 1062 finished at step 500 (531000 total). Env Reward: 3.22, Steps: 500, Delivered: 9


Total Steps Trained:  53%|█████▎    | 531014/1000000 [2:36:56<12:28:02, 10.45step/s]


--- Rollout Summary (Steps 530501 to 531000) ---
Update Duration: 4.83s
Avg Episode Reward (last 100): -6.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 907.9361
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  53%|█████▎    | 531500/1000000 [2:37:00<1:08:17, 114.33step/s]


Episode 1063 finished at step 500 (531500 total). Env Reward: -6.12, Steps: 500, Delivered: 9


Total Steps Trained:  53%|█████▎    | 531512/1000000 [2:37:04<15:39:39,  8.31step/s]


--- Rollout Summary (Steps 531001 to 531500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 553.5886
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  53%|█████▎    | 532000/1000000 [2:37:09<1:06:05, 118.02step/s]


Episode 1064 finished at step 500 (532000 total). Env Reward: -10.66, Steps: 500, Delivered: 4


Total Steps Trained:  53%|█████▎    | 532012/1000000 [2:37:13<15:36:29,  8.33step/s]


--- Rollout Summary (Steps 531501 to 532000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 507.0409
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  53%|█████▎    | 532498/1000000 [2:37:18<1:09:43, 111.74step/s]


Episode 1065 finished at step 500 (532500 total). Env Reward: -6.34, Steps: 500, Delivered: 8


Total Steps Trained:  53%|█████▎    | 532522/1000000 [2:37:22<11:24:30, 11.38step/s]


--- Rollout Summary (Steps 532001 to 532500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 461.1619
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  53%|█████▎    | 532998/1000000 [2:37:26<1:08:11, 114.15step/s]


Episode 1066 finished at step 500 (533000 total). Env Reward: 11.65, Steps: 500, Delivered: 8


Total Steps Trained:  53%|█████▎    | 533021/1000000 [2:37:31<11:27:38, 11.32step/s]


--- Rollout Summary (Steps 532501 to 533000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 1115.9955
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  53%|█████▎    | 533489/1000000 [2:37:35<1:07:05, 115.89step/s]


Episode 1067 finished at step 500 (533500 total). Env Reward: -8.27, Steps: 500, Delivered: 6


Total Steps Trained:  53%|█████▎    | 533512/1000000 [2:37:40<11:26:29, 11.33step/s]


--- Rollout Summary (Steps 533001 to 533500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 501.6785
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  53%|█████▎    | 533998/1000000 [2:37:44<1:03:02, 123.21step/s]


Episode 1068 finished at step 500 (534000 total). Env Reward: -11.76, Steps: 500, Delivered: 3


Total Steps Trained:  53%|█████▎    | 534011/1000000 [2:37:48<14:47:05,  8.75step/s]


--- Rollout Summary (Steps 533501 to 534000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -6.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 558.4475
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  53%|█████▎    | 534491/1000000 [2:37:53<1:04:30, 120.28step/s]


Episode 1069 finished at step 500 (534500 total). Env Reward: -1.61, Steps: 500, Delivered: 4


Total Steps Trained:  53%|█████▎    | 534515/1000000 [2:37:57<11:26:40, 11.30step/s]


--- Rollout Summary (Steps 534001 to 534500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -6.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 754.8949
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  53%|█████▎    | 534990/1000000 [2:38:01<1:04:37, 119.94step/s]


Episode 1070 finished at step 500 (535000 total). Env Reward: -7.60, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001070_map1.pth


Total Steps Trained:  54%|█████▎    | 535014/1000000 [2:38:06<11:05:28, 11.65step/s]


--- Rollout Summary (Steps 534501 to 535000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 747.5474
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  54%|█████▎    | 535489/1000000 [2:38:10<1:04:00, 120.97step/s]


Episode 1071 finished at step 500 (535500 total). Env Reward: -10.71, Steps: 500, Delivered: 3


Total Steps Trained:  54%|█████▎    | 535513/1000000 [2:38:15<10:58:57, 11.75step/s]


--- Rollout Summary (Steps 535001 to 535500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 434.5481
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  54%|█████▎    | 536000/1000000 [2:38:19<1:06:19, 116.59step/s]


Episode 1072 finished at step 500 (536000 total). Env Reward: -10.91, Steps: 500, Delivered: 3


Total Steps Trained:  54%|█████▎    | 536012/1000000 [2:38:24<15:34:32,  8.27step/s]


--- Rollout Summary (Steps 535501 to 536000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 560.2200
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  54%|█████▎    | 536493/1000000 [2:38:28<1:11:31, 108.01step/s]


Episode 1073 finished at step 500 (536500 total). Env Reward: -7.12, Steps: 500, Delivered: 8


Total Steps Trained:  54%|█████▎    | 536515/1000000 [2:38:33<11:53:05, 10.83step/s]


--- Rollout Summary (Steps 536001 to 536500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 531.7006
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  54%|█████▎    | 537000/1000000 [2:38:37<1:05:12, 118.32step/s]


Episode 1074 finished at step 500 (537000 total). Env Reward: -9.16, Steps: 500, Delivered: 6


Total Steps Trained:  54%|█████▎    | 537012/1000000 [2:38:42<15:03:32,  8.54step/s]


--- Rollout Summary (Steps 536501 to 537000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 543.5751
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  54%|█████▍    | 537500/1000000 [2:38:46<1:06:32, 115.84step/s]


Episode 1075 finished at step 500 (537500 total). Env Reward: -0.90, Steps: 500, Delivered: 4


Total Steps Trained:  54%|█████▍    | 537512/1000000 [2:38:50<15:30:42,  8.28step/s]


--- Rollout Summary (Steps 537001 to 537500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 806.8084
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  54%|█████▍    | 537989/1000000 [2:38:55<1:06:47, 115.30step/s]


Episode 1076 finished at step 500 (538000 total). Env Reward: -7.81, Steps: 500, Delivered: 7


Total Steps Trained:  54%|█████▍    | 538010/1000000 [2:39:00<12:43:04, 10.09step/s]


--- Rollout Summary (Steps 537501 to 538000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -6.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 703.1883
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  54%|█████▍    | 538495/1000000 [2:39:04<1:02:32, 122.98step/s]


Episode 1077 finished at step 500 (538500 total). Env Reward: -11.62, Steps: 500, Delivered: 3


Total Steps Trained:  54%|█████▍    | 538520/1000000 [2:39:08<10:39:04, 12.04step/s]


--- Rollout Summary (Steps 538001 to 538500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 456.0601
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  54%|█████▍    | 538990/1000000 [2:39:12<1:06:36, 115.35step/s]


Episode 1078 finished at step 500 (539000 total). Env Reward: -0.79, Steps: 500, Delivered: 5


Total Steps Trained:  54%|█████▍    | 539013/1000000 [2:39:17<11:22:04, 11.26step/s]


--- Rollout Summary (Steps 538501 to 539000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 695.1747
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  54%|█████▍    | 539493/1000000 [2:39:21<1:09:12, 110.89step/s]


Episode 1079 finished at step 500 (539500 total). Env Reward: 0.80, Steps: 500, Delivered: 6


Total Steps Trained:  54%|█████▍    | 539517/1000000 [2:39:26<11:13:24, 11.40step/s]


--- Rollout Summary (Steps 539001 to 539500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 939.9823
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  54%|█████▍    | 539990/1000000 [2:39:30<1:11:29, 107.25step/s]


Episode 1080 finished at step 500 (540000 total). Env Reward: -10.11, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001080_map1.pth


Total Steps Trained:  54%|█████▍    | 540012/1000000 [2:39:35<11:52:11, 10.76step/s]


--- Rollout Summary (Steps 539501 to 540000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -6.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 542.5659
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  54%|█████▍    | 540494/1000000 [2:39:39<1:03:26, 120.73step/s]


Episode 1081 finished at step 500 (540500 total). Env Reward: -12.04, Steps: 500, Delivered: 2


Total Steps Trained:  54%|█████▍    | 540517/1000000 [2:39:44<11:12:20, 11.39step/s]


--- Rollout Summary (Steps 540001 to 540500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): 0.0002
Avg Critic Loss (per minibatch): 620.5898
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  54%|█████▍    | 540990/1000000 [2:39:48<1:06:47, 114.53step/s]


Episode 1082 finished at step 500 (541000 total). Env Reward: -11.69, Steps: 500, Delivered: 3


Total Steps Trained:  54%|█████▍    | 541013/1000000 [2:39:53<11:40:47, 10.92step/s]


--- Rollout Summary (Steps 540501 to 541000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -6.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 581.4237
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  54%|█████▍    | 541489/1000000 [2:39:57<1:03:03, 121.18step/s]


Episode 1083 finished at step 500 (541500 total). Env Reward: -10.69, Steps: 500, Delivered: 4


Total Steps Trained:  54%|█████▍    | 541512/1000000 [2:40:02<11:21:28, 11.21step/s]


--- Rollout Summary (Steps 541001 to 541500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -6.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 495.7043
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  54%|█████▍    | 541999/1000000 [2:40:06<1:03:57, 119.36step/s]


Episode 1084 finished at step 500 (542000 total). Env Reward: -10.44, Steps: 500, Delivered: 4


Total Steps Trained:  54%|█████▍    | 542011/1000000 [2:40:11<14:44:31,  8.63step/s]


--- Rollout Summary (Steps 541501 to 542000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): 0.0007
Avg Critic Loss (per minibatch): 466.9481
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  54%|█████▍    | 542490/1000000 [2:40:15<1:02:02, 122.90step/s]


Episode 1085 finished at step 500 (542500 total). Env Reward: -12.09, Steps: 500, Delivered: 3


Total Steps Trained:  54%|█████▍    | 542514/1000000 [2:40:20<10:57:12, 11.60step/s]


--- Rollout Summary (Steps 542001 to 542500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -6.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 570.8854
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  54%|█████▍    | 542994/1000000 [2:40:24<1:04:40, 117.76step/s]


Episode 1086 finished at step 500 (543000 total). Env Reward: -0.49, Steps: 500, Delivered: 6


Total Steps Trained:  54%|█████▍    | 543017/1000000 [2:40:29<11:19:00, 11.22step/s]


--- Rollout Summary (Steps 542501 to 543000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 957.5307
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  54%|█████▍    | 543498/1000000 [2:40:33<1:04:46, 117.45step/s]


Episode 1087 finished at step 500 (543500 total). Env Reward: -6.79, Steps: 500, Delivered: 8


Total Steps Trained:  54%|█████▍    | 543510/1000000 [2:40:38<15:21:38,  8.25step/s]


--- Rollout Summary (Steps 543001 to 543500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -6.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 506.8674
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  54%|█████▍    | 543997/1000000 [2:40:42<1:06:06, 114.96step/s]


Episode 1088 finished at step 500 (544000 total). Env Reward: 3.68, Steps: 500, Delivered: 9


Total Steps Trained:  54%|█████▍    | 544020/1000000 [2:40:47<11:22:28, 11.14step/s]


--- Rollout Summary (Steps 543501 to 544000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 1291.0800
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  54%|█████▍    | 544494/1000000 [2:40:51<1:02:38, 121.20step/s]


Episode 1089 finished at step 500 (544500 total). Env Reward: -2.33, Steps: 500, Delivered: 4


Total Steps Trained:  54%|█████▍    | 544518/1000000 [2:40:55<10:44:23, 11.78step/s]


--- Rollout Summary (Steps 544001 to 544500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 756.1721
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  54%|█████▍    | 544992/1000000 [2:40:59<1:06:40, 113.72step/s]


Episode 1090 finished at step 500 (545000 total). Env Reward: -9.67, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001090_map1.pth


Total Steps Trained:  55%|█████▍    | 545014/1000000 [2:41:04<11:27:14, 11.03step/s]


--- Rollout Summary (Steps 544501 to 545000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 661.6641
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  55%|█████▍    | 545495/1000000 [2:41:09<1:03:19, 119.63step/s]


Episode 1091 finished at step 500 (545500 total). Env Reward: -12.75, Steps: 500, Delivered: 2


Total Steps Trained:  55%|█████▍    | 545518/1000000 [2:41:13<11:06:18, 11.37step/s]


--- Rollout Summary (Steps 545001 to 545500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 522.2586
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  55%|█████▍    | 545998/1000000 [2:41:17<1:06:37, 113.57step/s]


Episode 1092 finished at step 500 (546000 total). Env Reward: -12.09, Steps: 500, Delivered: 2


Total Steps Trained:  55%|█████▍    | 546010/1000000 [2:41:22<15:08:13,  8.33step/s]


--- Rollout Summary (Steps 545501 to 546000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -6.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 571.5457
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  55%|█████▍    | 546498/1000000 [2:41:26<1:04:46, 116.70step/s]


Episode 1093 finished at step 500 (546500 total). Env Reward: -9.14, Steps: 500, Delivered: 5


Total Steps Trained:  55%|█████▍    | 546521/1000000 [2:41:31<11:09:56, 11.28step/s]


--- Rollout Summary (Steps 546001 to 546500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 563.3426
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  55%|█████▍    | 546996/1000000 [2:41:35<1:01:33, 122.65step/s]


Episode 1094 finished at step 500 (547000 total). Env Reward: -9.45, Steps: 500, Delivered: 5


Total Steps Trained:  55%|█████▍    | 547020/1000000 [2:41:40<11:03:47, 11.37step/s]


--- Rollout Summary (Steps 546501 to 547000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -6.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 366.7375
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  55%|█████▍    | 547491/1000000 [2:41:44<1:02:40, 120.33step/s]


Episode 1095 finished at step 500 (547500 total). Env Reward: -14.46, Steps: 500, Delivered: 0


Total Steps Trained:  55%|█████▍    | 547516/1000000 [2:41:49<10:32:48, 11.92step/s]


--- Rollout Summary (Steps 547001 to 547500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 577.8360
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  55%|█████▍    | 547994/1000000 [2:41:53<1:02:28, 120.58step/s]


Episode 1096 finished at step 500 (548000 total). Env Reward: -10.63, Steps: 500, Delivered: 4


Total Steps Trained:  55%|█████▍    | 548017/1000000 [2:41:57<11:04:48, 11.33step/s]


--- Rollout Summary (Steps 547501 to 548000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 484.8158
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  55%|█████▍    | 548490/1000000 [2:42:02<1:05:58, 114.05step/s]


Episode 1097 finished at step 500 (548500 total). Env Reward: 11.58, Steps: 500, Delivered: 8


Total Steps Trained:  55%|█████▍    | 548512/1000000 [2:42:06<11:33:59, 10.84step/s]


--- Rollout Summary (Steps 548001 to 548500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 1864.7523
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  55%|█████▍    | 549000/1000000 [2:42:11<1:06:36, 112.86step/s]


Episode 1098 finished at step 500 (549000 total). Env Reward: -8.49, Steps: 500, Delivered: 6


Total Steps Trained:  55%|█████▍    | 549012/1000000 [2:42:15<15:01:14,  8.34step/s]


--- Rollout Summary (Steps 548501 to 549000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 406.1305
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  55%|█████▍    | 549494/1000000 [2:42:19<1:05:58, 113.82step/s]


Episode 1099 finished at step 500 (549500 total). Env Reward: -10.39, Steps: 500, Delivered: 4


Total Steps Trained:  55%|█████▍    | 549517/1000000 [2:42:24<11:11:33, 11.18step/s]


--- Rollout Summary (Steps 549001 to 549500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 459.6518
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  55%|█████▍    | 549994/1000000 [2:42:28<1:02:42, 119.60step/s]


Episode 1100 finished at step 500 (550000 total). Env Reward: -8.45, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001100_map1.pth


Total Steps Trained:  55%|█████▌    | 550018/1000000 [2:42:33<10:44:19, 11.64step/s]


--- Rollout Summary (Steps 549501 to 550000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 386.1540
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  55%|█████▌    | 550498/1000000 [2:42:37<1:04:56, 115.36step/s]


Episode 1101 finished at step 500 (550500 total). Env Reward: 1.06, Steps: 500, Delivered: 7


Total Steps Trained:  55%|█████▌    | 550521/1000000 [2:42:42<11:22:25, 10.98step/s]


--- Rollout Summary (Steps 550001 to 550500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -6.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 829.5815
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  55%|█████▌    | 550997/1000000 [2:42:46<1:02:56, 118.89step/s]


Episode 1102 finished at step 500 (551000 total). Env Reward: -7.74, Steps: 500, Delivered: 7


Total Steps Trained:  55%|█████▌    | 551020/1000000 [2:42:51<11:20:03, 11.00step/s]


--- Rollout Summary (Steps 550501 to 551000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -6.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 643.1490
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  55%|█████▌    | 551500/1000000 [2:42:55<1:01:28, 121.61step/s]


Episode 1103 finished at step 500 (551500 total). Env Reward: -13.14, Steps: 500, Delivered: 1


Total Steps Trained:  55%|█████▌    | 551513/1000000 [2:42:59<14:03:43,  8.86step/s]


--- Rollout Summary (Steps 551001 to 551500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 597.9850
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  55%|█████▌    | 551991/1000000 [2:43:04<1:09:11, 107.91step/s]


Episode 1104 finished at step 500 (552000 total). Env Reward: 3.41, Steps: 500, Delivered: 9


Total Steps Trained:  55%|█████▌    | 552013/1000000 [2:43:08<11:37:02, 10.71step/s]


--- Rollout Summary (Steps 551501 to 552000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 526.6895
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  55%|█████▌    | 552498/1000000 [2:43:13<1:05:45, 113.41step/s]


Episode 1105 finished at step 500 (552500 total). Env Reward: -10.27, Steps: 500, Delivered: 4


Total Steps Trained:  55%|█████▌    | 552510/1000000 [2:43:17<15:02:33,  8.26step/s]


--- Rollout Summary (Steps 552001 to 552500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 581.2122
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  55%|█████▌    | 552989/1000000 [2:43:22<1:01:25, 121.29step/s]


Episode 1106 finished at step 500 (553000 total). Env Reward: -10.55, Steps: 500, Delivered: 4


Total Steps Trained:  55%|█████▌    | 553013/1000000 [2:43:26<10:37:12, 11.69step/s]


--- Rollout Summary (Steps 552501 to 553000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 553.3536
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  55%|█████▌    | 553497/1000000 [2:43:30<1:01:57, 120.12step/s]


Episode 1107 finished at step 500 (553500 total). Env Reward: -11.66, Steps: 500, Delivered: 3


Total Steps Trained:  55%|█████▌    | 553510/1000000 [2:43:35<14:00:59,  8.85step/s]


--- Rollout Summary (Steps 553001 to 553500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 572.5901
Avg Entropy (per minibatch): 2.5588
------------------------------


Total Steps Trained:  55%|█████▌    | 553997/1000000 [2:43:39<1:00:44, 122.36step/s]


Episode 1108 finished at step 500 (554000 total). Env Reward: -12.75, Steps: 500, Delivered: 2


Total Steps Trained:  55%|█████▌    | 554010/1000000 [2:43:44<14:24:23,  8.60step/s]


--- Rollout Summary (Steps 553501 to 554000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 481.9439
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  55%|█████▌    | 554493/1000000 [2:43:48<1:04:03, 115.91step/s]


Episode 1109 finished at step 500 (554500 total). Env Reward: -8.78, Steps: 500, Delivered: 6


Total Steps Trained:  55%|█████▌    | 554515/1000000 [2:43:53<11:20:22, 10.91step/s]


--- Rollout Summary (Steps 554001 to 554500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 404.0012
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  55%|█████▌    | 554997/1000000 [2:43:57<1:03:46, 116.29step/s]


Episode 1110 finished at step 500 (555000 total). Env Reward: -6.38, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001110_map1.pth


Total Steps Trained:  56%|█████▌    | 555021/1000000 [2:44:02<10:46:16, 11.48step/s]


--- Rollout Summary (Steps 554501 to 555000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 383.3613
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  56%|█████▌    | 555488/1000000 [2:44:06<1:01:04, 121.31step/s]


Episode 1111 finished at step 500 (555500 total). Env Reward: -12.76, Steps: 500, Delivered: 2


Total Steps Trained:  56%|█████▌    | 555511/1000000 [2:44:10<10:45:31, 11.48step/s]


--- Rollout Summary (Steps 555001 to 555500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 468.4045
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  56%|█████▌    | 555994/1000000 [2:44:14<1:05:12, 113.49step/s]


Episode 1112 finished at step 500 (556000 total). Env Reward: -12.79, Steps: 500, Delivered: 2


Total Steps Trained:  56%|█████▌    | 556016/1000000 [2:44:19<11:25:59, 10.79step/s]


--- Rollout Summary (Steps 555501 to 556000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.18
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 646.8990
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  56%|█████▌    | 556497/1000000 [2:44:23<1:03:06, 117.13step/s]


Episode 1113 finished at step 500 (556500 total). Env Reward: -8.79, Steps: 500, Delivered: 6


Total Steps Trained:  56%|█████▌    | 556519/1000000 [2:44:28<11:17:09, 10.92step/s]


--- Rollout Summary (Steps 556001 to 556500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 669.9163
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  56%|█████▌    | 557000/1000000 [2:44:32<1:02:32, 118.05step/s]


Episode 1114 finished at step 500 (557000 total). Env Reward: -0.61, Steps: 500, Delivered: 5


Total Steps Trained:  56%|█████▌    | 557012/1000000 [2:44:37<14:36:12,  8.43step/s]


--- Rollout Summary (Steps 556501 to 557000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 924.8573
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  56%|█████▌    | 557494/1000000 [2:44:41<1:07:26, 109.34step/s]


Episode 1115 finished at step 500 (557500 total). Env Reward: -9.22, Steps: 500, Delivered: 5


Total Steps Trained:  56%|█████▌    | 557514/1000000 [2:44:46<12:07:54, 10.13step/s]


--- Rollout Summary (Steps 557001 to 557500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 646.4586
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  56%|█████▌    | 557999/1000000 [2:44:50<1:06:27, 110.86step/s]


Episode 1116 finished at step 500 (558000 total). Env Reward: -8.49, Steps: 500, Delivered: 6


Total Steps Trained:  56%|█████▌    | 558011/1000000 [2:44:55<14:57:25,  8.21step/s]


--- Rollout Summary (Steps 557501 to 558000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 511.2995
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  56%|█████▌    | 558499/1000000 [2:44:59<1:04:52, 113.43step/s]


Episode 1117 finished at step 500 (558500 total). Env Reward: -12.74, Steps: 500, Delivered: 2


Total Steps Trained:  56%|█████▌    | 558511/1000000 [2:45:04<14:49:04,  8.28step/s]


--- Rollout Summary (Steps 558001 to 558500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 560.8190
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  56%|█████▌    | 558999/1000000 [2:45:08<1:02:33, 117.49step/s]


Episode 1118 finished at step 500 (559000 total). Env Reward: -11.07, Steps: 500, Delivered: 4


Total Steps Trained:  56%|█████▌    | 559011/1000000 [2:45:12<14:38:57,  8.36step/s]


--- Rollout Summary (Steps 558501 to 559000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 509.8660
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  56%|█████▌    | 559496/1000000 [2:45:16<1:00:53, 120.57step/s]


Episode 1119 finished at step 500 (559500 total). Env Reward: -10.30, Steps: 500, Delivered: 4


Total Steps Trained:  56%|█████▌    | 559520/1000000 [2:45:21<10:48:22, 11.32step/s]


--- Rollout Summary (Steps 559001 to 559500) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 445.1694
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  56%|█████▌    | 559998/1000000 [2:45:26<1:07:42, 108.30step/s]


Episode 1120 finished at step 500 (560000 total). Env Reward: -8.12, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001120_map1.pth


Total Steps Trained:  56%|█████▌    | 560020/1000000 [2:45:30<11:34:43, 10.56step/s]


--- Rollout Summary (Steps 559501 to 560000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 602.4897
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  56%|█████▌    | 560500/1000000 [2:45:34<1:02:43, 116.77step/s]


Episode 1121 finished at step 500 (560500 total). Env Reward: -8.75, Steps: 500, Delivered: 6


Total Steps Trained:  56%|█████▌    | 560512/1000000 [2:45:39<14:53:29,  8.20step/s]


--- Rollout Summary (Steps 560001 to 560500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 574.2969
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  56%|█████▌    | 560999/1000000 [2:45:43<1:00:54, 120.12step/s]


Episode 1122 finished at step 500 (561000 total). Env Reward: -10.19, Steps: 500, Delivered: 4


Total Steps Trained:  56%|█████▌    | 561012/1000000 [2:45:48<14:13:24,  8.57step/s]


--- Rollout Summary (Steps 560501 to 561000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 464.8133
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  56%|█████▌    | 561494/1000000 [2:45:52<1:01:30, 118.83step/s]


Episode 1123 finished at step 500 (561500 total). Env Reward: -11.11, Steps: 500, Delivered: 3


Total Steps Trained:  56%|█████▌    | 561518/1000000 [2:45:57<10:33:26, 11.54step/s]


--- Rollout Summary (Steps 561001 to 561500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 622.5343
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  56%|█████▌    | 561995/1000000 [2:46:01<1:01:46, 118.17step/s]


Episode 1124 finished at step 500 (562000 total). Env Reward: -9.10, Steps: 500, Delivered: 5


Total Steps Trained:  56%|█████▌    | 562018/1000000 [2:46:06<10:39:10, 11.42step/s]


--- Rollout Summary (Steps 561501 to 562000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 388.0557
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  56%|█████▌    | 562498/1000000 [2:46:10<1:04:04, 113.80step/s]


Episode 1125 finished at step 500 (562500 total). Env Reward: -9.84, Steps: 500, Delivered: 5


Total Steps Trained:  56%|█████▋    | 562510/1000000 [2:46:14<14:44:27,  8.24step/s]


--- Rollout Summary (Steps 562001 to 562500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 592.3120
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  56%|█████▋    | 562993/1000000 [2:46:19<1:01:11, 119.04step/s]


Episode 1126 finished at step 500 (563000 total). Env Reward: -1.81, Steps: 500, Delivered: 4


Total Steps Trained:  56%|█████▋    | 563015/1000000 [2:46:24<11:11:40, 10.84step/s]


--- Rollout Summary (Steps 562501 to 563000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 882.7459
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  56%|█████▋    | 563500/1000000 [2:46:28<1:10:25, 103.29step/s]


Episode 1127 finished at step 500 (563500 total). Env Reward: -11.50, Steps: 500, Delivered: 3


Total Steps Trained:  56%|█████▋    | 563523/1000000 [2:46:32<10:45:01, 11.28step/s]


--- Rollout Summary (Steps 563001 to 563500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0014
Avg Critic Loss (per minibatch): 519.2597
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  56%|█████▋    | 563996/1000000 [2:46:36<1:01:40, 117.81step/s]


Episode 1128 finished at step 500 (564000 total). Env Reward: -0.51, Steps: 500, Delivered: 5


Total Steps Trained:  56%|█████▋    | 564019/1000000 [2:46:41<10:48:17, 11.21step/s]


--- Rollout Summary (Steps 563501 to 564000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 841.7391
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  56%|█████▋    | 564488/1000000 [2:46:45<59:35, 121.80step/s]


Episode 1129 finished at step 500 (564500 total). Env Reward: -7.62, Steps: 500, Delivered: 7


Total Steps Trained:  56%|█████▋    | 564512/1000000 [2:46:50<10:19:32, 11.72step/s]


--- Rollout Summary (Steps 564001 to 564500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 538.8041
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  56%|█████▋    | 564995/1000000 [2:46:54<1:00:40, 119.49step/s]


Episode 1130 finished at step 500 (565000 total). Env Reward: -8.35, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001130_map1.pth


Total Steps Trained:  57%|█████▋    | 565018/1000000 [2:46:59<10:38:53, 11.35step/s]


--- Rollout Summary (Steps 564501 to 565000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 494.8750
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  57%|█████▋    | 565490/1000000 [2:47:03<1:05:20, 110.84step/s]


Episode 1131 finished at step 500 (565500 total). Env Reward: 1.31, Steps: 500, Delivered: 7


Total Steps Trained:  57%|█████▋    | 565513/1000000 [2:47:08<10:51:39, 11.11step/s]


--- Rollout Summary (Steps 565001 to 565500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 631.9071
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  57%|█████▋    | 565993/1000000 [2:47:12<1:00:19, 119.92step/s]


Episode 1132 finished at step 500 (566000 total). Env Reward: -9.43, Steps: 500, Delivered: 5


Total Steps Trained:  57%|█████▋    | 566016/1000000 [2:47:16<10:42:02, 11.27step/s]


--- Rollout Summary (Steps 565501 to 566000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0083
Avg Critic Loss (per minibatch): 393.1309
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  57%|█████▋    | 566489/1000000 [2:47:21<59:44, 120.93step/s]  


Episode 1133 finished at step 500 (566500 total). Env Reward: -7.30, Steps: 500, Delivered: 7


Total Steps Trained:  57%|█████▋    | 566512/1000000 [2:47:25<10:50:46, 11.10step/s]


--- Rollout Summary (Steps 566001 to 566500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 404.3790
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  57%|█████▋    | 566999/1000000 [2:47:30<1:00:11, 119.89step/s]


Episode 1134 finished at step 500 (567000 total). Env Reward: -9.35, Steps: 500, Delivered: 5


Total Steps Trained:  57%|█████▋    | 567011/1000000 [2:47:34<14:16:49,  8.42step/s]


--- Rollout Summary (Steps 566501 to 567000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 587.2133
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  57%|█████▋    | 567488/1000000 [2:47:38<59:56, 120.27step/s]  


Episode 1135 finished at step 500 (567500 total). Env Reward: -11.57, Steps: 500, Delivered: 3


Total Steps Trained:  57%|█████▋    | 567512/1000000 [2:47:43<10:14:41, 11.73step/s]


--- Rollout Summary (Steps 567001 to 567500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0090
Avg Critic Loss (per minibatch): 458.4545
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  57%|█████▋    | 567998/1000000 [2:47:47<59:27, 121.11step/s]


Episode 1136 finished at step 500 (568000 total). Env Reward: -6.79, Steps: 500, Delivered: 8


Total Steps Trained:  57%|█████▋    | 568011/1000000 [2:47:52<13:43:56,  8.74step/s]


--- Rollout Summary (Steps 567501 to 568000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 539.2411
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  57%|█████▋    | 568490/1000000 [2:47:56<1:04:45, 111.05step/s]


Episode 1137 finished at step 500 (568500 total). Env Reward: -8.92, Steps: 500, Delivered: 6


Total Steps Trained:  57%|█████▋    | 568513/1000000 [2:48:01<10:53:12, 11.01step/s]


--- Rollout Summary (Steps 568001 to 568500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 563.2209
Avg Entropy (per minibatch): 2.5617
------------------------------


Total Steps Trained:  57%|█████▋    | 568998/1000000 [2:48:05<1:02:46, 114.42step/s]


Episode 1138 finished at step 500 (569000 total). Env Reward: -8.71, Steps: 500, Delivered: 6


Total Steps Trained:  57%|█████▋    | 569022/1000000 [2:48:09<10:25:27, 11.48step/s]


--- Rollout Summary (Steps 568501 to 569000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 486.6084
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  57%|█████▋    | 569493/1000000 [2:48:13<58:29, 122.67step/s]


Episode 1139 finished at step 500 (569500 total). Env Reward: -10.51, Steps: 500, Delivered: 4


Total Steps Trained:  57%|█████▋    | 569518/1000000 [2:48:18<9:59:16, 11.97step/s] 


--- Rollout Summary (Steps 569001 to 569500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 459.4742
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  57%|█████▋    | 569998/1000000 [2:48:22<58:09, 123.23step/s]


Episode 1140 finished at step 500 (570000 total). Env Reward: -12.92, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001140_map1.pth


Total Steps Trained:  57%|█████▋    | 570011/1000000 [2:48:27<13:28:03,  8.87step/s]


--- Rollout Summary (Steps 569501 to 570000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 441.3448
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  57%|█████▋    | 570499/1000000 [2:48:31<1:01:24, 116.57step/s]


Episode 1141 finished at step 500 (570500 total). Env Reward: -11.46, Steps: 500, Delivered: 3


Total Steps Trained:  57%|█████▋    | 570511/1000000 [2:48:36<14:02:11,  8.50step/s]


--- Rollout Summary (Steps 570001 to 570500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 466.2202
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  57%|█████▋    | 570999/1000000 [2:48:40<1:00:53, 117.44step/s]


Episode 1142 finished at step 500 (571000 total). Env Reward: -6.75, Steps: 500, Delivered: 8


Total Steps Trained:  57%|█████▋    | 571011/1000000 [2:48:45<14:32:54,  8.19step/s]


--- Rollout Summary (Steps 570501 to 571000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 652.9215
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  57%|█████▋    | 571494/1000000 [2:48:49<1:02:29, 114.28step/s]


Episode 1143 finished at step 500 (571500 total). Env Reward: -10.39, Steps: 500, Delivered: 4


Total Steps Trained:  57%|█████▋    | 571517/1000000 [2:48:54<10:33:36, 11.27step/s]


--- Rollout Summary (Steps 571001 to 571500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.07
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 420.2689
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  57%|█████▋    | 571989/1000000 [2:48:58<1:01:24, 116.16step/s]


Episode 1144 finished at step 500 (572000 total). Env Reward: -11.04, Steps: 500, Delivered: 4


Total Steps Trained:  57%|█████▋    | 572012/1000000 [2:49:03<10:25:04, 11.41step/s]


--- Rollout Summary (Steps 571501 to 572000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.03
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 379.5132
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  57%|█████▋    | 572489/1000000 [2:49:06<57:23, 124.16step/s]


Episode 1145 finished at step 500 (572500 total). Env Reward: -13.42, Steps: 500, Delivered: 1


Total Steps Trained:  57%|█████▋    | 572513/1000000 [2:49:11<10:06:44, 11.74step/s]


--- Rollout Summary (Steps 572001 to 572500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 4.99
Avg Actor Loss (per minibatch): 0.0013
Avg Critic Loss (per minibatch): 601.1756
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  57%|█████▋    | 572990/1000000 [2:49:15<57:56, 122.82step/s]


Episode 1146 finished at step 500 (573000 total). Env Reward: -8.51, Steps: 500, Delivered: 6


Total Steps Trained:  57%|█████▋    | 573014/1000000 [2:49:20<10:12:07, 11.63step/s]


--- Rollout Summary (Steps 572501 to 573000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.03
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 517.1999
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  57%|█████▋    | 573496/1000000 [2:49:24<1:00:04, 118.32step/s]


Episode 1147 finished at step 500 (573500 total). Env Reward: -7.64, Steps: 500, Delivered: 7


Total Steps Trained:  57%|█████▋    | 573519/1000000 [2:49:29<10:31:28, 11.26step/s]


--- Rollout Summary (Steps 573001 to 573500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.04
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 389.6861
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  57%|█████▋    | 573989/1000000 [2:49:33<1:00:25, 117.51step/s]


Episode 1148 finished at step 500 (574000 total). Env Reward: -5.14, Steps: 500, Delivered: 9


Total Steps Trained:  57%|█████▋    | 574010/1000000 [2:49:38<10:55:26, 10.83step/s]


--- Rollout Summary (Steps 573501 to 574000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 496.1255
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  57%|█████▋    | 574489/1000000 [2:49:42<1:04:14, 110.39step/s]


Episode 1149 finished at step 500 (574500 total). Env Reward: -0.61, Steps: 500, Delivered: 5


Total Steps Trained:  57%|█████▋    | 574511/1000000 [2:49:47<10:55:40, 10.82step/s]


--- Rollout Summary (Steps 574001 to 574500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.07
Avg Actor Loss (per minibatch): -0.0073
Avg Critic Loss (per minibatch): 855.2372
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  57%|█████▋    | 574995/1000000 [2:49:51<59:46, 118.50step/s]


Episode 1150 finished at step 500 (575000 total). Env Reward: 2.94, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001150_map1.pth


Total Steps Trained:  58%|█████▊    | 575019/1000000 [2:49:56<10:15:42, 11.50step/s]


--- Rollout Summary (Steps 574501 to 575000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 862.3175
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  58%|█████▊    | 575496/1000000 [2:50:00<1:01:58, 114.16step/s]


Episode 1151 finished at step 500 (575500 total). Env Reward: 2.17, Steps: 500, Delivered: 8


Total Steps Trained:  58%|█████▊    | 575520/1000000 [2:50:05<10:36:43, 11.11step/s]


--- Rollout Summary (Steps 575001 to 575500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 1036.5028
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  58%|█████▊    | 575992/1000000 [2:50:09<1:00:49, 116.17step/s]


Episode 1152 finished at step 500 (576000 total). Env Reward: -3.81, Steps: 500, Delivered: 11


Total Steps Trained:  58%|█████▊    | 576014/1000000 [2:50:13<10:49:44, 10.88step/s]


--- Rollout Summary (Steps 575501 to 576000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0088
Avg Critic Loss (per minibatch): 543.2947
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  58%|█████▊    | 576498/1000000 [2:50:18<58:43, 120.19step/s]


Episode 1153 finished at step 500 (576500 total). Env Reward: -11.95, Steps: 500, Delivered: 2


Total Steps Trained:  58%|█████▊    | 576511/1000000 [2:50:22<13:20:05,  8.82step/s]


--- Rollout Summary (Steps 576001 to 576500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 491.0003
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  58%|█████▊    | 576995/1000000 [2:50:26<58:25, 120.66step/s]


Episode 1154 finished at step 500 (577000 total). Env Reward: -12.86, Steps: 500, Delivered: 2


Total Steps Trained:  58%|█████▊    | 577018/1000000 [2:50:31<10:16:32, 11.43step/s]


--- Rollout Summary (Steps 576501 to 577000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 541.8921
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  58%|█████▊    | 577492/1000000 [2:50:35<58:06, 121.19step/s]


Episode 1155 finished at step 500 (577500 total). Env Reward: -11.33, Steps: 500, Delivered: 4


Total Steps Trained:  58%|█████▊    | 577516/1000000 [2:50:40<10:03:09, 11.67step/s]


--- Rollout Summary (Steps 577001 to 577500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 455.3436
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  58%|█████▊    | 577996/1000000 [2:50:44<1:02:58, 111.68step/s]


Episode 1156 finished at step 500 (578000 total). Env Reward: -9.71, Steps: 500, Delivered: 5


Total Steps Trained:  58%|█████▊    | 578020/1000000 [2:50:49<10:22:43, 11.29step/s]


--- Rollout Summary (Steps 577501 to 578000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 660.6778
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  58%|█████▊    | 578499/1000000 [2:50:53<57:19, 122.55step/s]


Episode 1157 finished at step 500 (578500 total). Env Reward: -8.80, Steps: 500, Delivered: 6


Total Steps Trained:  58%|█████▊    | 578512/1000000 [2:50:57<13:07:52,  8.92step/s]


--- Rollout Summary (Steps 578001 to 578500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 544.9740
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  58%|█████▊    | 578989/1000000 [2:51:01<57:32, 121.94step/s]


Episode 1158 finished at step 500 (579000 total). Env Reward: -9.47, Steps: 500, Delivered: 5


Total Steps Trained:  58%|█████▊    | 579013/1000000 [2:51:06<10:03:14, 11.63step/s]


--- Rollout Summary (Steps 578501 to 579000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 523.6109
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  58%|█████▊    | 579494/1000000 [2:51:10<59:59, 116.83step/s]


Episode 1159 finished at step 500 (579500 total). Env Reward: -8.63, Steps: 500, Delivered: 6


Total Steps Trained:  58%|█████▊    | 579518/1000000 [2:51:15<10:04:18, 11.60step/s]


--- Rollout Summary (Steps 579001 to 579500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 428.0706
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  58%|█████▊    | 579991/1000000 [2:51:19<58:08, 120.40step/s]


Episode 1160 finished at step 500 (580000 total). Env Reward: -11.70, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001160_map1.pth


Total Steps Trained:  58%|█████▊    | 580015/1000000 [2:51:24<10:00:56, 11.65step/s]


--- Rollout Summary (Steps 579501 to 580000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 519.6355
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  58%|█████▊    | 580500/1000000 [2:51:28<1:03:15, 110.54step/s]


Episode 1161 finished at step 500 (580500 total). Env Reward: -6.22, Steps: 500, Delivered: 8


Total Steps Trained:  58%|█████▊    | 580512/1000000 [2:51:32<14:11:41,  8.21step/s]


--- Rollout Summary (Steps 580001 to 580500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 585.7507
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  58%|█████▊    | 580991/1000000 [2:51:37<1:01:03, 114.37step/s]


Episode 1162 finished at step 500 (581000 total). Env Reward: -6.94, Steps: 500, Delivered: 8


Total Steps Trained:  58%|█████▊    | 581014/1000000 [2:51:42<11:04:35, 10.51step/s]


--- Rollout Summary (Steps 580501 to 581000) ---
Update Duration: 4.76s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.07
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 562.9589
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  58%|█████▊    | 581493/1000000 [2:51:46<55:43, 125.17step/s]


Episode 1163 finished at step 500 (581500 total). Env Reward: -8.59, Steps: 500, Delivered: 6


Total Steps Trained:  58%|█████▊    | 581517/1000000 [2:51:51<9:48:52, 11.84step/s] 


--- Rollout Summary (Steps 581001 to 581500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.04
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 784.8759
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  58%|█████▊    | 581995/1000000 [2:51:55<58:16, 119.55step/s]


Episode 1164 finished at step 500 (582000 total). Env Reward: -8.54, Steps: 500, Delivered: 6


Total Steps Trained:  58%|█████▊    | 582018/1000000 [2:51:59<10:06:10, 11.49step/s]


--- Rollout Summary (Steps 581501 to 582000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 430.6782
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  58%|█████▊    | 582490/1000000 [2:52:03<57:24, 121.21step/s]


Episode 1165 finished at step 500 (582500 total). Env Reward: -6.22, Steps: 500, Delivered: 8


Total Steps Trained:  58%|█████▊    | 582514/1000000 [2:52:08<9:52:39, 11.74step/s] 


--- Rollout Summary (Steps 582001 to 582500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 661.5972
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  58%|█████▊    | 582994/1000000 [2:52:12<1:00:17, 115.29step/s]


Episode 1166 finished at step 500 (583000 total). Env Reward: 0.04, Steps: 500, Delivered: 6


Total Steps Trained:  58%|█████▊    | 583018/1000000 [2:52:17<10:11:18, 11.37step/s]


--- Rollout Summary (Steps 582501 to 583000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.04
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 680.9840
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  58%|█████▊    | 583496/1000000 [2:52:21<58:03, 119.55step/s]


Episode 1167 finished at step 500 (583500 total). Env Reward: -10.26, Steps: 500, Delivered: 4


Total Steps Trained:  58%|█████▊    | 583519/1000000 [2:52:26<10:23:23, 11.13step/s]


--- Rollout Summary (Steps 583001 to 583500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.02
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 473.6576
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  58%|█████▊    | 583997/1000000 [2:52:30<59:21, 116.80step/s]


Episode 1168 finished at step 500 (584000 total). Env Reward: -0.13, Steps: 500, Delivered: 6


Total Steps Trained:  58%|█████▊    | 584020/1000000 [2:52:35<10:18:17, 11.21step/s]


--- Rollout Summary (Steps 583501 to 584000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 938.8940
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  58%|█████▊    | 584493/1000000 [2:52:39<57:38, 120.15step/s]


Episode 1169 finished at step 500 (584500 total). Env Reward: -10.57, Steps: 500, Delivered: 4


Total Steps Trained:  58%|█████▊    | 584517/1000000 [2:52:44<10:07:55, 11.39step/s]


--- Rollout Summary (Steps 584001 to 584500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 564.6726
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  58%|█████▊    | 584993/1000000 [2:52:48<58:43, 117.77step/s]


Episode 1170 finished at step 500 (585000 total). Env Reward: -5.45, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001170_map1.pth


Total Steps Trained:  59%|█████▊    | 585015/1000000 [2:52:53<10:22:13, 11.12step/s]


--- Rollout Summary (Steps 584501 to 585000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.07
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 452.5360
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  59%|█████▊    | 585491/1000000 [2:52:57<1:01:23, 112.54step/s]


Episode 1171 finished at step 500 (585500 total). Env Reward: -8.14, Steps: 500, Delivered: 6


Total Steps Trained:  59%|█████▊    | 585515/1000000 [2:53:02<10:07:21, 11.37step/s]


--- Rollout Summary (Steps 585001 to 585500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 758.0451
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  59%|█████▊    | 585989/1000000 [2:53:06<1:00:07, 114.77step/s]


Episode 1172 finished at step 500 (586000 total). Env Reward: -7.64, Steps: 500, Delivered: 7


Total Steps Trained:  59%|█████▊    | 586011/1000000 [2:53:11<10:36:03, 10.85step/s]


--- Rollout Summary (Steps 585501 to 586000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 444.4665
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  59%|█████▊    | 586489/1000000 [2:53:15<56:43, 121.50step/s]


Episode 1173 finished at step 500 (586500 total). Env Reward: -10.99, Steps: 500, Delivered: 4


Total Steps Trained:  59%|█████▊    | 586513/1000000 [2:53:20<9:51:51, 11.64step/s] 


--- Rollout Summary (Steps 586001 to 586500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 575.1620
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  59%|█████▊    | 586994/1000000 [2:53:24<56:30, 121.80step/s]


Episode 1174 finished at step 500 (587000 total). Env Reward: -12.14, Steps: 500, Delivered: 3


Total Steps Trained:  59%|█████▊    | 587018/1000000 [2:53:28<9:45:26, 11.76step/s] 


--- Rollout Summary (Steps 586501 to 587000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.07
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 525.6811
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  59%|█████▊    | 587497/1000000 [2:53:33<58:39, 117.19step/s]


Episode 1175 finished at step 500 (587500 total). Env Reward: 1.11, Steps: 500, Delivered: 7


Total Steps Trained:  59%|█████▉    | 587520/1000000 [2:53:37<10:06:44, 11.33step/s]


--- Rollout Summary (Steps 587001 to 587500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 927.0685
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  59%|█████▉    | 587990/1000000 [2:53:41<56:11, 122.22step/s]


Episode 1176 finished at step 500 (588000 total). Env Reward: -10.49, Steps: 500, Delivered: 4


Total Steps Trained:  59%|█████▉    | 588014/1000000 [2:53:46<10:04:04, 11.37step/s]


--- Rollout Summary (Steps 587501 to 588000) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.07
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 512.5288
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  59%|█████▉    | 588499/1000000 [2:53:50<56:48, 120.73step/s]


Episode 1177 finished at step 500 (588500 total). Env Reward: -8.87, Steps: 500, Delivered: 6


Total Steps Trained:  59%|█████▉    | 588512/1000000 [2:53:55<12:50:13,  8.90step/s]


--- Rollout Summary (Steps 588001 to 588500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 512.2877
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  59%|█████▉    | 589000/1000000 [2:53:59<58:20, 117.40step/s]


Episode 1178 finished at step 500 (589000 total). Env Reward: -8.29, Steps: 500, Delivered: 6


Total Steps Trained:  59%|█████▉    | 589012/1000000 [2:54:04<13:23:36,  8.52step/s]


--- Rollout Summary (Steps 588501 to 589000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 522.8069
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  59%|█████▉    | 589493/1000000 [2:54:08<1:02:01, 110.32step/s]


Episode 1179 finished at step 500 (589500 total). Env Reward: 5.80, Steps: 500, Delivered: 11


Total Steps Trained:  59%|█████▉    | 589517/1000000 [2:54:13<9:56:36, 11.47step/s] 


--- Rollout Summary (Steps 589001 to 589500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 990.2822
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  59%|█████▉    | 589991/1000000 [2:54:17<58:12, 117.40step/s]


Episode 1180 finished at step 500 (590000 total). Env Reward: -12.96, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001180_map1.pth


Total Steps Trained:  59%|█████▉    | 590013/1000000 [2:54:21<10:23:35, 10.96step/s]


--- Rollout Summary (Steps 589501 to 590000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 612.1162
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  59%|█████▉    | 590492/1000000 [2:54:26<58:12, 117.27step/s]


Episode 1181 finished at step 500 (590500 total). Env Reward: 2.40, Steps: 500, Delivered: 8


Total Steps Trained:  59%|█████▉    | 590515/1000000 [2:54:30<10:10:47, 11.17step/s]


--- Rollout Summary (Steps 590001 to 590500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0076
Avg Critic Loss (per minibatch): 749.2630
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  59%|█████▉    | 590997/1000000 [2:54:34<56:02, 121.62step/s]


Episode 1182 finished at step 500 (591000 total). Env Reward: -12.29, Steps: 500, Delivered: 2


Total Steps Trained:  59%|█████▉    | 591010/1000000 [2:54:39<13:12:46,  8.60step/s]


--- Rollout Summary (Steps 590501 to 591000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 393.4263
Avg Entropy (per minibatch): 2.5615
------------------------------


Total Steps Trained:  59%|█████▉    | 591495/1000000 [2:54:43<56:43, 120.02step/s]


Episode 1183 finished at step 500 (591500 total). Env Reward: -11.88, Steps: 500, Delivered: 3


Total Steps Trained:  59%|█████▉    | 591519/1000000 [2:54:48<9:57:01, 11.40step/s] 


--- Rollout Summary (Steps 591001 to 591500) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 498.4135
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  59%|█████▉    | 591990/1000000 [2:54:52<56:44, 119.86step/s]


Episode 1184 finished at step 500 (592000 total). Env Reward: -8.88, Steps: 500, Delivered: 6


Total Steps Trained:  59%|█████▉    | 592013/1000000 [2:54:57<9:51:57, 11.49step/s] 


--- Rollout Summary (Steps 591501 to 592000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 598.1347
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  59%|█████▉    | 592497/1000000 [2:55:01<58:30, 116.10step/s]


Episode 1185 finished at step 500 (592500 total). Env Reward: -8.13, Steps: 500, Delivered: 7


Total Steps Trained:  59%|█████▉    | 592521/1000000 [2:55:06<9:44:52, 11.61step/s] 


--- Rollout Summary (Steps 592001 to 592500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 473.3085
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  59%|█████▉    | 592991/1000000 [2:55:10<1:01:55, 109.55step/s]


Episode 1186 finished at step 500 (593000 total). Env Reward: -1.44, Steps: 500, Delivered: 4


Total Steps Trained:  59%|█████▉    | 593012/1000000 [2:55:15<10:36:06, 10.66step/s]


--- Rollout Summary (Steps 592501 to 593000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 1029.3574
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  59%|█████▉    | 593489/1000000 [2:55:19<57:35, 117.64step/s]


Episode 1187 finished at step 500 (593500 total). Env Reward: -8.72, Steps: 500, Delivered: 6


Total Steps Trained:  59%|█████▉    | 593511/1000000 [2:55:24<10:31:51, 10.72step/s]


--- Rollout Summary (Steps 593001 to 593500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 478.0439
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  59%|█████▉    | 593995/1000000 [2:55:28<1:01:00, 110.93step/s]


Episode 1188 finished at step 500 (594000 total). Env Reward: -8.17, Steps: 500, Delivered: 6


Total Steps Trained:  59%|█████▉    | 594017/1000000 [2:55:33<10:24:23, 10.84step/s]


--- Rollout Summary (Steps 593501 to 594000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 462.5914
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  59%|█████▉    | 594489/1000000 [2:55:37<1:00:30, 111.70step/s]


Episode 1189 finished at step 500 (594500 total). Env Reward: -11.59, Steps: 500, Delivered: 3


Total Steps Trained:  59%|█████▉    | 594512/1000000 [2:55:42<10:07:29, 11.12step/s]


--- Rollout Summary (Steps 594001 to 594500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 642.3392
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  59%|█████▉    | 594997/1000000 [2:55:46<56:17, 119.89step/s]


Episode 1190 finished at step 500 (595000 total). Env Reward: -12.37, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001190_map1.pth


Total Steps Trained:  60%|█████▉    | 595021/1000000 [2:55:50<9:29:51, 11.84step/s] 


--- Rollout Summary (Steps 594501 to 595000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0098
Avg Critic Loss (per minibatch): 438.9779
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  60%|█████▉    | 595492/1000000 [2:55:54<55:50, 120.73step/s]


Episode 1191 finished at step 500 (595500 total). Env Reward: -10.35, Steps: 500, Delivered: 4


Total Steps Trained:  60%|█████▉    | 595516/1000000 [2:55:59<9:35:26, 11.72step/s] 


--- Rollout Summary (Steps 595001 to 595500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 601.8515
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  60%|█████▉    | 596000/1000000 [2:56:03<1:01:21, 109.73step/s]


Episode 1192 finished at step 500 (596000 total). Env Reward: -9.54, Steps: 500, Delivered: 5


Total Steps Trained:  60%|█████▉    | 596012/1000000 [2:56:08<13:41:33,  8.20step/s]


--- Rollout Summary (Steps 595501 to 596000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 643.3992
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  60%|█████▉    | 596495/1000000 [2:56:12<55:29, 121.20step/s]


Episode 1193 finished at step 500 (596500 total). Env Reward: -11.43, Steps: 500, Delivered: 4


Total Steps Trained:  60%|█████▉    | 596519/1000000 [2:56:17<9:35:49, 11.68step/s] 


--- Rollout Summary (Steps 596001 to 596500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 469.5472
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  60%|█████▉    | 596994/1000000 [2:56:21<57:32, 116.72step/s]


Episode 1194 finished at step 500 (597000 total). Env Reward: -12.14, Steps: 500, Delivered: 3


Total Steps Trained:  60%|█████▉    | 597017/1000000 [2:56:26<10:03:56, 11.12step/s]


--- Rollout Summary (Steps 596501 to 597000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 626.4963
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  60%|█████▉    | 597499/1000000 [2:56:30<55:49, 120.18step/s]


Episode 1195 finished at step 500 (597500 total). Env Reward: -9.50, Steps: 500, Delivered: 5


Total Steps Trained:  60%|█████▉    | 597512/1000000 [2:56:35<12:46:33,  8.75step/s]


--- Rollout Summary (Steps 597001 to 597500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 458.0904
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  60%|█████▉    | 597999/1000000 [2:56:39<1:01:26, 109.05step/s]


Episode 1196 finished at step 500 (598000 total). Env Reward: -7.51, Steps: 500, Delivered: 7


Total Steps Trained:  60%|█████▉    | 598021/1000000 [2:56:44<10:17:45, 10.85step/s]


--- Rollout Summary (Steps 597501 to 598000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 619.5249
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  60%|█████▉    | 598492/1000000 [2:56:48<54:23, 123.04step/s]


Episode 1197 finished at step 500 (598500 total). Env Reward: -9.21, Steps: 500, Delivered: 5


Total Steps Trained:  60%|█████▉    | 598516/1000000 [2:56:52<9:24:48, 11.85step/s] 


--- Rollout Summary (Steps 598001 to 598500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 404.2309
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  60%|█████▉    | 598989/1000000 [2:56:56<56:29, 118.30step/s]


Episode 1198 finished at step 500 (599000 total). Env Reward: -10.81, Steps: 500, Delivered: 4


Total Steps Trained:  60%|█████▉    | 599012/1000000 [2:57:01<9:44:51, 11.43step/s] 


--- Rollout Summary (Steps 598501 to 599000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 528.2216
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  60%|█████▉    | 599491/1000000 [2:57:05<55:14, 120.85step/s]


Episode 1199 finished at step 500 (599500 total). Env Reward: -9.86, Steps: 500, Delivered: 5


Total Steps Trained:  60%|█████▉    | 599516/1000000 [2:57:10<9:18:21, 11.95step/s] 


--- Rollout Summary (Steps 599001 to 599500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 528.5003
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  60%|██████    | 600000/1000000 [2:57:14<58:02, 114.86step/s]


Episode 1200 finished at step 500 (600000 total). Env Reward: 1.56, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001200_map1.pth


Total Steps Trained:  60%|██████    | 600012/1000000 [2:57:19<13:28:38,  8.24step/s]


--- Rollout Summary (Steps 599501 to 600000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 899.5789
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  60%|██████    | 600495/1000000 [2:57:23<55:02, 120.99step/s]


Episode 1201 finished at step 500 (600500 total). Env Reward: -8.21, Steps: 500, Delivered: 6


Total Steps Trained:  60%|██████    | 600519/1000000 [2:57:28<9:41:31, 11.45step/s] 


--- Rollout Summary (Steps 600001 to 600500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -8.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0098
Avg Critic Loss (per minibatch): 489.4063
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  60%|██████    | 600990/1000000 [2:57:32<55:53, 118.98step/s]


Episode 1202 finished at step 500 (601000 total). Env Reward: -11.04, Steps: 500, Delivered: 4


Total Steps Trained:  60%|██████    | 601013/1000000 [2:57:36<10:01:19, 11.06step/s]


--- Rollout Summary (Steps 600501 to 601000) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -8.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 540.2155
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  60%|██████    | 601495/1000000 [2:57:41<1:00:04, 110.57step/s]


Episode 1203 finished at step 500 (601500 total). Env Reward: -8.37, Steps: 500, Delivered: 6


Total Steps Trained:  60%|██████    | 601517/1000000 [2:57:45<10:02:39, 11.02step/s]


--- Rollout Summary (Steps 601001 to 601500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 671.8261
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  60%|██████    | 602000/1000000 [2:57:50<55:28, 119.58step/s]


Episode 1204 finished at step 500 (602000 total). Env Reward: -0.50, Steps: 500, Delivered: 5


Total Steps Trained:  60%|██████    | 602012/1000000 [2:57:54<12:37:54,  8.75step/s]


--- Rollout Summary (Steps 601501 to 602000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -8.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 791.3334
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  60%|██████    | 602497/1000000 [2:57:58<59:51, 110.69step/s]  


Episode 1205 finished at step 500 (602500 total). Env Reward: 5.91, Steps: 500, Delivered: 11


Total Steps Trained:  60%|██████    | 602520/1000000 [2:58:03<9:53:13, 11.17step/s] 


--- Rollout Summary (Steps 602001 to 602500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 966.5253
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  60%|██████    | 603000/1000000 [2:58:07<56:30, 117.07step/s]


Episode 1206 finished at step 500 (603000 total). Env Reward: -9.35, Steps: 500, Delivered: 5


Total Steps Trained:  60%|██████    | 603012/1000000 [2:58:12<13:06:09,  8.42step/s]


--- Rollout Summary (Steps 602501 to 603000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 518.9628
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  60%|██████    | 603493/1000000 [2:58:16<58:15, 113.45step/s]


Episode 1207 finished at step 500 (603500 total). Env Reward: 1.88, Steps: 500, Delivered: 8


Total Steps Trained:  60%|██████    | 603516/1000000 [2:58:21<9:54:19, 11.12step/s] 


--- Rollout Summary (Steps 603001 to 603500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 1225.9612
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  60%|██████    | 603988/1000000 [2:58:25<54:38, 120.79step/s]


Episode 1208 finished at step 500 (604000 total). Env Reward: -10.87, Steps: 500, Delivered: 4


Total Steps Trained:  60%|██████    | 604010/1000000 [2:58:30<10:09:36, 10.83step/s]


--- Rollout Summary (Steps 603501 to 604000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 456.2911
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  60%|██████    | 604494/1000000 [2:58:34<55:29, 118.78step/s]


Episode 1209 finished at step 500 (604500 total). Env Reward: -12.90, Steps: 500, Delivered: 2


Total Steps Trained:  60%|██████    | 604517/1000000 [2:58:39<9:35:30, 11.45step/s] 


--- Rollout Summary (Steps 604001 to 604500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 582.5471
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  60%|██████    | 604988/1000000 [2:58:43<54:03, 121.80step/s]


Episode 1210 finished at step 500 (605000 total). Env Reward: -12.87, Steps: 500, Delivered: 1
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001210_map1.pth


Total Steps Trained:  61%|██████    | 605012/1000000 [2:58:47<9:22:32, 11.70step/s] 


--- Rollout Summary (Steps 604501 to 605000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 605.9433
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  61%|██████    | 605488/1000000 [2:58:52<53:30, 122.90step/s]


Episode 1211 finished at step 500 (605500 total). Env Reward: -10.12, Steps: 500, Delivered: 5


Total Steps Trained:  61%|██████    | 605512/1000000 [2:58:56<9:13:22, 11.88step/s] 


--- Rollout Summary (Steps 605001 to 605500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0014
Avg Critic Loss (per minibatch): 374.5022
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  61%|██████    | 605993/1000000 [2:59:00<57:45, 113.71step/s]


Episode 1212 finished at step 500 (606000 total). Env Reward: -10.91, Steps: 500, Delivered: 4


Total Steps Trained:  61%|██████    | 606016/1000000 [2:59:05<9:44:18, 11.24step/s] 


--- Rollout Summary (Steps 605501 to 606000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): 0.0018
Avg Critic Loss (per minibatch): 439.3399
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  61%|██████    | 606496/1000000 [2:59:09<54:37, 120.08step/s]


Episode 1213 finished at step 500 (606500 total). Env Reward: -11.02, Steps: 500, Delivered: 4


Total Steps Trained:  61%|██████    | 606521/1000000 [2:59:14<9:10:44, 11.91step/s] 


--- Rollout Summary (Steps 606001 to 606500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 456.8094
Avg Entropy (per minibatch): 2.5619
------------------------------


Total Steps Trained:  61%|██████    | 606998/1000000 [2:59:18<1:02:27, 104.86step/s]


Episode 1214 finished at step 500 (607000 total). Env Reward: -3.79, Steps: 500, Delivered: 10


Total Steps Trained:  61%|██████    | 607019/1000000 [2:59:23<10:28:55, 10.41step/s]


--- Rollout Summary (Steps 606501 to 607000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 625.1347
Avg Entropy (per minibatch): 2.5621
------------------------------


Total Steps Trained:  61%|██████    | 607494/1000000 [2:59:27<54:03, 121.03step/s]


Episode 1215 finished at step 500 (607500 total). Env Reward: -9.58, Steps: 500, Delivered: 5


Total Steps Trained:  61%|██████    | 607518/1000000 [2:59:32<9:26:19, 11.55step/s] 


--- Rollout Summary (Steps 607001 to 607500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 779.6444
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  61%|██████    | 607998/1000000 [2:59:36<54:41, 119.46step/s]


Episode 1216 finished at step 500 (608000 total). Env Reward: -7.89, Steps: 500, Delivered: 7


Total Steps Trained:  61%|██████    | 608021/1000000 [2:59:41<9:35:29, 11.35step/s] 


--- Rollout Summary (Steps 607501 to 608000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 467.2757
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  61%|██████    | 608493/1000000 [2:59:45<54:49, 119.03step/s]


Episode 1217 finished at step 500 (608500 total). Env Reward: -7.07, Steps: 500, Delivered: 8


Total Steps Trained:  61%|██████    | 608516/1000000 [2:59:49<9:31:36, 11.41step/s] 


--- Rollout Summary (Steps 608001 to 608500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 517.7639
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  61%|██████    | 609000/1000000 [2:59:54<59:36, 109.32step/s]


Episode 1218 finished at step 500 (609000 total). Env Reward: -9.68, Steps: 500, Delivered: 5


Total Steps Trained:  61%|██████    | 609011/1000000 [2:59:58<13:27:42,  8.07step/s]


--- Rollout Summary (Steps 608501 to 609000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 460.5575
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  61%|██████    | 609489/1000000 [3:00:02<56:49, 114.54step/s]


Episode 1219 finished at step 500 (609500 total). Env Reward: -8.91, Steps: 500, Delivered: 6


Total Steps Trained:  61%|██████    | 609511/1000000 [3:00:07<9:59:58, 10.85step/s] 


--- Rollout Summary (Steps 609001 to 609500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 593.0794
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  61%|██████    | 609990/1000000 [3:00:11<1:01:40, 105.40step/s]


Episode 1220 finished at step 500 (610000 total). Env Reward: -4.71, Steps: 500, Delivered: 10
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001220_map1.pth


Total Steps Trained:  61%|██████    | 610011/1000000 [3:00:16<10:17:04, 10.53step/s]


--- Rollout Summary (Steps 609501 to 610000) ---
Update Duration: 4.43s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 709.3568
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  61%|██████    | 610497/1000000 [3:00:20<53:35, 121.14step/s]


Episode 1221 finished at step 500 (610500 total). Env Reward: 9.34, Steps: 500, Delivered: 6


Total Steps Trained:  61%|██████    | 610510/1000000 [3:00:25<12:07:45,  8.92step/s]


--- Rollout Summary (Steps 610001 to 610500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 928.9442
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  61%|██████    | 610999/1000000 [3:00:29<53:46, 120.58step/s]


Episode 1222 finished at step 500 (611000 total). Env Reward: -7.95, Steps: 500, Delivered: 7


Total Steps Trained:  61%|██████    | 611012/1000000 [3:00:34<12:21:41,  8.74step/s]


--- Rollout Summary (Steps 610501 to 611000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 615.9806
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  61%|██████    | 611493/1000000 [3:00:38<1:00:26, 107.14step/s]


Episode 1223 finished at step 500 (611500 total). Env Reward: 2.56, Steps: 500, Delivered: 8


Total Steps Trained:  61%|██████    | 611515/1000000 [3:00:42<9:48:57, 10.99step/s] 


--- Rollout Summary (Steps 611001 to 611500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 800.6117
Avg Entropy (per minibatch): 2.5619
------------------------------


Total Steps Trained:  61%|██████    | 611996/1000000 [3:00:47<56:47, 113.86step/s]


Episode 1224 finished at step 500 (612000 total). Env Reward: 0.80, Steps: 500, Delivered: 7


Total Steps Trained:  61%|██████    | 612018/1000000 [3:00:51<9:54:01, 10.89step/s] 


--- Rollout Summary (Steps 611501 to 612000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 1320.6888
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  61%|██████    | 612492/1000000 [3:00:55<57:45, 111.81step/s]


Episode 1225 finished at step 500 (612500 total). Env Reward: 2.29, Steps: 500, Delivered: 8


Total Steps Trained:  61%|██████▏   | 612515/1000000 [3:01:00<9:38:09, 11.17step/s] 


--- Rollout Summary (Steps 612001 to 612500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 904.2739
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  61%|██████▏   | 612999/1000000 [3:01:04<53:43, 120.05step/s]


Episode 1226 finished at step 500 (613000 total). Env Reward: -6.67, Steps: 500, Delivered: 8


Total Steps Trained:  61%|██████▏   | 613012/1000000 [3:01:09<12:26:38,  8.64step/s]


--- Rollout Summary (Steps 612501 to 613000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 472.3260
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  61%|██████▏   | 613497/1000000 [3:01:13<51:54, 124.11step/s]


Episode 1227 finished at step 500 (613500 total). Env Reward: -13.21, Steps: 500, Delivered: 1


Total Steps Trained:  61%|██████▏   | 613510/1000000 [3:01:18<12:06:49,  8.86step/s]


--- Rollout Summary (Steps 613001 to 613500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 509.3658
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  61%|██████▏   | 613993/1000000 [3:01:22<52:46, 121.91step/s]


Episode 1228 finished at step 500 (614000 total). Env Reward: -11.72, Steps: 500, Delivered: 3


Total Steps Trained:  61%|██████▏   | 614017/1000000 [3:01:26<9:06:17, 11.78step/s] 


--- Rollout Summary (Steps 613501 to 614000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 474.2286
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  61%|██████▏   | 614492/1000000 [3:01:31<54:42, 117.44step/s]


Episode 1229 finished at step 500 (614500 total). Env Reward: -11.67, Steps: 500, Delivered: 3


Total Steps Trained:  61%|██████▏   | 614515/1000000 [3:01:35<9:21:17, 11.45step/s] 


--- Rollout Summary (Steps 614001 to 614500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 533.4484
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  62%|██████▏   | 615000/1000000 [3:01:40<55:31, 115.57step/s]


Episode 1230 finished at step 500 (615000 total). Env Reward: -7.01, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001230_map1.pth


Total Steps Trained:  62%|██████▏   | 615012/1000000 [3:01:44<12:54:48,  8.28step/s]


--- Rollout Summary (Steps 614501 to 615000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 581.5396
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  62%|██████▏   | 615499/1000000 [3:01:48<54:11, 118.25step/s]


Episode 1231 finished at step 500 (615500 total). Env Reward: -10.05, Steps: 500, Delivered: 5


Total Steps Trained:  62%|██████▏   | 615511/1000000 [3:01:53<12:28:50,  8.56step/s]


--- Rollout Summary (Steps 615001 to 615500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 414.5208
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  62%|██████▏   | 615996/1000000 [3:01:57<53:36, 119.39step/s]


Episode 1232 finished at step 500 (616000 total). Env Reward: -14.76, Steps: 500, Delivered: 0


Total Steps Trained:  62%|██████▏   | 616019/1000000 [3:02:02<9:30:41, 11.21step/s] 


--- Rollout Summary (Steps 615501 to 616000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): 0.0012
Avg Critic Loss (per minibatch): 510.4951
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  62%|██████▏   | 616489/1000000 [3:02:06<53:12, 120.14step/s]


Episode 1233 finished at step 500 (616500 total). Env Reward: -11.91, Steps: 500, Delivered: 3


Total Steps Trained:  62%|██████▏   | 616513/1000000 [3:02:11<9:24:23, 11.32step/s] 


--- Rollout Summary (Steps 616001 to 616500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 512.7309
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  62%|██████▏   | 616997/1000000 [3:02:15<55:18, 115.42step/s]


Episode 1234 finished at step 500 (617000 total). Env Reward: -10.25, Steps: 500, Delivered: 5


Total Steps Trained:  62%|██████▏   | 617021/1000000 [3:02:20<9:17:47, 11.44step/s] 


--- Rollout Summary (Steps 616501 to 617000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 536.6860
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  62%|██████▏   | 617496/1000000 [3:02:23<52:48, 120.71step/s]


Episode 1235 finished at step 500 (617500 total). Env Reward: -11.38, Steps: 500, Delivered: 3


Total Steps Trained:  62%|██████▏   | 617520/1000000 [3:02:28<8:58:39, 11.83step/s] 


--- Rollout Summary (Steps 617001 to 617500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 608.5010
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  62%|██████▏   | 617996/1000000 [3:02:32<54:20, 117.15step/s]


Episode 1236 finished at step 500 (618000 total). Env Reward: -11.51, Steps: 500, Delivered: 3


Total Steps Trained:  62%|██████▏   | 618019/1000000 [3:02:37<9:22:20, 11.32step/s] 


--- Rollout Summary (Steps 617501 to 618000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 562.6421
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  62%|██████▏   | 618492/1000000 [3:02:41<56:43, 112.09step/s]


Episode 1237 finished at step 500 (618500 total). Env Reward: -11.99, Steps: 500, Delivered: 3


Total Steps Trained:  62%|██████▏   | 618515/1000000 [3:02:46<9:25:18, 11.25step/s] 


--- Rollout Summary (Steps 618001 to 618500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 571.2889
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  62%|██████▏   | 618989/1000000 [3:02:50<56:53, 111.63step/s]


Episode 1238 finished at step 500 (619000 total). Env Reward: -6.86, Steps: 500, Delivered: 8


Total Steps Trained:  62%|██████▏   | 619012/1000000 [3:02:55<9:32:55, 11.08step/s] 


--- Rollout Summary (Steps 618501 to 619000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 565.7670
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  62%|██████▏   | 619494/1000000 [3:02:59<54:56, 115.43step/s]


Episode 1239 finished at step 500 (619500 total). Env Reward: -10.75, Steps: 500, Delivered: 4


Total Steps Trained:  62%|██████▏   | 619518/1000000 [3:03:04<9:06:30, 11.60step/s] 


--- Rollout Summary (Steps 619001 to 619500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 578.6289
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  62%|██████▏   | 620000/1000000 [3:03:08<55:58, 113.16step/s]


Episode 1240 finished at step 500 (620000 total). Env Reward: -10.16, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001240_map1.pth


Total Steps Trained:  62%|██████▏   | 620012/1000000 [3:03:12<12:47:14,  8.25step/s]


--- Rollout Summary (Steps 619501 to 620000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0077
Avg Critic Loss (per minibatch): 764.2496
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  62%|██████▏   | 620492/1000000 [3:03:17<52:00, 121.63step/s]


Episode 1241 finished at step 500 (620500 total). Env Reward: -5.86, Steps: 500, Delivered: 9


Total Steps Trained:  62%|██████▏   | 620516/1000000 [3:03:21<9:02:44, 11.65step/s] 


--- Rollout Summary (Steps 620001 to 620500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 443.6528
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  62%|██████▏   | 620991/1000000 [3:03:25<54:14, 116.44step/s]


Episode 1242 finished at step 500 (621000 total). Env Reward: -8.66, Steps: 500, Delivered: 6


Total Steps Trained:  62%|██████▏   | 621013/1000000 [3:03:30<9:47:02, 10.76step/s] 


--- Rollout Summary (Steps 620501 to 621000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 628.4584
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  62%|██████▏   | 621493/1000000 [3:03:34<51:07, 123.39step/s]


Episode 1243 finished at step 500 (621500 total). Env Reward: -13.06, Steps: 500, Delivered: 1


Total Steps Trained:  62%|██████▏   | 621517/1000000 [3:03:39<8:58:46, 11.71step/s] 


--- Rollout Summary (Steps 621001 to 621500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 561.2655
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  62%|██████▏   | 621995/1000000 [3:03:43<56:04, 112.36step/s]


Episode 1244 finished at step 500 (622000 total). Env Reward: -6.92, Steps: 500, Delivered: 8


Total Steps Trained:  62%|██████▏   | 622019/1000000 [3:03:48<9:34:19, 10.97step/s] 


--- Rollout Summary (Steps 621501 to 622000) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 601.2620
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  62%|██████▏   | 622500/1000000 [3:03:52<53:27, 117.71step/s]


Episode 1245 finished at step 500 (622500 total). Env Reward: -10.44, Steps: 500, Delivered: 4


Total Steps Trained:  62%|██████▏   | 622512/1000000 [3:03:57<12:16:23,  8.54step/s]


--- Rollout Summary (Steps 622001 to 622500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 464.2775
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  62%|██████▏   | 623000/1000000 [3:04:01<53:04, 118.38step/s]


Episode 1246 finished at step 500 (623000 total). Env Reward: -10.76, Steps: 500, Delivered: 4


Total Steps Trained:  62%|██████▏   | 623012/1000000 [3:04:05<12:12:56,  8.57step/s]


--- Rollout Summary (Steps 622501 to 623000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 692.5919
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  62%|██████▏   | 623497/1000000 [3:04:10<53:19, 117.67step/s]


Episode 1247 finished at step 500 (623500 total). Env Reward: -7.55, Steps: 500, Delivered: 7


Total Steps Trained:  62%|██████▏   | 623521/1000000 [3:04:14<9:01:13, 11.59step/s] 


--- Rollout Summary (Steps 623001 to 623500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 467.8751
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  62%|██████▏   | 624000/1000000 [3:04:19<55:35, 112.72step/s]


Episode 1248 finished at step 500 (624000 total). Env Reward: -6.71, Steps: 500, Delivered: 8


Total Steps Trained:  62%|██████▏   | 624012/1000000 [3:04:23<12:45:57,  8.18step/s]


--- Rollout Summary (Steps 623501 to 624000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 645.0000
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  62%|██████▏   | 624493/1000000 [3:04:27<54:29, 114.87step/s]


Episode 1249 finished at step 500 (624500 total). Env Reward: -8.31, Steps: 500, Delivered: 7


Total Steps Trained:  62%|██████▏   | 624516/1000000 [3:04:32<9:27:45, 11.02step/s] 


--- Rollout Summary (Steps 624001 to 624500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 610.7322
Avg Entropy (per minibatch): 2.5588
------------------------------


Total Steps Trained:  62%|██████▏   | 624991/1000000 [3:04:36<53:46, 116.21step/s]


Episode 1250 finished at step 500 (625000 total). Env Reward: -10.63, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001250_map1.pth


Total Steps Trained:  63%|██████▎   | 625014/1000000 [3:04:41<9:24:23, 11.07step/s] 


--- Rollout Summary (Steps 624501 to 625000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 651.5587
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  63%|██████▎   | 625491/1000000 [3:04:45<54:16, 114.99step/s]


Episode 1251 finished at step 500 (625500 total). Env Reward: -6.96, Steps: 500, Delivered: 8


Total Steps Trained:  63%|██████▎   | 625514/1000000 [3:04:50<9:36:27, 10.83step/s] 


--- Rollout Summary (Steps 625001 to 625500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0090
Avg Critic Loss (per minibatch): 599.5416
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  63%|██████▎   | 625995/1000000 [3:04:54<53:50, 115.76step/s]


Episode 1252 finished at step 500 (626000 total). Env Reward: -9.40, Steps: 500, Delivered: 5


Total Steps Trained:  63%|██████▎   | 626019/1000000 [3:04:59<8:58:35, 11.57step/s] 


--- Rollout Summary (Steps 625501 to 626000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 455.2334
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  63%|██████▎   | 626498/1000000 [3:05:03<53:33, 116.23step/s]


Episode 1253 finished at step 500 (626500 total). Env Reward: 0.27, Steps: 500, Delivered: 6


Total Steps Trained:  63%|██████▎   | 626510/1000000 [3:05:08<12:25:21,  8.35step/s]


--- Rollout Summary (Steps 626001 to 626500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 673.9520
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  63%|██████▎   | 626990/1000000 [3:05:12<52:33, 118.28step/s]


Episode 1254 finished at step 500 (627000 total). Env Reward: -8.54, Steps: 500, Delivered: 6


Total Steps Trained:  63%|██████▎   | 627014/1000000 [3:05:16<8:55:45, 11.60step/s] 


--- Rollout Summary (Steps 626501 to 627000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0004
Avg Critic Loss (per minibatch): 491.4722
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  63%|██████▎   | 627494/1000000 [3:05:21<58:40, 105.82step/s]


Episode 1255 finished at step 500 (627500 total). Env Reward: -8.78, Steps: 500, Delivered: 6


Total Steps Trained:  63%|██████▎   | 627517/1000000 [3:05:25<9:33:31, 10.82step/s] 


--- Rollout Summary (Steps 627001 to 627500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 576.7910
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  63%|██████▎   | 627991/1000000 [3:05:29<54:53, 112.94step/s]


Episode 1256 finished at step 500 (628000 total). Env Reward: 0.79, Steps: 500, Delivered: 6


Total Steps Trained:  63%|██████▎   | 628013/1000000 [3:05:34<9:25:01, 10.97step/s] 


--- Rollout Summary (Steps 627501 to 628000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 726.6416
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  63%|██████▎   | 628490/1000000 [3:05:38<52:34, 117.78step/s]


Episode 1257 finished at step 500 (628500 total). Env Reward: -6.75, Steps: 500, Delivered: 7


Total Steps Trained:  63%|██████▎   | 628513/1000000 [3:05:43<9:17:01, 11.12step/s] 


--- Rollout Summary (Steps 628001 to 628500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 545.7131
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  63%|██████▎   | 628997/1000000 [3:05:47<54:23, 113.70step/s]


Episode 1258 finished at step 500 (629000 total). Env Reward: -11.07, Steps: 500, Delivered: 4


Total Steps Trained:  63%|██████▎   | 629022/1000000 [3:05:52<8:57:38, 11.50step/s] 


--- Rollout Summary (Steps 628501 to 629000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 438.3834
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  63%|██████▎   | 629492/1000000 [3:05:56<51:47, 119.24step/s]


Episode 1259 finished at step 500 (629500 total). Env Reward: -9.55, Steps: 500, Delivered: 5


Total Steps Trained:  63%|██████▎   | 629515/1000000 [3:06:01<9:07:13, 11.28step/s] 


--- Rollout Summary (Steps 629001 to 629500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 349.2869
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  63%|██████▎   | 629991/1000000 [3:06:05<49:37, 124.26step/s]


Episode 1260 finished at step 500 (630000 total). Env Reward: -13.96, Steps: 500, Delivered: 1
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001260_map1.pth


Total Steps Trained:  63%|██████▎   | 630014/1000000 [3:06:09<8:59:18, 11.43step/s] 


--- Rollout Summary (Steps 629501 to 630000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 462.9531
Avg Entropy (per minibatch): 2.5617
------------------------------


Total Steps Trained:  63%|██████▎   | 630499/1000000 [3:06:14<51:12, 120.27step/s]


Episode 1261 finished at step 500 (630500 total). Env Reward: -11.63, Steps: 500, Delivered: 3


Total Steps Trained:  63%|██████▎   | 630512/1000000 [3:06:18<11:36:33,  8.84step/s]


--- Rollout Summary (Steps 630001 to 630500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 553.6083
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  63%|██████▎   | 630994/1000000 [3:06:22<49:33, 124.10step/s]


Episode 1262 finished at step 500 (631000 total). Env Reward: -7.22, Steps: 500, Delivered: 7


Total Steps Trained:  63%|██████▎   | 631018/1000000 [3:06:27<9:15:34, 11.07step/s] 


--- Rollout Summary (Steps 630501 to 631000) ---
Update Duration: 4.81s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 496.5353
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  63%|██████▎   | 631498/1000000 [3:06:32<54:06, 113.52step/s]


Episode 1263 finished at step 500 (631500 total). Env Reward: -5.56, Steps: 500, Delivered: 9


Total Steps Trained:  63%|██████▎   | 631521/1000000 [3:06:36<9:15:07, 11.06step/s] 


--- Rollout Summary (Steps 631001 to 631500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 538.2717
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  63%|██████▎   | 631996/1000000 [3:06:40<51:36, 118.85step/s]


Episode 1264 finished at step 500 (632000 total). Env Reward: 0.24, Steps: 500, Delivered: 6


Total Steps Trained:  63%|██████▎   | 632018/1000000 [3:06:45<9:14:27, 11.06step/s] 


--- Rollout Summary (Steps 631501 to 632000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 1381.7532
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  63%|██████▎   | 632495/1000000 [3:06:49<51:22, 119.23step/s]


Episode 1265 finished at step 500 (632500 total). Env Reward: -9.86, Steps: 500, Delivered: 5


Total Steps Trained:  63%|██████▎   | 632519/1000000 [3:06:54<8:46:35, 11.63step/s] 


--- Rollout Summary (Steps 632001 to 632500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 558.1735
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  63%|██████▎   | 633000/1000000 [3:06:58<55:36, 109.98step/s]


Episode 1266 finished at step 500 (633000 total). Env Reward: -5.23, Steps: 500, Delivered: 9


Total Steps Trained:  63%|██████▎   | 633012/1000000 [3:07:03<12:27:15,  8.19step/s]


--- Rollout Summary (Steps 632501 to 633000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0090
Avg Critic Loss (per minibatch): 685.8306
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  63%|██████▎   | 633491/1000000 [3:07:07<49:39, 123.01step/s]


Episode 1267 finished at step 500 (633500 total). Env Reward: -9.82, Steps: 500, Delivered: 5


Total Steps Trained:  63%|██████▎   | 633515/1000000 [3:07:12<8:40:06, 11.74step/s] 


--- Rollout Summary (Steps 633001 to 633500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 497.6625
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  63%|██████▎   | 633996/1000000 [3:07:16<50:48, 120.08step/s]


Episode 1268 finished at step 500 (634000 total). Env Reward: -7.65, Steps: 500, Delivered: 7


Total Steps Trained:  63%|██████▎   | 634020/1000000 [3:07:21<8:43:42, 11.65step/s] 


--- Rollout Summary (Steps 633501 to 634000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 518.6665
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  63%|██████▎   | 634494/1000000 [3:07:25<50:07, 121.52step/s]


Episode 1269 finished at step 500 (634500 total). Env Reward: -8.79, Steps: 500, Delivered: 6


Total Steps Trained:  63%|██████▎   | 634517/1000000 [3:07:29<9:05:11, 11.17step/s] 


--- Rollout Summary (Steps 634001 to 634500) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 477.9632
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  63%|██████▎   | 634988/1000000 [3:07:34<50:25, 120.65step/s]


Episode 1270 finished at step 500 (635000 total). Env Reward: -7.58, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001270_map1.pth


Total Steps Trained:  64%|██████▎   | 635011/1000000 [3:07:38<8:57:53, 11.31step/s] 


--- Rollout Summary (Steps 634501 to 635000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 607.2731
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  64%|██████▎   | 635494/1000000 [3:07:42<49:16, 123.30step/s]


Episode 1271 finished at step 500 (635500 total). Env Reward: -3.22, Steps: 500, Delivered: 3


Total Steps Trained:  64%|██████▎   | 635519/1000000 [3:07:47<8:21:10, 12.12step/s] 


--- Rollout Summary (Steps 635001 to 635500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 978.8827
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  64%|██████▎   | 635996/1000000 [3:07:51<53:55, 112.49step/s]


Episode 1272 finished at step 500 (636000 total). Env Reward: -5.87, Steps: 500, Delivered: 9


Total Steps Trained:  64%|██████▎   | 636019/1000000 [3:07:56<9:01:58, 11.19step/s] 


--- Rollout Summary (Steps 635501 to 636000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 779.7166
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  64%|██████▎   | 636489/1000000 [3:08:00<49:11, 123.16step/s]


Episode 1273 finished at step 500 (636500 total). Env Reward: -11.36, Steps: 500, Delivered: 4


Total Steps Trained:  64%|██████▎   | 636513/1000000 [3:08:05<8:38:42, 11.68step/s] 


--- Rollout Summary (Steps 636001 to 636500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 605.1589
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  64%|██████▎   | 636999/1000000 [3:08:09<49:36, 121.95step/s]


Episode 1274 finished at step 500 (637000 total). Env Reward: -13.21, Steps: 500, Delivered: 1


Total Steps Trained:  64%|██████▎   | 637012/1000000 [3:08:14<11:15:58,  8.95step/s]


--- Rollout Summary (Steps 636501 to 637000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): 0.0021
Avg Critic Loss (per minibatch): 452.3101
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  64%|██████▎   | 637492/1000000 [3:08:18<55:36, 108.66step/s]


Episode 1275 finished at step 500 (637500 total). Env Reward: -10.55, Steps: 500, Delivered: 4


Total Steps Trained:  64%|██████▍   | 637515/1000000 [3:08:22<8:46:16, 11.48step/s] 


--- Rollout Summary (Steps 637001 to 637500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 454.0879
Avg Entropy (per minibatch): 2.5587
------------------------------


Total Steps Trained:  64%|██████▍   | 637996/1000000 [3:08:26<49:52, 120.97step/s]


Episode 1276 finished at step 500 (638000 total). Env Reward: -13.04, Steps: 500, Delivered: 2


Total Steps Trained:  64%|██████▍   | 638020/1000000 [3:08:31<8:57:15, 11.23step/s] 


--- Rollout Summary (Steps 637501 to 638000) ---
Update Duration: 4.73s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 393.0563
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  64%|██████▍   | 638498/1000000 [3:08:36<49:25, 121.92step/s]


Episode 1277 finished at step 500 (638500 total). Env Reward: -7.53, Steps: 500, Delivered: 7


Total Steps Trained:  64%|██████▍   | 638511/1000000 [3:08:40<11:22:14,  8.83step/s]


--- Rollout Summary (Steps 638001 to 638500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 516.7777
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  64%|██████▍   | 638992/1000000 [3:08:44<51:25, 116.99step/s]


Episode 1278 finished at step 500 (639000 total). Env Reward: -8.83, Steps: 500, Delivered: 6


Total Steps Trained:  64%|██████▍   | 639015/1000000 [3:08:49<8:54:14, 11.26step/s] 


--- Rollout Summary (Steps 638501 to 639000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 380.2634
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  64%|██████▍   | 639495/1000000 [3:08:53<52:40, 114.07step/s]


Episode 1279 finished at step 500 (639500 total). Env Reward: -10.59, Steps: 500, Delivered: 4


Total Steps Trained:  64%|██████▍   | 639516/1000000 [3:08:58<9:15:19, 10.82step/s] 


--- Rollout Summary (Steps 639001 to 639500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 585.6713
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  64%|██████▍   | 639997/1000000 [3:09:02<53:19, 112.51step/s]


Episode 1280 finished at step 500 (640000 total). Env Reward: -7.98, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001280_map1.pth


Total Steps Trained:  64%|██████▍   | 640019/1000000 [3:09:07<9:28:02, 10.56step/s] 


--- Rollout Summary (Steps 639501 to 640000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -8.01
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 488.9419
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  64%|██████▍   | 640491/1000000 [3:09:11<50:08, 119.49step/s]


Episode 1281 finished at step 500 (640500 total). Env Reward: -9.79, Steps: 500, Delivered: 5


Total Steps Trained:  64%|██████▍   | 640514/1000000 [3:09:16<8:51:22, 11.28step/s] 


--- Rollout Summary (Steps 640001 to 640500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 566.3780
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  64%|██████▍   | 640992/1000000 [3:09:20<49:36, 120.61step/s]


Episode 1282 finished at step 500 (641000 total). Env Reward: -10.64, Steps: 500, Delivered: 4


Total Steps Trained:  64%|██████▍   | 641016/1000000 [3:09:25<8:37:21, 11.56step/s] 


--- Rollout Summary (Steps 640501 to 641000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -8.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 424.1511
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  64%|██████▍   | 641495/1000000 [3:09:29<50:28, 118.36step/s]


Episode 1283 finished at step 500 (641500 total). Env Reward: 3.22, Steps: 500, Delivered: 9


Total Steps Trained:  64%|██████▍   | 641518/1000000 [3:09:33<8:51:06, 11.25step/s] 


--- Rollout Summary (Steps 641001 to 641500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 1002.0366
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  64%|██████▍   | 642000/1000000 [3:09:38<55:17, 107.91step/s]


Episode 1284 finished at step 500 (642000 total). Env Reward: -8.78, Steps: 500, Delivered: 6


Total Steps Trained:  64%|██████▍   | 642011/1000000 [3:09:42<12:25:24,  8.00step/s]


--- Rollout Summary (Steps 641501 to 642000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 623.6160
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  64%|██████▍   | 642493/1000000 [3:09:46<49:46, 119.69step/s]


Episode 1285 finished at step 500 (642500 total). Env Reward: -12.08, Steps: 500, Delivered: 3


Total Steps Trained:  64%|██████▍   | 642516/1000000 [3:09:51<8:45:27, 11.34step/s] 


--- Rollout Summary (Steps 642001 to 642500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -8.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 457.6907
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  64%|██████▍   | 642998/1000000 [3:09:55<49:18, 120.68step/s]


Episode 1286 finished at step 500 (643000 total). Env Reward: -9.60, Steps: 500, Delivered: 5


Total Steps Trained:  64%|██████▍   | 643023/1000000 [3:10:00<8:19:39, 11.91step/s] 


--- Rollout Summary (Steps 642501 to 643000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -8.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 523.0090
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  64%|██████▍   | 643498/1000000 [3:10:04<50:43, 117.12step/s]


Episode 1287 finished at step 500 (643500 total). Env Reward: 2.70, Steps: 500, Delivered: 8


Total Steps Trained:  64%|██████▍   | 643510/1000000 [3:10:09<12:14:14,  8.09step/s]


--- Rollout Summary (Steps 643001 to 643500) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 803.9183
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  64%|██████▍   | 643995/1000000 [3:10:13<52:11, 113.69step/s]


Episode 1288 finished at step 500 (644000 total). Env Reward: -6.78, Steps: 500, Delivered: 8


Total Steps Trained:  64%|██████▍   | 644018/1000000 [3:10:18<8:53:56, 11.11step/s] 


--- Rollout Summary (Steps 643501 to 644000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 967.4441
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  64%|██████▍   | 644488/1000000 [3:10:22<48:44, 121.56step/s]


Episode 1289 finished at step 500 (644500 total). Env Reward: -8.59, Steps: 500, Delivered: 6


Total Steps Trained:  64%|██████▍   | 644512/1000000 [3:10:27<8:20:18, 11.84step/s] 


--- Rollout Summary (Steps 644001 to 644500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 621.9428
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  64%|██████▍   | 644995/1000000 [3:10:31<54:06, 109.34step/s]


Episode 1290 finished at step 500 (645000 total). Env Reward: 0.92, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001290_map1.pth


Total Steps Trained:  65%|██████▍   | 645017/1000000 [3:10:36<9:12:14, 10.71step/s] 


--- Rollout Summary (Steps 644501 to 645000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 534.9782
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  65%|██████▍   | 645492/1000000 [3:10:40<48:44, 121.22step/s]


Episode 1291 finished at step 500 (645500 total). Env Reward: -0.35, Steps: 500, Delivered: 5


Total Steps Trained:  65%|██████▍   | 645517/1000000 [3:10:45<8:15:03, 11.93step/s] 


--- Rollout Summary (Steps 645001 to 645500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 919.8853
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  65%|██████▍   | 645997/1000000 [3:10:49<50:12, 117.50step/s]


Episode 1292 finished at step 500 (646000 total). Env Reward: -8.29, Steps: 500, Delivered: 6


Total Steps Trained:  65%|██████▍   | 646020/1000000 [3:10:53<8:39:36, 11.35step/s] 


--- Rollout Summary (Steps 645501 to 646000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 596.8506
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  65%|██████▍   | 646499/1000000 [3:10:58<48:59, 120.27step/s]


Episode 1293 finished at step 500 (646500 total). Env Reward: -10.13, Steps: 500, Delivered: 4


Total Steps Trained:  65%|██████▍   | 646512/1000000 [3:11:02<11:01:20,  8.91step/s]


--- Rollout Summary (Steps 646001 to 646500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 376.3147
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  65%|██████▍   | 646989/1000000 [3:11:06<49:07, 119.78step/s]


Episode 1294 finished at step 500 (647000 total). Env Reward: -8.46, Steps: 500, Delivered: 6


Total Steps Trained:  65%|██████▍   | 647013/1000000 [3:11:11<8:38:52, 11.34step/s] 


--- Rollout Summary (Steps 646501 to 647000) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 446.9478
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  65%|██████▍   | 647492/1000000 [3:11:15<49:16, 119.24step/s]


Episode 1295 finished at step 500 (647500 total). Env Reward: -8.94, Steps: 500, Delivered: 6


Total Steps Trained:  65%|██████▍   | 647515/1000000 [3:11:20<8:26:36, 11.60step/s] 


--- Rollout Summary (Steps 647001 to 647500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0003
Avg Critic Loss (per minibatch): 411.6696
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  65%|██████▍   | 647996/1000000 [3:11:24<51:18, 114.33step/s]


Episode 1296 finished at step 500 (648000 total). Env Reward: -0.36, Steps: 500, Delivered: 5


Total Steps Trained:  65%|██████▍   | 648020/1000000 [3:11:29<8:40:14, 11.28step/s] 


--- Rollout Summary (Steps 647501 to 648000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 844.2174
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  65%|██████▍   | 648495/1000000 [3:11:33<49:12, 119.05step/s]


Episode 1297 finished at step 500 (648500 total). Env Reward: -7.54, Steps: 500, Delivered: 8


Total Steps Trained:  65%|██████▍   | 648517/1000000 [3:11:38<8:50:12, 11.05step/s] 


--- Rollout Summary (Steps 648001 to 648500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 468.0744
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  65%|██████▍   | 648997/1000000 [3:11:42<51:59, 112.52step/s]


Episode 1298 finished at step 500 (649000 total). Env Reward: 2.01, Steps: 500, Delivered: 8


Total Steps Trained:  65%|██████▍   | 649019/1000000 [3:11:47<8:59:26, 10.84step/s] 


--- Rollout Summary (Steps 648501 to 649000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 772.4290
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  65%|██████▍   | 649496/1000000 [3:11:51<47:28, 123.06step/s]


Episode 1299 finished at step 500 (649500 total). Env Reward: -8.84, Steps: 500, Delivered: 6


Total Steps Trained:  65%|██████▍   | 649519/1000000 [3:11:55<8:29:10, 11.47step/s] 


--- Rollout Summary (Steps 649001 to 649500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 520.2635
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  65%|██████▍   | 649997/1000000 [3:12:00<51:50, 112.53step/s]


Episode 1300 finished at step 500 (650000 total). Env Reward: -7.76, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001300_map1.pth


Total Steps Trained:  65%|██████▌   | 650020/1000000 [3:12:04<8:48:36, 11.03step/s] 


--- Rollout Summary (Steps 649501 to 650000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 427.3440
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  65%|██████▌   | 650493/1000000 [3:12:08<49:39, 117.32step/s]


Episode 1301 finished at step 500 (650500 total). Env Reward: -9.30, Steps: 500, Delivered: 5


Total Steps Trained:  65%|██████▌   | 650517/1000000 [3:12:13<8:37:27, 11.26step/s] 


--- Rollout Summary (Steps 650001 to 650500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 425.6598
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  65%|██████▌   | 650995/1000000 [3:12:17<49:56, 116.49step/s]


Episode 1302 finished at step 500 (651000 total). Env Reward: -10.64, Steps: 500, Delivered: 4


Total Steps Trained:  65%|██████▌   | 651018/1000000 [3:12:22<8:42:24, 11.13step/s] 


--- Rollout Summary (Steps 650501 to 651000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 538.8585
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  65%|██████▌   | 651492/1000000 [3:12:26<49:29, 117.35step/s]


Episode 1303 finished at step 500 (651500 total). Env Reward: -9.62, Steps: 500, Delivered: 5


Total Steps Trained:  65%|██████▌   | 651514/1000000 [3:12:31<8:39:17, 11.18step/s] 


--- Rollout Summary (Steps 651001 to 651500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 521.2141
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  65%|██████▌   | 651988/1000000 [3:12:35<48:00, 120.84step/s]


Episode 1304 finished at step 500 (652000 total). Env Reward: 2.45, Steps: 500, Delivered: 8


Total Steps Trained:  65%|██████▌   | 652011/1000000 [3:12:40<8:45:37, 11.03step/s] 


--- Rollout Summary (Steps 651501 to 652000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 889.7148
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  65%|██████▌   | 652498/1000000 [3:12:44<54:40, 105.94step/s]


Episode 1305 finished at step 500 (652500 total). Env Reward: -8.14, Steps: 500, Delivered: 6


Total Steps Trained:  65%|██████▌   | 652520/1000000 [3:12:49<9:25:33, 10.24step/s] 


--- Rollout Summary (Steps 652001 to 652500) ---
Update Duration: 4.70s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 584.8001
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  65%|██████▌   | 652989/1000000 [3:12:54<49:43, 116.30step/s]


Episode 1306 finished at step 500 (653000 total). Env Reward: 2.71, Steps: 500, Delivered: 8


Total Steps Trained:  65%|██████▌   | 653012/1000000 [3:12:59<8:54:25, 10.82step/s] 


--- Rollout Summary (Steps 652501 to 653000) ---
Update Duration: 4.70s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 963.5396
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  65%|██████▌   | 653496/1000000 [3:13:03<52:04, 110.91step/s]


Episode 1307 finished at step 500 (653500 total). Env Reward: -6.96, Steps: 500, Delivered: 8


Total Steps Trained:  65%|██████▌   | 653519/1000000 [3:13:08<9:05:43, 10.58step/s] 


--- Rollout Summary (Steps 653001 to 653500) ---
Update Duration: 4.71s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0097
Avg Critic Loss (per minibatch): 830.2535
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  65%|██████▌   | 653999/1000000 [3:13:12<49:59, 115.35step/s]


Episode 1308 finished at step 500 (654000 total). Env Reward: -1.62, Steps: 500, Delivered: 4


Total Steps Trained:  65%|██████▌   | 654011/1000000 [3:13:17<12:12:17,  7.87step/s]


--- Rollout Summary (Steps 653501 to 654000) ---
Update Duration: 4.88s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 759.5657
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  65%|██████▌   | 654491/1000000 [3:13:22<49:07, 117.24step/s]


Episode 1309 finished at step 500 (654500 total). Env Reward: -9.83, Steps: 500, Delivered: 5


Total Steps Trained:  65%|██████▌   | 654514/1000000 [3:13:26<8:29:29, 11.30step/s] 


--- Rollout Summary (Steps 654001 to 654500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 499.7602
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  65%|██████▌   | 654995/1000000 [3:13:30<48:45, 117.94step/s]


Episode 1310 finished at step 500 (655000 total). Env Reward: -6.60, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001310_map1.pth


Total Steps Trained:  66%|██████▌   | 655017/1000000 [3:13:35<8:49:12, 10.86step/s] 


--- Rollout Summary (Steps 654501 to 655000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 481.7894
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  66%|██████▌   | 655496/1000000 [3:13:39<50:39, 113.35step/s]


Episode 1311 finished at step 500 (655500 total). Env Reward: -11.41, Steps: 500, Delivered: 3


Total Steps Trained:  66%|██████▌   | 655520/1000000 [3:13:44<8:21:33, 11.45step/s] 


--- Rollout Summary (Steps 655001 to 655500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 609.8634
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  66%|██████▌   | 655994/1000000 [3:13:49<49:46, 115.19step/s]


Episode 1312 finished at step 500 (656000 total). Env Reward: -12.96, Steps: 500, Delivered: 2


Total Steps Trained:  66%|██████▌   | 656017/1000000 [3:13:53<8:25:29, 11.34step/s] 


--- Rollout Summary (Steps 655501 to 656000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 489.7591
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  66%|██████▌   | 656496/1000000 [3:13:57<47:00, 121.80step/s]


Episode 1313 finished at step 500 (656500 total). Env Reward: -12.73, Steps: 500, Delivered: 2


Total Steps Trained:  66%|██████▌   | 656520/1000000 [3:14:02<8:08:45, 11.71step/s] 


--- Rollout Summary (Steps 656001 to 656500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0002
Avg Critic Loss (per minibatch): 533.6956
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  66%|██████▌   | 656992/1000000 [3:14:06<48:19, 118.29step/s]


Episode 1314 finished at step 500 (657000 total). Env Reward: -0.60, Steps: 500, Delivered: 5


Total Steps Trained:  66%|██████▌   | 657014/1000000 [3:14:11<8:31:59, 11.17step/s] 


--- Rollout Summary (Steps 656501 to 657000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 932.1769
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  66%|██████▌   | 657492/1000000 [3:14:15<47:59, 118.97step/s]


Episode 1315 finished at step 500 (657500 total). Env Reward: -10.90, Steps: 500, Delivered: 4


Total Steps Trained:  66%|██████▌   | 657515/1000000 [3:14:20<8:45:22, 10.86step/s] 


--- Rollout Summary (Steps 657001 to 657500) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 601.7445
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  66%|██████▌   | 657994/1000000 [3:14:24<47:52, 119.06step/s]


Episode 1316 finished at step 500 (658000 total). Env Reward: -6.65, Steps: 500, Delivered: 8


Total Steps Trained:  66%|██████▌   | 658018/1000000 [3:14:29<8:08:59, 11.66step/s] 


--- Rollout Summary (Steps 657501 to 658000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 624.4343
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  66%|██████▌   | 658494/1000000 [3:14:33<52:39, 108.08step/s]


Episode 1317 finished at step 500 (658500 total). Env Reward: -10.98, Steps: 500, Delivered: 4


Total Steps Trained:  66%|██████▌   | 658516/1000000 [3:14:38<8:43:59, 10.86step/s] 


--- Rollout Summary (Steps 658001 to 658500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 413.6271
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  66%|██████▌   | 658989/1000000 [3:14:42<47:45, 119.00step/s]


Episode 1318 finished at step 500 (659000 total). Env Reward: -6.28, Steps: 500, Delivered: 8


Total Steps Trained:  66%|██████▌   | 659012/1000000 [3:14:46<8:19:07, 11.39step/s] 


--- Rollout Summary (Steps 658501 to 659000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 604.9204
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  66%|██████▌   | 659496/1000000 [3:14:51<55:20, 102.54step/s]


Episode 1319 finished at step 500 (659500 total). Env Reward: -4.78, Steps: 500, Delivered: 10


Total Steps Trained:  66%|██████▌   | 659518/1000000 [3:14:55<9:04:22, 10.42step/s] 


--- Rollout Summary (Steps 659001 to 659500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 494.0717
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  66%|██████▌   | 659994/1000000 [3:15:00<47:12, 120.03step/s]


Episode 1320 finished at step 500 (660000 total). Env Reward: 0.18, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001320_map1.pth


Total Steps Trained:  66%|██████▌   | 660019/1000000 [3:15:04<7:57:27, 11.87step/s] 


--- Rollout Summary (Steps 659501 to 660000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 1131.7889
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  66%|██████▌   | 660496/1000000 [3:15:09<49:56, 113.31step/s]


Episode 1321 finished at step 500 (660500 total). Env Reward: 3.45, Steps: 500, Delivered: 9


Total Steps Trained:  66%|██████▌   | 660519/1000000 [3:15:13<8:22:57, 11.25step/s] 


--- Rollout Summary (Steps 660001 to 660500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 1659.2049
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  66%|██████▌   | 660998/1000000 [3:15:17<45:42, 123.61step/s]


Episode 1322 finished at step 500 (661000 total). Env Reward: -13.07, Steps: 500, Delivered: 2


Total Steps Trained:  66%|██████▌   | 661011/1000000 [3:15:22<11:07:47,  8.46step/s]


--- Rollout Summary (Steps 660501 to 661000) ---
Update Duration: 4.74s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 595.9414
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  66%|██████▌   | 661498/1000000 [3:15:26<46:53, 120.30step/s]


Episode 1323 finished at step 500 (661500 total). Env Reward: -12.97, Steps: 500, Delivered: 2


Total Steps Trained:  66%|██████▌   | 661511/1000000 [3:15:31<10:46:16,  8.73step/s]


--- Rollout Summary (Steps 661001 to 661500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): 0.0008
Avg Critic Loss (per minibatch): 837.1292
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  66%|██████▌   | 661989/1000000 [3:15:35<46:36, 120.89step/s]


Episode 1324 finished at step 500 (662000 total). Env Reward: -9.80, Steps: 500, Delivered: 5


Total Steps Trained:  66%|██████▌   | 662013/1000000 [3:15:40<8:06:28, 11.58step/s] 


--- Rollout Summary (Steps 661501 to 662000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 541.9476
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  66%|██████▌   | 662492/1000000 [3:15:44<47:54, 117.43step/s]


Episode 1325 finished at step 500 (662500 total). Env Reward: -8.91, Steps: 500, Delivered: 6


Total Steps Trained:  66%|██████▋   | 662514/1000000 [3:15:49<8:31:48, 10.99step/s] 


--- Rollout Summary (Steps 662001 to 662500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 709.6980
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  66%|██████▋   | 662994/1000000 [3:15:53<45:46, 122.71step/s]


Episode 1326 finished at step 500 (663000 total). Env Reward: -12.05, Steps: 500, Delivered: 3


Total Steps Trained:  66%|██████▋   | 663018/1000000 [3:15:58<8:10:03, 11.46step/s] 


--- Rollout Summary (Steps 662501 to 663000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): 0.0001
Avg Critic Loss (per minibatch): 719.4122
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  66%|██████▋   | 663500/1000000 [3:16:02<47:10, 118.88step/s]


Episode 1327 finished at step 500 (663500 total). Env Reward: -13.66, Steps: 500, Delivered: 1


Total Steps Trained:  66%|██████▋   | 663512/1000000 [3:16:06<10:48:11,  8.65step/s]


--- Rollout Summary (Steps 663001 to 663500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 561.4017
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  66%|██████▋   | 663990/1000000 [3:16:11<48:09, 116.28step/s]


Episode 1328 finished at step 500 (664000 total). Env Reward: -11.01, Steps: 500, Delivered: 4


Total Steps Trained:  66%|██████▋   | 664012/1000000 [3:16:15<8:27:06, 11.04step/s] 


--- Rollout Summary (Steps 663501 to 664000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 587.1212
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  66%|██████▋   | 664496/1000000 [3:16:20<49:03, 113.99step/s]


Episode 1329 finished at step 500 (664500 total). Env Reward: -9.56, Steps: 500, Delivered: 5


Total Steps Trained:  66%|██████▋   | 664518/1000000 [3:16:24<8:31:41, 10.93step/s] 


--- Rollout Summary (Steps 664001 to 664500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 618.9254
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  66%|██████▋   | 664997/1000000 [3:16:29<49:14, 113.38step/s]


Episode 1330 finished at step 500 (665000 total). Env Reward: 1.76, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001330_map1.pth


Total Steps Trained:  67%|██████▋   | 665019/1000000 [3:16:33<8:35:59, 10.82step/s] 


--- Rollout Summary (Steps 664501 to 665000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 854.4830
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  67%|██████▋   | 665493/1000000 [3:16:37<49:19, 113.01step/s]


Episode 1331 finished at step 500 (665500 total). Env Reward: 11.39, Steps: 500, Delivered: 8


Total Steps Trained:  67%|██████▋   | 665515/1000000 [3:16:42<8:30:04, 10.93step/s] 


--- Rollout Summary (Steps 665001 to 665500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 1609.3999
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  67%|██████▋   | 665988/1000000 [3:16:46<45:58, 121.08step/s]


Episode 1332 finished at step 500 (666000 total). Env Reward: -10.18, Steps: 500, Delivered: 4


Total Steps Trained:  67%|██████▋   | 666011/1000000 [3:16:51<8:03:53, 11.50step/s] 


--- Rollout Summary (Steps 665501 to 666000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 591.9011
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  67%|██████▋   | 666489/1000000 [3:16:55<46:18, 120.02step/s]


Episode 1333 finished at step 500 (666500 total). Env Reward: -9.60, Steps: 500, Delivered: 5


Total Steps Trained:  67%|██████▋   | 666513/1000000 [3:17:00<8:04:41, 11.47step/s] 


--- Rollout Summary (Steps 666001 to 666500) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 464.0512
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  67%|██████▋   | 666997/1000000 [3:17:04<45:54, 120.90step/s]


Episode 1334 finished at step 500 (667000 total). Env Reward: -10.97, Steps: 500, Delivered: 4


Total Steps Trained:  67%|██████▋   | 667021/1000000 [3:17:09<7:51:05, 11.78step/s] 


--- Rollout Summary (Steps 666501 to 667000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 435.2823
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  67%|██████▋   | 667500/1000000 [3:17:13<49:17, 112.44step/s]


Episode 1335 finished at step 500 (667500 total). Env Reward: -8.77, Steps: 500, Delivered: 6


Total Steps Trained:  67%|██████▋   | 667512/1000000 [3:17:17<10:53:15,  8.48step/s]


--- Rollout Summary (Steps 667001 to 667500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 375.3265
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  67%|██████▋   | 667991/1000000 [3:17:21<47:55, 115.46step/s]


Episode 1336 finished at step 500 (668000 total). Env Reward: -9.86, Steps: 500, Delivered: 5


Total Steps Trained:  67%|██████▋   | 668014/1000000 [3:17:26<8:08:51, 11.32step/s] 


--- Rollout Summary (Steps 667501 to 668000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 525.1419
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  67%|██████▋   | 668488/1000000 [3:17:30<45:47, 120.66step/s]


Episode 1337 finished at step 500 (668500 total). Env Reward: -10.19, Steps: 500, Delivered: 4


Total Steps Trained:  67%|██████▋   | 668511/1000000 [3:17:35<8:03:02, 11.44step/s] 


--- Rollout Summary (Steps 668001 to 668500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0014
Avg Critic Loss (per minibatch): 709.8753
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  67%|██████▋   | 668997/1000000 [3:17:39<45:38, 120.86step/s]


Episode 1338 finished at step 500 (669000 total). Env Reward: -13.67, Steps: 500, Delivered: 1


Total Steps Trained:  67%|██████▋   | 669010/1000000 [3:17:44<10:23:43,  8.84step/s]


--- Rollout Summary (Steps 668501 to 669000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0001
Avg Critic Loss (per minibatch): 606.7079
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  67%|██████▋   | 669493/1000000 [3:17:48<45:19, 121.52step/s]


Episode 1339 finished at step 500 (669500 total). Env Reward: -11.28, Steps: 500, Delivered: 4


Total Steps Trained:  67%|██████▋   | 669516/1000000 [3:17:52<7:59:53, 11.48step/s] 


--- Rollout Summary (Steps 669001 to 669500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 532.4383
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  67%|██████▋   | 669998/1000000 [3:17:56<45:59, 119.59step/s]


Episode 1340 finished at step 500 (670000 total). Env Reward: -12.09, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001340_map1.pth


Total Steps Trained:  67%|██████▋   | 670021/1000000 [3:18:01<8:05:34, 11.33step/s] 


--- Rollout Summary (Steps 669501 to 670000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 561.2139
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  67%|██████▋   | 670494/1000000 [3:18:05<45:33, 120.54step/s]


Episode 1341 finished at step 500 (670500 total). Env Reward: -12.85, Steps: 500, Delivered: 2


Total Steps Trained:  67%|██████▋   | 670518/1000000 [3:18:10<7:48:01, 11.73step/s] 


--- Rollout Summary (Steps 670001 to 670500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 471.3884
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  67%|██████▋   | 670995/1000000 [3:18:14<46:27, 118.02step/s]


Episode 1342 finished at step 500 (671000 total). Env Reward: -1.68, Steps: 500, Delivered: 4


Total Steps Trained:  67%|██████▋   | 671019/1000000 [3:18:19<8:15:46, 11.06step/s] 


--- Rollout Summary (Steps 670501 to 671000) ---
Update Duration: 4.70s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 770.5468
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  67%|██████▋   | 671500/1000000 [3:18:23<48:20, 113.25step/s]


Episode 1343 finished at step 500 (671500 total). Env Reward: -6.83, Steps: 500, Delivered: 8


Total Steps Trained:  67%|██████▋   | 671512/1000000 [3:18:28<11:08:51,  8.19step/s]


--- Rollout Summary (Steps 671001 to 671500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0094
Avg Critic Loss (per minibatch): 985.0373
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  67%|██████▋   | 671998/1000000 [3:18:32<47:36, 114.82step/s]


Episode 1344 finished at step 500 (672000 total). Env Reward: -9.57, Steps: 500, Delivered: 5


Total Steps Trained:  67%|██████▋   | 672010/1000000 [3:18:37<11:05:31,  8.21step/s]


--- Rollout Summary (Steps 671501 to 672000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 547.4990
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  67%|██████▋   | 672498/1000000 [3:18:41<45:17, 120.53step/s]


Episode 1345 finished at step 500 (672500 total). Env Reward: -8.42, Steps: 500, Delivered: 6


Total Steps Trained:  67%|██████▋   | 672511/1000000 [3:18:45<10:26:01,  8.72step/s]


--- Rollout Summary (Steps 672001 to 672500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 421.6730
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  67%|██████▋   | 673000/1000000 [3:18:50<46:05, 118.24step/s]


Episode 1346 finished at step 500 (673000 total). Env Reward: 0.75, Steps: 500, Delivered: 6


Total Steps Trained:  67%|██████▋   | 673012/1000000 [3:18:55<10:48:35,  8.40step/s]


--- Rollout Summary (Steps 672501 to 673000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 1008.0318
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  67%|██████▋   | 673500/1000000 [3:18:59<45:33, 119.46step/s]


Episode 1347 finished at step 500 (673500 total). Env Reward: -11.97, Steps: 500, Delivered: 2


Total Steps Trained:  67%|██████▋   | 673512/1000000 [3:19:03<10:30:22,  8.63step/s]


--- Rollout Summary (Steps 673001 to 673500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 604.1285
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  67%|██████▋   | 673992/1000000 [3:19:08<49:48, 109.09step/s]


Episode 1348 finished at step 500 (674000 total). Env Reward: -6.97, Steps: 500, Delivered: 8


Total Steps Trained:  67%|██████▋   | 674014/1000000 [3:19:12<8:30:02, 10.65step/s] 


--- Rollout Summary (Steps 673501 to 674000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 403.9752
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  67%|██████▋   | 674492/1000000 [3:19:17<45:26, 119.38step/s]


Episode 1349 finished at step 500 (674500 total). Env Reward: -11.82, Steps: 500, Delivered: 3


Total Steps Trained:  67%|██████▋   | 674516/1000000 [3:19:21<7:47:00, 11.62step/s] 


--- Rollout Summary (Steps 674001 to 674500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 714.6952
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  67%|██████▋   | 674995/1000000 [3:19:25<47:18, 114.51step/s]


Episode 1350 finished at step 500 (675000 total). Env Reward: -8.08, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001350_map1.pth


Total Steps Trained:  68%|██████▊   | 675018/1000000 [3:19:30<7:58:21, 11.32step/s] 


--- Rollout Summary (Steps 674501 to 675000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 655.2349
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  68%|██████▊   | 675499/1000000 [3:19:34<45:17, 119.40step/s]


Episode 1351 finished at step 500 (675500 total). Env Reward: -7.89, Steps: 500, Delivered: 7


Total Steps Trained:  68%|██████▊   | 675511/1000000 [3:19:39<10:44:16,  8.39step/s]


--- Rollout Summary (Steps 675001 to 675500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 690.7153
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  68%|██████▊   | 675995/1000000 [3:19:43<45:22, 119.03step/s]


Episode 1352 finished at step 500 (676000 total). Env Reward: -11.77, Steps: 500, Delivered: 3


Total Steps Trained:  68%|██████▊   | 676018/1000000 [3:19:48<7:55:52, 11.35step/s] 


--- Rollout Summary (Steps 675501 to 676000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 457.9570
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  68%|██████▊   | 676492/1000000 [3:19:52<46:34, 115.78step/s]


Episode 1353 finished at step 500 (676500 total). Env Reward: -10.85, Steps: 500, Delivered: 4


Total Steps Trained:  68%|██████▊   | 676515/1000000 [3:19:56<7:45:52, 11.57step/s] 


--- Rollout Summary (Steps 676001 to 676500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 649.7738
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  68%|██████▊   | 676993/1000000 [3:20:00<45:10, 119.16step/s]


Episode 1354 finished at step 500 (677000 total). Env Reward: 0.37, Steps: 500, Delivered: 6


Total Steps Trained:  68%|██████▊   | 677015/1000000 [3:20:05<8:04:14, 11.12step/s] 


--- Rollout Summary (Steps 676501 to 677000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 1223.8484
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  68%|██████▊   | 677496/1000000 [3:20:09<45:19, 118.58step/s]


Episode 1355 finished at step 500 (677500 total). Env Reward: -9.77, Steps: 500, Delivered: 5


Total Steps Trained:  68%|██████▊   | 677519/1000000 [3:20:14<7:54:47, 11.32step/s] 


--- Rollout Summary (Steps 677001 to 677500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): 0.0007
Avg Critic Loss (per minibatch): 540.8419
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  68%|██████▊   | 677996/1000000 [3:20:18<45:57, 116.79step/s]


Episode 1356 finished at step 500 (678000 total). Env Reward: -7.59, Steps: 500, Delivered: 7


Total Steps Trained:  68%|██████▊   | 678020/1000000 [3:20:23<7:45:21, 11.53step/s] 


--- Rollout Summary (Steps 677501 to 678000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 513.7746
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  68%|██████▊   | 678493/1000000 [3:20:27<44:28, 120.49step/s]


Episode 1357 finished at step 500 (678500 total). Env Reward: -0.42, Steps: 500, Delivered: 6


Total Steps Trained:  68%|██████▊   | 678516/1000000 [3:20:32<7:49:52, 11.40step/s] 


--- Rollout Summary (Steps 678001 to 678500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 754.7091
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  68%|██████▊   | 678997/1000000 [3:20:36<44:17, 120.77step/s]


Episode 1358 finished at step 500 (679000 total). Env Reward: -12.53, Steps: 500, Delivered: 2


Total Steps Trained:  68%|██████▊   | 679021/1000000 [3:20:41<7:55:21, 11.25step/s] 


--- Rollout Summary (Steps 678501 to 679000) ---
Update Duration: 4.72s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 622.2541
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  68%|██████▊   | 679493/1000000 [3:20:45<47:19, 112.86step/s]


Episode 1359 finished at step 500 (679500 total). Env Reward: -9.68, Steps: 500, Delivered: 5


Total Steps Trained:  68%|██████▊   | 679515/1000000 [3:20:49<8:15:27, 10.78step/s] 


--- Rollout Summary (Steps 679001 to 679500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 599.3253
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  68%|██████▊   | 680000/1000000 [3:20:54<47:00, 113.47step/s]


Episode 1360 finished at step 500 (680000 total). Env Reward: -3.74, Steps: 500, Delivered: 11
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001360_map1.pth


Total Steps Trained:  68%|██████▊   | 680012/1000000 [3:20:58<10:33:21,  8.42step/s]


--- Rollout Summary (Steps 679501 to 680000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 404.1303
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  68%|██████▊   | 680495/1000000 [3:21:02<44:38, 119.27step/s]


Episode 1361 finished at step 500 (680500 total). Env Reward: -6.89, Steps: 500, Delivered: 8


Total Steps Trained:  68%|██████▊   | 680519/1000000 [3:21:07<7:39:49, 11.58step/s] 


--- Rollout Summary (Steps 680001 to 680500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0075
Avg Critic Loss (per minibatch): 550.1961
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  68%|██████▊   | 680999/1000000 [3:21:11<46:28, 114.42step/s]


Episode 1362 finished at step 500 (681000 total). Env Reward: -8.23, Steps: 500, Delivered: 6


Total Steps Trained:  68%|██████▊   | 681011/1000000 [3:21:16<10:55:15,  8.11step/s]


--- Rollout Summary (Steps 680501 to 681000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 813.8957
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  68%|██████▊   | 681500/1000000 [3:21:20<44:53, 118.25step/s]


Episode 1363 finished at step 500 (681500 total). Env Reward: -10.59, Steps: 500, Delivered: 4


Total Steps Trained:  68%|██████▊   | 681512/1000000 [3:21:25<10:24:19,  8.50step/s]


--- Rollout Summary (Steps 681001 to 681500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 496.7019
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  68%|██████▊   | 681998/1000000 [3:21:29<45:37, 116.15step/s]


Episode 1364 finished at step 500 (682000 total). Env Reward: -9.02, Steps: 500, Delivered: 6


Total Steps Trained:  68%|██████▊   | 682010/1000000 [3:21:34<10:33:39,  8.36step/s]


--- Rollout Summary (Steps 681501 to 682000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 513.1359
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  68%|██████▊   | 682494/1000000 [3:21:38<47:51, 110.59step/s]


Episode 1365 finished at step 500 (682500 total). Env Reward: 0.58, Steps: 500, Delivered: 6


Total Steps Trained:  68%|██████▊   | 682516/1000000 [3:21:43<8:15:09, 10.69step/s] 


--- Rollout Summary (Steps 682001 to 682500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 1076.1707
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  68%|██████▊   | 682990/1000000 [3:21:47<45:14, 116.78step/s]


Episode 1366 finished at step 500 (683000 total). Env Reward: -9.88, Steps: 500, Delivered: 5


Total Steps Trained:  68%|██████▊   | 683013/1000000 [3:21:51<7:48:31, 11.28step/s] 


--- Rollout Summary (Steps 682501 to 683000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 485.9622
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  68%|██████▊   | 683499/1000000 [3:21:56<42:58, 122.74step/s]


Episode 1367 finished at step 500 (683500 total). Env Reward: 3.40, Steps: 500, Delivered: 9


Total Steps Trained:  68%|██████▊   | 683512/1000000 [3:22:00<9:57:17,  8.83step/s]


--- Rollout Summary (Steps 683001 to 683500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 1199.0379
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  68%|██████▊   | 683998/1000000 [3:22:05<44:02, 119.58step/s]


Episode 1368 finished at step 500 (684000 total). Env Reward: -8.76, Steps: 500, Delivered: 6


Total Steps Trained:  68%|██████▊   | 684021/1000000 [3:22:10<7:43:33, 11.36step/s] 


--- Rollout Summary (Steps 683501 to 684000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 677.6070
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  68%|██████▊   | 684493/1000000 [3:22:14<44:33, 118.03step/s]


Episode 1369 finished at step 500 (684500 total). Env Reward: -11.40, Steps: 500, Delivered: 3


Total Steps Trained:  68%|██████▊   | 684515/1000000 [3:22:19<8:00:27, 10.94step/s] 


--- Rollout Summary (Steps 684001 to 684500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 576.8521
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  68%|██████▊   | 684995/1000000 [3:22:23<43:43, 120.09step/s]


Episode 1370 finished at step 500 (685000 total). Env Reward: 0.14, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001370_map1.pth


Total Steps Trained:  69%|██████▊   | 685019/1000000 [3:22:27<7:36:48, 11.49step/s] 


--- Rollout Summary (Steps 684501 to 685000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 977.8482
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  69%|██████▊   | 685500/1000000 [3:22:32<45:29, 115.20step/s]


Episode 1371 finished at step 500 (685500 total). Env Reward: -7.84, Steps: 500, Delivered: 7


Total Steps Trained:  69%|██████▊   | 685512/1000000 [3:22:36<10:23:33,  8.41step/s]


--- Rollout Summary (Steps 685001 to 685500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): 0.0001
Avg Critic Loss (per minibatch): 662.8260
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  69%|██████▊   | 685993/1000000 [3:22:40<44:56, 116.43step/s]


Episode 1372 finished at step 500 (686000 total). Env Reward: -0.13, Steps: 500, Delivered: 6


Total Steps Trained:  69%|██████▊   | 686016/1000000 [3:22:45<7:58:42, 10.93step/s] 


--- Rollout Summary (Steps 685501 to 686000) ---
Update Duration: 4.70s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 878.1503
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  69%|██████▊   | 686489/1000000 [3:22:50<42:57, 121.63step/s]


Episode 1373 finished at step 500 (686500 total). Env Reward: -6.93, Steps: 500, Delivered: 8


Total Steps Trained:  69%|██████▊   | 686512/1000000 [3:22:54<7:36:21, 11.45step/s]


--- Rollout Summary (Steps 686001 to 686500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.22
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 390.2274
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  69%|██████▊   | 686991/1000000 [3:22:58<43:49, 119.02step/s]


Episode 1374 finished at step 500 (687000 total). Env Reward: -6.77, Steps: 500, Delivered: 8


Total Steps Trained:  69%|██████▊   | 687013/1000000 [3:23:03<7:44:23, 11.23step/s] 


--- Rollout Summary (Steps 686501 to 687000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0089
Avg Critic Loss (per minibatch): 413.3003
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  69%|██████▊   | 687493/1000000 [3:23:07<48:32, 107.29step/s]


Episode 1375 finished at step 500 (687500 total). Env Reward: 6.37, Steps: 500, Delivered: 12


Total Steps Trained:  69%|██████▉   | 687515/1000000 [3:23:12<8:02:17, 10.80step/s] 


--- Rollout Summary (Steps 687001 to 687500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 1902.3900
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  69%|██████▉   | 687997/1000000 [3:23:16<46:01, 112.99step/s]


Episode 1376 finished at step 500 (688000 total). Env Reward: -10.98, Steps: 500, Delivered: 4


Total Steps Trained:  69%|██████▉   | 688021/1000000 [3:23:21<7:42:45, 11.24step/s] 


--- Rollout Summary (Steps 687501 to 688000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0083
Avg Critic Loss (per minibatch): 470.7777
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  69%|██████▉   | 688496/1000000 [3:23:25<43:22, 119.69step/s]


Episode 1377 finished at step 500 (688500 total). Env Reward: 4.38, Steps: 500, Delivered: 10


Total Steps Trained:  69%|██████▉   | 688520/1000000 [3:23:30<7:27:12, 11.61step/s] 


--- Rollout Summary (Steps 688001 to 688500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 961.2474
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  69%|██████▉   | 688990/1000000 [3:23:34<44:56, 115.34step/s]


Episode 1378 finished at step 500 (689000 total). Env Reward: -11.94, Steps: 500, Delivered: 3


Total Steps Trained:  69%|██████▉   | 689013/1000000 [3:23:39<7:48:23, 11.07step/s] 


--- Rollout Summary (Steps 688501 to 689000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 506.2829
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  69%|██████▉   | 689500/1000000 [3:23:43<43:55, 117.81step/s]


Episode 1379 finished at step 500 (689500 total). Env Reward: -12.54, Steps: 500, Delivered: 2


Total Steps Trained:  69%|██████▉   | 689512/1000000 [3:23:48<10:33:45,  8.17step/s]


--- Rollout Summary (Steps 689001 to 689500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 566.3394
Avg Entropy (per minibatch): 2.5615
------------------------------


Total Steps Trained:  69%|██████▉   | 689995/1000000 [3:23:52<44:33, 115.97step/s]


Episode 1380 finished at step 500 (690000 total). Env Reward: -9.63, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001380_map1.pth


Total Steps Trained:  69%|██████▉   | 690018/1000000 [3:23:57<7:41:53, 11.19step/s] 


--- Rollout Summary (Steps 689501 to 690000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -6.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 465.4095
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  69%|██████▉   | 690496/1000000 [3:24:01<42:51, 120.37step/s]


Episode 1381 finished at step 500 (690500 total). Env Reward: -11.73, Steps: 500, Delivered: 3


Total Steps Trained:  69%|██████▉   | 690520/1000000 [3:24:06<7:19:46, 11.73step/s]


--- Rollout Summary (Steps 690001 to 690500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -6.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 439.3507
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  69%|██████▉   | 690996/1000000 [3:24:10<45:16, 113.74step/s]


Episode 1382 finished at step 500 (691000 total). Env Reward: -4.61, Steps: 500, Delivered: 10


Total Steps Trained:  69%|██████▉   | 691019/1000000 [3:24:15<7:54:41, 10.85step/s] 


--- Rollout Summary (Steps 690501 to 691000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -6.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 529.3963
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  69%|██████▉   | 691494/1000000 [3:24:19<45:46, 112.33step/s]


Episode 1383 finished at step 500 (691500 total). Env Reward: -6.82, Steps: 500, Delivered: 8


Total Steps Trained:  69%|██████▉   | 691518/1000000 [3:24:24<7:34:44, 11.31step/s] 


--- Rollout Summary (Steps 691001 to 691500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 442.1347
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  69%|██████▉   | 691989/1000000 [3:24:28<45:51, 111.96step/s]


Episode 1384 finished at step 500 (692000 total). Env Reward: -7.72, Steps: 500, Delivered: 7


Total Steps Trained:  69%|██████▉   | 692012/1000000 [3:24:32<7:43:43, 11.07step/s] 


--- Rollout Summary (Steps 691501 to 692000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 628.4891
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  69%|██████▉   | 692494/1000000 [3:24:37<44:36, 114.91step/s]


Episode 1385 finished at step 500 (692500 total). Env Reward: -1.53, Steps: 500, Delivered: 4


Total Steps Trained:  69%|██████▉   | 692517/1000000 [3:24:41<7:44:55, 11.02step/s] 


--- Rollout Summary (Steps 692001 to 692500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 622.2428
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  69%|██████▉   | 692997/1000000 [3:24:45<44:12, 115.74step/s]


Episode 1386 finished at step 500 (693000 total). Env Reward: -8.57, Steps: 500, Delivered: 6


Total Steps Trained:  69%|██████▉   | 693020/1000000 [3:24:50<7:36:49, 11.20step/s] 


--- Rollout Summary (Steps 692501 to 693000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -6.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 673.1942
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  69%|██████▉   | 693492/1000000 [3:24:54<40:41, 125.56step/s]


Episode 1387 finished at step 500 (693500 total). Env Reward: -12.21, Steps: 500, Delivered: 2


Total Steps Trained:  69%|██████▉   | 693516/1000000 [3:24:59<7:14:32, 11.76step/s]


--- Rollout Summary (Steps 693001 to 693500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): 0.0000
Avg Critic Loss (per minibatch): 568.1874
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  69%|██████▉   | 693995/1000000 [3:25:03<42:28, 120.08step/s]


Episode 1388 finished at step 500 (694000 total). Env Reward: -10.95, Steps: 500, Delivered: 4


Total Steps Trained:  69%|██████▉   | 694017/1000000 [3:25:08<7:34:34, 11.22step/s]


--- Rollout Summary (Steps 693501 to 694000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 518.7369
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  69%|██████▉   | 694499/1000000 [3:25:12<43:36, 116.78step/s]


Episode 1389 finished at step 500 (694500 total). Env Reward: -7.88, Steps: 500, Delivered: 7


Total Steps Trained:  69%|██████▉   | 694511/1000000 [3:25:17<10:18:59,  8.23step/s]


--- Rollout Summary (Steps 694001 to 694500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 409.0817
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  69%|██████▉   | 694989/1000000 [3:25:21<42:12, 120.45step/s]


Episode 1390 finished at step 500 (695000 total). Env Reward: -8.48, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001390_map1.pth


Total Steps Trained:  70%|██████▉   | 695013/1000000 [3:25:25<7:28:45, 11.33step/s]


--- Rollout Summary (Steps 694501 to 695000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.13
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 571.3261
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  70%|██████▉   | 695498/1000000 [3:25:30<42:24, 119.65step/s]


Episode 1391 finished at step 500 (695500 total). Env Reward: -12.09, Steps: 500, Delivered: 2


Total Steps Trained:  70%|██████▉   | 695510/1000000 [3:25:35<9:59:54,  8.46step/s]


--- Rollout Summary (Steps 695001 to 695500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 532.9341
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  70%|██████▉   | 695990/1000000 [3:25:39<43:32, 116.36step/s]


Episode 1392 finished at step 500 (696000 total). Env Reward: -9.54, Steps: 500, Delivered: 5


Total Steps Trained:  70%|██████▉   | 696013/1000000 [3:25:43<7:29:50, 11.26step/s] 


--- Rollout Summary (Steps 695501 to 696000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): 0.0015
Avg Critic Loss (per minibatch): 510.6306
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  70%|██████▉   | 696489/1000000 [3:25:47<41:30, 121.85step/s]


Episode 1393 finished at step 500 (696500 total). Env Reward: -9.85, Steps: 500, Delivered: 5


Total Steps Trained:  70%|██████▉   | 696513/1000000 [3:25:52<7:11:36, 11.72step/s]


--- Rollout Summary (Steps 696001 to 696500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 503.0941
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  70%|██████▉   | 697000/1000000 [3:25:56<42:36, 118.50step/s]


Episode 1394 finished at step 500 (697000 total). Env Reward: -12.59, Steps: 500, Delivered: 2


Total Steps Trained:  70%|██████▉   | 697012/1000000 [3:26:01<9:55:45,  8.48step/s]


--- Rollout Summary (Steps 696501 to 697000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 509.3735
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  70%|██████▉   | 697493/1000000 [3:26:05<41:30, 121.47step/s]


Episode 1395 finished at step 500 (697500 total). Env Reward: -7.36, Steps: 500, Delivered: 7


Total Steps Trained:  70%|██████▉   | 697516/1000000 [3:26:10<7:24:10, 11.35step/s]


--- Rollout Summary (Steps 697001 to 697500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 585.3290
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  70%|██████▉   | 698000/1000000 [3:26:14<45:33, 110.48step/s]


Episode 1396 finished at step 500 (698000 total). Env Reward: -9.69, Steps: 500, Delivered: 5


Total Steps Trained:  70%|██████▉   | 698012/1000000 [3:26:19<10:17:12,  8.15step/s]


--- Rollout Summary (Steps 697501 to 698000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 652.2302
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  70%|██████▉   | 698489/1000000 [3:26:23<41:48, 120.17step/s]


Episode 1397 finished at step 500 (698500 total). Env Reward: -9.31, Steps: 500, Delivered: 5


Total Steps Trained:  70%|██████▉   | 698512/1000000 [3:26:28<7:41:03, 10.90step/s]


--- Rollout Summary (Steps 698001 to 698500) ---
Update Duration: 4.73s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 449.1786
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  70%|██████▉   | 698992/1000000 [3:26:32<44:44, 112.14step/s]


Episode 1398 finished at step 500 (699000 total). Env Reward: -8.93, Steps: 500, Delivered: 6


Total Steps Trained:  70%|██████▉   | 699014/1000000 [3:26:37<7:42:25, 10.85step/s] 


--- Rollout Summary (Steps 698501 to 699000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 379.0064
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  70%|██████▉   | 699495/1000000 [3:26:41<42:04, 119.05step/s]


Episode 1399 finished at step 500 (699500 total). Env Reward: -14.04, Steps: 500, Delivered: 1


Total Steps Trained:  70%|██████▉   | 699518/1000000 [3:26:45<7:16:07, 11.48step/s]


--- Rollout Summary (Steps 699001 to 699500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 525.5379
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  70%|██████▉   | 699989/1000000 [3:26:49<42:56, 116.45step/s]


Episode 1400 finished at step 500 (700000 total). Env Reward: 0.76, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001400_map1.pth


Total Steps Trained:  70%|███████   | 700012/1000000 [3:26:54<7:27:49, 11.16step/s] 


--- Rollout Summary (Steps 699501 to 700000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 441.3115
Avg Entropy (per minibatch): 2.5617
------------------------------


Total Steps Trained:  70%|███████   | 700498/1000000 [3:26:58<45:20, 110.11step/s]


Episode 1401 finished at step 500 (700500 total). Env Reward: -10.74, Steps: 500, Delivered: 4


Total Steps Trained:  70%|███████   | 700510/1000000 [3:27:03<10:00:03,  8.32step/s]


--- Rollout Summary (Steps 700001 to 700500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 512.0502
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  70%|███████   | 700989/1000000 [3:27:07<41:20, 120.54step/s]


Episode 1402 finished at step 500 (701000 total). Env Reward: -10.76, Steps: 500, Delivered: 4


Total Steps Trained:  70%|███████   | 701013/1000000 [3:27:12<7:18:32, 11.36step/s]


--- Rollout Summary (Steps 700501 to 701000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 672.2969
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  70%|███████   | 701489/1000000 [3:27:16<43:16, 114.98step/s]


Episode 1403 finished at step 500 (701500 total). Env Reward: -7.09, Steps: 500, Delivered: 8


Total Steps Trained:  70%|███████   | 701512/1000000 [3:27:21<7:26:35, 11.14step/s] 


--- Rollout Summary (Steps 701001 to 701500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 520.8395
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  70%|███████   | 701990/1000000 [3:27:25<43:27, 114.29step/s]


Episode 1404 finished at step 500 (702000 total). Env Reward: -8.62, Steps: 500, Delivered: 6


Total Steps Trained:  70%|███████   | 702013/1000000 [3:27:30<7:41:48, 10.75step/s] 


--- Rollout Summary (Steps 701501 to 702000) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0005
Avg Critic Loss (per minibatch): 569.8955
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  70%|███████   | 702488/1000000 [3:27:34<41:04, 120.73step/s]


Episode 1405 finished at step 500 (702500 total). Env Reward: -12.98, Steps: 500, Delivered: 1


Total Steps Trained:  70%|███████   | 702513/1000000 [3:27:39<6:52:45, 12.01step/s]


--- Rollout Summary (Steps 702001 to 702500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 458.1253
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  70%|███████   | 702996/1000000 [3:27:43<41:09, 120.29step/s]


Episode 1406 finished at step 500 (703000 total). Env Reward: -8.55, Steps: 500, Delivered: 6


Total Steps Trained:  70%|███████   | 703021/1000000 [3:27:47<6:55:20, 11.92step/s]


--- Rollout Summary (Steps 702501 to 703000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 387.4483
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  70%|███████   | 703498/1000000 [3:27:51<41:08, 120.12step/s]


Episode 1407 finished at step 500 (703500 total). Env Reward: -10.72, Steps: 500, Delivered: 4


Total Steps Trained:  70%|███████   | 703511/1000000 [3:27:56<9:15:58,  8.89step/s]


--- Rollout Summary (Steps 703001 to 703500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 527.1259
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  70%|███████   | 703993/1000000 [3:28:00<41:56, 117.63step/s]


Episode 1408 finished at step 500 (704000 total). Env Reward: -6.78, Steps: 500, Delivered: 8


Total Steps Trained:  70%|███████   | 704015/1000000 [3:28:05<7:30:26, 10.95step/s]


--- Rollout Summary (Steps 703501 to 704000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0111
Avg Critic Loss (per minibatch): 424.5805
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  70%|███████   | 704494/1000000 [3:28:09<40:40, 121.09step/s]


Episode 1409 finished at step 500 (704500 total). Env Reward: -10.94, Steps: 500, Delivered: 4


Total Steps Trained:  70%|███████   | 704519/1000000 [3:28:14<6:50:05, 12.01step/s]


--- Rollout Summary (Steps 704001 to 704500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0073
Avg Critic Loss (per minibatch): 438.7654
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  70%|███████   | 704995/1000000 [3:28:18<45:38, 107.74step/s]


Episode 1410 finished at step 500 (705000 total). Env Reward: -6.21, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001410_map1.pth


Total Steps Trained:  71%|███████   | 705018/1000000 [3:28:23<7:20:13, 11.17step/s] 


--- Rollout Summary (Steps 704501 to 705000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 536.3493
Avg Entropy (per minibatch): 2.5588
------------------------------


Total Steps Trained:  71%|███████   | 705494/1000000 [3:28:27<41:21, 118.70step/s]


Episode 1411 finished at step 500 (705500 total). Env Reward: -6.51, Steps: 500, Delivered: 8


Total Steps Trained:  71%|███████   | 705517/1000000 [3:28:32<7:19:33, 11.17step/s]


--- Rollout Summary (Steps 705001 to 705500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 518.2098
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  71%|███████   | 706000/1000000 [3:28:36<41:03, 119.35step/s]


Episode 1412 finished at step 500 (706000 total). Env Reward: 1.43, Steps: 500, Delivered: 7


Total Steps Trained:  71%|███████   | 706012/1000000 [3:28:41<9:33:40,  8.54step/s]


--- Rollout Summary (Steps 705501 to 706000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 959.2372
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  71%|███████   | 706489/1000000 [3:28:45<42:21, 115.50step/s]


Episode 1413 finished at step 500 (706500 total). Env Reward: -8.51, Steps: 500, Delivered: 6


Total Steps Trained:  71%|███████   | 706513/1000000 [3:28:50<7:03:23, 11.55step/s]


--- Rollout Summary (Steps 706001 to 706500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 385.9797
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  71%|███████   | 706997/1000000 [3:28:54<40:29, 120.58step/s]


Episode 1414 finished at step 500 (707000 total). Env Reward: 2.24, Steps: 500, Delivered: 8


Total Steps Trained:  71%|███████   | 707022/1000000 [3:28:59<6:45:05, 12.05step/s]


--- Rollout Summary (Steps 706501 to 707000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 817.9909
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  71%|███████   | 707500/1000000 [3:29:03<41:04, 118.70step/s]


Episode 1415 finished at step 500 (707500 total). Env Reward: -8.95, Steps: 500, Delivered: 5


Total Steps Trained:  71%|███████   | 707512/1000000 [3:29:07<9:44:28,  8.34step/s]


--- Rollout Summary (Steps 707001 to 707500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 715.8839
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  71%|███████   | 707996/1000000 [3:29:11<44:24, 109.60step/s]


Episode 1416 finished at step 500 (708000 total). Env Reward: -8.97, Steps: 500, Delivered: 6


Total Steps Trained:  71%|███████   | 708018/1000000 [3:29:16<7:19:41, 11.07step/s]


--- Rollout Summary (Steps 707501 to 708000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 562.2028
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  71%|███████   | 708489/1000000 [3:29:20<41:07, 118.12step/s]


Episode 1417 finished at step 500 (708500 total). Env Reward: -7.71, Steps: 500, Delivered: 7


Total Steps Trained:  71%|███████   | 708512/1000000 [3:29:25<7:09:34, 11.31step/s]


--- Rollout Summary (Steps 708001 to 708500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 507.2261
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  71%|███████   | 708993/1000000 [3:29:29<41:17, 117.45step/s]


Episode 1418 finished at step 500 (709000 total). Env Reward: 0.51, Steps: 500, Delivered: 6


Total Steps Trained:  71%|███████   | 709016/1000000 [3:29:34<7:03:56, 11.44step/s]


--- Rollout Summary (Steps 708501 to 709000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 412.2253
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  71%|███████   | 709489/1000000 [3:29:38<43:52, 110.36step/s]


Episode 1419 finished at step 500 (709500 total). Env Reward: 2.63, Steps: 500, Delivered: 8


Total Steps Trained:  71%|███████   | 709512/1000000 [3:29:42<7:10:57, 11.23step/s]


--- Rollout Summary (Steps 709001 to 709500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 658.5733
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  71%|███████   | 709989/1000000 [3:29:47<40:48, 118.44step/s]


Episode 1420 finished at step 500 (710000 total). Env Reward: -10.40, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001420_map1.pth


Total Steps Trained:  71%|███████   | 710012/1000000 [3:29:51<6:58:18, 11.55step/s]


--- Rollout Summary (Steps 709501 to 710000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 650.9442
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  71%|███████   | 710495/1000000 [3:29:55<40:37, 118.75step/s]


Episode 1421 finished at step 500 (710500 total). Env Reward: -0.43, Steps: 500, Delivered: 5


Total Steps Trained:  71%|███████   | 710519/1000000 [3:30:00<7:01:17, 11.45step/s]


--- Rollout Summary (Steps 710001 to 710500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 1185.2777
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  71%|███████   | 710998/1000000 [3:30:04<42:34, 113.14step/s]


Episode 1422 finished at step 500 (711000 total). Env Reward: -4.30, Steps: 500, Delivered: 10


Total Steps Trained:  71%|███████   | 711022/1000000 [3:30:09<7:23:00, 10.87step/s] 


--- Rollout Summary (Steps 710501 to 711000) ---
Update Duration: 4.75s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 634.9280
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  71%|███████   | 711491/1000000 [3:30:13<40:19, 119.25step/s]


Episode 1423 finished at step 500 (711500 total). Env Reward: -11.84, Steps: 500, Delivered: 3


Total Steps Trained:  71%|███████   | 711512/1000000 [3:30:18<7:28:54, 10.71step/s]


--- Rollout Summary (Steps 711001 to 711500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 499.8001
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  71%|███████   | 711994/1000000 [3:30:22<39:25, 121.78step/s]


Episode 1424 finished at step 500 (712000 total). Env Reward: 8.31, Steps: 500, Delivered: 5


Total Steps Trained:  71%|███████   | 712018/1000000 [3:30:27<6:48:22, 11.75step/s]


--- Rollout Summary (Steps 711501 to 712000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 1888.4401
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  71%|███████   | 712490/1000000 [3:30:31<43:48, 109.40step/s]


Episode 1425 finished at step 500 (712500 total). Env Reward: -9.17, Steps: 500, Delivered: 5


Total Steps Trained:  71%|███████▏  | 712512/1000000 [3:30:35<7:16:05, 10.99step/s]


--- Rollout Summary (Steps 712001 to 712500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 641.6795
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  71%|███████▏  | 712998/1000000 [3:30:40<42:22, 112.90step/s]


Episode 1426 finished at step 500 (713000 total). Env Reward: -9.85, Steps: 500, Delivered: 5


Total Steps Trained:  71%|███████▏  | 713022/1000000 [3:30:44<6:54:50, 11.53step/s]


--- Rollout Summary (Steps 712501 to 713000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 339.8041
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  71%|███████▏  | 713491/1000000 [3:30:48<41:13, 115.84step/s]


Episode 1427 finished at step 500 (713500 total). Env Reward: -9.32, Steps: 500, Delivered: 5


Total Steps Trained:  71%|███████▏  | 713514/1000000 [3:30:53<7:00:44, 11.35step/s]


--- Rollout Summary (Steps 713001 to 713500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.19
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 459.9146
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  71%|███████▏  | 713995/1000000 [3:30:57<41:38, 114.49step/s]


Episode 1428 finished at step 500 (714000 total). Env Reward: -12.09, Steps: 500, Delivered: 2


Total Steps Trained:  71%|███████▏  | 714018/1000000 [3:31:02<7:00:34, 11.33step/s]


--- Rollout Summary (Steps 713501 to 714000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.20
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): 0.0034
Avg Critic Loss (per minibatch): 561.3467
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  71%|███████▏  | 714499/1000000 [3:31:06<39:17, 121.11step/s]


Episode 1429 finished at step 500 (714500 total). Env Reward: -10.24, Steps: 500, Delivered: 4


Total Steps Trained:  71%|███████▏  | 714512/1000000 [3:31:10<8:56:34,  8.87step/s]


--- Rollout Summary (Steps 714001 to 714500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.21
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0000
Avg Critic Loss (per minibatch): 577.2168
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  72%|███████▏  | 715000/1000000 [3:31:15<44:37, 106.43step/s]


Episode 1430 finished at step 500 (715000 total). Env Reward: -9.59, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001430_map1.pth


Total Steps Trained:  72%|███████▏  | 715023/1000000 [3:31:20<7:22:06, 10.74step/s] 


--- Rollout Summary (Steps 714501 to 715000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 705.1531
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  72%|███████▏  | 715488/1000000 [3:31:24<39:03, 121.41step/s]


Episode 1431 finished at step 500 (715500 total). Env Reward: -11.91, Steps: 500, Delivered: 3


Total Steps Trained:  72%|███████▏  | 715513/1000000 [3:31:28<6:30:34, 12.14step/s]


--- Rollout Summary (Steps 715001 to 715500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 500.6192
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  72%|███████▏  | 715996/1000000 [3:31:32<39:42, 119.20step/s]


Episode 1432 finished at step 500 (716000 total). Env Reward: -8.32, Steps: 500, Delivered: 6


Total Steps Trained:  72%|███████▏  | 716020/1000000 [3:31:37<6:39:26, 11.85step/s]


--- Rollout Summary (Steps 715501 to 716000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 520.6498
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  72%|███████▏  | 716496/1000000 [3:31:41<38:52, 121.53step/s]


Episode 1433 finished at step 500 (716500 total). Env Reward: -10.75, Steps: 500, Delivered: 4


Total Steps Trained:  72%|███████▏  | 716520/1000000 [3:31:46<6:50:38, 11.51step/s]


--- Rollout Summary (Steps 716001 to 716500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 448.1212
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  72%|███████▏  | 716996/1000000 [3:31:50<38:24, 122.82step/s]


Episode 1434 finished at step 500 (717000 total). Env Reward: -12.14, Steps: 500, Delivered: 3


Total Steps Trained:  72%|███████▏  | 717021/1000000 [3:31:54<6:28:58, 12.13step/s]


--- Rollout Summary (Steps 716501 to 717000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 512.6257
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  72%|███████▏  | 717489/1000000 [3:31:58<41:10, 114.37step/s]


Episode 1435 finished at step 500 (717500 total). Env Reward: -9.63, Steps: 500, Delivered: 5


Total Steps Trained:  72%|███████▏  | 717512/1000000 [3:32:03<6:55:24, 11.33step/s]


--- Rollout Summary (Steps 717001 to 717500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 518.3286
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  72%|███████▏  | 717998/1000000 [3:32:07<39:48, 118.06step/s]


Episode 1436 finished at step 500 (718000 total). Env Reward: -12.18, Steps: 500, Delivered: 3


Total Steps Trained:  72%|███████▏  | 718021/1000000 [3:32:12<6:49:01, 11.49step/s]


--- Rollout Summary (Steps 717501 to 718000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 637.6954
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  72%|███████▏  | 718497/1000000 [3:32:16<37:35, 124.81step/s]


Episode 1437 finished at step 500 (718500 total). Env Reward: -9.06, Steps: 500, Delivered: 5


Total Steps Trained:  72%|███████▏  | 718510/1000000 [3:32:21<8:46:36,  8.91step/s]


--- Rollout Summary (Steps 718001 to 718500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 458.2624
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  72%|███████▏  | 718991/1000000 [3:32:25<39:18, 119.17step/s]


Episode 1438 finished at step 500 (719000 total). Env Reward: 0.24, Steps: 500, Delivered: 6


Total Steps Trained:  72%|███████▏  | 719014/1000000 [3:32:30<6:53:37, 11.32step/s]


--- Rollout Summary (Steps 718501 to 719000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 1707.7823
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  72%|███████▏  | 719498/1000000 [3:32:34<38:04, 122.79step/s]


Episode 1439 finished at step 500 (719500 total). Env Reward: -11.60, Steps: 500, Delivered: 3


Total Steps Trained:  72%|███████▏  | 719511/1000000 [3:32:38<8:45:49,  8.89step/s]


--- Rollout Summary (Steps 719001 to 719500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 555.5299
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  72%|███████▏  | 719999/1000000 [3:32:42<39:36, 117.82step/s]


Episode 1440 finished at step 500 (720000 total). Env Reward: -12.15, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001440_map1.pth


Total Steps Trained:  72%|███████▏  | 720022/1000000 [3:32:47<7:10:08, 10.85step/s]


--- Rollout Summary (Steps 719501 to 720000) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 544.9371
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  72%|███████▏  | 720500/1000000 [3:32:51<40:23, 115.31step/s]


Episode 1441 finished at step 500 (720500 total). Env Reward: -7.36, Steps: 500, Delivered: 7


Total Steps Trained:  72%|███████▏  | 720512/1000000 [3:32:56<9:16:39,  8.37step/s]


--- Rollout Summary (Steps 720001 to 720500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 500.7744
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  72%|███████▏  | 720998/1000000 [3:33:00<40:14, 115.54step/s]


Episode 1442 finished at step 500 (721000 total). Env Reward: -7.42, Steps: 500, Delivered: 7


Total Steps Trained:  72%|███████▏  | 721021/1000000 [3:33:05<7:01:06, 11.04step/s]


--- Rollout Summary (Steps 720501 to 721000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 451.4393
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  72%|███████▏  | 721489/1000000 [3:33:09<38:48, 119.59step/s]


Episode 1443 finished at step 500 (721500 total). Env Reward: -8.61, Steps: 500, Delivered: 6


Total Steps Trained:  72%|███████▏  | 721512/1000000 [3:33:14<6:43:15, 11.51step/s]


--- Rollout Summary (Steps 721001 to 721500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 528.6263
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  72%|███████▏  | 722000/1000000 [3:33:18<42:48, 108.25step/s]


Episode 1444 finished at step 500 (722000 total). Env Reward: 14.48, Steps: 500, Delivered: 11


Total Steps Trained:  72%|███████▏  | 722011/1000000 [3:33:23<9:42:16,  7.96step/s]


--- Rollout Summary (Steps 721501 to 722000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 1462.4003
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  72%|███████▏  | 722492/1000000 [3:33:27<39:07, 118.20step/s]


Episode 1445 finished at step 500 (722500 total). Env Reward: -8.51, Steps: 500, Delivered: 6


Total Steps Trained:  72%|███████▏  | 722515/1000000 [3:33:32<6:45:42, 11.40step/s]


--- Rollout Summary (Steps 722001 to 722500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0006
Avg Critic Loss (per minibatch): 486.9807
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  72%|███████▏  | 722991/1000000 [3:33:36<39:15, 117.60step/s]


Episode 1446 finished at step 500 (723000 total). Env Reward: -10.67, Steps: 500, Delivered: 4


Total Steps Trained:  72%|███████▏  | 723014/1000000 [3:33:40<6:48:50, 11.29step/s]


--- Rollout Summary (Steps 722501 to 723000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 537.8602
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  72%|███████▏  | 723498/1000000 [3:33:45<37:58, 121.35step/s]


Episode 1447 finished at step 500 (723500 total). Env Reward: -7.62, Steps: 500, Delivered: 7


Total Steps Trained:  72%|███████▏  | 723511/1000000 [3:33:49<8:47:46,  8.73step/s]


--- Rollout Summary (Steps 723001 to 723500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 566.9362
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  72%|███████▏  | 724000/1000000 [3:33:54<38:24, 119.74step/s]


Episode 1448 finished at step 500 (724000 total). Env Reward: -10.93, Steps: 500, Delivered: 4


Total Steps Trained:  72%|███████▏  | 724012/1000000 [3:33:58<8:57:47,  8.55step/s]


--- Rollout Summary (Steps 723501 to 724000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 415.1720
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  72%|███████▏  | 724494/1000000 [3:34:03<38:43, 118.59step/s]


Episode 1449 finished at step 500 (724500 total). Env Reward: -9.37, Steps: 500, Delivered: 5


Total Steps Trained:  72%|███████▏  | 724517/1000000 [3:34:07<6:45:24, 11.33step/s]


--- Rollout Summary (Steps 724001 to 724500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 351.9159
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  72%|███████▏  | 724996/1000000 [3:34:11<39:25, 116.27step/s]


Episode 1450 finished at step 500 (725000 total). Env Reward: -7.13, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001450_map1.pth


Total Steps Trained:  73%|███████▎  | 725019/1000000 [3:34:16<6:47:15, 11.25step/s]


--- Rollout Summary (Steps 724501 to 725000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.30
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 440.8980
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  73%|███████▎  | 725490/1000000 [3:34:20<39:03, 117.15step/s]


Episode 1451 finished at step 500 (725500 total). Env Reward: 6.21, Steps: 500, Delivered: 12


Total Steps Trained:  73%|███████▎  | 725513/1000000 [3:34:25<6:48:39, 11.19step/s]


--- Rollout Summary (Steps 725001 to 725500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 657.5712
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  73%|███████▎  | 726000/1000000 [3:34:29<39:58, 114.22step/s]


Episode 1452 finished at step 500 (726000 total). Env Reward: 14.52, Steps: 500, Delivered: 11


Total Steps Trained:  73%|███████▎  | 726012/1000000 [3:34:34<9:09:13,  8.31step/s]


--- Rollout Summary (Steps 725501 to 726000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -6.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 1683.8524
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  73%|███████▎  | 726491/1000000 [3:34:38<42:03, 108.38step/s]


Episode 1453 finished at step 500 (726500 total). Env Reward: -6.82, Steps: 500, Delivered: 8


Total Steps Trained:  73%|███████▎  | 726513/1000000 [3:34:42<6:51:17, 11.08step/s]


--- Rollout Summary (Steps 726001 to 726500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -6.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.71
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 535.5058
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  73%|███████▎  | 727000/1000000 [3:34:46<37:20, 121.83step/s]


Episode 1454 finished at step 500 (727000 total). Env Reward: -12.01, Steps: 500, Delivered: 3


Total Steps Trained:  73%|███████▎  | 727013/1000000 [3:34:51<8:34:42,  8.84step/s]


--- Rollout Summary (Steps 726501 to 727000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 547.5872
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  73%|███████▎  | 727496/1000000 [3:34:55<37:05, 122.42step/s]


Episode 1455 finished at step 500 (727500 total). Env Reward: -9.65, Steps: 500, Delivered: 5


Total Steps Trained:  73%|███████▎  | 727520/1000000 [3:35:00<6:27:06, 11.73step/s]


--- Rollout Summary (Steps 727001 to 727500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -6.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.68
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 468.0202
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  73%|███████▎  | 727996/1000000 [3:35:04<37:46, 120.02step/s]


Episode 1456 finished at step 500 (728000 total). Env Reward: -8.82, Steps: 500, Delivered: 6


Total Steps Trained:  73%|███████▎  | 728020/1000000 [3:35:09<6:27:28, 11.70step/s]


--- Rollout Summary (Steps 727501 to 728000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 442.1187
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  73%|███████▎  | 728495/1000000 [3:35:13<40:18, 112.27step/s]


Episode 1457 finished at step 500 (728500 total). Env Reward: -7.23, Steps: 500, Delivered: 8


Total Steps Trained:  73%|███████▎  | 728518/1000000 [3:35:18<6:41:12, 11.28step/s]


--- Rollout Summary (Steps 728001 to 728500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 423.2152
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  73%|███████▎  | 728994/1000000 [3:35:22<37:35, 120.13step/s]


Episode 1458 finished at step 500 (729000 total). Env Reward: -11.52, Steps: 500, Delivered: 3


Total Steps Trained:  73%|███████▎  | 729019/1000000 [3:35:26<6:24:37, 11.74step/s]


--- Rollout Summary (Steps 728501 to 729000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.70
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 514.9340
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  73%|███████▎  | 729489/1000000 [3:35:30<37:26, 120.43step/s]


Episode 1459 finished at step 500 (729500 total). Env Reward: -1.69, Steps: 500, Delivered: 4


Total Steps Trained:  73%|███████▎  | 729513/1000000 [3:35:35<6:19:33, 11.88step/s]


--- Rollout Summary (Steps 729001 to 729500) ---
Update Duration: 4.42s
Avg Episode Reward (last 100): -6.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 733.4324
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  73%|███████▎  | 729988/1000000 [3:35:39<37:26, 120.17step/s]


Episode 1460 finished at step 500 (730000 total). Env Reward: -9.78, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001460_map1.pth


Total Steps Trained:  73%|███████▎  | 730011/1000000 [3:35:44<6:35:39, 11.37step/s]


--- Rollout Summary (Steps 729501 to 730000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 510.3651
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  73%|███████▎  | 730499/1000000 [3:35:48<39:04, 114.97step/s]


Episode 1461 finished at step 500 (730500 total). Env Reward: -7.47, Steps: 500, Delivered: 7


Total Steps Trained:  73%|███████▎  | 730511/1000000 [3:35:53<8:57:30,  8.36step/s]


--- Rollout Summary (Steps 730001 to 730500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 440.7156
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  73%|███████▎  | 730998/1000000 [3:35:57<36:40, 122.22step/s]


Episode 1462 finished at step 500 (731000 total). Env Reward: -9.45, Steps: 500, Delivered: 5


Total Steps Trained:  73%|███████▎  | 731011/1000000 [3:36:02<8:40:49,  8.61step/s]


--- Rollout Summary (Steps 730501 to 731000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 506.1721
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  73%|███████▎  | 731495/1000000 [3:36:06<36:46, 121.71step/s]


Episode 1463 finished at step 500 (731500 total). Env Reward: 1.74, Steps: 500, Delivered: 7


Total Steps Trained:  73%|███████▎  | 731520/1000000 [3:36:11<6:11:55, 12.03step/s]


--- Rollout Summary (Steps 731001 to 731500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 861.1526
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  73%|███████▎  | 731995/1000000 [3:36:15<37:12, 120.02step/s]


Episode 1464 finished at step 500 (732000 total). Env Reward: -9.47, Steps: 500, Delivered: 5


Total Steps Trained:  73%|███████▎  | 732019/1000000 [3:36:19<6:22:45, 11.67step/s]


--- Rollout Summary (Steps 731501 to 732000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -6.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 903.2232
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  73%|███████▎  | 732500/1000000 [3:36:24<37:32, 118.78step/s]


Episode 1465 finished at step 500 (732500 total). Env Reward: -9.42, Steps: 500, Delivered: 5


Total Steps Trained:  73%|███████▎  | 732512/1000000 [3:36:28<9:05:42,  8.17step/s]


--- Rollout Summary (Steps 732001 to 732500) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.03
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 411.1861
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  73%|███████▎  | 732988/1000000 [3:36:32<36:39, 121.41step/s]


Episode 1466 finished at step 500 (733000 total). Env Reward: -10.36, Steps: 500, Delivered: 4


Total Steps Trained:  73%|███████▎  | 733012/1000000 [3:36:37<6:18:08, 11.77step/s]


--- Rollout Summary (Steps 732501 to 733000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 543.2876
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  73%|███████▎  | 733490/1000000 [3:36:41<38:24, 115.66step/s]


Episode 1467 finished at step 500 (733500 total). Env Reward: -8.46, Steps: 500, Delivered: 6


Total Steps Trained:  73%|███████▎  | 733513/1000000 [3:36:46<6:30:09, 11.38step/s]


--- Rollout Summary (Steps 733001 to 733500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): 0.0021
Avg Critic Loss (per minibatch): 550.6572
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  73%|███████▎  | 733993/1000000 [3:36:50<39:30, 112.19step/s]


Episode 1468 finished at step 500 (734000 total). Env Reward: -10.16, Steps: 500, Delivered: 4


Total Steps Trained:  73%|███████▎  | 734016/1000000 [3:36:55<6:32:56, 11.28step/s]


--- Rollout Summary (Steps 733501 to 734000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.17
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 444.6886
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  73%|███████▎  | 734495/1000000 [3:36:59<41:10, 107.48step/s]


Episode 1469 finished at step 500 (734500 total). Env Reward: -10.11, Steps: 500, Delivered: 5


Total Steps Trained:  73%|███████▎  | 734517/1000000 [3:37:04<6:46:41, 10.88step/s]


--- Rollout Summary (Steps 734001 to 734500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 567.3834
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  73%|███████▎  | 734995/1000000 [3:37:08<39:42, 111.22step/s]


Episode 1470 finished at step 500 (735000 total). Env Reward: -8.65, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001470_map1.pth


Total Steps Trained:  74%|███████▎  | 735018/1000000 [3:37:13<6:34:44, 11.19step/s]


--- Rollout Summary (Steps 734501 to 735000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 594.7195
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  74%|███████▎  | 735490/1000000 [3:37:17<36:23, 121.16step/s]


Episode 1471 finished at step 500 (735500 total). Env Reward: -13.90, Steps: 500, Delivered: 1


Total Steps Trained:  74%|███████▎  | 735514/1000000 [3:37:21<6:11:34, 11.86step/s]


--- Rollout Summary (Steps 735001 to 735500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0000
Avg Critic Loss (per minibatch): 503.7314
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  74%|███████▎  | 735999/1000000 [3:37:25<35:59, 122.26step/s]


Episode 1472 finished at step 500 (736000 total). Env Reward: -9.89, Steps: 500, Delivered: 5


Total Steps Trained:  74%|███████▎  | 736012/1000000 [3:37:30<8:16:40,  8.86step/s]


--- Rollout Summary (Steps 735501 to 736000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 441.6938
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  74%|███████▎  | 736498/1000000 [3:37:34<36:45, 119.49step/s]


Episode 1473 finished at step 500 (736500 total). Env Reward: -11.33, Steps: 500, Delivered: 3


Total Steps Trained:  74%|███████▎  | 736510/1000000 [3:37:39<8:38:49,  8.46step/s]


--- Rollout Summary (Steps 736001 to 736500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 613.3421
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  74%|███████▎  | 736994/1000000 [3:37:43<35:42, 122.77step/s]


Episode 1474 finished at step 500 (737000 total). Env Reward: -0.29, Steps: 500, Delivered: 5


Total Steps Trained:  74%|███████▎  | 737017/1000000 [3:37:48<6:22:57, 11.45step/s]


--- Rollout Summary (Steps 736501 to 737000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 676.9721
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  74%|███████▎  | 737492/1000000 [3:37:52<38:47, 112.80step/s]


Episode 1475 finished at step 500 (737500 total). Env Reward: 0.44, Steps: 500, Delivered: 6


Total Steps Trained:  74%|███████▍  | 737515/1000000 [3:37:57<6:32:14, 11.15step/s]


--- Rollout Summary (Steps 737001 to 737500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 778.4377
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  74%|███████▍  | 737993/1000000 [3:38:01<38:19, 113.92step/s]


Episode 1476 finished at step 500 (738000 total). Env Reward: -10.12, Steps: 500, Delivered: 4


Total Steps Trained:  74%|███████▍  | 738016/1000000 [3:38:06<6:29:19, 11.22step/s]


--- Rollout Summary (Steps 737501 to 738000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 503.5990
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  74%|███████▍  | 738500/1000000 [3:38:10<36:44, 118.60step/s]


Episode 1477 finished at step 500 (738500 total). Env Reward: -2.70, Steps: 500, Delivered: 3


Total Steps Trained:  74%|███████▍  | 738512/1000000 [3:38:14<8:34:26,  8.47step/s]


--- Rollout Summary (Steps 738001 to 738500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 804.3210
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  74%|███████▍  | 738999/1000000 [3:38:18<35:55, 121.07step/s]


Episode 1478 finished at step 500 (739000 total). Env Reward: -12.20, Steps: 500, Delivered: 2


Total Steps Trained:  74%|███████▍  | 739012/1000000 [3:38:23<8:12:18,  8.84step/s]


--- Rollout Summary (Steps 738501 to 739000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 484.1736
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  74%|███████▍  | 739495/1000000 [3:38:27<35:46, 121.34step/s]


Episode 1479 finished at step 500 (739500 total). Env Reward: -6.56, Steps: 500, Delivered: 8


Total Steps Trained:  74%|███████▍  | 739520/1000000 [3:38:32<6:04:30, 11.91step/s]


--- Rollout Summary (Steps 739001 to 739500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 458.9021
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  74%|███████▍  | 739994/1000000 [3:38:36<37:37, 115.20step/s]


Episode 1480 finished at step 500 (740000 total). Env Reward: 2.47, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001480_map1.pth


Total Steps Trained:  74%|███████▍  | 740017/1000000 [3:38:41<6:25:21, 11.24step/s]


--- Rollout Summary (Steps 739501 to 740000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 902.2948
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  74%|███████▍  | 740496/1000000 [3:38:45<35:48, 120.76step/s]


Episode 1481 finished at step 500 (740500 total). Env Reward: -8.18, Steps: 500, Delivered: 6


Total Steps Trained:  74%|███████▍  | 740520/1000000 [3:38:50<6:14:30, 11.55step/s]


--- Rollout Summary (Steps 740001 to 740500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 522.8833
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  74%|███████▍  | 740995/1000000 [3:38:54<36:00, 119.90step/s]


Episode 1482 finished at step 500 (741000 total). Env Reward: -11.29, Steps: 500, Delivered: 4


Total Steps Trained:  74%|███████▍  | 741019/1000000 [3:38:59<6:20:32, 11.34step/s]


--- Rollout Summary (Steps 740501 to 741000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 443.5820
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  74%|███████▍  | 741500/1000000 [3:39:03<36:05, 119.36step/s]


Episode 1483 finished at step 500 (741500 total). Env Reward: -7.75, Steps: 500, Delivered: 7


Total Steps Trained:  74%|███████▍  | 741512/1000000 [3:39:07<8:31:15,  8.43step/s]


--- Rollout Summary (Steps 741001 to 741500) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 486.6802
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  74%|███████▍  | 741995/1000000 [3:39:12<37:44, 113.95step/s]


Episode 1484 finished at step 500 (742000 total). Env Reward: -12.27, Steps: 500, Delivered: 3


Total Steps Trained:  74%|███████▍  | 742019/1000000 [3:39:16<6:12:07, 11.55step/s]


--- Rollout Summary (Steps 741501 to 742000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 454.9723
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  74%|███████▍  | 742499/1000000 [3:39:20<36:42, 116.90step/s]


Episode 1485 finished at step 500 (742500 total). Env Reward: -9.40, Steps: 500, Delivered: 5


Total Steps Trained:  74%|███████▍  | 742511/1000000 [3:39:25<8:31:56,  8.38step/s]


--- Rollout Summary (Steps 742001 to 742500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 741.4730
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  74%|███████▍  | 743000/1000000 [3:39:29<36:31, 117.25step/s]


Episode 1486 finished at step 500 (743000 total). Env Reward: -13.09, Steps: 500, Delivered: 2


Total Steps Trained:  74%|███████▍  | 743012/1000000 [3:39:34<8:32:14,  8.36step/s]


--- Rollout Summary (Steps 742501 to 743000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 539.1023
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  74%|███████▍  | 743499/1000000 [3:39:38<38:28, 111.12step/s]


Episode 1487 finished at step 500 (743500 total). Env Reward: -8.79, Steps: 500, Delivered: 6


Total Steps Trained:  74%|███████▍  | 743511/1000000 [3:39:43<8:37:49,  8.26step/s]


--- Rollout Summary (Steps 743001 to 743500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 683.0705
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  74%|███████▍  | 743989/1000000 [3:39:47<35:39, 119.66step/s]


Episode 1488 finished at step 500 (744000 total). Env Reward: -6.89, Steps: 500, Delivered: 8


Total Steps Trained:  74%|███████▍  | 744011/1000000 [3:39:52<6:29:07, 10.96step/s]


--- Rollout Summary (Steps 743501 to 744000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 693.5419
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  74%|███████▍  | 744493/1000000 [3:39:56<34:58, 121.74step/s]


Episode 1489 finished at step 500 (744500 total). Env Reward: -13.87, Steps: 500, Delivered: 1


Total Steps Trained:  74%|███████▍  | 744517/1000000 [3:40:00<6:04:51, 11.67step/s]


--- Rollout Summary (Steps 744001 to 744500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): 0.0002
Avg Critic Loss (per minibatch): 498.5666
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  74%|███████▍  | 744993/1000000 [3:40:04<34:27, 123.36step/s]


Episode 1490 finished at step 500 (745000 total). Env Reward: -11.54, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001490_map1.pth


Total Steps Trained:  75%|███████▍  | 745016/1000000 [3:40:09<6:15:30, 11.32step/s]


--- Rollout Summary (Steps 744501 to 745000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 555.4060
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  75%|███████▍  | 745493/1000000 [3:40:13<34:38, 122.45step/s]


Episode 1491 finished at step 500 (745500 total). Env Reward: -11.30, Steps: 500, Delivered: 3


Total Steps Trained:  75%|███████▍  | 745518/1000000 [3:40:18<5:52:45, 12.02step/s]


--- Rollout Summary (Steps 745001 to 745500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 547.6719
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  75%|███████▍  | 745991/1000000 [3:40:22<35:12, 120.27step/s]


Episode 1492 finished at step 500 (746000 total). Env Reward: -10.94, Steps: 500, Delivered: 4


Total Steps Trained:  75%|███████▍  | 746015/1000000 [3:40:27<6:00:45, 11.73step/s]


--- Rollout Summary (Steps 745501 to 746000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 540.8253
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  75%|███████▍  | 746492/1000000 [3:40:31<36:22, 116.17step/s]


Episode 1493 finished at step 500 (746500 total). Env Reward: -12.52, Steps: 500, Delivered: 2


Total Steps Trained:  75%|███████▍  | 746515/1000000 [3:40:36<6:12:40, 11.34step/s]


--- Rollout Summary (Steps 746001 to 746500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 582.6911
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  75%|███████▍  | 746989/1000000 [3:40:40<37:52, 111.34step/s]


Episode 1494 finished at step 500 (747000 total). Env Reward: -10.01, Steps: 500, Delivered: 5


Total Steps Trained:  75%|███████▍  | 747012/1000000 [3:40:44<6:11:29, 11.35step/s]


--- Rollout Summary (Steps 746501 to 747000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 496.8588
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  75%|███████▍  | 747497/1000000 [3:40:49<35:54, 117.17step/s]


Episode 1495 finished at step 500 (747500 total). Env Reward: -4.28, Steps: 500, Delivered: 10


Total Steps Trained:  75%|███████▍  | 747520/1000000 [3:40:53<6:14:03, 11.25step/s]


--- Rollout Summary (Steps 747001 to 747500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0089
Avg Critic Loss (per minibatch): 680.6046
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  75%|███████▍  | 747997/1000000 [3:40:57<35:37, 117.90step/s]


Episode 1496 finished at step 500 (748000 total). Env Reward: -11.10, Steps: 500, Delivered: 4


Total Steps Trained:  75%|███████▍  | 748021/1000000 [3:41:02<5:56:46, 11.77step/s]


--- Rollout Summary (Steps 747501 to 748000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 621.4978
Avg Entropy (per minibatch): 2.5588
------------------------------


Total Steps Trained:  75%|███████▍  | 748494/1000000 [3:41:06<36:17, 115.53step/s]


Episode 1497 finished at step 500 (748500 total). Env Reward: -7.39, Steps: 500, Delivered: 7


Total Steps Trained:  75%|███████▍  | 748517/1000000 [3:41:11<6:16:18, 11.14step/s]


--- Rollout Summary (Steps 748001 to 748500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 762.8015
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  75%|███████▍  | 748992/1000000 [3:41:15<36:01, 116.11step/s]


Episode 1498 finished at step 500 (749000 total). Env Reward: -7.64, Steps: 500, Delivered: 7


Total Steps Trained:  75%|███████▍  | 749015/1000000 [3:41:20<6:10:16, 11.30step/s]


--- Rollout Summary (Steps 748501 to 749000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 465.1725
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  75%|███████▍  | 749493/1000000 [3:41:24<36:19, 114.93step/s]


Episode 1499 finished at step 500 (749500 total). Env Reward: -8.49, Steps: 500, Delivered: 6


Total Steps Trained:  75%|███████▍  | 749517/1000000 [3:41:28<6:04:39, 11.45step/s]


--- Rollout Summary (Steps 749001 to 749500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 359.3692
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  75%|███████▍  | 749996/1000000 [3:41:33<36:10, 115.19step/s]


Episode 1500 finished at step 500 (750000 total). Env Reward: 9.29, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001500_map1.pth


Total Steps Trained:  75%|███████▌  | 750019/1000000 [3:41:37<6:12:10, 11.19step/s]


--- Rollout Summary (Steps 749501 to 750000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 1734.6571
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  75%|███████▌  | 750491/1000000 [3:41:41<36:07, 115.14step/s]


Episode 1501 finished at step 500 (750500 total). Env Reward: -10.29, Steps: 500, Delivered: 4


Total Steps Trained:  75%|███████▌  | 750514/1000000 [3:41:46<6:14:34, 11.10step/s]


--- Rollout Summary (Steps 750001 to 750500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0004
Avg Critic Loss (per minibatch): 462.1244
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  75%|███████▌  | 750990/1000000 [3:41:50<35:12, 117.90step/s]


Episode 1502 finished at step 500 (751000 total). Env Reward: -7.58, Steps: 500, Delivered: 7


Total Steps Trained:  75%|███████▌  | 751014/1000000 [3:41:55<6:04:15, 11.39step/s]


--- Rollout Summary (Steps 750501 to 751000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 454.6124
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  75%|███████▌  | 751496/1000000 [3:41:59<34:14, 120.97step/s]


Episode 1503 finished at step 500 (751500 total). Env Reward: -8.25, Steps: 500, Delivered: 6


Total Steps Trained:  75%|███████▌  | 751519/1000000 [3:42:04<6:01:04, 11.47step/s]


--- Rollout Summary (Steps 751001 to 751500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 506.0034
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  75%|███████▌  | 751990/1000000 [3:42:08<34:40, 119.23step/s]


Episode 1504 finished at step 500 (752000 total). Env Reward: 2.16, Steps: 500, Delivered: 8


Total Steps Trained:  75%|███████▌  | 752013/1000000 [3:42:13<6:00:20, 11.47step/s]


--- Rollout Summary (Steps 751501 to 752000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 653.2494
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  75%|███████▌  | 752500/1000000 [3:42:17<35:06, 117.49step/s]


Episode 1505 finished at step 500 (752500 total). Env Reward: -12.41, Steps: 500, Delivered: 2


Total Steps Trained:  75%|███████▌  | 752512/1000000 [3:42:22<8:02:48,  8.54step/s]


--- Rollout Summary (Steps 752001 to 752500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 559.0491
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  75%|███████▌  | 752997/1000000 [3:42:26<33:23, 123.28step/s]


Episode 1506 finished at step 500 (753000 total). Env Reward: -12.95, Steps: 500, Delivered: 2


Total Steps Trained:  75%|███████▌  | 753010/1000000 [3:42:30<7:45:19,  8.85step/s]


--- Rollout Summary (Steps 752501 to 753000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 454.7721
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  75%|███████▌  | 753499/1000000 [3:42:35<33:26, 122.85step/s]


Episode 1507 finished at step 500 (753500 total). Env Reward: -6.76, Steps: 500, Delivered: 8


Total Steps Trained:  75%|███████▌  | 753512/1000000 [3:42:39<7:46:37,  8.80step/s]


--- Rollout Summary (Steps 753001 to 753500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.24
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 645.4992
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  75%|███████▌  | 753994/1000000 [3:42:43<34:38, 118.33step/s]


Episode 1508 finished at step 500 (754000 total). Env Reward: -7.92, Steps: 500, Delivered: 6


Total Steps Trained:  75%|███████▌  | 754017/1000000 [3:42:48<6:10:46, 11.06step/s]


--- Rollout Summary (Steps 753501 to 754000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0076
Avg Critic Loss (per minibatch): 351.4078
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  75%|███████▌  | 754495/1000000 [3:42:52<33:52, 120.78step/s]


Episode 1509 finished at step 500 (754500 total). Env Reward: -10.79, Steps: 500, Delivered: 3


Total Steps Trained:  75%|███████▌  | 754519/1000000 [3:42:57<5:49:11, 11.72step/s]


--- Rollout Summary (Steps 754001 to 754500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.25
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 486.9199
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  75%|███████▌  | 754992/1000000 [3:43:01<33:42, 121.14step/s]


Episode 1510 finished at step 500 (755000 total). Env Reward: -10.77, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001510_map1.pth


Total Steps Trained:  76%|███████▌  | 755015/1000000 [3:43:06<5:55:06, 11.50step/s]


--- Rollout Summary (Steps 754501 to 755000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.29
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0086
Avg Critic Loss (per minibatch): 570.3112
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  76%|███████▌  | 755491/1000000 [3:43:10<35:35, 114.49step/s]


Episode 1511 finished at step 500 (755500 total). Env Reward: -8.10, Steps: 500, Delivered: 7


Total Steps Trained:  76%|███████▌  | 755514/1000000 [3:43:15<6:02:26, 11.24step/s]


--- Rollout Summary (Steps 755001 to 755500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.31
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 501.9925
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  76%|███████▌  | 755999/1000000 [3:43:19<34:30, 117.86step/s]


Episode 1512 finished at step 500 (756000 total). Env Reward: -13.18, Steps: 500, Delivered: 2


Total Steps Trained:  76%|███████▌  | 756011/1000000 [3:43:23<8:07:11,  8.35step/s]


--- Rollout Summary (Steps 755501 to 756000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 382.0313
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  76%|███████▌  | 756492/1000000 [3:43:28<34:13, 118.61step/s]


Episode 1513 finished at step 500 (756500 total). Env Reward: -9.71, Steps: 500, Delivered: 5


Total Steps Trained:  76%|███████▌  | 756515/1000000 [3:43:32<5:55:27, 11.42step/s]


--- Rollout Summary (Steps 756001 to 756500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 493.5386
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  76%|███████▌  | 757000/1000000 [3:43:36<33:31, 120.83step/s]


Episode 1514 finished at step 500 (757000 total). Env Reward: -11.07, Steps: 500, Delivered: 3


Total Steps Trained:  76%|███████▌  | 757013/1000000 [3:43:41<7:41:15,  8.78step/s]


--- Rollout Summary (Steps 756501 to 757000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 496.0854
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  76%|███████▌  | 757500/1000000 [3:43:45<34:57, 115.61step/s]


Episode 1515 finished at step 500 (757500 total). Env Reward: -8.93, Steps: 500, Delivered: 5


Total Steps Trained:  76%|███████▌  | 757512/1000000 [3:43:50<8:07:49,  8.28step/s]


--- Rollout Summary (Steps 757001 to 757500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 534.8609
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  76%|███████▌  | 757993/1000000 [3:43:54<33:29, 120.44step/s]


Episode 1516 finished at step 500 (758000 total). Env Reward: -10.53, Steps: 500, Delivered: 4


Total Steps Trained:  76%|███████▌  | 758017/1000000 [3:43:59<5:48:44, 11.56step/s]


--- Rollout Summary (Steps 757501 to 758000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 607.5635
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  76%|███████▌  | 758496/1000000 [3:44:03<35:08, 114.51step/s]


Episode 1517 finished at step 500 (758500 total). Env Reward: -8.97, Steps: 500, Delivered: 6


Total Steps Trained:  76%|███████▌  | 758518/1000000 [3:44:08<6:13:34, 10.77step/s]


--- Rollout Summary (Steps 758001 to 758500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 502.0510
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  76%|███████▌  | 758995/1000000 [3:44:12<34:08, 117.66step/s]


Episode 1518 finished at step 500 (759000 total). Env Reward: 1.55, Steps: 500, Delivered: 7


Total Steps Trained:  76%|███████▌  | 759018/1000000 [3:44:17<5:51:12, 11.44step/s]


--- Rollout Summary (Steps 758501 to 759000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 907.8437
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  76%|███████▌  | 759495/1000000 [3:44:21<35:20, 113.42step/s]


Episode 1519 finished at step 500 (759500 total). Env Reward: -8.61, Steps: 500, Delivered: 6


Total Steps Trained:  76%|███████▌  | 759517/1000000 [3:44:26<6:21:52, 10.50step/s]


--- Rollout Summary (Steps 759001 to 759500) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 334.3399
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  76%|███████▌  | 759990/1000000 [3:44:30<32:43, 122.22step/s]


Episode 1520 finished at step 500 (760000 total). Env Reward: -13.60, Steps: 500, Delivered: 1
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001520_map1.pth


Total Steps Trained:  76%|███████▌  | 760014/1000000 [3:44:35<5:42:40, 11.67step/s]


--- Rollout Summary (Steps 759501 to 760000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 706.7355
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  76%|███████▌  | 760496/1000000 [3:44:39<35:28, 112.51step/s]


Episode 1521 finished at step 500 (760500 total). Env Reward: -10.08, Steps: 500, Delivered: 4


Total Steps Trained:  76%|███████▌  | 760519/1000000 [3:44:43<5:57:53, 11.15step/s]


--- Rollout Summary (Steps 760001 to 760500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 605.1263
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  76%|███████▌  | 760998/1000000 [3:44:48<35:17, 112.90step/s]


Episode 1522 finished at step 500 (761000 total). Env Reward: 3.88, Steps: 500, Delivered: 9


Total Steps Trained:  76%|███████▌  | 761022/1000000 [3:44:53<5:56:39, 11.17step/s]


--- Rollout Summary (Steps 760501 to 761000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 408.0483
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  76%|███████▌  | 761497/1000000 [3:44:57<34:29, 115.23step/s]


Episode 1523 finished at step 500 (761500 total). Env Reward: -8.35, Steps: 500, Delivered: 7


Total Steps Trained:  76%|███████▌  | 761519/1000000 [3:45:01<6:02:28, 10.97step/s]


--- Rollout Summary (Steps 761001 to 761500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 460.4942
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  76%|███████▌  | 761988/1000000 [3:45:05<32:53, 120.61step/s]


Episode 1524 finished at step 500 (762000 total). Env Reward: -2.62, Steps: 500, Delivered: 3


Total Steps Trained:  76%|███████▌  | 762012/1000000 [3:45:10<5:38:05, 11.73step/s]


--- Rollout Summary (Steps 761501 to 762000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 756.3980
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  76%|███████▌  | 762499/1000000 [3:45:14<33:48, 117.06step/s]


Episode 1525 finished at step 500 (762500 total). Env Reward: -8.96, Steps: 500, Delivered: 6


Total Steps Trained:  76%|███████▋  | 762511/1000000 [3:45:19<7:55:38,  8.32step/s]


--- Rollout Summary (Steps 762001 to 762500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 731.0887
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  76%|███████▋  | 762994/1000000 [3:45:23<33:52, 116.63step/s]


Episode 1526 finished at step 500 (763000 total). Env Reward: 10.50, Steps: 500, Delivered: 7


Total Steps Trained:  76%|███████▋  | 763016/1000000 [3:45:28<6:05:07, 10.82step/s]


--- Rollout Summary (Steps 762501 to 763000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 1843.7844
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  76%|███████▋  | 763489/1000000 [3:45:32<32:18, 122.01step/s]


Episode 1527 finished at step 500 (763500 total). Env Reward: -0.23, Steps: 500, Delivered: 5


Total Steps Trained:  76%|███████▋  | 763512/1000000 [3:45:36<5:44:27, 11.44step/s]


--- Rollout Summary (Steps 763001 to 763500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 740.4971
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  76%|███████▋  | 763996/1000000 [3:45:41<33:07, 118.74step/s]


Episode 1528 finished at step 500 (764000 total). Env Reward: -5.53, Steps: 500, Delivered: 9


Total Steps Trained:  76%|███████▋  | 764018/1000000 [3:45:45<5:58:54, 10.96step/s]


--- Rollout Summary (Steps 763501 to 764000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 848.2483
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  76%|███████▋  | 764489/1000000 [3:45:49<32:17, 121.53step/s]


Episode 1529 finished at step 500 (764500 total). Env Reward: -11.62, Steps: 500, Delivered: 3


Total Steps Trained:  76%|███████▋  | 764513/1000000 [3:45:54<5:30:03, 11.89step/s]


--- Rollout Summary (Steps 764001 to 764500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): 0.0001
Avg Critic Loss (per minibatch): 567.1738
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  76%|███████▋  | 765000/1000000 [3:45:58<34:06, 114.82step/s]


Episode 1530 finished at step 500 (765000 total). Env Reward: -10.16, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001530_map1.pth


Total Steps Trained:  77%|███████▋  | 765012/1000000 [3:46:03<7:46:16,  8.40step/s]


--- Rollout Summary (Steps 764501 to 765000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 418.8252
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  77%|███████▋  | 765495/1000000 [3:46:07<34:21, 113.75step/s]


Episode 1531 finished at step 500 (765500 total). Env Reward: -9.56, Steps: 500, Delivered: 5


Total Steps Trained:  77%|███████▋  | 765517/1000000 [3:46:12<6:01:41, 10.81step/s]


--- Rollout Summary (Steps 765001 to 765500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0005
Avg Critic Loss (per minibatch): 584.5467
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  77%|███████▋  | 766000/1000000 [3:46:16<33:49, 115.33step/s]


Episode 1532 finished at step 500 (766000 total). Env Reward: -8.93, Steps: 500, Delivered: 6


Total Steps Trained:  77%|███████▋  | 766012/1000000 [3:46:21<7:46:13,  8.36step/s]


--- Rollout Summary (Steps 765501 to 766000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 437.9161
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  77%|███████▋  | 766494/1000000 [3:46:25<31:48, 122.35step/s]


Episode 1533 finished at step 500 (766500 total). Env Reward: -12.46, Steps: 500, Delivered: 2


Total Steps Trained:  77%|███████▋  | 766518/1000000 [3:46:30<5:43:55, 11.31step/s]


--- Rollout Summary (Steps 766001 to 766500) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 527.4357
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  77%|███████▋  | 766992/1000000 [3:46:34<33:44, 115.10step/s]


Episode 1534 finished at step 500 (767000 total). Env Reward: -10.74, Steps: 500, Delivered: 4


Total Steps Trained:  77%|███████▋  | 767015/1000000 [3:46:39<5:47:50, 11.16step/s]


--- Rollout Summary (Steps 766501 to 767000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 473.8798
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  77%|███████▋  | 767493/1000000 [3:46:43<34:16, 113.07step/s]


Episode 1535 finished at step 500 (767500 total). Env Reward: -8.73, Steps: 500, Delivered: 6


Total Steps Trained:  77%|███████▋  | 767516/1000000 [3:46:47<5:47:59, 11.13step/s]


--- Rollout Summary (Steps 767001 to 767500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 556.7293
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  77%|███████▋  | 767996/1000000 [3:46:52<32:09, 120.23step/s]


Episode 1536 finished at step 500 (768000 total). Env Reward: -9.75, Steps: 500, Delivered: 5


Total Steps Trained:  77%|███████▋  | 768020/1000000 [3:46:56<5:28:03, 11.79step/s]


--- Rollout Summary (Steps 767501 to 768000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 637.8437
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  77%|███████▋  | 768500/1000000 [3:47:01<32:57, 117.09step/s]


Episode 1537 finished at step 500 (768500 total). Env Reward: -7.76, Steps: 500, Delivered: 7


Total Steps Trained:  77%|███████▋  | 768512/1000000 [3:47:05<7:39:04,  8.40step/s]


--- Rollout Summary (Steps 768001 to 768500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 642.7275
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  77%|███████▋  | 768992/1000000 [3:47:09<34:18, 112.23step/s]


Episode 1538 finished at step 500 (769000 total). Env Reward: -9.43, Steps: 500, Delivered: 5


Total Steps Trained:  77%|███████▋  | 769016/1000000 [3:47:14<5:37:16, 11.41step/s]


--- Rollout Summary (Steps 768501 to 769000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 452.7683
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  77%|███████▋  | 769489/1000000 [3:47:18<33:09, 115.88step/s]


Episode 1539 finished at step 500 (769500 total). Env Reward: -8.77, Steps: 500, Delivered: 6


Total Steps Trained:  77%|███████▋  | 769511/1000000 [3:47:23<5:49:08, 11.00step/s]


--- Rollout Summary (Steps 769001 to 769500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 574.1731
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  77%|███████▋  | 769997/1000000 [3:47:27<33:14, 115.34step/s]


Episode 1540 finished at step 500 (770000 total). Env Reward: -9.00, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001540_map1.pth


Total Steps Trained:  77%|███████▋  | 770020/1000000 [3:47:32<5:56:04, 10.76step/s]


--- Rollout Summary (Steps 769501 to 770000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 417.3653
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  77%|███████▋  | 770496/1000000 [3:47:36<31:41, 120.72step/s]


Episode 1541 finished at step 500 (770500 total). Env Reward: -8.10, Steps: 500, Delivered: 6


Total Steps Trained:  77%|███████▋  | 770509/1000000 [3:47:41<7:15:21,  8.79step/s]


--- Rollout Summary (Steps 770001 to 770500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 613.4796
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  77%|███████▋  | 770990/1000000 [3:47:45<30:44, 124.19step/s]


Episode 1542 finished at step 500 (771000 total). Env Reward: -0.43, Steps: 500, Delivered: 5


Total Steps Trained:  77%|███████▋  | 771014/1000000 [3:47:50<5:31:11, 11.52step/s]


--- Rollout Summary (Steps 770501 to 771000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 845.9171
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  77%|███████▋  | 771493/1000000 [3:47:54<32:59, 115.45step/s]


Episode 1543 finished at step 500 (771500 total). Env Reward: -12.03, Steps: 500, Delivered: 2


Total Steps Trained:  77%|███████▋  | 771516/1000000 [3:47:58<5:41:20, 11.16step/s]


--- Rollout Summary (Steps 771001 to 771500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0004
Avg Critic Loss (per minibatch): 490.6628
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  77%|███████▋  | 771996/1000000 [3:48:03<33:21, 113.93step/s]


Episode 1544 finished at step 500 (772000 total). Env Reward: -11.73, Steps: 500, Delivered: 3


Total Steps Trained:  77%|███████▋  | 772019/1000000 [3:48:07<5:39:57, 11.18step/s]


--- Rollout Summary (Steps 771501 to 772000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 528.9720
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  77%|███████▋  | 772491/1000000 [3:48:11<33:20, 113.73step/s]


Episode 1545 finished at step 500 (772500 total). Env Reward: -13.11, Steps: 500, Delivered: 2


Total Steps Trained:  77%|███████▋  | 772515/1000000 [3:48:16<5:32:28, 11.40step/s]


--- Rollout Summary (Steps 772001 to 772500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0075
Avg Critic Loss (per minibatch): 549.7358
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  77%|███████▋  | 773000/1000000 [3:48:20<31:49, 118.91step/s]


Episode 1546 finished at step 500 (773000 total). Env Reward: -13.38, Steps: 500, Delivered: 2


Total Steps Trained:  77%|███████▋  | 773012/1000000 [3:48:25<7:13:40,  8.72step/s]


--- Rollout Summary (Steps 772501 to 773000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 405.1410
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  77%|███████▋  | 773496/1000000 [3:48:29<31:49, 118.61step/s]


Episode 1547 finished at step 500 (773500 total). Env Reward: -9.31, Steps: 500, Delivered: 5


Total Steps Trained:  77%|███████▋  | 773517/1000000 [3:48:34<5:50:01, 10.78step/s]


--- Rollout Summary (Steps 773001 to 773500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 468.1323
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  77%|███████▋  | 773998/1000000 [3:48:38<32:16, 116.68step/s]


Episode 1548 finished at step 500 (774000 total). Env Reward: -9.58, Steps: 500, Delivered: 5


Total Steps Trained:  77%|███████▋  | 774010/1000000 [3:48:43<7:26:56,  8.43step/s]


--- Rollout Summary (Steps 773501 to 774000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 481.4801
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  77%|███████▋  | 774500/1000000 [3:48:47<37:37, 99.89step/s] 


Episode 1549 finished at step 500 (774500 total). Env Reward: -8.67, Steps: 500, Delivered: 6


Total Steps Trained:  77%|███████▋  | 774511/1000000 [3:48:52<8:06:11,  7.73step/s]


--- Rollout Summary (Steps 774001 to 774500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 392.7210
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  77%|███████▋  | 774993/1000000 [3:48:56<32:00, 117.16step/s]


Episode 1550 finished at step 500 (775000 total). Env Reward: -1.00, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001550_map1.pth


Total Steps Trained:  78%|███████▊  | 775016/1000000 [3:49:01<5:32:58, 11.26step/s]


--- Rollout Summary (Steps 774501 to 775000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 816.2449
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  78%|███████▊  | 775493/1000000 [3:49:05<32:50, 113.95step/s]


Episode 1551 finished at step 500 (775500 total). Env Reward: -7.26, Steps: 500, Delivered: 7


Total Steps Trained:  78%|███████▊  | 775516/1000000 [3:49:10<5:42:44, 10.92step/s]


--- Rollout Summary (Steps 775001 to 775500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.03
Avg Actor Loss (per minibatch): -0.0078
Avg Critic Loss (per minibatch): 621.1369
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  78%|███████▊  | 775991/1000000 [3:49:14<32:55, 113.41step/s]


Episode 1552 finished at step 500 (776000 total). Env Reward: -6.85, Steps: 500, Delivered: 7


Total Steps Trained:  78%|███████▊  | 776013/1000000 [3:49:19<5:43:20, 10.87step/s]


--- Rollout Summary (Steps 775501 to 776000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -8.07
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 4.99
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 799.8037
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  78%|███████▊  | 776499/1000000 [3:49:23<31:08, 119.60step/s]


Episode 1553 finished at step 500 (776500 total). Env Reward: -8.18, Steps: 500, Delivered: 7


Total Steps Trained:  78%|███████▊  | 776511/1000000 [3:49:28<7:13:06,  8.60step/s]


--- Rollout Summary (Steps 776001 to 776500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 4.98
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 419.5412
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  78%|███████▊  | 776998/1000000 [3:49:32<31:19, 118.64step/s]


Episode 1554 finished at step 500 (777000 total). Env Reward: -9.13, Steps: 500, Delivered: 5


Total Steps Trained:  78%|███████▊  | 777010/1000000 [3:49:37<7:22:09,  8.41step/s]


--- Rollout Summary (Steps 776501 to 777000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -8.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.00
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 373.9719
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  78%|███████▊  | 777492/1000000 [3:49:41<33:02, 112.26step/s]


Episode 1555 finished at step 500 (777500 total). Env Reward: -8.59, Steps: 500, Delivered: 6


Total Steps Trained:  78%|███████▊  | 777514/1000000 [3:49:46<5:41:34, 10.86step/s]


--- Rollout Summary (Steps 777001 to 777500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.01
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 388.3217
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  78%|███████▊  | 777996/1000000 [3:49:50<31:34, 117.17step/s]


Episode 1556 finished at step 500 (778000 total). Env Reward: -8.41, Steps: 500, Delivered: 6


Total Steps Trained:  78%|███████▊  | 778019/1000000 [3:49:55<5:26:43, 11.32step/s]


--- Rollout Summary (Steps 777501 to 778000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.01
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 504.9497
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  78%|███████▊  | 778498/1000000 [3:49:59<30:13, 122.15step/s]


Episode 1557 finished at step 500 (778500 total). Env Reward: -9.33, Steps: 500, Delivered: 6


Total Steps Trained:  78%|███████▊  | 778511/1000000 [3:50:03<6:52:00,  8.96step/s]


--- Rollout Summary (Steps 778001 to 778500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -8.06
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 4.99
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 514.5802
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  78%|███████▊  | 778993/1000000 [3:50:07<30:52, 119.33step/s]


Episode 1558 finished at step 500 (779000 total). Env Reward: -9.03, Steps: 500, Delivered: 5


Total Steps Trained:  78%|███████▊  | 779016/1000000 [3:50:12<5:33:40, 11.04step/s]


--- Rollout Summary (Steps 778501 to 779000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -8.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.01
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 553.0949
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  78%|███████▊  | 779494/1000000 [3:50:16<31:06, 118.11step/s]


Episode 1559 finished at step 500 (779500 total). Env Reward: -8.80, Steps: 500, Delivered: 5


Total Steps Trained:  78%|███████▊  | 779518/1000000 [3:50:21<5:20:44, 11.46step/s]


--- Rollout Summary (Steps 779001 to 779500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -8.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.02
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 474.7988
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  78%|███████▊  | 780000/1000000 [3:50:25<30:12, 121.36step/s]


Episode 1560 finished at step 500 (780000 total). Env Reward: -13.46, Steps: 500, Delivered: 1
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001560_map1.pth


Total Steps Trained:  78%|███████▊  | 780013/1000000 [3:50:30<6:53:50,  8.86step/s]


--- Rollout Summary (Steps 779501 to 780000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -8.15
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 4.99
Avg Actor Loss (per minibatch): 0.0011
Avg Critic Loss (per minibatch): 618.5941
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  78%|███████▊  | 780498/1000000 [3:50:34<32:53, 111.22step/s]


Episode 1561 finished at step 500 (780500 total). Env Reward: -8.58, Steps: 500, Delivered: 6


Total Steps Trained:  78%|███████▊  | 780510/1000000 [3:50:39<7:24:54,  8.22step/s]


--- Rollout Summary (Steps 780001 to 780500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -8.16
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 4.98
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 610.2381
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  78%|███████▊  | 780989/1000000 [3:50:43<31:30, 115.82step/s]


Episode 1562 finished at step 500 (781000 total). Env Reward: -7.49, Steps: 500, Delivered: 7


Total Steps Trained:  78%|███████▊  | 781010/1000000 [3:50:48<5:49:56, 10.43step/s]


--- Rollout Summary (Steps 780501 to 781000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -8.14
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.00
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 521.8901
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  78%|███████▊  | 781493/1000000 [3:50:52<32:09, 113.22step/s]


Episode 1563 finished at step 500 (781500 total). Env Reward: 4.53, Steps: 500, Delivered: 10


Total Steps Trained:  78%|███████▊  | 781516/1000000 [3:50:57<5:24:35, 11.22step/s]


--- Rollout Summary (Steps 781001 to 781500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -8.11
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.03
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 981.8417
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  78%|███████▊  | 781998/1000000 [3:51:01<29:54, 121.47step/s]


Episode 1564 finished at step 500 (782000 total). Env Reward: -6.59, Steps: 500, Delivered: 8


Total Steps Trained:  78%|███████▊  | 782011/1000000 [3:51:05<6:47:31,  8.92step/s]


--- Rollout Summary (Steps 781501 to 782000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -8.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 400.0351
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  78%|███████▊  | 782492/1000000 [3:51:09<31:01, 116.86step/s]


Episode 1565 finished at step 500 (782500 total). Env Reward: -10.35, Steps: 500, Delivered: 4


Total Steps Trained:  78%|███████▊  | 782515/1000000 [3:51:14<5:29:48, 10.99step/s]


--- Rollout Summary (Steps 782001 to 782500) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -8.09
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 390.2353
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  78%|███████▊  | 782993/1000000 [3:51:18<31:35, 114.51step/s]


Episode 1566 finished at step 500 (783000 total). Env Reward: -11.34, Steps: 500, Delivered: 4


Total Steps Trained:  78%|███████▊  | 783015/1000000 [3:51:23<5:27:38, 11.04step/s]


--- Rollout Summary (Steps 782501 to 783000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -8.10
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 521.9190
Avg Entropy (per minibatch): 2.5586
------------------------------


Total Steps Trained:  78%|███████▊  | 783491/1000000 [3:51:27<30:32, 118.14step/s]


Episode 1567 finished at step 500 (783500 total). Env Reward: -3.63, Steps: 500, Delivered: 11


Total Steps Trained:  78%|███████▊  | 783513/1000000 [3:51:32<5:31:12, 10.89step/s]


--- Rollout Summary (Steps 783001 to 783500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -8.05
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0084
Avg Critic Loss (per minibatch): 747.3073
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  78%|███████▊  | 783998/1000000 [3:51:36<29:52, 120.49step/s]


Episode 1568 finished at step 500 (784000 total). Env Reward: -8.98, Steps: 500, Delivered: 6


Total Steps Trained:  78%|███████▊  | 784011/1000000 [3:51:41<6:51:37,  8.75step/s]


--- Rollout Summary (Steps 783501 to 784000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.04
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 529.3900
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  78%|███████▊  | 784493/1000000 [3:51:45<32:00, 112.23step/s]


Episode 1569 finished at step 500 (784500 total). Env Reward: -8.28, Steps: 500, Delivered: 6


Total Steps Trained:  78%|███████▊  | 784516/1000000 [3:51:50<5:23:57, 11.09step/s]


--- Rollout Summary (Steps 784001 to 784500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -8.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 477.2699
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  78%|███████▊  | 784989/1000000 [3:51:54<31:28, 113.87step/s]


Episode 1570 finished at step 500 (785000 total). Env Reward: -5.34, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001570_map1.pth


Total Steps Trained:  79%|███████▊  | 785012/1000000 [3:51:58<5:23:04, 11.09step/s]


--- Rollout Summary (Steps 784501 to 785000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 360.2737
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  79%|███████▊  | 785488/1000000 [3:52:02<29:42, 120.33step/s]


Episode 1571 finished at step 500 (785500 total). Env Reward: -0.49, Steps: 500, Delivered: 5


Total Steps Trained:  79%|███████▊  | 785513/1000000 [3:52:07<4:58:30, 11.98step/s]


--- Rollout Summary (Steps 785001 to 785500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): 0.0001
Avg Critic Loss (per minibatch): 770.2536
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  79%|███████▊  | 785995/1000000 [3:52:11<29:33, 120.65step/s]


Episode 1572 finished at step 500 (786000 total). Env Reward: -8.18, Steps: 500, Delivered: 6


Total Steps Trained:  79%|███████▊  | 786018/1000000 [3:52:16<5:16:02, 11.28step/s]


--- Rollout Summary (Steps 785501 to 786000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 542.7992
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  79%|███████▊  | 786493/1000000 [3:52:20<31:18, 113.69step/s]


Episode 1573 finished at step 500 (786500 total). Env Reward: -8.96, Steps: 500, Delivered: 6


Total Steps Trained:  79%|███████▊  | 786516/1000000 [3:52:25<5:20:17, 11.11step/s]


--- Rollout Summary (Steps 786001 to 786500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 746.4459
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  79%|███████▊  | 786994/1000000 [3:52:29<29:04, 122.11step/s]


Episode 1574 finished at step 500 (787000 total). Env Reward: -9.75, Steps: 500, Delivered: 5


Total Steps Trained:  79%|███████▊  | 787019/1000000 [3:52:34<4:54:33, 12.05step/s]


--- Rollout Summary (Steps 786501 to 787000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 488.6094
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  79%|███████▊  | 787490/1000000 [3:52:38<30:22, 116.60step/s]


Episode 1575 finished at step 500 (787500 total). Env Reward: -0.93, Steps: 500, Delivered: 4


Total Steps Trained:  79%|███████▉  | 787513/1000000 [3:52:42<5:04:42, 11.62step/s]


--- Rollout Summary (Steps 787001 to 787500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 807.8484
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  79%|███████▉  | 787990/1000000 [3:52:46<31:05, 113.65step/s]


Episode 1576 finished at step 500 (788000 total). Env Reward: -6.22, Steps: 500, Delivered: 8


Total Steps Trained:  79%|███████▉  | 788012/1000000 [3:52:51<5:32:17, 10.63step/s]


--- Rollout Summary (Steps 787501 to 788000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0107
Avg Critic Loss (per minibatch): 915.0309
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  79%|███████▉  | 788491/1000000 [3:52:55<29:36, 119.09step/s]


Episode 1577 finished at step 500 (788500 total). Env Reward: -7.11, Steps: 500, Delivered: 7


Total Steps Trained:  79%|███████▉  | 788514/1000000 [3:53:00<5:08:39, 11.42step/s]


--- Rollout Summary (Steps 788001 to 788500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 604.5331
Avg Entropy (per minibatch): 2.5619
------------------------------


Total Steps Trained:  79%|███████▉  | 788989/1000000 [3:53:04<29:13, 120.31step/s]


Episode 1578 finished at step 500 (789000 total). Env Reward: -8.90, Steps: 500, Delivered: 6


Total Steps Trained:  79%|███████▉  | 789014/1000000 [3:53:09<4:54:24, 11.94step/s]


--- Rollout Summary (Steps 788501 to 789000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 402.3455
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  79%|███████▉  | 789500/1000000 [3:53:13<30:27, 115.20step/s]


Episode 1579 finished at step 500 (789500 total). Env Reward: -6.73, Steps: 500, Delivered: 8


Total Steps Trained:  79%|███████▉  | 789512/1000000 [3:53:18<6:59:36,  8.36step/s]


--- Rollout Summary (Steps 789001 to 789500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 556.2202
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  79%|███████▉  | 789993/1000000 [3:53:22<30:10, 116.02step/s]


Episode 1580 finished at step 500 (790000 total). Env Reward: -6.85, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001580_map1.pth


Total Steps Trained:  79%|███████▉  | 790016/1000000 [3:53:27<5:11:04, 11.25step/s]


--- Rollout Summary (Steps 789501 to 790000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 611.7079
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  79%|███████▉  | 790498/1000000 [3:53:31<29:27, 118.56step/s]


Episode 1581 finished at step 500 (790500 total). Env Reward: -8.13, Steps: 500, Delivered: 6


Total Steps Trained:  79%|███████▉  | 790522/1000000 [3:53:35<4:58:43, 11.69step/s]


--- Rollout Summary (Steps 790001 to 790500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 704.6328
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  79%|███████▉  | 790998/1000000 [3:53:40<29:27, 118.27step/s]


Episode 1582 finished at step 500 (791000 total). Env Reward: -7.66, Steps: 500, Delivered: 7


Total Steps Trained:  79%|███████▉  | 791010/1000000 [3:53:44<7:05:58,  8.18step/s]


--- Rollout Summary (Steps 790501 to 791000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 578.6887
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  79%|███████▉  | 791490/1000000 [3:53:48<29:09, 119.18step/s]


Episode 1583 finished at step 500 (791500 total). Env Reward: -8.97, Steps: 500, Delivered: 6


Total Steps Trained:  79%|███████▉  | 791512/1000000 [3:53:53<5:16:59, 10.96step/s]


--- Rollout Summary (Steps 791001 to 791500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 454.8756
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  79%|███████▉  | 791994/1000000 [3:53:57<28:35, 121.24step/s]


Episode 1584 finished at step 500 (792000 total). Env Reward: -9.13, Steps: 500, Delivered: 5


Total Steps Trained:  79%|███████▉  | 792019/1000000 [3:54:02<4:49:00, 11.99step/s]


--- Rollout Summary (Steps 791501 to 792000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 706.3381
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  79%|███████▉  | 792492/1000000 [3:54:06<28:48, 120.05step/s]


Episode 1585 finished at step 500 (792500 total). Env Reward: -11.21, Steps: 500, Delivered: 3


Total Steps Trained:  79%|███████▉  | 792515/1000000 [3:54:11<5:03:12, 11.40step/s]


--- Rollout Summary (Steps 792001 to 792500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 573.9018
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  79%|███████▉  | 793000/1000000 [3:54:15<31:27, 109.67step/s]


Episode 1586 finished at step 500 (793000 total). Env Reward: -7.54, Steps: 500, Delivered: 7


Total Steps Trained:  79%|███████▉  | 793011/1000000 [3:54:20<7:11:53,  7.99step/s]


--- Rollout Summary (Steps 792501 to 793000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 491.9176
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  79%|███████▉  | 793495/1000000 [3:54:24<28:23, 121.23step/s]


Episode 1587 finished at step 500 (793500 total). Env Reward: -0.57, Steps: 500, Delivered: 5


Total Steps Trained:  79%|███████▉  | 793520/1000000 [3:54:29<4:47:23, 11.97step/s]


--- Rollout Summary (Steps 793001 to 793500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 638.6661
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  79%|███████▉  | 793996/1000000 [3:54:33<30:42, 111.79step/s]


Episode 1588 finished at step 500 (794000 total). Env Reward: -7.60, Steps: 500, Delivered: 7


Total Steps Trained:  79%|███████▉  | 794020/1000000 [3:54:37<4:59:02, 11.48step/s]


--- Rollout Summary (Steps 793501 to 794000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 674.2908
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  79%|███████▉  | 794492/1000000 [3:54:41<28:14, 121.30step/s]


Episode 1589 finished at step 500 (794500 total). Env Reward: -9.99, Steps: 500, Delivered: 4


Total Steps Trained:  79%|███████▉  | 794516/1000000 [3:54:46<4:53:30, 11.67step/s]


--- Rollout Summary (Steps 794001 to 794500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 493.8195
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  79%|███████▉  | 794991/1000000 [3:54:50<29:02, 117.65step/s]


Episode 1590 finished at step 500 (795000 total). Env Reward: -1.90, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001590_map1.pth


Total Steps Trained:  80%|███████▉  | 795014/1000000 [3:54:55<5:07:56, 11.09step/s]


--- Rollout Summary (Steps 794501 to 795000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): 0.0001
Avg Critic Loss (per minibatch): 759.6449
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  80%|███████▉  | 795495/1000000 [3:54:59<28:49, 118.27step/s]


Episode 1591 finished at step 500 (795500 total). Env Reward: -10.62, Steps: 500, Delivered: 4


Total Steps Trained:  80%|███████▉  | 795519/1000000 [3:55:04<4:51:14, 11.70step/s]


--- Rollout Summary (Steps 795001 to 795500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 506.7209
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  80%|███████▉  | 795992/1000000 [3:55:08<28:55, 117.54step/s]


Episode 1592 finished at step 500 (796000 total). Env Reward: -6.40, Steps: 500, Delivered: 8


Total Steps Trained:  80%|███████▉  | 796016/1000000 [3:55:13<4:50:36, 11.70step/s]


--- Rollout Summary (Steps 795501 to 796000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 527.7322
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  80%|███████▉  | 796498/1000000 [3:55:17<28:54, 117.31step/s]


Episode 1593 finished at step 500 (796500 total). Env Reward: -9.46, Steps: 500, Delivered: 5


Total Steps Trained:  80%|███████▉  | 796510/1000000 [3:55:22<6:33:52,  8.61step/s]


--- Rollout Summary (Steps 796001 to 796500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 609.0492
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  80%|███████▉  | 796991/1000000 [3:55:26<30:47, 109.86step/s]


Episode 1594 finished at step 500 (797000 total). Env Reward: -7.03, Steps: 500, Delivered: 8


Total Steps Trained:  80%|███████▉  | 797014/1000000 [3:55:31<5:05:14, 11.08step/s]


--- Rollout Summary (Steps 796501 to 797000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 507.9550
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  80%|███████▉  | 797497/1000000 [3:55:35<28:04, 120.18step/s]


Episode 1595 finished at step 500 (797500 total). Env Reward: 1.27, Steps: 500, Delivered: 7


Total Steps Trained:  80%|███████▉  | 797521/1000000 [3:55:39<4:51:04, 11.59step/s]


--- Rollout Summary (Steps 797001 to 797500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 1040.6352
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  80%|███████▉  | 797990/1000000 [3:55:44<27:44, 121.35step/s]


Episode 1596 finished at step 500 (798000 total). Env Reward: -8.60, Steps: 500, Delivered: 6


Total Steps Trained:  80%|███████▉  | 798014/1000000 [3:55:48<4:48:17, 11.68step/s]


--- Rollout Summary (Steps 797501 to 798000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.53
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 690.1969
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  80%|███████▉  | 798490/1000000 [3:55:52<27:42, 121.19step/s]


Episode 1597 finished at step 500 (798500 total). Env Reward: -9.76, Steps: 500, Delivered: 5


Total Steps Trained:  80%|███████▉  | 798513/1000000 [3:55:57<4:53:28, 11.44step/s]


--- Rollout Summary (Steps 798001 to 798500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 623.9142
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  80%|███████▉  | 798989/1000000 [3:56:01<28:52, 116.03step/s]


Episode 1598 finished at step 500 (799000 total). Env Reward: -9.04, Steps: 500, Delivered: 6


Total Steps Trained:  80%|███████▉  | 799011/1000000 [3:56:06<5:04:22, 11.01step/s]


--- Rollout Summary (Steps 798501 to 799000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 613.7903
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  80%|███████▉  | 799490/1000000 [3:56:10<29:18, 114.05step/s]


Episode 1599 finished at step 500 (799500 total). Env Reward: -11.08, Steps: 500, Delivered: 4


Total Steps Trained:  80%|███████▉  | 799513/1000000 [3:56:15<4:55:37, 11.30step/s]


--- Rollout Summary (Steps 799001 to 799500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 473.5831
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  80%|███████▉  | 799988/1000000 [3:56:19<27:24, 121.65step/s]


Episode 1600 finished at step 500 (800000 total). Env Reward: -0.69, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001600_map1.pth


Total Steps Trained:  80%|████████  | 800012/1000000 [3:56:23<4:44:07, 11.73step/s]


--- Rollout Summary (Steps 799501 to 800000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 988.9899
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  80%|████████  | 800494/1000000 [3:56:28<30:18, 109.72step/s]


Episode 1601 finished at step 500 (800500 total). Env Reward: 1.13, Steps: 500, Delivered: 7


Total Steps Trained:  80%|████████  | 800517/1000000 [3:56:33<5:04:52, 10.91step/s]


--- Rollout Summary (Steps 800001 to 800500) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 1045.2331
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  80%|████████  | 800995/1000000 [3:56:37<29:50, 111.18step/s]


Episode 1602 finished at step 500 (801000 total). Env Reward: -7.53, Steps: 500, Delivered: 7


Total Steps Trained:  80%|████████  | 801018/1000000 [3:56:42<5:05:47, 10.85step/s]


--- Rollout Summary (Steps 800501 to 801000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 530.0056
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  80%|████████  | 801495/1000000 [3:56:46<29:03, 113.86step/s]


Episode 1603 finished at step 500 (801500 total). Env Reward: -8.53, Steps: 500, Delivered: 6


Total Steps Trained:  80%|████████  | 801518/1000000 [3:56:51<4:54:34, 11.23step/s]


--- Rollout Summary (Steps 801001 to 801500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 425.5529
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  80%|████████  | 802000/1000000 [3:56:55<27:30, 119.94step/s]


Episode 1604 finished at step 500 (802000 total). Env Reward: -13.54, Steps: 500, Delivered: 1


Total Steps Trained:  80%|████████  | 802013/1000000 [3:56:59<6:17:10,  8.75step/s]


--- Rollout Summary (Steps 801501 to 802000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 427.9076
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  80%|████████  | 802488/1000000 [3:57:03<27:13, 120.90step/s]


Episode 1605 finished at step 500 (802500 total). Env Reward: -11.25, Steps: 500, Delivered: 3


Total Steps Trained:  80%|████████  | 802512/1000000 [3:57:08<4:43:03, 11.63step/s]


--- Rollout Summary (Steps 802001 to 802500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 561.0244
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  80%|████████  | 802994/1000000 [3:57:12<26:55, 121.96step/s]


Episode 1606 finished at step 500 (803000 total). Env Reward: -11.03, Steps: 500, Delivered: 3


Total Steps Trained:  80%|████████  | 803019/1000000 [3:57:17<4:32:03, 12.07step/s]


--- Rollout Summary (Steps 802501 to 803000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 558.5015
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  80%|████████  | 803490/1000000 [3:57:21<26:37, 123.03step/s]


Episode 1607 finished at step 500 (803500 total). Env Reward: -10.65, Steps: 500, Delivered: 4


Total Steps Trained:  80%|████████  | 803512/1000000 [3:57:26<4:51:57, 11.22step/s]


--- Rollout Summary (Steps 803001 to 803500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 387.9075
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  80%|████████  | 803996/1000000 [3:57:30<30:00, 108.85step/s]


Episode 1608 finished at step 500 (804000 total). Env Reward: -7.76, Steps: 500, Delivered: 7


Total Steps Trained:  80%|████████  | 804018/1000000 [3:57:35<5:05:44, 10.68step/s]


--- Rollout Summary (Steps 803501 to 804000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 806.5775
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  80%|████████  | 804491/1000000 [3:57:39<29:29, 110.49step/s]


Episode 1609 finished at step 500 (804500 total). Env Reward: -7.94, Steps: 500, Delivered: 7


Total Steps Trained:  80%|████████  | 804514/1000000 [3:57:44<4:55:29, 11.03step/s]


--- Rollout Summary (Steps 804001 to 804500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 672.5812
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  80%|████████  | 804996/1000000 [3:57:48<30:25, 106.80step/s]


Episode 1610 finished at step 500 (805000 total). Env Reward: -7.82, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001610_map1.pth


Total Steps Trained:  81%|████████  | 805019/1000000 [3:57:53<4:48:50, 11.25step/s]


--- Rollout Summary (Steps 804501 to 805000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 552.0005
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  81%|████████  | 805499/1000000 [3:57:57<26:36, 121.86step/s]


Episode 1611 finished at step 500 (805500 total). Env Reward: -10.38, Steps: 500, Delivered: 4


Total Steps Trained:  81%|████████  | 805512/1000000 [3:58:02<6:10:35,  8.75step/s]


--- Rollout Summary (Steps 805001 to 805500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 636.3691
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  81%|████████  | 805999/1000000 [3:58:06<26:50, 120.50step/s]


Episode 1612 finished at step 500 (806000 total). Env Reward: -12.37, Steps: 500, Delivered: 2


Total Steps Trained:  81%|████████  | 806012/1000000 [3:58:11<6:07:19,  8.80step/s]


--- Rollout Summary (Steps 805501 to 806000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 615.8197
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  81%|████████  | 806500/1000000 [3:58:15<28:01, 115.08step/s]


Episode 1613 finished at step 500 (806500 total). Env Reward: -6.98, Steps: 500, Delivered: 8


Total Steps Trained:  81%|████████  | 806512/1000000 [3:58:20<6:29:01,  8.29step/s]


--- Rollout Summary (Steps 806001 to 806500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 540.0861
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  81%|████████  | 806999/1000000 [3:58:24<29:02, 110.74step/s]


Episode 1614 finished at step 500 (807000 total). Env Reward: 26.12, Steps: 500, Delivered: 14
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001614_map1.pth


Total Steps Trained:  81%|████████  | 807011/1000000 [3:58:28<6:35:27,  8.13step/s]


--- Rollout Summary (Steps 806501 to 807000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.27
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 2735.9707
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  81%|████████  | 807491/1000000 [3:58:33<26:36, 120.57step/s]


Episode 1615 finished at step 500 (807500 total). Env Reward: -10.72, Steps: 500, Delivered: 4


Total Steps Trained:  81%|████████  | 807515/1000000 [3:58:38<4:41:03, 11.41step/s]


--- Rollout Summary (Steps 807001 to 807500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 521.5597
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  81%|████████  | 807993/1000000 [3:58:42<28:01, 114.16step/s]


Episode 1616 finished at step 500 (808000 total). Env Reward: -10.42, Steps: 500, Delivered: 4


Total Steps Trained:  81%|████████  | 808015/1000000 [3:58:46<4:55:44, 10.82step/s]


--- Rollout Summary (Steps 807501 to 808000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 467.2543
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  81%|████████  | 808497/1000000 [3:58:50<27:08, 117.57step/s]


Episode 1617 finished at step 500 (808500 total). Env Reward: -8.62, Steps: 500, Delivered: 6


Total Steps Trained:  81%|████████  | 808521/1000000 [3:58:55<4:31:42, 11.75step/s]


--- Rollout Summary (Steps 808001 to 808500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.28
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0073
Avg Critic Loss (per minibatch): 561.8058
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  81%|████████  | 808998/1000000 [3:58:59<27:53, 114.15step/s]


Episode 1618 finished at step 500 (809000 total). Env Reward: -10.13, Steps: 500, Delivered: 4


Total Steps Trained:  81%|████████  | 809021/1000000 [3:59:04<4:43:58, 11.21step/s]


--- Rollout Summary (Steps 808501 to 809000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 498.9966
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  81%|████████  | 809493/1000000 [3:59:08<31:02, 102.30step/s]


Episode 1619 finished at step 500 (809500 total). Env Reward: -7.85, Steps: 500, Delivered: 6


Total Steps Trained:  81%|████████  | 809514/1000000 [3:59:13<5:05:04, 10.41step/s]


--- Rollout Summary (Steps 809001 to 809500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 474.7169
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  81%|████████  | 809999/1000000 [3:59:17<25:54, 122.20step/s]


Episode 1620 finished at step 500 (810000 total). Env Reward: -8.07, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001620_map1.pth


Total Steps Trained:  81%|████████  | 810012/1000000 [3:59:22<6:00:20,  8.79step/s]


--- Rollout Summary (Steps 809501 to 810000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0081
Avg Critic Loss (per minibatch): 416.7879
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  81%|████████  | 810489/1000000 [3:59:26<26:03, 121.22step/s]


Episode 1621 finished at step 500 (810500 total). Env Reward: -9.72, Steps: 500, Delivered: 5


Total Steps Trained:  81%|████████  | 810513/1000000 [3:59:30<4:27:58, 11.79step/s]


--- Rollout Summary (Steps 810001 to 810500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 566.7848
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  81%|████████  | 810996/1000000 [3:59:35<26:27, 119.05step/s]


Episode 1622 finished at step 500 (811000 total). Env Reward: -7.69, Steps: 500, Delivered: 6


Total Steps Trained:  81%|████████  | 811019/1000000 [3:59:40<4:55:32, 10.66step/s]


--- Rollout Summary (Steps 810501 to 811000) ---
Update Duration: 4.79s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 484.6344
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  81%|████████  | 811493/1000000 [3:59:44<28:02, 112.06step/s]


Episode 1623 finished at step 500 (811500 total). Env Reward: -7.06, Steps: 500, Delivered: 8


Total Steps Trained:  81%|████████  | 811516/1000000 [3:59:48<4:42:19, 11.13step/s]


--- Rollout Summary (Steps 811001 to 811500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 472.9657
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  81%|████████  | 811990/1000000 [3:59:53<26:25, 118.56step/s]


Episode 1624 finished at step 500 (812000 total). Env Reward: -10.88, Steps: 500, Delivered: 4


Total Steps Trained:  81%|████████  | 812013/1000000 [3:59:57<4:34:58, 11.39step/s]


--- Rollout Summary (Steps 811501 to 812000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 557.6216
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  81%|████████▏ | 812500/1000000 [4:00:02<26:31, 117.82step/s]


Episode 1625 finished at step 500 (812500 total). Env Reward: -7.68, Steps: 500, Delivered: 7


Total Steps Trained:  81%|████████▏ | 812512/1000000 [4:00:06<5:59:42,  8.69step/s]


--- Rollout Summary (Steps 812001 to 812500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 387.0465
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  81%|████████▏ | 812993/1000000 [4:00:10<25:37, 121.66step/s]


Episode 1626 finished at step 500 (813000 total). Env Reward: -10.74, Steps: 500, Delivered: 4


Total Steps Trained:  81%|████████▏ | 813017/1000000 [4:00:15<4:25:44, 11.73step/s]


--- Rollout Summary (Steps 812501 to 813000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 530.6953
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  81%|████████▏ | 813490/1000000 [4:00:19<25:36, 121.36step/s]


Episode 1627 finished at step 500 (813500 total). Env Reward: -10.76, Steps: 500, Delivered: 4


Total Steps Trained:  81%|████████▏ | 813514/1000000 [4:00:24<4:23:25, 11.80step/s]


--- Rollout Summary (Steps 813001 to 813500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 442.9049
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  81%|████████▏ | 813994/1000000 [4:00:28<27:23, 113.16step/s]


Episode 1628 finished at step 500 (814000 total). Env Reward: 1.76, Steps: 500, Delivered: 8


Total Steps Trained:  81%|████████▏ | 814018/1000000 [4:00:32<4:32:21, 11.38step/s]


--- Rollout Summary (Steps 813501 to 814000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 1117.9450
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  81%|████████▏ | 814490/1000000 [4:00:36<24:57, 123.90step/s]


Episode 1629 finished at step 500 (814500 total). Env Reward: -8.81, Steps: 500, Delivered: 6


Total Steps Trained:  81%|████████▏ | 814514/1000000 [4:00:41<4:24:12, 11.70step/s]


--- Rollout Summary (Steps 814001 to 814500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 345.2678
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  81%|████████▏ | 814997/1000000 [4:00:45<25:33, 120.61step/s]


Episode 1630 finished at step 500 (815000 total). Env Reward: 1.52, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001630_map1.pth


Total Steps Trained:  82%|████████▏ | 815020/1000000 [4:00:50<4:31:21, 11.36step/s]


--- Rollout Summary (Steps 814501 to 815000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 853.2903
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  82%|████████▏ | 815489/1000000 [4:00:54<25:38, 119.96step/s]


Episode 1631 finished at step 500 (815500 total). Env Reward: -9.86, Steps: 500, Delivered: 5


Total Steps Trained:  82%|████████▏ | 815513/1000000 [4:00:59<4:23:40, 11.66step/s]


--- Rollout Summary (Steps 815001 to 815500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 516.6883
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  82%|████████▏ | 815999/1000000 [4:01:03<25:18, 121.16step/s]


Episode 1632 finished at step 500 (816000 total). Env Reward: -1.00, Steps: 500, Delivered: 5


Total Steps Trained:  82%|████████▏ | 816012/1000000 [4:01:07<5:45:31,  8.87step/s]


--- Rollout Summary (Steps 815501 to 816000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 904.9806
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  82%|████████▏ | 816488/1000000 [4:01:12<24:46, 123.46step/s]


Episode 1633 finished at step 500 (816500 total). Env Reward: -10.07, Steps: 500, Delivered: 4


Total Steps Trained:  82%|████████▏ | 816512/1000000 [4:01:16<4:25:00, 11.54step/s]


--- Rollout Summary (Steps 816001 to 816500) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 464.0264
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  82%|████████▏ | 816994/1000000 [4:01:20<24:56, 122.28step/s]


Episode 1634 finished at step 500 (817000 total). Env Reward: -3.92, Steps: 500, Delivered: 2


Total Steps Trained:  82%|████████▏ | 817018/1000000 [4:01:25<4:17:51, 11.83step/s]


--- Rollout Summary (Steps 816501 to 817000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 1038.6149
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  82%|████████▏ | 817491/1000000 [4:01:29<26:25, 115.12step/s]


Episode 1635 finished at step 500 (817500 total). Env Reward: -8.18, Steps: 500, Delivered: 7


Total Steps Trained:  82%|████████▏ | 817514/1000000 [4:01:34<4:32:05, 11.18step/s]


--- Rollout Summary (Steps 817001 to 817500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 528.6946
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  82%|████████▏ | 817990/1000000 [4:01:38<26:09, 116.00step/s]


Episode 1636 finished at step 500 (818000 total). Env Reward: -11.76, Steps: 500, Delivered: 3


Total Steps Trained:  82%|████████▏ | 818013/1000000 [4:01:43<4:22:30, 11.55step/s]


--- Rollout Summary (Steps 817501 to 818000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0065
Avg Critic Loss (per minibatch): 601.8186
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  82%|████████▏ | 818491/1000000 [4:01:47<25:27, 118.84step/s]


Episode 1637 finished at step 500 (818500 total). Env Reward: -12.53, Steps: 500, Delivered: 2


Total Steps Trained:  82%|████████▏ | 818514/1000000 [4:01:52<4:26:49, 11.34step/s]


--- Rollout Summary (Steps 818001 to 818500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 538.9132
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  82%|████████▏ | 819000/1000000 [4:01:56<25:30, 118.24step/s]


Episode 1638 finished at step 500 (819000 total). Env Reward: -9.28, Steps: 500, Delivered: 5


Total Steps Trained:  82%|████████▏ | 819012/1000000 [4:02:00<5:55:35,  8.48step/s]


--- Rollout Summary (Steps 818501 to 819000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0007
Avg Critic Loss (per minibatch): 408.7284
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  82%|████████▏ | 819496/1000000 [4:02:04<25:22, 118.59step/s]


Episode 1639 finished at step 500 (819500 total). Env Reward: -9.16, Steps: 500, Delivered: 6


Total Steps Trained:  82%|████████▏ | 819518/1000000 [4:02:09<4:28:30, 11.20step/s]


--- Rollout Summary (Steps 819001 to 819500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 436.3427
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  82%|████████▏ | 819999/1000000 [4:02:13<26:24, 113.63step/s]


Episode 1640 finished at step 500 (820000 total). Env Reward: -1.15, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001640_map1.pth


Total Steps Trained:  82%|████████▏ | 820011/1000000 [4:02:18<6:14:08,  8.02step/s]


--- Rollout Summary (Steps 819501 to 820000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 872.4978
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  82%|████████▏ | 820495/1000000 [4:02:22<24:54, 120.13step/s]


Episode 1641 finished at step 500 (820500 total). Env Reward: -12.36, Steps: 500, Delivered: 2


Total Steps Trained:  82%|████████▏ | 820520/1000000 [4:02:27<4:08:44, 12.03step/s]


--- Rollout Summary (Steps 820001 to 820500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 520.0383
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  82%|████████▏ | 820996/1000000 [4:02:31<24:50, 120.11step/s]


Episode 1642 finished at step 500 (821000 total). Env Reward: -7.52, Steps: 500, Delivered: 7


Total Steps Trained:  82%|████████▏ | 821020/1000000 [4:02:36<4:20:20, 11.46step/s]


--- Rollout Summary (Steps 820501 to 821000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 538.9290
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  82%|████████▏ | 821493/1000000 [4:02:40<24:46, 120.08step/s]


Episode 1643 finished at step 500 (821500 total). Env Reward: -9.97, Steps: 500, Delivered: 5


Total Steps Trained:  82%|████████▏ | 821516/1000000 [4:02:45<4:21:34, 11.37step/s]


--- Rollout Summary (Steps 821001 to 821500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): 0.0003
Avg Critic Loss (per minibatch): 582.2969
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  82%|████████▏ | 821990/1000000 [4:02:49<30:35, 97.00step/s]


Episode 1644 finished at step 500 (822000 total). Env Reward: 2.27, Steps: 500, Delivered: 8


Total Steps Trained:  82%|████████▏ | 822012/1000000 [4:02:54<4:42:45, 10.49step/s]


--- Rollout Summary (Steps 821501 to 822000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 1025.9229
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  82%|████████▏ | 822499/1000000 [4:02:58<24:53, 118.88step/s]


Episode 1645 finished at step 500 (822500 total). Env Reward: -10.78, Steps: 500, Delivered: 4


Total Steps Trained:  82%|████████▏ | 822511/1000000 [4:03:02<5:39:07,  8.72step/s]


--- Rollout Summary (Steps 822001 to 822500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 510.5484
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  82%|████████▏ | 822989/1000000 [4:03:06<24:42, 119.38step/s]


Episode 1646 finished at step 500 (823000 total). Env Reward: -11.96, Steps: 500, Delivered: 3


Total Steps Trained:  82%|████████▏ | 823012/1000000 [4:03:11<4:16:02, 11.52step/s]


--- Rollout Summary (Steps 822501 to 823000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): 0.0005
Avg Critic Loss (per minibatch): 555.0741
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  82%|████████▏ | 823489/1000000 [4:03:15<24:34, 119.70step/s]


Episode 1647 finished at step 500 (823500 total). Env Reward: -10.51, Steps: 500, Delivered: 4


Total Steps Trained:  82%|████████▏ | 823511/1000000 [4:03:20<4:22:05, 11.22step/s]


--- Rollout Summary (Steps 823001 to 823500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 509.3308
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  82%|████████▏ | 823990/1000000 [4:03:24<25:11, 116.47step/s]


Episode 1648 finished at step 500 (824000 total). Env Reward: -1.05, Steps: 500, Delivered: 5


Total Steps Trained:  82%|████████▏ | 824014/1000000 [4:03:29<4:11:52, 11.65step/s]


--- Rollout Summary (Steps 823501 to 824000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.26
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 1078.0410
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  82%|████████▏ | 824497/1000000 [4:03:33<25:09, 116.23step/s]


Episode 1649 finished at step 500 (824500 total). Env Reward: -5.75, Steps: 500, Delivered: 9


Total Steps Trained:  82%|████████▏ | 824520/1000000 [4:03:38<4:18:50, 11.30step/s]


--- Rollout Summary (Steps 824001 to 824500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.23
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 576.8228
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  82%|████████▏ | 824993/1000000 [4:03:42<23:45, 122.74step/s]


Episode 1650 finished at step 500 (825000 total). Env Reward: -12.40, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001650_map1.pth


Total Steps Trained:  83%|████████▎ | 825017/1000000 [4:03:46<4:06:30, 11.83step/s]


--- Rollout Summary (Steps 824501 to 825000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.35
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 494.7496
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  83%|████████▎ | 825498/1000000 [4:03:50<24:20, 119.47step/s]


Episode 1651 finished at step 500 (825500 total). Env Reward: -11.69, Steps: 500, Delivered: 3


Total Steps Trained:  83%|████████▎ | 825521/1000000 [4:03:55<4:21:05, 11.14step/s]


--- Rollout Summary (Steps 825001 to 825500) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.39
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 357.1461
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  83%|████████▎ | 825996/1000000 [4:03:59<23:48, 121.79step/s]


Episode 1652 finished at step 500 (826000 total). Env Reward: -13.92, Steps: 500, Delivered: 1


Total Steps Trained:  83%|████████▎ | 826021/1000000 [4:04:04<3:58:56, 12.14step/s]


--- Rollout Summary (Steps 825501 to 826000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 500.0609
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  83%|████████▎ | 826495/1000000 [4:04:08<24:33, 117.74step/s]


Episode 1653 finished at step 500 (826500 total). Env Reward: -11.95, Steps: 500, Delivered: 3


Total Steps Trained:  83%|████████▎ | 826520/1000000 [4:04:13<3:58:34, 12.12step/s]


--- Rollout Summary (Steps 826001 to 826500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 578.3108
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  83%|████████▎ | 826990/1000000 [4:04:17<23:38, 122.00step/s]


Episode 1654 finished at step 500 (827000 total). Env Reward: -11.34, Steps: 500, Delivered: 3


Total Steps Trained:  83%|████████▎ | 827014/1000000 [4:04:21<4:05:01, 11.77step/s]


--- Rollout Summary (Steps 826501 to 827000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 370.5054
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  83%|████████▎ | 827491/1000000 [4:04:26<23:15, 123.64step/s]


Episode 1655 finished at step 500 (827500 total). Env Reward: -10.55, Steps: 500, Delivered: 4


Total Steps Trained:  83%|████████▎ | 827516/1000000 [4:04:30<4:00:55, 11.93step/s]


--- Rollout Summary (Steps 827001 to 827500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 500.6245
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  83%|████████▎ | 827990/1000000 [4:04:34<25:06, 114.22step/s]


Episode 1656 finished at step 500 (828000 total). Env Reward: -8.41, Steps: 500, Delivered: 5


Total Steps Trained:  83%|████████▎ | 828013/1000000 [4:04:39<4:18:37, 11.08step/s]


--- Rollout Summary (Steps 827501 to 828000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 587.4527
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  83%|████████▎ | 828499/1000000 [4:04:43<25:25, 112.41step/s]


Episode 1657 finished at step 500 (828500 total). Env Reward: 2.25, Steps: 500, Delivered: 8


Total Steps Trained:  83%|████████▎ | 828511/1000000 [4:04:48<5:47:57,  8.21step/s]


--- Rollout Summary (Steps 828001 to 828500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 411.0540
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  83%|████████▎ | 828998/1000000 [4:04:52<23:45, 119.93step/s]


Episode 1658 finished at step 500 (829000 total). Env Reward: -12.54, Steps: 500, Delivered: 2


Total Steps Trained:  83%|████████▎ | 829010/1000000 [4:04:57<5:36:56,  8.46step/s]


--- Rollout Summary (Steps 828501 to 829000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 668.7145
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  83%|████████▎ | 829497/1000000 [4:05:01<22:47, 124.68step/s]


Episode 1659 finished at step 500 (829500 total). Env Reward: -11.33, Steps: 500, Delivered: 3


Total Steps Trained:  83%|████████▎ | 829522/1000000 [4:05:06<3:54:43, 12.10step/s]


--- Rollout Summary (Steps 829001 to 829500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 461.8492
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  83%|████████▎ | 829990/1000000 [4:05:10<25:49, 109.69step/s]


Episode 1660 finished at step 500 (830000 total). Env Reward: -8.02, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001660_map1.pth


Total Steps Trained:  83%|████████▎ | 830011/1000000 [4:05:14<4:27:24, 10.59step/s]


--- Rollout Summary (Steps 829501 to 830000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 559.6909
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  83%|████████▎ | 830492/1000000 [4:05:19<25:41, 109.94step/s]


Episode 1661 finished at step 500 (830500 total). Env Reward: -10.11, Steps: 500, Delivered: 5


Total Steps Trained:  83%|████████▎ | 830515/1000000 [4:05:23<4:12:51, 11.17step/s]


--- Rollout Summary (Steps 830001 to 830500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 449.7458
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  83%|████████▎ | 831000/1000000 [4:05:28<26:19, 107.01step/s]


Episode 1662 finished at step 500 (831000 total). Env Reward: 1.56, Steps: 500, Delivered: 7


Total Steps Trained:  83%|████████▎ | 831011/1000000 [4:05:32<5:53:50,  7.96step/s]


--- Rollout Summary (Steps 830501 to 831000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.36
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 788.0265
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  83%|████████▎ | 831488/1000000 [4:05:36<23:15, 120.75step/s]


Episode 1663 finished at step 500 (831500 total). Env Reward: -9.26, Steps: 500, Delivered: 5


Total Steps Trained:  83%|████████▎ | 831512/1000000 [4:05:41<4:01:14, 11.64step/s]


--- Rollout Summary (Steps 831001 to 831500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 426.4939
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  83%|████████▎ | 831989/1000000 [4:05:45<23:25, 119.55step/s]


Episode 1664 finished at step 500 (832000 total). Env Reward: -11.01, Steps: 500, Delivered: 4


Total Steps Trained:  83%|████████▎ | 832012/1000000 [4:05:50<4:05:06, 11.42step/s]


--- Rollout Summary (Steps 831501 to 832000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 543.2675
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  83%|████████▎ | 832489/1000000 [4:05:54<25:00, 111.62step/s]


Episode 1665 finished at step 500 (832500 total). Env Reward: -7.30, Steps: 500, Delivered: 8


Total Steps Trained:  83%|████████▎ | 832512/1000000 [4:05:59<4:11:32, 11.10step/s]


--- Rollout Summary (Steps 832001 to 832500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0082
Avg Critic Loss (per minibatch): 415.8207
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  83%|████████▎ | 832994/1000000 [4:06:03<23:35, 117.94step/s]


Episode 1666 finished at step 500 (833000 total). Env Reward: -7.12, Steps: 500, Delivered: 8


Total Steps Trained:  83%|████████▎ | 833016/1000000 [4:06:08<4:11:09, 11.08step/s]


--- Rollout Summary (Steps 832501 to 833000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 577.4535
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  83%|████████▎ | 833491/1000000 [4:06:12<23:01, 120.55step/s]


Episode 1667 finished at step 500 (833500 total). Env Reward: -9.69, Steps: 500, Delivered: 5


Total Steps Trained:  83%|████████▎ | 833516/1000000 [4:06:16<3:50:39, 12.03step/s]


--- Rollout Summary (Steps 833001 to 833500) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.53
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 506.4207
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  83%|████████▎ | 833998/1000000 [4:06:20<22:51, 121.05step/s]


Episode 1668 finished at step 500 (834000 total). Env Reward: -13.02, Steps: 500, Delivered: 2


Total Steps Trained:  83%|████████▎ | 834011/1000000 [4:06:25<5:11:21,  8.89step/s]


--- Rollout Summary (Steps 833501 to 834000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 421.5484
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  83%|████████▎ | 834495/1000000 [4:06:29<22:34, 122.15step/s]


Episode 1669 finished at step 500 (834500 total). Env Reward: -9.87, Steps: 500, Delivered: 5


Total Steps Trained:  83%|████████▎ | 834519/1000000 [4:06:34<4:02:37, 11.37step/s]


--- Rollout Summary (Steps 834001 to 834500) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 583.2224
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  84%|████████▎ | 835000/1000000 [4:06:38<24:13, 113.51step/s]


Episode 1670 finished at step 500 (835000 total). Env Reward: -10.85, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001670_map1.pth


Total Steps Trained:  84%|████████▎ | 835012/1000000 [4:06:43<5:24:39,  8.47step/s]


--- Rollout Summary (Steps 834501 to 835000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 435.3647
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  84%|████████▎ | 835493/1000000 [4:06:47<22:52, 119.83step/s]


Episode 1671 finished at step 500 (835500 total). Env Reward: -2.34, Steps: 500, Delivered: 3


Total Steps Trained:  84%|████████▎ | 835515/1000000 [4:06:52<4:05:46, 11.15step/s]


--- Rollout Summary (Steps 835001 to 835500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 969.7547
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  84%|████████▎ | 835999/1000000 [4:06:56<22:40, 120.59step/s]


Episode 1672 finished at step 500 (836000 total). Env Reward: -11.43, Steps: 500, Delivered: 3


Total Steps Trained:  84%|████████▎ | 836012/1000000 [4:07:00<5:09:08,  8.84step/s]


--- Rollout Summary (Steps 835501 to 836000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 645.8040
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  84%|████████▎ | 836494/1000000 [4:07:04<22:19, 122.09step/s]


Episode 1673 finished at step 500 (836500 total). Env Reward: -0.30, Steps: 500, Delivered: 5


Total Steps Trained:  84%|████████▎ | 836518/1000000 [4:07:09<3:52:27, 11.72step/s]


--- Rollout Summary (Steps 836001 to 836500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 1043.9127
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  84%|████████▎ | 836998/1000000 [4:07:13<23:17, 116.67step/s]


Episode 1674 finished at step 500 (837000 total). Env Reward: -10.07, Steps: 500, Delivered: 4


Total Steps Trained:  84%|████████▎ | 837021/1000000 [4:07:18<3:59:19, 11.35step/s]


--- Rollout Summary (Steps 836501 to 837000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 646.3598
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  84%|████████▎ | 837496/1000000 [4:07:22<22:31, 120.26step/s]


Episode 1675 finished at step 500 (837500 total). Env Reward: -10.76, Steps: 500, Delivered: 4


Total Steps Trained:  84%|████████▍ | 837521/1000000 [4:07:27<3:46:04, 11.98step/s]


--- Rollout Summary (Steps 837001 to 837500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0001
Avg Critic Loss (per minibatch): 583.0260
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  84%|████████▍ | 837997/1000000 [4:07:31<23:04, 116.97step/s]


Episode 1676 finished at step 500 (838000 total). Env Reward: -10.51, Steps: 500, Delivered: 4


Total Steps Trained:  84%|████████▍ | 838020/1000000 [4:07:36<4:09:02, 10.84step/s]


--- Rollout Summary (Steps 837501 to 838000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 636.6691
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  84%|████████▍ | 838492/1000000 [4:07:40<23:50, 112.93step/s]


Episode 1677 finished at step 500 (838500 total). Env Reward: -4.53, Steps: 500, Delivered: 10


Total Steps Trained:  84%|████████▍ | 838516/1000000 [4:07:45<3:55:48, 11.41step/s]


--- Rollout Summary (Steps 838001 to 838500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 454.4158
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  84%|████████▍ | 838993/1000000 [4:07:49<22:12, 120.82step/s]


Episode 1678 finished at step 500 (839000 total). Env Reward: -10.42, Steps: 500, Delivered: 4


Total Steps Trained:  84%|████████▍ | 839016/1000000 [4:07:54<3:53:03, 11.51step/s]


--- Rollout Summary (Steps 838501 to 839000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 536.7513
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  84%|████████▍ | 839491/1000000 [4:07:58<23:29, 113.87step/s]


Episode 1679 finished at step 500 (839500 total). Env Reward: -7.62, Steps: 500, Delivered: 7


Total Steps Trained:  84%|████████▍ | 839514/1000000 [4:08:03<3:54:04, 11.43step/s]


--- Rollout Summary (Steps 839001 to 839500) ---
Update Duration: 4.43s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 500.2151
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  84%|████████▍ | 839989/1000000 [4:08:07<22:41, 117.48step/s]


Episode 1680 finished at step 500 (840000 total). Env Reward: -12.20, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001680_map1.pth


Total Steps Trained:  84%|████████▍ | 840013/1000000 [4:08:11<3:54:02, 11.39step/s]


--- Rollout Summary (Steps 839501 to 840000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 469.9669
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  84%|████████▍ | 840492/1000000 [4:08:16<22:18, 119.17step/s]


Episode 1681 finished at step 500 (840500 total). Env Reward: -9.95, Steps: 500, Delivered: 5


Total Steps Trained:  84%|████████▍ | 840516/1000000 [4:08:20<3:45:53, 11.77step/s]


--- Rollout Summary (Steps 840001 to 840500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 462.3616
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  84%|████████▍ | 840996/1000000 [4:08:25<22:17, 118.91step/s]


Episode 1682 finished at step 500 (841000 total). Env Reward: -9.12, Steps: 500, Delivered: 5


Total Steps Trained:  84%|████████▍ | 841020/1000000 [4:08:29<3:48:53, 11.58step/s]


--- Rollout Summary (Steps 840501 to 841000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 700.9739
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  84%|████████▍ | 841491/1000000 [4:08:33<21:58, 120.23step/s]


Episode 1683 finished at step 500 (841500 total). Env Reward: -9.15, Steps: 500, Delivered: 5


Total Steps Trained:  84%|████████▍ | 841515/1000000 [4:08:38<3:49:48, 11.49step/s]


--- Rollout Summary (Steps 841001 to 841500) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 432.9662
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  84%|████████▍ | 841999/1000000 [4:08:42<21:19, 123.53step/s]


Episode 1684 finished at step 500 (842000 total). Env Reward: -10.51, Steps: 500, Delivered: 4


Total Steps Trained:  84%|████████▍ | 842012/1000000 [4:08:47<4:54:28,  8.94step/s]


--- Rollout Summary (Steps 841501 to 842000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 549.8243
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  84%|████████▍ | 842491/1000000 [4:08:51<21:53, 119.95step/s]


Episode 1685 finished at step 500 (842500 total). Env Reward: -10.75, Steps: 500, Delivered: 4


Total Steps Trained:  84%|████████▍ | 842514/1000000 [4:08:56<3:46:32, 11.59step/s]


--- Rollout Summary (Steps 842001 to 842500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 328.4607
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  84%|████████▍ | 842997/1000000 [4:09:00<22:37, 115.67step/s]


Episode 1686 finished at step 500 (843000 total). Env Reward: 2.67, Steps: 500, Delivered: 8


Total Steps Trained:  84%|████████▍ | 843020/1000000 [4:09:04<3:53:02, 11.23step/s]


--- Rollout Summary (Steps 842501 to 843000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 970.3541
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  84%|████████▍ | 843489/1000000 [4:09:08<22:45, 114.65step/s]


Episode 1687 finished at step 500 (843500 total). Env Reward: -10.45, Steps: 500, Delivered: 4


Total Steps Trained:  84%|████████▍ | 843511/1000000 [4:09:13<3:58:36, 10.93step/s]


--- Rollout Summary (Steps 843001 to 843500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0076
Avg Critic Loss (per minibatch): 468.2694
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  84%|████████▍ | 843998/1000000 [4:09:17<22:52, 113.68step/s]


Episode 1688 finished at step 500 (844000 total). Env Reward: -7.52, Steps: 500, Delivered: 7


Total Steps Trained:  84%|████████▍ | 844010/1000000 [4:09:22<5:14:25,  8.27step/s]


--- Rollout Summary (Steps 843501 to 844000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 379.4162
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  84%|████████▍ | 844488/1000000 [4:09:26<21:19, 121.53step/s]


Episode 1689 finished at step 500 (844500 total). Env Reward: -9.72, Steps: 500, Delivered: 5


Total Steps Trained:  84%|████████▍ | 844512/1000000 [4:09:31<3:39:16, 11.82step/s]


--- Rollout Summary (Steps 844001 to 844500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 376.4500
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  84%|████████▍ | 844990/1000000 [4:09:35<24:06, 107.19step/s]


Episode 1690 finished at step 500 (845000 total). Env Reward: -9.18, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001690_map1.pth


Total Steps Trained:  85%|████████▍ | 845013/1000000 [4:09:40<3:55:29, 10.97step/s]


--- Rollout Summary (Steps 844501 to 845000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 494.4122
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  85%|████████▍ | 845492/1000000 [4:09:44<21:37, 119.12step/s]


Episode 1691 finished at step 500 (845500 total). Env Reward: 3.78, Steps: 500, Delivered: 9


Total Steps Trained:  85%|████████▍ | 845516/1000000 [4:09:48<3:43:45, 11.51step/s]


--- Rollout Summary (Steps 845001 to 845500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 817.4188
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  85%|████████▍ | 845989/1000000 [4:09:53<21:11, 121.16step/s]


Episode 1692 finished at step 500 (846000 total). Env Reward: -6.83, Steps: 500, Delivered: 8


Total Steps Trained:  85%|████████▍ | 846013/1000000 [4:09:57<3:37:51, 11.78step/s]


--- Rollout Summary (Steps 845501 to 846000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 382.4159
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  85%|████████▍ | 846500/1000000 [4:10:01<22:03, 116.00step/s]


Episode 1693 finished at step 500 (846500 total). Env Reward: -7.37, Steps: 500, Delivered: 7


Total Steps Trained:  85%|████████▍ | 846512/1000000 [4:10:06<5:13:18,  8.17step/s]


--- Rollout Summary (Steps 846001 to 846500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 457.9570
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  85%|████████▍ | 846990/1000000 [4:10:10<21:48, 116.94step/s]


Episode 1694 finished at step 500 (847000 total). Env Reward: -9.01, Steps: 500, Delivered: 6


Total Steps Trained:  85%|████████▍ | 847013/1000000 [4:10:15<3:51:20, 11.02step/s]


--- Rollout Summary (Steps 846501 to 847000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 587.8849
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  85%|████████▍ | 847495/1000000 [4:10:19<21:31, 118.08step/s]


Episode 1695 finished at step 500 (847500 total). Env Reward: -11.00, Steps: 500, Delivered: 4


Total Steps Trained:  85%|████████▍ | 847517/1000000 [4:10:24<3:53:12, 10.90step/s]


--- Rollout Summary (Steps 847001 to 847500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 431.2431
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  85%|████████▍ | 847990/1000000 [4:10:28<21:25, 118.25step/s]


Episode 1696 finished at step 500 (848000 total). Env Reward: -9.71, Steps: 500, Delivered: 5


Total Steps Trained:  85%|████████▍ | 848013/1000000 [4:10:33<3:38:35, 11.59step/s]


--- Rollout Summary (Steps 847501 to 848000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 708.8535
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  85%|████████▍ | 848497/1000000 [4:10:37<20:48, 121.33step/s]


Episode 1697 finished at step 500 (848500 total). Env Reward: -11.98, Steps: 500, Delivered: 3


Total Steps Trained:  85%|████████▍ | 848521/1000000 [4:10:42<3:35:23, 11.72step/s]


--- Rollout Summary (Steps 848001 to 848500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 490.1123
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  85%|████████▍ | 848995/1000000 [4:10:46<20:51, 120.62step/s]


Episode 1698 finished at step 500 (849000 total). Env Reward: -8.54, Steps: 500, Delivered: 6


Total Steps Trained:  85%|████████▍ | 849019/1000000 [4:10:51<3:37:56, 11.55step/s]


--- Rollout Summary (Steps 848501 to 849000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 499.8836
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  85%|████████▍ | 849488/1000000 [4:10:55<20:30, 122.36step/s]


Episode 1699 finished at step 500 (849500 total). Env Reward: 0.44, Steps: 500, Delivered: 6


Total Steps Trained:  85%|████████▍ | 849512/1000000 [4:10:59<3:34:06, 11.71step/s]


--- Rollout Summary (Steps 849001 to 849500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): 0.0002
Avg Critic Loss (per minibatch): 839.8303
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  85%|████████▍ | 849996/1000000 [4:11:03<22:45, 109.86step/s]


Episode 1700 finished at step 500 (850000 total). Env Reward: -13.55, Steps: 500, Delivered: 1
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001700_map1.pth


Total Steps Trained:  85%|████████▌ | 850018/1000000 [4:11:08<3:54:00, 10.68step/s]


--- Rollout Summary (Steps 849501 to 850000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 521.5203
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  85%|████████▌ | 850497/1000000 [4:11:12<21:33, 115.60step/s]


Episode 1701 finished at step 500 (850500 total). Env Reward: -5.60, Steps: 500, Delivered: 9


Total Steps Trained:  85%|████████▌ | 850520/1000000 [4:11:17<3:47:39, 10.94step/s]


--- Rollout Summary (Steps 850001 to 850500) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -8.00
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 415.5794
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  85%|████████▌ | 851000/1000000 [4:11:21<21:36, 114.91step/s]


Episode 1702 finished at step 500 (851000 total). Env Reward: -6.90, Steps: 500, Delivered: 7


Total Steps Trained:  85%|████████▌ | 851012/1000000 [4:11:26<5:02:28,  8.21step/s]


--- Rollout Summary (Steps 850501 to 851000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.99
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 672.9297
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  85%|████████▌ | 851497/1000000 [4:11:30<20:51, 118.63step/s]


Episode 1703 finished at step 500 (851500 total). Env Reward: -6.51, Steps: 500, Delivered: 8


Total Steps Trained:  85%|████████▌ | 851521/1000000 [4:11:35<3:31:11, 11.72step/s]


--- Rollout Summary (Steps 851001 to 851500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0074
Avg Critic Loss (per minibatch): 534.2978
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  85%|████████▌ | 851999/1000000 [4:11:39<21:01, 117.37step/s]


Episode 1704 finished at step 500 (852000 total). Env Reward: -8.15, Steps: 500, Delivered: 6


Total Steps Trained:  85%|████████▌ | 852011/1000000 [4:11:44<4:58:29,  8.26step/s]


--- Rollout Summary (Steps 851501 to 852000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): 0.0011
Avg Critic Loss (per minibatch): 411.6138
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  85%|████████▌ | 852496/1000000 [4:11:48<24:09, 101.74step/s]


Episode 1705 finished at step 500 (852500 total). Env Reward: -8.29, Steps: 500, Delivered: 6


Total Steps Trained:  85%|████████▌ | 852518/1000000 [4:11:53<3:50:11, 10.68step/s]


--- Rollout Summary (Steps 852001 to 852500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.22
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 511.8240
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  85%|████████▌ | 852999/1000000 [4:11:57<21:35, 113.43step/s]


Episode 1706 finished at step 500 (853000 total). Env Reward: -6.91, Steps: 500, Delivered: 7


Total Steps Trained:  85%|████████▌ | 853011/1000000 [4:12:02<5:00:42,  8.15step/s]


--- Rollout Summary (Steps 852501 to 853000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 655.6224
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  85%|████████▌ | 853500/1000000 [4:12:06<20:51, 117.03step/s]


Episode 1707 finished at step 500 (853500 total). Env Reward: -7.38, Steps: 500, Delivered: 7


Total Steps Trained:  85%|████████▌ | 853512/1000000 [4:12:10<4:55:25,  8.26step/s]


--- Rollout Summary (Steps 853001 to 853500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 603.2966
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  85%|████████▌ | 853994/1000000 [4:12:15<20:23, 119.30step/s]


Episode 1708 finished at step 500 (854000 total). Env Reward: -11.88, Steps: 500, Delivered: 3


Total Steps Trained:  85%|████████▌ | 854016/1000000 [4:12:20<3:46:45, 10.73step/s]


--- Rollout Summary (Steps 853501 to 854000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0057
Avg Critic Loss (per minibatch): 505.2400
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  85%|████████▌ | 854491/1000000 [4:12:24<20:41, 117.21step/s]


Episode 1709 finished at step 500 (854500 total). Env Reward: -7.28, Steps: 500, Delivered: 8


Total Steps Trained:  85%|████████▌ | 854514/1000000 [4:12:28<3:36:08, 11.22step/s]


--- Rollout Summary (Steps 854001 to 854500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 447.6582
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  85%|████████▌ | 854999/1000000 [4:12:33<20:27, 118.10step/s]


Episode 1710 finished at step 500 (855000 total). Env Reward: 2.89, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001710_map1.pth


Total Steps Trained:  86%|████████▌ | 855011/1000000 [4:12:37<4:48:54,  8.36step/s]


--- Rollout Summary (Steps 854501 to 855000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 956.3833
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  86%|████████▌ | 855489/1000000 [4:12:42<21:16, 113.20step/s]


Episode 1711 finished at step 500 (855500 total). Env Reward: -7.81, Steps: 500, Delivered: 6


Total Steps Trained:  86%|████████▌ | 855511/1000000 [4:12:46<3:41:54, 10.85step/s]


--- Rollout Summary (Steps 855001 to 855500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 428.8347
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  86%|████████▌ | 855990/1000000 [4:12:51<23:26, 102.39step/s]


Episode 1712 finished at step 500 (856000 total). Env Reward: -8.98, Steps: 500, Delivered: 6


Total Steps Trained:  86%|████████▌ | 856012/1000000 [4:12:55<3:50:37, 10.41step/s]


--- Rollout Summary (Steps 855501 to 856000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 636.0439
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  86%|████████▌ | 856492/1000000 [4:13:00<20:20, 117.56step/s]


Episode 1713 finished at step 500 (856500 total). Env Reward: -8.70, Steps: 500, Delivered: 6


Total Steps Trained:  86%|████████▌ | 856515/1000000 [4:13:04<3:31:59, 11.28step/s]


--- Rollout Summary (Steps 856001 to 856500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 741.9219
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  86%|████████▌ | 856995/1000000 [4:13:08<19:50, 120.08step/s]


Episode 1714 finished at step 500 (857000 total). Env Reward: 0.05, Steps: 500, Delivered: 6


Total Steps Trained:  86%|████████▌ | 857019/1000000 [4:13:13<3:25:45, 11.58step/s]


--- Rollout Summary (Steps 856501 to 857000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.23
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 749.4124
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  86%|████████▌ | 857500/1000000 [4:13:17<20:12, 117.54step/s]


Episode 1715 finished at step 500 (857500 total). Env Reward: -8.78, Steps: 500, Delivered: 6


Total Steps Trained:  86%|████████▌ | 857512/1000000 [4:13:22<4:45:13,  8.33step/s]


--- Rollout Summary (Steps 857001 to 857500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 444.7933
Avg Entropy (per minibatch): 2.5615
------------------------------


Total Steps Trained:  86%|████████▌ | 857999/1000000 [4:13:27<20:52, 113.40step/s]


Episode 1716 finished at step 500 (858000 total). Env Reward: -5.52, Steps: 500, Delivered: 9


Total Steps Trained:  86%|████████▌ | 858011/1000000 [4:13:31<4:50:17,  8.15step/s]


--- Rollout Summary (Steps 857501 to 858000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 554.7142
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  86%|████████▌ | 858493/1000000 [4:13:35<19:21, 121.86step/s]


Episode 1717 finished at step 500 (858500 total). Env Reward: -11.13, Steps: 500, Delivered: 4


Total Steps Trained:  86%|████████▌ | 858517/1000000 [4:13:40<3:22:25, 11.65step/s]


--- Rollout Summary (Steps 858001 to 858500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 444.3351
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  86%|████████▌ | 858992/1000000 [4:13:44<20:10, 116.44step/s]


Episode 1718 finished at step 500 (859000 total). Env Reward: -8.97, Steps: 500, Delivered: 5


Total Steps Trained:  86%|████████▌ | 859015/1000000 [4:13:49<3:28:20, 11.28step/s]


--- Rollout Summary (Steps 858501 to 859000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 735.6036
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  86%|████████▌ | 859500/1000000 [4:13:53<19:58, 117.22step/s]


Episode 1719 finished at step 500 (859500 total). Env Reward: -5.86, Steps: 500, Delivered: 9


Total Steps Trained:  86%|████████▌ | 859512/1000000 [4:13:58<4:46:10,  8.18step/s]


--- Rollout Summary (Steps 859001 to 859500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 419.8063
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  86%|████████▌ | 859992/1000000 [4:14:02<19:22, 120.44step/s]


Episode 1720 finished at step 500 (860000 total). Env Reward: -12.52, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001720_map1.pth


Total Steps Trained:  86%|████████▌ | 860015/1000000 [4:14:07<3:24:11, 11.43step/s]


--- Rollout Summary (Steps 859501 to 860000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 484.3826
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  86%|████████▌ | 860497/1000000 [4:14:11<19:17, 120.52step/s]


Episode 1721 finished at step 500 (860500 total). Env Reward: -10.04, Steps: 500, Delivered: 5


Total Steps Trained:  86%|████████▌ | 860521/1000000 [4:14:16<3:19:03, 11.68step/s]


--- Rollout Summary (Steps 860001 to 860500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 350.8684
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  86%|████████▌ | 861000/1000000 [4:14:20<19:42, 117.60step/s]


Episode 1722 finished at step 500 (861000 total). Env Reward: -11.42, Steps: 500, Delivered: 4


Total Steps Trained:  86%|████████▌ | 861012/1000000 [4:14:25<4:36:27,  8.38step/s]


--- Rollout Summary (Steps 860501 to 861000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 469.8007
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  86%|████████▌ | 861500/1000000 [4:14:29<19:38, 117.47step/s]


Episode 1723 finished at step 500 (861500 total). Env Reward: 1.59, Steps: 500, Delivered: 7


Total Steps Trained:  86%|████████▌ | 861512/1000000 [4:14:33<4:29:11,  8.57step/s]


--- Rollout Summary (Steps 861001 to 861500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 1052.8704
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  86%|████████▌ | 861992/1000000 [4:14:38<20:09, 114.06step/s]


Episode 1724 finished at step 500 (862000 total). Env Reward: -6.88, Steps: 500, Delivered: 7


Total Steps Trained:  86%|████████▌ | 862014/1000000 [4:14:42<3:29:28, 10.98step/s]


--- Rollout Summary (Steps 861501 to 862000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0032
Avg Critic Loss (per minibatch): 420.7538
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  86%|████████▌ | 862492/1000000 [4:14:47<19:34, 117.12step/s]


Episode 1725 finished at step 500 (862500 total). Env Reward: 0.40, Steps: 500, Delivered: 6


Total Steps Trained:  86%|████████▋ | 862514/1000000 [4:14:51<3:29:16, 10.95step/s]


--- Rollout Summary (Steps 862001 to 862500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 716.7706
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  86%|████████▋ | 862998/1000000 [4:14:55<18:44, 121.78step/s]


Episode 1726 finished at step 500 (863000 total). Env Reward: -8.78, Steps: 500, Delivered: 6


Total Steps Trained:  86%|████████▋ | 863011/1000000 [4:15:00<4:25:32,  8.60step/s]


--- Rollout Summary (Steps 862501 to 863000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 364.6671
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  86%|████████▋ | 863500/1000000 [4:15:04<19:29, 116.67step/s]


Episode 1727 finished at step 500 (863500 total). Env Reward: -6.01, Steps: 500, Delivered: 9


Total Steps Trained:  86%|████████▋ | 863512/1000000 [4:15:09<4:30:56,  8.40step/s]


--- Rollout Summary (Steps 863001 to 863500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 424.7196
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  86%|████████▋ | 863995/1000000 [4:15:13<21:03, 107.64step/s]


Episode 1728 finished at step 500 (864000 total). Env Reward: -7.69, Steps: 500, Delivered: 7


Total Steps Trained:  86%|████████▋ | 864016/1000000 [4:15:18<3:41:38, 10.23step/s]


--- Rollout Summary (Steps 863501 to 864000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 438.0349
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  86%|████████▋ | 864492/1000000 [4:15:22<18:50, 119.89step/s]


Episode 1729 finished at step 500 (864500 total). Env Reward: -8.10, Steps: 500, Delivered: 7


Total Steps Trained:  86%|████████▋ | 864516/1000000 [4:15:27<3:10:08, 11.88step/s]


--- Rollout Summary (Steps 864001 to 864500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 573.7511
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  86%|████████▋ | 864992/1000000 [4:15:31<18:08, 124.07step/s]


Episode 1730 finished at step 500 (865000 total). Env Reward: -2.87, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001730_map1.pth


Total Steps Trained:  87%|████████▋ | 865015/1000000 [4:15:36<3:15:11, 11.53step/s]


--- Rollout Summary (Steps 864501 to 865000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 757.0278
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  87%|████████▋ | 865490/1000000 [4:15:40<18:28, 121.31step/s]


Episode 1731 finished at step 500 (865500 total). Env Reward: -9.40, Steps: 500, Delivered: 5


Total Steps Trained:  87%|████████▋ | 865516/1000000 [4:15:45<3:01:22, 12.36step/s]


--- Rollout Summary (Steps 865001 to 865500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 510.4855
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  87%|████████▋ | 865995/1000000 [4:15:49<19:20, 115.45step/s]


Episode 1732 finished at step 500 (866000 total). Env Reward: -9.52, Steps: 500, Delivered: 5


Total Steps Trained:  87%|████████▋ | 866018/1000000 [4:15:53<3:17:32, 11.30step/s]


--- Rollout Summary (Steps 865501 to 866000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0006
Avg Critic Loss (per minibatch): 436.6389
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  87%|████████▋ | 866495/1000000 [4:15:57<18:42, 118.95step/s]


Episode 1733 finished at step 500 (866500 total). Env Reward: -3.27, Steps: 500, Delivered: 2


Total Steps Trained:  87%|████████▋ | 866519/1000000 [4:16:02<3:11:19, 11.63step/s]


--- Rollout Summary (Steps 866001 to 866500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 898.7053
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  87%|████████▋ | 866991/1000000 [4:16:06<18:06, 122.40step/s]


Episode 1734 finished at step 500 (867000 total). Env Reward: -11.92, Steps: 500, Delivered: 3


Total Steps Trained:  87%|████████▋ | 867014/1000000 [4:16:11<3:13:28, 11.46step/s]


--- Rollout Summary (Steps 866501 to 867000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 537.5123
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  87%|████████▋ | 867488/1000000 [4:16:15<18:09, 121.57step/s]


Episode 1735 finished at step 500 (867500 total). Env Reward: -10.73, Steps: 500, Delivered: 4


Total Steps Trained:  87%|████████▋ | 867511/1000000 [4:16:19<3:12:23, 11.48step/s]


--- Rollout Summary (Steps 867001 to 867500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 426.3835
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  87%|████████▋ | 867998/1000000 [4:16:23<18:13, 120.77step/s]


Episode 1736 finished at step 500 (868000 total). Env Reward: -7.50, Steps: 500, Delivered: 7


Total Steps Trained:  87%|████████▋ | 868011/1000000 [4:16:28<4:08:31,  8.85step/s]


--- Rollout Summary (Steps 867501 to 868000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 554.5184
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  87%|████████▋ | 868489/1000000 [4:16:32<19:53, 110.20step/s]


Episode 1737 finished at step 500 (868500 total). Env Reward: -7.03, Steps: 500, Delivered: 7


Total Steps Trained:  87%|████████▋ | 868511/1000000 [4:16:37<3:21:38, 10.87step/s]


--- Rollout Summary (Steps 868001 to 868500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 471.9282
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  87%|████████▋ | 868994/1000000 [4:16:41<18:16, 119.52step/s]


Episode 1738 finished at step 500 (869000 total). Env Reward: -11.26, Steps: 500, Delivered: 4


Total Steps Trained:  87%|████████▋ | 869018/1000000 [4:16:46<3:03:55, 11.87step/s]


--- Rollout Summary (Steps 868501 to 869000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 489.0737
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  87%|████████▋ | 869500/1000000 [4:16:50<18:32, 117.26step/s]


Episode 1739 finished at step 500 (869500 total). Env Reward: -9.57, Steps: 500, Delivered: 5


Total Steps Trained:  87%|████████▋ | 869512/1000000 [4:16:55<4:17:27,  8.45step/s]


--- Rollout Summary (Steps 869001 to 869500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 485.3739
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  87%|████████▋ | 869997/1000000 [4:16:59<19:38, 110.27step/s]


Episode 1740 finished at step 500 (870000 total). Env Reward: -10.45, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001740_map1.pth


Total Steps Trained:  87%|████████▋ | 870020/1000000 [4:17:04<3:20:55, 10.78step/s]


--- Rollout Summary (Steps 869501 to 870000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0081
Avg Critic Loss (per minibatch): 592.1759
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  87%|████████▋ | 870499/1000000 [4:17:08<18:51, 114.46step/s]


Episode 1741 finished at step 500 (870500 total). Env Reward: -1.31, Steps: 500, Delivered: 5


Total Steps Trained:  87%|████████▋ | 870511/1000000 [4:17:13<4:16:26,  8.42step/s]


--- Rollout Summary (Steps 870001 to 870500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 634.5744
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  87%|████████▋ | 870995/1000000 [4:17:17<17:52, 120.31step/s]


Episode 1742 finished at step 500 (871000 total). Env Reward: -12.12, Steps: 500, Delivered: 3


Total Steps Trained:  87%|████████▋ | 871019/1000000 [4:17:21<3:06:14, 11.54step/s]


--- Rollout Summary (Steps 870501 to 871000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 422.0006
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  87%|████████▋ | 871490/1000000 [4:17:26<18:02, 118.76step/s]


Episode 1743 finished at step 500 (871500 total). Env Reward: -6.34, Steps: 500, Delivered: 8


Total Steps Trained:  87%|████████▋ | 871511/1000000 [4:17:30<3:17:04, 10.87step/s]


--- Rollout Summary (Steps 871001 to 871500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 539.8874
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  87%|████████▋ | 871997/1000000 [4:17:34<17:54, 119.12step/s]


Episode 1744 finished at step 500 (872000 total). Env Reward: -9.78, Steps: 500, Delivered: 5


Total Steps Trained:  87%|████████▋ | 872009/1000000 [4:17:39<4:18:12,  8.26step/s]


--- Rollout Summary (Steps 871501 to 872000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 524.5418
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  87%|████████▋ | 872495/1000000 [4:17:43<17:35, 120.76step/s]


Episode 1745 finished at step 500 (872500 total). Env Reward: -8.04, Steps: 500, Delivered: 7


Total Steps Trained:  87%|████████▋ | 872519/1000000 [4:17:48<3:01:50, 11.68step/s]


--- Rollout Summary (Steps 872001 to 872500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 613.5019
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  87%|████████▋ | 872996/1000000 [4:17:52<18:10, 116.48step/s]


Episode 1746 finished at step 500 (873000 total). Env Reward: 4.05, Steps: 500, Delivered: 9


Total Steps Trained:  87%|████████▋ | 873019/1000000 [4:17:57<3:08:46, 11.21step/s]


--- Rollout Summary (Steps 872501 to 873000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 934.8308
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  87%|████████▋ | 873490/1000000 [4:18:01<18:21, 114.90step/s]


Episode 1747 finished at step 500 (873500 total). Env Reward: -6.96, Steps: 500, Delivered: 7


Total Steps Trained:  87%|████████▋ | 873513/1000000 [4:18:06<3:07:18, 11.25step/s]


--- Rollout Summary (Steps 873001 to 873500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 559.1337
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  87%|████████▋ | 873993/1000000 [4:18:10<17:28, 120.23step/s]


Episode 1748 finished at step 500 (874000 total). Env Reward: -12.82, Steps: 500, Delivered: 2


Total Steps Trained:  87%|████████▋ | 874018/1000000 [4:18:15<2:54:09, 12.06step/s]


--- Rollout Summary (Steps 873501 to 874000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 609.2710
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  87%|████████▋ | 874495/1000000 [4:18:19<17:18, 120.86step/s]


Episode 1749 finished at step 500 (874500 total). Env Reward: -10.69, Steps: 500, Delivered: 4


Total Steps Trained:  87%|████████▋ | 874518/1000000 [4:18:24<3:02:40, 11.45step/s]


--- Rollout Summary (Steps 874001 to 874500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 434.1364
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  87%|████████▋ | 874997/1000000 [4:18:28<17:58, 115.94step/s]


Episode 1750 finished at step 500 (875000 total). Env Reward: -10.37, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001750_map1.pth


Total Steps Trained:  88%|████████▊ | 875020/1000000 [4:18:33<3:02:18, 11.43step/s]


--- Rollout Summary (Steps 874501 to 875000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 577.0849
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  88%|████████▊ | 875488/1000000 [4:18:37<17:16, 120.15step/s]


Episode 1751 finished at step 500 (875500 total). Env Reward: -4.63, Steps: 500, Delivered: 1


Total Steps Trained:  88%|████████▊ | 875512/1000000 [4:18:41<2:59:06, 11.58step/s]


--- Rollout Summary (Steps 875001 to 875500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 791.7935
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  88%|████████▊ | 875999/1000000 [4:18:46<17:18, 119.41step/s]


Episode 1752 finished at step 500 (876000 total). Env Reward: -9.38, Steps: 500, Delivered: 5


Total Steps Trained:  88%|████████▊ | 876023/1000000 [4:18:50<2:55:36, 11.77step/s]


--- Rollout Summary (Steps 875501 to 876000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 406.5254
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  88%|████████▊ | 876492/1000000 [4:18:54<17:21, 118.60step/s]


Episode 1753 finished at step 500 (876500 total). Env Reward: 1.39, Steps: 500, Delivered: 7


Total Steps Trained:  88%|████████▊ | 876515/1000000 [4:18:59<2:56:45, 11.64step/s]


--- Rollout Summary (Steps 876001 to 876500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 835.2173
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  88%|████████▊ | 876996/1000000 [4:19:03<17:25, 117.65step/s]


Episode 1754 finished at step 500 (877000 total). Env Reward: -10.34, Steps: 500, Delivered: 4


Total Steps Trained:  88%|████████▊ | 877019/1000000 [4:19:08<3:00:05, 11.38step/s]


--- Rollout Summary (Steps 876501 to 877000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 432.6548
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  88%|████████▊ | 877495/1000000 [4:19:12<16:53, 120.87step/s]


Episode 1755 finished at step 500 (877500 total). Env Reward: -10.24, Steps: 500, Delivered: 4


Total Steps Trained:  88%|████████▊ | 877519/1000000 [4:19:17<2:55:01, 11.66step/s]


--- Rollout Summary (Steps 877001 to 877500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 589.2971
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  88%|████████▊ | 877995/1000000 [4:19:21<17:52, 113.75step/s]


Episode 1756 finished at step 500 (878000 total). Env Reward: -5.95, Steps: 500, Delivered: 9


Total Steps Trained:  88%|████████▊ | 878017/1000000 [4:19:26<3:06:11, 10.92step/s]


--- Rollout Summary (Steps 877501 to 878000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 579.9056
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  88%|████████▊ | 878492/1000000 [4:19:30<17:46, 113.97step/s]


Episode 1757 finished at step 500 (878500 total). Env Reward: -9.50, Steps: 500, Delivered: 5


Total Steps Trained:  88%|████████▊ | 878515/1000000 [4:19:34<2:58:41, 11.33step/s]


--- Rollout Summary (Steps 878001 to 878500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0010
Avg Critic Loss (per minibatch): 460.0569
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  88%|████████▊ | 878998/1000000 [4:19:38<18:47, 107.28step/s]


Episode 1758 finished at step 500 (879000 total). Env Reward: -9.06, Steps: 500, Delivered: 5


Total Steps Trained:  88%|████████▊ | 879020/1000000 [4:19:43<3:09:16, 10.65step/s]


--- Rollout Summary (Steps 878501 to 879000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 591.1112
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  88%|████████▊ | 879500/1000000 [4:19:47<16:43, 120.04step/s]


Episode 1759 finished at step 500 (879500 total). Env Reward: -12.64, Steps: 500, Delivered: 2


Total Steps Trained:  88%|████████▊ | 879513/1000000 [4:19:52<3:46:57,  8.85step/s]


--- Rollout Summary (Steps 879001 to 879500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0005
Avg Critic Loss (per minibatch): 493.6995
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  88%|████████▊ | 879993/1000000 [4:19:56<16:22, 122.12step/s]


Episode 1760 finished at step 500 (880000 total). Env Reward: -10.41, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001760_map1.pth


Total Steps Trained:  88%|████████▊ | 880016/1000000 [4:20:01<2:55:01, 11.43step/s]


--- Rollout Summary (Steps 879501 to 880000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 624.1079
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  88%|████████▊ | 880499/1000000 [4:20:05<16:35, 120.00step/s]


Episode 1761 finished at step 500 (880500 total). Env Reward: -11.77, Steps: 500, Delivered: 3


Total Steps Trained:  88%|████████▊ | 880512/1000000 [4:20:10<3:44:53,  8.86step/s]


--- Rollout Summary (Steps 880001 to 880500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 636.1977
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  88%|████████▊ | 880991/1000000 [4:20:14<17:42, 112.06step/s]


Episode 1762 finished at step 500 (881000 total). Env Reward: -8.49, Steps: 500, Delivered: 6


Total Steps Trained:  88%|████████▊ | 881015/1000000 [4:20:19<2:58:38, 11.10step/s]


--- Rollout Summary (Steps 880501 to 881000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0081
Avg Critic Loss (per minibatch): 507.5123
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  88%|████████▊ | 881489/1000000 [4:20:23<16:38, 118.67step/s]


Episode 1763 finished at step 500 (881500 total). Env Reward: -10.27, Steps: 500, Delivered: 4


Total Steps Trained:  88%|████████▊ | 881512/1000000 [4:20:27<2:55:32, 11.25step/s]


--- Rollout Summary (Steps 881001 to 881500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.87
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 403.1273
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  88%|████████▊ | 881994/1000000 [4:20:31<16:11, 121.44step/s]


Episode 1764 finished at step 500 (882000 total). Env Reward: 9.33, Steps: 500, Delivered: 6


Total Steps Trained:  88%|████████▊ | 882018/1000000 [4:20:36<2:46:51, 11.78step/s]


--- Rollout Summary (Steps 881501 to 882000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 1537.5068
Avg Entropy (per minibatch): 2.5616
------------------------------


Total Steps Trained:  88%|████████▊ | 882489/1000000 [4:20:40<16:39, 117.61step/s]


Episode 1765 finished at step 500 (882500 total). Env Reward: -11.64, Steps: 500, Delivered: 3


Total Steps Trained:  88%|████████▊ | 882511/1000000 [4:20:45<2:59:13, 10.93step/s]


--- Rollout Summary (Steps 882001 to 882500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 489.6816
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  88%|████████▊ | 882994/1000000 [4:20:49<15:44, 123.85step/s]


Episode 1766 finished at step 500 (883000 total). Env Reward: -8.67, Steps: 500, Delivered: 6


Total Steps Trained:  88%|████████▊ | 883019/1000000 [4:20:54<2:41:31, 12.07step/s]


--- Rollout Summary (Steps 882501 to 883000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 524.2621
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  88%|████████▊ | 883496/1000000 [4:20:58<17:13, 112.71step/s]


Episode 1767 finished at step 500 (883500 total). Env Reward: -11.90, Steps: 500, Delivered: 3


Total Steps Trained:  88%|████████▊ | 883520/1000000 [4:21:02<2:44:50, 11.78step/s]


--- Rollout Summary (Steps 883001 to 883500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 587.6955
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  88%|████████▊ | 883998/1000000 [4:21:07<16:10, 119.58step/s]


Episode 1768 finished at step 500 (884000 total). Env Reward: -4.87, Steps: 500, Delivered: 10


Total Steps Trained:  88%|████████▊ | 884022/1000000 [4:21:11<2:43:55, 11.79step/s]


--- Rollout Summary (Steps 883501 to 884000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.66
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 634.9214
Avg Entropy (per minibatch): 2.5617
------------------------------


Total Steps Trained:  88%|████████▊ | 884492/1000000 [4:21:15<15:57, 120.59step/s]


Episode 1769 finished at step 500 (884500 total). Env Reward: -11.79, Steps: 500, Delivered: 3


Total Steps Trained:  88%|████████▊ | 884516/1000000 [4:21:20<2:49:37, 11.35step/s]


--- Rollout Summary (Steps 884001 to 884500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 547.5357
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  88%|████████▊ | 885000/1000000 [4:21:24<16:27, 116.49step/s]


Episode 1770 finished at step 500 (885000 total). Env Reward: 3.73, Steps: 500, Delivered: 9
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001770_map1.pth


Total Steps Trained:  89%|████████▊ | 885012/1000000 [4:21:29<3:48:59,  8.37step/s]


--- Rollout Summary (Steps 884501 to 885000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 865.2150
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  89%|████████▊ | 885489/1000000 [4:21:33<16:02, 119.03step/s]


Episode 1771 finished at step 500 (885500 total). Env Reward: -8.33, Steps: 500, Delivered: 6


Total Steps Trained:  89%|████████▊ | 885510/1000000 [4:21:38<2:55:24, 10.88step/s]


--- Rollout Summary (Steps 885001 to 885500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0088
Avg Critic Loss (per minibatch): 508.3676
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  89%|████████▊ | 885999/1000000 [4:21:42<16:29, 115.19step/s]


Episode 1772 finished at step 500 (886000 total). Env Reward: -9.85, Steps: 500, Delivered: 5


Total Steps Trained:  89%|████████▊ | 886011/1000000 [4:21:47<3:49:42,  8.27step/s]


--- Rollout Summary (Steps 885501 to 886000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 464.7647
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  89%|████████▊ | 886491/1000000 [4:21:51<15:56, 118.65step/s]


Episode 1773 finished at step 500 (886500 total). Env Reward: -14.00, Steps: 500, Delivered: 1


Total Steps Trained:  89%|████████▊ | 886514/1000000 [4:21:55<2:47:00, 11.33step/s]


--- Rollout Summary (Steps 886001 to 886500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 582.9206
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  89%|████████▊ | 886990/1000000 [4:22:00<15:37, 120.58step/s]


Episode 1774 finished at step 500 (887000 total). Env Reward: -10.09, Steps: 500, Delivered: 4


Total Steps Trained:  89%|████████▊ | 887015/1000000 [4:22:04<2:36:01, 12.07step/s]


--- Rollout Summary (Steps 886501 to 887000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 647.9483
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  89%|████████▊ | 887494/1000000 [4:22:08<16:29, 113.70step/s]


Episode 1775 finished at step 500 (887500 total). Env Reward: -9.75, Steps: 500, Delivered: 5


Total Steps Trained:  89%|████████▉ | 887518/1000000 [4:22:13<2:42:39, 11.53step/s]


--- Rollout Summary (Steps 887001 to 887500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 461.4453
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  89%|████████▉ | 887999/1000000 [4:22:17<15:54, 117.28step/s]


Episode 1776 finished at step 500 (888000 total). Env Reward: -9.16, Steps: 500, Delivered: 5


Total Steps Trained:  89%|████████▉ | 888011/1000000 [4:22:22<3:49:19,  8.14step/s]


--- Rollout Summary (Steps 887501 to 888000) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 451.2952
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  89%|████████▉ | 888492/1000000 [4:22:26<15:15, 121.81step/s]


Episode 1777 finished at step 500 (888500 total). Env Reward: -3.01, Steps: 500, Delivered: 2


Total Steps Trained:  89%|████████▉ | 888516/1000000 [4:22:31<2:38:26, 11.73step/s]


--- Rollout Summary (Steps 888001 to 888500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 1047.0503
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  89%|████████▉ | 888991/1000000 [4:22:35<16:03, 115.21step/s]


Episode 1778 finished at step 500 (889000 total). Env Reward: -2.85, Steps: 500, Delivered: 3


Total Steps Trained:  89%|████████▉ | 889014/1000000 [4:22:40<2:45:44, 11.16step/s]


--- Rollout Summary (Steps 888501 to 889000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 784.4755
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  89%|████████▉ | 889498/1000000 [4:22:44<15:29, 118.82step/s]


Episode 1779 finished at step 500 (889500 total). Env Reward: -7.16, Steps: 500, Delivered: 7


Total Steps Trained:  89%|████████▉ | 889521/1000000 [4:22:48<2:41:41, 11.39step/s]


--- Rollout Summary (Steps 889001 to 889500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 404.5633
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  89%|████████▉ | 889997/1000000 [4:22:53<17:29, 104.83step/s]


Episode 1780 finished at step 500 (890000 total). Env Reward: -7.76, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001780_map1.pth


Total Steps Trained:  89%|████████▉ | 890019/1000000 [4:22:57<2:54:34, 10.50step/s]


--- Rollout Summary (Steps 889501 to 890000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.40
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 880.6054
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  89%|████████▉ | 890489/1000000 [4:23:01<15:06, 120.85step/s]


Episode 1781 finished at step 500 (890500 total). Env Reward: -8.50, Steps: 500, Delivered: 6


Total Steps Trained:  89%|████████▉ | 890512/1000000 [4:23:06<2:37:17, 11.60step/s]


--- Rollout Summary (Steps 890001 to 890500) ---
Update Duration: 4.43s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 467.3310
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  89%|████████▉ | 890993/1000000 [4:23:10<15:06, 120.32step/s]


Episode 1782 finished at step 500 (891000 total). Env Reward: -0.73, Steps: 500, Delivered: 5


Total Steps Trained:  89%|████████▉ | 891017/1000000 [4:23:15<2:39:15, 11.40step/s]


--- Rollout Summary (Steps 890501 to 891000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 1152.3810
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  89%|████████▉ | 891496/1000000 [4:23:19<15:05, 119.84step/s]


Episode 1783 finished at step 500 (891500 total). Env Reward: -8.39, Steps: 500, Delivered: 6


Total Steps Trained:  89%|████████▉ | 891520/1000000 [4:23:24<2:36:13, 11.57step/s]


--- Rollout Summary (Steps 891001 to 891500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.42
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 611.0707
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  89%|████████▉ | 891989/1000000 [4:23:28<16:05, 111.91step/s]


Episode 1784 finished at step 500 (892000 total). Env Reward: -8.00, Steps: 500, Delivered: 6


Total Steps Trained:  89%|████████▉ | 892012/1000000 [4:23:33<2:41:24, 11.15step/s]


--- Rollout Summary (Steps 891501 to 892000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0088
Avg Critic Loss (per minibatch): 684.1656
Avg Entropy (per minibatch): 2.5590
------------------------------


Total Steps Trained:  89%|████████▉ | 892499/1000000 [4:23:37<15:56, 112.41step/s]


Episode 1785 finished at step 500 (892500 total). Env Reward: -1.52, Steps: 500, Delivered: 4


Total Steps Trained:  89%|████████▉ | 892511/1000000 [4:23:42<3:37:13,  8.25step/s]


--- Rollout Summary (Steps 892001 to 892500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.33
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.44
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 1319.0750
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  89%|████████▉ | 892993/1000000 [4:23:46<14:59, 119.01step/s]


Episode 1786 finished at step 500 (893000 total). Env Reward: -8.01, Steps: 500, Delivered: 7


Total Steps Trained:  89%|████████▉ | 893016/1000000 [4:23:51<2:35:58, 11.43step/s]


--- Rollout Summary (Steps 892501 to 893000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 559.6263
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  89%|████████▉ | 893494/1000000 [4:23:55<15:39, 113.38step/s]


Episode 1787 finished at step 500 (893500 total). Env Reward: -8.19, Steps: 500, Delivered: 7


Total Steps Trained:  89%|████████▉ | 893516/1000000 [4:24:00<2:44:56, 10.76step/s]


--- Rollout Summary (Steps 893001 to 893500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0075
Avg Critic Loss (per minibatch): 602.8691
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  89%|████████▉ | 893989/1000000 [4:24:04<16:15, 108.64step/s]


Episode 1788 finished at step 500 (894000 total). Env Reward: -6.94, Steps: 500, Delivered: 8


Total Steps Trained:  89%|████████▉ | 894012/1000000 [4:24:09<2:41:29, 10.94step/s]


--- Rollout Summary (Steps 893501 to 894000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 546.2198
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  89%|████████▉ | 894490/1000000 [4:24:13<15:32, 113.20step/s]


Episode 1789 finished at step 500 (894500 total). Env Reward: -10.83, Steps: 500, Delivered: 3


Total Steps Trained:  89%|████████▉ | 894513/1000000 [4:24:18<2:38:08, 11.12step/s]


--- Rollout Summary (Steps 894001 to 894500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.45
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 640.4613
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  89%|████████▉ | 894996/1000000 [4:24:22<14:40, 119.25step/s]


Episode 1790 finished at step 500 (895000 total). Env Reward: -8.14, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001790_map1.pth


Total Steps Trained:  90%|████████▉ | 895020/1000000 [4:24:27<2:30:48, 11.60step/s]


--- Rollout Summary (Steps 894501 to 895000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.47
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 601.4164
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  90%|████████▉ | 895492/1000000 [4:24:31<14:40, 118.63step/s]


Episode 1791 finished at step 500 (895500 total). Env Reward: -6.98, Steps: 500, Delivered: 8


Total Steps Trained:  90%|████████▉ | 895514/1000000 [4:24:35<2:37:35, 11.05step/s]


--- Rollout Summary (Steps 895001 to 895500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.46
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 540.3313
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  90%|████████▉ | 895993/1000000 [4:24:39<14:39, 118.23step/s]


Episode 1792 finished at step 500 (896000 total). Env Reward: -11.75, Steps: 500, Delivered: 3


Total Steps Trained:  90%|████████▉ | 896017/1000000 [4:24:44<2:27:08, 11.78step/s]


--- Rollout Summary (Steps 895501 to 896000) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.41
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 689.5866
Avg Entropy (per minibatch): 2.5589
------------------------------


Total Steps Trained:  90%|████████▉ | 896493/1000000 [4:24:48<14:38, 117.88step/s]


Episode 1793 finished at step 500 (896500 total). Env Reward: -14.15, Steps: 500, Delivered: 0


Total Steps Trained:  90%|████████▉ | 896515/1000000 [4:24:53<2:34:09, 11.19step/s]


--- Rollout Summary (Steps 896001 to 896500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): 0.0014
Avg Critic Loss (per minibatch): 473.9898
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  90%|████████▉ | 896991/1000000 [4:24:57<14:56, 114.93step/s]


Episode 1794 finished at step 500 (897000 total). Env Reward: -8.98, Steps: 500, Delivered: 6


Total Steps Trained:  90%|████████▉ | 897014/1000000 [4:25:02<2:37:38, 10.89step/s]


--- Rollout Summary (Steps 896501 to 897000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 416.1954
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  90%|████████▉ | 897500/1000000 [4:25:06<14:46, 115.64step/s]


Episode 1795 finished at step 500 (897500 total). Env Reward: -8.83, Steps: 500, Delivered: 6


Total Steps Trained:  90%|████████▉ | 897512/1000000 [4:25:11<3:26:02,  8.29step/s]


--- Rollout Summary (Steps 897001 to 897500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0071
Avg Critic Loss (per minibatch): 470.4978
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  90%|████████▉ | 897990/1000000 [4:25:15<14:48, 114.88step/s]


Episode 1796 finished at step 500 (898000 total). Env Reward: -9.66, Steps: 500, Delivered: 5


Total Steps Trained:  90%|████████▉ | 898013/1000000 [4:25:20<2:35:10, 10.95step/s]


--- Rollout Summary (Steps 897501 to 898000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 616.2426
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  90%|████████▉ | 898495/1000000 [4:25:24<14:05, 120.00step/s]


Episode 1797 finished at step 500 (898500 total). Env Reward: -12.37, Steps: 500, Delivered: 2


Total Steps Trained:  90%|████████▉ | 898518/1000000 [4:25:29<2:30:08, 11.27step/s]


--- Rollout Summary (Steps 898001 to 898500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 559.2670
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  90%|████████▉ | 898994/1000000 [4:25:33<15:19, 109.88step/s]


Episode 1798 finished at step 500 (899000 total). Env Reward: 10.95, Steps: 500, Delivered: 7


Total Steps Trained:  90%|████████▉ | 899013/1000000 [4:25:38<2:47:21, 10.06step/s]


--- Rollout Summary (Steps 898501 to 899000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 2130.2880
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  90%|████████▉ | 899491/1000000 [4:25:42<14:08, 118.43step/s]


Episode 1799 finished at step 500 (899500 total). Env Reward: -11.89, Steps: 500, Delivered: 3


Total Steps Trained:  90%|████████▉ | 899513/1000000 [4:25:47<2:30:12, 11.15step/s]


--- Rollout Summary (Steps 899001 to 899500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 527.0624
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  90%|████████▉ | 899997/1000000 [4:25:51<13:42, 121.54step/s]


Episode 1800 finished at step 500 (900000 total). Env Reward: -12.63, Steps: 500, Delivered: 2
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001800_map1.pth


Total Steps Trained:  90%|█████████ | 900010/1000000 [4:25:55<3:11:24,  8.71step/s]


--- Rollout Summary (Steps 899501 to 900000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 650.8774
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  90%|█████████ | 900489/1000000 [4:26:00<14:42, 112.80step/s]


Episode 1801 finished at step 500 (900500 total). Env Reward: -6.63, Steps: 500, Delivered: 8


Total Steps Trained:  90%|█████████ | 900511/1000000 [4:26:05<2:39:15, 10.41step/s]


--- Rollout Summary (Steps 900001 to 900500) ---
Update Duration: 4.74s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0093
Avg Critic Loss (per minibatch): 614.2335
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  90%|█████████ | 900990/1000000 [4:26:09<13:41, 120.53step/s]


Episode 1802 finished at step 500 (901000 total). Env Reward: -7.01, Steps: 500, Delivered: 7


Total Steps Trained:  90%|█████████ | 901014/1000000 [4:26:14<2:24:23, 11.43step/s]


--- Rollout Summary (Steps 900501 to 901000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0012
Avg Critic Loss (per minibatch): 622.7614
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  90%|█████████ | 901496/1000000 [4:26:18<14:29, 113.31step/s]


Episode 1803 finished at step 500 (901500 total). Env Reward: -8.77, Steps: 500, Delivered: 6


Total Steps Trained:  90%|█████████ | 901518/1000000 [4:26:23<2:29:35, 10.97step/s]


--- Rollout Summary (Steps 901001 to 901500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0073
Avg Critic Loss (per minibatch): 743.0332
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  90%|█████████ | 901999/1000000 [4:26:27<13:39, 119.53step/s]


Episode 1804 finished at step 500 (902000 total). Env Reward: -10.16, Steps: 500, Delivered: 5


Total Steps Trained:  90%|█████████ | 902011/1000000 [4:26:31<3:17:08,  8.28step/s]


--- Rollout Summary (Steps 901501 to 902000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 349.2514
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  90%|█████████ | 902498/1000000 [4:26:36<13:46, 118.01step/s]


Episode 1805 finished at step 500 (902500 total). Env Reward: 3.19, Steps: 500, Delivered: 9


Total Steps Trained:  90%|█████████ | 902510/1000000 [4:26:40<3:16:09,  8.28step/s]


--- Rollout Summary (Steps 902001 to 902500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 1237.3076
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  90%|█████████ | 902998/1000000 [4:26:45<13:29, 119.81step/s]


Episode 1806 finished at step 500 (903000 total). Env Reward: -9.02, Steps: 500, Delivered: 5


Total Steps Trained:  90%|█████████ | 903021/1000000 [4:26:49<2:21:33, 11.42step/s]


--- Rollout Summary (Steps 902501 to 903000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 408.9450
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  90%|█████████ | 903494/1000000 [4:26:53<13:34, 118.49step/s]


Episode 1807 finished at step 500 (903500 total). Env Reward: -2.78, Steps: 500, Delivered: 3


Total Steps Trained:  90%|█████████ | 903517/1000000 [4:26:58<2:21:26, 11.37step/s]


--- Rollout Summary (Steps 903001 to 903500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 674.3420
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  90%|█████████ | 903993/1000000 [4:27:02<13:15, 120.64step/s]


Episode 1808 finished at step 500 (904000 total). Env Reward: -12.04, Steps: 500, Delivered: 2


Total Steps Trained:  90%|█████████ | 904017/1000000 [4:27:07<2:23:55, 11.12step/s]


--- Rollout Summary (Steps 903501 to 904000) ---
Update Duration: 4.77s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 470.1672
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  90%|█████████ | 904489/1000000 [4:27:11<14:10, 112.29step/s]


Episode 1809 finished at step 500 (904500 total). Env Reward: -6.62, Steps: 500, Delivered: 8


Total Steps Trained:  90%|█████████ | 904511/1000000 [4:27:16<2:26:44, 10.85step/s]


--- Rollout Summary (Steps 904001 to 904500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 561.1300
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  90%|█████████ | 904997/1000000 [4:27:20<14:00, 113.01step/s]


Episode 1810 finished at step 500 (905000 total). Env Reward: -11.78, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001810_map1.pth


Total Steps Trained:  91%|█████████ | 905021/1000000 [4:27:25<2:19:16, 11.37step/s]


--- Rollout Summary (Steps 904501 to 905000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.21
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 470.7770
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  91%|█████████ | 905499/1000000 [4:27:29<13:15, 118.80step/s]


Episode 1811 finished at step 500 (905500 total). Env Reward: -10.48, Steps: 500, Delivered: 4


Total Steps Trained:  91%|█████████ | 905511/1000000 [4:27:34<3:05:33,  8.49step/s]


--- Rollout Summary (Steps 905001 to 905500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 441.9845
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  91%|█████████ | 905997/1000000 [4:27:38<15:21, 102.06step/s]


Episode 1812 finished at step 500 (906000 total). Env Reward: -8.70, Steps: 500, Delivered: 6


Total Steps Trained:  91%|█████████ | 906018/1000000 [4:27:43<2:35:05, 10.10step/s]


--- Rollout Summary (Steps 905501 to 906000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 608.4100
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  91%|█████████ | 906490/1000000 [4:27:47<13:44, 113.47step/s]


Episode 1813 finished at step 500 (906500 total). Env Reward: -8.80, Steps: 500, Delivered: 6


Total Steps Trained:  91%|█████████ | 906513/1000000 [4:27:52<2:21:54, 10.98step/s]


--- Rollout Summary (Steps 906001 to 906500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.19
Avg Actor Loss (per minibatch): -0.0077
Avg Critic Loss (per minibatch): 466.5203
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  91%|█████████ | 906997/1000000 [4:27:56<13:31, 114.65step/s]


Episode 1814 finished at step 500 (907000 total). Env Reward: -1.35, Steps: 500, Delivered: 4


Total Steps Trained:  91%|█████████ | 907021/1000000 [4:28:01<2:18:04, 11.22step/s]


--- Rollout Summary (Steps 906501 to 907000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 718.2061
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  91%|█████████ | 907500/1000000 [4:28:05<14:28, 106.46step/s]


Episode 1815 finished at step 500 (907500 total). Env Reward: -9.86, Steps: 500, Delivered: 5


Total Steps Trained:  91%|█████████ | 907511/1000000 [4:28:10<3:26:02,  7.48step/s]


--- Rollout Summary (Steps 907001 to 907500) ---
Update Duration: 4.75s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 641.6341
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  91%|█████████ | 907993/1000000 [4:28:14<13:51, 110.63step/s]


Episode 1816 finished at step 500 (908000 total). Env Reward: -11.26, Steps: 500, Delivered: 3


Total Steps Trained:  91%|█████████ | 908016/1000000 [4:28:19<2:19:01, 11.03step/s]


--- Rollout Summary (Steps 907501 to 908000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 637.8824
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  91%|█████████ | 908490/1000000 [4:28:23<12:47, 119.19step/s]


Episode 1817 finished at step 500 (908500 total). Env Reward: -13.42, Steps: 500, Delivered: 2


Total Steps Trained:  91%|█████████ | 908512/1000000 [4:28:28<2:18:04, 11.04step/s]


--- Rollout Summary (Steps 908001 to 908500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 487.7145
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  91%|█████████ | 908993/1000000 [4:28:32<13:03, 116.12step/s]


Episode 1818 finished at step 500 (909000 total). Env Reward: -7.32, Steps: 500, Delivered: 7


Total Steps Trained:  91%|█████████ | 909016/1000000 [4:28:37<2:14:04, 11.31step/s]


--- Rollout Summary (Steps 908501 to 909000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 764.5028
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  91%|█████████ | 909496/1000000 [4:28:41<13:11, 114.40step/s]


Episode 1819 finished at step 500 (909500 total). Env Reward: -4.31, Steps: 500, Delivered: 10


Total Steps Trained:  91%|█████████ | 909520/1000000 [4:28:46<2:11:45, 11.45step/s]


--- Rollout Summary (Steps 909001 to 909500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.11
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 565.5966
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  91%|█████████ | 909998/1000000 [4:28:50<13:08, 114.19step/s]


Episode 1820 finished at step 500 (910000 total). Env Reward: -8.71, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001820_map1.pth


Total Steps Trained:  91%|█████████ | 910022/1000000 [4:28:55<2:09:41, 11.56step/s]


--- Rollout Summary (Steps 909501 to 910000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 402.5321
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  91%|█████████ | 910494/1000000 [4:28:59<12:47, 116.58step/s]


Episode 1821 finished at step 500 (910500 total). Env Reward: -9.61, Steps: 500, Delivered: 5


Total Steps Trained:  91%|█████████ | 910517/1000000 [4:29:03<2:14:07, 11.12step/s]


--- Rollout Summary (Steps 910001 to 910500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 535.4284
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  91%|█████████ | 910990/1000000 [4:29:08<12:41, 116.90step/s]


Episode 1822 finished at step 500 (911000 total). Env Reward: -10.05, Steps: 500, Delivered: 5


Total Steps Trained:  91%|█████████ | 911013/1000000 [4:29:13<2:16:37, 10.86step/s]


--- Rollout Summary (Steps 910501 to 911000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0023
Avg Critic Loss (per minibatch): 595.0850
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  91%|█████████ | 911499/1000000 [4:29:17<12:47, 115.34step/s]


Episode 1823 finished at step 500 (911500 total). Env Reward: 1.66, Steps: 500, Delivered: 8


Total Steps Trained:  91%|█████████ | 911511/1000000 [4:29:22<2:58:02,  8.28step/s]


--- Rollout Summary (Steps 911001 to 911500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 914.7843
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  91%|█████████ | 912000/1000000 [4:29:26<12:27, 117.65step/s]


Episode 1824 finished at step 500 (912000 total). Env Reward: -10.35, Steps: 500, Delivered: 4


Total Steps Trained:  91%|█████████ | 912012/1000000 [4:29:30<2:54:48,  8.39step/s]


--- Rollout Summary (Steps 911501 to 912000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 357.7540
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  91%|█████████ | 912492/1000000 [4:29:35<12:30, 116.65step/s]


Episode 1825 finished at step 500 (912500 total). Env Reward: -8.26, Steps: 500, Delivered: 6


Total Steps Trained:  91%|█████████▏| 912514/1000000 [4:29:39<2:12:42, 10.99step/s]


--- Rollout Summary (Steps 912001 to 912500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 449.3588
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  91%|█████████▏| 912991/1000000 [4:29:43<12:18, 117.89step/s]


Episode 1826 finished at step 500 (913000 total). Env Reward: -9.37, Steps: 500, Delivered: 5


Total Steps Trained:  91%|█████████▏| 913014/1000000 [4:29:48<2:11:27, 11.03step/s]


--- Rollout Summary (Steps 912501 to 913000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 490.6738
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  91%|█████████▏| 913490/1000000 [4:29:52<12:41, 113.62step/s]


Episode 1827 finished at step 500 (913500 total). Env Reward: -10.19, Steps: 500, Delivered: 5


Total Steps Trained:  91%|█████████▏| 913513/1000000 [4:29:57<2:09:41, 11.11step/s]


--- Rollout Summary (Steps 913001 to 913500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.09
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 566.0488
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  91%|█████████▏| 913994/1000000 [4:30:01<11:45, 121.86step/s]


Episode 1828 finished at step 500 (914000 total). Env Reward: -11.89, Steps: 500, Delivered: 3


Total Steps Trained:  91%|█████████▏| 914018/1000000 [4:30:06<2:03:01, 11.65step/s]


--- Rollout Summary (Steps 913501 to 914000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 444.7796
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  91%|█████████▏| 914493/1000000 [4:30:10<12:42, 112.17step/s]


Episode 1829 finished at step 500 (914500 total). Env Reward: -10.06, Steps: 500, Delivered: 4


Total Steps Trained:  91%|█████████▏| 914515/1000000 [4:30:15<2:13:05, 10.70step/s]


--- Rollout Summary (Steps 914001 to 914500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.02
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 378.4708
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  91%|█████████▏| 914989/1000000 [4:30:19<11:43, 120.84step/s]


Episode 1830 finished at step 500 (915000 total). Env Reward: -11.55, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001830_map1.pth


Total Steps Trained:  92%|█████████▏| 915013/1000000 [4:30:24<2:01:09, 11.69step/s]


--- Rollout Summary (Steps 914501 to 915000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.02
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 663.9055
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  92%|█████████▏| 915493/1000000 [4:30:28<13:21, 105.49step/s]


Episode 1831 finished at step 500 (915500 total). Env Reward: -10.51, Steps: 500, Delivered: 4


Total Steps Trained:  92%|█████████▏| 915513/1000000 [4:30:33<2:20:21, 10.03step/s]


--- Rollout Summary (Steps 915001 to 915500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.01
Avg Actor Loss (per minibatch): -0.0070
Avg Critic Loss (per minibatch): 578.7376
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  92%|█████████▏| 915993/1000000 [4:30:37<12:00, 116.60step/s]


Episode 1832 finished at step 500 (916000 total). Env Reward: -11.53, Steps: 500, Delivered: 3


Total Steps Trained:  92%|█████████▏| 916016/1000000 [4:30:42<2:07:36, 10.97step/s]


--- Rollout Summary (Steps 915501 to 916000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.98
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 4.99
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 607.2066
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  92%|█████████▏| 916494/1000000 [4:30:46<13:10, 105.67step/s]


Episode 1833 finished at step 500 (916500 total). Env Reward: -12.80, Steps: 500, Delivered: 2


Total Steps Trained:  92%|█████████▏| 916517/1000000 [4:30:51<2:04:10, 11.20step/s]


--- Rollout Summary (Steps 916001 to 916500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -8.08
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 4.99
Avg Actor Loss (per minibatch): -0.0066
Avg Critic Loss (per minibatch): 400.2488
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  92%|█████████▏| 916995/1000000 [4:30:55<12:24, 111.54step/s]


Episode 1834 finished at step 500 (917000 total). Env Reward: 5.24, Steps: 500, Delivered: 11


Total Steps Trained:  92%|█████████▏| 917019/1000000 [4:31:00<2:01:59, 11.34step/s]


--- Rollout Summary (Steps 916501 to 917000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.07
Avg Actor Loss (per minibatch): 0.0024
Avg Critic Loss (per minibatch): 1548.9153
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  92%|█████████▏| 917493/1000000 [4:31:04<12:00, 114.59step/s]


Episode 1835 finished at step 500 (917500 total). Env Reward: 4.20, Steps: 500, Delivered: 10


Total Steps Trained:  92%|█████████▏| 917517/1000000 [4:31:09<2:01:19, 11.33step/s]


--- Rollout Summary (Steps 917001 to 917500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 958.1726
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  92%|█████████▏| 917997/1000000 [4:31:13<11:25, 119.63step/s]


Episode 1836 finished at step 500 (918000 total). Env Reward: -10.52, Steps: 500, Delivered: 4


Total Steps Trained:  92%|█████████▏| 918019/1000000 [4:31:18<2:05:18, 10.90step/s]


--- Rollout Summary (Steps 917501 to 918000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 460.6747
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  92%|█████████▏| 918500/1000000 [4:31:22<11:29, 118.13step/s]


Episode 1837 finished at step 500 (918500 total). Env Reward: -12.27, Steps: 500, Delivered: 2


Total Steps Trained:  92%|█████████▏| 918512/1000000 [4:31:26<2:43:14,  8.32step/s]


--- Rollout Summary (Steps 918001 to 918500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 566.3205
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  92%|█████████▏| 918997/1000000 [4:31:31<11:42, 115.33step/s]


Episode 1838 finished at step 500 (919000 total). Env Reward: -9.52, Steps: 500, Delivered: 5


Total Steps Trained:  92%|█████████▏| 919020/1000000 [4:31:35<2:00:20, 11.22step/s]


--- Rollout Summary (Steps 918501 to 919000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.06
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 509.8340
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  92%|█████████▏| 919496/1000000 [4:31:39<11:28, 116.96step/s]


Episode 1839 finished at step 500 (919500 total). Env Reward: -8.74, Steps: 500, Delivered: 6


Total Steps Trained:  92%|█████████▏| 919519/1000000 [4:31:44<1:59:28, 11.23step/s]


--- Rollout Summary (Steps 919001 to 919500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.81
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.07
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 434.0913
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  92%|█████████▏| 919994/1000000 [4:31:48<12:05, 110.29step/s]


Episode 1840 finished at step 500 (920000 total). Env Reward: -9.15, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001840_map1.pth


Total Steps Trained:  92%|█████████▏| 920017/1000000 [4:31:53<2:04:56, 10.67step/s]


--- Rollout Summary (Steps 919501 to 920000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.80
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 637.4647
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  92%|█████████▏| 920493/1000000 [4:31:57<10:53, 121.60step/s]


Episode 1841 finished at step 500 (920500 total). Env Reward: -9.68, Steps: 500, Delivered: 5


Total Steps Trained:  92%|█████████▏| 920518/1000000 [4:32:02<1:50:02, 12.04step/s]


--- Rollout Summary (Steps 920001 to 920500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 450.7308
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  92%|█████████▏| 920990/1000000 [4:32:06<11:05, 118.73step/s]


Episode 1842 finished at step 500 (921000 total). Env Reward: -6.30, Steps: 500, Delivered: 9


Total Steps Trained:  92%|█████████▏| 921014/1000000 [4:32:11<1:56:47, 11.27step/s]


--- Rollout Summary (Steps 920501 to 921000) ---
Update Duration: 4.66s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0015
Avg Critic Loss (per minibatch): 419.3509
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  92%|█████████▏| 921491/1000000 [4:32:15<11:12, 116.81step/s]


Episode 1843 finished at step 500 (921500 total). Env Reward: -9.12, Steps: 500, Delivered: 6


Total Steps Trained:  92%|█████████▏| 921514/1000000 [4:32:20<1:58:15, 11.06step/s]


--- Rollout Summary (Steps 921001 to 921500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.12
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 542.0780
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  92%|█████████▏| 921989/1000000 [4:32:24<11:07, 116.90step/s]


Episode 1844 finished at step 500 (922000 total). Env Reward: -12.40, Steps: 500, Delivered: 3


Total Steps Trained:  92%|█████████▏| 922012/1000000 [4:32:29<1:56:06, 11.20step/s]


--- Rollout Summary (Steps 921501 to 922000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.88
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.10
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 421.2817
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  92%|█████████▏| 922489/1000000 [4:32:33<11:30, 112.26step/s]


Episode 1845 finished at step 500 (922500 total). Env Reward: -9.75, Steps: 500, Delivered: 5


Total Steps Trained:  92%|█████████▏| 922511/1000000 [4:32:38<1:57:52, 10.96step/s]


--- Rollout Summary (Steps 922001 to 922500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 632.3649
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  92%|█████████▏| 922993/1000000 [4:32:42<11:10, 114.87step/s]


Episode 1846 finished at step 500 (923000 total). Env Reward: -8.40, Steps: 500, Delivered: 6


Total Steps Trained:  92%|█████████▏| 923016/1000000 [4:32:47<1:55:15, 11.13step/s]


--- Rollout Summary (Steps 922501 to 923000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -8.02
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.05
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 490.2967
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  92%|█████████▏| 923489/1000000 [4:32:51<11:02, 115.55step/s]


Episode 1847 finished at step 500 (923500 total). Env Reward: 4.18, Steps: 500, Delivered: 10


Total Steps Trained:  92%|█████████▏| 923512/1000000 [4:32:56<1:57:22, 10.86step/s]


--- Rollout Summary (Steps 923001 to 923500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.08
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 907.3715
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  92%|█████████▏| 923999/1000000 [4:33:00<10:58, 115.37step/s]


Episode 1848 finished at step 500 (924000 total). Env Reward: -5.98, Steps: 500, Delivered: 9


Total Steps Trained:  92%|█████████▏| 924011/1000000 [4:33:05<2:34:19,  8.21step/s]


--- Rollout Summary (Steps 923501 to 924000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 1046.8113
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  92%|█████████▏| 924493/1000000 [4:33:09<10:34, 119.03step/s]


Episode 1849 finished at step 500 (924500 total). Env Reward: -10.90, Steps: 500, Delivered: 4


Total Steps Trained:  92%|█████████▏| 924516/1000000 [4:33:14<1:51:37, 11.27step/s]


--- Rollout Summary (Steps 924001 to 924500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 536.2945
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  92%|█████████▏| 924992/1000000 [4:33:18<10:27, 119.58step/s]


Episode 1850 finished at step 500 (925000 total). Env Reward: -7.80, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001850_map1.pth


Total Steps Trained:  93%|█████████▎| 925015/1000000 [4:33:22<1:50:21, 11.32step/s]


--- Rollout Summary (Steps 924501 to 925000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.18
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 543.8713
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  93%|█████████▎| 925494/1000000 [4:33:27<10:52, 114.12step/s]


Episode 1851 finished at step 500 (925500 total). Env Reward: 1.31, Steps: 500, Delivered: 7


Total Steps Trained:  93%|█████████▎| 925517/1000000 [4:33:31<1:52:18, 11.05step/s]


--- Rollout Summary (Steps 925001 to 925500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0034
Avg Critic Loss (per minibatch): 904.5941
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  93%|█████████▎| 925994/1000000 [4:33:36<10:06, 122.08step/s]


Episode 1852 finished at step 500 (926000 total). Env Reward: -8.31, Steps: 500, Delivered: 6


Total Steps Trained:  93%|█████████▎| 926019/1000000 [4:33:40<1:43:17, 11.94step/s]


--- Rollout Summary (Steps 925501 to 926000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 580.0586
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  93%|█████████▎| 926498/1000000 [4:33:44<10:02, 121.98step/s]


Episode 1853 finished at step 500 (926500 total). Env Reward: -13.13, Steps: 500, Delivered: 2


Total Steps Trained:  93%|█████████▎| 926511/1000000 [4:33:49<2:18:10,  8.86step/s]


--- Rollout Summary (Steps 926001 to 926500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.20
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 445.2676
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  93%|█████████▎| 926990/1000000 [4:33:53<10:08, 120.06step/s]


Episode 1854 finished at step 500 (927000 total). Env Reward: -13.78, Steps: 500, Delivered: 1


Total Steps Trained:  93%|█████████▎| 927013/1000000 [4:33:58<1:48:55, 11.17step/s]


--- Rollout Summary (Steps 926501 to 927000) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.93
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.17
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 683.2622
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  93%|█████████▎| 927494/1000000 [4:34:02<09:40, 124.98step/s]


Episode 1855 finished at step 500 (927500 total). Env Reward: -13.86, Steps: 500, Delivered: 1


Total Steps Trained:  93%|█████████▎| 927517/1000000 [4:34:07<1:44:28, 11.56step/s]


--- Rollout Summary (Steps 927001 to 927500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.96
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.14
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 513.1514
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  93%|█████████▎| 927993/1000000 [4:34:11<10:24, 115.27step/s]


Episode 1856 finished at step 500 (928000 total). Env Reward: -6.40, Steps: 500, Delivered: 8


Total Steps Trained:  93%|█████████▎| 928015/1000000 [4:34:16<1:49:04, 11.00step/s]


--- Rollout Summary (Steps 927501 to 928000) ---
Update Duration: 4.43s
Avg Episode Reward (last 100): -7.97
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.13
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 569.2330
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  93%|█████████▎| 928496/1000000 [4:34:20<10:26, 114.04step/s]


Episode 1857 finished at step 500 (928500 total). Env Reward: -6.46, Steps: 500, Delivered: 8


Total Steps Trained:  93%|█████████▎| 928520/1000000 [4:34:25<1:43:53, 11.47step/s]


--- Rollout Summary (Steps 928001 to 928500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.94
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.16
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 548.5049
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  93%|█████████▎| 928992/1000000 [4:34:29<10:32, 112.25step/s]


Episode 1858 finished at step 500 (929000 total). Env Reward: -10.33, Steps: 500, Delivered: 4


Total Steps Trained:  93%|█████████▎| 929015/1000000 [4:34:33<1:46:12, 11.14step/s]


--- Rollout Summary (Steps 928501 to 929000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.95
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.15
Avg Actor Loss (per minibatch): -0.0059
Avg Critic Loss (per minibatch): 368.3228
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  93%|█████████▎| 929495/1000000 [4:34:38<10:53, 107.95step/s]


Episode 1859 finished at step 500 (929500 total). Env Reward: 5.57, Steps: 500, Delivered: 11


Total Steps Trained:  93%|█████████▎| 929518/1000000 [4:34:42<1:45:53, 11.09step/s]


--- Rollout Summary (Steps 929001 to 929500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 685.0366
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  93%|█████████▎| 929993/1000000 [4:34:47<10:17, 113.43step/s]


Episode 1860 finished at step 500 (930000 total). Env Reward: -9.47, Steps: 500, Delivered: 5
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001860_map1.pth


Total Steps Trained:  93%|█████████▎| 930016/1000000 [4:34:51<1:43:39, 11.25step/s]


--- Rollout Summary (Steps 929501 to 930000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 771.6254
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  93%|█████████▎| 930499/1000000 [4:34:55<09:53, 117.09step/s]


Episode 1861 finished at step 500 (930500 total). Env Reward: 1.73, Steps: 500, Delivered: 7


Total Steps Trained:  93%|█████████▎| 930511/1000000 [4:35:00<2:22:02,  8.15step/s]


--- Rollout Summary (Steps 930001 to 930500) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 879.7536
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  93%|█████████▎| 930992/1000000 [4:35:04<09:54, 116.15step/s]


Episode 1862 finished at step 500 (931000 total). Env Reward: -4.60, Steps: 500, Delivered: 10


Total Steps Trained:  93%|█████████▎| 931015/1000000 [4:35:09<1:46:56, 10.75step/s]


--- Rollout Summary (Steps 930501 to 931000) ---
Update Duration: 4.69s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0064
Avg Critic Loss (per minibatch): 632.2871
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  93%|█████████▎| 931493/1000000 [4:35:13<09:36, 118.87step/s]


Episode 1863 finished at step 500 (931500 total). Env Reward: -1.89, Steps: 500, Delivered: 4


Total Steps Trained:  93%|█████████▎| 931515/1000000 [4:35:18<1:42:57, 11.09step/s]


--- Rollout Summary (Steps 931001 to 931500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 849.7455
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  93%|█████████▎| 931993/1000000 [4:35:22<09:46, 115.94step/s]


Episode 1864 finished at step 500 (932000 total). Env Reward: -10.13, Steps: 500, Delivered: 4


Total Steps Trained:  93%|█████████▎| 932016/1000000 [4:35:27<1:40:15, 11.30step/s]


--- Rollout Summary (Steps 931501 to 932000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 634.4708
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  93%|█████████▎| 932489/1000000 [4:35:31<10:14, 109.88step/s]


Episode 1865 finished at step 500 (932500 total). Env Reward: -10.67, Steps: 500, Delivered: 4


Total Steps Trained:  93%|█████████▎| 932512/1000000 [4:35:36<1:43:33, 10.86step/s]


--- Rollout Summary (Steps 932001 to 932500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0063
Avg Critic Loss (per minibatch): 523.3566
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  93%|█████████▎| 932988/1000000 [4:35:40<09:14, 120.91step/s]


Episode 1866 finished at step 500 (933000 total). Env Reward: -11.90, Steps: 500, Delivered: 3


Total Steps Trained:  93%|█████████▎| 933012/1000000 [4:35:44<1:34:40, 11.79step/s]


--- Rollout Summary (Steps 932501 to 933000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 516.7791
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  93%|█████████▎| 933490/1000000 [4:35:49<10:02, 110.37step/s]


Episode 1867 finished at step 500 (933500 total). Env Reward: -9.66, Steps: 500, Delivered: 5


Total Steps Trained:  93%|█████████▎| 933512/1000000 [4:35:53<1:43:45, 10.68step/s]


--- Rollout Summary (Steps 933001 to 933500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 494.3061
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  93%|█████████▎| 933995/1000000 [4:35:58<11:09, 98.64step/s] 


Episode 1868 finished at step 500 (934000 total). Env Reward: -6.79, Steps: 500, Delivered: 8


Total Steps Trained:  93%|█████████▎| 934015/1000000 [4:36:03<1:48:47, 10.11step/s]


--- Rollout Summary (Steps 933501 to 934000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 453.2493
Avg Entropy (per minibatch): 2.5618
------------------------------


Total Steps Trained:  93%|█████████▎| 934491/1000000 [4:36:07<09:30, 114.89step/s]


Episode 1869 finished at step 500 (934500 total). Env Reward: -6.83, Steps: 500, Delivered: 8


Total Steps Trained:  93%|█████████▎| 934513/1000000 [4:36:12<1:41:21, 10.77step/s]


--- Rollout Summary (Steps 934001 to 934500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 666.1041
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  93%|█████████▎| 934989/1000000 [4:36:16<09:30, 114.02step/s]


Episode 1870 finished at step 500 (935000 total). Env Reward: 0.49, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001870_map1.pth


Total Steps Trained:  94%|█████████▎| 935012/1000000 [4:36:20<1:38:27, 11.00step/s]


--- Rollout Summary (Steps 934501 to 935000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 1081.7478
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  94%|█████████▎| 935496/1000000 [4:36:24<08:54, 120.78step/s]


Episode 1871 finished at step 500 (935500 total). Env Reward: -9.43, Steps: 500, Delivered: 5


Total Steps Trained:  94%|█████████▎| 935521/1000000 [4:36:29<1:29:47, 11.97step/s]


--- Rollout Summary (Steps 935001 to 935500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 535.3897
Avg Entropy (per minibatch): 2.5614
------------------------------


Total Steps Trained:  94%|█████████▎| 935990/1000000 [4:36:33<09:01, 118.23step/s]


Episode 1872 finished at step 500 (936000 total). Env Reward: -9.88, Steps: 500, Delivered: 5


Total Steps Trained:  94%|█████████▎| 936012/1000000 [4:36:38<1:40:05, 10.65step/s]


--- Rollout Summary (Steps 935501 to 936000) ---
Update Duration: 4.72s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 423.4196
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  94%|█████████▎| 936492/1000000 [4:36:42<08:36, 123.01step/s]


Episode 1873 finished at step 500 (936500 total). Env Reward: -13.23, Steps: 500, Delivered: 1


Total Steps Trained:  94%|█████████▎| 936516/1000000 [4:36:47<1:29:26, 11.83step/s]


--- Rollout Summary (Steps 936001 to 936500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 650.9383
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  94%|█████████▎| 936995/1000000 [4:36:51<08:50, 118.66step/s]


Episode 1874 finished at step 500 (937000 total). Env Reward: -0.53, Steps: 500, Delivered: 6


Total Steps Trained:  94%|█████████▎| 937018/1000000 [4:36:56<1:31:53, 11.42step/s]


--- Rollout Summary (Steps 936501 to 937000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 650.9416
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  94%|█████████▎| 937493/1000000 [4:37:00<08:39, 120.23step/s]


Episode 1875 finished at step 500 (937500 total). Env Reward: -12.86, Steps: 500, Delivered: 2


Total Steps Trained:  94%|█████████▍| 937517/1000000 [4:37:04<1:28:41, 11.74step/s]


--- Rollout Summary (Steps 937001 to 937500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0041
Avg Critic Loss (per minibatch): 590.7163
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  94%|█████████▍| 938000/1000000 [4:37:09<08:53, 116.27step/s]


Episode 1876 finished at step 500 (938000 total). Env Reward: -8.39, Steps: 500, Delivered: 6


Total Steps Trained:  94%|█████████▍| 938012/1000000 [4:37:14<2:02:45,  8.42step/s]


--- Rollout Summary (Steps 937501 to 938000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 365.6887
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  94%|█████████▍| 938497/1000000 [4:37:18<09:09, 111.85step/s]


Episode 1877 finished at step 500 (938500 total). Env Reward: -9.63, Steps: 500, Delivered: 5


Total Steps Trained:  94%|█████████▍| 938520/1000000 [4:37:23<1:31:31, 11.20step/s]


--- Rollout Summary (Steps 938001 to 938500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 474.9136
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  94%|█████████▍| 938993/1000000 [4:37:27<08:29, 119.63step/s]


Episode 1878 finished at step 500 (939000 total). Env Reward: -9.04, Steps: 500, Delivered: 6


Total Steps Trained:  94%|█████████▍| 939016/1000000 [4:37:31<1:29:41, 11.33step/s]


--- Rollout Summary (Steps 938501 to 939000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 609.3034
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  94%|█████████▍| 939498/1000000 [4:37:36<08:12, 122.96step/s]


Episode 1879 finished at step 500 (939500 total). Env Reward: -1.52, Steps: 500, Delivered: 4


Total Steps Trained:  94%|█████████▍| 939511/1000000 [4:37:40<1:58:17,  8.52step/s]


--- Rollout Summary (Steps 939001 to 939500) ---
Update Duration: 4.70s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 599.3005
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  94%|█████████▍| 939998/1000000 [4:37:45<08:46, 113.95step/s]


Episode 1880 finished at step 500 (940000 total). Env Reward: -10.38, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001880_map1.pth


Total Steps Trained:  94%|█████████▍| 940022/1000000 [4:37:49<1:28:28, 11.30step/s]


--- Rollout Summary (Steps 939501 to 940000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 516.2312
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  94%|█████████▍| 940496/1000000 [4:37:53<08:14, 120.23step/s]


Episode 1881 finished at step 500 (940500 total). Env Reward: -13.23, Steps: 500, Delivered: 2


Total Steps Trained:  94%|█████████▍| 940519/1000000 [4:37:58<1:28:01, 11.26step/s]


--- Rollout Summary (Steps 940001 to 940500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0024
Avg Critic Loss (per minibatch): 507.4581
Avg Entropy (per minibatch): 2.5615
------------------------------


Total Steps Trained:  94%|█████████▍| 941000/1000000 [4:38:02<08:13, 119.49step/s]


Episode 1882 finished at step 500 (941000 total). Env Reward: -8.59, Steps: 500, Delivered: 6


Total Steps Trained:  94%|█████████▍| 941012/1000000 [4:38:07<1:54:51,  8.56step/s]


--- Rollout Summary (Steps 940501 to 941000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 491.2841
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  94%|█████████▍| 941494/1000000 [4:38:11<08:27, 115.32step/s]


Episode 1883 finished at step 500 (941500 total). Env Reward: -9.45, Steps: 500, Delivered: 5


Total Steps Trained:  94%|█████████▍| 941517/1000000 [4:38:16<1:27:05, 11.19step/s]


--- Rollout Summary (Steps 941001 to 941500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.86
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.26
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 513.4910
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  94%|█████████▍| 941995/1000000 [4:38:20<08:12, 117.84step/s]


Episode 1884 finished at step 500 (942000 total). Env Reward: -10.19, Steps: 500, Delivered: 4


Total Steps Trained:  94%|█████████▍| 942018/1000000 [4:38:25<1:24:57, 11.37step/s]


--- Rollout Summary (Steps 941501 to 942000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.89
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.24
Avg Actor Loss (per minibatch): -0.0037
Avg Critic Loss (per minibatch): 435.9674
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  94%|█████████▍| 942498/1000000 [4:38:29<08:12, 116.70step/s]


Episode 1885 finished at step 500 (942500 total). Env Reward: 3.45, Steps: 500, Delivered: 9


Total Steps Trained:  94%|█████████▍| 942510/1000000 [4:38:34<1:56:15,  8.24step/s]


--- Rollout Summary (Steps 942001 to 942500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.84
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 585.3641
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  94%|█████████▍| 942996/1000000 [4:38:38<08:30, 111.75step/s]


Episode 1886 finished at step 500 (943000 total). Env Reward: -8.91, Steps: 500, Delivered: 6


Total Steps Trained:  94%|█████████▍| 943019/1000000 [4:38:43<1:28:23, 10.74step/s]


--- Rollout Summary (Steps 942501 to 943000) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 409.1916
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  94%|█████████▍| 943500/1000000 [4:38:47<08:12, 114.67step/s]


Episode 1887 finished at step 500 (943500 total). Env Reward: -8.73, Steps: 500, Delivered: 6


Total Steps Trained:  94%|█████████▍| 943512/1000000 [4:38:51<1:54:42,  8.21step/s]


--- Rollout Summary (Steps 943001 to 943500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.85
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 461.6952
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  94%|█████████▍| 943994/1000000 [4:38:56<08:15, 113.02step/s]


Episode 1888 finished at step 500 (944000 total). Env Reward: 0.57, Steps: 500, Delivered: 6


Total Steps Trained:  94%|█████████▍| 944017/1000000 [4:39:00<1:25:12, 10.95step/s]


--- Rollout Summary (Steps 943501 to 944000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.25
Avg Actor Loss (per minibatch): -0.0020
Avg Critic Loss (per minibatch): 725.6811
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  94%|█████████▍| 944490/1000000 [4:39:04<07:53, 117.16step/s]


Episode 1889 finished at step 500 (944500 total). Env Reward: -8.33, Steps: 500, Delivered: 6


Total Steps Trained:  94%|█████████▍| 944512/1000000 [4:39:09<1:24:31, 10.94step/s]


--- Rollout Summary (Steps 944001 to 944500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 651.9830
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  94%|█████████▍| 944996/1000000 [4:39:13<07:57, 115.24step/s]


Episode 1890 finished at step 500 (945000 total). Env Reward: -8.88, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001890_map1.pth


Total Steps Trained:  95%|█████████▍| 945020/1000000 [4:39:18<1:21:04, 11.30step/s]


--- Rollout Summary (Steps 944501 to 945000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0090
Avg Critic Loss (per minibatch): 427.3282
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  95%|█████████▍| 945493/1000000 [4:39:22<07:41, 118.05step/s]


Episode 1891 finished at step 500 (945500 total). Env Reward: -6.30, Steps: 500, Delivered: 8


Total Steps Trained:  95%|█████████▍| 945517/1000000 [4:39:27<1:19:06, 11.48step/s]


--- Rollout Summary (Steps 945001 to 945500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0030
Avg Critic Loss (per minibatch): 546.5805
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  95%|█████████▍| 945995/1000000 [4:39:31<07:33, 119.21step/s]


Episode 1892 finished at step 500 (946000 total). Env Reward: -11.26, Steps: 500, Delivered: 3


Total Steps Trained:  95%|█████████▍| 946018/1000000 [4:39:36<1:18:12, 11.50step/s]


--- Rollout Summary (Steps 945501 to 946000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 511.0064
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  95%|█████████▍| 946491/1000000 [4:39:40<07:16, 122.55step/s]


Episode 1893 finished at step 500 (946500 total). Env Reward: -8.58, Steps: 500, Delivered: 6


Total Steps Trained:  95%|█████████▍| 946513/1000000 [4:39:45<1:21:35, 10.93step/s]


--- Rollout Summary (Steps 946001 to 946500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0035
Avg Critic Loss (per minibatch): 475.1914
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  95%|█████████▍| 946998/1000000 [4:39:49<07:16, 121.37step/s]


Episode 1894 finished at step 500 (947000 total). Env Reward: 1.27, Steps: 500, Delivered: 7


Total Steps Trained:  95%|█████████▍| 947011/1000000 [4:39:54<1:40:42,  8.77step/s]


--- Rollout Summary (Steps 946501 to 947000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 1073.0655
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  95%|█████████▍| 947495/1000000 [4:39:58<07:14, 120.97step/s]


Episode 1895 finished at step 500 (947500 total). Env Reward: -1.61, Steps: 500, Delivered: 4


Total Steps Trained:  95%|█████████▍| 947519/1000000 [4:40:02<1:14:33, 11.73step/s]


--- Rollout Summary (Steps 947001 to 947500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.52
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 762.7427
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  95%|█████████▍| 947993/1000000 [4:40:07<07:38, 113.55step/s]


Episode 1896 finished at step 500 (948000 total). Env Reward: -7.81, Steps: 500, Delivered: 7


Total Steps Trained:  95%|█████████▍| 948016/1000000 [4:40:12<1:18:16, 11.07step/s]


--- Rollout Summary (Steps 947501 to 948000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 504.6886
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  95%|█████████▍| 948494/1000000 [4:40:16<07:11, 119.48step/s]


Episode 1897 finished at step 500 (948500 total). Env Reward: -12.56, Steps: 500, Delivered: 2


Total Steps Trained:  95%|█████████▍| 948517/1000000 [4:40:21<1:16:15, 11.25step/s]


--- Rollout Summary (Steps 948001 to 948500) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.50
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 561.8296
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  95%|█████████▍| 948991/1000000 [4:40:25<07:54, 107.48step/s]


Episode 1898 finished at step 500 (949000 total). Env Reward: -7.70, Steps: 500, Delivered: 7


Total Steps Trained:  95%|█████████▍| 949013/1000000 [4:40:29<1:18:14, 10.86step/s]


--- Rollout Summary (Steps 948501 to 949000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0077
Avg Critic Loss (per minibatch): 498.5985
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  95%|█████████▍| 949500/1000000 [4:40:34<07:24, 113.58step/s]


Episode 1899 finished at step 500 (949500 total). Env Reward: -7.11, Steps: 500, Delivered: 7


Total Steps Trained:  95%|█████████▍| 949512/1000000 [4:40:38<1:41:44,  8.27step/s]


--- Rollout Summary (Steps 949001 to 949500) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 415.8328
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  95%|█████████▍| 949993/1000000 [4:40:42<06:51, 121.61step/s]


Episode 1900 finished at step 500 (950000 total). Env Reward: -11.48, Steps: 500, Delivered: 3
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001900_map1.pth


Total Steps Trained:  95%|█████████▌| 950017/1000000 [4:40:47<1:11:05, 11.72step/s]


--- Rollout Summary (Steps 949501 to 950000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 406.9268
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  95%|█████████▌| 950499/1000000 [4:40:51<06:45, 122.07step/s]


Episode 1901 finished at step 500 (950500 total). Env Reward: -7.53, Steps: 500, Delivered: 7


Total Steps Trained:  95%|█████████▌| 950512/1000000 [4:40:56<1:34:16,  8.75step/s]


--- Rollout Summary (Steps 950001 to 950500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 627.4825
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  95%|█████████▌| 950998/1000000 [4:41:00<06:58, 117.05step/s]


Episode 1902 finished at step 500 (951000 total). Env Reward: -8.90, Steps: 500, Delivered: 5


Total Steps Trained:  95%|█████████▌| 951010/1000000 [4:41:05<1:40:05,  8.16step/s]


--- Rollout Summary (Steps 950501 to 951000) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.36
Avg Actor Loss (per minibatch): -0.0069
Avg Critic Loss (per minibatch): 408.5622
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  95%|█████████▌| 951496/1000000 [4:41:09<07:05, 113.95step/s]


Episode 1903 finished at step 500 (951500 total). Env Reward: -10.94, Steps: 500, Delivered: 4


Total Steps Trained:  95%|█████████▌| 951518/1000000 [4:41:14<1:14:43, 10.81step/s]


--- Rollout Summary (Steps 951001 to 951500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.68
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 463.6874
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  95%|█████████▌| 951999/1000000 [4:41:18<07:01, 113.75step/s]


Episode 1904 finished at step 500 (952000 total). Env Reward: -9.50, Steps: 500, Delivered: 5


Total Steps Trained:  95%|█████████▌| 952011/1000000 [4:41:23<1:39:06,  8.07step/s]


--- Rollout Summary (Steps 951501 to 952000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.67
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 496.8285
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  95%|█████████▌| 952495/1000000 [4:41:27<06:31, 121.34step/s]


Episode 1905 finished at step 500 (952500 total). Env Reward: -12.43, Steps: 500, Delivered: 2


Total Steps Trained:  95%|█████████▌| 952519/1000000 [4:41:32<1:07:52, 11.66step/s]


--- Rollout Summary (Steps 952001 to 952500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.83
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0042
Avg Critic Loss (per minibatch): 670.7287
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  95%|█████████▌| 952998/1000000 [4:41:36<06:24, 122.37step/s]


Episode 1906 finished at step 500 (953000 total). Env Reward: -8.28, Steps: 500, Delivered: 6


Total Steps Trained:  95%|█████████▌| 953011/1000000 [4:41:40<1:28:42,  8.83step/s]


--- Rollout Summary (Steps 952501 to 953000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.82
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.28
Avg Actor Loss (per minibatch): -0.0027
Avg Critic Loss (per minibatch): 450.1661
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  95%|█████████▌| 953498/1000000 [4:41:44<06:24, 121.05step/s]


Episode 1907 finished at step 500 (953500 total). Env Reward: -12.56, Steps: 500, Delivered: 2


Total Steps Trained:  95%|█████████▌| 953511/1000000 [4:41:49<1:27:54,  8.81step/s]


--- Rollout Summary (Steps 953001 to 953500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.92
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.27
Avg Actor Loss (per minibatch): -0.0011
Avg Critic Loss (per minibatch): 436.9093
Avg Entropy (per minibatch): 2.5618
------------------------------


Total Steps Trained:  95%|█████████▌| 954000/1000000 [4:41:54<06:27, 118.62step/s]


Episode 1908 finished at step 500 (954000 total). Env Reward: -9.91, Steps: 500, Delivered: 5


Total Steps Trained:  95%|█████████▌| 954012/1000000 [4:41:59<1:31:06,  8.41step/s]


--- Rollout Summary (Steps 953501 to 954000) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.90
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 618.3379
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  95%|█████████▌| 954490/1000000 [4:42:03<06:27, 117.36step/s]


Episode 1909 finished at step 500 (954500 total). Env Reward: -7.74, Steps: 500, Delivered: 7


Total Steps Trained:  95%|█████████▌| 954512/1000000 [4:42:07<1:09:37, 10.89step/s]


--- Rollout Summary (Steps 954001 to 954500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.91
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.29
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 373.2707
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  95%|█████████▌| 954993/1000000 [4:42:11<06:11, 121.26step/s]


Episode 1910 finished at step 500 (955000 total). Env Reward: 1.79, Steps: 500, Delivered: 7
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001910_map1.pth


Total Steps Trained:  96%|█████████▌| 955018/1000000 [4:42:16<1:02:05, 12.07step/s]


--- Rollout Summary (Steps 954501 to 955000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.77
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0028
Avg Critic Loss (per minibatch): 1054.7417
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  96%|█████████▌| 955500/1000000 [4:42:20<06:26, 115.17step/s]


Episode 1911 finished at step 500 (955500 total). Env Reward: -9.08, Steps: 500, Delivered: 5


Total Steps Trained:  96%|█████████▌| 955512/1000000 [4:42:25<1:30:48,  8.16step/s]


--- Rollout Summary (Steps 955001 to 955500) ---
Update Duration: 4.58s
Avg Episode Reward (last 100): -7.76
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.34
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 642.1918
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  96%|█████████▌| 955997/1000000 [4:42:29<06:12, 118.26step/s]


Episode 1912 finished at step 500 (956000 total). Env Reward: -10.64, Steps: 500, Delivered: 4


Total Steps Trained:  96%|█████████▌| 956020/1000000 [4:42:34<1:04:45, 11.32step/s]


--- Rollout Summary (Steps 955501 to 956000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.32
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 452.1585
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  96%|█████████▌| 956489/1000000 [4:42:38<06:11, 117.14step/s]


Episode 1913 finished at step 500 (956500 total). Env Reward: -10.61, Steps: 500, Delivered: 4


Total Steps Trained:  96%|█████████▌| 956512/1000000 [4:42:43<1:03:00, 11.50step/s]


--- Rollout Summary (Steps 956001 to 956500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.79
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.30
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 552.0770
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained:  96%|█████████▌| 956990/1000000 [4:42:47<06:08, 116.67step/s]


Episode 1914 finished at step 500 (957000 total). Env Reward: 5.33, Steps: 500, Delivered: 11


Total Steps Trained:  96%|█████████▌| 957014/1000000 [4:42:51<1:01:39, 11.62step/s]


--- Rollout Summary (Steps 956501 to 957000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 1335.8218
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  96%|█████████▌| 957496/1000000 [4:42:56<05:57, 118.94step/s]


Episode 1915 finished at step 500 (957500 total). Env Reward: -11.34, Steps: 500, Delivered: 3


Total Steps Trained:  96%|█████████▌| 957519/1000000 [4:43:00<1:02:12, 11.38step/s]


--- Rollout Summary (Steps 957001 to 957500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.74
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 576.9153
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  96%|█████████▌| 957994/1000000 [4:43:04<05:46, 121.25step/s]


Episode 1916 finished at step 500 (958000 total). Env Reward: -11.87, Steps: 500, Delivered: 3


Total Steps Trained:  96%|█████████▌| 958018/1000000 [4:43:09<59:16, 11.81step/s]  


--- Rollout Summary (Steps 957501 to 958000) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0051
Avg Critic Loss (per minibatch): 433.9719
Avg Entropy (per minibatch): 2.5592
------------------------------


Total Steps Trained:  96%|█████████▌| 958489/1000000 [4:43:13<05:46, 119.83step/s]


Episode 1917 finished at step 500 (958500 total). Env Reward: -9.62, Steps: 500, Delivered: 5


Total Steps Trained:  96%|█████████▌| 958510/1000000 [4:43:18<1:03:56, 10.81step/s]


--- Rollout Summary (Steps 958001 to 958500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0022
Avg Critic Loss (per minibatch): 464.4108
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  96%|█████████▌| 958989/1000000 [4:43:22<05:58, 114.28step/s]


Episode 1918 finished at step 500 (959000 total). Env Reward: -7.52, Steps: 500, Delivered: 7


Total Steps Trained:  96%|█████████▌| 959013/1000000 [4:43:27<59:04, 11.56step/s]  


--- Rollout Summary (Steps 958501 to 959000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): 0.0005
Avg Critic Loss (per minibatch): 478.5969
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  96%|█████████▌| 959492/1000000 [4:43:31<05:37, 120.05step/s]


Episode 1919 finished at step 500 (959500 total). Env Reward: -11.59, Steps: 500, Delivered: 3


Total Steps Trained:  96%|█████████▌| 959516/1000000 [4:43:35<56:57, 11.85step/s]  


--- Rollout Summary (Steps 959001 to 959500) ---
Update Duration: 4.44s
Avg Episode Reward (last 100): -7.78
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.31
Avg Actor Loss (per minibatch): -0.0013
Avg Critic Loss (per minibatch): 626.0193
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  96%|█████████▌| 959998/1000000 [4:43:40<05:52, 113.56step/s]


Episode 1920 finished at step 500 (960000 total). Env Reward: -2.55, Steps: 500, Delivered: 12
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001920_map1.pth


Total Steps Trained:  96%|█████████▌| 960010/1000000 [4:43:44<1:22:22,  8.09step/s]


--- Rollout Summary (Steps 959501 to 960000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0072
Avg Critic Loss (per minibatch): 488.1343
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  96%|█████████▌| 960497/1000000 [4:43:49<05:38, 116.72step/s]


Episode 1921 finished at step 500 (960500 total). Env Reward: -8.76, Steps: 500, Delivered: 6


Total Steps Trained:  96%|█████████▌| 960520/1000000 [4:43:53<58:38, 11.22step/s]  


--- Rollout Summary (Steps 960001 to 960500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.38
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 819.9884
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  96%|█████████▌| 960991/1000000 [4:43:57<05:21, 121.16step/s]


Episode 1922 finished at step 500 (961000 total). Env Reward: -11.31, Steps: 500, Delivered: 2


Total Steps Trained:  96%|█████████▌| 961015/1000000 [4:44:02<58:07, 11.18step/s]  


--- Rollout Summary (Steps 960501 to 961000) ---
Update Duration: 4.73s
Avg Episode Reward (last 100): -7.73
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.35
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 437.9553
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  96%|█████████▌| 961500/1000000 [4:44:06<05:20, 120.11step/s]


Episode 1923 finished at step 500 (961500 total). Env Reward: -0.13, Steps: 500, Delivered: 6


Total Steps Trained:  96%|█████████▌| 961513/1000000 [4:44:11<1:12:39,  8.83step/s]


--- Rollout Summary (Steps 961001 to 961500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.75
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.33
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 1111.4815
Avg Entropy (per minibatch): 2.5593
------------------------------


Total Steps Trained:  96%|█████████▌| 961989/1000000 [4:44:15<05:38, 112.36step/s]


Episode 1924 finished at step 500 (962000 total). Env Reward: -7.10, Steps: 500, Delivered: 8


Total Steps Trained:  96%|█████████▌| 962012/1000000 [4:44:20<57:12, 11.07step/s]  


--- Rollout Summary (Steps 961501 to 962000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.71
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 594.3216
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  96%|█████████▌| 962496/1000000 [4:44:24<05:09, 121.31step/s]


Episode 1925 finished at step 500 (962500 total). Env Reward: -8.58, Steps: 500, Delivered: 6


Total Steps Trained:  96%|█████████▋| 962519/1000000 [4:44:29<54:49, 11.40step/s]  


--- Rollout Summary (Steps 962001 to 962500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.72
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.37
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 454.9644
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  96%|█████████▋| 962995/1000000 [4:44:33<05:24, 114.18step/s]


Episode 1926 finished at step 500 (963000 total). Env Reward: -8.05, Steps: 500, Delivered: 7


Total Steps Trained:  96%|█████████▋| 963018/1000000 [4:44:38<56:06, 10.98step/s]  


--- Rollout Summary (Steps 962501 to 963000) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.70
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.39
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 543.1527
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained:  96%|█████████▋| 963490/1000000 [4:44:42<05:14, 115.94step/s]


Episode 1927 finished at step 500 (963500 total). Env Reward: -4.99, Steps: 500, Delivered: 9


Total Steps Trained:  96%|█████████▋| 963513/1000000 [4:44:47<54:39, 11.12step/s]  


--- Rollout Summary (Steps 963001 to 963500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.65
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.43
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 578.7296
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  96%|█████████▋| 963990/1000000 [4:44:51<05:50, 102.78step/s]


Episode 1928 finished at step 500 (964000 total). Env Reward: -5.94, Steps: 500, Delivered: 8


Total Steps Trained:  96%|█████████▋| 964012/1000000 [4:44:56<57:18, 10.47step/s]  


--- Rollout Summary (Steps 963501 to 964000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.48
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 908.2247
Avg Entropy (per minibatch): 2.5597
------------------------------


Total Steps Trained:  96%|█████████▋| 964494/1000000 [4:45:00<04:57, 119.37step/s]


Episode 1929 finished at step 500 (964500 total). Env Reward: -8.64, Steps: 500, Delivered: 6


Total Steps Trained:  96%|█████████▋| 964517/1000000 [4:45:05<53:55, 10.97step/s]  


--- Rollout Summary (Steps 964001 to 964500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.50
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 604.7568
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  96%|█████████▋| 964995/1000000 [4:45:09<04:53, 119.35step/s]


Episode 1930 finished at step 500 (965000 total). Env Reward: -1.96, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001930_map1.pth


Total Steps Trained:  97%|█████████▋| 965018/1000000 [4:45:14<51:37, 11.29step/s]  


--- Rollout Summary (Steps 964501 to 965000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 1331.1220
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  97%|█████████▋| 965499/1000000 [4:45:18<05:18, 108.17step/s]


Episode 1931 finished at step 500 (965500 total). Env Reward: -7.80, Steps: 500, Delivered: 7


Total Steps Trained:  97%|█████████▋| 965521/1000000 [4:45:22<52:13, 11.00step/s]  


--- Rollout Summary (Steps 965001 to 965500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 577.4688
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  97%|█████████▋| 965990/1000000 [4:45:26<04:47, 118.32step/s]


Episode 1932 finished at step 500 (966000 total). Env Reward: -8.65, Steps: 500, Delivered: 6


Total Steps Trained:  97%|█████████▋| 966014/1000000 [4:45:31<48:57, 11.57step/s]  


--- Rollout Summary (Steps 965501 to 966000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 366.1218
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  97%|█████████▋| 966493/1000000 [4:45:35<04:51, 114.94step/s]


Episode 1933 finished at step 500 (966500 total). Env Reward: -10.02, Steps: 500, Delivered: 5


Total Steps Trained:  97%|█████████▋| 966516/1000000 [4:45:40<50:22, 11.08step/s]  


--- Rollout Summary (Steps 966001 to 966500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 454.8727
Avg Entropy (per minibatch): 2.5585
------------------------------


Total Steps Trained:  97%|█████████▋| 966996/1000000 [4:45:44<04:53, 112.54step/s]


Episode 1934 finished at step 500 (967000 total). Env Reward: -6.06, Steps: 500, Delivered: 9


Total Steps Trained:  97%|█████████▋| 967019/1000000 [4:45:49<49:49, 11.03step/s]  


--- Rollout Summary (Steps 966501 to 967000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.58
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 451.0790
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  97%|█████████▋| 967496/1000000 [4:45:53<04:36, 117.61step/s]


Episode 1935 finished at step 500 (967500 total). Env Reward: -9.03, Steps: 500, Delivered: 6


Total Steps Trained:  97%|█████████▋| 967519/1000000 [4:45:58<48:56, 11.06step/s]  


--- Rollout Summary (Steps 967001 to 967500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0076
Avg Critic Loss (per minibatch): 515.3272
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  97%|█████████▋| 967992/1000000 [4:46:02<04:22, 121.85step/s]


Episode 1936 finished at step 500 (968000 total). Env Reward: -10.22, Steps: 500, Delivered: 4


Total Steps Trained:  97%|█████████▋| 968016/1000000 [4:46:07<46:32, 11.45step/s]  


--- Rollout Summary (Steps 967501 to 968000) ---
Update Duration: 4.60s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0045
Avg Critic Loss (per minibatch): 570.1263
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  97%|█████████▋| 968500/1000000 [4:46:11<04:27, 117.66step/s]


Episode 1937 finished at step 500 (968500 total). Env Reward: -7.78, Steps: 500, Delivered: 7


Total Steps Trained:  97%|█████████▋| 968512/1000000 [4:46:16<1:02:08,  8.45step/s]


--- Rollout Summary (Steps 968001 to 968500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 419.8941
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  97%|█████████▋| 968996/1000000 [4:46:20<04:26, 116.21step/s]


Episode 1938 finished at step 500 (969000 total). Env Reward: -7.95, Steps: 500, Delivered: 7


Total Steps Trained:  97%|█████████▋| 969019/1000000 [4:46:25<46:09, 11.19step/s]  


--- Rollout Summary (Steps 968501 to 969000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 603.1876
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  97%|█████████▋| 969498/1000000 [4:46:29<04:31, 112.49step/s]


Episode 1939 finished at step 500 (969500 total). Env Reward: -9.31, Steps: 500, Delivered: 5


Total Steps Trained:  97%|█████████▋| 969522/1000000 [4:46:34<44:37, 11.38step/s]  


--- Rollout Summary (Steps 969001 to 969500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0076
Avg Critic Loss (per minibatch): 488.1714
Avg Entropy (per minibatch): 2.5618
------------------------------


Total Steps Trained:  97%|█████████▋| 969996/1000000 [4:46:38<04:30, 111.09step/s]


Episode 1940 finished at step 500 (970000 total). Env Reward: 2.74, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001940_map1.pth


Total Steps Trained:  97%|█████████▋| 970019/1000000 [4:46:43<45:40, 10.94step/s]  


--- Rollout Summary (Steps 969501 to 970000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.47
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 879.6136
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  97%|█████████▋| 970490/1000000 [4:46:47<04:10, 117.93step/s]


Episode 1941 finished at step 500 (970500 total). Env Reward: -10.91, Steps: 500, Delivered: 4


Total Steps Trained:  97%|█████████▋| 970513/1000000 [4:46:52<43:42, 11.24step/s]


--- Rollout Summary (Steps 970001 to 970500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 568.7610
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  97%|█████████▋| 970996/1000000 [4:46:56<04:25, 109.30step/s]


Episode 1942 finished at step 500 (971000 total). Env Reward: -7.37, Steps: 500, Delivered: 7


Total Steps Trained:  97%|█████████▋| 971018/1000000 [4:47:01<45:28, 10.62step/s]  


--- Rollout Summary (Steps 970501 to 971000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 539.2733
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  97%|█████████▋| 971497/1000000 [4:47:05<04:02, 117.65step/s]


Episode 1943 finished at step 500 (971500 total). Env Reward: -8.69, Steps: 500, Delivered: 6


Total Steps Trained:  97%|█████████▋| 971521/1000000 [4:47:10<41:17, 11.50step/s]


--- Rollout Summary (Steps 971001 to 971500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.48
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0049
Avg Critic Loss (per minibatch): 660.4559
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  97%|█████████▋| 971990/1000000 [4:47:14<04:01, 115.94step/s]


Episode 1944 finished at step 500 (972000 total). Env Reward: -6.51, Steps: 500, Delivered: 8


Total Steps Trained:  97%|█████████▋| 972012/1000000 [4:47:18<43:15, 10.78step/s]


--- Rollout Summary (Steps 971501 to 972000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0093
Avg Critic Loss (per minibatch): 515.3546
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  97%|█████████▋| 972491/1000000 [4:47:23<04:04, 112.51step/s]


Episode 1945 finished at step 500 (972500 total). Env Reward: -10.70, Steps: 500, Delivered: 4


Total Steps Trained:  97%|█████████▋| 972514/1000000 [4:47:27<41:28, 11.05step/s]


--- Rollout Summary (Steps 972001 to 972500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.44
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 530.9934
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained:  97%|█████████▋| 972992/1000000 [4:47:31<03:46, 119.27step/s]


Episode 1946 finished at step 500 (973000 total). Env Reward: -9.59, Steps: 500, Delivered: 5


Total Steps Trained:  97%|█████████▋| 973015/1000000 [4:47:36<39:49, 11.29step/s]


--- Rollout Summary (Steps 972501 to 973000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.45
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0004
Avg Critic Loss (per minibatch): 285.0329
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  97%|█████████▋| 973500/1000000 [4:47:40<04:02, 109.15step/s]


Episode 1947 finished at step 500 (973500 total). Env Reward: -10.31, Steps: 500, Delivered: 4


Total Steps Trained:  97%|█████████▋| 973511/1000000 [4:47:45<55:24,  7.97step/s] 


--- Rollout Summary (Steps 973001 to 973500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 456.2545
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  97%|█████████▋| 973996/1000000 [4:47:49<03:47, 114.11step/s]


Episode 1948 finished at step 500 (974000 total). Env Reward: -8.34, Steps: 500, Delivered: 7


Total Steps Trained:  97%|█████████▋| 974020/1000000 [4:47:54<37:48, 11.45step/s]


--- Rollout Summary (Steps 973501 to 974000) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0093
Avg Critic Loss (per minibatch): 584.6838
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  97%|█████████▋| 974493/1000000 [4:47:58<03:43, 114.14step/s]


Episode 1949 finished at step 500 (974500 total). Env Reward: -10.09, Steps: 500, Delivered: 4


Total Steps Trained:  97%|█████████▋| 974516/1000000 [4:48:03<37:19, 11.38step/s]


--- Rollout Summary (Steps 974001 to 974500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0029
Avg Critic Loss (per minibatch): 659.3393
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  98%|█████████▊| 975000/1000000 [4:48:07<03:51, 108.04step/s]


Episode 1950 finished at step 500 (975000 total). Env Reward: -6.74, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001950_map1.pth


Total Steps Trained:  98%|█████████▊| 975011/1000000 [4:48:12<54:05,  7.70step/s] 


--- Rollout Summary (Steps 974501 to 975000) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 399.2661
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  98%|█████████▊| 975490/1000000 [4:48:16<03:23, 120.64step/s]


Episode 1951 finished at step 500 (975500 total). Env Reward: 19.45, Steps: 500, Delivered: 7


Total Steps Trained:  98%|█████████▊| 975514/1000000 [4:48:21<34:47, 11.73step/s]


--- Rollout Summary (Steps 975001 to 975500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0008
Avg Critic Loss (per minibatch): 2040.5492
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  98%|█████████▊| 975998/1000000 [4:48:25<03:15, 123.01step/s]


Episode 1952 finished at step 500 (976000 total). Env Reward: -12.26, Steps: 500, Delivered: 2


Total Steps Trained:  98%|█████████▊| 976011/1000000 [4:48:29<45:27,  8.80step/s] 


--- Rollout Summary (Steps 975501 to 976000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.46
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 487.8014
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  98%|█████████▊| 976496/1000000 [4:48:33<03:27, 113.03step/s]


Episode 1953 finished at step 500 (976500 total). Env Reward: -10.55, Steps: 500, Delivered: 4


Total Steps Trained:  98%|█████████▊| 976518/1000000 [4:48:38<36:07, 10.83step/s]


--- Rollout Summary (Steps 976001 to 976500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.43
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0025
Avg Critic Loss (per minibatch): 626.7439
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  98%|█████████▊| 976992/1000000 [4:48:42<03:13, 118.83step/s]


Episode 1954 finished at step 500 (977000 total). Env Reward: -8.28, Steps: 500, Delivered: 6


Total Steps Trained:  98%|█████████▊| 977015/1000000 [4:48:47<34:27, 11.12step/s]


--- Rollout Summary (Steps 976501 to 977000) ---
Update Duration: 4.61s
Avg Episode Reward (last 100): -7.37
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0103
Avg Critic Loss (per minibatch): 627.3638
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  98%|█████████▊| 977494/1000000 [4:48:51<03:06, 120.73step/s]


Episode 1955 finished at step 500 (977500 total). Env Reward: -8.75, Steps: 500, Delivered: 6


Total Steps Trained:  98%|█████████▊| 977518/1000000 [4:48:56<32:04, 11.68step/s]


--- Rollout Summary (Steps 977001 to 977500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.32
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0055
Avg Critic Loss (per minibatch): 495.7551
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  98%|█████████▊| 977999/1000000 [4:49:00<03:09, 116.08step/s]


Episode 1956 finished at step 500 (978000 total). Env Reward: -7.72, Steps: 500, Delivered: 7


Total Steps Trained:  98%|█████████▊| 978011/1000000 [4:49:05<43:58,  8.33step/s] 


--- Rollout Summary (Steps 977501 to 978000) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.34
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 521.2142
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  98%|█████████▊| 978497/1000000 [4:49:09<03:04, 116.33step/s]


Episode 1957 finished at step 500 (978500 total). Env Reward: -10.36, Steps: 500, Delivered: 4


Total Steps Trained:  98%|█████████▊| 978520/1000000 [4:49:14<32:22, 11.06step/s]


--- Rollout Summary (Steps 978001 to 978500) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.38
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0033
Avg Critic Loss (per minibatch): 527.7985
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  98%|█████████▊| 978992/1000000 [4:49:18<03:06, 112.50step/s]


Episode 1958 finished at step 500 (979000 total). Env Reward: -12.61, Steps: 500, Delivered: 2


Total Steps Trained:  98%|█████████▊| 979015/1000000 [4:49:23<30:53, 11.32step/s]


--- Rollout Summary (Steps 978501 to 979000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.40
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.57
Avg Actor Loss (per minibatch): -0.0017
Avg Critic Loss (per minibatch): 583.3534
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  98%|█████████▊| 979498/1000000 [4:49:27<02:51, 119.27step/s]


Episode 1959 finished at step 500 (979500 total). Env Reward: 3.46, Steps: 500, Delivered: 9


Total Steps Trained:  98%|█████████▊| 979521/1000000 [4:49:32<29:58, 11.39step/s]


--- Rollout Summary (Steps 979001 to 979500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.42
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 978.0361
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  98%|█████████▊| 979999/1000000 [4:49:36<02:47, 119.60step/s]


Episode 1960 finished at step 500 (980000 total). Env Reward: -8.90, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001960_map1.pth


Total Steps Trained:  98%|█████████▊| 980011/1000000 [4:49:40<39:21,  8.46step/s] 


--- Rollout Summary (Steps 979501 to 980000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.41
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.56
Avg Actor Loss (per minibatch): -0.0021
Avg Critic Loss (per minibatch): 468.8007
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  98%|█████████▊| 980493/1000000 [4:49:45<02:44, 118.90step/s]


Episode 1961 finished at step 500 (980500 total). Env Reward: -8.09, Steps: 500, Delivered: 6


Total Steps Trained:  98%|█████████▊| 980516/1000000 [4:49:49<29:43, 10.92step/s]


--- Rollout Summary (Steps 980001 to 980500) ---
Update Duration: 4.67s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 892.0290
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  98%|█████████▊| 980999/1000000 [4:49:53<02:39, 119.06step/s]


Episode 1962 finished at step 500 (981000 total). Env Reward: -10.82, Steps: 500, Delivered: 4


Total Steps Trained:  98%|█████████▊| 981011/1000000 [4:49:58<38:43,  8.17step/s] 


--- Rollout Summary (Steps 980501 to 981000) ---
Update Duration: 4.65s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.49
Avg Actor Loss (per minibatch): -0.0046
Avg Critic Loss (per minibatch): 540.2937
Avg Entropy (per minibatch): 2.5611
------------------------------


Total Steps Trained:  98%|█████████▊| 981495/1000000 [4:50:03<02:38, 116.49step/s]


Episode 1963 finished at step 500 (981500 total). Env Reward: -5.48, Steps: 500, Delivered: 9


Total Steps Trained:  98%|█████████▊| 981518/1000000 [4:50:07<27:25, 11.23step/s]


--- Rollout Summary (Steps 981001 to 981500) ---
Update Duration: 4.48s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0077
Avg Critic Loss (per minibatch): 596.6060
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  98%|█████████▊| 981993/1000000 [4:50:12<02:31, 118.57step/s]


Episode 1964 finished at step 500 (982000 total). Env Reward: -10.27, Steps: 500, Delivered: 4


Total Steps Trained:  98%|█████████▊| 982015/1000000 [4:50:16<26:50, 11.17step/s]


--- Rollout Summary (Steps 981501 to 982000) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 649.4091
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  98%|█████████▊| 982495/1000000 [4:50:20<02:30, 116.05step/s]


Episode 1965 finished at step 500 (982500 total). Env Reward: -12.27, Steps: 500, Delivered: 2


Total Steps Trained:  98%|█████████▊| 982518/1000000 [4:50:25<25:40, 11.35step/s]


--- Rollout Summary (Steps 982001 to 982500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): 0.0005
Avg Critic Loss (per minibatch): 683.5470
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  98%|█████████▊| 982993/1000000 [4:50:29<02:30, 112.99step/s]


Episode 1966 finished at step 500 (983000 total). Env Reward: -9.86, Steps: 500, Delivered: 5


Total Steps Trained:  98%|█████████▊| 983016/1000000 [4:50:34<25:25, 11.13step/s]


--- Rollout Summary (Steps 982501 to 983000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): 0.0003
Avg Critic Loss (per minibatch): 533.7816
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained:  98%|█████████▊| 983494/1000000 [4:50:38<02:26, 113.05step/s]


Episode 1967 finished at step 500 (983500 total). Env Reward: -9.59, Steps: 500, Delivered: 5


Total Steps Trained:  98%|█████████▊| 983517/1000000 [4:50:43<25:04, 10.96step/s]


--- Rollout Summary (Steps 983001 to 983500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0019
Avg Critic Loss (per minibatch): 476.5207
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  98%|█████████▊| 983997/1000000 [4:50:47<02:18, 115.73step/s]


Episode 1968 finished at step 500 (984000 total). Env Reward: -9.71, Steps: 500, Delivered: 5


Total Steps Trained:  98%|█████████▊| 984020/1000000 [4:50:52<24:17, 10.97step/s]


--- Rollout Summary (Steps 983501 to 984000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.51
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 550.6583
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  98%|█████████▊| 984493/1000000 [4:50:56<02:12, 117.28step/s]


Episode 1969 finished at step 500 (984500 total). Env Reward: 3.17, Steps: 500, Delivered: 9


Total Steps Trained:  98%|█████████▊| 984517/1000000 [4:51:01<22:34, 11.43step/s]


--- Rollout Summary (Steps 984001 to 984500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.54
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0068
Avg Critic Loss (per minibatch): 892.1193
Avg Entropy (per minibatch): 2.5607
------------------------------


Total Steps Trained:  98%|█████████▊| 984993/1000000 [4:51:05<02:06, 118.83step/s]


Episode 1970 finished at step 500 (985000 total). Env Reward: -8.32, Steps: 500, Delivered: 6
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001970_map1.pth


Total Steps Trained:  99%|█████████▊| 985016/1000000 [4:51:10<22:38, 11.03step/s]


--- Rollout Summary (Steps 984501 to 985000) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.52
Avg Actor Loss (per minibatch): -0.0026
Avg Critic Loss (per minibatch): 454.6779
Avg Entropy (per minibatch): 2.5608
------------------------------


Total Steps Trained:  99%|█████████▊| 985489/1000000 [4:51:14<02:00, 119.94step/s]


Episode 1971 finished at step 500 (985500 total). Env Reward: -8.10, Steps: 500, Delivered: 7


Total Steps Trained:  99%|█████████▊| 985511/1000000 [4:51:19<21:46, 11.09step/s]


--- Rollout Summary (Steps 985001 to 985500) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.54
Avg Actor Loss (per minibatch): -0.0056
Avg Critic Loss (per minibatch): 606.2792
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  99%|█████████▊| 985999/1000000 [4:51:23<01:58, 118.23step/s]


Episode 1972 finished at step 500 (986000 total). Env Reward: -8.28, Steps: 500, Delivered: 6


Total Steps Trained:  99%|█████████▊| 986011/1000000 [4:51:28<27:54,  8.36step/s] 


--- Rollout Summary (Steps 985501 to 986000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.55
Avg Actor Loss (per minibatch): -0.0048
Avg Critic Loss (per minibatch): 447.5061
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained:  99%|█████████▊| 986491/1000000 [4:51:32<01:54, 117.64step/s]


Episode 1973 finished at step 500 (986500 total). Env Reward: -9.60, Steps: 500, Delivered: 5


Total Steps Trained:  99%|█████████▊| 986513/1000000 [4:51:37<20:29, 10.97step/s]


--- Rollout Summary (Steps 986001 to 986500) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0018
Avg Critic Loss (per minibatch): 628.5846
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  99%|█████████▊| 986995/1000000 [4:51:41<01:48, 119.40step/s]


Episode 1974 finished at step 500 (987000 total). Env Reward: -8.59, Steps: 500, Delivered: 6


Total Steps Trained:  99%|█████████▊| 987019/1000000 [4:51:46<18:29, 11.70step/s]


--- Rollout Summary (Steps 986501 to 987000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.59
Avg Actor Loss (per minibatch): -0.0089
Avg Critic Loss (per minibatch): 560.0473
Avg Entropy (per minibatch): 2.5613
------------------------------


Total Steps Trained:  99%|█████████▊| 987490/1000000 [4:51:50<01:46, 117.18step/s]


Episode 1975 finished at step 500 (987500 total). Env Reward: -7.52, Steps: 500, Delivered: 7


Total Steps Trained:  99%|█████████▉| 987513/1000000 [4:51:55<18:51, 11.03step/s]


--- Rollout Summary (Steps 987001 to 987500) ---
Update Duration: 4.64s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0047
Avg Critic Loss (per minibatch): 496.4728
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained:  99%|█████████▉| 987995/1000000 [4:51:59<01:48, 110.88step/s]


Episode 1976 finished at step 500 (988000 total). Env Reward: -8.62, Steps: 500, Delivered: 6


Total Steps Trained:  99%|█████████▉| 988017/1000000 [4:52:04<18:33, 10.77step/s]


--- Rollout Summary (Steps 987501 to 988000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 661.0894
Avg Entropy (per minibatch): 2.5591
------------------------------


Total Steps Trained:  99%|█████████▉| 988490/1000000 [4:52:08<01:37, 118.16step/s]


Episode 1977 finished at step 500 (988500 total). Env Reward: -8.59, Steps: 500, Delivered: 6


Total Steps Trained:  99%|█████████▉| 988513/1000000 [4:52:12<17:03, 11.22step/s]


--- Rollout Summary (Steps 988001 to 988500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0038
Avg Critic Loss (per minibatch): 740.4396
Avg Entropy (per minibatch): 2.5609
------------------------------


Total Steps Trained:  99%|█████████▉| 988990/1000000 [4:52:17<01:32, 119.49step/s]


Episode 1978 finished at step 500 (989000 total). Env Reward: -11.57, Steps: 500, Delivered: 3


Total Steps Trained:  99%|█████████▉| 989013/1000000 [4:52:21<15:51, 11.55step/s]


--- Rollout Summary (Steps 988501 to 989000) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0062
Avg Critic Loss (per minibatch): 521.1616
Avg Entropy (per minibatch): 2.5599
------------------------------


Total Steps Trained:  99%|█████████▉| 989494/1000000 [4:52:25<01:29, 117.75step/s]


Episode 1979 finished at step 500 (989500 total). Env Reward: 0.28, Steps: 500, Delivered: 6


Total Steps Trained:  99%|█████████▉| 989517/1000000 [4:52:30<15:40, 11.15step/s]


--- Rollout Summary (Steps 989001 to 989500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0053
Avg Critic Loss (per minibatch): 462.9743
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  99%|█████████▉| 989992/1000000 [4:52:34<01:23, 119.25step/s]


Episode 1980 finished at step 500 (990000 total). Env Reward: -10.79, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001980_map1.pth


Total Steps Trained:  99%|█████████▉| 990014/1000000 [4:52:39<15:09, 10.98step/s]


--- Rollout Summary (Steps 989501 to 990000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0060
Avg Critic Loss (per minibatch): 771.1132
Avg Entropy (per minibatch): 2.5596
------------------------------


Total Steps Trained:  99%|█████████▉| 990499/1000000 [4:52:43<01:20, 117.97step/s]


Episode 1981 finished at step 500 (990500 total). Env Reward: -12.20, Steps: 500, Delivered: 3


Total Steps Trained:  99%|█████████▉| 990511/1000000 [4:52:48<19:09,  8.26step/s] 


--- Rollout Summary (Steps 990001 to 990500) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 464.2975
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  99%|█████████▉| 990995/1000000 [4:52:52<01:18, 114.53step/s]


Episode 1982 finished at step 500 (991000 total). Env Reward: -9.61, Steps: 500, Delivered: 5


Total Steps Trained:  99%|█████████▉| 991018/1000000 [4:52:57<14:13, 10.52step/s]


--- Rollout Summary (Steps 990501 to 991000) ---
Update Duration: 4.78s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0054
Avg Critic Loss (per minibatch): 701.8729
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  99%|█████████▉| 991499/1000000 [4:53:01<01:10, 120.48step/s]


Episode 1983 finished at step 500 (991500 total). Env Reward: -13.80, Steps: 500, Delivered: 1


Total Steps Trained:  99%|█████████▉| 991512/1000000 [4:53:06<16:05,  8.79step/s] 


--- Rollout Summary (Steps 991001 to 991500) ---
Update Duration: 4.52s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0031
Avg Critic Loss (per minibatch): 492.3983
Avg Entropy (per minibatch): 2.5605
------------------------------


Total Steps Trained:  99%|█████████▉| 991993/1000000 [4:53:10<01:08, 117.53step/s]


Episode 1984 finished at step 500 (992000 total). Env Reward: 2.12, Steps: 500, Delivered: 8


Total Steps Trained:  99%|█████████▉| 992017/1000000 [4:53:15<11:34, 11.49step/s]


--- Rollout Summary (Steps 991501 to 992000) ---
Update Duration: 4.53s
Avg Episode Reward (last 100): -7.51
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0039
Avg Critic Loss (per minibatch): 904.5509
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  99%|█████████▉| 992496/1000000 [4:53:19<01:04, 117.12step/s]


Episode 1985 finished at step 500 (992500 total). Env Reward: -9.38, Steps: 500, Delivered: 5


Total Steps Trained:  99%|█████████▉| 992519/1000000 [4:53:24<11:14, 11.08step/s]


--- Rollout Summary (Steps 992001 to 992500) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.64
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0061
Avg Critic Loss (per minibatch): 599.1321
Avg Entropy (per minibatch): 2.5601
------------------------------


Total Steps Trained:  99%|█████████▉| 992992/1000000 [4:53:28<01:01, 113.59step/s]


Episode 1986 finished at step 500 (993000 total). Env Reward: -7.42, Steps: 500, Delivered: 7


Total Steps Trained:  99%|█████████▉| 993015/1000000 [4:53:33<10:25, 11.16step/s]


--- Rollout Summary (Steps 992501 to 993000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.62
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.61
Avg Actor Loss (per minibatch): -0.0009
Avg Critic Loss (per minibatch): 510.9790
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained:  99%|█████████▉| 993494/1000000 [4:53:37<00:57, 112.19step/s]


Episode 1987 finished at step 500 (993500 total). Env Reward: -6.61, Steps: 500, Delivered: 8


Total Steps Trained:  99%|█████████▉| 993517/1000000 [4:53:42<09:56, 10.87step/s]


--- Rollout Summary (Steps 993001 to 993500) ---
Update Duration: 4.59s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0050
Avg Critic Loss (per minibatch): 480.4601
Avg Entropy (per minibatch): 2.5603
------------------------------


Total Steps Trained:  99%|█████████▉| 993999/1000000 [4:53:46<00:52, 114.24step/s]


Episode 1988 finished at step 500 (994000 total). Env Reward: -8.53, Steps: 500, Delivered: 6


Total Steps Trained:  99%|█████████▉| 994011/1000000 [4:53:51<12:10,  8.20step/s] 


--- Rollout Summary (Steps 993501 to 994000) ---
Update Duration: 4.57s
Avg Episode Reward (last 100): -7.69
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0036
Avg Critic Loss (per minibatch): 645.0906
Avg Entropy (per minibatch): 2.5600
------------------------------


Total Steps Trained:  99%|█████████▉| 994497/1000000 [4:53:55<00:46, 119.56step/s]


Episode 1989 finished at step 500 (994500 total). Env Reward: -1.92, Steps: 500, Delivered: 3


Total Steps Trained:  99%|█████████▉| 994520/1000000 [4:54:00<08:14, 11.07step/s]


--- Rollout Summary (Steps 994001 to 994500) ---
Update Duration: 4.63s
Avg Episode Reward (last 100): -7.63
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.60
Avg Actor Loss (per minibatch): -0.0079
Avg Critic Loss (per minibatch): 430.6740
Avg Entropy (per minibatch): 2.5606
------------------------------


Total Steps Trained:  99%|█████████▉| 994991/1000000 [4:54:04<00:44, 113.70step/s]


Episode 1990 finished at step 500 (995000 total). Env Reward: -6.93, Steps: 500, Delivered: 8
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_001990_map1.pth


Total Steps Trained: 100%|█████████▉| 995014/1000000 [4:54:09<07:33, 11.00step/s]


--- Rollout Summary (Steps 994501 to 995000) ---
Update Duration: 4.54s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.62
Avg Actor Loss (per minibatch): -0.0080
Avg Critic Loss (per minibatch): 462.0047
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained: 100%|█████████▉| 995489/1000000 [4:54:13<00:38, 116.74step/s]


Episode 1991 finished at step 500 (995500 total). Env Reward: -5.73, Steps: 500, Delivered: 9


Total Steps Trained: 100%|█████████▉| 995512/1000000 [4:54:17<06:37, 11.28step/s]


--- Rollout Summary (Steps 995001 to 995500) ---
Update Duration: 4.47s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.63
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 560.8197
Avg Entropy (per minibatch): 2.5602
------------------------------


Total Steps Trained: 100%|█████████▉| 995992/1000000 [4:54:22<00:35, 114.34step/s]


Episode 1992 finished at step 500 (996000 total). Env Reward: -6.29, Steps: 500, Delivered: 9


Total Steps Trained: 100%|█████████▉| 996016/1000000 [4:54:26<05:45, 11.52step/s]


--- Rollout Summary (Steps 995501 to 996000) ---
Update Duration: 4.45s
Avg Episode Reward (last 100): -7.55
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0067
Avg Critic Loss (per minibatch): 650.6287
Avg Entropy (per minibatch): 2.5610
------------------------------


Total Steps Trained: 100%|█████████▉| 996500/1000000 [4:54:30<00:32, 108.38step/s]


Episode 1993 finished at step 500 (996500 total). Env Reward: -9.27, Steps: 500, Delivered: 6


Total Steps Trained: 100%|█████████▉| 996511/1000000 [4:54:35<07:07,  8.16step/s] 


--- Rollout Summary (Steps 996001 to 996500) ---
Update Duration: 4.46s
Avg Episode Reward (last 100): -7.56
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.69
Avg Actor Loss (per minibatch): -0.0043
Avg Critic Loss (per minibatch): 584.2013
Avg Entropy (per minibatch): 2.5612
------------------------------


Total Steps Trained: 100%|█████████▉| 996988/1000000 [4:54:39<00:24, 122.46step/s]


Episode 1994 finished at step 500 (997000 total). Env Reward: 8.10, Steps: 500, Delivered: 5


Total Steps Trained: 100%|█████████▉| 997012/1000000 [4:54:44<04:13, 11.79step/s]


--- Rollout Summary (Steps 996501 to 997000) ---
Update Duration: 4.49s
Avg Episode Reward (last 100): -7.49
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0040
Avg Critic Loss (per minibatch): 1251.6736
Avg Entropy (per minibatch): 2.5594
------------------------------


Total Steps Trained: 100%|█████████▉| 997499/1000000 [4:54:48<00:21, 113.70step/s]


Episode 1995 finished at step 500 (997500 total). Env Reward: -9.86, Steps: 500, Delivered: 4


Total Steps Trained: 100%|█████████▉| 997511/1000000 [4:54:53<05:01,  8.25step/s] 


--- Rollout Summary (Steps 997001 to 997500) ---
Update Duration: 4.51s
Avg Episode Reward (last 100): -7.57
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.67
Avg Actor Loss (per minibatch): -0.0088
Avg Critic Loss (per minibatch): 620.6646
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained: 100%|█████████▉| 997999/1000000 [4:54:57<00:16, 118.12step/s]


Episode 1996 finished at step 500 (998000 total). Env Reward: -11.01, Steps: 500, Delivered: 4


Total Steps Trained: 100%|█████████▉| 998011/1000000 [4:55:01<03:53,  8.52step/s] 


--- Rollout Summary (Steps 997501 to 998000) ---
Update Duration: 4.55s
Avg Episode Reward (last 100): -7.61
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0058
Avg Critic Loss (per minibatch): 496.8115
Avg Entropy (per minibatch): 2.5615
------------------------------


Total Steps Trained: 100%|█████████▉| 998494/1000000 [4:55:06<00:12, 121.77step/s]


Episode 1997 finished at step 500 (998500 total). Env Reward: -10.64, Steps: 500, Delivered: 4


Total Steps Trained: 100%|█████████▉| 998518/1000000 [4:55:11<02:09, 11.47step/s]


--- Rollout Summary (Steps 998001 to 998500) ---
Update Duration: 4.62s
Avg Episode Reward (last 100): -7.59
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): -0.0044
Avg Critic Loss (per minibatch): 450.3294
Avg Entropy (per minibatch): 2.5604
------------------------------


Total Steps Trained: 100%|█████████▉| 998990/1000000 [4:55:15<00:08, 117.26step/s]


Episode 1998 finished at step 500 (999000 total). Env Reward: -9.40, Steps: 500, Delivered: 5


Total Steps Trained: 100%|█████████▉| 999012/1000000 [4:55:19<01:30, 10.92step/s]


--- Rollout Summary (Steps 998501 to 999000) ---
Update Duration: 4.50s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.64
Avg Actor Loss (per minibatch): -0.0052
Avg Critic Loss (per minibatch): 579.5710
Avg Entropy (per minibatch): 2.5595
------------------------------


Total Steps Trained: 100%|█████████▉| 999498/1000000 [4:55:24<00:04, 116.43step/s]


Episode 1999 finished at step 500 (999500 total). Env Reward: -6.41, Steps: 500, Delivered: 8


Total Steps Trained: 100%|█████████▉| 999521/1000000 [4:55:28<00:43, 11.02step/s]


--- Rollout Summary (Steps 999001 to 999500) ---
Update Duration: 4.56s
Avg Episode Reward (last 100): -7.60
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.65
Avg Actor Loss (per minibatch): -0.0016
Avg Critic Loss (per minibatch): 422.3163
Avg Entropy (per minibatch): 2.5598
------------------------------


Total Steps Trained: 100%|█████████▉| 999999/1000000 [4:55:33<00:00, 114.56step/s]


Episode 2000 finished at step 500 (1000000 total). Env Reward: -10.17, Steps: 500, Delivered: 4
Saved checkpoint to mappo_checkpoints_map1/mappo_checkpoint_ep_002000_map1.pth


Total Steps Trained: 100%|██████████| 1000000/1000000 [4:55:37<00:00, 56.38step/s]


--- Rollout Summary (Steps 999501 to 1000000) ---
Update Duration: 4.68s
Avg Episode Reward (last 100): -7.58
Avg Episode Duration (last 100): 500.00
Avg Delivered Packages (last 100): 5.66
Avg Actor Loss (per minibatch): 0.0019
Avg Critic Loss (per minibatch): 620.1758
Avg Entropy (per minibatch): 2.5592
------------------------------
Training finished.
Saved final checkpoint to mappo_checkpoints_map1/mappo_final_ep_002000_steps_1000000.pth



