In [1]:
# All imports go here
import numpy as np
import copy
import math
import random
import csv
import os
import pandas as pd
from tqdm import tqdm
import pygame
import time
import pickle
import turtle
# from parameters import parameters as p

pygame 2.6.1 (SDL 2.28.4, Python 3.12.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


## Parameters and Hyper Parameters

<details>
    <summary> Simulation Hyperparameters </summary>

## Simulation Hyperparameters:
1) Number_of_experiment : int
2) Number_of_episodes : int
3) Number_of_epochs : int
4) POI_distribution : string
5) Agent_distribution : string
6) Reward_type : int
</details>
<details>
    <summary> Domain Parameters </summary>

## Domain Parameters:
1) X_dimension : float
2) y_dimension : float
3) Number_of_POIs : int
4) Number_of_agents : int
</details>
<details>
    <summary> Fire Parameters </summary>

## Fire Parameters:
1) Value : float
2) Level : int
3) Coupling : int
</details>
<details>
    <summary> Agent Parameters </summary>

## Agent Parameters:
1) Max_step : int
2) Sensor_resolution : float
3) Number_of_sectors : int
4) Sensor_radius : float
</details>
<details>
    <summary> Q-Learning Parameters </summary>

## Q-Learning Parameters:
1) Epsilon : float
2) Epsilon_decay_factor : float
3) Learning_rate : float
4) Discount_factor : float
</details>

___
<details>
    <summary> Functions </summary>

## Functions:
1) 
</details>


In [2]:
parameters = {}
# Domain Params
parameters["x_dim"] = 50.0
parameters["y_dim"] = 50.0
parameters["n_pois"] = 5
parameters["n_agents"] = 3

# Fire Params
parameters["value"] = 100
parameters["level"] = int(2)
parameters["hazard_coupling"] =   {1: {"coupling": 1, "opti_coupling": 1},
                                   2: {"coupling": 2, "opti_coupling": 2},
                                   3: {"coupling": 3, "opti_coupling": 4},
                                   4: {"coupling": 3, "opti_coupling": 5},
                                  }
parameters["coupling"] = int(parameters["hazard_coupling"].get(parameters["level"], 1)["coupling"])
parameters["opti_coupling"] = int(parameters["hazard_coupling"].get(parameters["level"], 1)["opti_coupling"])

# Agent Params
parameters["max_step"] = 1.5
parameters["sensor_res"] = 90.0 # Sector size in degrees
parameters["n_sectors"] = int(360.0 / parameters["sensor_res"])
parameters["sensor_radius"] = 3.0
parameters["observation_size"] = int(2 * parameters["n_sectors"])

# Q-Learning Params
parameters["epsilon"] = 0.9
parameters["epsilon_decay_factor"] = 0.95
parameters["learning_rate"] = 0.1
parameters["discount_factor"] = 0.95

# Simulation Hyperparams
parameters["n_experiments"] = 1
parameters["n_episodes"] = int(1000)
parameters["n_epochs"] = int(20000)
parameters["poi_distribution"] = "Random"
parameters["agent_distribution"] = "OneRandom" # OneRandom, AllRandom
parameters["reward_type"] = 2 # 0: Global rewards, 1: Difference Rewards, 2: D++ Rewards
   
    

## Helper Functions

<details>
    <summary>Functions</summary>

## Functions:
1) save_poi_config_csv()
2) save_agent_config_csv()
3) get_angle()
4) get_euclidean_distance()
5) get_squared_distance()
</details>

## Description:


In [3]:
# Helper Functions
def save_poi_config_csv(pois_info, config_id):
    """
    Saves POIs' configuration to a csv file in a folder called World_Config
    """
    dir_name = './Configs'  # Intended directory for output files

    if not os.path.exists(dir_name):  # If Data directory does not exist, create it
        os.makedirs(dir_name)

    pfile_name = os.path.join(dir_name, f'POI_Config{config_id}.csv')

    with open(pfile_name, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for poi_id in range(len(pois_info)):
            writer.writerow(pois_info[poi_id, :])

    csvfile.close()

def save_agent_config_csv(agent_infos, config_id):
    """
    Saves Agents' configuration to a csv file in a folder called World_Config
    """
    dir_name = './Configs'  # Intended directory for output files

    if not os.path.exists(dir_name):  # If Data directory does not exist, create it
        os.makedirs(dir_name)

    pfile_name = os.path.join(dir_name, f'Agent_Config{config_id}.csv')

    row = np.zeros(3)
    with open(pfile_name, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for agent_id in range(len(agent_infos)):
            writer.writerow(agent_infos[agent_id, :])

    csvfile.close()

def create_pickle_file(input_data, dir_name, file_name):
    """
    Create a pickle file using provided data in the specified directory
    """
    if not os.path.exists(dir_name):  # If Data directory does not exist, create it
        os.makedirs(dir_name)

    path_name = os.path.join(dir_name, file_name)
    rover_file = open(path_name, 'wb')
    pickle.dump(input_data, rover_file)
    rover_file.close()

def get_angle(source_x, source_y, target_x, target_y, radians=False):
    radian_angle = math.atan2(target_y - source_y, target_x - source_x)
    if radians:
        return radian_angle
    return math.degrees(radian_angle)

def get_euclidean_distance(source_x, source_y, target_x, target_y):
    d = math.sqrt((target_y - source_y)**2 + (target_x - source_x)**2)
    if d < 0.01:
        d = 0.01 
    return d

def get_squared_distance(source_x, source_y, target_x, target_y):
    d =(target_y - source_y)**2 + (target_x - source_x)**2
    if d < 0.0001:
        d = 0.0001 
    return d
    

## Agent Class

<details>
    <summary> Parameters </summary>

### Parameters:
1) ID : int
2) Location : (x: float, y: float, theta: float)
4) Step_size : float
5) Sensor_radius : float
6) Sensor_resolution : float in degrees.
7) Number_of_sectors : int (360/resolution)
8) Current_observation : ndarray
9) Past_observation : ndarray
10) Action_space (up, down, left, right)
11) Q-table : dict
12) Local_reward : float
13) Current_action_dir : float (angle phi)
14) Learning_rate : float
15) Discount_factor : float
16) Epsilon : float 
17) Snapshot : list (stores parameters that remain same across configs)
</details>


<details>
  <summary> Functions </summary>
    
### Functions:
1) reset()
2) setup_sector_angles()
3) get_sector_from_direction()
4) get_direction_from_sector()
5) move()
6) turn()
7) new_location()
8) update_history()
9) get_Qvalue()
10) set_Qvalue()
11) update_Qvalue()
12) scan_surrounding()
13) scan_for_pois()
14) scan_for_agents()
15) **select_action()**
16) e_greedy()
</details>

### Desciption:


In [4]:
class Agent:
    def __init__(self, agent_id, x_pos, y_pos, theta):
        self.state_size = 8 # (size of the encoding for observations from environment) This needs to change when the actual observation size is known.
        self.agent_id = agent_id
        self.loc = np.array([x_pos, y_pos, theta])
        self.max_step = p["max_step"]
        
        # Sensor specs.
        self.sensor_range = p["sensor_radius"]
        self.sensor_res = p["sensor_res"]
        self.n_sectors = p["n_sectors"]
        self.sector_angles = self.setup_sector_angles()
        
        # MDP params
        self.curr_obs = np.zeros(self.state_size)
        self.prev_obs = np.zeros(self.state_size)
        self.action_space = [i for i in range(self.n_sectors)] # 0 - Front, 1 - Left, 2 - Back, 3 - Right
        self.Qtable = {}
        self.l_reward = 0.0
        self.curr_action_dir = 0.0
        self.lr = p["learning_rate"]
        self.dscf = p["discount_factor"]

        # Logging info
        self.snap = [self.lr, self.dscf]
        self.path = [self.loc.copy()]

    def reset(self, config):
        self.loc[0] = config[0]
        self.loc[1] = config[1]
        self.loc[2] = config[2]
        self.curr_obs = np.zeros(self.state_size)
        self.prev_obs = self.curr_obs.copy()
        self.l_reward = 0.0
        self.curr_action_dir = 0.0
        self.lr = self.snap[0]
        self.dscf = self.snap[1]
        self.path = [self.loc.copy()]

    def setup_sector_angles(self):
        half = self.sensor_res/2
        angles = []
        n = self.n_sectors
        
        s = 90.0 - half
        for i in range(n):
            angles.append(s)
            s += self.sensor_res
        return angles
        
    def get_sector_from_direction(self, direction):
        # direction should be in range [0, 360)
        # direction = direction - self.loc[2]  # This line will change direction from world coordinate system to local coordiate system.
        if direction < 0:
            while direction < 0:
                direction += 360
        elif direction >= 360:
            while direction >= 360:
                direction -= 360

        for i in range(len(self.sector_angles) - 1):
            if direction >= self.sector_angles[i] and direction < self.sector_angles[i + 1]:
                # print("Quadrant = ", i)
                return i, direction
                
        # print("Quadrant = ", len(self.sector_angles) - 1)
        return len(self.sector_angles) - 1, direction

    def get_direction_from_sector(self, sector):
        direction = self.sector_angles[sector] + self.sensor_res/2
        if direction >= 360.0:
            direction -= 360.0
        rad_angle = math.radians(direction) 
        dir_vector = [math.cos(rad_angle), math.sin(rad_angle)]
        return direction, dir_vector

    def move(self, new_location):
        self.loc[:2] = new_location
        self.update_history()

    def turn(self, new_direction):
        self.loc[2] = new_direction
        self.update_history()

    def new_location(self, new_location):
        self.loc = new_location
        self.update_history()
    
    def update_history(self):
        self.path.append(self.loc.copy())

    def get_Qvalue(self, observation, action):
        return self.Qtable.get((tuple(observation), action), 0.0)

    def set_Qvalue(self, observation, action, value):
        self.Qtable[(tuple(observation), action)] = value
    
    def update_Qvalue(self):
        curr_Q = self.get_Qvalue(self.curr_obs.tolist(), self.get_sector_from_direction(self.curr_action_dir))
        next_Qs = [self.get_Qvalue(self.curr_obs.tolist(), next_sector) for next_sector in range(self.n_sectors)]
        best_Q = np.max(next_Qs)
        td_error = self.l_reward + self.dscf * (best_Q - curr_Q)
        new_Q = curr_Q + self.lr * td_error
        if self.agent_id == 0:
            if new_Q != 0.0:
                print(self.prev_obs.tolist(), ":", new_Q, "\n\n")
        self.set_Qvalue(self.prev_obs.tolist(), self.get_sector_from_direction(self.curr_action_dir), new_Q)

    def scan_surrounding(self, agents, pois, distance_table):
        observation_for_pois = self.scan_for_pois(pois, distance_table)
        observation_for_other_agents = self.scan_for_agents(agents)
        self.prev_obs = self.curr_obs.copy()
        self.curr_obs = np.concatenate((observation_for_pois, observation_for_other_agents))
    
    def scan_for_pois(self, pois, dist_table):
        poi_state = np.zeros(self.n_sectors)
        state_mask = np.zeros(self.n_sectors)
        dist_sq_to_pois = dist_table[:, self.agent_id]
        
        detection_radius_sq = self.sensor_range**2
        poi_ids_in_range = np.where(dist_sq_to_pois <= detection_radius_sq)[0]
        # poi_ids_out_range = np.where(dist_sq_to_pois > detection_radius_sq)[0]
        
        for poi_id in poi_ids_in_range:
            poi = pois[poi_id]
            angle = get_angle(self.loc[0], self.loc[1], poi.loc[0], poi.loc[1])
            poi_sector, angle = self.get_sector_from_direction(angle)
            dist_sq = dist_sq_to_pois[poi_id]
            if poi.poi_id in poi_ids_in_range:
                # Detected the POI
                state_mask[poi_sector] = 1
                poi_state[poi_sector] += float(poi.val/dist_sq) 

        for idx in range(self.n_sectors):
            if state_mask[idx] == 0:
                poi_state[idx] = -1
        return poi_state
    
    def scan_for_agents(self, agents):
        agent_state = np.zeros(self.n_sectors)
        state_mask = np.zeros(self.n_sectors)

        x_self, y_self, theta_self = self.loc
        detection_radius_sq = self.sensor_range**2

        for other_agent in agents:
            if other_agent.agent_id == self.agent_id:
                continue

            x_other, y_other, theta_other = other_agent.loc
            dist_sq = (x_other - x_self)**2 + (y_other - y_self)**2
            if dist_sq < 0.01:
                dist_sq = 0.01
            if dist_sq <= detection_radius_sq:
                angle = get_angle(x_self, y_self, x_other, y_other)
                agent_sector, angle = self.get_sector_from_direction(angle)

                state_mask[agent_sector] = 1
                agent_state[agent_sector] += 1/dist_sq
        for idx in range(self.n_sectors):
            if state_mask[idx] == 0:
                agent_state[idx] = -1
        return agent_state
    
    def e_greedy(self, epsilon):
        if np.random.rand() < epsilon:
            # We explore
            angle = np.random.uniform(0.0, 360.0)
            rad_angle = math.radians(angle)
            dir_vector = [math.cos(rad_angle), math.sin(rad_angle)]
            return dir_vector, angle
        else:
            # We exploit
            # q_values = []
            max_Qvalue = -1000.0
            best_action_sector = None
            for action in range(self.n_sectors): # Action here is a sector of choice NOT a direction vector.
                # q_values.append(self.get_Qvalue(self.curr_obs.tolist(), action))
                Q_val = self.get_Qvalue(self.curr_obs.tolist(), action)
                if Q_val > max_Qvalue:
                    # found new max value
                    best_action_sector = action
                    max_Qvalue = Q_val
            # max_Qvalue = np.max(q_values)
            # best_action_sector = np.argmax(q_values)
            
            angle, dir_vector = self.get_direction_from_sector(best_action_sector)
            return dir_vector, angle

In [5]:
# def get_sector_from_direction(res, n_sect, direction):
#     # direction should be in range [0, 360)
#     if direction < 0:
#         while direction < 0:
#             direction += 360
#     elif direction >= 360:
#         while direction >= 360:
#             direction -= 360

#     half = res/2
#     angles = []
#     n = n_sect
        
#     s = 90.0 - half
    
#     for i in range(n):
#         angles.append(s)
#         s += res
        
#     flag = False
#     for i in range(len(angles) - 1):
#         if direction >= angles[i] and direction < angles[i + 1]:
#             flag = True
#             print("Quadrant = ", i, direction)
#     if not flag:        
#         if (direction >= 0.0 and direction < angles[0]) or (direction >= angles[-1] and direction < 360): 
#             print("Quadrant = ", len(angles) - 1, direction)
#     print(angles)
#     return angles


# def get_direction_from_sector(angles, res, sector):
#     sector_angles = angles
#     direction = sector_angles[sector] + res/2
    
#     # if sector >= len(sector_angles) - 1:
#     if direction >= 360.0:
#         direction -= 360.0
#     print(direction)
    
# res = 60
# n_sect = int(360/res)
# sect_angles = get_sector_from_direction(res, n_sect, 40)
# for i in range(n_sect):
#     get_direction_from_sector(sect_angles, res, i)

## Point of Interest Class

<details>
    <summary> Parameters </summary>

### Parameters:
1) ID : int
2) Location : (x: float, y: float)
3) Value : float
4) Level : int (what is the hazard level of POI)
5) Coupling : int (minimum number of agents reqired)
6) Detected : bool (True if it's detected, False otherwise)
7) Done : bool (True if it's harvested, False otherwise)
8) Snapshot : list (stores parameters that stay constant across configs)
</details>

<details>
  <summary> Functions </summary>
    
### Functions:
1) reset()
2) set_detected()
3) set_done()
</details>

### Desciption:


In [6]:
class POI:
    def __init__(self, poi_id, x_pos, y_pos, value, level, coupling, opti_coupling):
        self.poi_id = int(poi_id)
        self.loc = np.array([x_pos, y_pos])
        self.val = value
        self.level = int(level)
        self.coupling = int(coupling)
        self.opti_coupling = int(opti_coupling)
        self.detected = False
        self.done = False
        self.snap = [self.detected]

    def reset(self, config):
        self.loc[0] = config[0]
        self.loc[1] = config[1]
        self.val = config[2]
        self.level = config[3]
        self.coupling = config[4]
        self.opti_coupling = config[5]
        self.detected = self.snap[0]
        self.done = False

    def set_detected(self, flag):
        self.detected = flag

    def set_done(self, flag):
        self.done = flag

## Forest Domain Class

<details>
    <summary> Parameters </summary>

### Parameters:
1) X : float
2) Y : float
3) Number of POI : int
4) Number of Agents : int
5) Done : bool (True if all fires are done, False otherwise)
6) Global_rewards : list (one reward for each poi)
7) Agents : list (objects of Agent Class)
8) Agent_configurations : 2D List (stores different positional configurations of Agents) 
9) POIs : list (objects of POI Class)
10) POI_configurations : 2D List (stores different positional configurations of POIs)
11) POI_agent_distances : 2D array of floats (distance squared values for each poi and agent pair)
</details>

<details>
  <summary> Functions </summary>
    
### Functions:
1) reset()
2) **show_simulation_details()**
3) goals_done()
4) create_forest()
5) setup_forest()
6) create_poi_config()
7) create_agent_config()
6) load_poi_config()
7) load_agent_config()
8) update_distance_table()
9) select_joint_action()
10) calculate_global_reward()
11) **calculate_difference_reward()**
12) step()
13) execute()
</details>

### Desciption:

In [7]:
class ForestDomain:
    def __init__(self):
        self.X = p["x_dim"]
        self.Y = p["y_dim"]
        self.n_pois = p["n_pois"]
        self.n_agents = p["n_agents"]
        self.done = False
        self.global_rewards = np.zeros(self.n_pois)
        self.agents = np.empty(self.n_agents, dtype=object)
        self.agent_configs = [[] for _ in range(self.n_agents)]
        self.pois = np.empty(self.n_pois, dtype=object)
        self.poi_configs = [[] for _ in range(self.n_pois)]
        self.distance_table = np.ones((self.n_pois, self.n_agents)) # stores squared eucliedian distances
        
    def reset(self, config_id):
        self.done = False
        self.global_rewards = np.zeros(self.n_pois)
        self.distance_table[:, :] = 1000.0
        for idx, poi in enumerate(self.pois):
            poi.reset(self.poi_configs[idx][config_id])
        for idx, agent in enumerate(self.agents):
            agent.reset(self.agent_configs[idx][config_id])
        
    def goals_done(self):
        for poi in self.pois:
            if poi.done:
                continue
            else:
                return False
        return True

    def create_forest(self, config_id):
        poi_infos = self.create_poi_config()
        save_poi_config_csv(poi_infos, config_id)
        
        agent_infos = self.create_agent_config(poi_infos)
        save_agent_config_csv(agent_infos, config_id)
        
    def setup_forest(self):
        f1 = self.load_poi_config()
        # print(self.pois[:].poi_id)
        f2 = self.load_agent_config()
        if f1 and f2:
            print("Forest Ready.")
            return True
        return False
        
    def create_poi_config(self):
        all_poi_params = np.zeros((self.n_pois, 7)) # (n_pois) X (poi_id, x_pos, y_pos, value, level, coupling, opti_coupling)
        if p["poi_distribution"] == "Random":
            for idx in range(self.n_pois):
                # all_poi_params[idx, 0] = int(idx)
                
                x = random.uniform(0, self.X - 1.0)
                y = random.uniform(0, self.Y - 1.0)
    
                too_close = True
                while too_close:
                    count = 0
                    for i in range(self.n_pois):
                        if i != idx:
                            x_dist = x - all_poi_params[idx, 1]
                            y_dist = y - all_poi_params[idx, 2]
                            dist_sq = x_dist** 2 + y_dist**2
                            min_dist_sq = (p["sensor_radius"] * 2.1)** 2
                            if dist_sq <= min_dist_sq:
                                count += 1
                    if count == 0:
                        too_close = False
                    else:
                        x = random.uniform(0, self.X - 1.0)
                        y = random.uniform(0, self.Y - 1.0)
                all_poi_params[idx, 0] = x
                all_poi_params[idx, 1] = y
                all_poi_params[idx, 2] = p["value"]
                all_poi_params[idx, 3] = p["level"]
                all_poi_params[idx, 4] = p["coupling"]
                all_poi_params[idx, 5] = p["opti_coupling"]
        print("POI Configurations generated.")
        return all_poi_params

    def create_agent_config(self, poi_info):
        all_agent_params = np.zeros((self.n_agents, 4)) # (n_agents) X (agent_id, x_pos, y_pos, theta)
        if p["agent_distribution"] == "OneRandom":
            x_pos = random.uniform(0.0, self.X - 1.0)
            y_pos = random.uniform(0.0, self.Y - 1.0)
            theta = random.uniform(0.0, 360.0)
            buffer = 3.0
            
            too_close = True
            while too_close:
                count = 0
                for poi_idx in range(self.n_pois):
                    x_dist = x_pos - poi_info[poi_idx, 1]
                    y_dist = y_pos - poi_info[poi_idx, 2]
                    dist_sq = x_dist**2 + y_dist**2
                    min_dist_sq = (p["sensor_radius"] * 2.0 + buffer)** 2
                    if dist_sq <= min_dist_sq:
                        count += 1
                
                if count == 0:
                    too_close = False
                else:
                    x_pos = random.uniform(0.0, self.X - 1.0)
                    y_pos = random.uniform(0.0, self.Y - 1.0)
            
            for idx in range(self.n_agents):
                # all_agent_params[idx, 0] = int(idx)
                all_agent_params[idx, 0] = x_pos
                all_agent_params[idx, 1] = y_pos
                all_agent_params[idx, 2] = theta
                
        # elif p["agent_distribution"] == "AllRandom":
        print("Agent Configurations generated.")
        return all_agent_params
    
    def load_poi_config(self):
        for cf_id in range(p["n_experiments"]):
            csv_input = []
            status = False
            with open(f'./Configs/POI_Config{cf_id}.csv', mode='r') as csvfile:
                csv_reader = csv.reader(csvfile, delimiter=',')
                for row in csv_reader:
                    csv_input.append(row)
    
            for idx in range(self.n_pois):
                poi_id = idx #int(float(csv_input[idx][0]))
                x_pos = float(csv_input[idx][0])
                y_pos = float(csv_input[idx][1])
                value = float(csv_input[idx][2])
                level = int(float(csv_input[idx][3]))
                coupling = int(float(csv_input[idx][4]))
                opti_coupling = int(float(csv_input[idx][5]))
                
                if cf_id == 0:
                    self.pois[idx] = POI(poi_id, x_pos, y_pos, value, level, coupling, opti_coupling)
                    
                self.poi_configs[poi_id].append((x_pos, y_pos, value, level, coupling, opti_coupling))

        status = True
        return status

    def load_agent_config(self):
        status = False
        for cf_id in range(p["n_experiments"]):
            csv_input = []
            with open(f'./Configs/Agent_Config{cf_id}.csv', mode='r') as csvfile:
                csv_reader = csv.reader(csvfile, delimiter=',')

                for row in csv_reader:
                    csv_input.append(row)

            for idx in range(self.n_agents):
                agent_id = idx #int(float(csv_input[idx][0]))
                x_pos = float(csv_input[idx][0])
                y_pos = float(csv_input[idx][1])
                theta = float(csv_input[idx][2])

                if cf_id == 0:
                    self.agents[idx] = Agent(agent_id, x_pos, y_pos, theta)

                self.agent_configs[idx].append((x_pos, y_pos, theta))
                
        status = True
        return status

    def update_distance_table(self):
        for poi in self.pois:
            # for each poi
            poi_x, poi_y = poi.loc
            for agent in self.agents:
                a_x, a_y, a_t = agent.loc
                dist_sq = (poi_x - a_x)**2 + (poi_y - a_y)**2
                if dist_sq < 0.01:
                    dist_sq = 0.01
                self.distance_table[poi.poi_id, agent.agent_id] = dist_sq
        '''
        print("ForestDomain:: Distance Table: \n")
        for r in self.distance_table:
            for c in r:
                print(c, " ")
            print("\n")
        '''
    
    def select_joint_action(self, epsilon):
        joint_action = np.zeros((self.n_agents, 2)) # returns x, y direction vector for action taken.
        for agent in self.agents:
            action, angle = agent.e_greedy(epsilon)
            joint_action[agent.agent_id] = action
        return joint_action
    
    def calc_global_reward(self):
        global_reward = np.zeros(self.n_pois)
        # Calculate the global rewards for each POI.
        for poi in self.pois:
            observers = 0
            # Check which agents have detected the POI.
            detection_radius_sq = (p["sensor_radius"]/2.0)**2
            observer_agent_ids = np.where(self.distance_table[poi.poi_id] <= detection_radius_sq)[0]
            observers = len(observer_agent_ids)
            if observers > 1:
                poi.set_detected(True)
            # Get just observer count
            '''
            sorted_dist_sq_from_poi = np.sort(self.distance_table[poi.poi_id])
            for i in range(poi.coupling):
                if sorted_dist_sq_from_poi[i] < detection_radius_sq:
                    observers += 1
             '''       
            if observers >= int(poi.coupling):
                # We have minimum required agents
                dist_sq_sum = np.sum(np.partition(self.distance_table[poi.poi_id], observers-1)[:observers])
                v = poi.val/dist_sq_sum
                reward_full = v  # Add any other partial rewards gained in the process here.
                optimum_coupling = poi.opti_coupling  # When there is a different optimum value than the minimum, change this. 
                global_reward[poi.poi_id] = reward_full * (np.e/float(optimum_coupling)) * np.exp(-1 * float(observers/optimum_coupling)) 
                flag = True
                # If the agent needs to be on the POI's location to complete it. Un-comment the block below. 
                ''' 
                observers_on_poi = 0
                for i in range(observers_in_range):
                    if sorted_dist_sq_from_poi[i] <= 0.001:
                        # Has reached poi
                        continue
                    else:
                        # Atleast 1 is missing.
                        flag = False
                '''
                poi.set_done(flag)
        self.global_rewards = global_reward

    def calc_difference_reward(self):
        difference_rewards = np.zeros(self.n_agents)
        for agent in self.agents:
            counterfactual = 0.0
            counterfactual_rewards = np.zeros(self.n_pois)
            detection_radius_sq = (agent.sensor_range/2.0)**2
            for poi in self.pois:
                observers = 0
                distances = self.distance_table[poi.poi_id, :]
                agents_in_radius = distances <= detection_radius_sq
                agents_in_radius[agent.agent_id] = False

                observers = np.sum(agents_in_radius)
                if observers >= poi.coupling:
                    dist_sq_sum = np.sum(distances[agents_in_radius])
                    v = poi.val/dist_sq_sum
                    counterfactual_rewards[poi.poi_id] = v * (np.e / float(poi.opti_coupling)) * np.exp(-1 * float(observers/poi.opti_coupling))
            difference_rewards[agent.agent_id] = np.sum(self.global_rewards - counterfactual_rewards)
        return difference_rewards  

    def calc_difference_plus_plus(self):
        difference_rewards = self.calc_difference_reward()
        dpp_rewards = np.zeros(self.n_agents)
        n_ghost_agents = self.n_agents - 1
        n_ghost_opti = 0
        for agent in self.agents:
            detection_radius_sq = (agent.sensor_range/2.0)**2
            ghost_global = np.zeros(self.n_pois)
            
            n_ghost = 1
            while n_ghost <= n_ghost_agents:
                poi_rewards = np.zeros(self.n_pois)
                for poi in self.pois:
                    observers = 0
                    dist_sq = self.distance_table[poi.poi_id]
                    ghost_dist_to_add = np.full(n_ghost, dist_sq[agent.agent_id])
                    dist_sq_ghost = np.append(dist_sq, dist_sq[agent.agent_id])
                    agents_in_radius = dist_sq_ghost <= detection_radius_sq
                    observers = np.sum(agents_in_radius)
    
                    if observers >= poi.coupling:
                        dist_sq_sum = np.sum(dist_sq_ghost[agents_in_radius])
                        v = poi.val/dist_sq_sum
                        poi_rewards[poi.poi_id] = v * (np.e / float(poi.opti_coupling)) * np.exp(-1 * float(observers/poi.opti_coupling))
                if np.sum(poi_rewards) > np.sum(ghost_global):
                    ghost_global = poi_rewards.copy()
                    n_ghost_opti = n_ghost
                    n_ghost += 1
                else:
                    # Found the optimal in previous iteration.
                    break
            # ghost_global has the global value with n_ghost agents added.
            dpp_rewards[agent.agent_id] = np.sum(ghost_global - self.global_rewards)/(n_ghost_opti + 1.0)
            
        return np.maximum(dpp_rewards, difference_rewards)
    
    def step(self, joint_action):
        # Take the joint_action
        for agent in self.agents:
            # Calculate displacement.
            dx = 2 * agent.max_step * (joint_action[agent.agent_id, 0] - 0.5)
            dy = 2 * agent.max_step * (joint_action[agent.agent_id, 1] - 0.5)

            # Get new coordinates in world.
            x = np.clip(agent.loc[0] + dx, 0, self.X - 1)
            y = np.clip(agent.loc[1] + dy, 0, self.Y - 1)

            # Move the agent to new position.
            agent.move((x, y))

        # Update Distance table.
        self.update_distance_table()
        
        # Fetch new observations.
        for agent in self.agents:
            agent.scan_surrounding(self.agents, self.pois, self.distance_table)

        # Calculate global reswards for the system after taking this step.
        self.calc_global_reward()

    def execute(self, epsilon, step,  reward_type=0):
        joint_action = self.select_joint_action(epsilon)

        self.step(joint_action)
        rewards = np.zeros(self.n_agents)
        match reward_type:
            case 0:
                # Global rewards
                reward = np.full(self.n_agents, np.sum(self.global_rewards))
            case 1:
                # Difference rewards
                reward = self.calc_difference_reward()
                # print("Using Difference Rewards.")
            case 2:
                # D++ rewards
                reward = self.calc_difference_plus_plus()
                # print("Using D++ Rewards.")
            case _:
                print("Incorrect Reward code.")
                
        for agent in self.agents:
            agent.l_reward = reward[agent.agent_id]
            agent.update_Qvalue()

In [8]:
# def load_poi_config(n_pois, X, Y, radius, value, level, coupling):
#     all_poi_params = np.zeros((n_pois, 6)) # (n_pois) X (poi_id, x_pos, y_pos, value, level, coupling)
#     pois = np.empty(n_pois, dtype=object)
#     for idx in range(n_pois):
        # all_poi_params[idx, 0] = idx
#         x = random.uniform(0, X - 1.0)
#         y = random.uniform(0, Y - 1.0)

#         too_close = True
#         while too_close:
#             count = 0
#             for i in range(n_pois):
#                 if i != idx:
#                     x_dist = x - all_poi_params[idx, 1]
#                     y_dist = y - all_poi_params[idx, 2]
#                     dist_sq = x_dist** 2 + y_dist**2
#                     min_dist_sq = (radius * 2.1)** 2
#                     if dist_sq < min_dist_sq:
#                         count += 1
#             if count == 0:
#                 too_close = False
#             else:
#                 x = random.uniform(0, X - 1.0)
#                 y = random.uniform(0, Y - 1.0)
#         all_poi_params[idx, 1] = x
#         all_poi_params[idx, 2] = y
#         all_poi_params[idx, 3] = value
#         all_poi_params[idx, 4] = level
#         all_poi_params[idx, 5] = coupling
#         pois[idx] = POI(*all_poi_params[idx])
#     for poi in pois:
#         print(poi.poi_id, poi.loc, poi.val)

# load_poi_config(2, 100.0, 100.0, 3, 100, 1, 2)

In [9]:
# p = parameters

In [None]:

if __name__ == "__main__":
    p = parameters
    domain = ForestDomain()
    paths_taken = [[] for _ in range(p["n_experiments"])]
    cumsum_global_reward = []
    for cf_id in range(p["n_experiments"]):
        domain.create_forest(cf_id)
        
    for cf_id in range(p["n_experiments"]):
        domain.setup_forest()
        # Logging Params
        cumsum_global_rewards_over_experiment = []
        
        for episode in tqdm(range(p["n_episodes"]), desc="Current Episode"):
            # print("**********************\n", domain.agents[0].Qtable.values() if domain.agents[0].Qtable.values() != 0.0 else "", "\n\n\n")
            domain.reset(cf_id)
            # print(domain.agents[0].Qtable.values(), "***********************\n")
            epsilon = p["epsilon"]
            decay = p["epsilon_decay_factor"]
    
            # Logging Params
            global_rewards_over_episode = []
            
            for step in range(p["n_epochs"]):
                domain.execute(epsilon, step, reward_type=p["reward_type"])
                global_rewards_over_episode.append(np.sum(domain.global_rewards))
                epsilon *= decay
                
            cumsum_global_rewards_over_experiment.append(np.cumsum(global_rewards_over_episode)[-1])
    
            if episode == p["n_episodes"] - 1 :
                for agent in domain.agents:
                    paths_taken[cf_id].append(agent.path.copy())
                    # print([(key,": ",value) for key, value in agent.Qtable.items() if value != 0.0], "\n\n")
                    
            # print(f"Episode {episode + 1}/{p["n_episodes"]} completed.")
            
        # if cf_id == p["n_experiments"] - 1:
        cumsum_global_reward.append(cumsum_global_rewards_over_experiment.copy())
        print(f"Experiment {cf_id + 1}/{p["n_experiments"]} completed.")

        # Save agent_paths using pickle
        create_pickle_file(paths_taken[cf_id],"Output_Data/",("EXP{0}_Agent_path").format(cf_id))
        create_pickle_file(cumsum_global_rewards_over_experiment, "Output_Data/", ("EXP{0}_cumulative_G").format(cf_id))
    
    frame = pd.DataFrame(cumsum_global_reward)
    print(frame)
    print(np.max(cumsum_global_reward))
    # print("\n", paths_taken)


POI Configurations generated.
Agent Configurations generated.
Forest Ready.


Current Episode:   0%|          | 2/1000 [00:14<2:00:46,  7.26s/it]

[35.12121326047981, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.3809179383155974 




Current Episode:   1%|          | 7/1000 [00:48<1:50:46,  6.69s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6933148292961346 


[-1.0, -1.0, 55.46518634369077, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6274499205130019 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.1909355266613164 


[-1.0, -1.0, -1.0, 175.2748421329053, -1.0, -1.0, -1.0, -1.0] : 1.9827966516284914 




Current Episode:   1%|          | 10/1000 [01:07<1:46:53,  6.48s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 5.192889694343491 


[-1.0, -1.0, -1.0, 16.754050056562257, -1.0, -1.0, -1.0, -1.0] : 4.69956517338086 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 4.69956517338086 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 4.253106481909678 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.8490613661282587 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.483400536346074 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.152477485393197 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.8529921242808434 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.581957872474163 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.3366718745891175 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.1146880465031512 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.913792682085352 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.7319823772872436 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0

Current Episode:   1%|          | 12/1000 [01:19<1:46:10,  6.45s/it]

[-1.0, 27.619102950495538, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7394387962920088 




Current Episode:   1%|▏         | 14/1000 [01:32<1:45:52,  6.44s/it]

[-1.0, 36.74486906229066, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7803009905641018 




Current Episode:   2%|▏         | 23/1000 [02:30<1:44:52,  6.44s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.638696122347892 


[-1.0, -1.0, -1.0, 35.66350157518318, -1.0, -1.0, -1.0, -1.0] : 0.5780199907248422 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5780199907248422 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5231080916059823 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.47341282290341397 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.42843860472758966 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.38773693727846864 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.3509019282370141 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.31756624505449776 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.28739745177432047 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.26009469385576 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.2353856979394628 




Current Episode:   3%|▎         | 27/1000 [02:56<1:43:11,  6.36s/it]

[14.9270112835441, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 5.5842858580174335 




Current Episode:   3%|▎         | 30/1000 [03:15<1:42:41,  6.35s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, 0.487956946258278, -1.0, -1.0] : 0.642049787762647 




Current Episode:   3%|▎         | 33/1000 [03:33<1:41:20,  6.29s/it]

[13.850407735210059, -1.0, -1.0, -1.0, 0.12716184206416442, -1.0, -1.0, -1.0] : 0.5705810319816237 




Current Episode:   4%|▎         | 36/1000 [03:52<1:39:19,  6.18s/it]

[-1.0, 20.273778075363285, -1.0, -1.0, -1.0, -1.0, -1.0, 0.11508061439990472] : 3.529675476380242 




Current Episode:   4%|▍         | 39/1000 [04:11<1:39:18,  6.20s/it]

[-1.0, -1.0, -1.0, 20.63284605470656, -1.0, -1.0, -1.0, -1.0] : 0.665511253139716 




Current Episode:   4%|▍         | 43/1000 [04:35<1:38:04,  6.15s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.43554401186857644, -1.0] : 0.6193596534478101 




Current Episode:   6%|▌         | 55/1000 [05:50<1:38:20,  6.24s/it]

[-1.0, -1.0, -1.0, 16.355331095615618, -1.0, -1.0, -1.0, -1.0] : 0.635774617076552 




Current Episode:   6%|▋         | 63/1000 [06:40<1:38:57,  6.34s/it]

[-1.0, -1.0, 14.601862948342793, -1.0, 2.690401992068353, -1.0, -1.0, -1.0] : 1.743796732910256 


[-1.0, -1.0, -1.0, 28.461233367049623, -1.0, -1.0, -1.0, -1.0] : 4.788307184063351 




Current Episode:   7%|▋         | 73/1000 [07:43<1:37:16,  6.30s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.282404141409592 


[-1.0, 102.59233131276734, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6693320356466166 


[-1.0, -1.0, 53.54656285172933, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.1605757479756806 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.1605757479756806 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.050321051917991 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.9505405519857819 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8602391995471326 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.778516475590155 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7045574104090903 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6376244564202267 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5770501330603052 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5222303704195762 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.47261848522971645 




Current Episode:   8%|▊         | 76/1000 [08:02<1:35:50,  6.22s/it]

[13.58345020733534, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.9302817198640808 




Current Episode:   8%|▊         | 80/1000 [08:28<1:37:36,  6.37s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6497047430391403 


[-1.0, -1.0, -1.0, 51.97637944313122, -1.0, -1.0, -1.0, -1.0] : 0.587982792450422 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.587982792450422 




Current Episode:   8%|▊         | 83/1000 [08:47<1:37:28,  6.38s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7007398118676957 


[56.05918494941565, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6341695297402646 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6341695297402646 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5739234244149395 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5194006990955202 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.4700576326814458 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.42540215757670846 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.3849889526069212 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.3484150021092637 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.3153155769088836 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.2853605971025397 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.2582513403777984 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.23371746304190755 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0,

Current Episode:   8%|▊         | 84/1000 [08:53<1:36:51,  6.34s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.074319453547341 


[-1.0, -1.0, 20.143474650065542, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.782259105460344 




Current Episode:   8%|▊         | 85/1000 [08:59<1:36:11,  6.31s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7168293589668359 


[-1.0, -1.0, 57.34634871734686, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6487305698649865 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6487305698649865 




Current Episode:   9%|▉         | 88/1000 [09:18<1:34:41,  6.23s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5776803837347326 


[-1.0, -1.0, 11.564035455304785, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.522800747279933 




Current Episode:   9%|▉         | 90/1000 [09:30<1:33:38,  6.17s/it]

[-1.0, 11.620629691499321, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6267814437273281 




Current Episode:   9%|▉         | 91/1000 [09:36<1:33:45,  6.19s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 35.431682742124416, -1.0] : 0.6698342108051244 




Current Episode:  10%|▉         | 99/1000 [10:26<1:33:44,  6.24s/it]

[11.33587550713456, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.3474212654678859 


[107.79370123743088, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.614599342464186 




Current Episode:  10%|█         | 100/1000 [10:33<1:35:24,  6.36s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.456163293678068 


[-1.0, 116.49306349424543, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8072696060596508 


[-1.0, -1.0, 64.58156848477206, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.3178277807786516 




Current Episode:  10%|█         | 102/1000 [10:46<1:35:43,  6.40s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.4148510325726484 


[-1.0, -1.0, -1.0, 32.4169944526755, -1.0, -1.0, -1.0, -1.0] : 1.280440184478247 




Current Episode:  10%|█         | 103/1000 [10:52<1:35:10,  6.37s/it]

[-1.0, 17.989310811074613, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.746872958350381 




Current Episode:  11%|█         | 108/1000 [11:23<1:31:17,  6.14s/it]

[11.12649194584402, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.0352330618967576 




Current Episode:  11%|█         | 109/1000 [11:29<1:31:34,  6.17s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7511936554739678 


[-1.0, -1.0, 60.09549243791742, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6798302582039408 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6798302582039408 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6152463836745664 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5567979772254826 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5039021693890617 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.45603146329710087 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.4127084742838763 


[18.917118419515155, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 6.120398833527073 




Current Episode:  11%|█         | 111/1000 [11:41<1:32:12,  6.22s/it]

[-1.0, 21.425283241621432, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5828120372260502 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7244545686148415 


[-1.0, -1.0, -1.0, 12.016451721933727, -1.0, -1.0, -1.0, -1.0] : 0.6556313845964316 




Current Episode:  12%|█▏        | 117/1000 [12:19<1:34:05,  6.39s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.9658259261309392 


[-1.0, 77.26607409047513, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.2529154617053702 


[-1.0, -1.0, -1.0, 14.696065990729197, -1.0, -1.0, -1.0, -1.0] : 0.8740724631485 




Current Episode:  12%|█▏        | 120/1000 [12:38<1:32:56,  6.34s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.5321474506882886 


[-1.0, 122.57179605506309, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.759075086359226 


[-1.0, -1.0, 60.72600690873807, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.3865934428729012 




Current Episode:  12%|█▏        | 121/1000 [12:45<1:33:26,  6.38s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.041726314325252 


[-1.0, -1.0, -1.0, 12.807934331734929, -1.0, -1.0, -1.0, -1.0] : 1.8477623144643531 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.8477623144643531 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.6722248945902396 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.5133635296041668 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.369593994291771 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.2394825648340528 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.121731721174818 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.0151672076632101 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.9187263229352052 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8314473222563606 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7524598266420064 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6809761431110157 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.

Current Episode:  12%|█▏        | 124/1000 [13:04<1:32:38,  6.35s/it]

[-1.0, 11.708770252983543, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.2492676281314026 




Current Episode:  13%|█▎        | 128/1000 [13:29<1:31:08,  6.27s/it]

[20.24309440331268, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.378849471783796 




Current Episode:  13%|█▎        | 129/1000 [13:35<1:31:36,  6.31s/it]

[-1.0, 14.09607919619728, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.9631916260845028 




Current Episode:  13%|█▎        | 132/1000 [13:54<1:31:00,  6.29s/it]

[14.37343235148298, -1.0, -1.0, -1.0, 0.1137041720884715, -1.0, -1.0, -1.0] : 4.654597464207184 


[-1.0, -1.0, -1.0, 18.605803825967186, -1.0, -1.0, -1.0, -1.0] : 0.7514937357613145 




Current Episode:  14%|█▍        | 139/1000 [14:38<1:30:24,  6.30s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6268444540525222 


[-1.0, -1.0, 31.69333975437685, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5672942309175326 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5672942309175326 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.513401278980367 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.4646281574772322 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.42048848251689513 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.3805420766777901 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.34439057939340006 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.31167347435102705 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.2820644942876795 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.25526836733034997 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.2310178724339667 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.20907117455273988 




Current Episode:  15%|█▌        | 152/1000 [15:58<1:26:51,  6.15s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.7486354814638052 


[-1.0, -1.0, -1.0, 27.026991401026965, -1.0, -1.0, -1.0, -1.0] : 1.5825151107247437 


[-1.0, 11.442990503807634, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.5302909927525934 




Current Episode:  15%|█▌        | 153/1000 [16:04<1:26:52,  6.15s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 12.115813030060693 


[-1.0, 969.2650424048554, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 10.964810792204927 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 10.964810792204927 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 9.92315376694546 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 8.98045415908564 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 8.127311013972506 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 7.355216467645118 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 6.656470903218832 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 6.024106167413043 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 5.451816081508804 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 4.933893553765468 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 4.465173666157748 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 4.0409821678727615 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] 

Current Episode:  15%|█▌        | 154/1000 [16:11<1:27:42,  6.22s/it]

[-1.0, 11.458087786766178, -1.0, -1.0, -1.0, -1.0, -1.0, 0.2709741452114184] : 0.6795860370355054 




Current Episode:  16%|█▌        | 155/1000 [16:17<1:27:50,  6.24s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.1825106851410805 


[-1.0, -1.0, -1.0, 31.053127314628153, -1.0, -1.0, -1.0, -1.0] : 2.8801721700526777 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.8801721700526777 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.6065558138976734 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.3589330115773945 




Current Episode:  16%|█▌        | 156/1000 [16:23<1:27:53,  6.25s/it]

[11.348733487393284, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5739267361177411 




Current Episode:  16%|█▌        | 158/1000 [16:36<1:28:05,  6.28s/it]

[-1.0, 14.915619959595432, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 4.939357323092487 




Current Episode:  16%|█▌        | 160/1000 [16:48<1:27:11,  6.23s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6251482357529711 


[-1.0, -1.0, 50.01185886023769, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5657591533564388 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5657591533564388 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5120120337875771 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.46337089057775727 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.41935065597287036 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.37951234365544767 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.34345867100818017 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.31083009726240307 




Current Episode:  17%|█▋        | 167/1000 [17:32<1:26:27,  6.23s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.2016742163266994 


[96.13393730613593, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.0139386761610725 


[-1.0, -1.0, -1.0, 81.1150940928858, -1.0, -1.0, -1.0, -1.0] : 1.087515165775663 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.087515165775663 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.9842012250269749 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8907021086494123 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8060854083277181 


[-1.0, -1.0, -1.0, 15.967937082546612, -1.0, -1.0, -1.0, -1.0] : 1.4513540199832171 




Current Episode:  17%|█▋        | 169/1000 [17:44<1:27:05,  6.29s/it]

[20.14557822574501, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.0811997054937679 




Current Episode:  17%|█▋        | 170/1000 [17:51<1:27:42,  6.34s/it]

[13.585920345347843, -1.0, -1.0, -1.0, -1.0, -1.0, 0.22430396674012626, -1.0] : 1.6740153776379536 




Current Episode:  17%|█▋        | 171/1000 [17:57<1:27:46,  6.35s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7829545974810684 


[-1.0, -1.0, -1.0, 27.85941631222145, -1.0, -1.0, -1.0, -1.0] : 0.7085739107203669 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7085739107203669 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.641259389201932 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5803397472277485 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5252074712411124 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.4753127614732068 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.4301580491332521 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.38929303446559316 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.35231019619136184 




Current Episode:  18%|█▊        | 175/1000 [18:22<1:26:37,  6.30s/it]

[-1.0, 18.187584445405722, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 13.02295182888448 




Current Episode:  18%|█▊        | 178/1000 [18:41<1:26:27,  6.31s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 96.27121024368861 


[-1.0, -1.0, 24.42613872324518, -1.0, -1.0, -1.0, -1.0, -1.0] : 87.1254452705382 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 87.1254452705382 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 78.84852796983706 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 71.35791781270254 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 64.57891562049579 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 58.44391863654869 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 52.89174636607656 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 47.86703046129929 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 43.31966256747585 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 39.204294623565644 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 35.47988663432691 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 32.109297404065856 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 

Current Episode:  18%|█▊        | 182/1000 [19:06<1:25:04,  6.24s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7250432808109968 


[-1.0, -1.0, 58.00346246487974, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6561641691339521 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6561641691339521 




Current Episode:  18%|█▊        | 185/1000 [19:25<1:25:29,  6.29s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.285007721203283 


[182.80061769626263, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5651430160619876 


[-1.0, -1.0, -1.0, 15.633182285103508, -1.0, -1.0, -1.0, -1.0] : 2.0679319876889712 




Current Episode:  19%|█▊        | 186/1000 [19:31<1:24:53,  6.26s/it]

[-1.0, -1.0, -1.0, -1.0, 0.11833066913964305, -1.0, -1.0, -1.0] : 1.440450051101183 




Current Episode:  19%|█▊        | 187/1000 [19:37<1:24:32,  6.24s/it]

[-1.0, 12.926325070451844, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.0461838297894714 




Current Episode:  19%|█▉        | 194/1000 [20:21<1:25:20,  6.35s/it]

[-1.0, 21.425264304093385, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.959790004181882 




Current Episode:  20%|█▉        | 195/1000 [20:28<1:25:35,  6.38s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.700831325990406 


[56.066506079232475, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.296515818955914 


[-1.0, -1.0, -1.0, 103.72126551647311, -1.0, -1.0, -1.0, -1.0] : 0.6342523500213174 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6342523500213174 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5739983767692922 




Current Episode:  20%|█▉        | 199/1000 [20:53<1:24:22,  6.32s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 4.058430549419387 


[-1.0, -1.0, -1.0, 15.140204308259671, -1.0, -1.0, -1.0, -1.0] : 3.672879647224545 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.672879647224545 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.3239560807382134 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.0081802530680832 




Current Episode:  20%|██        | 204/1000 [21:24<1:21:34,  6.15s/it]

[-1.0, -1.0, -1.0, 11.669590176713488, -1.0, -1.0, -1.0, -1.0] : 1.0531786316261893 




Current Episode:  20%|██        | 205/1000 [21:30<1:21:22,  6.14s/it]

[14.76449851677079, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8899025562269346 




Current Episode:  21%|██        | 209/1000 [21:56<1:23:30,  6.33s/it]

[-1.0, 19.865203524798567, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.6323938486476512 




Current Episode:  23%|██▎       | 226/1000 [23:40<1:19:06,  6.13s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5613104480832405 


[-1.0, 44.90483584665924, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.4920813731760836 


[-1.0, -1.0, 199.3665098540867, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7206218944992635 


[-1.0, 57.64975155994107, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.1894473233771519 


[-1.0, -1.0, 95.15578587017217, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5079859555153327 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5079859555153327 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.45972728974137606 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.41605319721594536 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.37652814348043057 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.3407579698497897 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.30838596271405966 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.279089296256224 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.2525758131118827

Current Episode:  23%|██▎       | 227/1000 [23:46<1:19:02,  6.14s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.755067056571738 


[-1.0, -1.0, -1.0, 18.531107472512236, 1.10080928616895, -1.0, -1.0, -1.0] : 0.9935310154412758 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.1615827845140116, -1.0] : 0.9084514139442416 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 0.5963949619082184, -1.0] : 1.5883356861974227 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.5883356861974227 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.4374437960086675 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.300886635387844 




Current Episode:  23%|██▎       | 230/1000 [24:06<1:20:33,  6.28s/it]

[26.228360192995503, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 30.37795326081694 




Current Episode:  23%|██▎       | 232/1000 [24:19<1:21:38,  6.38s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.3598105003189525 


[-1.0, -1.0, 26.421393173520904, -1.0, -1.0, -1.0, 0.4724587480975215, -1.0] : 1.2306285027886519 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.2306285027886519 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.11371879502373 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.0079155094964756 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.9121635360943103 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8255080001653509 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7470847401496425 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6761116898354265 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.611881079301061 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5537523767674601 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5011459009745515 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.45353704038196907 


[-1.0, -1.0, -1.0, -1.0, 

Current Episode:  23%|██▎       | 233/1000 [24:25<1:22:29,  6.45s/it]

[11.362082427610396, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.6042069647993253 


[128.336557183946, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7113594346886924 




Current Episode:  24%|██▎       | 235/1000 [24:38<1:22:43,  6.49s/it]

[-1.0, 24.543674510635057, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.2890576299668108 




Current Episode:  24%|██▎       | 237/1000 [24:51<1:21:31,  6.41s/it]

[-1.0, 16.235469310305223, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.057501397320489 


[-1.0, -1.0, 24.656582026450668, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5655423225678513 




Current Episode:  24%|██▍       | 239/1000 [25:04<1:21:24,  6.42s/it]

[-1.0, 26.569223195581532, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8860984006127854 




Current Episode:  24%|██▍       | 242/1000 [25:23<1:20:35,  6.38s/it]

[26.498437224235975, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6832479765788959 




Current Episode:  25%|██▍       | 247/1000 [25:54<1:18:15,  6.24s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 1.9409290491411129, -1.0] : 6.487234648135332 




Current Episode:  25%|██▌       | 250/1000 [26:13<1:17:45,  6.22s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.0453989727641537 


[-1.0, -1.0, -1.0, 11.88166280535169, -1.0, -1.0, -1.0, -1.0] : 1.851086070351559 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.851086070351559 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.6752328936681609 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.5160857687696856 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.3720576207365656 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.241712146766592 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.1237494928237657 




Current Episode:  25%|██▌       | 251/1000 [26:19<1:17:34,  6.21s/it]

[-1.0, 12.622082507262974, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.036782897431818 


[-1.0, -1.0, 43.387656996254485, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.048401485757459 




Current Episode:  25%|██▌       | 252/1000 [26:25<1:18:02,  6.26s/it]

[37.34309855663099, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.3374925049575217 




Current Episode:  26%|██▌       | 256/1000 [26:51<1:17:46,  6.27s/it]

[-1.0, 21.011280542448016, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.4234261438342606 


[-1.0, -1.0, -1.0, 13.416916116121058, -1.0, -1.0, -1.0, -1.0] : 2.4582926854598894 




Current Episode:  26%|██▌       | 259/1000 [27:09<1:16:47,  6.22s/it]

[-1.0, -1.0, -1.0, 23.514929419214234, -1.0, -1.0, -1.0, -1.0] : 0.7813119447826138 




Current Episode:  27%|██▋       | 269/1000 [28:11<1:14:06,  6.08s/it]

[-1.0, 32.41335304386244, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 4.891151910762704 




Current Episode:  28%|██▊       | 279/1000 [29:12<1:12:52,  6.06s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 14.335797737005073 


[-1.0, -1.0, 17.235400168270463, -1.0, -1.0, -1.0, -1.0, -1.0] : 12.973896951989591 




Current Episode:  29%|██▉       | 290/1000 [30:19<1:13:00,  6.17s/it]

[41.11670752696785, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.749456526414141 




Current Episode:  29%|██▉       | 291/1000 [30:26<1:14:44,  6.33s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7857495223862022 


[-1.0, -1.0, 62.85996179089618, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.711103317759513 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.711103317759513 




Current Episode:  29%|██▉       | 294/1000 [30:44<1:12:52,  6.19s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.2532846120276409 


[-1.0, -1.0, 11.39851857639822, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.134222573885015 




Current Episode:  30%|███       | 301/1000 [31:28<1:13:23,  6.30s/it]

[-1.0, 26.003930680881833, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.731188728400689 




Current Episode:  30%|███       | 305/1000 [31:54<1:12:48,  6.28s/it]

[-1.0, -1.0, -1.0, 11.953817470266715, -1.0, -1.0, -1.0, -1.0] : 0.8054322087052181 




Current Episode:  31%|███       | 309/1000 [32:19<1:12:39,  6.31s/it]

[-1.0, -1.0, 11.319974038001892, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.9516742502332587 




Current Episode:  31%|███       | 311/1000 [32:31<1:11:57,  6.27s/it]

[-1.0, 14.164596222688013, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.2352447745259636 




Current Episode:  31%|███▏      | 314/1000 [32:50<1:10:45,  6.19s/it]

[-1.0, -1.0, -1.0, 19.14490137006172, -1.0, -1.0, -1.0, -1.0] : 1.4480934998347987 




Current Episode:  32%|███▏      | 315/1000 [32:56<1:10:19,  6.16s/it]

[16.831005993589805, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.043545994045658 




Current Episode:  32%|███▎      | 325/1000 [33:56<1:05:29,  5.82s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6594296059588427 


[-1.0, -1.0, -1.0, 20.113045215537124, -1.0, -1.0, -1.0, -1.0] : 1.051602060836946 


[-1.0, -1.0, -1.0, 18.350621621190438, -1.0, -1.0, -1.0, -1.0] : 0.5967837933927527 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5967837933927527 




Current Episode:  33%|███▎      | 329/1000 [34:19<1:03:16,  5.66s/it]

[12.30459107655529, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.0224238858228267 




Current Episode:  33%|███▎      | 334/1000 [34:48<1:03:09,  5.69s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6332931398889307 


[50.66345119111445, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5731302915994823 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5731302915994823 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5186829138975314 




Current Episode:  34%|███▍      | 340/1000 [35:25<1:07:00,  6.09s/it]

[-1.0, 28.078384329238652, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6717213082607603 




Current Episode:  34%|███▍      | 343/1000 [35:42<1:05:21,  5.97s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.779109700784666 


[-1.0, -1.0, 12.897749528549442, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.5150942792101225 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6462031590588428 


[-1.0, -1.0, 33.520666391262594, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5848138589482528 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5848138589482528 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.5292565423481688 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.4789771708250927 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.4334743395967089 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.39229427733502154 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.3550263209881945 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.321298820494316 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.290775432547356 


[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.2631517664553572 




Current Episode:  34%|███▍      | 345/1000 [35:54<1:05:04,  5.96s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.025029069162065 


[-1.0, -1.0, 40.517628601035796, -1.0, -1.0, -1.0, -1.0, -1.0] : 2.073718985721159 


[-1.0, -1.0, 34.028665521879965, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.8326513075916688 




Current Episode:  35%|███▌      | 354/1000 [36:49<1:05:51,  6.12s/it]

[-1.0, 36.691659300171956, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 1.9299931791387992 




Current Episode:  36%|███▌      | 355/1000 [36:56<1:05:57,  6.13s/it]

[17.24346737261673, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 9.74887224260069 




Current Episode:  36%|███▌      | 358/1000 [37:13<1:02:51,  5.88s/it]

[13.716121410117779, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.7272163756360059 




Current Episode:  36%|███▌      | 359/1000 [37:19<1:02:21,  5.84s/it]

[37.996839926675285, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.6224556841581393 




Current Episode:  36%|███▌      | 362/1000 [37:36<1:01:15,  5.76s/it]

[-1.0, -1.0, -1.0, 13.876299764193487, -1.0, -1.0, -1.0, 0.15485801402635074] : 1.011304386692883 




Current Episode:  36%|███▋      | 363/1000 [37:42<1:02:20,  5.87s/it]

[-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.5321632719331393 


[282.5730617546511, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 6.271482463555891 


[-1.0, -1.0, 26.166004299624163, -1.0, -1.0, -1.0, -1.0, -1.0] : 3.196607761099491 




Current Episode:  37%|███▋      | 366/1000 [37:59<1:01:24,  5.81s/it]

[15.69882838439522, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] : 0.8662298856463201 




Current Episode:  37%|███▋      | 370/1000 [38:23<1:01:38,  5.87s/it]

# Visualization 

We have used PyGame to visualize the simulation of agents in the world.

<details>
    <summary> Parameters </summary>

### Parameters:
1) 
</details>

<details>
  <summary> Functions </summary>

### Functions:
1)
</details>

pygame.init()

# Window dimensions
WIDTH, HEIGHT = 800, 600
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Rover Domain Simulation")

WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
BLUE = (0, 0, 255)


import pygame

# Initialize Pygame
pygame.init()

# Window dimensions
WIDTH, HEIGHT = 800, 600
screen = pygame.display.set_mode((WIDTH, HEIGHT))
pygame.display.set_caption("Rover Domain Simulation")

# Colors
WHITE = (255, 255, 255)
BLUE = (0, 0, 255)
RED = (255, 0, 0)

# Load a fire image for the POI
fire_image = pygame.image.load("fire.png")  # Replace with your fire image path
fire_image = pygame.transform.scale(fire_image, (30, 30))  # Scale to desired size


class POI:
    def __init__(self, x, y, coupling_required):
        self.x = x
        self.y = y
        self.coupling_required = coupling_required  # Number of agents required
        self.agents_at_poi = set()  # Track agents currently at the POI
        self.active = True

    def check_coupling(self):
        """Check if the coupling requirement is satisfied."""
        if len(self.agents_at_poi) >= self.coupling_required:
            self.active = False

    def draw(self, screen):
        """Draw the POI if active."""
        if self.active:
            screen.blit(fire_image, (self.x, self.y))


class Rover:
    def __init__(self, x, y, id):
        self.x = x
        self.y = y
        self.id = id  # Unique identifier for the rover
        self.color = BLUE
        self.size = 20

    def move(self, dx, dy):
        self.x += dx
        self.y += dy

    def draw(self, screen):
        pygame.draw.rect(screen, self.color, pygame.Rect(self.x, self.y, self.size, self.size))

    def is_at_poi(self, poi):
        """Check if the rover is at the POI."""
        return (
            poi.x <= self.x <= poi.x + 30 and
            poi.y <= self.y <= poi.y + 30
        )


def run_simulation():
    # Initialize rovers and POI
    rovers = [Rover(100, 100, id=1), Rover(200, 200, id=2)]  # Multiple rovers
    poi = POI(400, 300, coupling_required=2)  # POI requiring 2 agents

    clock = pygame.time.Clock()

    running = True
    while running:
        screen.fill(WHITE)  # Clear screen

        # Handle events (e.g., closing the window)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        # Control rover movement (use arrow keys for the first rover, WASD for the second)
        keys = pygame.key.get_pressed()
        if keys[pygame.K_LEFT]:
            rovers[0].move(-5, 0)
        if keys[pygame.K_RIGHT]:
            rovers[0].move(5, 0)
        if keys[pygame.K_UP]:
            rovers[0].move(0, -5)
        if keys[pygame.K_DOWN]:
            rovers[0].move(0, 5)

        if keys[pygame.K_a]:
            rovers[1].move(-5, 0)
        if keys[pygame.K_d]:
            rovers[1].move(5, 0)
        if keys[pygame.K_w]:
            rovers[1].move(0, -5)
        if keys[pygame.K_s]:
            rovers[1].move(0, 5)

        # Check which rovers are at the POI
        poi.agents_at_poi = {rover.id for rover in rovers if rover.is_at_poi(poi)}
        poi.check_coupling()

        # Draw the rovers and POI
        for rover in rovers:
            rover.draw(screen)

        poi.draw(screen)

        # Update the display
        pygame.display.update()

        # Limit frame rate
        clock.tick(30)

    pygame.quit()


if __name__ == "__main__":
    run_simulation()


In [None]:
def import_agent_paths(config_id):
    """
    Import rover paths from pickle file
    :return:
    """
    dir_name = 'Output_Data/'
    file_name = f'EXP{config_id}_Agent_path'
    rover_path_file = os.path.join(dir_name, file_name)
    infile = open(rover_path_file, 'rb')
    rover_paths = pickle.load(infile)
    infile.close()

    return rover_paths

def import_poi_information(n_poi, config_id):
    """
    Import POI information from saved configuration files
    :return:
    """
    pois = np.zeros((n_poi, 6))

    config_input = []
    with open(f'./Configs/POI_Config{config_id}.csv') as csvfile:
        csv_reader = csv.reader(csvfile, delimiter=',')

        for row in csv_reader:
            config_input.append(row)

    for poi_id in range(n_poi):
        pois[poi_id, 0] = float(config_input[poi_id][0])
        pois[poi_id, 1] = float(config_input[poi_id][1])
        pois[poi_id, 2] = float(config_input[poi_id][2])
        pois[poi_id, 3] = float(config_input[poi_id][3])
        pois[poi_id, 4] = float(config_input[poi_id][4])
        pois[poi_id, 5] = float(config_input[poi_id][5])
    return pois

def stop_viz(screen):
    screen.bye()

p = parameters

def run_agent_visualizer(config_id):
    # Define screen parameters for the
    screen_width = p["x_dim"]*10
    screen_height = p["y_dim"]*10
    screen = turtle.Screen()
    screen.setup(screen_width+20, screen_height+20)  # define pixel width and height of screen
    screen.title("Rover Domain")
    screen.bgcolor("white")
    screen.tracer(0)
    
    rovers = []
    rover_paths = import_agent_paths(config_id)
    for rov_id in range(p["n_agents"]):
        rovers.append(turtle.Turtle())
        rovers[rov_id].shape("circle")
        rovers[rov_id].shapesize(10 / 20)  # Number of pixels you want / 20 (default size)
        rovers[rov_id].color("blue")

    pois = []
    poi_info = import_poi_information(p["n_pois"], config_id)
    for poi_id in range(p["n_pois"]):
        pois.append(turtle.Turtle())
        pois[poi_id].shape("triangle")
        pois[poi_id].shapesize(20 / 20)  # Number of pixels you want / 20 (default size)
        pois[poi_id].color("red")
        pois[poi_id].penup()
        # Convert rover units to pixel units used by screen
        px = ((poi_info[poi_id, 0]/p["x_dim"]) * screen_width) - (screen_width/2)
        py = ((poi_info[poi_id, 1]/p["y_dim"]) * screen_height) - (screen_height/2)
        pois[poi_id].goto(px, py)
        pois[poi_id].stamp()

    # for srun in range(p["n_episodes"]):
    for tstep in range(p["n_epochs"]):
        screen.title(f"Rover Domain{tstep}")
        for rov_id in range(p["n_agents"]):
            rovers[rov_id].clearstamps()
            rovx = ((rover_paths[rov_id][tstep][0]/p["x_dim"])*screen_width) - (screen_width/2)
            rovy = ((rover_paths[rov_id][tstep][1]/p["y_dim"])*screen_height) - (screen_height/2)
            rovers[rov_id].goto(rovx, rovy)
            rovers[rov_id].stamp()
        screen.update()
        time.sleep(0.2)
        
    screen.listen()  # Set the screen to listen for key presses
    screen.onkeypress(stop_viz(screen), "q")
    turtle.done()

In [None]:
# run_agent_visualizer(0)