Install prerequisites

In [None]:
# !pip install --no-index torch-scatter -f https://pytorch-geometric.com/whl/torch-1.11.0+cpu.html
# !pip install --no-index torch-sparse -f https://pytorch-geometric.com/whl/torch-1.11.0+cpu.html
# !pip install --no-index torch-cluster -f https://pytorch-geometric.com/whl/torch-1.11.0+cpu.html
# !pip install --no-index torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.11.0+cpu.html
# !pip install torch-geometric
# !pip install umap
#!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0+cu113.html
#!pip install torch-sparse -f https://data.pyg.org/whl/torch-1.11.0+cu113.html
#!pip install torch-geometric

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the project
# files stored there can be reached from this notebook.
from google.colab import drive
drive.mount('/content/drive') 

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
cd /content/drive/MyDrive/WRSN

/content/drive/MyDrive/WRSN


# Constants and modules

In [None]:
""" WRSN settings """
# K = 50 # Number of charging locations
# Large sentinel value (not referenced in the visible part of this notebook).
INFTY = 999999
# Starting (minimum) charging time used by Agent.create_policy_action.
RANDOM_CHARGING_TIME = 50
# Sensor life-cycle status codes.
STATUS = {'LIVE': 1, 'DEAD': 0}
# Index that denotes the base station (depot) instead of a charging location.
BASE_INDEX = -1
# Sensor energy floor: a sensor whose energy would drop to this level is marked DEAD.
E_MIN = 540
# Sensor energy cap applied while charging.
E_MAX = 10800
# Charging-rate model parameters: rate = ALPHA / (BETA + distance) ** 2.
ALPHA = 3600
BETA = 30
# Full energy of the mobile charger (MC).
E_MC = 808000
# Energy the MC spends per unit of travelling time.
P = 5
# MC speed: travelling time = distance / V.
V = 5
E = 100 # Charging rate of MC
ENERGY_STATE = 1000 # Number of states for energy of sensors
PENALTY_PARAMETER_1 = 0.1 # Used for the sum of energy of sensors
PENALTY_PARAMETER_2 = 0.5 # Used to penalize the energy of the MC; prevents it from heading back to the depot unnecessarily
# MC energy level below which WRSNAgent.select_action sends the MC back to the depot.
E_THRESHOLD = 10000
# Simulation time horizon; an episode is terminal once the recorded time exceeds it.
T = 200000
# Running accumulators updated by Agent.diffuse_action.
SUM_REWARDS = 0
REWARDS = []
SUM_OBJECTIVE = 0
OBJECTIVE_EPISODE = []
# Dead-node counts appended by Network.recalculate_network_episodically.
RDNL_PER_EPISODE = []
RDNL_MEAN_PER_EPISODES = []
RECORDED_DEAD_NODES = None
END_EPISODE = [0]
""" DynaQ and training phase settings """
# Epsilon-greedy schedule: eps = EPS_END + (EPS_START - EPS_END) * exp(-steps / EPS_DECAY).
EPS_START = 0.95
EPS_END = 0.05
EPS_DECAY = 800
""" For state graph """
# Width of one time bucket: the time component of a state is recorded_time // DELTA.
DELTA = 40



Import modules

In [None]:
import math
import pickle
import numpy as np
import random
from sklearn.cluster import KMeans
from matplotlib import pyplot as plt
from collections import namedtuple, deque
from itertools import count
from agent import BaseAgent
from environment import BaseEnvironment
from tqdm import tqdm
from copy import copy as deepcopy

# WRSN

## Stationary

### Point

In [None]:
class Point:
    """
    A deployed 2-D position inside the field of interest (FoI).
    :arg pos_x: x coordinate in the FoI
    :arg pos_y: y coordinate in the FoI
    """

    def __init__(self, pos_x, pos_y):
        coordinates = [pos_x, pos_y]
        self.pos = np.array(coordinates)

    def calculate_distance_to_other_point(self, other_point):
        """
        Return the norm of the difference between this position and another one.
        :param other_point: object exposing a `pos` array
        :return: the distance as computed by np.linalg.norm
        """
        try:
            offset = self.pos - other_point.pos
            distance = np.linalg.norm(offset)
        except Exception:
            print("Exception at Point.calculate_distance_to_other_point")
            raise
        return distance


### Base station

In [None]:
class BaseStation:
    """
    The base station (depot) of the network, placed at a fixed position.
    :arg pos_x: x coordinate in the FoI
    :arg pos_y: y coordinate in the FoI
    """

    def __init__(self, pos_x, pos_y):
        self.point = Point(pos_x, pos_y)

    def calculate_distance_to_sensor(self, sensor):
        """Return the distance from the base station to `sensor.point`."""
        try:
            target = sensor.point
            distance = self.point.calculate_distance_to_other_point(target)
        except Exception:
            print("Exception at BaseStation.calculate_distance_to_sensor")
            raise
        return distance

    def calculate_distance_to_charging_location(self, location):
        """Return the distance from the base station to `location.point`."""
        try:
            target = location.point
            distance = self.point.calculate_distance_to_other_point(target)
        except Exception:
            print("Exception at BaseStation.calculate_distance_to_charging_location")
            raise
        return distance


### Charging Location

In [None]:
class ChargingLocation:
    """
    A spot where the mobile charger can stop and charge nearby sensors.
    :arg pos_x: x coordinate in the FoI
    :arg pos_y: y coordinate in the FoI
    """

    def __init__(self, pos_x, pos_y):
        self.point = Point(pos_x, pos_y)
        self.sensors_charging_rate = []
        self.sensors = []

    def calculate_distance_to_sensor(self, sensor):
        """Return the distance from this charging location to `sensor.point`."""
        try:
            target = sensor.point
            distance = self.point.calculate_distance_to_other_point(target)
        except Exception:
            print("Exception at ChargingLocation.calculate_distance_to_sensor")
            raise
        return distance

    def calculate_distance_to_charging_location(self, other_location):
        """Return the distance from this charging location to `other_location.point`."""
        try:
            target = other_location.point
            distance = self.point.calculate_distance_to_other_point(target)
        except Exception:
            print("Exception at ChargingLocation.calculate_distance_to_charging_location")
            raise
        return distance

    def calculate_charging_rate_to_sensor(self, sensor):
        """
        Compute the charging rate this location delivers to a sensor as
        ALPHA / (BETA + distance) ** 2 (described by the original author as
        the Friis equation).
        :param sensor: the sensor being charged
        :return: charging rate
        """
        distance = self.calculate_distance_to_sensor(sensor)
        denominator = (BETA + distance) ** 2
        return ALPHA / denominator


### Sensor

In [None]:
class Sensor:
    """
    create a Sensor class, which is an abstraction of a deployed sensor in real world
    :arg pos_x, x position in the FoI
    :arg pos_y, y position in the FoI
    :arg consumption_rate: the average energy consumption rate of the sensor
    :arg init_energy: the initial energy of sensor
    """

    def __init__(self, pos_x, pos_y, consumption_rate, init_energy):
        self.point = Point(pos_x, pos_y)
        self.consumption_rate = consumption_rate
        self.init_energy = init_energy
        self.energy = init_energy
        self.status = STATUS['LIVE']

    def calculate_distance_to_other_sensor(self, other_sensor):
        """
        Return the distance between this sensor and another sensor.
        :param other_sensor: object exposing a `point`
        :return: distance between the two points
        """
        try:
            return self.point.calculate_distance_to_other_point(other_sensor.point)
        except Exception:
            # Exception (not BaseException) to match every other wrapper in this file.
            print("Exception at Sensor.calculate_distance_to_other_sensor")
            raise

    def reset_to_init(self):
        """Restore the initial energy and mark the sensor as LIVE again."""
        self.energy = self.init_energy
        self.status = STATUS['LIVE']

    def recalculate_energy_by_consumption(self, t):
        """
        calculate the energy of this sensor after a period time
        :param t: the length of the period time; a negative value leaves the
                  energy untouched and only reports the current status
        :return: 1 if the sensor is dead after the period, otherwise 0
        """
        try:
            if t < 0:
                # Negative interval: nothing is consumed, just report the status.
                return 1 if self.status == STATUS['DEAD'] else 0

            remaining = self.energy - t * self.consumption_rate
            if remaining <= E_MIN:
                # Energy is clamped at the floor and the node is marked dead.
                self.energy = E_MIN
                self.status = STATUS['DEAD']
                return 1
            self.energy = remaining
            return 0
        except Exception:
            print("Exception at Sensor.recalculate_energy_by_consumption")
            raise

    def recalculate_energy_by_charging(self, t, charging_rate):
        """
        calculate the energy of this sensor after being charged by MC at some charging location
        :param t: the time which the sensor has been charged (negative: no-op)
        :param charging_rate: the charging rate from charging location (must be >= 0)
        :return: spent charging energy (0 for a dead sensor or a negative interval)
        """
        try:
            if t < 0:
                return 0
            assert charging_rate >= 0
            if self.status == STATUS['DEAD']:
                return 0

            energy_before = self.energy
            net_rate = charging_rate - self.consumption_rate
            energy_after = energy_before + t * net_rate

            if net_rate == 0:
                # Charging exactly offsets consumption, so the level does not move.
                # Handled separately because the saturation formula below divides
                # by net_rate and used to raise ZeroDivisionError for a sensor
                # already at E_MAX.
                if energy_before >= E_MAX:
                    self.energy = E_MAX
                return charging_rate * t

            if net_rate > 0:
                if energy_after >= E_MAX:
                    # Sensor saturates: only the time needed to reach E_MAX is billed.
                    self.energy = E_MAX
                    return charging_rate * (E_MAX - energy_before) / net_rate
                self.energy = energy_after
                return charging_rate * t

            # Consumption outpaces charging from here on.
            if energy_after <= E_MIN:
                # In this case, although the sensor gets charged, it is still considered a dead node
                self.energy = E_MIN
                self.status = STATUS['DEAD']
                return charging_rate * (energy_before - E_MIN) / (self.consumption_rate - charging_rate)
            self.energy = energy_after
            return charging_rate * t
        except Exception:
            print("Exception at Sensor.recalculate_energy_by_charging")
            raise


## Dynamic

### Network

In [None]:
class Network:
    def __init__(self, file_name, k, pos_x=0, pos_y=0):
        """
        Build the network model and immediately set it up from a sensor file.
        :param file_name: file name to establish sensors data
        :param k: number of charging locations (must be passed)
        :param pos_x: optional position x of base station
        :param pos_y: optional position y of base station
        """
        # Run bookkeeping: elapsed time, dead-node count and dead-node indices.
        self.recorded_time = 0
        self.recorded_dead_nodes = 0
        self.recorded_dead_node_list = []

        # Topology: base station, sensors (n of them), charging locations (m of them).
        self.base = None
        self.sensors = []
        self.n = None
        self.charging_locations = []
        self.m = None

        # Derived tables, filled by init_matrices / recalculate_matrices.
        self.charging_locations_distance_matrix = []  # location-to-location distances
        self.charging_locations_charging_matrix = []  # stored as an n x m matrix
        self.charging_locations_charging_lists = []   # per location: indices of the sensors it charges
        self.charging_locations_distance_base = []    # base-to-location distances

        # Number of DELTA-wide time buckets over the horizon T.
        self.len_time = T // DELTA + 1

        self.setup_network(file_name, k, pos_x, pos_y)

    def reset_network(self):
        self.recorded_time = 0  # ghi lại thời gian
        self.recorded_dead_nodes = 0 # ghi lại  số lượng nút chết
        self.recorded_dead_node_list.clear() # list các nút chết
        for sensor in self.sensors:
            sensor.reset_to_init()
        


    def setup_network(self, file_name, k, pos_x=0, pos_y=0):
     
        """
        Setting up the network
        :param file_name: contain sensors data
        :param k: number of cluster
        :param pos_x: position x of base station
        :param pos_y: position y of base station
        :return:
        """
        print("** Setting up network **")
        self.create_base_station_data(pos_x, pos_y)
        self.create_sensor_data(file_name)
        self.recalculate_network(k)
        print("** Network is ready **")

      

    def recalculate_network(self, k):
     
        """
        recalculate network after a period of charging (a long one)
        :param k: number of cluster
        :return:
        """
        self.create_charging_location_data(k) 
        self.init_matrices() 
        self.recalculate_matrices() 

    def create_sensor_data(self, file_name):
        """
        Read sensor records through SensorReader, append one Sensor per record
        to `self.sensors`, and store the resulting sensor count in `self.n`.
        :param file_name: sensor data file name
        :return: None
        """
        print("-- Setting up sensors data --")
        try:
            records = SensorReader.read_sensor(file_name)
            self.sensors.extend(Sensor(*record) for record in records)
            self.n = len(self.sensors)
            print("-- Done --")
        except Exception:
            print("Error at Network.create_sensor_data")
            raise

    def create_charging_location_data(self, k):
        """
        Derive the charging locations by k-means clustering of the sensor
        positions, weighting each sensor by its consumption rate. The cluster
        centres become the charging locations.

        The existing location list is replaced (in place) rather than extended,
        so calling this again via recalculate_network does not leave stale
        locations from a previous clustering at the front of the list.
        :arg k: number of clusters
        :return: None
        """
        print("-- Setting up charging locations --")
        try:
            positions = np.array([sensor.point.pos for sensor in self.sensors])
            weights = np.array([sensor.consumption_rate for sensor in self.sensors])
            model = KMeans(n_clusters=k, n_init=10, random_state=0, max_iter=100000)
            model.fit(X=positions, sample_weight=weights)
            # Slice assignment keeps the identity of the list for any holder of a reference.
            self.charging_locations[:] = [ChargingLocation(*center) for center in model.cluster_centers_]
            self.m = k
            print("-- Done --")
        except Exception:
            print("Error at Network.create_charging_location_data")
            raise


    def create_base_station_data(self, pos_x=0, pos_y=0):
        """
        Create the base station at the given position and store it in `self.base`.
        :param pos_x: position x of base station
        :param pos_y: position y of base station
        """
        print("-- Setting up base station --")
        station = BaseStation(pos_x, pos_y)
        self.base = station
        print("-- Done --")


    def init_matrices(self):
    
        print("-- Initializing matrices --")
        self.charging_locations_distance_matrix = [[0 for _ in range(self.m)] for __ in range(self.m)]
        self.charging_locations_charging_matrix = [[0 for _ in range(self.m)] for __ in range(self.n)]
        self.charging_locations_charging_lists = [[] for _ in range(self.m)]
        self.charging_locations_distance_base = [0 for _ in range(self.m)]
        print("-- Done --")


    def recalculate_matrices(self):
      
        print("-- Calculating matrices --")
        # Calculate distance matrix of charging locations
        for i in range(self.m):
            for j in range(self.m):
                self.charging_locations_distance_matrix[i][j] = self.charging_locations[
                    i].calculate_distance_to_charging_location(self.charging_locations[j])

        # Calculate distance from base station to charging locations
        for i in range(self.m):
            self.charging_locations_distance_base[i] = self.base.calculate_distance_to_charging_location(
                self.charging_locations[i])

        # Calculate charging matrix of charging locations
        for i in range(self.n):
            max_val = (0, 0)
            for j in range(self.m):
                # Compare charging rate and stores the index of charging location
                max_val = max(max_val,
                              (self.charging_locations[j].calculate_charging_rate_to_sensor(self.sensors[i]), j))
            # Sensor i only receives energy from the charging location provides the maximum charging rate
            self.charging_locations_charging_matrix[i][max_val[1]] = max_val[0]
            self.charging_locations_charging_lists[max_val[1]].append(i)
        print("-- Done --")


    def check_time_exceeds(self, time_interval):
        """
        Check whether advancing the clock by `time_interval` would pass the
        horizon T and, if so, clip the interval.

        The clipped interval is `T - recorded_time + 1`, i.e. it moves the clock
        to T + 1, which satisfies the `recorded_time > T` terminal check used by
        WRSNEnvironment.env_step. It can be negative when the clock is already
        beyond T + 1.
        :param time_interval: the next time interval
        :return: (flag, time_interval) where flag is True when the interval was clipped
        :raises Exception: any error is logged and re-raised (previously it was
                           swallowed and the method returned None, which the
                           callers then failed to unpack)
        """
        try:
            flag = False
            if self.recorded_time + time_interval > T:
                time_interval = T - self.recorded_time + 1
                flag = True
            return flag, time_interval
        except Exception:
            print("Exception at Network.check_time_exceeds")
            raise


    def recalculate_network_by_consumption(self, time_interval, j):
        """
        Advance the network by a period in which sensors only consume energy
        (no charging), then return the resulting observation index.

        The clock is advanced exactly once by the (possibly clipped) interval;
        the previous version added the interval to `recorded_time` twice, so
        travelling time was counted double compared with
        recalculate_network_by_charging.
        :param time_interval: the time interval
        :param j: index of the MC position (BASE_INDEX for the base station);
                  it is shifted by one so the base maps to slot 0
        :return: integer observation built from (j + 1, recorded_time // DELTA)
        """
        flag, time_interval = self.check_time_exceeds(time_interval)
        self.recorded_time += time_interval

        # Sensors already recorded as dead are skipped.
        for i in range(self.n):
            if i not in self.recorded_dead_node_list:
                self.sensors[i].recalculate_energy_by_consumption(time_interval)

        if flag:
            # The time horizon has been reached: close the bookkeeping for this run.
            self.recalculate_network_episodically()
            print("## WARNING: TIME EXCEEDED ##")

        time = self.recorded_time // DELTA
        return self.get_observation([(j + 1), time])

         
    def get_observation(self, state):
        return int(state[0] * self.len_time + state[1])
        

    def recalculate_network_episodically(self):
        """
        Record the sensors that have died since the last call.

        Every DEAD sensor that is not yet in `recorded_dead_node_list` is added
        to it and counted in `recorded_dead_nodes`; the running total is then
        appended to the module-level RDNL_PER_EPISODE list and printed.
        :return: None
        """
        for i in range(self.n):
            if self.sensors[i].status == STATUS['DEAD'] and i not in self.recorded_dead_node_list:
                self.recorded_dead_node_list.append(i)
                # A zero-length consumption step returns the sensor's dead counter.
                self.recorded_dead_nodes += self.sensors[i].recalculate_energy_by_consumption(0)
        RDNL_PER_EPISODE.append(self.recorded_dead_nodes)
        print("self.recorded_dead_nodes", self.recorded_dead_nodes)
        return

    def recalculate_network_by_charging(self, charging_point_index, time_interval):
      
        """
        update the network status after MC charges at some charging location for some time
        :param charging_point_index: index of charging point
        :param time_interval: time spent on charging
        :return: spent energy
        """
        flag, time_interval = self.check_time_exceeds(time_interval)
        self.recorded_time += time_interval
        spent_energy = 0
        for i in range(self.n):
            if i in self.charging_locations_charging_lists[charging_point_index]:
                spent_energy += self.sensors[i].recalculate_energy_by_charging(time_interval,
                                                                               self.charging_locations_charging_matrix
                                                                               [i][charging_point_index])
            else:
                self.sensors[i].recalculate_energy_by_consumption(time_interval)
        if flag:
          # If the maximum time exceeds
          print("## WARNING: TIME EXCEEDED ##")
        return spent_energy

    def recalculate_by_agent_action(self, action):
        """
        Apply an MC action to the network.
        NOTE: the MC is allowed to charge a sensor even if its energy is below E_MIN.
        :param action: the action taken by the MC
        :return: the energy the agent spends on charging (0 when heading to the base)
        """
        to_index, charging_time_interval, travelling_time_interval = action.create_action_decoding()

        if to_index == BASE_INDEX:
            # Heading back to the base station ends the episode: sensors only
            # consume during the whole trip plus the stay at the depot.
            total_interval = travelling_time_interval + charging_time_interval
            self.recalculate_network_by_consumption(total_interval, to_index)
            self.recalculate_network_episodically()
            return 0

        # Travel first (consumption only), then charge at the destination.
        self.recalculate_network_by_consumption(travelling_time_interval, to_index)
        return self.recalculate_network_by_charging(to_index, charging_time_interval)

    """ Two illustrated functions"""

    def plot_network(self):
        """Plot sensors, charging locations and the base station via NetworkPlotter."""
        NetworkPlotter(self.sensors, self.charging_locations, self.base).plot_network()

    def plot_journey(self, journey):
        """
        Plot the network together with a journey via NetworkPlotter.
        :param journey: sequence of charging-location indices
        """
        NetworkPlotter(self.sensors, self.charging_locations, self.base).plot_journey(journey)


### Agent

In [None]:
class Agent:
    global REWARDS
    def __init__(self, network: Network):
        """
        The agent of this environment, i.e. the mobile charger (MC). It starts
        at the base station with full energy and an empty tabu list.
        :param network: the environmental network considering
        """
        self.network = network
        self.energy = E_MC
        self.current_index = BASE_INDEX
        self.tabu_counter = 0
        self.tabu_list = [0] * (self.network.m + 1)
        self.tabu_counter = 0


    def reset_agent(self):
        """Send the MC back to its start state: at the base, full energy, empty tabu list."""
        self.reset_tabu_list()
        self.energy = E_MC
        self.current_index = BASE_INDEX


    def reset_tabu_list(self):
        """Clear the tabu flags (m + 1 slots) and the tabu counter."""
        slot_count = self.network.m + 1
        self.tabu_counter = 0
        self.tabu_list = [0] * slot_count

    def index_tabu_element(self, i):
        """
        Flag slot `i` as tabu; once `network.m` flags have been counted the
        whole tabu list is cleared.
        :param i: slot index to flag
        """
        self.tabu_list[i] = 1
        self.tabu_counter += 1
        if self.tabu_counter != self.network.m:
            return
        self.reset_tabu_list()

    def get_state(self):
        """
        Ask the network for the observation at the MC's current position
        without letting any time pass (zero-length consumption step).
        :return: state encoding produced by the network
        """
        no_elapsed_time = 0
        return self.network.recalculate_network_by_consumption(no_elapsed_time, self.current_index)

    def get_objective(self):
        """
        Evaluate the objective on the current energy of the first `network.n` sensors.
        :return: value of Objective.create_objective_value
        """
        energies = []
        for sensor_idx in range(self.network.n):
            energies.append(self.network.sensors[sensor_idx].energy)
        return Objective.create_objective_value(sensors_energy_list=energies)

    def diffuse_action(self, action):
        """
        Commit an action: apply it to the network, charge the MC's own energy
        budget for charging and travelling, and update the global accumulators
        SUM_REWARDS and SUM_OBJECTIVE.
        :param action: Action taken
        :return: (reward, done) where reward is the change of the objective
                 caused by the action and done is 1 when the MC returned to the
                 base station, otherwise 0
        :raises AssertionError: if the MC energy is not positive after paying
                                for charging and travelling
        """
        global SUM_OBJECTIVE
        global SUM_REWARDS

        previous_objective = self.get_objective()
        self.energy -= self.network.recalculate_by_agent_action(action)
        to_index, charging_time_interval, travelling_time_interval = action.create_action_decoding()
        self.energy -= P * travelling_time_interval
        assert self.energy > 0
        self.current_index = to_index

        returned_to_base = to_index == BASE_INDEX
        if returned_to_base:
            # MC returns to base station and recharges itself, capped at E_MC.
            self.energy = min(E_MC, self.energy + charging_time_interval * E)

        # The objective only depends on sensor energies, which do not change
        # any more at this point, so it is evaluated once and reused.
        current_objective = self.get_objective()
        reward = current_objective - previous_objective
        SUM_REWARDS += reward
        SUM_OBJECTIVE += current_objective
        return reward, 1 if returned_to_base else 0
        
    def create_action(self, to_index, time_interval):
        """
        Build an Action towards `to_index`.
        :param to_index: destination index (BASE_INDEX for the base station)
        :param time_interval: charging time to spend at the destination
        :return: Action carrying the destination, the charging time and the
                 travelling time from the current position
        """
        travelling_time = self.calculate_travelling_time(to_index)
        return Action.create_action_encoding(to_index, time_interval, travelling_time)

    def create_policy_action(self, to_index=0, is_random=True):
        """
        Build the action for visiting a charging location, sized so that the
        sensors it serves get fully charged (capped at 2000 time units).
        Falls back to a return-to-base action when the destination is the base
        or when the MC energy would not cover the visit with a 1000 reserve.
        :param to_index: destination index; ignored when `is_random` is True
        :param is_random: pick a random charging location instead of `to_index`
        :return: the Action to take
        """
        def back_to_base():
            # NOTE(review): the travel cost is subtracted here, although the MC
            # arrives at the depot with `energy - travel * P`; confirm whether
            # `+` was intended for a full recharge.
            depot_time = (E_MC - self.energy - self.calculate_travelling_time(BASE_INDEX) * P) / E
            return self.create_action(BASE_INDEX, depot_time)

        # This version is designated to fully charging
        try:
            if is_random:
                to_index = random.randint(0, self.network.m - 1)
            if to_index == BASE_INDEX:
                return back_to_base()

            network = self.network
            served = network.charging_locations_charging_lists[to_index]
            # Loop-invariant; also handles the case where the MC is at the base.
            travelling_time = self.calculate_travelling_time(to_index)

            charging_time_interval = RANDOM_CHARGING_TIME
            for i in served:
                rate = network.charging_locations_charging_matrix[i][to_index]
                sensor = network.sensors[i]
                if rate > sensor.consumption_rate:
                    # Estimate charging time for sensor i: refill up to E_MAX,
                    # including what it consumes while the MC is travelling.
                    charging_time_interval = max(
                        charging_time_interval,
                        (E_MAX - sensor.energy + sensor.consumption_rate * travelling_time)
                        / (rate - sensor.consumption_rate))
            charging_time_interval = min(charging_time_interval, 2000)

            estimate_energy = sum(
                charging_time_interval * network.charging_locations_charging_matrix[i][to_index]
                for i in served)

            # The travel cost uses calculate_travelling_time instead of indexing
            # the location-to-location matrix directly: with the MC at the base
            # (current_index == -1) the direct lookup silently read the row of
            # the last charging location.
            if self.energy < 1000 + estimate_energy + P * travelling_time:
                return back_to_base()

            print("++ Charging time spent at {} is {} ++".format(to_index, charging_time_interval))
            print("++ Energy left of MC is {} ++\n".format(self.energy))
            return self.create_action(to_index, charging_time_interval)
        except Exception:
            print("Error at Agent.create_policy_action")
            raise


    def calculate_travelling_time(self, to_index):
        """
        calculate the time for travelling to the next index
        :param to_index: destination index (BASE_INDEX for the base station)
        :return: distance between the current position and the destination
                 divided by the speed V; 0 when the MC is at the base and the
                 destination is the base as well
        """
        at_base = self.current_index == BASE_INDEX
        to_base = to_index == BASE_INDEX

        if at_base and to_base:
            # Without this guard BASE_INDEX (-1) was used as a list index and
            # the distance of the last charging location was returned.
            return 0
        if at_base:
            return self.network.charging_locations_distance_base[to_index] / V
        if to_base:
            return self.network.charging_locations_distance_base[self.current_index] / V
        return self.network.charging_locations_distance_matrix[self.current_index][to_index] / V


### Action

In [None]:
class Action:
    """One move of the mobile charger: where to go, how long to charge, how long to travel."""

    def __init__(self, to_index, charging_time_interval, travelling_time_interval):
        """
        :param to_index: the next index
        :param charging_time_interval: the charging time spent
        :param travelling_time_interval: the travelling time spent
        """
        self.to_index = to_index
        self.travelling_time_interval = travelling_time_interval
        self.charging_time_interval = charging_time_interval

    @staticmethod
    def create_action_encoding(to_index, charging_time_interval, travelling_time_interval):
        """Factory: wrap the three components into an Action instance."""
        encoded = Action(to_index, charging_time_interval, travelling_time_interval)
        return encoded

    def create_action_decoding(self):
        """Return (to_index, charging_time_interval, travelling_time_interval)."""
        decoded = (self.to_index, self.charging_time_interval, self.travelling_time_interval)
        return decoded

    @staticmethod
    def calculate_action_size(agent) -> int:
        """Return the number of available actions, i.e. `agent.network.m`."""
        try:
            size = agent.network.m
        except Exception:
            print("Error at Action.calculate_action_size")
            raise
        return size


### State

In [None]:
class State:
    """Helper that turns a list of per-sensor features into a state vector."""

    @staticmethod
    def create_state_encoding(**kwargs):
        """
        Encode the state as a numpy array.
        :param kwargs: must contain `feature`, an iterable of numbers
        :return: numpy array holding the values of `feature` in order
        :raises KeyError: if `feature` is missing (logged and re-raised)
        """
        try:
            sensors_energy_list = kwargs['feature']
            # Fixed: the original called the non-existent `np.arrayay`.
            return np.array(list(sensors_energy_list))
        except Exception:
            print("Error at State.create_state_encoding")
            raise


### Objective

In [None]:
class Objective:
    """Objective function evaluated on the sensors' energy levels."""

    @staticmethod
    def create_objective_value(**kwargs):
        """
        Compute mean(energies above E_MIN) / mean(energies below E_MIN) / 100.
        An empty group is replaced by [1], so its mean is 1.
        :param kwargs: must contain `sensors_energy_list`
        :return: the objective value
        """
        try:
            energies = kwargs['sensors_energy_list']
            above_floor = [level for level in energies if level > E_MIN]
            # NOTE(review): Sensor clamps energy at E_MIN, so this strict `<`
            # may never match during a run - confirm whether `<=` was intended.
            below_floor = [level for level in energies if level < E_MIN]
            if not above_floor:
                above_floor = [1]
            if not below_floor:
                below_floor = [1]

            ratio = np.mean(above_floor) / (np.mean(below_floor))
            return ratio / 1e2
        except Exception:
            print("Error at Objective.create_objective_value")
            raise


## Util

### Base Reader

In [None]:
class BaseReader:
    """Reads whitespace-separated numeric records from a text file."""

    def __init__(self, file_name):
        """
        :param file_name: (String) File name of input data
        """
        self.file_name = file_name

    @staticmethod
    def read(file_name):
        """
        Lazily yield one tuple of floats per non-blank line of the file.

        The file is read and closed (via `with`) before the first record is
        yielded, so no handle stays open if the consumer stops early or a line
        fails to parse. Fields may be separated by any run of whitespace and
        blank lines are skipped.
        :param file_name: path of the data file
        :return: generator of float tuples
        :raises Exception: I/O and parsing errors are logged and re-raised
        """
        try:
            with open(file_name) as handle:
                lines = handle.readlines()
            for data_line in lines:
                fields = data_line.split()
                if not fields:
                    continue
                yield tuple(map(float, fields))
        except Exception:
            print("Exception at BaseReader.read")
            raise


### Sensor Reader

In [None]:
class SensorReader(BaseReader):
    """Reader for sensor data files; delegates the parsing to BaseReader."""

    def __init__(self, file_name):
        super().__init__(file_name)

    @staticmethod
    def read_sensor(file_name):
        """
        Return the record generator produced by BaseReader.read.
        :param file_name: path of the sensor data file
        """
        try:
            records = BaseReader.read(file_name)
        except Exception:
            print("Exception at SensorReader.read_sensor")
            raise
        return records


### Network Plotter

In [None]:
class NetworkPlotter:
    def __init__(self, sensor_list, charging_location_list, base_station):
        """
        plot the illustration of dynamic
        :param sensor_list: (Sensor[]) List of sensors in dynamic
        :param charging_location_list: (ChargingLocation[]) List of charging locations in dynamic
        :param base_station: object exposing `point.pos` (e.g. a BaseStation)
        """
        self.sensor_list = sensor_list
        self.charging_location_list = charging_location_list
        self.base_station = base_station

    def _draw_network(self):
        """Draw sensors, base station, charging locations and the legend without showing the figure."""
        # Plot sensors in dynamic
        for sensor in self.sensor_list:
            plt.plot(sensor.point.pos[0], sensor.point.pos[1], 'bo', linewidth=3, markersize=3)

        # Plot base station
        base = self.base_station
        plt.plot(base.point.pos[0], base.point.pos[1], 'go', linewidth=3, markersize=3, label='Base Station')

        # Plot charging locations
        for location in self.charging_location_list:
            plt.plot(location.point.pos[0], location.point.pos[1], 'r^', linewidth=3, markersize=3)
        plt.legend(loc='upper left')

    def plot_network(self):
        """Show the network: sensors (blue), base station (green), charging locations (red)."""
        self._draw_network()
        plt.show()

    def plot_journey(self, journey):
        """
        Show the network with the route base -> journey stops -> base drawn on
        top as a dashed line.

        The network is drawn without calling plt.show() first; showing the
        figure before adding the route put the route on a separate figure.
        :param journey: sequence of charging-location indices, in visiting order
        """
        self._draw_network()
        location_order = [self.charging_location_list[i] for i in journey]
        base_x, base_y = self.base_station.point.pos[0], self.base_station.point.pos[1]
        travel_x = [base_x] + [loc.point.pos[0] for loc in location_order] + [base_x]
        travel_y = [base_y] + [loc.point.pos[1] for loc in location_order] + [base_y]
        plt.plot(travel_x, travel_y, linestyle='dashed', linewidth=2, markersize=2)
        plt.show()


# WRSN Environment

In [None]:
#!/usr/bin/env python


import numpy as np
from copy import deepcopy

class WRSNEnvironment(BaseEnvironment):
    """Environment wrapper that lets an RL agent drive the mobile charger in a WRSN network."""

    def env_init(self, agent, network, env_info={}):
        """
        Attach the mobile charger and the network to this environment.
        :param agent: the mobile charger (exposes get_state / diffuse_action / reset_agent)
        :param network: the network being simulated
        :param env_info: unused
        """
        self.agent = agent
        self.network = network
        self.current_state = None

    def env_start(self):
        """The first method called when the episode starts, called before the
        agent starts.
        Returns:
            The first state observation from the environment.
        """
        first_state = self.agent.get_state()
        self.current_state = first_state
        self.reward_obs_term = (0.0, first_state, False)
        return self.reward_obs_term[1]

    def env_step(self, action):
        """
        Apply one action and report the outcome.
        Once the network's recorded time exceeds T the step is terminal and
        both the network and the mobile charger are reset.
        :param action: the Action to commit
        :return: (reward, state observed right after the action, is_terminal)
        """
        reward, done = self.agent.diffuse_action(action)
        self.current_state = self.agent.get_state()

        is_terminal = bool(self.network.recorded_time > T)
        if is_terminal:
            self.network.reset_network()
            self.agent.reset_agent()

        self.reward_obs_term = (reward, self.current_state, is_terminal)
        print("recodrd time ", self.network.recorded_time)
        return self.reward_obs_term

    def env_cleanup(self):
        """Cleanup done after the environment ends"""
        pass

    def env_message(self, message):
        """
        Answer a text query about the environment.
        :param message: the query string
        :return: the last reward for the known query, otherwise a fallback text
        """
        if message != "what is the current reward?":
            return "I don't know how to respond to your message"
        return "{}".format(self.reward_obs_term[0])


# DynaQ Agent


In [None]:


class WRSNAgent(BaseAgent):
    """Tabular Dyna-Q agent that chooses the next destination of the charger.

    Real transitions update the Q-table directly and are also stored in a
    one-step model (`self.model[state][action] = (next_state, reward)`), from
    which `planning_steps` simulated updates are replayed after every real
    update.
    """

    def agent_init(self, agent, network, agent_info):
        """Set up the Q-table, the model and the hyper-parameters.

        Args:
            agent: the charger object (provides `energy`, `tabu_list`,
                `index_tabu_element` and `create_policy_action`).
            network: the network object; stored but not read in this class.
            agent_info: dict with the required keys "num_intervals",
                "discount", "step_size" and "planning_steps", and the optional
                keys "random_seed", "planning_random_seed" (both default 0)
                and "verbose" (default False; enables debug prints).

        Raises:
            KeyError: if a required key is missing from `agent_info`.
        """
        self.network = network
        self.agent = agent
        self.num_actions = Action.calculate_action_size(self.agent)
        self.num_index = self.num_actions + 1

        # Fail immediately with an accurate message instead of printing and
        # crashing later on the missing attribute.
        if "num_intervals" not in agent_info:
            raise KeyError("You need to pass 'num_intervals' in agent_info "
                           "to initialize the action-value table")
        self.num_intervals = agent_info["num_intervals"]

        self.gamma = agent_info["discount"]
        self.step_size = agent_info["step_size"]
        self.planning_steps = agent_info["planning_steps"]
        self.verbose = agent_info.get("verbose", False)
        # NOTE(review): rand_generator is created but never used in this class;
        # exploration in select_action draws from the global `random` module,
        # so "random_seed" does not influence it.
        self.rand_generator = np.random.RandomState(agent_info.get('random_seed', 0))
        self.planning_rand_generator = np.random.RandomState(agent_info.get('planning_random_seed', 0))
        # One row per state index, one column per action.
        self.q_values = np.zeros((self.num_index * self.num_intervals, self.num_actions))
        self.actions = list(range(self.num_actions))
        self.past_action = -1
        self.past_state = -1
        self.model = {}
        self.steps_done = 0

    def select_action(self, state):
        """Pick the next action with an epsilon-greedy, tabu-aware policy.

        Args:
            state: row index into `self.q_values`.

        Returns:
            The action object built by `self.agent.create_policy_action`.
        """
        # If the energy of the MC is below the threshold, it must go back to
        # the depot.
        if self.agent.energy < E_THRESHOLD:
            return self.agent.create_policy_action(BASE_INDEX, is_random=False)

        # Exponentially decaying exploration rate.
        sample = random.random()
        eps_threshold = EPS_END + (EPS_START - EPS_END) * \
                        math.exp(-1. * self.steps_done / EPS_DECAY)
        self.steps_done += 1
        if self.verbose and eps_threshold < 0.1:
            print("eps_threshold :", eps_threshold)

        # Explore.
        if sample <= eps_threshold:
            return self.agent.create_policy_action()

        # Exploit: walk the actions from highest to lowest Q-value (ties in
        # ascending index order) and take the first one that is not tabu.
        q_row = self.q_values[state]
        if self.verbose:
            print("list_value_decrease: ", self.sort_decrease(q_row))
        for candidate in np.argsort(-q_row, kind="stable"):
            if self.agent.tabu_list[candidate] == 0:
                self.agent.index_tabu_element(candidate)
                return self.agent.create_policy_action(candidate, is_random=False)

        # Every action is tabu. Previously this path returned None, which
        # crashed on the next `.to_index` access.
        # NOTE(review): heading to the depot is the chosen fallback - confirm
        # it is the desired behaviour when nothing is selectable.
        return self.agent.create_policy_action(BASE_INDEX, is_random=False)

    def agent_start(self, state):
        """Choose the first action of an episode and remember (state, action)."""
        action = self.select_action(state)
        self.past_state = state
        self.past_action = action
        return self.past_action

    def agent_step(self, reward, state):
        """Learn from the last transition, plan, then choose the next action.

        Args:
            reward: reward received for the previous action.
            state: state reached after the previous action.

        Returns:
            The next action object.
        """
        self._q_update(self.past_state, self.past_action, reward, state)
        self.update_model(self.past_state, self.past_action, state, reward)
        self.planning_step()
        action = self.select_action(state)
        self.past_state = state
        self.past_action = action
        return self.past_action

    def agent_end(self, reward):
        """Learn from the terminal transition and run a final planning pass."""
        self._q_update(self.past_state, self.past_action, reward)
        # -1 marks "terminal" as the next state inside the model.
        self.update_model(self.past_state, self.past_action, -1, reward)
        self.planning_step()

    def _q_update(self, state, action, reward, next_state=None):
        """Apply one Q-learning update; `next_state=None` means terminal."""
        column = action.to_index
        target = reward
        if next_state is not None:
            target = reward + self.gamma * np.max(self.q_values[next_state])
        self.q_values[state][column] += self.step_size * (target - self.q_values[state][column])

    def sort_decrease(self, q_values):
        """Return the given values as a list sorted from largest to smallest."""
        return sorted(q_values, reverse=True)

    def planning_step(self):
        """Replay `planning_steps` random transitions from the learned model."""
        for _ in range(self.planning_steps):
            state = self.planning_rand_generator.choice(list(self.model.keys()))
            action = self.planning_rand_generator.choice(list(self.model[state].keys()))
            next_state, reward = self.model[state][action]
            if next_state != -1:
                self._q_update(state, action, reward, next_state)
            else:
                self._q_update(state, action, reward)

    def update_model(self, past_state, past_action, state, reward):
        """Record the latest outcome observed for (past_state, past_action).

        NOTE(review): the action object itself is the dictionary key; unless
        the action class defines `__hash__`/`__eq__`, every new action object
        becomes a separate entry - confirm this is intended.
        """
        self.model.setdefault(past_state, {})[past_action] = (state, reward)



# Experiment: Dyna-Q agent in the wrsn environment

In [None]:
# RLGLUE
#!/usr/bin/env python

"""Glues together an experiment, agent, and environment.
"""

from __future__ import print_function


class RLGlue:
    """Glue object that drives one agent/environment pair.

    args:
        env_class: class of the environment; instantiated with no arguments.
        agent_class: class of the learning agent; instantiated with no arguments.

    `total_reward`, `num_steps` and `num_episodes` accumulate across
    episodes; they are only reset by `rl_init` (and `rl_env_start`).
    """

    def __init__(self, env_class, agent_class):
        self.environment = env_class()
        self.agent = agent_class()
        self.total_reward = None
        self.last_action = None
        self.num_steps = None
        self.num_episodes = None

    def rl_init(self, agent, network, agent_init_info=None, env_init_info=None):
        """Initial method called when RLGlue experiment is created.

        Args:
            agent: simulation object forwarded to both `env_init` and `agent_init`.
            network: simulation object forwarded to both `env_init` and `agent_init`.
            agent_init_info: dict forwarded to `agent_init` (defaults to `{}`).
            env_init_info: dict forwarded to `env_init` (defaults to `{}`).
        """
        # Fresh dicts per call instead of shared mutable default arguments.
        if agent_init_info is None:
            agent_init_info = {}
        if env_init_info is None:
            env_init_info = {}
        self.environment.env_init(agent, network, env_init_info)
        self.agent.agent_init(agent, network, agent_init_info)
        self.total_reward = 0.0
        self.num_steps = 0
        self.num_episodes = 0
        self.network = network

    def rl_start(self, agent_start_info={}, env_start_info={}):
        """Start an episode; returns `(first_state, first_action)`.

        The two info arguments are accepted but not used.
        """
        last_state = self.environment.env_start()
        self.last_action = self.agent.agent_start(last_state)
        observation = (last_state, self.last_action)
        return observation

    def rl_agent_start(self, observation):
        """Forward to `agent.agent_start` and return its action."""
        return self.agent.agent_start(observation)

    def rl_agent_step(self, reward, observation):
        """Forward to `agent.agent_step` and return its action."""
        return self.agent.agent_step(reward, observation)

    def rl_agent_end(self, reward):
        """Forward the terminal reward to `agent.agent_end`."""
        self.agent.agent_end(reward)

    def rl_env_start(self):
        """Reset the reward/step counters and start the environment only."""
        self.total_reward = 0.0
        self.num_steps = 1
        this_observation = self.environment.env_start()
        return this_observation

    def rl_env_step(self, action):
        """Step the environment only; returns `(reward, state, terminal)`."""
        ro = self.environment.env_step(action)
        (this_reward, _, terminal) = ro
        self.total_reward += this_reward

        if terminal:
            self.num_episodes += 1
        else:
            self.num_steps += 1
        return ro

    def rl_step(self):
        """Advance environment and agent by one step.

        Returns:
            `(reward, state, action, terminal)`; `action` is None on the
            terminal step.
        """
        (reward, last_state, term) = self.environment.env_step(self.last_action)
        self.total_reward += reward

        if term:
            self.num_episodes += 1
            self.agent.agent_end(reward)
            roat = (reward, last_state, None, term)
        else:
            # Only non-terminal steps are counted in num_steps.
            self.num_steps += 1
            self.last_action = self.agent.agent_step(reward, last_state)
            roat = (reward, last_state, self.last_action, term)
        return roat

    def rl_cleanup(self):
        """Run the cleanup hooks of the environment and the agent."""
        self.environment.env_cleanup()
        self.agent.agent_cleanup()

    def rl_agent_message(self, message):
        """Forward a message to the agent and return its reply."""
        return self.agent.agent_message(message)

    def rl_env_message(self, message):
        """Forward a message to the environment and return its reply."""
        return self.environment.env_message(message)

    def rl_episode(self, max_steps_this_episode):
        """Run one episode.

        Args:
            max_steps_this_episode: maximum number of counted (non-terminal)
                steps for this episode; 0 means no limit.

        Returns:
            True if the episode reached a terminal state, False if it was cut
            off by the step limit.
        """
        is_terminal = False
        self.rl_start()

        # num_steps is cumulative across episodes, so the limit has to be
        # measured relative to its value at the start of this episode.
        # Comparing the raw counter made every episode after the cumulative
        # count reached the limit end without taking a single step.
        steps_at_start = self.num_steps
        while (not is_terminal) and ((max_steps_this_episode == 0) or
                                     (self.num_steps - steps_at_start < max_steps_this_episode)):
            rl_step_result = self.rl_step()
            is_terminal = rl_step_result[3]

        return is_terminal

    def rl_return(self):
        """Total reward accumulated since `rl_init` (or `rl_env_start`)."""
        return self.total_reward

    def rl_num_steps(self):
        """Number of non-terminal steps counted so far."""
        return self.num_steps

    def rl_num_episodes(self):
        """Number of episodes that reached a terminal state."""
        return self.num_episodes


In [None]:
# EXPERIMENT

def run_experiment( agent, network, current_env, current_agent, agent_parameters, environment_parameters, experiment_parameters):
  """Train for `num_runs` x `num_episodes` episodes and collect the rewards.

  Args:
      agent: simulation object handed to `RLGlue.rl_init`.
      network: simulation object handed to `RLGlue.rl_init`.
      current_env: environment class (instantiated by RLGlue).
      current_agent: learning-agent class (instantiated by RLGlue).
      agent_parameters: dict forwarded to the learning agent.
      environment_parameters: dict forwarded to the environment.
      experiment_parameters: dict with "num_runs" and "num_episodes", and
          optionally "max_steps_per_episode" (default 100000).

  Returns:
      List with one entry per run; each entry is the list of per-episode
      reward sums of that run.

  Side effects:
      Appends one value per episode to the module-level lists
      OBJECTIVE_EPISODE, RDNL_MEAN_PER_EPISODES and REWARDS, and resets
      SUM_OBJECTIVE, SUM_REWARDS and RDNL_PER_EPISODE after every episode.
      These lists are never cleared here, so calling this function again
      appends to the results of the previous call.
  """
  # Only names that are re-assigned need a `global` declaration; the lists
  # are mutated in place.
  global SUM_OBJECTIVE
  global SUM_REWARDS

  all_reward_sums = []
  num_runs = experiment_parameters["num_runs"]
  num_episodes = experiment_parameters["num_episodes"]
  max_steps_per_episode = experiment_parameters.get("max_steps_per_episode", 100000)

  for _ in tqdm(range(num_runs)):
        rl_glue = RLGlue(current_env, current_agent)
        rl_glue.rl_init(agent, network, agent_parameters, environment_parameters)
        reward_sums = []
        last_episode_total_reward = 0

        for episode in range(num_episodes):

            # Runs an episode
            rl_glue.rl_episode(max_steps_per_episode)

            # Flush the per-episode accumulators into the history lists.
            OBJECTIVE_EPISODE.append(SUM_OBJECTIVE)
            SUM_OBJECTIVE = 0
            # np.mean of an empty list is nan (with a RuntimeWarning).
            RDNL_MEAN_PER_EPISODES.append(np.mean(RDNL_PER_EPISODE))
            RDNL_PER_EPISODE.clear()

            REWARDS.append(SUM_REWARDS)
            SUM_REWARDS = 0

            print("end episode")
            print("episode :", episode)

            # rl_return() is cumulative over the run, so the episode's own
            # reward is the difference to the previous reading.
            reward_sums.append(rl_glue.rl_return() - last_episode_total_reward)
            last_episode_total_reward = rl_glue.rl_return()

        all_reward_sums.append(reward_sums)

  return all_reward_sums
       

# Main script

In [None]:
import os
# Dataset file and the directory (on the mounted Drive) that contains it.
file_name = '7.txt'
path_f = '/content/drive/MyDrive/WRSN/dataset'
# Passed to Network below - presumably a position in the field; confirm
# against the Network constructor.
pos_x = 250
pos_y = 250
node = 200
# NOTE(review): presumably the number of charging locations (one per five
# nodes) - confirm against the Network constructor.
m = node // 5
# Number of DELTA-wide intervals needed to cover the horizon T.
num_intervals = T // DELTA + 1
network = Network(os.path.join(path_f, file_name), m, pos_x, pos_y)
agent = Agent(network)

** Setting up network **
-- Setting up base station --
-- Done --
-- Setting up sensors data --
-- Done --
-- Setting up charging locations --
-- Done --
-- Initializing matrices --
-- Done --
-- Calculating matrices --
-- Done --
** Network is ready **


In [None]:
# Seed both global RNGs: the agent's epsilon-greedy draw uses the stdlib
# `random` module, which np.random.seed does not affect.
np.random.seed(0)
random.seed(0)

experiment_parameters = {
    "num_runs" : 1,                     # The number of times we run the experiment
    "num_episodes" : 200,                 # The number of episodes per experiment
}

# Environment parameters
environment_parameters = {

}

# Agent parameters
agent_parameters = {
    "discount": 0.95,                # Discount factor (gamma) of the Q-update
    "num_intervals":num_intervals,
    "random_seed": 0,
    "step_size" : 0.05,              # Step size of the Q-update
    "planning_steps" : 60         # Number of simulated (planning) updates per real step
}

current_agent = WRSNAgent
current_env = WRSNEnvironment

all_reward_sums = run_experiment(agent, network, current_env, current_agent, agent_parameters, environment_parameters, experiment_parameters)


  0%|          | 0/1 [00:00<?, ?it/s]

[1;30;43mKết quả truyền trực tuyến bị cắt bớt đến 5000 dòng cuối.[0m

recodrd time  69700.32704262841
eps_threshold : 0.06589722814179827
list_value_decrease:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
++ Charging time spent at 38 is 1112.504449612343 ++
++ Energy left of MC is 515671.2505345428 ++

recodrd time  70890.67134405316
eps_threshold : 0.06587736902115725
list_value_decrease:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
++ Charging time spent at 39 is 2000 ++
++ Energy left of MC is 511168.22425869456 ++

recodrd time  72980.46567855398
eps_threshold : 0.06585753470890854
list_value_decrease:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,

# Result

In [None]:

# plot results

# Per-episode reward sums, averaged over the runs.
fig, ax = plt.subplots()
ax.plot(np.mean(all_reward_sums, axis=0), label="Reward")
ax.set_xlabel("Episodes")
ax.set_ylabel("Sum of\n rewards\n during\n episode", rotation=0, labelpad=30)
ax.legend()
# ax.set_ylim(-10, 5)
plt.show()

In [None]:

# plot results

# Objective value accumulated in each episode.
fig, ax = plt.subplots()
ax.plot(OBJECTIVE_EPISODE, label="OBJECTIVE")
ax.set_xlabel("Episodes")
ax.set_ylabel("Sum of\n objective\n during\n episode", rotation=0, labelpad=30)
ax.legend()
plt.show()

In [None]:
# plot results

# Mean of the per-episode dead-node records.
fig, ax = plt.subplots()
ax.plot(RDNL_MEAN_PER_EPISODES, label="dead node")
ax.set_xlabel("Episodes")
ax.set_ylabel("Mean of\n dead node\n during\n episode", rotation=0, labelpad=30)
ax.legend()
plt.show()