In [1]:
"""
Script containing the training and testing loop for DQNAgent
"""

'\nScript containing the training and testing loop for DQNAgent\n'

Import Libraries

In [2]:
import json
import os
import pickle
import random
import sys
import timeit
from multiprocessing import Pool

import matplotlib.pyplot as plt
import numpy as np
import pygame
import torch
from tqdm import tqdm

# pygame is initialised for its mixer: `sound` is played once the evaluation
# run at the end of the notebook has finished.
pygame.init()
pygame.mixer.init()
sound = pygame.mixer.Sound('alarm.mpeg')

# os.cpu_count() returns None when the number of CPUs cannot be determined;
# fall back to one thread instead of handing None to torch.
torch.set_num_threads(os.cpu_count() or 1)

pygame 2.5.2 (SDL 2.28.2, Python 3.12.2)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
seed = 0

# Seed every random number generator used in this notebook with the same value.
for seed_rng in (
    random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    np.random.seed,
    torch.cuda.manual_seed_all,
):
    seed_rng(seed)

os.environ.update({
    'PYTHONHASHSEED': str(seed),
    "CUBLAS_WORKSPACE_CONFIG": ":4096:8",
})

# Request deterministic behaviour from PyTorch / cuDNN.
torch.use_deterministic_algorithms(True)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

Select the compute device (currently forced to CPU; the CUDA auto-detection line is commented out) and empty the CUDA cache.

In [4]:
# CUDA auto-detection is disabled; the notebook is pinned to the CPU below.
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
print(device)
# Kept from the CUDA configuration; `device` is the CPU here.
torch.cuda.empty_cache()

cpu


Store the name of the folder in which the results are to be stored in folder_name variable.

In [5]:
# Disabled command-line guard, kept for reference:
# if(len(sys.argv) <= 1):
#     print("Please provide the config file folder name")
#     exit()

# Folder that holds config.ini and receives the model parameters.
folder_name = 'results_10x10_Stationary_Map_2'

Use the builtins package to store the name of the configurations file globally. 

In [6]:
from configparser import ConfigParser

import builtins

# Publish the config path through `builtins` so it is visible globally
# (presumably read by src/Map.ipynb when it is %run later; verify there).
builtins.current_filename = "{}/config.ini".format(folder_name)
configur = ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it parsed, so an empty list means the config file was not found.
# Fail here with a clear message instead of a NoSectionError further down.
loaded_config_files = configur.read(builtins.current_filename)
if not loaded_config_files:
    raise FileNotFoundError(
        "Could not read config file: {}".format(builtins.current_filename)
    )
loaded_config_files

['results_10x10_Stationary_Map_3/config.ini']

Read the following run parameters from the configuration file.

- num_memory_fill_eps: Number of episodes for which to run the code that fills the replay memory of the DQNs before actual training starts
- tot_episodes: Total number of episodes to train the DQNs
- tot_time: Number of time steps in each episode
- update_frequency: Number of episodes after which to update the target network of each agent
- save_frequency: Frequency to save the policy network to the file
- generate_packets_till: Time step within an episode until which the IoT Nodes generate packets
- gap_time: Number of time steps between successive rounds of packet generation and transmission by the IoT Nodes

- n - rows
- m - columns
- p - In case of a random map, p is the probability that a cell would have a UAV (It would have an IoT node with prob. 1-p)

In [7]:
# Episode / loop settings.
num_memory_fill_eps = configur.getint('train_model', 'num_memory_fill_eps')
tot_episodes = configur.getint('train_model', 'tot_episodes')
tot_time = configur.getint('train_model', 'tot_time')
update_frequency = configur.getint('train_model', 'update_frequency')
save_frequency = configur.getint('train_model', 'save_frequency')
# NOTE(review): this one is read from [test_model] while the other loop
# settings come from [train_model]; confirm that is intended.
generate_packets_till = configur.getint('test_model', 'generate_packets_till')
gap_time = configur.getint('train_model', 'gap_time')

# Map settings: n rows, m columns, p as described in the text above.
n = configur.getint('map', 'n')
m = configur.getint('map', 'm')
p = configur.getfloat('map', 'p')

Create a Map object and store it in the map_ variable.

In [8]:
# map
# Execute src/Map.ipynb inside this kernel; the `Map` class used below is
# expected to be defined by it (TODO confirm).
%run src/Map.ipynb
map_ = Map(n,m,p)
# Alternative ways of building the map, currently disabled:
# map_.generate()
#grid_map = map_.generate()

## Initially
#grid_map = map_.dummyMap()

results_10x10_Stationary_Map_3/config.ini


Read the map

This function obtains the location and name of the text file containing the map from the config file and reads it.
After this call, the map object basically holds a matrix of all the objects in the map.

In [9]:
# Per the notebook text, this loads the map from the text file named in the config.
map_.from_file_graph()

Store the BaseStation_obj, IotNodes and Agents.

In [10]:
#global variables
# Module-level handles to the map's objects. fillMemory() and generatePlot()
# use these names, while train() and eval() go through map_.IotNodes /
# map_.Agents directly.
IotNodes = map_.getIotNodes()
BaseStation_obj = map_.getBaseStation()
Agents = map_.getAgents()

Fill the experience buffer
- num_memory_fill_eps - number of episodes for which to run the code to fill replay memory of DQNs before actual training starts
- tot_time: Total time elapsed in each episode
- packets are generated once every gap_time time steps.
    - the packets are generated in the IotNodes, which send all their packets to the neighbour with the smallest queue.
- each UAV performs a random run in which nothing is displayed and no q_values are stored for plotting; what is held in its latest_queue is then moved to its original queue, and its latest_queue is made empty.

after each episode reset all the map to its original state.

In [11]:
def fillMemory():
    """
    Run the warm-up episodes that precede training.

    Runs ``num_memory_fill_eps`` episodes of ``tot_time`` steps each. IoT
    nodes are run every ``gap_time`` steps while the step index is below
    ``generate_packets_till``; on every step each agent first calls
    ``randomRun()`` and afterwards each agent calls ``update_state()``.
    The map is reset at the end of every episode.

    Returns
    ---
    none
    """
    for _episode in range(num_memory_fill_eps):

        for tick in range(tot_time):
            on_schedule = tick % gap_time == 0
            if on_schedule and tick < generate_packets_till:
                for iot_node in IotNodes:
                    iot_node.run()

            # All agents act first; states are updated in a second pass.
            for uav in Agents:
                uav.randomRun()
            for uav in Agents:
                uav.update_state()

        # Original note: make queues empty for agents, Recv Packets for BS = 0
        map_.resetAll()

`train` takes the result folder as input; the graphics option was meant to control rendering of the map (the rendering code is currently commented out).
The step count is initialised to 0; it counts the number of episodes completed so far.
- for each episode
    - In the interval of update frequency, transfer the policyDQN to the value or targetDQN of each UAV.
    -  for time in the max duration of each episode
            - each packet generation takes place in an interval of gap_time.
            - the packets are generated in the IotNodes, and send all the packets to the neighbour with the smallest queue.
       - each UAV performs a run in which its status is not displayed and q_values are stored for plotting; what is held in its latest_queue is then moved to its original queue, and its latest_queue is made empty.
       - If it is the last step in the episode, then render the map.
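    - after each episode is over
        - increase the step count by 1.
        - for each agent
            - decrease the epsilon of the epsilon-greedy algorithm
            - store the loss for each agent in the losses list.
        - reset the map (in the code this happens at the start of every episode)
    - save the model for each of the agents (in the code this happens after every episode; save_frequency is read from the config but not applied)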

In [12]:
#map_.renderMap()

In [13]:
'''
from tqdm import tqdm
for episode in tqdm(range(5), position=0, leave=True):
    print(episode)
'''

'\nfrom tqdm import tqdm\nfor episode in tqdm(range(5), position=0, leave=True):\n    print(episode)\n'

In [14]:
def train(foldername,graphics=False):
    """
    Train the DQN of every agent for ``tot_episodes`` episodes.

    Each episode resets the map and runs ``tot_time`` steps. IoT nodes are
    run every ``gap_time`` steps while the step index is below
    ``generate_packets_till``. After each episode every agent's epsilon is
    updated, its loss is stored and its model is saved.

    Parameters
    ---
    foldername: str
        Folder into which each agent's model is saved as ``agent_at_<position>``
    graphics: bool
        Currently unused (the rendering code that consumed it is disabled);
        kept so that existing calls remain valid

    Returns
    ---
    none
    """
    step_cnt = 0    # number of completed episodes

    for _episode in range(tot_episodes):

        map_.resetAll()

        # Update the target nets every `update_frequency` episodes, but never
        # before the first episode has run.
        if step_cnt % update_frequency == 0 and step_cnt!=0:
            for agent in map_.Agents:
                agent.dqn_object.updateTargetNet()

        for step in tqdm(range(tot_time), position=0, leave=True):

            if ((step%gap_time==0) and (step < generate_packets_till)):
                for node in map_.IotNodes:
                    node.run()

                # Extra state update right after packet generation; the
                # regular per-step update below still runs on these steps.
                for agent in map_.Agents:
                    agent.update_state()

            ##TODO agent order affects current state reason : agent x->y and y->z can transmit same packet in single timestamp(if order is x,y,z)
            for agent in map_.Agents:
                agent.update_state()

            for agent in map_.Agents:
                agent.run()

        step_cnt += 1
        for agent in map_.Agents:
            agent.dqn_object.updateEpsilon()
            agent.saveLoss()

        # NOTE(review): `save_frequency` from the config is not consulted;
        # the models are written after every episode.
        for agent in map_.Agents:
            agent.dqn_object.saveModel('./{}/agent_at_{}'.format(foldername,agent.getPosition()))

first reset the map and BaseStation and set epsilon = 0 for all agents (pure greedy)

initialize
    - step count = 0, measures the number of time IotNodes generated packets.
    - number of packets = empty list
    - total_ttl = empty list
    - time - empty list
    - t = 0, measures time steps.
loop till all the IotNodes and UAVs have sent, dropped or expired their packets.
    - at each interval of time steps, 
        - update step count
        - if step_count is less than the number of times packets should be generated
        - generate packets in each of the IotNodes
    - run each agent
    - update each agent
    - render the map if rendering is selected.
    - store the packets received by the base station in num_packets
    - store the total_ttl received by the base station in total_ttl
    - append t in time list
    - increase t
    - end=True
    - if any IotNode or agent is having a packet then end=False
    - if end==True break the loop
plot the number of packets and total_ttl wrt time, and save the plots

In [15]:
# def test(folder_name,render=True):
#     """
#     Function to test the agent

#     Parameters
#     ---
#     render: bool
#         Whether to create a pop-up window display the interaction of the agent with the environment

#     Returns
#     ---
#     none

#     idea: generate packets at iot till some time step
#     stop the simulation only when each packet is either dropped or reaches the base station
#     metrics: average ttl over all the packets? (indicates both latency and throughout in some sense)

#     #TODO can also calculate latency only on packets that reached the base station and throughput overall
#     """

#     # reset all agents
#     map_.resetAll()
#     BaseStation_obj.reset()
#     # no need to load model here as train was previously called. so last updated modellen(my_list) is the model to be used

#     # turn off exploration for agents now
#     for agent in Agents:
#             agent.dqn_object.turn_off_exploration()

#     step_cnt = 0
#     num_packets=[]
#     total_ttl=[]
#     time=[]
#     t=0
#     while True:

#         if(t%gap_time==0):
#             step_cnt += 1
#             if step_cnt <= generate_packets_till:
#                 for node in IotNodes:
#                     node.run()

#         for agent in Agents:
#             agent.run(False)

#         for agent in Agents:
#             agent.update_state()

#         if render :
#             map_.renderMap()

#         # check if all iot and uavs have sent out all packets
#         num_packets.append(BaseStation_obj.packetRecv)
#         total_ttl.append(BaseStation_obj.totalTtl)
#         time.append(t)
#         t+=1
#         end = True
#         for agent in Agents:
#             if agent.getVal() != 0:
#                 end = False
#                 break

#         for iot in IotNodes:
#             if iot.getQueueSize() !=0:
#                 end = False
#                 break

#         if end:
#             break
#     os.makedirs("{}/Plots".format(folder_name), exist_ok=True)
#     plt.plot(time,num_packets , color ='blue', label ='Packets Received')
#     plt.savefig('{}/Plots/Packet_Received.png'.format(folder_name))
#     plt.close()

#     plt.plot(time,total_ttl , color ='blue', label ='Sum of TTL')
#     plt.savefig('{}/Plots/SumOfTtl.png'.format(folder_name))
#     plt.close()

In [16]:
def eval(num_eval_episodes):
    """
    Evaluate the saved agent models and collect per-episode statistics.

    For every evaluation episode the models are re-initialised and re-loaded
    from ``<folder_name>/model_parameters``, the map and the base station are
    reset, and the simulation runs for ``tot_time`` steps. IoT nodes are run
    every ``gap_time`` steps while the step index is below
    ``generate_packets_till``.

    NOTE: this function shadows the built-in ``eval``; the name is kept so
    that existing calls keep working.

    Parameters
    ---
    num_eval_episodes: int
        Number of evaluation episodes to run

    Returns
    ---
    evalMeanReward: float
        Mean over the episodes of the summed values returned by ``agent.run()``
    num_packets_received: np.ndarray
        ``BaseStation_obj.getVal()`` at the end of each episode
    tot_packets_sent: np.ndarray
        Sum of ``node.rate`` over all packet-generation rounds of each episode
    mean_ttl_received: np.ndarray
        ``meanTtl()`` at the end of each episode
    rec_rate: list of dict
        One dict per episode, mapping each sampled step to a dict with the keys
        ``total_packets_rec_per_hop`` and ``old_packets_in_network``
    """
    sample_interval = 100    # steps between two receiving-rate samples

    # reset all agents
    map_.resetAll()
    BaseStation_obj.reset()

    # turn off exploration for agents now
    # for agent in map_.Agents:
    #         agent.dqn_object.turn_off_exploration()

    rewards_eval = np.zeros(num_eval_episodes)
    num_packets_received = np.zeros(num_eval_episodes)
    tot_packets_sent = np.zeros(num_eval_episodes)
    mean_ttl_received = np.zeros(num_eval_episodes)
    rec_rate = []

    episode_duration = tot_time
    create_packets_till = generate_packets_till

    for ep in range(num_eval_episodes):
        # The models are rebuilt and loaded from disk for every episode.
        torch.cuda.empty_cache()
        map_.initModels(device)
        map_.loadModel("{}/model_parameters".format(folder_name))
        map_.resetAll()
        BaseStation_obj.reset()

        rewardAddEval = 0
        tot_packets = 0

        old_packets_rec = 0
        old_packets_in_network = 0
        last_sample_step = -1    # so that the sample at step 0 spans one step
        rate_per_100_steps = {}

        for step in tqdm(range(episode_duration), position=0, leave=True):
            if ((step % gap_time==0) and (step < create_packets_till)) :
                for node in map_.IotNodes:
                    tot_packets += node.rate
                    node.run()

                for agent in map_.Agents:
                    agent.update_state()

            for agent in map_.Agents:
                rewardAddEval += agent.run()

            for agent in map_.Agents:
                agent.update_state()

            if (step % sample_interval == 0) or (step == episode_duration - 1):
                new_packets_rec = BaseStation_obj.getVal()

                # Divide by the number of steps actually covered since the
                # previous sample: the first and the final sample usually
                # span fewer than `sample_interval` steps.
                steps_in_window = step - last_sample_step
                packets_in_window = new_packets_rec - old_packets_rec

                rate_per_100_steps[step] = {
                    'total_packets_rec_per_hop': packets_in_window / steps_in_window,
                    # value recorded at the previous sample, not the current one
                    'old_packets_in_network': old_packets_in_network,
                }

                old_packets_in_network = tot_packets - new_packets_rec
                old_packets_rec = new_packets_rec
                last_sample_step = step

        rewards_eval[ep] = rewardAddEval
        num_packets_received[ep] = BaseStation_obj.getVal()
        tot_packets_sent[ep] = tot_packets
        mean_ttl_received[ep] = meanTtl()
        rec_rate.append(rate_per_100_steps)

    evalMeanReward = np.mean(rewards_eval)
    return evalMeanReward, num_packets_received, tot_packets_sent, mean_ttl_received, rec_rate

meanTtl
calculate the mean_ttl of all the packets received by the BaseStation and return it

In [17]:
def meanTtl():
    """
    Average TTL of the packets held in the base station's ``packets_received``.

    Returns
    ---
    The mean of ``packet.get_ttl()`` over those packets, or -1 when there
    are none
    """
    received = map_.getBaseStation().packets_received
    count = len(received)
    if count == 0:
        return -1

    ttl_total = 0
    for pkt in received:
        ttl_total += pkt.get_ttl()
    return ttl_total / count

generatePlot
- for each agent
    - obtain the list of losses (train stores one loss per agent after every episode)
    - plot the loss against the episode index and save the plot.

In [18]:
def generatePlot(folder_name):
    """
    Save one loss curve per agent under ``<folder_name>/Plots``.

    Parameters
    ---
    folder_name: str
        Results folder; the ``Plots`` sub-folder is created when missing

    Returns
    ---
    none
    """
    plots_dir = "{}/Plots".format(folder_name)
    os.makedirs(plots_dir, exist_ok=True)

    for agent in Agents:
        losses = agent.getLoss()
        # x axis: 1-based index of each stored loss value
        x_values = list(range(1, len(losses) + 1))
        target_file = '{}/Plots/agent_at_{}.png'.format(folder_name, agent.getPosition())

        plt.plot(x_values, losses, color ='orange', label ='Agent Loss')
        plt.savefig(target_file)
        plt.close()

initialize the DQNAgent for each agent.
if Train is chosen
    fill the memory and train
then test the model
generate the plots.

In [19]:
# Derive the model directory from `folder_name` once, so training saves to the
# same location that eval() later loads from
# ("{}/model_parameters".format(folder_name)).
model_dir = "{}/model_parameters".format(folder_name)
os.makedirs(model_dir, exist_ok=True)
map_.initModels(device)
fillMemory()

# Time only the training loop (memory filling is excluded).
start = timeit.default_timer()

train(model_dir, True)

stop = timeit.default_timer()
train_seconds = stop - start

print('Time: ', train_seconds)

with open("train_time_stationary_Map_2_MAPR.txt","w") as file:
    file.write("Train Time: " + str(train_seconds))

100%|███████████████████████████████████████| 2000/2000 [05:55<00:00,  5.63it/s]


Time:  355.26390269699914


In [20]:
# if configur.get('train_model','train') == 'True':
#     fillMemory()
#     train("{}/model_parameters".format(folder_name),False)
# #generatePlot(folder_name)

# #print('Mean ttl of all packets received by base station: ',meanTtl())

In [21]:
seed = 0

# Same seeding as at the top of the notebook, repeated before evaluation.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
os.environ['PYTHONHASHSEED'] = str(seed)

np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Request deterministic behaviour from PyTorch / cuDNN.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.use_deterministic_algorithms(True)

In [22]:
# eval() initialises and loads the models itself at the start of every
# evaluation episode, so the two calls below stay disabled.
# map_.initModels(device)
# map_.loadModel("{}/model_parameters".format(folder_name))
# Run 10 evaluation episodes and keep every returned statistic.
evalMeanReward, num_packets_received, tot_packets_sent, mean_ttl_received, rec_rate = eval(10)

print("Finish")
# Audible notification that the evaluation has finished.
sound.play(maxtime=5000)

100%|███████████████████████████████████████| 2000/2000 [04:36<00:00,  7.23it/s]
100%|███████████████████████████████████████| 2000/2000 [04:13<00:00,  7.87it/s]
100%|███████████████████████████████████████| 2000/2000 [04:10<00:00,  8.00it/s]
100%|███████████████████████████████████████| 2000/2000 [04:13<00:00,  7.90it/s]
100%|███████████████████████████████████████| 2000/2000 [04:14<00:00,  7.87it/s]
100%|███████████████████████████████████████| 2000/2000 [04:13<00:00,  7.87it/s]
100%|███████████████████████████████████████| 2000/2000 [04:16<00:00,  7.80it/s]
100%|███████████████████████████████████████| 2000/2000 [04:13<00:00,  7.90it/s]
100%|███████████████████████████████████████| 2000/2000 [04:12<00:00,  7.92it/s]
100%|███████████████████████████████████████| 2000/2000 [04:13<00:00,  7.90it/s]

Finish





<pygame.mixer.Channel at 0x731991b06cf0>

In [23]:
# Print each evaluation statistic, then the means of the packets received and
# of the mean TTLs across episodes.
for metric in (
    evalMeanReward,
    num_packets_received,
    tot_packets_sent,
    mean_ttl_received,
    np.mean(num_packets_received),
    np.mean(mean_ttl_received),
):
    print(metric)

525667.7603668608
[545. 545. 545. 545. 545. 545. 545. 545. 545. 545.]
[4000. 4000. 4000. 4000. 4000. 4000. 4000. 4000. 4000. 4000.]
[743.43486239 743.43486239 743.43486239 743.43486239 743.43486239
 743.43486239 743.43486239 743.43486239 743.43486239 743.43486239]
545.0
743.4348623853211


In [24]:
# Persist the evaluation results, one pickle file per metric.
# `pickle` is imported in the notebook's import cell.
results_to_save = {
    'num_packets_received': num_packets_received,
    'tot_packets_sent': tot_packets_sent,
    'mean_ttl_received': mean_ttl_received,
    'rec_rate': rec_rate,
}
for metric_name, metric_values in results_to_save.items():
    with open('MAPR_4000_Map_2_stationary_performance_{}.pkl'.format(metric_name), 'wb') as file:
        pickle.dump(metric_values, file)


In [25]:
# file = open('MAPR_4000_Map_2_stationary_performance.txt', 'w')

# file.write('\n num_packets_received \n')
# file.write(json.dumps(list(num_packets_received)))

# file.write('\n tot_packets_sent \n')
# file.write(json.dumps(list(tot_packets_sent)))

# file.write('\n mean_ttl_received \n')
# file.write(json.dumps(list(mean_ttl_received)))

# file.write('\n average num_packets_received \n')
# file.write(json.dumps(np.mean(num_packets_received)))

# file.write('\n average mean_ttl_received \n')
# file.write(json.dumps(np.mean(mean_ttl_received)))

# file.close()