In [1]:
!pip install gym
!pip install numpy-stl
!pip install torch
!pip install tqdm
!pip install tensorboard 
!pip install SciencePlots

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


In [2]:
import gym
import time
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import scienceplots
import matplotlib
import matplotlib.ticker as ticker
import matplotlib as mpl
from plot_utils import window_mean
import csv
import ast
import torch
import random
import pandas as pd
from joblib import Parallel, delayed
import seaborn as sns
from misc import process_oh_actions
from envs import MECSCHEnvV1
from envs import MECSCHEnvV2
from tqdm.notebook import tqdm
from agent_wrapper import OnPolicyWrapper
from models import MLPCategoricalActor, MLPRelaxedCategoricalActor, BaseMLPNet, BaseMLPActor
from plot_functions import set_fonts, set_style, draw_boxplot, draw_brace
from gym.envs import register
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
from colorsys import rgb_to_hls

In [3]:
def seed_everything(seed: int):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and all CUDA devices), and configures cuDNN for repeatable
    results.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Imported locally so the helper is self-contained (``os`` is not imported
    # at the top of the notebook).
    import random, os
    import numpy as np
    import torch

    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting
    # it here presumably only affects child processes, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels between
    # runs, which defeats the deterministic setting above; keep it off.
    torch.backends.cudnn.benchmark = False

In [4]:
def register_envs():
    """Register the MECSCH-v1 ... MECSCH-v6 environment ids with gym.

    All six ids share the same system parameters; MECSCH-v1 to v5 use
    ``MECSCHEnvV2`` and differ only in which of the four scheme flags
    ('Reduction', 'Round_robin', 'semi_static', 'heuristic') is switched on.
    MECSCH-v6 uses ``MECSCHEnvV1``, takes none of those four flags and sets
    'silent' to False.
    """
    # Parameters common to every registration (key order as originally written).
    shared = {'n_ues':8,'n_channels':3,'UE_buffer_capacity':3,'UE_CPU':1e9,'BS_CPU':120e9,'BS_BW':60e6,
              'hist_msg':1,'hist_comm':1,'hist_obs':1,'hist_act':1,'n_voc_ul':2,'n_voc_dl':5,
              'arrival_prob':1,'max_iters':30,'reward_com':1,'penality':-1,'silent':True,
              'NOMA_Scheme':True,'OMA_Scheme':False}
    scheme_flags = ('Reduction','Round_robin','semi_static','heuristic')

    # (env id, flag set to True or None when every flag stays False)
    v2_variants = (
        # Mobile edge computing full computation offloading no communication NOMA with reduction
        ('MECSCH-v1', 'Reduction'),
        # Mobile edge computing full computation offloading no communication NOMA without reduction
        ('MECSCH-v2', None),
        # Mobile edge computing full computation offloading semi-static NOMA
        ('MECSCH-v3', 'semi_static'),
        # Mobile edge computing full computation offloading round-robin NOMA
        ('MECSCH-v4', 'Round_robin'),
        # Mobile edge computing full computation offloading heuristic NOMA
        ('MECSCH-v5', 'heuristic'),
    )
    for env_name, active_flag in v2_variants:
        settings = dict(shared)
        for flag in scheme_flags:
            settings[flag] = (flag == active_flag)
        gym.envs.register(id=env_name, entry_point=MECSCHEnvV2, kwargs=settings)

    # 'MECSCH-v6': Mobile edge computing full computation offloading
    # contention-free/contention-based NOMA
    gym.envs.register(id='MECSCH-v6', entry_point=MECSCHEnvV1, kwargs=dict(shared, silent=False))

In [5]:
from contention_free_V2 import BaseStation01, UEAgent01
def base_runner(env_id, n_episodes=1000, eval_every=10, n_eval_episodes=10, max_ep_len=25, seed=1024):
    """Roll out one BaseStation01 and ``n_ues`` UEAgent01 agents on ``env_id``.

    Each episode runs until every entry of ``dones`` is true or the step count
    reaches ``env.max_iters``. Per step the mean of ``rewards`` and seven
    entries of ``info`` are accumulated.

    Args:
        env_id: Gym id passed to ``gym.make`` (ids are registered by
            ``register_envs``).
        n_episodes: Number of episodes to run.
        eval_every: Episodes whose index is a multiple of this value are also
            recorded in the ``train_*`` lists.
        n_eval_episodes: Accepted for signature compatibility; not used.
        max_ep_len: Accepted for signature compatibility; not used (the step
            limit comes from ``env.max_iters``).
        seed: Seed for the environment, the global RNGs and the agents
            (agent ``i`` receives ``seed + i``).

    Returns:
        A 16-tuple of lists: eight ``train_*`` lists followed by eight
        ``eval_*`` lists, each group ordered as rewards, success tasks,
        channel success, channel collision, channel idle, goodput, drop rate,
        failed. The ``eval_*`` lists get one entry per episode; the
        ``train_*`` lists only for every ``eval_every``-th episode. The four
        channel/goodput metrics are divided by the episode length, the others
        are plain sums.
    """
    register_envs()
    env = gym.make(env_id)
    env.seed(seed)
    seed_everything(seed)

    n_ues, n_channels = env.n_ues, env.n_channels
    hist_msg, hist_obs = env.hist_msg, env.hist_obs
    n_voc_ul, n_voc_dl = env.n_voc_ul, env.n_voc_dl

    # The BaseStation01 instance comes first, followed by n_ues UEAgent01 instances.
    agents = [BaseStation01(n_ues, n_channels, hist_msg, hist_obs, n_voc_ul)]
    agents.extend(UEAgent01(n_channels, hist_msg, hist_obs, n_voc_dl) for _ in range(n_ues))
    for offset, agent in enumerate(agents):
        agent.seed(seed + offset)

    info_keys = (
        "No. of Success Tasks",
        "Channel Access Success Rate",
        "Channel Access Collision Rate",
        "Channel Idle Rate",
        "Goodput",
        "Drop Rate",
        "Failed",
    )
    # Eight lists per group, in the order they are returned.
    train_series = tuple([] for _ in range(8))
    eval_series = tuple([] for _ in range(8))

    for episode in tqdm(range(n_episodes)):
        totals = dict.fromkeys(info_keys, 0)
        reward_sum, steps = 0, 0
        obs = env.reset()
        for agent in agents:
            agent.reset()

        running = True
        while running:
            steps += 1
            actions = [agent.act(obs[idx]) for idx, agent in enumerate(agents)]
            obs, rewards, dones, info = env.step(actions)
            all_done = all(dones)
            out_of_steps = steps >= env.max_iters
            running = not (all_done or out_of_steps)
            reward_sum += np.mean(rewards)
            for key in info_keys:
                totals[key] += info[key]

        summary = (
            reward_sum,
            totals["No. of Success Tasks"],
            totals["Channel Access Success Rate"] / steps,
            totals["Channel Access Collision Rate"] / steps,
            totals["Channel Idle Rate"] / steps,
            totals["Goodput"] / steps,
            totals["Drop Rate"],
            totals["Failed"],
        )
        for series, value in zip(eval_series, summary):
            series.append(value)
        if episode % eval_every == 0:
            for series, value in zip(train_series, summary):
                series.append(value)

    return (*train_series, *eval_series)

In [6]:
from contention_based_V2 import BaseStation02, UEAgent02
def contention_base_runner(env_id, n_episodes=1000, eval_every=10, n_eval_episodes=10, max_ep_len=25, seed=1024):
    """Roll out one BaseStation02 and ``n_ues`` UEAgent02 agents on ``env_id``.

    Each episode runs until every entry of ``dones`` is true or the step count
    reaches ``env.max_iters``. Per step the mean of ``rewards`` and seven
    entries of ``info`` are accumulated.

    Args:
        env_id: Gym id passed to ``gym.make`` (ids are registered by
            ``register_envs``).
        n_episodes: Number of episodes to run.
        eval_every: Episodes whose index is a multiple of this value are also
            recorded in the ``train_*`` lists.
        n_eval_episodes: Accepted for signature compatibility; not used.
        max_ep_len: Accepted for signature compatibility; not used (the step
            limit comes from ``env.max_iters``).
        seed: Seed for the environment, the global RNGs and the agents
            (agent ``i`` receives ``seed + i``).

    Returns:
        A 16-tuple of lists: eight ``train_*`` lists followed by eight
        ``eval_*`` lists, each group ordered as rewards, success tasks,
        channel success, channel collision, channel idle, goodput, drop rate,
        failed. The ``eval_*`` lists get one entry per episode; the
        ``train_*`` lists only for every ``eval_every``-th episode. The four
        channel/goodput metrics are divided by the episode length, the others
        are plain sums.
    """
    register_envs()
    env = gym.make(env_id)
    env.seed(seed)
    seed_everything(seed)

    n_ues, n_channels = env.n_ues, env.n_channels
    hist_msg, hist_obs = env.hist_msg, env.hist_obs
    n_voc_ul, n_voc_dl = env.n_voc_ul, env.n_voc_dl

    # The BaseStation02 instance comes first, followed by n_ues UEAgent02 instances.
    agents = [BaseStation02(n_ues, n_channels, hist_msg, hist_obs, n_voc_ul)]
    agents.extend(UEAgent02(n_channels, hist_msg, hist_obs, n_voc_dl) for _ in range(n_ues))
    for offset, agent in enumerate(agents):
        agent.seed(seed + offset)

    info_keys = (
        "No. of Success Tasks",
        "Channel Access Success Rate",
        "Channel Access Collision Rate",
        "Channel Idle Rate",
        "Goodput",
        "Drop Rate",
        "Failed",
    )
    # Eight lists per group, in the order they are returned.
    train_series = tuple([] for _ in range(8))
    eval_series = tuple([] for _ in range(8))

    for episode in tqdm(range(n_episodes)):
        totals = dict.fromkeys(info_keys, 0)
        reward_sum, steps = 0, 0
        obs = env.reset()
        for agent in agents:
            agent.reset()

        running = True
        while running:
            steps += 1
            actions = [agent.act(obs[idx]) for idx, agent in enumerate(agents)]
            obs, rewards, dones, info = env.step(actions)
            all_done = all(dones)
            out_of_steps = steps >= env.max_iters
            running = not (all_done or out_of_steps)
            reward_sum += np.mean(rewards)
            for key in info_keys:
                totals[key] += info[key]

        summary = (
            reward_sum,
            totals["No. of Success Tasks"],
            totals["Channel Access Success Rate"] / steps,
            totals["Channel Access Collision Rate"] / steps,
            totals["Channel Idle Rate"] / steps,
            totals["Goodput"] / steps,
            totals["Drop Rate"],
            totals["Failed"],
        )
        for series, value in zip(eval_series, summary):
            series.append(value)
        if episode % eval_every == 0:
            for series, value in zip(train_series, summary):
                series.append(value)

    return (*train_series, *eval_series)

In [7]:
from semi_static import BaseStation03
def semi_static_runner(env_id,n_episodes=1000,eval_every=10,n_eval_episodes=10,max_ep_len=25,seed=1024):
    """Roll out a single BaseStation03 agent on ``env_id`` and collect metrics.

    Each episode runs until every entry of ``dones`` is true or the step count
    reaches ``env.max_iters``. Per step the mean of ``rewards`` and seven
    entries of ``info`` are accumulated.

    Args:
        env_id: Gym id passed to ``gym.make`` (ids are registered by
            ``register_envs``).
        n_episodes: Number of episodes to run.
        eval_every: Episodes whose index is a multiple of this value are also
            recorded in the ``train_*`` lists.
        n_eval_episodes: Accepted for signature compatibility; not used.
        max_ep_len: Accepted for signature compatibility; not used (the step
            limit comes from ``env.max_iters``).
        seed: Seed for the environment, the global RNGs and the agent.

    Returns:
        A 16-tuple of lists: eight ``train_*`` lists followed by eight
        ``eval_*`` lists, each group ordered as rewards, success tasks,
        channel success, channel collision, channel idle, goodput, drop rate,
        failed. The ``eval_*`` lists get one entry per episode; the
        ``train_*`` lists only for every ``eval_every``-th episode. The four
        channel/goodput metrics are divided by the episode length, the others
        are plain sums.
    """
    register_envs()
    env = gym.make(env_id)
    env.seed(seed)
    seed_everything(seed)

    # Only the attributes BaseStation03 actually receives are read from the env.
    agents = [BaseStation03(env.n_ues, env.n_channels, env.hist_msg, env.hist_obs,
                            env.n_voc_ul, env.BSnA)]
    for offset, agent in enumerate(agents):
        agent.seed(seed + offset)

    info_keys = (
        "No. of Success Tasks",
        "Channel Access Success Rate",
        "Channel Access Collision Rate",
        "Channel Idle Rate",
        "Goodput",
        "Drop Rate",
        "Failed",
    )
    # Eight lists per group, in the order they are returned.
    train_series = tuple([] for _ in range(8))
    eval_series = tuple([] for _ in range(8))

    for episode in tqdm(range(n_episodes)):
        totals = dict.fromkeys(info_keys, 0)
        reward_sum, steps = 0, 0
        obs = env.reset()
        for agent in agents:
            agent.reset()

        running = True
        while running:
            steps += 1
            actions = [agent.act(obs[idx]) for idx, agent in enumerate(agents)]
            obs, rewards, dones, info = env.step(actions)
            all_done = all(dones)
            out_of_steps = steps >= env.max_iters
            running = not (all_done or out_of_steps)
            reward_sum += np.mean(rewards)
            for key in info_keys:
                totals[key] += info[key]

        summary = (
            reward_sum,
            totals["No. of Success Tasks"],
            totals["Channel Access Success Rate"] / steps,
            totals["Channel Access Collision Rate"] / steps,
            totals["Channel Idle Rate"] / steps,
            totals["Goodput"] / steps,
            totals["Drop Rate"],
            totals["Failed"],
        )
        for series, value in zip(eval_series, summary):
            series.append(value)
        if episode % eval_every == 0:
            for series, value in zip(train_series, summary):
                series.append(value)

    return (*train_series, *eval_series)

In [8]:
from Round_robin import BaseStation04
def Round_robin_runner(env_id,n_episodes=1000,eval_every=10,n_eval_episodes=10,max_ep_len=25,seed=1024):
    """Roll out a single BaseStation04 agent on ``env_id`` and collect metrics.

    Unlike the other runners, the agent's ``act`` is called with the
    observation and the current 1-based step index of the episode. Each episode
    runs until every entry of ``dones`` is true or the step count reaches
    ``env.max_iters``.

    Args:
        env_id: Gym id passed to ``gym.make`` (ids are registered by
            ``register_envs``).
        n_episodes: Number of episodes to run.
        eval_every: Episodes whose index is a multiple of this value are also
            recorded in the ``train_*`` lists.
        n_eval_episodes: Accepted for signature compatibility; not used.
        max_ep_len: Accepted for signature compatibility; not used (the step
            limit comes from ``env.max_iters``).
        seed: Seed for the environment, the global RNGs and the agent.

    Returns:
        A 16-tuple of lists: eight ``train_*`` lists followed by eight
        ``eval_*`` lists, each group ordered as rewards, success tasks,
        channel success, channel collision, channel idle, goodput, drop rate,
        failed. The ``eval_*`` lists get one entry per episode; the
        ``train_*`` lists only for every ``eval_every``-th episode. The four
        channel/goodput metrics are divided by the episode length, the others
        are plain sums.
    """
    register_envs()
    env = gym.make(env_id)
    env.seed(seed)
    seed_everything(seed)

    agents = [BaseStation04(env.n_ues, env.n_channels, env.hist_msg, env.hist_obs, env.n_voc_ul)]
    for offset, agent in enumerate(agents):
        agent.seed(seed + offset)

    info_keys = (
        "No. of Success Tasks",
        "Channel Access Success Rate",
        "Channel Access Collision Rate",
        "Channel Idle Rate",
        "Goodput",
        "Drop Rate",
        "Failed",
    )
    # Eight lists per group, in the order they are returned.
    train_series = tuple([] for _ in range(8))
    eval_series = tuple([] for _ in range(8))

    for episode in tqdm(range(n_episodes)):
        totals = dict.fromkeys(info_keys, 0)
        reward_sum, steps = 0, 0
        obs = env.reset()
        for agent in agents:
            agent.reset()

        running = True
        while running:
            steps += 1
            # The step counter is handed to the agent alongside its observation.
            actions = [agent.act(obs[idx], steps) for idx, agent in enumerate(agents)]
            obs, rewards, dones, info = env.step(actions)
            all_done = all(dones)
            out_of_steps = steps >= env.max_iters
            running = not (all_done or out_of_steps)
            reward_sum += np.mean(rewards)
            for key in info_keys:
                totals[key] += info[key]

        summary = (
            reward_sum,
            totals["No. of Success Tasks"],
            totals["Channel Access Success Rate"] / steps,
            totals["Channel Access Collision Rate"] / steps,
            totals["Channel Idle Rate"] / steps,
            totals["Goodput"] / steps,
            totals["Drop Rate"],
            totals["Failed"],
        )
        for series, value in zip(eval_series, summary):
            series.append(value)
        if episode % eval_every == 0:
            for series, value in zip(train_series, summary):
                series.append(value)

    return (*train_series, *eval_series)

In [9]:
from heuristics import BaseStation05
def heuristics_runner(env_id,n_episodes=1000,eval_every=10,n_eval_episodes=10,max_ep_len=25,seed=1024):
    """Roll out a single BaseStation05 agent on ``env_id`` and collect metrics.

    The agent is constructed from environment attributes (``n_ues``,
    ``n_channels``, ``BSnA``, ``Channel_Matrix``, ``UE_Power``, ``Noise_Power``,
    ``BS_BW``, ``BS_CPU``, ``uplink_th``). Each episode runs until every entry
    of ``dones`` is true or the step count reaches ``env.max_iters``.

    Args:
        env_id: Gym id passed to ``gym.make`` (ids are registered by
            ``register_envs``).
        n_episodes: Number of episodes to run.
        eval_every: Episodes whose index is a multiple of this value are also
            recorded in the ``train_*`` lists.
        n_eval_episodes: Accepted for signature compatibility; not used.
        max_ep_len: Accepted for signature compatibility; not used (the step
            limit comes from ``env.max_iters``).
        seed: Seed for the environment, the global RNGs and the agent.

    Returns:
        A 16-tuple of lists: eight ``train_*`` lists followed by eight
        ``eval_*`` lists, each group ordered as rewards, success tasks,
        channel success, channel collision, channel idle, goodput, drop rate,
        failed. The ``eval_*`` lists get one entry per episode; the
        ``train_*`` lists only for every ``eval_every``-th episode. The four
        channel/goodput metrics are divided by the episode length, the others
        are plain sums.
    """
    register_envs()
    env = gym.make(env_id)
    env.seed(seed)
    seed_everything(seed)

    # Constructor arguments, read from the environment in positional order.
    station_args = (
        env.n_ues,
        env.n_channels,
        env.BSnA,
        env.Channel_Matrix,
        env.UE_Power,
        env.Noise_Power,
        env.BS_BW,
        env.BS_CPU,
        env.uplink_th,
    )
    agents = [BaseStation05(*station_args)]
    for offset, agent in enumerate(agents):
        agent.seed(seed + offset)

    info_keys = (
        "No. of Success Tasks",
        "Channel Access Success Rate",
        "Channel Access Collision Rate",
        "Channel Idle Rate",
        "Goodput",
        "Drop Rate",
        "Failed",
    )
    # Eight lists per group, in the order they are returned.
    train_series = tuple([] for _ in range(8))
    eval_series = tuple([] for _ in range(8))

    for episode in tqdm(range(n_episodes)):
        totals = dict.fromkeys(info_keys, 0)
        reward_sum, steps = 0, 0
        obs = env.reset()
        for agent in agents:
            agent.reset()

        running = True
        while running:
            steps += 1
            actions = [agent.act(obs[idx]) for idx, agent in enumerate(agents)]
            obs, rewards, dones, info = env.step(actions)
            all_done = all(dones)
            out_of_steps = steps >= env.max_iters
            running = not (all_done or out_of_steps)
            reward_sum += np.mean(rewards)
            for key in info_keys:
                totals[key] += info[key]

        summary = (
            reward_sum,
            totals["No. of Success Tasks"],
            totals["Channel Access Success Rate"] / steps,
            totals["Channel Access Collision Rate"] / steps,
            totals["Channel Idle Rate"] / steps,
            totals["Goodput"] / steps,
            totals["Drop Rate"],
            totals["Failed"],
        )
        for series, value in zip(eval_series, summary):
            series.append(value)
        if episode % eval_every == 0:
            for series, value in zip(train_series, summary):
                series.append(value)

    return (*train_series, *eval_series)

In [10]:
def test_agents(agents,env_id="MECSCH-v0",n_episodes=10,max_ep_len=25):
    register_envs()
    env = gym.make(env_id)
    env.seed(1234)
       
    eval_rewards = []
    eval_success_tasks = []
    eval_channel_success = []
    eval_channel_collision = []
    eval_channel_idle = []
    eval_goodput = []
    eval_droprate = []
    eval_failed = []
    
    for ep in range(n_episodes):
        obs = env.reset()
        ep_reward, ep_len, ep_droprate, ep_failed,ep_success_tasks,ep_channel_success,ep_channel_collision,ep_channel_idle,ep_goodput = 0,0,0,0,0,0,0,0,0  
        done, terminal = False, False
        while not (done or terminal):
            ep_len += 1
            actions = agents.act(obs,explore=False)
            next_obs, rewards, dones, info = env.step(actions)
            terminal = ep_len > max_ep_len
            done = all(dones)
            obs = next_obs
            ep_reward += np.mean(rewards)
            ep_success_tasks += info["No. of Success Tasks"]
            ep_channel_success += info["Channel Access Success Rate"]
            ep_channel_collision += info["Channel Access Collision Rate"]
            ep_channel_idle += info["Channel Idle Rate"]
            ep_goodput += info["Goodput"]
            ep_droprate += info["Drop Rate"]
            ep_failed += info["Failed"]
            
        eval_rewards.append(ep_reward)
        eval_success_tasks.append(ep_success_tasks)
        eval_channel_success.append(ep_channel_success/ep_len)
        eval_channel_collision.append(ep_channel_collision/ep_len)
        eval_channel_idle.append(ep_channel_idle/ep_len)
        eval_goodput.append(ep_goodput/ep_len)
        eval_droprate.append(ep_droprate)
        eval_failed.append(ep_failed)
        
    return eval_rewards,eval_success_tasks,eval_channel_success,eval_channel_collision,eval_channel_idle,eval_goodput,\
           eval_droprate,eval_failed

In [11]:
def run_sim_on(env_id="MECSCH-v0",n_episodes=1000,max_ep_len=25,parameter_sharing=False,seed=1024,disable_tqdm=False,
               eval_every=10,n_eval_episodes=10,**kwargs):
    """Train on-policy agents on `env_id` and evaluate them periodically.

    Args:
        env_id: id of the registered gym environment to train on.
        n_episodes: number of training episodes.
        max_ep_len: step limit per episode; the check is `ep_len > max_ep_len`,
            so an episode runs for at most `max_ep_len + 1` steps.
        parameter_sharing: forwarded to `OnPolicyWrapper`.
        seed: seed for the environment and for `seed_everything`.
        disable_tqdm: hide the training progress bar.
        eval_every: `test_agents` is called whenever `ep % eval_every == 0`
            (this includes episode 0).
        n_eval_episodes: number of episodes of every `test_agents` call.
        **kwargs: forwarded to `OnPolicyWrapper`; `batch_size` (default 64) is
            also read here as the number of environment steps between updates.

    Returns:
        A 16-tuple. Items 0-7 are lists holding, for each periodic evaluation,
        the mean over its episodes of: reward, success tasks, channel success,
        channel collision, channel idle, goodput, drop rate, failed.
        Items 8-15 are the eight per-episode lists returned by one final
        `test_agents` call, in the same metric order.
    """
    # Seed every RNG before the environment and the agents are created, so that
    # whatever randomness their construction uses (presumably the initial
    # network weights inside OnPolicyWrapper) is reproducible for a given seed.
    seed_everything(seed)
    register_envs()
    env = gym.make(env_id)
    env.seed(seed)
    agents = OnPolicyWrapper(env,parameter_sharing=parameter_sharing,**kwargs)
    batch_size = kwargs.get("batch_size",64)

    # One history per metric, in the order test_agents returns them.
    eval_histories = tuple([] for _ in range(8))
    total_count = 0  # environment steps since the last update

    with tqdm(total=n_episodes,desc="Training",disable=disable_tqdm) as pbar:
        for ep in range(n_episodes):
            obs = env.reset()
            ep_reward, ep_len = 0, 0
            done, terminal = False, False
            while not (done or terminal):
                ep_len += 1
                total_count += 1
                actions = agents.act(obs)
                values = agents.estimate_value(obs)
                next_obs, rewards, dones, info = env.step(actions)
                terminal = ep_len > max_ep_len
                agents.experience(ep, obs, actions, rewards, next_obs, dones, values)
                done = all(dones)
                if total_count >= batch_size:
                    # Value of the state after the last stored transition is handed to the update.
                    next_values = agents.estimate_value(next_obs)
                    agents.update(next_values)
                    total_count = 0
                obs = next_obs
                ep_reward += np.mean(rewards)

            pbar.set_postfix({"episode": ep+1,"Training reward": np.round(ep_reward, decimals=2)})
            pbar.update(1)

            if ep % eval_every == 0:
                episode_stats = test_agents(agents, env_id, n_episodes=n_eval_episodes, max_ep_len=max_ep_len)
                for history, per_episode in zip(eval_histories, episode_stats):
                    history.append(np.mean(per_episode))

    final_stats = test_agents(agents, env_id, n_episodes=n_eval_episodes, max_ep_len=max_ep_len)

    return (*eval_histories, *final_stats)

In [None]:
# ---- Experiment configuration (shared by the learned agents and the baselines) ----
envs = ["MECSCH-v1","MECSCH-v2"]
labels = ["Proposed with Reduction","Proposed No-Reduction"]

n_episodes = 2001
max_ep_len = 15
n_seeds = 2
eval_every = 100
n_eval_episodes = 2001
parameter_sharing = False
disable_tqdm = False

mappo_params = {"actor_lr":1e-3,"critic_lr":1e-3,"gamma": 0.99,"gae": True,"gae_lmb":0.95,"shuffle": False,"model":"MAPPO",
                "local_critic": True}
list_seeds = 1024 * np.arange(1, n_seeds+1)

# ---- Results for MAPPO: one parallel job per seed and environment ----
results_mappo = {}
for env,label in zip(envs,labels):
    env_id = env
    results_mappo[label] = Parallel(n_jobs=-1,verbose=10)(delayed(run_sim_on)(env_id,n_episodes,max_ep_len,parameter_sharing,seed, disable_tqdm, eval_every, n_eval_episodes, **mappo_params) for seed in list_seeds.tolist())

# ---- Baseline schemes: (result label, runner, environment id), run in this order ----
baseline_runs = [
    ("Semi-static", semi_static_runner, "MECSCH-v3"),
    ("Round-robin", Round_robin_runner, "MECSCH-v4"),
    ("Heuristics", heuristics_runner, "MECSCH-v5"),
    ("Contention-free", base_runner, "MECSCH-v6"),
    ("Contention-based", contention_base_runner, "MECSCH-v6"),
]
for baseline_label, baseline_runner, baseline_env_id in baseline_runs:
    results_mappo[baseline_label] = Parallel(n_jobs=-1,verbose=10)(delayed(baseline_runner)(baseline_env_id,n_episodes=n_episodes,eval_every=eval_every,n_eval_episodes=n_eval_episodes,max_ep_len=max_ep_len,seed=seed) for seed in list_seeds.tolist())

# `results` is aligned element-by-element with `schemes`.
schemes = ["Proposed with Reduction","Proposed No-Reduction","Semi-static","Round-robin","Heuristics","Contention-free",\
           "Contention-based"]
results = [results_mappo[scheme] for scheme in schemes]

# Key order of the evaluation accumulators. It intentionally stays different from
# `schemes`: it becomes the column order of the eval_*_Data.csv files written later.
eval_scheme_order = ["Proposed with Reduction","Proposed No-Reduction","Contention-free","Contention-based","Semi-static","Round-robin","Heuristics"]

eval_reward_results = {scheme: [] for scheme in eval_scheme_order}
eval_success_tasks_results = {scheme: [] for scheme in eval_scheme_order}
eval_channel_success_results = {scheme: [] for scheme in eval_scheme_order}
eval_channel_collision_results = {scheme: [] for scheme in eval_scheme_order}
eval_channel_idle_results = {scheme: [] for scheme in eval_scheme_order}
eval_goodput_results = {scheme: [] for scheme in eval_scheme_order}
eval_URLLC_results = {scheme: [] for scheme in eval_scheme_order}
eval_droprate_results = {scheme: [] for scheme in eval_scheme_order}
eval_failed_results = {scheme: [] for scheme in eval_scheme_order}

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
  actions = torch.stack([d.sample() for d in dists]).T
  actions = torch.stack([d.sample() for d in dists]).T
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
  logger.warn("Overriding environment {}".format(id))
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed: 12.6min remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed: 12.6min finished
[Parallel(n_jo

Training:   0%|          | 0/2001 [00:00<?, ?it/s]
Training:   0%|          | 0/2001 [00:00<?, ?it/s]


In [None]:
# ---- Aggregate the per-seed runner outputs and persist them as CSV files ----
#
# `results` is aligned with `schemes`; each entry is a list with one 16-item result
# per seed. Items 0-7 are the curves that are smoothed and stored per seed (for
# run_sim_on these are the periodic evaluation means), items 8-15 are the final-test
# series that are averaged over the seeds. Both halves use the metric order below.
SMOOTHING_WINDOW = 10  # window handed to window_mean for every curve (one call used 100 by mistake)

# (train CSV file, eval CSV file, eval accumulator dict) for every metric.
metric_specs = [
    ('train_Reward_Data.csv', 'eval_Reward_Data.csv', eval_reward_results),
    ('train_success_tasks_Data.csv', 'eval_success_tasks_Data.csv', eval_success_tasks_results),
    ('train_channel_success_Data.csv', 'eval_channel_success_Data.csv', eval_channel_success_results),
    ('train_channel_collision_Data.csv', 'eval_channel_collision_Data.csv', eval_channel_collision_results),
    ('train_channel_idle_Data.csv', 'eval_channel_idle_Data.csv', eval_channel_idle_results),
    ('train_goodput_Data.csv', 'eval_goodput_Data.csv', eval_goodput_results),
    ('train_droprate_Data.csv', 'eval_droprate_Data.csv', eval_droprate_results),
    ('train_failed_Data.csv', 'eval_failed_Data.csv', eval_failed_results),
]
n_metrics = len(metric_specs)

# Episode index shared by all curves (default integer dtype: int16 would overflow
# for runs longer than 32767 episodes).
data_0 = {"episodes":np.arange(0,n_episodes,eval_every)}

# One frame per (metric, seed); sized from the data instead of a hard-coded five seeds.
n_seed_runs = max(len(results_ind) for results_ind in results)
seed_frames = [[pd.DataFrame(data=data_0) for _ in range(n_seed_runs)] for _ in range(n_metrics)]

for key, results_ind in zip(schemes, results):
    # Final-test series: average over the seeds, keep one value per test episode.
    for metric_idx, (_, _, eval_store) in enumerate(metric_specs):
        eval_store[key] = np.mean([np.array(result[n_metrics + metric_idx]) for result in results_ind],axis=0)

    # Curves: one smoothed column per scheme in the frame of the matching seed.
    for number, result in enumerate(results_ind):
        for metric_idx in range(n_metrics):
            seed_frames[metric_idx][number][key] = window_mean(np.array(result[metric_idx]),SMOOTHING_WINDOW)

# Stack the seeds row-wise (the 'episodes' column repeats once per seed) and write the files.
train_frames = [pd.concat(frames, axis=0) for frames in seed_frames]
(train_reward, train_success_tasks, train_channel_success, train_channel_collision,
 train_channel_idle, train_goodput, train_droprate, train_failed) = train_frames
for train_frame, (train_csv, _, _) in zip(train_frames, metric_specs):
    train_frame.to_csv(train_csv,index=False)

eval_frames = [pd.DataFrame.from_dict(eval_store) for _, _, eval_store in metric_specs]
(eval_reward, eval_success_tasks, eval_channel_success, eval_channel_collision,
 eval_channel_idle, eval_goodput, eval_droprate, eval_failed) = eval_frames
for eval_frame, (_, eval_csv, _) in zip(eval_frames, metric_specs):
    eval_frame.to_csv(eval_csv,index=False)

In [None]:
# Plot Style:
# Global look-and-feel shared by all figures below. The statement order matters because
# several of these calls write to the same global matplotlib rc state.
# 15 hex colours; the plotting cells pick colors[i] by scheme position.
colors = sns.color_palette(n_colors= 15).as_hex()
mpl.rcParams.update({"font.size": 15, "axes.labelsize": 15, "lines.markersize": 10})
# NOTE(review): sns.set() applies seaborn's own rc defaults and probably overrides the
# sizes set on the previous line - confirm which values are really in effect.
sns.set(rc={'figure.figsize': (8,6)})
sns.set_context("notebook")
sns.set_style("whitegrid", {"grid.color": ".6", "grid.linestyle": ":"})
# Per-scheme line styles (16 entries) and markers (14 entries), indexed like `colors`.
linestyles = ["--", "-.",":","-","--","-.",":","-","--", "-.",":","-","--","-.",":","-"]
markers = ["s","o","d","v","P","<","X","s","o","d","v","P","<","X"]
# Project helper imported from plot_functions (behaviour not visible in this notebook).
set_fonts()
#colors = set_style()
# Define the size and style of the circle to show the mean of the box plots:
meanpointprops = dict(marker='o', markeredgecolor='black',markerfacecolor='lightgray', markersize=3)

In [None]:
# Reload the stored curves from disk so the plotting cells work from the CSV files.
train_csv_files = [
    'train_Reward_Data.csv',
    'train_success_tasks_Data.csv',
    'train_channel_success_Data.csv',
    'train_channel_collision_Data.csv',
    'train_channel_idle_Data.csv',
    'train_goodput_Data.csv',
    'train_droprate_Data.csv',
    'train_failed_Data.csv',
]
(train_reward_results,
 train_success_tasks_results,
 train_channel_success_results,
 train_channel_collision_results,
 train_channel_idle_results,
 train_goodput_results,
 train_droprate_results,
 train_failed_results) = [pd.read_csv(csv_file) for csv_file in train_csv_files]

# Note: from here on the eval_*_results names refer to DataFrames read from disk.
eval_csv_files = [
    'eval_Reward_Data.csv',
    'eval_success_tasks_Data.csv',
    'eval_channel_success_Data.csv',
    'eval_channel_collision_Data.csv',
    'eval_channel_idle_Data.csv',
    'eval_goodput_Data.csv',
    'eval_droprate_Data.csv',
    'eval_failed_Data.csv',
]
(eval_reward_results,
 eval_success_tasks_results,
 eval_channel_success_results,
 eval_channel_collision_results,
 eval_channel_idle_results,
 eval_goodput_results,
 eval_droprate_results,
 eval_failed_results) = [pd.read_csv(csv_file) for csv_file in eval_csv_files]

In [None]:
X_axis = np.arange(0,n_episodes+1,eval_every) 
step_plot = int(0.05 * n_episodes)
width = int(0.03*n_episodes) # Width of the box plots


def plot_train_test_figure(fig_name, train_df, eval_df, ylabel, out_file,
                           legend_ncol=2, legend_anchor=(0.1,1.2), end_tick_pad=4):
    """Draw one figure with the curves on the left and the test box plots on the right.

    Uses the notebook-level settings `n_episodes`, `eval_every`, `step_plot`,
    `width`, `colors`, `linestyles`, `markers` and `meanpointprops`.

    Args:
        fig_name: name of the matplotlib figure to create or reuse.
        train_df: frame with an 'episodes' column followed by one column per scheme
            (columns 1 to 9 are plotted as lines).
        eval_df: mapping or frame with one test series per scheme.
        ylabel: label of the y axis.
        out_file: path the figure is saved to.
        legend_ncol: number of legend columns.
        legend_anchor: `bbox_to_anchor` of the legend.
        end_tick_pad: extra `step_plot` units reserved to the right of the boxes.

    Returns:
        The matplotlib figure.
    """
    fig = plt.figure(fig_name)

    # Curves: one line per scheme; style and colour are chosen by column position.
    scheme_cols = list(train_df.columns[1:10])
    g = None
    for i, key in enumerate(scheme_cols):
        g = sns.lineplot(data=train_df,x='episodes',y=key,label=key,linestyle=linestyles[i],color=colors[i],marker=markers[i],
                         markevery=eval_every, markersize=5)
    g.axes.xaxis.set_major_formatter(ticker.EngFormatter())
    plt.axvline(n_episodes + 0.7*step_plot, color='k', linestyle="--")
    plt.draw()
    locs, tick_labels = plt.xticks()

    # Boxes: follow the order of the curves so that every box gets the colour shown
    # for its scheme in the legend (the eval columns are stored in a different order).
    color_index = {key: i for i, key in enumerate(scheme_cols)}
    box_keys = [key for key in scheme_cols if key in eval_df]
    for key in eval_df.keys():
        if key not in color_index:
            # Scheme without a curve: give it the next free colour and draw it last.
            color_index[key] = len(color_index)
            box_keys.append(key)

    y_max_ls = []
    for slot, key in enumerate(box_keys):
        ymax = draw_boxplot(eval_df[key], color=colors[color_index[key]], positions=[n_episodes + (1.5+slot)*step_plot],
                            widths=width, showfliers=False, meanprops=meanpointprops, showmeans=True, whis=1.0)
        y_max_ls.append(ymax)

    end_tick = len(box_keys) + end_tick_pad
    plt.xticks(locs[1:-1], tick_labels[1:-1])
    ylim_ = max(y_max_ls)
    plt.ylim(-0.02, ylim_)
    draw_brace(g.axes, [0, n_episodes], ylim_, "Train")
    draw_brace(g.axes, [n_episodes + 0.8*step_plot, n_episodes + end_tick*step_plot], ylim_, "Test")
    x_min, _ = g.axes.get_xlim()
    plt.xlim(x_min, n_episodes + end_tick*step_plot)
    plt.legend(ncol=legend_ncol, frameon=True, shadow=True,bbox_to_anchor=legend_anchor)
    plt.xlabel('Episode')
    plt.ylabel(ylabel)
    plt.savefig(out_file,dpi=600,bbox_inches='tight')
    plt.show()
    return fig


fig1 = plot_train_test_figure("Figure 1", train_reward_results, eval_reward_results,
                              "Reward", "reward.jpg",
                              legend_ncol=3, legend_anchor=(0.1,1.1), end_tick_pad=2)

fig2 = plot_train_test_figure("Figure 2", train_success_tasks_results, eval_success_tasks_results,
                              "No. of Successful Tasks", "task.jpg",
                              legend_ncol=2, legend_anchor=(0.1,1.1), end_tick_pad=2)

fig3 = plot_train_test_figure("Figure 3", train_channel_success_results, eval_channel_success_results,
                              "Channels Access Success Rate", "channel_success.jpg")

fig4 = plot_train_test_figure("Figure 4", train_channel_collision_results, eval_channel_collision_results,
                              "Channels Access Collision Rate", "channel_collision.jpg")

fig5 = plot_train_test_figure("Figure 5", train_channel_idle_results, eval_channel_idle_results,
                              "Channels Idle Rate", "channel_idle.jpg")

fig7 = plot_train_test_figure("Figure 7", train_goodput_results, eval_goodput_results,
                              "Goodput", "goodput.jpg")

fig10 = plot_train_test_figure("Figure 10", train_droprate_results, eval_droprate_results,
                               "No. of Packets Dropped", "drop.jpg")

fig11 = plot_train_test_figure("Figure 11", train_failed_results, eval_failed_results,
                               "No. of Failed Tasks", "failed.jpg")

In [None]:
# NOTE(review): this rebinds `schemes` to the column order of the eval_*_Data.csv files,
# which differs from the order used when `results` was built. It is kept because later
# cells may rely on it; re-running the aggregation cell after this one would pair the
# results with the wrong scheme names.
schemes = ["Proposed with Reduction","Proposed No-Reduction","Contention-free","Contention-based","Semi-static","Round-robin","Heuristics"]


def plot_mean_bar(metric_results, ylabel, out_file):
    """Save and show a bar chart with the mean test value of every scheme.

    The bar labels are taken from the keys/columns of `metric_results` itself, so a
    label can never be attached to the values of another scheme.

    Args:
        metric_results: mapping or frame with one test series per scheme.
        ylabel: label of the y axis.
        out_file: path the figure is saved to.

    Returns:
        The `(fig, ax)` pair of the chart.
    """
    bar_labels = list(metric_results.keys())
    values = [np.mean(metric_results[key]) for key in bar_labels]
    fig , ax = plt.subplots()
    ax.bar(bar_labels,values, color=colors)
    plt.xticks(rotation=-70)
    ax.set_xlabel('Schemes')
    ax.set_ylabel(ylabel)
    plt.savefig(out_file,dpi=1500,bbox_inches='tight')
    plt.show()
    return fig, ax


fig, ax = plot_mean_bar(eval_droprate_results, "No. of Packets Dropped", "drop.pdf")

fig, ax = plot_mean_bar(eval_channel_idle_results, "Channels Idle Rate", "idle.pdf")

fig, ax = plot_mean_bar(eval_channel_collision_results, "Channels Access Collision Rate", "collision.pdf")