In [None]:
import os
import numpy as np
import pickle
import gym
import pandas as pd
import stable_baselines3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3 import PPO
from stable_baselines3.ppo.policies import MlpPolicy as MLP_PPO
from stable_baselines3 import DQN
from stable_baselines3.dqn.policies import MlpPolicy as MLP_DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common import results_plotter
import matplotlib.pyplot as plt
import seaborn as sns
from explainable.utils import evaluate_policy
#from explainable.envs.deeprmsa_env import shortest_available_path_first_fit
from explainable.dagger import DAgger_Policy
stable_baselines3.__version__ # printing out stable_baselines version used

In [None]:
def get_EnvExpert(top_name, k, alg_name, base_log_dir='./tmp/', only_spectrum_obs = False, mean_service_holding_time=10):
    """Create the monitored ``DeepRMSA-v0`` environment and load its DQN expert.

    Parameters
    ----------
    top_name : str
        Topology name; the topology is unpickled from
        ``../topologies/demo/{top_name}_{k}.h5``.
    k : int
        Number of paths; must not exceed ``topology.graph['k_paths']``.
    alg_name : str
        Not used by this function; kept so existing callers keep working.
    base_log_dir : str
        Directory that contains the run folder
        ``deeprmsa-dqn-sbpp-agent-<holding time>-cost239/``.
    only_spectrum_obs : bool
        Not used by this function; kept so existing callers keep working.
    mean_service_holding_time : int
        Forwarded to the environment and used to pick the run folder.

    Returns
    -------
    tuple
        ``(env, expert)``: the ``Monitor``-wrapped environment and the DQN
        model loaded from ``<run folder>best_model``.

    Raises
    ------
    AssertionError
        If ``k`` is larger than ``topology.graph['k_paths']``.
    """
    topology_dir = '/topologies/demo/' +  top_name +f'_{k}.h5'
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only point this at topology files you trust.
    with open(f'..{topology_dir}', 'rb') as f:
        topology = pickle.load(f)
    assert k <= topology.graph['k_paths']

    # Uniform distribution over 11 entries.
    # NOTE(review): presumably one entry per node of the topology, which would
    # tie this function to 11-node topologies -- confirm before using others.
    node_request_probabilities = np.full(11, 1/11)

    env_args = dict(topology=topology, seed=10,
                    allow_rejection=False, # the agent cannot proactively reject a request
                    j=1, # consider only the first suitable spectrum block for the spectrum assignment
                    mean_service_holding_time=mean_service_holding_time, # value is not set as in the paper to achieve comparable reward values
                    episode_length=50, node_request_probabilities=node_request_probabilities, num_spectrum_resources = 358)

    # Run folder below base_log_dir. Previously './tmp/' was hard-coded and the
    # base_log_dir argument was ignored; with the default value the resulting
    # path is unchanged.
    # NOTE(review): the folder name still hard-codes 'dqn' and 'cost239'
    # regardless of alg_name / top_name.
    run_folder = "deeprmsa-dqn-sbpp-agent-{}-cost239/".format(mean_service_holding_time)
    log_dir = os.path.join(base_log_dir, run_folder)
    env = gym.make('DeepRMSA-v0', **env_args)

    # Monitor writes its log using the file prefix log_dir + 'testing'.
    # On top of the usual monitored things, the info keys listed below
    # (blocking rates, failure counters, ...) are recorded as well.
    env = Monitor(env, log_dir + 'testing', info_keywords=('episode_service_blocking_rate','bit_rate_blocking_rate','failure', 'episode_failure',
                        'failure_slots','episode_failure_slots', 
                        'failure_disjointness','episode_failure_disjointness', 'failure_shared_disjointness',
                        'episode_failure_shared_disjointness','shared_counter','episode_shared_counter', 'dpp_counter',
                        'episode_dpp_counter','compactness', 'throughput', 'available_slots_working', 'available_slots_backup'))

    # The expert is the 'best_model' checkpoint stored in the same run folder.
    expert = DQN.load(log_dir +'best_model')

    return env, expert

In [None]:
def get_EnvExpert_Heuristic(top_name, k, alg_name, base_log_dir='./tmp/', only_spectrum_obs = False, mean_service_holding_time=10):
    """Create the monitored ``DeepRMSAKSP-v0`` environment and load its DQN model.

    Parameters
    ----------
    top_name : str
        Topology name; the topology is unpickled from
        ``../topologies/demo/{top_name}_{k}.h5``.
    k : int
        Number of paths; must not exceed ``topology.graph['k_paths']``.
    alg_name : str
        Not used by this function; kept so existing callers keep working.
    base_log_dir : str
        Directory that contains the run folder
        ``deeprmsa-dqn-sbpp-heuristic-<holding time>-cost239/``.
    only_spectrum_obs : bool
        Not used by this function; kept so existing callers keep working.
    mean_service_holding_time : int
        Forwarded to the environment and used to pick the run folder.

    Returns
    -------
    tuple
        ``(env, expert)``: the ``Monitor``-wrapped environment and the DQN
        model loaded from ``<run folder>best_model``.

    Raises
    ------
    AssertionError
        If ``k`` is larger than ``topology.graph['k_paths']``.
    """
    topology_dir = '/topologies/demo/' +  top_name +f'_{k}.h5'
    # NOTE(security): pickle.load can execute arbitrary code while loading;
    # only point this at topology files you trust.
    with open(f'..{topology_dir}', 'rb') as f:
        topology = pickle.load(f)
    assert k <= topology.graph['k_paths']

    # Uniform distribution over 11 entries.
    # NOTE(review): presumably one entry per node of the topology, which would
    # tie this function to 11-node topologies -- confirm before using others.
    node_request_probabilities = np.full(11, 1/11)

    env_args = dict(topology=topology, seed=10,
                    allow_rejection=False, # the agent cannot proactively reject a request
                    j=1, # consider only the first suitable spectrum block for the spectrum assignment
                    mean_service_holding_time=mean_service_holding_time, # value is not set as in the paper to achieve comparable reward values
                    episode_length=50, node_request_probabilities=node_request_probabilities, num_spectrum_resources = 358)

    # Run folder below base_log_dir. Previously './tmp/' was hard-coded and the
    # base_log_dir argument was ignored; with the default value the resulting
    # path is unchanged.
    # NOTE(review): the folder name still hard-codes 'dqn' and 'cost239'
    # regardless of alg_name / top_name.
    run_folder = "deeprmsa-dqn-sbpp-heuristic-{}-cost239/".format(mean_service_holding_time)
    log_dir = os.path.join(base_log_dir, run_folder)
    env = gym.make('DeepRMSAKSP-v0', **env_args)

    # Monitor writes its log using the file prefix log_dir + 'testing'.
    # On top of the usual monitored things, the info keys listed below
    # (blocking rates, failure counters, ...) are recorded as well.
    env = Monitor(env, log_dir + 'testing', info_keywords=('episode_service_blocking_rate','bit_rate_blocking_rate','failure', 'episode_failure',
                        'failure_slots','episode_failure_slots', 
                        'failure_disjointness','episode_failure_disjointness', 'failure_shared_disjointness',
                        'episode_failure_shared_disjointness','shared_counter','episode_shared_counter', 'dpp_counter',
                        'episode_dpp_counter','compactness', 'throughput', 'available_slots_working', 'available_slots_backup'))

    # The model is the 'best_model' checkpoint stored in the same run folder.
    expert = DQN.load(log_dir +'best_model')

    return env, expert

In [None]:
# --- Experiment configuration -------------------------------------------------
# Passed to the get_EnvExpert* helpers (which currently do not use alg_name).
alg_name = 'DQN'
top_name = 'cost239'
# Forwarded as `k` to the helpers and used in the exported CSV file names.
k_path = 10

# x-axis values of the plots: 50, 100, ..., 500.
# Must contain one entry per element of `holding_time`.
traffics = list(range(50, 501, 50))
# Mean service holding times swept by the evaluation loop: 5, 10, ..., 50.
holding_time = list(range(5, 51, 5))

# Number of episodes handed to evaluate_policy for every (policy, holding time) pair.
n_eval_episodes = 2000
# Not referenced by the cells visible in this notebook.
use_heuristic_trainer = False

In [12]:
import time as t

# Policy labels in plotting order: the plot cells pick colours by key position,
# so 'KSPFF' has to stay first in every container.
policy_labels = ('KSPFF', 'Agent')

# One container per reported metric. Each maps a policy label to a list that
# receives one value per entry of `holding_time`.
rewards = {label: [] for label in policy_labels}
blocking_rates = {label: [] for label in policy_labels}
failure_slots = {label: [] for label in policy_labels}
failure_disjointness = {label: [] for label in policy_labels}
evaluation_time = {label: [] for label in policy_labels}
number_of_hops_working_path = {label: [] for label in policy_labels}
number_of_hops_successful_working_path = {label: [] for label in policy_labels}
number_of_hops_failed_working_path = {label: [] for label in policy_labels}
number_of_hops_backup_path = {label: [] for label in policy_labels}
number_of_hops_successful_backup_path = {label: [] for label in policy_labels}
number_of_hops_failed_backup_path = {label: [] for label in policy_labels}
dpp_counter = {label: [] for label in policy_labels}
sbpp_counter = {label: [] for label in policy_labels}

# (container, column of the evaluation dataframe) pairs. For each pair the
# value in the LAST row of the column is stored.
# NOTE(review): presumably the last row holds the value after the final
# evaluation episode -- confirm against explainable.utils.evaluate_policy.
last_row_metrics = (
    (blocking_rates, 'service_blocking_rate'),
    (failure_disjointness, 'failure_disjointness'),
    (failure_slots, 'failure_slots'),
    (number_of_hops_working_path, 'length_of_working_path'),
    (number_of_hops_successful_working_path, 'length_of_successful_working_path'),
    (number_of_hops_failed_working_path, 'length_of_failed_working_path'),
    (number_of_hops_backup_path, 'length_of_backup_path'),
    (number_of_hops_successful_backup_path, 'length_of_successful_backup_path'),
    (number_of_hops_failed_backup_path, 'length_of_failed_backup_path'),
    (dpp_counter, 'dpp_counter'),
    (sbpp_counter, 'shared_counter'),
)


for ht in holding_time:
    for key in rewards:
        # Build the environment/model pair that belongs to this policy label.
        if key == 'KSPFF':
            env_heuristic, expert_heuristic = get_EnvExpert_Heuristic(top_name, k_path, alg_name, mean_service_holding_time=ht)
            eval_env, eval_model = env_heuristic, expert_heuristic
        elif key == 'Agent':
            env, expert = get_EnvExpert(top_name,k_path, alg_name, mean_service_holding_time=ht)
            eval_env, eval_model = env, expert
        else:
            raise Exception("\n\nSorry, key not found")

        # Wall-clock time of the evaluation only, reported in minutes.
        start = t.time()
        mean_reward, _, df = evaluate_policy(eval_env, n_eval_episodes, model = eval_model, return_dataframe=True)
        end = t.time()
        timer = round((end - start)/60,2)

        # Release the environment (and the Monitor log file behind it); a new
        # one is created for every (policy, holding time) pair, so leaving
        # them open leaks one file handle per iteration.
        eval_env.close()

        evaluation_time[key].append(timer)
        rewards[key].append(mean_reward)

        # .iloc[-1] selects the last row by position. The previous label lookup
        # df[col][len(df[col]) - 1] is only correct for a default 0..n-1 index.
        for container, column in last_row_metrics:
            container[key].append(df[column].iloc[-1])

        print(f'Done for {key} with expert mean_reward = {mean_reward} with duration of {timer} minutes')

In [None]:
# Persist every result container as ./plots_compare2/<name>_<k_path>.csv
os.makedirs('./plots_compare2/', exist_ok=True)

# File-name stem -> results dict, listed in the order the files are written.
tables_to_export = {
    'rewards': rewards,
    'blocking_rates': blocking_rates,
    'failure_disjointness': failure_disjointness,
    'failure_slots': failure_slots,
    'evaluation_time': evaluation_time,
    'number_of_hops_working_path': number_of_hops_working_path,
    'number_of_hops_successful_working_path': number_of_hops_successful_working_path,
    'number_of_hops_failed_working_path': number_of_hops_failed_working_path,
    'number_of_hops_backup_path': number_of_hops_backup_path,
    'number_of_hops_successful_backup_path': number_of_hops_successful_backup_path,
    'number_of_hops_failed_backup_path': number_of_hops_failed_backup_path,
    'dpp_counter': dpp_counter,
    'sbpp_counter': sbpp_counter,
}

for table_name, table in tables_to_export.items():
    # One column per policy label, one row per evaluated holding time.
    df_r = pd.DataFrame(table)
    df_r.to_csv(f'./plots_compare2/{table_name}_{k_path}.csv')

In [None]:
# Blocking rate of every policy, drawn on a logarithmic y-axis.
base_output_dir = './plots_compare/blocking_rate/'
colors = sns.color_palette("colorblind")

output_dir = base_output_dir
os.makedirs(output_dir, exist_ok=True)

fig, ax = plt.subplots()
# x values come from `traffics`; it needs one entry per evaluated holding time.
for idx, policy in enumerate(blocking_rates):
    ax.plot(traffics, blocking_rates[policy], '-x', label=policy, color=colors[idx])
ax.set_xlabel("traffic")
ax.set_ylabel("blocking rate")
ax.legend()
ax.set_yscale('log')
# Optional fixed ticks (disabled):
# plt.yticks(ticks=[5*10e-4, 10e-3,5*10e-3,10e-2,5*10e-2],labels=[ 5*10e-4, 10e-3,5*10e-3,10e-2,5*10e-2])
# plt.yticks(ticks=[x/100 for x in range(1, 25,5)],labels=[x/100 for x in range(1, 25,5)])
ax.grid(axis='both', which='both')
# Saving is disabled; enable to write the figure into output_dir.
# plt.savefig(output_dir + f'{top_name}_{k_path}.png')
plt.show()

In [None]:
# Mean reward of every policy.
base_output_dir = './plots_compare/rewards/'
# Define the palette here as the other plot cells do, so this cell no longer
# depends on `colors` leaking from the previously executed plot cell.
colors = sns.color_palette("colorblind")

output_dir = base_output_dir
os.makedirs(output_dir, exist_ok=True)

fig, ax = plt.subplots()
# x values come from `traffics`; it needs one entry per evaluated holding time.
for idx, policy in enumerate(rewards):
    ax.plot(traffics, rewards[policy], '-x', label=policy, color=colors[idx])
ax.set_xlabel("traffic")
ax.set_ylabel("reward")
ax.legend()
# Saving is disabled; enable to write the figure into output_dir.
# plt.savefig(output_dir + f'{top_name}_{k_path}.png')
plt.show()

In [None]:
# Rate of failures attributed to disjointness, per policy.
base_output_dir = './plots_compare/failure_disjointness/'
colors = sns.color_palette("colorblind")

output_dir = base_output_dir
os.makedirs(output_dir, exist_ok=True)

fig, ax = plt.subplots()
# Policy labels are taken from `rewards`; every result container shares them.
for idx, policy in enumerate(rewards):
    ax.plot(traffics, failure_disjointness[policy], '-x', label=policy, color=colors[idx])
ax.set_xlabel("traffic")
ax.set_ylabel("failure due to disjointness rate")
ax.legend(loc = "lower right")
# Saving is disabled; enable to write the figure into output_dir.
# plt.savefig(output_dir + f'{top_name}_{k_path}.png')
plt.show()

In [None]:
# Rate of failures attributed to missing frequency slots, per policy (log y-axis).
base_output_dir = './plots_compare/failure_slots/'
colors = sns.color_palette("colorblind")

output_dir = base_output_dir
os.makedirs(output_dir, exist_ok=True)
plt.figure()

# Policy labels are taken from `rewards`; every result container shares them.
for i, key in enumerate(rewards):
    plt.plot(traffics, failure_slots[key], label=key, color=colors[i])
plt.xlabel("traffic")
plt.ylabel("failure due to frequency slots rate")
plt.legend(loc = "lower right")
# plt.savefig(output_dir + f'{top_name}_{k_path}.png')

# Select the log scale BEFORE fixing the ticks: changing the scale installs the
# scale's default tick locator/formatter, so ticks set earlier were discarded.
plt.yscale('log')
# These expressions evaluate to 0.005, 0.01, 0.05, 0.1 and 0.5
# (10e-3 is 0.01, not 0.001).
tick_values = [5*10e-4, 10e-3, 5*10e-3, 10e-2, 5*10e-2]
plt.yticks(ticks=tick_values, labels=tick_values)
plt.grid(axis='both', which='both')
plt.show()

In [None]:
# Wall-clock evaluation time (minutes) of every policy.
base_output_dir = './plots_compare/evaluation_time/'
colors = sns.color_palette("colorblind")

output_dir = base_output_dir
os.makedirs(output_dir, exist_ok=True)

fig, ax = plt.subplots()
# Policy labels are taken from `rewards`; every result container shares them.
for idx, policy in enumerate(rewards):
    ax.plot(traffics, evaluation_time[policy], label=policy, color=colors[idx])
ax.set_xlabel("traffic")
ax.set_ylabel("evaluation time (mins)")
ax.legend()
# Saving is disabled; enable to write the figure into output_dir.
# plt.savefig(output_dir + f'{top_name}_{k_path}.png')
plt.show()