In [None]:
import os
import numpy as np
import pickle
import gym
import pandas as pd
import stable_baselines3
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3 import PPO
from stable_baselines3.ppo.policies import MlpPolicy as MLP_PPO
from stable_baselines3 import DQN
from stable_baselines3.dqn.policies import MlpPolicy as MLP_DQN
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common import results_plotter
import matplotlib.pyplot as plt
import seaborn as sns
from explainable.utils import evaluate_policy
#from explainable.envs.deeprmsa_env import shortest_available_path_first_fit
from explainable.dagger import DAgger_Policy
stable_baselines3.__version__ # printing out stable_baselines version used

In [None]:
def get_EnvExpert(top_name, k, alg_name, base_log_dir='./tmp/', only_spectrum_obs = False, mean_service_holding_time=10):
    """Create the monitored 'DeepRMSA-v0' environment and load its trained DQN expert.

    Args:
        top_name: Topology name; the topology is unpickled from
            ``../topologies/demo/{top_name}_{k}.h5``.
        k: Number of candidate paths; must not exceed ``topology.graph['k_paths']``.
        alg_name: Not used by this function; kept so existing callers keep working.
        base_log_dir: Root directory containing the per-holding-time log folder.
            Previously ignored (the folder was always looked up under ``./tmp/``);
            the default reproduces that exact location.
        only_spectrum_obs: Not used by this function; kept so existing callers keep working.
        mean_service_holding_time: Forwarded to the environment and used to pick
            the log folder ``deeprmsa-dqn-sbpp-agent-{mean_service_holding_time}-cost239``.

    Returns:
        ``(env, expert)``: the ``Monitor``-wrapped environment and the DQN model
        loaded from ``best_model`` inside the log folder.

    Raises:
        AssertionError: if ``k`` is larger than the number of paths stored in the topology.
    """
    # SECURITY NOTE: pickle.load can execute arbitrary code; only load topology
    # files that come from a trusted source.
    topology_path = f'../topologies/demo/{top_name}_{k}.h5'
    with open(topology_path, 'rb') as f:
        topology = pickle.load(f)
    assert k <= topology.graph['k_paths'], \
        f"k={k} exceeds the {topology.graph['k_paths']} paths precomputed in {topology_path}"

    # Uniform request probability over 11 nodes.
    # NOTE(review): 11 is hard-coded; presumably the node count of cost239 - confirm
    # before using another topology.
    node_request_probabilities = np.array([1/11] * 11)

    env_args = dict(topology=topology, seed=10,
                    allow_rejection=False, # the agent cannot proactively reject a request
                    j=1, # consider only the first suitable spectrum block for the spectrum assignment
                    mean_service_holding_time=mean_service_holding_time, # value is not set as in the paper to achieve comparable reward values
                    episode_length=50, node_request_probabilities=node_request_probabilities, num_spectrum_resources = 358)

    # Folder that holds both the monitor log and the trained model.
    # The 'cost239' suffix is part of the folder name regardless of top_name.
    log_dir = os.path.join(base_log_dir, f'deeprmsa-dqn-sbpp-agent-{mean_service_holding_time}-cost239')
    env = gym.make('DeepRMSA-v0', **env_args)

    # On top of the usual monitored values, the info keys below (blocking rates,
    # failure counters, ...) are logged as well. 'testing' is the file-name prefix
    # handed to Monitor.
    monitored_info_keys = (
        'episode_service_blocking_rate', 'bit_rate_blocking_rate', 'failure', 'episode_failure',
        'failure_slots', 'episode_failure_slots',
        'failure_disjointness', 'episode_failure_disjointness', 'failure_shared_disjointness',
        'episode_failure_shared_disjointness', 'shared_counter', 'episode_shared_counter', 'dpp_counter',
        'episode_dpp_counter', 'compactness', 'throughput', 'available_slots_working', 'available_slots_backup',
    )
    env = Monitor(env, os.path.join(log_dir, 'testing'), info_keywords=monitored_info_keys)

    expert = DQN.load(os.path.join(log_dir, 'best_model'))

    return env, expert

In [None]:
def get_EnvExpert_Heuristic(top_name, k, alg_name, base_log_dir='./tmp/', only_spectrum_obs = False, mean_service_holding_time=10):
    """Create the monitored 'DeepRMSAKSP-v0' environment and load the expert stored for it.

    Args:
        top_name: Topology name; the topology is unpickled from
            ``../topologies/demo/{top_name}_{k}.h5``.
        k: Number of candidate paths; must not exceed ``topology.graph['k_paths']``.
        alg_name: Not used by this function; kept so existing callers keep working.
        base_log_dir: Root directory containing the per-holding-time log folder.
            Previously ignored (the folder was always looked up under ``./tmp/``);
            the default reproduces that exact location.
        only_spectrum_obs: Not used by this function; kept so existing callers keep working.
        mean_service_holding_time: Forwarded to the environment and used to pick
            the log folder ``deeprmsa-dqn-sbpp-heuristic-{mean_service_holding_time}-cost239``.

    Returns:
        ``(env, expert)``: the ``Monitor``-wrapped environment and the model loaded
        with ``DQN.load`` from ``best_model`` inside the log folder.

    Raises:
        AssertionError: if ``k`` is larger than the number of paths stored in the topology.
    """
    # SECURITY NOTE: pickle.load can execute arbitrary code; only load topology
    # files that come from a trusted source.
    topology_path = f'../topologies/demo/{top_name}_{k}.h5'
    with open(topology_path, 'rb') as f:
        topology = pickle.load(f)
    assert k <= topology.graph['k_paths'], \
        f"k={k} exceeds the {topology.graph['k_paths']} paths precomputed in {topology_path}"

    # Uniform request probability over 11 nodes.
    # NOTE(review): 11 is hard-coded; presumably the node count of cost239 - confirm
    # before using another topology.
    node_request_probabilities = np.array([1/11] * 11)

    env_args = dict(topology=topology, seed=10,
                    allow_rejection=False, # the agent cannot proactively reject a request
                    j=1, # consider only the first suitable spectrum block for the spectrum assignment
                    mean_service_holding_time=mean_service_holding_time, # value is not set as in the paper to achieve comparable reward values
                    episode_length=50, node_request_probabilities=node_request_probabilities, num_spectrum_resources = 358)

    # Folder that holds both the monitor log and the stored model.
    # The 'cost239' suffix is part of the folder name regardless of top_name.
    log_dir = os.path.join(base_log_dir, f'deeprmsa-dqn-sbpp-heuristic-{mean_service_holding_time}-cost239')
    env = gym.make('DeepRMSAKSP-v0', **env_args)

    # On top of the usual monitored values, the info keys below (blocking rates,
    # failure counters, ...) are logged as well. 'testing' is the file-name prefix
    # handed to Monitor.
    monitored_info_keys = (
        'episode_service_blocking_rate', 'bit_rate_blocking_rate', 'failure', 'episode_failure',
        'failure_slots', 'episode_failure_slots',
        'failure_disjointness', 'episode_failure_disjointness', 'failure_shared_disjointness',
        'episode_failure_shared_disjointness', 'shared_counter', 'episode_shared_counter', 'dpp_counter',
        'episode_dpp_counter', 'compactness', 'throughput', 'available_slots_working', 'available_slots_backup',
    )
    env = Monitor(env, os.path.join(log_dir, 'testing'), info_keywords=monitored_info_keys)

    expert = DQN.load(os.path.join(log_dir, 'best_model'))

    return env, expert

In [None]:
# Experiment configuration shared by the training, evaluation and plotting cells.
alg_name, top_name = 'DQN', 'cost239'   # forwarded to the env/expert loader functions
k_path = 10                              # passed as `k` to the loaders and used in output file names
traffics = list(range(100, 501, 100))    # x-axis values of the result plots
holding_time = list(range(10, 51, 10))   # mean service holding times that are trained / evaluated
n_eval_episodes = 2000                   # episodes per policy evaluation
use_heuristic_trainer = False            # not referenced by the cells visible in this notebook

### Training a DT with RL agent

In [None]:
from sklearn import tree
from explainable.dagger import DAgger
from explainable.utils import collect_transitions

# Train one decision-tree student per mean holding time, using the RL agent as the expert.
for ht in holding_time:
    env, expert = get_EnvExpert(top_name, k_path, alg_name, mean_service_holding_time=ht)

    # Initial expert demonstrations (third argument presumably is the number of
    # transitions to collect - confirm against collect_transitions).
    demostrations = collect_transitions(expert, env, 20000)

    # Student classifier and the DAgger trainer that wraps it; the trainer gets
    # its own copy of the demonstrations.
    tree_regr = tree.DecisionTreeClassifier(max_depth=15)  # depth is set only for visualization purposes
    tree_dagger = DAgger(
        expert,
        tree_regr,
        env,
        demostrations.copy(),
        max_depth=15,
        min_samples_split=1000,
    )

    # Fit the student against the expert in the environment.
    tree_dagger.train(expert, env)

    # Store the student; the evaluation cell reads model.h5 from this folder.
    save_dir = f'./tmp_students/cost239_{k_path}_{ht}/'
    tree_dagger.policy.save(save_dir)

In [None]:
from sklearn import tree
from explainable.dagger import DAgger
from explainable.utils import collect_transitions

# Train one decision-tree student per mean holding time, using the expert
# returned by get_EnvExpert_Heuristic.
for ht in holding_time:
    env_heuristic, expert_heuristic = get_EnvExpert_Heuristic(top_name, k_path, alg_name, mean_service_holding_time=ht)

    # Initial expert demonstrations (third argument presumably is the number of
    # transitions to collect - confirm against collect_transitions).
    demostrations = collect_transitions(expert_heuristic, env_heuristic, 20000)

    # Student classifier and the DAgger trainer that wraps it; the trainer gets
    # its own copy of the demonstrations.
    tree_regr = tree.DecisionTreeClassifier(max_depth=15)  # depth is set only for visualization purposes
    tree_dagger = DAgger(
        expert_heuristic,
        tree_regr,
        env_heuristic,
        demostrations.copy(),
        max_depth=15,
        min_samples_split=6000,
    )

    # Fit the student against the expert in the environment.
    tree_dagger.train(expert_heuristic, env_heuristic)

    # Store the student; the evaluation cell reads model.h5 from this folder.
    save_dir = f'./tmp_students/cost239_heuristic_{k_path}_{ht}/'
    tree_dagger.policy.save(save_dir)

#### Evaluating results

In [None]:
import time as t

# Policies to evaluate. 'Agent' and 'KSPFF' (the two experts themselves) are
# supported by the loop below and can be added to this list.
evaluated_policies = ['Student', 'KSPFF Student']

# One result list per policy; every list gets one entry per holding time.
rewards = {name: [] for name in evaluated_policies}
blocking_rates = {name: [] for name in evaluated_policies}
failure_slots = {name: [] for name in evaluated_policies}          # declared, but not filled by this cell
failure_disjointness = {name: [] for name in evaluated_policies}   # declared, but not filled by this cell
evaluation_time = {name: [] for name in evaluated_policies}

# Which loader builds the (env, expert) pair for each supported key.
env_expert_loaders = {
    'Student': get_EnvExpert,
    'Agent': get_EnvExpert,
    'KSPFF Student': get_EnvExpert_Heuristic,
    'KSPFF': get_EnvExpert_Heuristic,
}

for ht in holding_time:
    # Saved students for this holding time; keys missing here are evaluated
    # with the expert returned by the loader.
    student_checkpoints = {
        'Student': f'./tmp_students/cost239_{k_path}_{ht}/model.h5',
        'KSPFF Student': f'./tmp_students/cost239_heuristic_{k_path}_{ht}/model.h5',
    }

    for key in rewards:
        if key not in env_expert_loaders:
            raise Exception("\n\nSorry, key not found")

        env, expert = env_expert_loaders[key](top_name, k_path, alg_name, mean_service_holding_time=ht)
        if key in student_checkpoints:
            policy = DAgger_Policy.load(student_checkpoints[key], env.observation_space, env.action_space)
        else:
            policy = expert

        # Only the evaluation itself is timed (env creation and model loading are excluded).
        start = t.time()
        mean_reward, _, df = evaluate_policy(env, n_eval_episodes, model=policy, return_dataframe=True)
        end = t.time()
        timer = round((end - start) / 60, 2)  # minutes

        evaluation_time[key].append(timer)
        rewards[key].append(mean_reward)
        # Last entry of the 'service_blocking_rate' column.
        blocking_series = df['service_blocking_rate']
        blocking_rates[key].append(blocking_series[len(blocking_series) - 1])

        print(f'Done for {key} with expert mean_reward = {mean_reward} with duration of {timer} minutes')

#### Saving results:

In [None]:
# Dump every collected metric to its own CSV file (one column per policy).
os.makedirs('./plots_students/', exist_ok=True)

csv_targets = (
    (rewards, f'./plots_students/rewards_{k_path}.csv'),
    (blocking_rates, f'./plots_students/blocking_rates_{k_path}.csv'),
    (evaluation_time, f'./plots_students/evaluation_times_{k_path}.csv'),
)
for metric_table, csv_path in csv_targets:
    df_r = pd.DataFrame(metric_table)
    df_r.to_csv(csv_path)

### Plotting results

#### Blocking rate

In [None]:
# Output folder for the (currently disabled) figure export.
base_output_dir = './plots_students/blocking_rate/'
output_dir = base_output_dir
os.makedirs(output_dir, exist_ok=True)

# Palette; the reward plot in a later cell reuses `colors`.
colors = sns.color_palette("colorblind")

# NOTE: 10e-4 == 1e-3, so these ticks are 0.005, 0.01, 0.05, 0.1 and 0.5.
tick_values = [5*10e-4, 10e-3, 5*10e-3, 10e-2, 5*10e-2]

# One curve per evaluated policy, blocking rate on a log axis.
fig, ax = plt.subplots()
for idx, key in enumerate(rewards):
    ax.plot(traffics, blocking_rates[key], label=key, color=colors[idx])
ax.set_xlabel("traffic")
ax.set_ylabel("blocking rate")
ax.legend()
ax.set_yscale('log')
ax.set_yticks(tick_values)
ax.set_yticklabels(tick_values)
# Alternative linear ticks:
# ax.set_yticks([x/100 for x in range(1, 25,5)]); ax.set_yticklabels([x/100 for x in range(1, 25,5)])
# fig.savefig(output_dir + f'{top_name}_{k_path}.png')
plt.show()

#### Reward

In [None]:
# Output folder for the (currently disabled) figure export.
base_output_dir = './plots_students/rewards/'
output_dir = base_output_dir
os.makedirs(output_dir, exist_ok=True)

# One curve per evaluated policy; `colors` comes from the blocking-rate cell.
fig, ax = plt.subplots()
for idx, key in enumerate(rewards):
    ax.plot(traffics, rewards[key], label=key, color=colors[idx])
ax.set_xlabel("traffic")
ax.set_ylabel("reward")
ax.legend()
# fig.savefig(output_dir + f'{top_name}_{k_path}.png')
plt.show()

#### Feature importance for the student trained with the RL agent

In [None]:
# Output folder for the (currently disabled) figure export.
base_output_dir_features = './plots_students/features_importance/'
output_dir = base_output_dir_features
os.makedirs(output_dir, exist_ok=True)

# Load the saved student that was trained with the RL agent (holding time 10 only).
env, expert = get_EnvExpert(top_name, k_path, alg_name, mean_service_holding_time=10)
policy_agent = DAgger_Policy.load(
    f'./tmp_students/cost239_{k_path}_10/model.h5',
    env.observation_space,
    env.action_space,
)

# Feature importances of the underlying classifier, also exported as CSV.
importances_sk = policy_agent.student.feature_importances_
importance = pd.DataFrame(importances_sk)
importance.to_csv(f'./plots_students/features_importance_{k_path}.csv')

# Bar chart: one bar per feature index.
fig, ax = plt.subplots()
ax.bar(range(len(importances_sk)), importances_sk)
ax.set_xlabel("feature")
ax.set_ylabel("importance")
ax.set_title(f"Agent Features importance {top_name}")
# fig.savefig(output_dir + f'{top_name}_{k_path}.png')
plt.show()

In [None]:
# Output folder for the (currently disabled) figure export.
base_output_dir_features = './plots_students/features_importance/'
output_dir = base_output_dir_features
os.makedirs(output_dir, exist_ok=True)

# Load the saved student from the heuristic setup (holding time 10 only).
env, expert = get_EnvExpert_Heuristic(top_name, k_path, alg_name, mean_service_holding_time=10)
policy_kspff = DAgger_Policy.load(
    f'./tmp_students/cost239_heuristic_{k_path}_10/model.h5',
    env.observation_space,
    env.action_space,
)

# Feature importances of the underlying classifier, also exported as CSV.
importances_sk = policy_kspff.student.feature_importances_
importance = pd.DataFrame(importances_sk)
importance.to_csv(f'./plots_students/features_importance_heuristic_{k_path}.csv')

# Bar chart: one bar per feature index.
fig, ax = plt.subplots()
ax.bar(range(len(importances_sk)), importances_sk)
ax.set_xlabel("feature")
ax.set_ylabel("importance")
ax.set_title(f"KSPFF Features importance {top_name}")
# fig.savefig(output_dir + f'{top_name}_heuristic_{k_path}.png')
plt.show()

#### Decision tree structure

In [None]:
import graphviz
from sklearn import tree

# DOT description of the agent-trained student tree, limited to max_depth=3
# in the export; out_file=None makes export_graphviz return the DOT string.
dot_data = tree.export_graphviz(
    policy_agent.student,
    out_file=None,
    filled=True,
    rotate=True,
    max_depth=3,
)
# Wrap the DOT source so it can be shown inline and rendered to PNG later.
graph = graphviz.Source(
    dot_data,
    format="png",
    directory='./plots_students/DTClassifier/',
)
graph

In [None]:
# Render `graph` (the agent-student tree built in the previous cell) with graphviz.
# The output format ("png") and target directory were set when the Source was created.
graph.render("DT Classifier DRL Agent")

In [None]:
import graphviz
from sklearn import tree

# DOT description of the KSPFF student tree, limited to max_depth=4
# in the export; out_file=None makes export_graphviz return the DOT string.
dot_data = tree.export_graphviz(
    policy_kspff.student,
    out_file=None,
    filled=True,
    rotate=True,
    max_depth=4,
)
# Wrap the DOT source so it can be shown inline and rendered to PNG later.
graph = graphviz.Source(
    dot_data,
    format="png",
    directory='./plots_students/DTClassifier/',
)
graph

In [None]:
# Render `graph` (the KSPFF-student tree built in the previous cell) with graphviz.
# The output format ("png") and target directory were set when the Source was created.
graph.render("DT Classifier KSPFF Heuristics")