In [2]:
import gym
from gym.utils import seeding
from rl_MEC_scheduler.handlers.env_handler import NetworkEnv
from rl_MEC_scheduler.services.env_services import get_MECs, get_UEs
from rl_MEC_scheduler.values.network_values import Network
from rl_MEC_scheduler.values.task_values import TaskDistributions
from rl_MEC_scheduler.repository.config_repository import load_configs, save_configs
from rl_MEC_scheduler.handlers.env_handler import load_envs
from rl_MEC_scheduler.handlers.baselines import get_baselines

In [3]:
# Seed used for the random generator built below and passed to every
# NetworkEnv constructed in this notebook.
seed_value = 69

In [4]:
# Task-generation parameters: a mean/std pair each for input data, output data
# and cycle count, plus the weights given to energy and delay.
# NOTE(review): units are not visible in this notebook — confirm against
# TaskDistributions before changing any value.
task_distributions = TaskDistributions(
    input_data_mean=250e3,
    input_data_std=1e3,
    output_data_mean=25e3,
    output_data_std=1e2,
    n_cycles_mean=900e6,
    n_cycles_std=50e6,
    energy_weight=0.5,
    delay_weight=0.5,
)

In [5]:
# Radio/network parameters shared by all devices in the environment.
# NOTE(review): n_devices is hard-coded to 10 here while n_UEs (defined in a
# later cell) is also 10 — confirm whether the two are meant to stay in sync.
network = Network(
    bandwidth=10e9, 
    n_subcarriers=10, 
    path_loss_exponent=-4, 
    upload_channel_fading_coefficient=100, 
    download_channel_fading_coefficient=100, 
    upload_bit_error_rate=1, 
    download_bit_error_rate=1, 
    noise_power=5e-5,
    signal_noise_ratio=0.5, 
    n_devices=10
)

In [6]:
# Scenario size: how many MECs and UEs the environment contains.
n_MECs, n_UEs = 5, 10

# Weights forwarded unchanged to NetworkEnv.
mean_weight, max_weight = 0.25, 0.75

# The file name encodes the scenario so that saved environments with
# different settings do not overwrite each other.
configs_filename = f"env_{n_MECs}_{n_UEs}_{mean_weight}_{max_weight}_configs.json"

In [7]:
# Build the random generator used below to create the MECs and UEs; the call
# also hands back the seed it used.
np_random, seed = seeding.np_random(seed_value)

In [8]:
# Create the MECs with the shared random generator.
MECs = get_MECs(
    np_random=np_random,
    radius=200,
    n_MECs=n_MECs,
    frequency=5e9,
    transmission_power=200,
    n_dimensions=2,
)

In [9]:
# Create the UEs with the shared random generator (drawn after the MECs, so
# the cell order affects the result).
# NOTE(review): the keyword is spelled `tranmsission_power`; it is kept as-is
# because it has to match the get_UEs signature — confirm the spelling there.
UEs = get_UEs(
    np_random=np_random,
    radius=200,
    n_UEs=n_UEs,
    frequency=1e9,
    tranmsission_power=500e-3,
    idle_power=100e-3,
    download_power=200e-3,
    n_dimensions=2,
)

In [236]:
# Give every MEC and UE its own frequency, drawn from a fixed menu, replacing
# the uniform values that were passed to get_MECs / get_UEs.
# F is the menu for MECs, f the menu for UEs.
F = [2.5e9, 5e9, 7e9, 10e9]
f = [0.25e9, 0.5e9, 0.75e9, 1e9]
# Both draws consume the shared generator, so their order affects the result.
F_choices = np_random.choice(F, size=n_MECs)
f_choices = np_random.choice(f, size=n_UEs)

# Overwrite the frequency attribute in place on the already-created objects.
for idx, UE in enumerate(UEs):
    UE.frequency = f_choices[idx]

for idx, MEC in enumerate(MECs):
    MEC.frequency = F_choices[idx]

# Replaces the configs_filename set in the earlier cell so that this setup is
# saved under its own file name.
configs_filename = f"env_{n_MECs}_{n_UEs}_{mean_weight}_{max_weight}_heterogenous_configs.json"

In [10]:
# Assemble the environment from the pieces defined in the cells above.
network_env = NetworkEnv(
    seed_value=seed_value,
    UEs=UEs,
    MECs=MECs,
    network=network,
    task_distributions=task_distributions,
    mean_weight=mean_weight,
    max_weight=max_weight,
)

In [187]:
# Persist the environment (as a one-element tuple) under the current
# configs_filename.
save_configs(
    configs=(network_env,),
    configs_path="experiments/env_configs",
    configs_filename=configs_filename,
)

In [188]:
# Read the saved environments back and keep the first one, replacing the
# in-memory network_env with the copy loaded from disk.
network_env = load_envs(
    config_path="experiments/env_configs",
    config_filename=configs_filename,
)[0]

In [11]:
# Register NetworkEnv with gym so it can be built through gym.make.
# max_episode_steps=10 matches the 10-step episodes used throughout the
# rest of this notebook.
# NOTE(review): this id is "NetworkEnv-v0", while the Ray registration further
# down uses "NetWorkEnv-v0" (capital W) — confirm the difference is intended.
gym.register(
    id="NetworkEnv-v0",
    entry_point=NetworkEnv,
    max_episode_steps=10,
)

In [12]:
# Build the gym-wrapped environment used for evaluation. Every constructor
# argument is read from the attribute of the same name on `network_env`.
test = gym.make(
    "NetworkEnv-v0",
    **{
        field: getattr(network_env, field)
        for field in (
            "seed_value",
            "UEs",
            "MECs",
            "network",
            "task_distributions",
            "mean_weight",
            "max_weight",
        )
    },
)


In [191]:
# Inspect the action space of the evaluation environment.
test.action_space

MultiDiscrete([6 6 6 6 6 6 6 6 6 6])

In [192]:
# Inspect the observation space of the evaluation environment.
test.observation_space

Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf
 -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf
 -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf
 -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf
 inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf
 inf inf inf inf inf inf inf inf inf inf inf inf inf inf], (50,), float32)

In [193]:
# Draw one random point from the observation space to check its shape and dtype.
test.observation_space.sample()

array([ 0.5239526 , -0.927064  , -0.3074574 , -0.00752609, -0.8569565 ,
        0.31815994, -0.83336896, -0.46278098, -0.65764856, -0.38443372,
       -0.3615482 ,  0.39212397, -1.3239219 ,  2.0862415 ,  1.6996266 ,
        0.7714624 , -1.8710977 ,  3.1095166 ,  0.19123371, -0.75557584,
       -0.45274135, -0.5697951 , -0.21303335, -0.34041145,  0.5220014 ,
       -1.2441846 ,  1.2808634 , -0.52797014,  1.836694  , -2.2399435 ,
       -1.851473  , -0.02394267,  0.5634694 , -0.08593217, -0.31261355,
       -0.8118017 ,  0.73563117,  1.1906698 ,  0.01777816,  1.376192  ,
        0.84076226, -0.8396343 ,  0.22180615, -1.7599131 , -2.1892188 ,
        0.12928349,  1.3911653 ,  0.05191914,  0.14905316, -0.9578807 ],
      dtype=float32)

In [194]:
# Evaluate the baseline policies on the evaluation environment; n_steps=10
# matches the 10-step episode length used elsewhere in this notebook.
# The plotting cell further down reads entries 1 and 2 as "Closest MEC" and
# "Random Agent", each as a (lower, mean, upper) triple.
# NOTE(review): entry 0 is never plotted — confirm which baseline it holds.
baselines = get_baselines(test, n_episodes=100, n_steps=10)

In [195]:
# Show the baseline results.
baselines

((-55.22211456298828, -53.75888216972351, -52.795289516448975),
 (-4.374692648649216, -4.274915562868118, -4.194172382354736),
 (-52.39925003051758, -43.479267512112855, -26.12819328904152))

In [150]:
# Show the baseline results again.
# NOTE(review): the stored output of this cell differs from the one above and
# carries an earlier execution count, so it appears to come from a different
# run — confirm which numbers are current.
baselines

((-52.687682342529286, -51.630370982170106, -50.96762375831604),
 (-3.981195914745331, -3.9191472482681275, -3.8643453329801565),
 (-33.183290886878964, -26.902035977378482, -15.900616541504858))

In [13]:
import ray
from ray.rllib.agents import a3c
from ray.tune.registry import register_env

In [14]:
# Stop any Ray runtime left over from a previous run of this notebook before
# a new trainer is created.
ray.shutdown()

In [15]:
def make_network_env(config):
    """Build a NetworkEnv from the notebook-level settings.

    The `config` argument supplied by the caller is accepted but not used.
    """
    return NetworkEnv(
        seed_value=seed_value,
        UEs=UEs,
        MECs=MECs,
        network=network,
        task_distributions=task_distributions,
        mean_weight=mean_weight,
        max_weight=max_weight,
    )


# Register the factory with Ray Tune under the name used in the trainer config.
# NOTE(review): this id is "NetWorkEnv-v0" (capital W), unlike the gym id
# "NetworkEnv-v0" — confirm the difference is intended.
register_env("NetWorkEnv-v0", make_network_env)

In [16]:
# Settings handed to the A2C trainer.
config = {
    # Name under which the environment was registered with Ray Tune.
    "env": "NetWorkEnv-v0",
    # Number of rollout workers that collect samples in parallel, each from
    # its own copy of the environment.
    "num_workers": 8,
    # Deep-learning backend: "tf" here; "tf2" selects TF2 eager execution and
    # "torch" selects PyTorch.
    "framework": "tf",
    # Episode length, in steps.
    "horizon": 10,
}

# Create the RLlib trainer from the settings above.
trainer = a3c.A2CTrainer(config=config)

2022-05-24 23:17:37,322	INFO services.py:1456 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2022-05-24 23:17:49,842	INFO trainable.py:152 -- Trainable.setup took 14.711 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [126]:
# Show the full trainer configuration (the settings above merged with RLlib's defaults).
trainer.config

{'num_workers': 8,
 'num_envs_per_worker': 1,
 'create_env_on_driver': False,
 'rollout_fragment_length': 20,
 'batch_mode': 'truncate_episodes',
 'gamma': 0.99,
 'lr': 0.0001,
 'train_batch_size': 200,
 'model': {'_use_default_native_models': False,
  '_disable_preprocessor_api': False,
  '_disable_action_flattening': False,
  'fcnet_hiddens': [256, 256],
  'fcnet_activation': 'tanh',
  'conv_filters': None,
  'conv_activation': 'relu',
  'post_fcnet_hiddens': [],
  'post_fcnet_activation': 'relu',
  'free_log_std': False,
  'no_final_linear': False,
  'vf_share_layers': True,
  'use_lstm': False,
  'max_seq_len': 20,
  'lstm_cell_size': 256,
  'lstm_use_prev_action': False,
  'lstm_use_prev_reward': False,
  '_time_major': False,
  'use_attention': False,
  'attention_num_transformer_units': 1,
  'attention_dim': 64,
  'attention_num_heads': 1,
  'attention_head_dim': 32,
  'attention_memory_inference': 50,
  'attention_memory_training': 50,
  'attention_position_wise_mlp_dim': 32,
 

In [64]:
# Train until the mean episode reward reaches `total_reward` or the iteration
# budget is used up, whichever happens first.
#
# Every reward recorded for this environment is negative, so the reward target
# of 0 on its own never ends the loop — the recorded run had to be cancelled
# by hand. The iteration cap lets the cell finish during "Run All"; raise it
# if longer training is needed.
total_reward = 0
max_train_iterations = 100

# Start just below the target so the loop body runs at least once.
episode_reward_mean = total_reward - 1
train_iteration = 0
while episode_reward_mean < total_reward and train_iteration < max_train_iterations:
    result = trainer.train()
    train_iteration += 1
    # Checkpoint after every iteration so an interrupted run can be resumed.
    # NOTE(review): the directory is named "ppo_checkpoint" although the
    # trainer is A2C, and the later cell loads from "results/a2c_checkpoint_..."
    # — confirm the intended location before renaming anything.
    chkpt_file = trainer.save("results/ppo_checkpoint")
    print(
        result["episode_reward_min"],
        result["episode_reward_mean"],
        result["episode_reward_max"],
        result["episode_len_mean"],
    )
    episode_reward_mean = result["episode_reward_mean"]

-14.272125449685628 -12.473314508501467 -10.795756273637862 10.0
-15.682720476301952 -12.061636460646362 -8.820420466495428 10.0
-14.459147506763125 -11.598913678523576 -8.73867375977657 10.0
-14.79150296471635 -10.732822179755626 -7.853596288326906 10.0
-12.789712037101607 -9.62383193818791 -7.514446618461484 10.0
-10.875801089981048 -8.565336108658846 -6.425049407471157 10.0
-10.584405177594004 -7.8670174800090225 -6.229970751576124 10.0
-9.210991863422812 -7.372308658199979 -5.996989930272844 10.0
-9.157817125565295 -7.0791979999080255 -5.912042014257346 10.0
-8.855589517654648 -6.839502886426518 -5.5169053767585705 10.0
-8.152141777616949 -6.713394008606734 -5.503268356276633 10.0
-8.078061119778535 -6.51601058082139 -5.47736230345235 10.0
-7.64213106904748 -6.365480253272845 -5.406231423191947 10.0
-7.638017312736599 -6.310560487155193 -5.3361349972627155 10.0
-7.48902340246077 -6.253808471542075 -5.318467105503347 10.0
-7.350597214520161 -6.189879466658177 -5.369041268135936 10.0

Error: Canceled future for execute_request message before replies were done

In [36]:
# Load trainer state from a previously saved checkpoint file.
# NOTE(review): this path (results/a2c_checkpoint_5_10_0_1/...) is not the
# "results/ppo_checkpoint" directory the training cell saves to — confirm
# which run is meant to be evaluated below.
trainer.load_checkpoint("results/a2c_checkpoint_5_10_0_1/checkpoint_000027/checkpoint-27")

In [37]:
# Roll out 20 evaluation episodes of 10 steps each with the trained policy and
# print, per episode, the summed reward and the summed per-step maximum of
# info["actions_cost"].
for idx_episode in range(20):
    observation = test.reset()
    total_reward, total_max = 0, 0
    for t in range(10):
        action = trainer.compute_action(observation)
        observation, reward, done, info = test.step(action)
        total_reward += reward
        total_max += info["actions_cost"].max()
    print(f"Episode {idx_episode} reward: {total_reward}, max cost {total_max}")

Episode 0 reward: -3.6751072168350216, max cost 4.013500332832336
Episode 1 reward: -3.847290761768818, max cost 4.142842262983322
Episode 2 reward: -3.285108737647533, max cost 3.5552269518375397
Episode 3 reward: -3.428528280556202, max cost 3.663953870534897
Episode 4 reward: -3.7115765780210492, max cost 4.02901166677475
Episode 5 reward: -3.4927129328250883, max cost 3.758208394050598
Episode 6 reward: -3.2306068912148476, max cost 3.464406579732895
Episode 7 reward: -3.5149456784129143, max cost 3.766724020242691
Episode 8 reward: -3.4192221283912656, max cost 3.7136101722717285
Episode 9 reward: -3.8196607917547225, max cost 4.120284140110016
Episode 10 reward: -3.166030271351337, max cost 3.4017935693264008
Episode 11 reward: -3.628579488396645, max cost 3.9068546891212463
Episode 12 reward: -3.7210849508643147, max cost 4.032533675432205
Episode 13 reward: -3.6488939657807355, max cost 3.9470156133174896
Episode 14 reward: -3.7385819479823112, max cost 4.06936314702034
Episode

In [35]:
# Average, over 100 episodes of 10 steps, the per-episode sum of the largest
# entry of info["actions_cost"] at each step.
# total_reward is accumulated as well but is not reported by this cell.
total = 0
for idx_episode in range(100):
    observation = test.reset()
    total_reward, total_max = 0, 0
    for t in range(10):
        action = trainer.compute_action(observation)
        observation, reward, done, info = test.step(action)
        total_reward += reward
        total_max += info["actions_cost"].max()
    total += total_max
print(total/100)

3.9191656935215


In [38]:
# Same measurement as the previous cell, run again: mean over 100 episodes
# of the summed per-step maximum action cost.
total = 0
for idx_episode in range(100):
    total_max = 0
    total_reward = 0  # tracked for parity with the other loops; not printed
    observation = test.reset()
    for t in range(10):
        action = trainer.compute_action(observation)
        observation, reward, done, info = test.step(action)
        total_max = total_max + info["actions_cost"].max()
        total_reward = total_reward + reward
    total = total + total_max
print(total/100)

3.730987723916769


In [17]:
import pandas as pd

In [179]:
# Load the training progress log of one specific, earlier experiment run.
# NOTE(review): the path starts with "../results", whereas the checkpoint
# cells use "results/..." — confirm the working directory this cell expects.
exp_name = "A2CTrainer_NetWorkEnv-v0_2022-05-09_14-30-11eim4m1a8"
df = pd.read_csv(f"../results/ray_results/{exp_name}/progress.csv")

In [180]:
import plotly.express as px

# Quick look at the learning curve: mean episode reward against total timesteps.
fig = px.line(df, x='timesteps_total', y="episode_reward_mean")
fig.show()

In [181]:
# Inspect the per-iteration maximum episode reward column.
df["episode_reward_max"]

0    -9.374210
1    -7.717627
2    -6.689185
3    -5.141016
4    -4.783346
        ...   
95   -4.312996
96   -4.288949
97   -4.266681
98   -4.323496
99   -4.272373
Name: episode_reward_max, Length: 100, dtype: float64

In [182]:
def add_plot(fig, x, y, y_lower, y_upper, label, rgb_str):
    """Add a line with a shaded lower/upper band to ``fig``.

    Two traces are appended: a filled polygon (hidden from the legend) that
    spans ``y_lower`` to ``y_upper``, and a line through ``y``.

    ``go`` (``plotly.graph_objects``) must already be imported in the
    notebook when this function is called.

    Args:
        fig: Figure-like object with an ``add_trace`` method; modified in place.
        x: X coordinates shared by the line and both band edges.
        y: Values of the central line.
        y_lower: Lower edge of the band, one value per entry of ``x``.
        y_upper: Upper edge of the band, one value per entry of ``x``.
        label: Name given to both traces.
        rgb_str: Colour as an ``"r, g, b"`` string; it is inserted into
            ``rgb(...)`` for the line and ``rgba(...,0.2)`` for the band.

    Returns:
        The same ``fig`` object that was passed in.
    """
    # Copy everything into plain lists so that `+` below always concatenates.
    # With NumPy arrays or pandas Series `+` adds element-wise, which would
    # silently produce a wrong band outline.
    x = list(x)
    y = list(y)
    y_lower = list(y_lower)
    y_upper = list(y_upper)

    # The band is one closed polygon: left to right along the upper edge,
    # then right to left along the lower edge.
    band_x = x + x[::-1]
    band_y = y_upper + y_lower[::-1]

    fig.add_trace(go.Scatter(
        x=band_x,
        y=band_y,
        fill='toself',
        fillcolor=f'rgba({rgb_str},0.2)',
        line_color='rgba(255,255,255,0)',  # transparent outline
        showlegend=False,
        name=label,
    ))
    fig.add_trace(go.Scatter(
        x=x, y=y,
        line_color=f'rgb({rgb_str})',
        name=label,
    ))

    return fig

In [183]:
# Colours used by the plots. Each entry is the "r, g, b" text that add_plot
# inserts into its rgb(...) / rgba(...) colour strings.
palette = [
    ", ".join(str(channel) for channel in rgb)
    for rgb in (
        (38, 70, 83),
        (42, 157, 143),
        (233, 196, 106),
        (244, 162, 97),
        (231, 111, 81),
    )
]

In [194]:
import plotly.graph_objects as go

fig = go.Figure()

# All traces share the training-timestep axis.
x = list(df["timesteps_total"])

# Trained agent: mean episode reward with a band around it.
# NOTE(review): the band edges are the logged min/max shifted inward by fixed
# offsets (+0.6 and -0.3), so the band is narrower than the raw min/max —
# confirm this is intentional and document why.
fig = add_plot(
    fig=fig,
    x=x,
    y=list(df["episode_reward_mean"]),
    y_lower=list(df["episode_reward_min"]+0.6),
    y_upper=list(df["episode_reward_max"]-0.3),
    label="RL Agent",
    rgb_str=palette[0],
)

# Each baseline is a single (lower, mean, upper) triple, so it is drawn as a
# horizontal line with a constant-height band across the whole x range.
for baseline_idx, baseline_label, baseline_rgb in (
    (1, "Closest MEC", palette[2]),
    (2, "Random Agent", palette[3]),
):
    baseline_stats = baselines[baseline_idx]
    fig = add_plot(
        fig=fig,
        x=x,
        y=[baseline_stats[1]] * len(x),
        y_lower=[baseline_stats[0]] * len(x),
        y_upper=[baseline_stats[2]] * len(x),
        label=baseline_label,
        rgb_str=baseline_rgb,
    )

fig.update_layout(
    title=f"Agent's Performance while training - MECs: {n_MECs} UEs: {n_UEs}",
    xaxis_title="Timesteps",
    yaxis_title="Reward",
)
fig.update_traces(mode="lines")
fig.show()


In [25]:
from rl_MEC_scheduler.services.location_services import get_locations, loc_to_df

In [26]:
# Extract the locations of the MECs and of the UEs.
MEC_locs = get_locations(MECs)
UE_locs = get_locations(UEs)

In [27]:
# Turn each set of locations into a DataFrame tagged with its device type.
# NOTE(review): the trailing 2 presumably is the number of dimensions (it
# matches n_dimensions=2 used when the devices were created) — confirm against loc_to_df.
MEC_df = loc_to_df(MEC_locs, "MEC", 2)
UE_df = loc_to_df(UE_locs, "UE", 2)

In [28]:
# Stack UE and MEC rows into one frame so they can be plotted together.
locs_df = pd.concat([UE_df, MEC_df])

In [29]:
import plotly.express as px
# Scatter plot of device positions, coloured by device type, on a square canvas.
fig = px.scatter(locs_df, x="x", y="y", color="type", width=500, height=500)

In [30]:
# Fix both axes to the range 0–200.
# NOTE(review): 200 matches the radius passed to get_MECs / get_UEs; whether
# all coordinates actually fall inside [0, 200] is not visible here — confirm.
fig.update_layout(yaxis_range=[0,200], xaxis_range=[0, 200])