In [59]:
import gym
from gym.utils import seeding
from rl_MEC_scheduler.handlers.env_handler import NetworkEnv
from rl_MEC_scheduler.services.env_services import get_MECs, get_UEs
from rl_MEC_scheduler.values.network_values import Network
from rl_MEC_scheduler.values.task_values import TaskDistributions
from rl_MEC_scheduler.repository.config_repository import load_configs, save_configs
from rl_MEC_scheduler.handlers.env_handler import load_envs

In [51]:
# Single seed reused below for both the random generator and the environment.
seed_value = 42

In [52]:
# Task-distribution settings: a mean/std pair for the input data, the output
# data and the cycle count, plus the weights given to energy and delay.
task_distribution_params = {
    "input_data_mean": 250e3,
    "input_data_std": 1e3,
    "output_data_mean": 25e3,
    "output_data_std": 1e2,
    "n_cycles_mean": 900e6,
    "n_cycles_std": 50e6,
    "energy_weight": 0.5,
    "delay_weight": 0.5,
}
task_distributions = TaskDistributions(**task_distribution_params)

In [53]:
# Network-wide settings, collected in one mapping and handed to `Network`
# as keyword arguments.
network_params = {
    "bandwidth": 10e9,
    "n_subcarriers": 10,
    "path_loss_exponent": -4,
    "upload_channel_fading_coefficient": 100,
    "download_channel_fading_coefficient": 100,
    "upload_bit_error_rate": 1,
    "download_bit_error_rate": 1,
    "noise_power": 5e-5,
    "signal_noise_ratio": 0.5,
    "n_devices": 10,
}
network = Network(**network_params)

In [54]:
# Build the random generator from the shared seed. `seeding.np_random` returns
# a pair; the second element is kept as `seed`.
np_random, seed = seeding.np_random(seed_value)

In [55]:
# Create the 5 MECs using the shared random generator.
MECs = get_MECs(
    np_random=np_random,
    radius=200,
    n_MECs=5,
    frequency=5e9,
    transmission_power=200,
    n_dimensions=2,
)

In [56]:
# Create the 5 UEs using the same random generator as the MECs.
# NOTE(review): `tranmsission_power` is spelled exactly as in the original
# call; presumably that is the keyword `get_UEs` declares -- verify before
# renaming it.
UEs = get_UEs(
    np_random=np_random,
    radius=200,
    n_UEs=5,
    frequency=1e9,
    tranmsission_power=500e-3,
    idle_power=100e-3,
    download_power=200e-3,
    n_dimensions=2,
)

In [57]:
# Assemble the environment from the pieces defined in the cells above.
network_env = NetworkEnv(
    seed_value=seed_value,
    UEs=UEs,
    MECs=MECs,
    network=network,
    task_distributions=task_distributions,
)

In [58]:
# Persist the environment configuration; `configs` takes a tuple, here with a
# single entry.
save_configs(
    configs=(network_env,),
    configs_path="env_configs",
    configs_filename="env1_configs.json",
)

In [63]:
# Read the saved configuration back and keep the first environment returned.
# Note the keyword names here (`config_path`, `config_filename`) differ from
# the ones `save_configs` takes.
test = load_envs(
    config_path="env_configs",
    config_filename="env1_configs.json",
)[0]

In [65]:
# Round-trip check: the environment returned by `load_envs` should compare
# equal to the one that was passed to `save_configs`.
test == network_env

True

In [43]:
# Register the environment class with gym under "NetworkEnv-v0", with episodes
# limited to 10 steps.
gym_registration = {
    "id": "NetworkEnv-v0",
    "entry_point": NetworkEnv,
    "max_episode_steps": 10,
}
gym.register(**gym_registration)


[33mWARN: Overriding environment NetworkEnv-v0[0m



In [44]:
# Instantiate the gym-registered environment, copying every constructor
# argument from the existing `network_env`.
env_kwarg_names = ("seed_value", "UEs", "MECs", "network", "task_distributions")
env_kwargs = {name: getattr(network_env, name) for name in env_kwarg_names}
test = gym.make("NetworkEnv-v0", **env_kwargs)


In [45]:
# Display the action space of the environment created by `gym.make`.
test.action_space

MultiDiscrete([6 6 6 6 6])

In [46]:
# Display the observation space of the environment created by `gym.make`.
test.observation_space

Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf
 -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf inf
 inf inf inf inf inf inf inf], (25,), float32)

In [47]:
# Draw one random observation from the observation space to inspect its
# shape and dtype.
test.observation_space.sample()

array([ 1.2128292 , -0.20525928, -0.64920694, -0.9584335 , -1.1524115 ,
       -1.343209  , -0.33186376,  0.1893206 , -1.0685507 ,  0.39804345,
       -0.92518365, -0.6380645 , -2.1159277 ,  1.0417508 , -1.332248  ,
       -0.65282404, -1.2015101 , -0.793805  ,  0.4771188 ,  1.3640146 ,
        0.29421574, -0.9001448 , -1.0805522 ,  0.5416504 ,  0.09637395],
      dtype=float32)

In [48]:
import ray
from ray.rllib.agents import a3c
from ray.tune.registry import register_env

KeyboardInterrupt: 

In [61]:
# Shut Ray down before the trainer is (re)created further below; presumably
# this clears a runtime left over from an earlier run of the notebook.
ray.shutdown()

In [62]:
def _make_network_env(env_config):
    """Build a fresh `NetworkEnv` from the notebook-level settings.

    The argument passed in by the caller is not used; every constructor
    argument comes from the variables defined in earlier cells.
    """
    return NetworkEnv(
        seed_value=seed_value,
        UEs=UEs,
        MECs=MECs,
        network=network,
        task_distributions=task_distributions,
    )


# NOTE: this id is spelled "NetWorkEnv-v0" (capital W), unlike the
# gym-registered "NetworkEnv-v0". The trainer config refers to this exact
# spelling, so the two must be changed together.
register_env("NetWorkEnv-v0", _make_network_env)

In [63]:
# Configure the algorithm.
config = {
    # Environment id, spelled exactly as it was passed to `register_env`
    # ("NetWorkEnv-v0", capital W) -- not the gym id "NetworkEnv-v0".
    "env": "NetWorkEnv-v0",
    # Number of environment workers (aka "rollout workers") that collect
    # samples in parallel from their own environment clone(s).
    "num_workers": 8,
    # Change this to "framework: torch", if you are using PyTorch.
    # Also, use "framework: tf2" for tf2.x eager execution.
    "framework": "tf",
    # Episode length cap; same value as `max_episode_steps` in the gym
    # registration.
    "horizon": 10,
}

# Create our RLlib Trainer (the A2C trainer, which lives in the a3c module).
trainer = a3c.A2CTrainer(config=config)

2022-05-08 21:44:05,492	INFO services.py:1456 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8266[39m[22m
2022-05-08 21:44:21,154	INFO trainable.py:152 -- Trainable.setup took 17.863 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [64]:
# Train until the mean episode reward stops improving.
#
# The original loop only exited once the mean episode reward reached 0. The
# rewards logged by this environment are all negative, so that condition was
# never met and the cell had to be interrupted by hand. The loop is now
# bounded and also stops once the reward has plateaued.
MAX_TRAIN_ITERATIONS = 200   # hard upper bound on training iterations
PLATEAU_PATIENCE = 5         # iterations without improvement before stopping
MIN_IMPROVEMENT = 1e-2       # smallest gain in mean reward that counts
target_reward_mean = 0       # original exit condition, kept as an early exit

best_reward_mean = float("-inf")
iterations_without_gain = 0

for train_iteration in range(MAX_TRAIN_ITERATIONS):
    result = trainer.train()
    # The path says "ppo" although the trainer is A2C; it is left unchanged
    # so existing checkpoints keep their location.
    chkpt_file = trainer.save("results/ppo_checkpoint")
    print(
        result["episode_reward_min"],
        result["episode_reward_mean"],
        result["episode_reward_max"],
        result["episode_len_mean"],
    )
    episode_reward_mean = result["episode_reward_mean"]

    if episode_reward_mean >= target_reward_mean:
        break

    # A NaN mean fails this comparison and is therefore counted as
    # "no improvement".
    if episode_reward_mean > best_reward_mean + MIN_IMPROVEMENT:
        best_reward_mean = episode_reward_mean
        iterations_without_gain = 0
    else:
        iterations_without_gain += 1
        if iterations_without_gain >= PLATEAU_PATIENCE:
            break

-14.272125449685628 -12.473314508501467 -10.795756273637862 10.0
-15.682720476301952 -12.061636460646362 -8.820420466495428 10.0
-14.459147506763125 -11.598913678523576 -8.73867375977657 10.0
-14.79150296471635 -10.732822179755626 -7.853596288326906 10.0
-12.789712037101607 -9.62383193818791 -7.514446618461484 10.0
-10.875801089981048 -8.565336108658846 -6.425049407471157 10.0
-10.584405177594004 -7.8670174800090225 -6.229970751576124 10.0
-9.210991863422812 -7.372308658199979 -5.996989930272844 10.0
-9.157817125565295 -7.0791979999080255 -5.912042014257346 10.0
-8.855589517654648 -6.839502886426518 -5.5169053767585705 10.0
-8.152141777616949 -6.713394008606734 -5.503268356276633 10.0
-8.078061119778535 -6.51601058082139 -5.47736230345235 10.0
-7.64213106904748 -6.365480253272845 -5.406231423191947 10.0
-7.638017312736599 -6.310560487155193 -5.3361349972627155 10.0
-7.48902340246077 -6.253808471542075 -5.318467105503347 10.0
-7.350597214520161 -6.189879466658177 -5.369041268135936 10.0

Error: Canceled future for execute_request message before replies were done

In [None]:
# Roll out the trained policy on the gym-created environment and report the
# summed reward of each episode.
N_EVAL_EPISODES = 4
MAX_EVAL_STEPS = 10  # same value as `max_episode_steps` in the gym registration

# NOTE(review): the output recorded for this cell shows about -20000 per
# episode, while training reported means of about -6 to -12 -- confirm the
# trainer and `test` were in the expected state when that output was made.
for idx_episode in range(N_EVAL_EPISODES):
    total_reward = 0
    observation = test.reset()
    for t in range(MAX_EVAL_STEPS):
        # `compute_action` is deprecated in RLlib; `compute_single_action`
        # is its replacement.
        action = trainer.compute_single_action(observation)
        print(action)
        observation, reward, done, info = test.step(action)
        total_reward += reward
        if done:
            # Do not keep stepping an environment whose episode has ended.
            break
    print(f"Episode {idx_episode} reward: {total_reward}")

[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[0 0 0 0 0]
[1 5 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 4]
Episode 0 reward: -20000.000000039043
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[0 0 0 0 0]
[4 0 0 0 0]
[1 0 0 0 0]
[1 3 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
Episode 1 reward: -20000.000000014697
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[0 0 0 0 0]
[0 0 0 0 0]
Episode 2 reward: -20000.000000026892
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 3]
[0 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
[1 0 0 0 0]
Episode 3 reward: -20000.000000019958
