In [1]:
import gymnasium as gym
import numpy as np
import torch as th
import json
import math
from gymnasium.wrappers import FlattenObservation, TimeLimit
from typing import Callable
from stable_baselines3.common.vec_env import VecNormalize
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3 import PPO, A2C, DQN, TD3
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from sb3_contrib import MaskablePPO, RecurrentPPO
#from stable_baselines3.common.evaluation import evaluate_policy
from sb3_contrib.common.maskable.evaluation import evaluate_policy
#from nfv_allocation_env import NfvAllocEnv
from nfv_allocation_cpu_env import NfvAllocEnv
from nfv_allocation_cpu_env_duration import NfvAllocEnvDuration
from nfv_allocation_cpu_env_rew import NfvAllocEnvRew
from env_utils import PDUSession

2024-07-18 12:32:48.549335: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-18 12:32:48.551052: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-07-18 12:32:48.574980: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-18 12:32:48.575004: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-18 12:32:48.575642: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
qi_dict = {3:0.3, 5:0.4, 9:0.3}
#qi_dict = {3:1.0}
env_kwargs={'config_file': 'nuc_host_models.json', 'obs_metric':'cpu', 'rw_metric':'cpu', 'flat_lerr': True, 'dt_scale': 1.5, 'duration_mean':40, 'duration_scale':5, 'qi_dict': qi_dict}
log_dir = "logs/alloc/PPO_sb3_.monitor.csv"
info_keys = ['qos_breach','power_per_mbit','latency_error','allocation_error','total_power','total_traffic','reward']
seed = None
parallel_envs = 8
gamma = 0.05

def make_env(**env_par):
    env = NfvAllocEnvRew(**env_par)
    # flatten observation space from Dict to vector
    #env = FlattenObservation(env)
    # add time limit to create episodes
    env = TimeLimit(env, 1000)
    # add Monitor wrapper (already added by make_vec_env?)
    env = Monitor(env, log_dir, info_keywords=tuple(info_keys))
    env.set_gamma(gamma)
    return env

# Instantiate the env
vec_env = make_vec_env(make_env, n_envs=parallel_envs, seed=seed, env_kwargs=env_kwargs)
vec_env = VecNormalize(vec_env, norm_reward=False) #, norm_obs= False

env_seeds = vec_env.seed(seed)
vec_env.reset()

print(env_seeds)

#env = make_env(**env_kwargs)
#env.print_env()

#model_name = f"PPO_scale{env_kwargs['dt_scale']}_env{parallel_envs}_{env_kwargs['config_file'].split('_')[0]}_{env_kwargs['obs_metric']}_{env_kwargs['rw_metric']}_seed{seed}_g{math.modf(gamma)[0]}_qi359_p631"
#print(model_name)

[3726753924, 3726753925, 3726753926, 3726753927, 3726753928, 3726753929, 3726753930, 3726753931]


  logger.warn(


In [3]:
#eval_env = make_vec_env(make_env, n_envs=1, env_kwargs=env_kwargs)
#eval_env = make_env(**env_kwargs)
#eval_callback = EvalCallback(eval_env, log_path= log_dir, eval_freq=500, deterministic=True, render=False)
#Recurrent
#model_name = f"PPO_rew10_net128_scale{env_kwargs['dt_scale']}_env{parallel_envs}_{env_kwargs['config_file'].split('_')[0]}_{env_kwargs['obs_metric']}_llearn_flat_npseed{seed}_g{str(gamma%1)[2:]}_qi359_p343"
model_name = f"PPO_rew10_net128_nodur_scale{env_kwargs['dt_scale']}_env{parallel_envs}_{env_kwargs['config_file'].split('_')[0]}_{env_kwargs['obs_metric']}_qi359_p343"
print(model_name)

PPO_rew10_net128_nodur_scale1.5_env8_nuc_cpu_qi359_p343


In [4]:
class TensorboardCallback(BaseCallback):
    """
    Custom callback for plotting additional values in tensorboard.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.episode_errors = 0
        self.ppms = 0
        self.step_count = 0

    def _on_step(self) -> bool:
        self.step_count +=1
        # Log power per mbit
        ppm = self.training_env.env_method("power_per_mbit")[0]
        self.ppms += ppm
        #self.logger.record("rollout/power_per_mbit", ppm)

        le = self.training_env.get_attr("latency_error")[0]
        #self.logger.record("rollout/latency_error", max(le,0) ) #positive values are latencies better than the required

        if le>0:
            self.episode_errors +=1
        if self.step_count >= 1000:
            self.logger.record("rollout/episodes_errors", self.episode_errors)
            self.episode_errors = 0
            self.logger.record("rollout/power_per_mbit", self.ppms/self.step_count)
            self.ppms = 0
            self.step_count = 0
        
        return True

In [5]:
def linear_schedule(initial_value: float) -> Callable[[float], float]:
    """
    Linear learning rate schedule.

    :param initial_value: Initial learning rate.
    :return: schedule that computes
      current learning rate depending on remaining progress
    """
    def func(progress_remaining: float) -> float:
        """
        Progress will decrease from 1 (beginning) to 0.

        :param progress_remaining:
        :return: current learning rate
        """
        return progress_remaining * initial_value

    return func

In [6]:
# Custom actor (pi) and value function (vf) networks
# of two layers of size 128 each with Relu activation function
# Note: an extra linear layer will be added on top of the pi and the vf nets, respectively
policy_kwargs = dict(activation_fn=th.nn.ReLU,
                     net_arch=dict(pi=[128, 128], vf=[128, 128]))

In [7]:
# Train the agent
total_timesteps = 6_000_000

#batch_size=32, n_steps=100,
model = MaskablePPO('MlpPolicy', vec_env, learning_rate=linear_schedule(0.001), gamma=0.95, batch_size=128, policy_kwargs=policy_kwargs, seed=seed, verbose=0, tensorboard_log="./nfv_allocation_tensorboard/") #policy_kwargs=policy_kwargs
#model = RecurrentPPO('MlpLstmPolicy', vec_env, learning_rate=linear_schedule(0.001), gamma=0.95, seed=seed, verbose=0, tensorboard_log="./nfv_allocation_tensorboard/")
#model = A2C('MlpPolicy', vec_env, gamma=0.99, seed=seed, verbose=0, tensorboard_log="./nfv_allocation_tensorboard/")
#model = DQN('MlpPolicy', vec_env, learning_rate=linear_schedule(0.001), gamma=0.99, seed=seed, verbose=0, tensorboard_log="./nfv_allocation_tensorboard/")

model.learn(total_timesteps, progress_bar=True, tb_log_name=model_name, callback=TensorboardCallback());

Output()

In [8]:
#model.save(f'saved models/{model_name}_onlylat')

In [9]:
#model = MaskablePPO.load(path='saved models/PPO_rew10_net128_scale1.5_env8_nuc_cpu_llearn_flat_npseedNone_g05_qi359_p343_onlylat', env=vec_env)

In [10]:
curriculum = False
#cur_scale = 0.5

if curriculum:
    curriculum_timesteps = 5_000_000

    #vec_env.env_method("set_dt_scale", cur_scale)
    vec_env.env_method("set_rw_metric", "cpu")
    model.learn(total_timesteps, progress_bar=True, tb_log_name=model_name+'_C_latrew', callback=TensorboardCallback());

In [11]:
model.save(f'saved models/{model_name}')

In [12]:
print('training env evaluation')
evaluate_policy(model, vec_env, n_eval_episodes=10, warn=False)

training env evaluation


(823.3375106, 4.066744770368366)

In [13]:
ev_seed = None#seed
ev_env = make_vec_env(make_env, n_envs=parallel_envs, seed=ev_seed, env_kwargs=env_kwargs)
ev_env = VecNormalize(ev_env, norm_reward=False) 
ev_env.seed(ev_seed)
ev_env.reset()
print('test env evaluation')
evaluate_policy(model, ev_env, n_eval_episodes=10, warn=False)

  logger.warn(


test env evaluation


(-96.43053330000001, 28.017483312351533)

In [14]:
#vec_env.env_method("print_env")