In [1]:
import gymnasium as gym
import numpy as np
from gymnasium.wrappers import FlattenObservation, TimeLimit
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common import results_plotter
from stable_baselines3.common.results_plotter import load_results, ts2xy
from stable_baselines3 import PPO, A2C, DQN, TD3
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import VecNormalize
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.callbacks import BaseCallback, EvalCallback
from stable_baselines3.common.evaluation import evaluate_policy
from nfv_env import NfvEnv

2024-02-09 12:34:59.727479: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-09 12:34:59.729342: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-09 12:34:59.755422: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-09 12:34:59.755450: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-09 12:34:59.756126: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [2]:
# Directory handed to the Monitor wrapper created inside make_env.
log_dir = "logs/"

def make_env(**env_par):
    """Create one wrapped NfvEnv instance.

    All keyword arguments are forwarded unchanged to ``NfvEnv``.

    Wrappers are applied from the inside out:
      1. FlattenObservation - flattens the Dict observation space to a vector.
      2. TimeLimit          - cuts the rollout into episodes of 1000 steps.
      3. Monitor            - episode statistics, pointed at ``log_dir``.

    NOTE(review): make_vec_env is believed to add its own Monitor wrapper as
    well - confirm whether the one applied here is redundant.
    """
    flat_env = FlattenObservation(NfvEnv(**env_par))
    episodic_env = TimeLimit(flat_env, 1000)
    return Monitor(episodic_env, log_dir)

# Training environment: a single NfvEnv driven by the 'sin' traffic generator.
max_traffic = 8000

# VecNormalize normalizes observations and rewards with a moving average.
vec_env = VecNormalize(
    make_vec_env(
        make_env,
        n_envs=1,
        env_kwargs={'max_traffic': max_traffic, 'traffic_gen': 'sin'},
    )
)

created NFV scaling environment
with max traffic:  8000
with max instances:  100
created NFV scaling environment
with max traffic:  8000
with max instances:  100


In [3]:
# Evaluation environment used by EvalCallback while training.
# max_traffic is passed explicitly so the eval env follows the notebook setting
# instead of silently relying on NfvEnv's default.
# NOTE(review): traffic_gen is left at the NfvEnv default here, while training
# uses 'sin' - confirm that evaluating on the default generator is intended.
eval_env = make_vec_env(make_env, n_envs=1, env_kwargs={'max_traffic': max_traffic})

# Same wrapper stack as the training env, but frozen:
#   training=False    -> evaluation episodes must not update the running statistics
#   norm_reward=False -> EvalCallback should report true (unnormalized) rewards
# EvalCallback copies the normalization statistics from the training env before
# each evaluation, so the eval env never needs to learn its own.
eval_env = VecNormalize(eval_env, training=False, norm_reward=False)

eval_callback = EvalCallback(
    eval_env,
    log_path=log_dir,
    eval_freq=500,
    deterministic=True,
    render=False,
)

created NFV scaling environment
with max traffic:  8000
with max instances:  100


In [4]:
# Training run: PPO with an MLP policy on the normalized training env.
total_timesteps = 500_000

model = PPO(
    'MlpPolicy',
    vec_env,
    gamma=0.99,
    verbose=0,
    tensorboard_log="./nfv_scaling_tensorboard/",
)
# eval_callback evaluates the policy periodically while learning.
# The trailing semicolon keeps the returned model repr out of the cell output.
model.learn(total_timesteps, progress_bar=True, callback=eval_callback);

Output()



AssertionError: Training and eval env are not wrapped the same way, see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback and warning above.

In [None]:
# Test the trained agent on the 'mix' traffic generator.

test_env = make_vec_env(make_env, n_envs=1, env_kwargs={'max_traffic':max_traffic, 'traffic_gen':'mix'})

# The policy was trained on VecNormalize-d observations, so the test env must
# feed it observations normalized with the *training* statistics; otherwise
# model.predict() sees inputs on a completely different scale.
#   training=False    -> do not update the statistics during the test rollout
#   norm_reward=False -> keep the raw environment reward for plotting
test_env = VecNormalize(test_env, training=False, norm_reward=False)
# Shared by reference; safe because the frozen wrapper never writes to it.
# Assumes both envs expose the same observation space - TODO confirm for 'mix'.
test_env.obs_rms = vec_env.obs_rms

obs = test_env.reset()
n_steps = 1000

# Per-step traces collected for the plots further down.
actions = []
traffics = []
powers = []  # currently never filled
instances = []
optimal = []
rewards = []

for step in range(n_steps):
    action, _ = model.predict(obs, deterministic=True)
    actions.append(action)
    # VecEnv.step returns (obs, rewards, dones, infos), each batched over envs.
    obs, reward, done, info = test_env.step(action)

    # `obs` is normalized now, so read the instance count from the raw
    # observation (obs space flattened in alphabetical order -> index 0).
    # NOTE(review): on the step where the episode ends (TimeLimit is 1000) the
    # VecEnv resets automatically, so this value belongs to the new episode.
    raw_obs = test_env.get_original_obs()
    instances.append(raw_obs[0][0])
    traffics.append(info[0]['req_traffic'])
    optimal.append(info[0]['optimal_instances'])
    rewards.append(reward[0])

In [None]:
import matplotlib.pyplot as plt

# Requested traffic (left y-axis) against instantiated instances (right y-axis).
fig, ax = plt.subplots()
# twinx() adds a second y-axis on the right that shares the x-axis with `ax`,
# so both series stay aligned step-for-step and the x ticks are drawn once.
ax2 = ax.twinx()

ax.plot(traffics, color="C0", alpha=1)
ax.set_xlabel("step")
ax.set_ylabel("traffic request", color="C0")
ax.tick_params(axis='y', colors="C0")

ax2.plot(instances, color="C1", linestyle='dashed')
ax2.set_ylabel("instances", color="C1")
ax2.tick_params(axis='y', colors="C1")
# Instance counts are whole numbers: restrict the right axis to integer ticks.
ax2.yaxis.get_major_locator().set_params(integer=True)

plt.show()

In [None]:
# Optimal vs. actually instantiated instance counts over the test rollout.
cmp_fig, cmp_ax = plt.subplots()
cmp_ax.plot(optimal, label="optimal")
cmp_ax.plot(instances, label="instantiated", linestyle='dashed')
cmp_ax.legend()
plt.show()

In [None]:
#model.save('saved models/PPO_sin_500k_rew3_stable')

In [None]:
# Per-step reward over the test rollout; y-axis limited to [-10, 10].
# TODO: expose the optimal reward in the env's `info` so it can be plotted too.
rew_fig, rew_ax = plt.subplots()
rew_ax.plot(rewards)
rew_ax.set_ylim(-10, 10)
plt.show()