In [1]:
import torch
import torch.nn as nn
import math
import numpy as np
import datetime

np.random.seed(42)

import sys
print(sys.executable) # just to check which python

import gym
from gym import spaces

from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.utils import set_random_seed

/usr/local/opt/python@3.9/bin/python3.9


In [2]:
class RequestType:
    def __init__(self, request_type, bandwidth, service_rate, arrival_rate, source, sink, distribution, switch_rate=None):
        # distribution is 1x2 if elastic and 1x1 if static
        
        self.type = request_type
        self.bw = bandwidth
        self.service_rate = service_rate
        self.arrival_rate = arrival_rate
        self.source = source
        self.sink = sink
        self.distribution = distribution
        self.switch_rate = switch_rate
        
        self.num_made = 0
        self.num_accepted = 0

class Request:
    def __init__(self, request_type, service_time, arrival_time, source, sink, transfer_rate, distribution=None, parent_elastic=None, bw_dist=None, request_type_template=None):
        self.type = request_type
        self.service_time = service_time
        self.arrival_time = arrival_time
        self.source = source
        self.sink = sink
        self.bw = transfer_rate
        self.request_type = request_type
        self.parent_elastic = parent_elastic
        self.accepted = None
        self.path = None
        self.bw_dist = bw_dist
        self.expired = False
        
        self.blueprint = request_type_template
        
        if request_type == "elastic":
            self.distribution = distribution
            self.scale_requests = []
                   
        if request_type_template is not None:
            request_type_template.num_made += 1
            
    def add_scale_request(self, req): 
        # we store related scale requests for elastic requests
        # not used if static request
        self.scale_requests.append(req)
            
    def get_encoding(self, nodes_in_environment):
        # as per our notes, this SHOULD return 1x5 tensor,
        # but we have one hot encodings INSIDE this tensor,
        # so we will flatten this and return, so the size will be
        # larger than 1x5
        
        # nodes_in_environment is a list of all the nodes in our graph
        # eg ["a", "b", "c"]
        
        # request is [one hot source, one hot destination, bw, service time, one hot type]
                
        one_hot_source = nn.functional.one_hot(torch.tensor([nodes_in_environment.index(self.source)]), num_classes=len(nodes_in_environment)).flatten().float()
        one_hot_dest   = nn.functional.one_hot(torch.tensor([nodes_in_environment.index(self.sink)]), num_classes=len(nodes_in_environment)).flatten().float()
    
        if self.request_type == "static":
            one_hot_type = torch.tensor([1, 0, 0])
        elif self.request_type == "elastic":
            one_hot_type = torch.tensor([0, 1, 0])
        elif self.request_type == "scale":
            one_hot_type = torch.tensor([0, 0, 1])
            
        encoding = torch.cat([one_hot_source, 
                             one_hot_dest,
                             torch.tensor([self.bw]).float(), 
                             torch.tensor([self.service_time]).float(),
                             one_hot_type.float()])
        
        return encoding

In [3]:
class Link:
    def __init__(self, node_1, node_2, bw_capacity):
        self.serving_requests = []
        self.nodes = [node_1, node_2]
        self.total_bw = bw_capacity
        
    def reset(self):
        self.serving_requests = []
        
    def add_request(self, request_obj):
        self.serving_requests.append(request_obj)
        
    def remove_request(self, request_obj):
        self.serving_requests.remove(request_obj)
        
    def remaining_bw(self): 
        # subtracting bw being used from total bw capacity
        bw_being_used = 0
        for req in self.serving_requests:
            bw_being_used += req.bw
            
        return (self.total_bw - bw_being_used)

In [4]:
class Environment(gym.Env):
    # requests_in_service_encoder = nn.RNN(????, 7)
    metadata = {'render.modes': ['human']}
    
    def __init__(self, nodes, links, request_blueprints, use_RNN=False, sb3_compat=False):
        super(Environment, self).__init__()
                
        """
        nodes: list of strings where each string is just a name or identifier of a node
        links: list of tuples where in tuple t, t[0] is first node, t[1] is another node, and t[2] is bw capacity of the link
        request_blueprints: list of DeploymentRequest objects
        """
        self.nodes = nodes
        self.links = {}
        self.request_history = []
        self.E_history = []
        self.past_distributions = []
        self.request_blueprints = request_blueprints
        self.last_time = 0
        self.episode_timesteps = 600
        self.use_RNN = use_RNN
        self.sb3_compat = sb3_compat
        self.precomputed_paths = {}
        
        if sb3_compat:
            self.request_being_considered = None
        
        for link in links:
            if link[0] not in self.nodes or link[1] not in self.nodes:
                raise Exception("Node in link " + str(link) + " doesn't exist")
            
            link_obj = Link(*link)

            self.links[link[0] + link[1]] = link_obj
            self.links[link[1] + link[0]] = link_obj
            
        self.precompute_paths()
            
        self.request_list = self.create_requests()
        self.request_queue = iter(self.request_list)
        
        # Setup gym-specific code
        env_encoding_size = self.get_encoding(increment_iterator=False).size()
        req_encoding_size = self.request_list[0].get_encoding(self.nodes).size()
        # print(env_encoding_size[0] + req_encoding_size[0])
        
        self.action_space = spaces.Box(low=-1, high=1,
                                      shape=(4,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-100, high=100,
                                      shape=(env_encoding_size[0] + req_encoding_size[0],), dtype=np.float32)
        
        # TODO, WRITE RNN logic
        
        #if use_RNN:
        #    self.requests_in_service_encoder = nn.RNN
        
    def precompute_paths(self):
        for req_type in self.request_blueprints:
            self.precomputed_paths[req_type.source + req_type.sink] = self.search(req_type.source, req_type.sink, [], [])
            
    def add_request(self, request, path=None): # we want to add this request to a link or path
        # path: a list of nodes that the request traverses including source and sink
        # if no path is specified, path is assumed to be [req.source, req.sink]
        
        if path is not None: 
            nodes = [[path[i], path[i + 1]] for i in range(len(path) - 1)]
            for node_pair in nodes:
                self.links[node_pair[0] + node_pair[1]].add_request(request)
        
        else:
            self.links[request.source + request.sink].add_request(request)
        
        request.blueprint.num_accepted += 1
        self.request_history.append(request)
        # print(self.links[request.source + request.sink])
    
    def reset(self):
        for link in self.links.values():
            link.reset()
        self.request_history = []
        self.E_history = []
        self.past_distributions = []
        self.last_time = 0
        self.request_list = self.create_requests()
        self.request_queue = iter(self.request_list)
        self.precomputed_paths = {}

        self.precompute_paths()
        
        return self.get_encoding()
        
    def reward(self, request, decision):
        base_rate = 1         # 1 when static
        type_bonus = 0.9      # 0.9 when static
        bw = request.bw
        if request.type == "elastic":
            #base_rate = request.bw
            type_bonus = 1.1                # 1.1 when elastic
            bw = np.array(request.bw_dist).dot(request.distribution)
            
        r = bw * base_rate * request.service_time * type_bonus
        
        # if remaining bandwidth on link(s) < 0, very "bad" reward
        if request.path is not None:
            path_length = len(request.path)
            
            r *= math.pow(0.9, path_length - 2)
            
            nodes = [[request.path[i], request.path[i + 1]] for i in range(len(request.path) - 1)]
            for node_pair in nodes:
                if self.links[node_pair[0] + node_pair[1]].remaining_bw() < 0:
                    print("exceeded: " + str(-r * 10))
                    return (-r * 10)
        else:
            # path is direct, so no decrease of reward needed
            remaining_bw = self.links[request.source + request.sink].remaining_bw()
            if remaining_bw < 0:
                print("exceeded: " + str(-r * 10))
                return (-r * 10)
        
        if decision == "accept":
            return r
        
        if decision == "reject":
            if request.type == "static" or request.type == "elastic":
                return 0
            elif request.type == "scale":
                if len(self.past_distributions) == 0:
                    return -1 * r
                
                else:
                    current_sum = torch.from_numpy(np.sum(self.past_distributions, axis=0))

                    average_past_distribution = current_sum / len(self.past_distributions)
                    current_req_distribution = torch.tensor(request.parent_elastic.distribution)

                    return -1 * r * math.exp(-nn.functional.kl_div(average_past_distribution, current_req_distribution))

                """
                past_distributions = []
                for req in self.request_history:
                    if req.request_type == "elastic":
                        past_distributions.append(req.distribution)
                
                average_past_distribution = torch.mean(past_distributions, dim=1)
                current_req_distribution = torch.tensor(request.distribution)
                
                if bool(average_past_distribution[0] < current_req_distribution[0]):
                    return -1 * r * math.exp(-nn.functional.kl_div(average_past_distribution, current_req_distribution))
                else:
                    return 0
                """
                
    def next_req(self):
        next_req = next(self.request_queue)
        if self.sb3_compat:
            self.request_being_considered = next_req
                    
        return next_req
                
    def step(self, action, req=None):
        # what happens if we have two requests that come in on the same timestep but there is only enough bandwidth for one?
        # do we the decision on the second request with knowledge of the first request
        # essentially, after we accept the first request, will we submit an updated encoding of the network to the policy network?
 
        # actions is a Nx2 matrix where the first column in the request and second is the decision
        # decision is either "accept" or "reject"
        # this is given by our agent
        
        # if req is None, that means we are using sb3_compat=True and we can get the req from self.request_being_considered
                            
        if req is None:
            req = self.request_being_considered
            
        if action[0] > 0:
            # accept request
            paths = self.precomputed_paths[req.source + req.sink]
            paths.sort(key=lambda x: len(x)) # sort by shortest path
            # select the path we are using
            path = paths[action[1:4].argmax()]
            
            self.add_request(req, path)
            req.accepted = True

            reward = self.reward(req, "accept")
        elif action[0] < 0:
            # reject
            req.accepted = False
            reward = self.reward(req, "reject")

        obs = self.get_encoding()
        
        done = req.arrival_time > 600
        info = {}
        
        return obs, reward, done, info
        
    def update_requests(self, current_time):
        # here, we remove expired requests and update E_history based off of the request stats
        
        for link in self.links.values():
            for request in link.serving_requests.copy():
                if (request.arrival_time + request.service_time) < current_time:
                    # request has expired, let's remove it from the links
                    link.remove_request(request)

                    if request.type == "elastic" and not request.expired:
                        time_on_higher_bw = 0
                        for scale_req in request.scale_requests:
                            time_on_higher_bw += scale_req.service_time

                        time_on_lower_bw = request.service_time - time_on_higher_bw

                        # calculate E[history]
                        request_time = np.array([time_on_lower_bw, time_on_higher_bw])
                        request_bw = request.bw
                        result = (request_time / request_time.sum()).dot(request_bw)
                        self.past_distributions.append(request_time / request_time.sum())
                        self.E_history.append(result)
                        request.expired = True

    def get_encoding(self, increment_iterator=True):
        links_processed = [] 
        # these will store links that we have already encoded so we don't encode them again
        
        current_encoding = []
        
        # h = torch.zeros(7) # assuming 7 for h0 size
        # last_out = None
        
        env_encoding = []
        
        if increment_iterator:
            next_req = self.next_req()

            while next_req.type == "scale":
                if not next_req.parent_elastic.accepted:
                    next_req = self.next_req()
                elif next_req.parent_elastic.accepted:
                    break
                        
            """
            while next_req.type == "scale":
                if next_req.parent_elastic.accepted:
                    next_req.accepted = True # we must accept since we accepted elastic req
                    self.add_request(next_req, next_req.parent_elastic.path)
                next_req = self.next_req()
            """
            self.update_requests(next_req.arrival_time)

        for link in self.links.values():
            if link in links_processed:
                continue

                        
            # Commented because we don't want to encode any queue for phase 1
            
            # for req in link.serving_requests
                # request is [one hot source, one hot destination, bw, service time, one hot type]
                
                # one_hot_source = nn.functional.one_hot(torch.tensor([self.nodes.index(req.source)]), num_classes=len(self.nodes))
                # one_hot_dest   = nn.functional.one_hot(torch.tensor([self.nodes.index(req.sink)]), num_classes=len(self.nodes))

                # req_tensor = torch.Tensor([]) # mismatched dimensions??!
                # last_out, h = self.requests_in_service_encoder(req_tensor, h)

            # current_encoding.append(torch.cat(torch.Tensor([link.remaining_bw]), last_out))
            # torch.stack(current_encoding)
            
            # check implementation later
            
            env_encoding.append(link.remaining_bw())
            
            links_processed.append(link)
            
        if not increment_iterator:
            return torch.tensor(env_encoding)
        
        if self.sb3_compat:
            return torch.cat([torch.tensor(env_encoding).float(), torch.tensor(next_req.get_encoding(self.nodes)).float()])
        else:
            return torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes)), next_req
    
    def create_requests(self):
        requests = []
        
        for request_type in self.request_blueprints:
            arrival_times = []
            service_times = []
            last_arrival = 0
        
            while last_arrival < self.episode_timesteps: # we want to generate requests till we reach episode end
                last_arrival += np.random.exponential(request_type.arrival_rate)
                arrival_times.append(last_arrival)
                                
            for _ in arrival_times:
                service_times.append(np.random.exponential(request_type.service_rate))
                
            for arrival_time, service_time in zip(arrival_times, service_times):
                # start creating requests
                
                new_request = Request(request_type.type, service_time, arrival_time, request_type.source, request_type.sink, 
                                      request_type.bw[0], request_type.distribution, bw_dist=request_type.bw, request_type_template=request_type)
                requests.append(new_request)
                
                if request_type.type == "elastic": 
                    # we will start with the first bandwidth element as starting bw
                    # WE ASSUME that bw[0] < bw[1]
                    timesteps_from_deployment = 0
                    current_bw = request_type.bw[0]
                    while timesteps_from_deployment < service_time:
                        if current_bw == request_type.bw[0]:
                            # we want to generate a scale request to increase bw
                            scale_bw = request_type.bw[1] - current_bw
                            scale_service_time = np.random.exponential(request_type.switch_rate[1])
                            scale_request = Request("scale", scale_service_time, \
                                                    arrival_time + timesteps_from_deployment, request_type.source, \
                                                   request_type.sink, scale_bw, parent_elastic=new_request,
                                                   request_type_template=request_type)
                            requests.append(scale_request)
                            new_request.add_scale_request(scale_request)
                            
                            timesteps_from_deployment += scale_service_time
                            current_bw = request_type.bw[1] # request_type.bw[0] + scale_bw
                        elif current_bw == request_type.bw[1]:
                            # we want to go to lower bw and spend some time there
                            time_spent_on_lower_bw = np.random.exponential(request_type.switch_rate[0])
                            timesteps_from_deployment += time_spent_on_lower_bw
                            current_bw = request_type.bw[0]
                            
        # sort requests by arrival time
        requests.sort(key=lambda x: x.arrival_time)
        return requests
    
    def search(self, source, dest, visited_a, paths):
        visited_a.append(source)
        # print(visited_a)

        for link in set(self.links.values()):
            visited = visited_a.copy()
            if source in link.nodes:
                if dest in link.nodes:
                    visited.append(dest)
                    paths.append(visited)

                x = link.nodes.copy()
                x.remove(source)
                if x[0] not in visited:
                    self.search(x[0], dest, visited.copy(), paths)
        return paths
    
    def print_statistics(self):
        for req_type in self.request_blueprints:
            print(req_type.source + " | " +
                   req_type.sink + " | " + 
                 "BW: " + str(req_type.bw) + " | " +
                 "Arrival rate: " + str(req_type.arrival_rate) + " | " +
                 "Acceptance rate: " + str(req_type.num_accepted / req_type.num_made))
            
        resources_used = 0
        for req in self.request_list:
            if req.accepted == True:
                resources_used += req.bw * req.service_time
            
        print("BW used: " + str(resources_used))

In [5]:
def make_standard_env(sb3_compat=False):
    env = Environment(["a", "b", "c", "d", "e", "f"], [["a", "b", 10], ["a", "c", 10], ["b", "d", 10], \
                                                    ["c", "d", 20], ["c", "e", 10], ["d", "f", 10], \
                                                    ["e", "f", 10]], \
                          [RequestType("static", [2], 0.5, 0.75, "a", "b", [1]), \
                          RequestType("static", [8], 1, 1.5, "a", "b", [1]), \
                          RequestType("elastic", [4, 9], 1, 1.5, "a", "b", [0.8, 0.2], switch_rate=[0.08, 0.02]), \
                          RequestType("static", [1], 1, 1.5, "c", "d", [1]), \
                          RequestType("static", [7], 0.5, 0.75, "c", "d", [1]), \
                          RequestType("elastic", [3, 13], 2, 3, "c", "d", [0.9, 0.1], switch_rate=[0.09, 0.01]), \
                          RequestType("static", [3], 0.5, 0.75, "e", "f", [1]), \
                           RequestType("static", [6], 1, 1.5, "e", "f", [1]), \
                            RequestType("elastic", [5, 8], 2, 3, "e", "f", [0.7, 0.3], switch_rate=[0.07, 0.03])],
                         sb3_compat=sb3_compat)

    return env

### Choose shortest viable path

In [6]:
env = make_standard_env(sb3_compat=False)

In [7]:
def policy(env_encoding, next_req_encoding, next_req_obj):    
    # find all paths between source and sink
    paths = env.precomputed_paths[next_req_obj.source + next_req_obj.sink] 
    # (env.search(next_req_obj.source, next_req_obj.sink, [], []))
    paths.sort(key=lambda x: len(x)) # sort by shortest path
    selection = 0
    for path in paths:
        # check if this path works
        works = True
        nodes = [[path[i], path[i + 1]] for i in range(len(path) - 1)]
        for node_pair in nodes:
            if env.links[node_pair[0] + node_pair[1]].remaining_bw() < next_req_obj.bw:
                works = False
                
        if works:
            selection = paths.index(path)
            selection_one_hot = nn.functional.one_hot(torch.tensor([selection]), num_classes=3).flatten()
            next_req_obj.path = path
            return torch.cat([torch.tensor([1]), selection_one_hot])
        
    return torch.cat([torch.tensor([-1]), torch.tensor([0,0,0])])

In [8]:
# ONLY WORKS WITH sb3_compat=False when creating env object

total_reward = 0
env_encoding, next_req_encoding, next_req_obj = env.reset()
done = False

while not done:
    decision = policy(env_encoding, next_req_encoding, next_req_obj)
    
    obs, reward, done, info = env.step(decision, next_req_obj)
    env_encoding, next_req_encoding, next_req_obj = obs
    
    total_reward += reward

print(total_reward)

  return torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes)), next_req


11929.026038920432


In [9]:
env.print_statistics()

a | b | BW: [2] | Arrival rate: 0.75 | Acceptance rate: 0.4870530209617756
a | b | BW: [8] | Arrival rate: 1.5 | Acceptance rate: 0.26838709677419353
a | b | BW: [4, 9] | Arrival rate: 1.5 | Acceptance rate: 0.3286232950642724
c | d | BW: [1] | Arrival rate: 1.5 | Acceptance rate: 0.5038659793814433
c | d | BW: [7] | Arrival rate: 0.75 | Acceptance rate: 0.38267148014440433
c | d | BW: [3, 13] | Arrival rate: 3 | Acceptance rate: 0.23015406327178642
e | f | BW: [3] | Arrival rate: 0.75 | Acceptance rate: 0.46183699870633893
e | f | BW: [6] | Arrival rate: 1.5 | Acceptance rate: 0.3296432964329643
e | f | BW: [5, 8] | Arrival rate: 3 | Acceptance rate: 0.3038709677419355
BW used: 12511.685723575798


### PPO

In [10]:
def make_env(rank, seed=42):        
    def _init():
        env = make_standard_env(sb3_compat=True)
        env.seed(seed + rank)
        return env

    set_random_seed(seed)
    return _init

env = SubprocVecEnv([make_env(i) for i in range(4)])
env
                # self, request_type, bandwidth, service_rate, arrival_rate, source, sink, distribution, switch_rate=None
    

<stable_baselines3.common.vec_env.subproc_vec_env.SubprocVecEnv at 0x1376fa610>

In [9]:
policy_kwargs = dict(activation_fn=nn.ReLU,
                     net_arch=[dict(pi=[128, 128, 128], vf=[128, 128, 128])])
model = PPO(MlpPolicy, env, verbose=1, policy_kwargs=policy_kwargs, tensorboard_log="./multilink/", device="cpu")
model.learn(total_timesteps=100000000)

Using cpu device
Logging to ./multilink/PPO_23
-----------------------------
| time/              |      |
|    fps             | 640  |
|    iterations      | 1    |
|    time_elapsed    | 12   |
|    total_timesteps | 8192 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 418         |
|    iterations           | 2           |
|    time_elapsed         | 39          |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.008864902 |
|    clip_fraction        | 0.106       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.66       |
|    explained_variance   | -0.00971    |
|    learning_rate        | 0.0003      |
|    loss                 | 1.11e+03    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.00628    |
|    std                  | 0.995       |
|    value_loss           | 1

-----------------------------------------
| time/                   |             |
|    fps                  | 323         |
|    iterations           | 12          |
|    time_elapsed         | 303         |
|    total_timesteps      | 98304       |
| train/                  |             |
|    approx_kl            | 0.008964956 |
|    clip_fraction        | 0.121       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.58       |
|    explained_variance   | 0.139       |
|    learning_rate        | 0.0003      |
|    loss                 | 232         |
|    n_updates            | 110         |
|    policy_gradient_loss | -0.00899    |
|    std                  | 0.977       |
|    value_loss           | 1.99e+03    |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 322         |
|    iterations           | 13          |
|    time_elapsed         | 329   

----------------------------------------
| time/                   |            |
|    fps                  | 317        |
|    iterations           | 23         |
|    time_elapsed         | 594        |
|    total_timesteps      | 188416     |
| train/                  |            |
|    approx_kl            | 0.01731756 |
|    clip_fraction        | 0.209      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.39      |
|    explained_variance   | 0.103      |
|    learning_rate        | 0.0003     |
|    loss                 | 212        |
|    n_updates            | 220        |
|    policy_gradient_loss | -0.00878   |
|    std                  | 0.931      |
|    value_loss           | 615        |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 316         |
|    iterations           | 24          |
|    time_elapsed         | 620         |
|    total_

----------------------------------------
| time/                   |            |
|    fps                  | 315        |
|    iterations           | 34         |
|    time_elapsed         | 883        |
|    total_timesteps      | 278528     |
| train/                  |            |
|    approx_kl            | 0.06543201 |
|    clip_fraction        | 0.213      |
|    clip_range           | 0.2        |
|    entropy_loss         | -5.17      |
|    explained_variance   | 0.149      |
|    learning_rate        | 0.0003     |
|    loss                 | 87.5       |
|    n_updates            | 330        |
|    policy_gradient_loss | -0.0103    |
|    std                  | 0.882      |
|    value_loss           | 355        |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 315         |
|    iterations           | 35          |
|    time_elapsed         | 909         |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 314         |
|    iterations           | 45          |
|    time_elapsed         | 1173        |
|    total_timesteps      | 368640      |
| train/                  |             |
|    approx_kl            | 0.018855538 |
|    clip_fraction        | 0.216       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.99       |
|    explained_variance   | 0.0941      |
|    learning_rate        | 0.0003      |
|    loss                 | 69.1        |
|    n_updates            | 440         |
|    policy_gradient_loss | -0.0103     |
|    std                  | 0.841       |
|    value_loss           | 621         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 314         |
|    iterations           | 46          |
|    time_elapsed         | 1199  

----------------------------------------
| time/                   |            |
|    fps                  | 313        |
|    iterations           | 56         |
|    time_elapsed         | 1464       |
|    total_timesteps      | 458752     |
| train/                  |            |
|    approx_kl            | 0.06251149 |
|    clip_fraction        | 0.234      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.75      |
|    explained_variance   | 0.156      |
|    learning_rate        | 0.0003     |
|    loss                 | 159        |
|    n_updates            | 550        |
|    policy_gradient_loss | -0.0105    |
|    std                  | 0.797      |
|    value_loss           | 451        |
----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 313        |
|    iterations           | 57         |
|    time_elapsed         | 1490       |
|    total_times

-----------------------------------------
| time/                   |             |
|    fps                  | 312         |
|    iterations           | 67          |
|    time_elapsed         | 1755        |
|    total_timesteps      | 548864      |
| train/                  |             |
|    approx_kl            | 0.029876767 |
|    clip_fraction        | 0.266       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.68       |
|    explained_variance   | 0.131       |
|    learning_rate        | 0.0003      |
|    loss                 | 60.3        |
|    n_updates            | 660         |
|    policy_gradient_loss | -0.00867    |
|    std                  | 0.785       |
|    value_loss           | 177         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 312         |
|    iterations           | 68          |
|    time_elapsed         | 1781  

----------------------------------------
| time/                   |            |
|    fps                  | 311        |
|    iterations           | 78         |
|    time_elapsed         | 2049       |
|    total_timesteps      | 638976     |
| train/                  |            |
|    approx_kl            | 0.04598602 |
|    clip_fraction        | 0.24       |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.43      |
|    explained_variance   | 0.444      |
|    learning_rate        | 0.0003     |
|    loss                 | 67.9       |
|    n_updates            | 770        |
|    policy_gradient_loss | -0.00871   |
|    std                  | 0.736      |
|    value_loss           | 182        |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 311         |
|    iterations           | 79          |
|    time_elapsed         | 2076        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 310         |
|    iterations           | 89          |
|    time_elapsed         | 2348        |
|    total_timesteps      | 729088      |
| train/                  |             |
|    approx_kl            | 0.031931773 |
|    clip_fraction        | 0.262       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.12       |
|    explained_variance   | 0.244       |
|    learning_rate        | 0.0003      |
|    loss                 | 303         |
|    n_updates            | 880         |
|    policy_gradient_loss | -0.0138     |
|    std                  | 0.681       |
|    value_loss           | 296         |
-----------------------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 310        |
|    iterations           | 90         |
|    time_elapsed         | 2374      

-----------------------------------------
| time/                   |             |
|    fps                  | 309         |
|    iterations           | 100         |
|    time_elapsed         | 2645        |
|    total_timesteps      | 819200      |
| train/                  |             |
|    approx_kl            | 0.035458855 |
|    clip_fraction        | 0.27        |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.93       |
|    explained_variance   | 0.331       |
|    learning_rate        | 0.0003      |
|    loss                 | 45.9        |
|    n_updates            | 990         |
|    policy_gradient_loss | -0.0121     |
|    std                  | 0.651       |
|    value_loss           | 179         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 309         |
|    iterations           | 101         |
|    time_elapsed         | 2672  

-----------------------------------------
| time/                   |             |
|    fps                  | 308         |
|    iterations           | 111         |
|    time_elapsed         | 2945        |
|    total_timesteps      | 909312      |
| train/                  |             |
|    approx_kl            | 0.031531774 |
|    clip_fraction        | 0.261       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.7        |
|    explained_variance   | 0.209       |
|    learning_rate        | 0.0003      |
|    loss                 | 64.3        |
|    n_updates            | 1100        |
|    policy_gradient_loss | -0.012      |
|    std                  | 0.619       |
|    value_loss           | 336         |
-----------------------------------------
---------------------------------------
| time/                   |           |
|    fps                  | 308       |
|    iterations           | 112       |
|    time_elapsed         | 2972      |
| 

----------------------------------------
| time/                   |            |
|    fps                  | 307        |
|    iterations           | 122        |
|    time_elapsed         | 3249       |
|    total_timesteps      | 999424     |
| train/                  |            |
|    approx_kl            | 0.03568279 |
|    clip_fraction        | 0.291      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.48      |
|    explained_variance   | 0.46       |
|    learning_rate        | 0.0003     |
|    loss                 | 174        |
|    n_updates            | 1210       |
|    policy_gradient_loss | -0.0113    |
|    std                  | 0.585      |
|    value_loss           | 129        |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 307         |
|    iterations           | 123         |
|    time_elapsed         | 3277        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 133         |
|    time_elapsed         | 3558        |
|    total_timesteps      | 1089536     |
| train/                  |             |
|    approx_kl            | 0.043337252 |
|    clip_fraction        | 0.313       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.22       |
|    explained_variance   | 0.326       |
|    learning_rate        | 0.0003      |
|    loss                 | 110         |
|    n_updates            | 1320        |
|    policy_gradient_loss | -0.0105     |
|    std                  | 0.549       |
|    value_loss           | 198         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 306         |
|    iterations           | 134         |
|    time_elapsed         | 3585  

----------------------------------------
| time/                   |            |
|    fps                  | 305        |
|    iterations           | 144        |
|    time_elapsed         | 3864       |
|    total_timesteps      | 1179648    |
| train/                  |            |
|    approx_kl            | 0.06556843 |
|    clip_fraction        | 0.309      |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.12      |
|    explained_variance   | 0.255      |
|    learning_rate        | 0.0003     |
|    loss                 | 48.4       |
|    n_updates            | 1430       |
|    policy_gradient_loss | -0.0144    |
|    std                  | 0.536      |
|    value_loss           | 211        |
----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 305         |
|    iterations           | 145         |
|    time_elapsed         | 3891        |
|    total_

-----------------------------------------
| time/                   |             |
|    fps                  | 304         |
|    iterations           | 155         |
|    time_elapsed         | 4169        |
|    total_timesteps      | 1269760     |
| train/                  |             |
|    approx_kl            | 0.090153456 |
|    clip_fraction        | 0.345       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.98       |
|    explained_variance   | 0.198       |
|    learning_rate        | 0.0003      |
|    loss                 | 265         |
|    n_updates            | 1540        |
|    policy_gradient_loss | -0.00961    |
|    std                  | 0.517       |
|    value_loss           | 404         |
-----------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 304         |
|    iterations           | 156         |
|    time_elapsed         | 4196  

KeyboardInterrupt: 

In [11]:
env = Environment(["a", "b", "c", "d", "e", "f"], [["a", "b", 10], ["a", "c", 10], ["b", "d", 10], \
                                                    ["c", "d", 20], ["c", "e", 10], ["d", "f", 10], \
                                                    ["e", "f", 10]], \
                          [RequestType("static", [2], 0.5, 0.75, "a", "b", [1]), \
                          RequestType("static", [8], 1, 1.5, "a", "b", [1]), \
                          RequestType("elastic", [4, 9], 1, 1.5, "a", "b", [0.8, 0.2], switch_rate=[0.08, 0.02]), \
                          RequestType("static", [1], 1, 1.5, "c", "d", [1]), \
                          RequestType("static", [7], 0.5, 0.75, "c", "d", [1]), \
                          RequestType("elastic", [3, 13], 2, 3, "c", "d", [0.9, 0.1], switch_rate=[0.09, 0.01]), \
                          RequestType("static", [3], 0.5, 0.75, "e", "f", [1]), \
                           RequestType("static", [6], 1, 1.5, "e", "f", [1]), \
                            RequestType("elastic", [5, 8], 2, 3, "e", "f", [0.7, 0.3], switch_rate=[0.07, 0.03])],
                         sb3_compat=True)

env

<__main__.Environment at 0x7fa4aece84f0>

In [14]:
total_reward = 0
observation = env.reset()
done = False
while not done:
    action, _states = model.predict(observation, deterministic=True)
        
    observation, reward, done, info = env.step(action)
        
    total_reward += reward
    
print(total_reward)

  return torch.cat([torch.tensor(env_encoding).float(), torch.tensor(next_req.get_encoding(self.nodes)).float()])


exceeded: -0.13142528692712474
exceeded: -0.14775145690201816
exceeded: -0.10371693925766648
exceeded: -0.059099656926096006
exceeded: -0.8749381029437504
exceeded: -0.039091305817006025
exceeded: -0.26771513207064146
exceeded: -1.6402572134419158
exceeded: -2.1014860272366125
exceeded: -19.497560475305093
exceeded: -0.7763375636576899


KeyboardInterrupt: 

In [None]:
env.print_statistics()

In [None]:
model.learn(total_timesteps=6000000, reset_num_timesteps=False)

In [None]:
model.save("3M_trained")

## TD3

In [11]:
from stable_baselines3 import TD3
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

In [12]:
env = make_standard_env(sb3_compat=True)

env

<__main__.Environment at 0x1380b6fd0>

In [13]:
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

In [None]:
model = TD3("MlpPolicy", env, action_noise=action_noise, verbose=1, tensorboard_log="./multilink/", device="cuda")
model.learn(total_timesteps=100000000)

In [None]:
total_reward = 0
observation = env.reset()
done = False
while not done:
    action, _states = model.predict(observation, deterministic=True)

    observation, reward, done, info = env.step(action)
    print(observation)
    
    total_reward += reward
    
print(total_reward)