In [1]:
import torch
import torch.nn as nn
import math
import numpy as np
import datetime

np.random.seed(42)

import sys
print(sys.executable) # just to check which python

import gym
from gym import spaces

from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy

/usr/local/opt/python@3.9/bin/python3.9


In [2]:
class RequestType:
    def __init__(self, request_type, bandwidth, service_rate, arrival_rate, source, sink, distribution, switch_rate=None):
        # distribution is 1x2 if elastic and 1x1 if static
        
        self.type = request_type
        self.bw = bandwidth
        self.service_rate = service_rate
        self.arrival_rate = arrival_rate
        self.source = source
        self.sink = sink
        self.distribution = distribution
        self.switch_rate = switch_rate
        
        self.num_made = 0
        self.num_accepted = 0

class Request:
    def __init__(self, request_type, service_time, arrival_time, source, sink, transfer_rate, distribution=None, parent_elastic=None, bw_dist=None, request_type_template=None):
        self.type = request_type
        self.service_time = service_time
        self.arrival_time = arrival_time
        self.source = source
        self.sink = sink
        self.bw = transfer_rate
        self.request_type = request_type
        self.parent_elastic = parent_elastic
        self.accepted = None
        self.path = None
        self.bw_dist = bw_dist
        
        self.blueprint = request_type_template
        
        if request_type == "elastic":
            self.distribution = distribution
            self.scale_requests = []
                   
        if request_type_template is not None:
            request_type_template.num_made += 1
            
    def add_scale_request(self, req): 
        # we store related scale requests for elastic requests
        # not used if static request
        self.scale_requests.append(req)
            
    def get_encoding(self, nodes_in_environment):
        # as per our notes, this SHOULD return 1x5 tensor,
        # but we have one hot encodings INSIDE this tensor,
        # so we will flatten this and return, so the size will be
        # larger than 1x5
        
        # nodes_in_environment is a list of all the nodes in our graph
        # eg ["a", "b", "c"]
        
        # request is [one hot source, one hot destination, bw, service time, one hot type]
                
        one_hot_source = nn.functional.one_hot(torch.tensor([nodes_in_environment.index(self.source)]), num_classes=len(nodes_in_environment)).flatten()
        one_hot_dest   = nn.functional.one_hot(torch.tensor([nodes_in_environment.index(self.sink)]), num_classes=len(nodes_in_environment)).flatten()
    
        if self.request_type == "static":
            one_hot_type = torch.tensor([1, 0, 0])
        elif self.request_type == "elastic":
            one_hot_type = torch.tensor([0, 1, 0])
        elif self.request_type == "scale":
            one_hot_type = torch.tensor([0, 0, 1])
            
        encoding = torch.cat([one_hot_source, 
                             one_hot_dest,
                             torch.tensor([self.bw]), 
                             torch.tensor([self.service_time]),
                             one_hot_type])
        
        return encoding

In [3]:
class Link:
    def __init__(self, node_1, node_2, bw_capacity):
        self.serving_requests = []
        self.nodes = [node_1, node_2]
        self.total_bw = bw_capacity
        
    def reset(self):
        self.serving_requests = []
        
    def add_request(self, request_obj):
        self.serving_requests.append(request_obj)
        
    def remove_request(self, request_obj):
        self.serving_requests.remove(request_obj)
        
    def remaining_bw(self): 
        # subtracting bw being used from total bw capacity
        bw_being_used = 0
        for req in self.serving_requests:
            bw_being_used += req.bw
            
        return (self.total_bw - bw_being_used)

In [4]:
class Environment(gym.Env):
    # requests_in_service_encoder = nn.RNN(????, 7)
    metadata = {'render.modes': ['human']}
    
    def __init__(self, nodes, links, request_blueprints, use_RNN=False, sb3_compat=False):
        super(Environment, self).__init__()
                
        """
        nodes: list of strings where each string is just a name or identifier of a node
        links: list of tuples where in tuple t, t[0] is first node, t[1] is another node, and t[2] is bw capacity of the link
        request_blueprints: list of DeploymentRequest objects
        """
        self.nodes = nodes
        self.links = {}
        self.request_history = []
        self.E_history = []
        self.past_distributions = []
        self.request_blueprints = request_blueprints
        self.last_time = 0
        self.episode_timesteps = 600
        self.use_RNN = use_RNN
        self.sb3_compat = sb3_compat
        self.precomputed_paths = {}
        
        if sb3_compat:
            self.request_being_considered = None
        
        for link in links:
            if link[0] not in self.nodes or link[1] not in self.nodes:
                raise Exception("Node in link " + str(link) + " doesn't exist")
            
            link_obj = Link(*link)

            self.links[link[0] + link[1]] = link_obj
            self.links[link[1] + link[0]] = link_obj
            
        self.request_list = self.create_requests()
        self.request_queue = iter(self.request_list)
        
        # Setup gym-specific code
        env_encoding_size = self.get_encoding(increment_iterator=False).size()
        req_encoding_size = self.request_list[0].get_encoding(self.nodes).size()
        # print(env_encoding_size[0] + req_encoding_size[0])
        
        self.action_space = spaces.Box(low=0, high=math.inf,
                                      shape=(4,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-math.inf, high=math.inf,
                                      shape=(env_encoding_size[0] + req_encoding_size[0],), dtype=np.float32)
        
        # TODO, WRITE RNN logic
        
        #if use_RNN:
        #    self.requests_in_service_encoder = nn.RNN
        
    def precompute_paths(self):
        for req_type in self.request_blueprints:
            self.precomputed_paths[req_type.source + req_type.sink] = env.search(req_type.source, req_type.sink, [], [])
            
    def add_request(self, request, path=None): # we want to add this request to a link or path
        # path: a list of nodes that the request traverses including source and sink
        # if no path is specified, path is assumed to be [req.source, req.sink]
        
        if path is not None: 
            nodes = [[path[i], path[i + 1]] for i in range(len(path) - 1)]
            for node_pair in nodes:
                env.links[node_pair[0] + node_pair[1]].add_request(request)
        
        else:
            self.links[request.source + request.sink].add_request(request)
        
        request.accepted = True
        request.blueprint.num_accepted += 1
        self.request_history.append(request)
        # print(self.links[request.source + request.sink])
    
    def reset(self):
        for link in self.links.values():
            link.reset()
        self.request_history = []
        self.E_history = []
        self.past_distributions = []
        self.last_time = 0
        self.request_list = env.create_requests()
        self.request_queue = iter(self.request_list)
        
        return env.get_encoding()
        
    def reward(self, request, decision):
        base_rate = 1         # 1 when static
        type_bonus = 0.9      # 0.9 when static
        bw = request.bw
        if request.type == "elastic":
            #base_rate = request.bw
            type_bonus = 1.1                # 1.1 when elastic
            bw = np.array(request.bw_dist).dot(request.distribution)
            
        r = bw * base_rate * request.service_time * type_bonus
        
        # if remaining bandwidth on link(s) < 0, very "bad" reward
        if request.path is not None:
            path_length = len(request.path)
            
            r *= math.pow(0.9, path_length - 2)
            
            nodes = [[request.path[i], request.path[i + 1]] for i in range(len(request.path) - 1)]
            for node_pair in nodes:
                if self.links[node_pair[0] + node_pair[1]].remaining_bw() < 0:
                    print("exceeded: " + str(-r * 10))
                    return (-r * 10)
        else:
            # path is direct, so no decrease of reward needed
            remaining_bw = self.links[request.source + request.sink].remaining_bw()
            if remaining_bw < 0:
                print("exceeded: " + str(-r * 10))
                return (-r * 10)
        
        if decision == "accept":
            return r
        
        if decision == "reject":
            if request.type == "static" or request.type == "elastic":
                return 0
            elif request.type == "scale":
                if len(self.past_distributions) == 0:
                    return -1 * r
                
                else:
                    current_sum = torch.from_numpy(np.sum(self.past_distributions, axis=0))

                    average_past_distribution = current_sum / len(self.past_distributions)
                    current_req_distribution = torch.tensor(request.parent_elastic.distribution)

                    return -1 * r * math.exp(-nn.functional.kl_div(average_past_distribution, current_req_distribution))

                """
                past_distributions = []
                for req in self.request_history:
                    if req.request_type == "elastic":
                        past_distributions.append(req.distribution)
                
                average_past_distribution = torch.mean(past_distributions, dim=1)
                current_req_distribution = torch.tensor(request.distribution)
                
                if bool(average_past_distribution[0] < current_req_distribution[0]):
                    return -1 * r * math.exp(-nn.functional.kl_div(average_past_distribution, current_req_distribution))
                else:
                    return 0
                """
                
    def next_req(self):
        next_req = next(self.request_queue)
        if self.sb3_compat:
            self.request_being_considered = next_req
        return next_req
                
    def step(self, action, req=None):
        # what happens if we have two requests that come in on the same timestep but there is only enough bandwidth for one?
        # do we the decision on the second request with knowledge of the first request
        # essentially, after we accept the first request, will we submit an updated encoding of the network to the policy network?
 
        # actions is a Nx2 matrix where the first column in the request and second is the decision
        # decision is either "accept" or "reject"
        # this is given by our agent
        
        # if req is None, that means we are using sb3_compat=True and we can get the req from self.request_being_considered
                
        if req is None:
            req = self.request_being_considered
            
        if action[0] > 0.5:
            # accept request
            paths = env.precomputed_paths[req.source + req.sink]
            paths.sort(key=lambda x: len(x)) # sort by shortest path
            # select the path we are using
            path = paths[action[1:4].argmax()]
            
            self.add_request(req, path)
        
            reward = env.reward(req, "accept")
        elif action[0] < 0.5:
            # reject
            reward = env.reward(req, "reject")
        
        obs = env.get_encoding()
        
        done = req.arrival_time > 600
        info = {}
        
        return obs, reward, done, info
        
    def update_requests(self, current_time):
        # here, we remove expired requests and update E_history based off of the request stats
        
        for link in self.links.values():
            for request in link.serving_requests.copy():
                if (request.arrival_time + request.service_time) > self.last_time and (request.arrival_time + request.service_time) < current_time:
                    # request has expired, let's remove it from the links
                    for link in self.links.values():
                        if request in link.serving_requests:
                            link.remove_request(request)

                    if request.type == "elastic":
                        time_on_higher_bw = 0
                        for scale_req in request.scale_requests:
                            time_on_higher_bw += scale_req.service_time

                        time_on_lower_bw = request.service_time - time_on_higher_bw

                        # calculate E[history]
                        request_time = np.array([time_on_lower_bw, time_on_higher_bw])
                        request_bw = request.bw
                        result = (request_time / request_time.sum()).dot(request_bw)
                        self.past_distributions.append(request_time / request_time.sum())
                        self.E_history.append(result)

    def get_encoding(self, increment_iterator=True):
        links_processed = [] 
        # these will store links that we have already encoded so we don't encode them again
        
        current_encoding = []
        
        # h = torch.zeros(7) # assuming 7 for h0 size
        # last_out = None
        
        env_encoding = []
        
        if increment_iterator:
            next_req = self.next_req()

            while next_req.type == "scale":
                if not next_req.parent_elastic.accepted:
                    next_req = self.next_req()
                elif next_req.parent_elastic.accepted:
                    break
            
            """
            while next_req.type == "scale":
                if next_req.parent_elastic.accepted:
                    next_req.accepted = True # we must accept since we accepted elastic req
                    self.add_request(next_req, next_req.parent_elastic.path)
                next_req = self.next_req()
            """
            self.update_requests(next_req.arrival_time)

        for link in self.links.values():
            if link in links_processed:
                continue

                        
            # Commented because we don't want to encode any queue for phase 1
            
            # for req in link.serving_requests
                # request is [one hot source, one hot destination, bw, service time, one hot type]
                
                # one_hot_source = nn.functional.one_hot(torch.tensor([self.nodes.index(req.source)]), num_classes=len(self.nodes))
                # one_hot_dest   = nn.functional.one_hot(torch.tensor([self.nodes.index(req.sink)]), num_classes=len(self.nodes))

                # req_tensor = torch.Tensor([]) # mismatched dimensions??!
                # last_out, h = self.requests_in_service_encoder(req_tensor, h)

            # current_encoding.append(torch.cat(torch.Tensor([link.remaining_bw]), last_out))
            # torch.stack(current_encoding)
            
            # check implementation later
            
            env_encoding.append(link.remaining_bw())
            
            links_processed.append(link)
            
        if not increment_iterator:
            return torch.tensor(env_encoding)
        
        if self.sb3_compat:
            return torch.cat([torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes))])
        else:
            return torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes)), next_req
    
    def create_requests(self):
        requests = []
        
        for request_type in self.request_blueprints:
            arrival_times = []
            service_times = []
            last_arrival = 0
        
            while last_arrival < self.episode_timesteps: # we want to generate requests till we reach episode end
                last_arrival += np.random.exponential(request_type.arrival_rate)
                arrival_times.append(last_arrival)
                                
            for _ in arrival_times:
                service_times.append(np.random.exponential(request_type.service_rate))
                
            for arrival_time, service_time in zip(arrival_times, service_times):
                # start creating requests
                
                new_request = Request(request_type.type, service_time, arrival_time, request_type.source, request_type.sink, 
                                      request_type.bw[0], request_type.distribution, bw_dist=request_type.bw, request_type_template=request_type)
                requests.append(new_request)
                
                if request_type.type == "elastic": 
                    # we will start with the first bandwidth element as starting bw
                    # WE ASSUME that bw[0] < bw[1]
                    timesteps_from_deployment = 0
                    current_bw = request_type.bw[0]
                    while timesteps_from_deployment < service_time:
                        if current_bw == request_type.bw[0]:
                            # we want to generate a scale request to increase bw
                            scale_bw = request_type.bw[1] - current_bw
                            scale_service_time = np.random.exponential(request_type.switch_rate[1])
                            scale_request = Request("scale", scale_service_time, \
                                                    arrival_time + timesteps_from_deployment, request_type.source, \
                                                   request_type.sink, scale_bw, parent_elastic=new_request,
                                                   request_type_template=request_type)
                            requests.append(scale_request)
                            new_request.add_scale_request(scale_request)
                            
                            timesteps_from_deployment += scale_service_time
                            current_bw = request_type.bw[1] # request_type.bw[0] + scale_bw
                        elif current_bw == request_type.bw[1]:
                            # we want to go to lower bw and spend some time there
                            time_spent_on_lower_bw = np.random.exponential(request_type.switch_rate[0])
                            timesteps_from_deployment += time_spent_on_lower_bw
                            current_bw = request_type.bw[0]
                            
        # sort requests by arrival time
        requests.sort(key=lambda x: x.arrival_time)
        return requests
    
    def search(self, source, dest, visited_a, paths):
        visited_a.append(source)
        # print(visited_a)

        for link in set(env.links.values()):
            visited = visited_a.copy()
            if source in link.nodes:
                if dest in link.nodes:
                    visited.append(dest)
                    paths.append(visited)

                x = link.nodes.copy()
                x.remove(source)
                if x[0] not in visited:
                    self.search(x[0], dest, visited.copy(), paths)
        return paths
    
    def print_statistics(self):
        for req_type in self.request_blueprints:
            print(req_type.source + " | " +
                   req_type.sink + " | " + 
                 "BW: " + str(req_type.bw) + " | " +
                 "Arrival rate: " + str(req_type.arrival_rate) + " | " +
                 "Acceptance rate: " + str(req_type.num_accepted / req_type.num_made))
            
        resources_used = 0
        for req in self.request_list:
            if req.accepted == True:
                resources_used += req.bw * req.service_time
            
        print("BW used: " + str(resources_used))

In [5]:
env = Environment(["a", "b", "c", "d", "e", "f"], [["a", "b", 10], ["a", "c", 10], ["b", "d", 10], \
                                                   ["c", "d", 20], ["c", "e", 10], ["d", "f", 10], \
                                                   ["e", "f", 10]], \
                  [RequestType("static", [2], 0.5, 0.75, "a", "b", [1]), \
                  RequestType("static", [8], 1, 1.5, "a", "b", [1]), \
                  RequestType("elastic", [4, 9], 1, 1.5, "a", "b", [0.8, 0.2], switch_rate=[0.08, 0.02]), \
                  RequestType("static", [1], 1, 1.5, "c", "d", [1]), \
                  RequestType("static", [7], 0.5, 0.75, "c", "d", [1]), \
                  RequestType("elastic", [3, 13], 2, 3, "c", "d", [0.9, 0.1], switch_rate=[0.09, 0.01]), \
                  RequestType("static", [3], 0.5, 0.75, "e", "f", [1]), \
                   RequestType("static", [6], 1, 1.5, "e", "f", [1]), \
                    RequestType("elastic", [5, 8], 2, 3, "e", "f", [0.7, 0.3], switch_rate=[0.07, 0.03])],
                 sb3_compat=True)


                # self, request_type, bandwidth, service_rate, arrival_rate, source, sink, distribution, switch_rate=None
    
env.precompute_paths()

### Choose shortest viable path

In [6]:
def policy(env_encoding, next_req_encoding, next_req_obj):    
    # find all paths between source and sink
    paths = env.precomputed_paths[next_req_obj.source + next_req_obj.sink] 
    # (env.search(next_req_obj.source, next_req_obj.sink, [], []))
    paths.sort(key=lambda x: len(x)) # sort by shortest path
    selection = 0
    for path in paths:
        # check if this path works
        works = True
        nodes = [[path[i], path[i + 1]] for i in range(len(path) - 1)]
        for node_pair in nodes:
            if env.links[node_pair[0] + node_pair[1]].remaining_bw() < next_req_obj.bw:
                works = False
                
        if works:
            selection = paths.index(path)
            selection_one_hot = nn.functional.one_hot(torch.tensor([selection]), num_classes=3).flatten()
            next_req_obj.path = path
            return torch.cat([torch.tensor([1]), selection_one_hot])
        
    return torch.cat([torch.tensor([0]), torch.tensor([0,0,0])])

In [7]:
# ONLY WORKS WITH sb3_compat=False when creating env object

total_reward = 0
env_encoding, next_req_encoding, next_req_obj = env.reset()
done = False

while not done:
    decision = policy(env_encoding, next_req_encoding, next_req_obj)
    
    obs, reward, done, info = env.step(decision, next_req_obj)
    env_encoding, next_req_encoding, next_req_obj = obs
    
    total_reward += reward

print(total_reward)

  return torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes)), next_req


11829.620237371953


In [8]:
env.print_statistics()

a | b | BW: [2] | Arrival rate: 0.75 | Acceptance rate: 0.48520345252774355
a | b | BW: [8] | Arrival rate: 1.5 | Acceptance rate: 0.2670967741935484
a | b | BW: [4, 9] | Arrival rate: 1.5 | Acceptance rate: 0.3766068099303307
c | d | BW: [1] | Arrival rate: 1.5 | Acceptance rate: 0.5038659793814433
c | d | BW: [7] | Arrival rate: 0.75 | Acceptance rate: 0.3832731648616125
c | d | BW: [3, 13] | Arrival rate: 3 | Acceptance rate: 0.23579912971892272
e | f | BW: [3] | Arrival rate: 0.75 | Acceptance rate: 0.4553686934023286
e | f | BW: [6] | Arrival rate: 1.5 | Acceptance rate: 0.3284132841328413
e | f | BW: [5, 8] | Arrival rate: 3 | Acceptance rate: 0.40795698924731183
BW used: 12529.47497216749


### PPO

In [6]:
model = PPO(MlpPolicy, env, verbose=1)
model.learn(total_timesteps=600000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


  return torch.cat([torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes))])


exceeded: -76.11723038718691
exceeded: -3.597510090968576
exceeded: -21.20412444609125
exceeded: -0.6575604339719677
exceeded: -93.64222533713806
exceeded: -1.525571636038893
exceeded: -0.5782132472615229
exceeded: -0.39402911056504986
exceeded: -0.24352947043225823
exceeded: -0.6367470720869921
exceeded: -1.0138355215000556
exceeded: -1.0710911086908839
exceeded: -49.72308306244169
exceeded: -88.24099588489429
exceeded: -172.46477246045006
exceeded: -12.342519766637963
exceeded: -33.659249112903126
exceeded: -100.52652605968534
exceeded: -331.4671474855761
exceeded: -0.09548062120440502
exceeded: -0.2117843945972941
exceeded: -99.07152992878274
exceeded: -45.15424794459871
exceeded: -0.40412531876885066
exceeded: -0.33978246317760136
exceeded: -0.9308292910462106
exceeded: -1.316701314548507
exceeded: -6.503329963030068
exceeded: -0.2604857057196127
exceeded: -1.6923065950576044
exceeded: -2.3638361650096043
exceeded: -8.174299148065344
exceeded: -10.236669036972874
exceeded: -0.30028

exceeded: -0.11667945645068045
-----------------------------------------
| time/                   |             |
|    fps                  | 1286        |
|    iterations           | 2           |
|    time_elapsed         | 3           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008135144 |
|    clip_fraction        | 0.0835      |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.7        |
|    explained_variance   | 0.00608     |
|    learning_rate        | 0.0003      |
|    loss                 | 1.78e+03    |
|    n_updates            | 10          |
|    policy_gradient_loss | -0.012      |
|    std                  | 1.01        |
|    value_loss           | 3e+03       |
-----------------------------------------
exceeded: -0.31022261255174105
exceeded: -3.400906136200254
exceeded: -0.19233032596100064
exceeded: -0.9310975157638569
exceeded: -15.139289863990854
exceeded: -464.85725

exceeded: -3.203999235641
exceeded: -37.14668470648571
exceeded: -0.6647470340966113
exceeded: -6.7719727120918405
exceeded: -1.5337448935569842
exceeded: -26.296764886401583
exceeded: -28.247836642265398
exceeded: -11.469644542020864
exceeded: -4.8173732052385745
exceeded: -0.0011345666809514634
exceeded: -23.125785483426906
exceeded: -3.3362786197308125
exceeded: -2.084696237260892
exceeded: -2.703722620165018
exceeded: -70.87175224194313
exceeded: -0.45648442447898885
exceeded: -1.0300647402695204
exceeded: -2.295998250476709
exceeded: -233.24290406236167
exceeded: -28.41475509954435
exceeded: -61.160689357229465
exceeded: -38.77855411837084
exceeded: -78.46460797044523
exceeded: -1.3850957340528667
exceeded: -11.25652087207033
exceeded: -1.0236637640071116
exceeded: -41.255930662488424
exceeded: -1.9847615208653422
exceeded: -91.90276865073317
exceeded: -0.8995838502627195
exceeded: -1.3385175500685242
exceeded: -2.246923607487408
exceeded: -0.8410433114241747
exceeded: -0.77626802

exceeded: -74.35389575288143
exceeded: -0.3555954355413682
exceeded: -2.7143441185281936
exceeded: -0.6592503911073732
exceeded: -0.8607130332938944
exceeded: -99.44452421242245
exceeded: -1.1536887892090268
exceeded: -0.7655049691725297
exceeded: -1.1719717751647032
exceeded: -0.2515144840383435
exceeded: -4.583551638764939
exceeded: -0.5491453509561011
exceeded: -1.4540619824068537
exceeded: -1.9751734669791643
exceeded: -28.14447906043901
exceeded: -0.3051770446607046
exceeded: -2.4712263701826793
exceeded: -33.19341531791984
exceeded: -5.098553872187498
exceeded: -8.339949461448773
exceeded: -18.449693224016798
exceeded: -2.013194708319296
exceeded: -0.9444049698464307
exceeded: -21.452681051068907
exceeded: -0.586979297682269
exceeded: -63.13791527523618
exceeded: -170.0798636359545
exceeded: -0.9624472777496246
exceeded: -0.3714333106943756
exceeded: -1.283567539077077
exceeded: -0.08939862605926574
exceeded: -0.1337508451674292
exceeded: -1.3102092270180918
exceeded: -0.42157526

exceeded: -0.5196177973223108
exceeded: -0.11219386666039166
exceeded: -0.37569971982597805
exceeded: -0.16364729461108177
exceeded: -18.325674420776995
exceeded: -1.9186034229185411
exceeded: -9.079479019226879
exceeded: -14.264905789256336
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.47e+03    |
|    ep_rew_mean          | -3.63e+03   |
| time/                   |             |
|    fps                  | 1080        |
|    iterations           | 9           |
|    time_elapsed         | 17          |
|    total_timesteps      | 18432       |
| train/                  |             |
|    approx_kl            | 0.006643992 |
|    clip_fraction        | 0.0537      |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.64       |
|    explained_variance   | 0.076       |
|    learning_rate        | 0.0003      |
|    loss                 | 205         |
|    n_updates            | 80          |
| 

exceeded: -72.92144538667127
exceeded: -21.65449196606413
exceeded: -16.260559753841598
exceeded: -4.805272301314624
exceeded: -73.38077892479278
exceeded: -29.15391850676706
exceeded: -0.47563775543358394
exceeded: -0.08205457035107114
exceeded: -238.78200572055317
exceeded: -17.6044118165269
exceeded: -4.473762282937587
exceeded: -123.1671153007899
exceeded: -39.386453409460536
exceeded: -1.7829275220797645
exceeded: -5.829738801128044
exceeded: -5.4195963717118385
exceeded: -94.11151299149266
exceeded: -51.52152052793216
exceeded: -0.9299570251063226
exceeded: -84.80091383870766
exceeded: -7.0667270533007285
exceeded: -5.058430411812871
exceeded: -94.6712309036208
exceeded: -0.07398065486117929
exceeded: -11.245428159985448
exceeded: -29.06787380012708
exceeded: -201.21578517567005
exceeded: -74.43720724331331
exceeded: -30.992815898041975
exceeded: -222.00946373507367
exceeded: -15.069881366574158
exceeded: -3.2150106270480387
exceeded: -0.3714834117565063
exceeded: -2.908080007108

exceeded: -1.0625957671780653
exceeded: -0.018491983294775707
exceeded: -1.7852418952416476
exceeded: -28.178969893560865
exceeded: -73.90476155733923
exceeded: -213.69359313979692
exceeded: -122.9750370955372
exceeded: -156.31043556894994
exceeded: -2.4304557929341226
exceeded: -2.420633487602609
exceeded: -0.8415651634772474
exceeded: -1.5572287552277055
exceeded: -10.80943755058053
exceeded: -0.9488500452719892
exceeded: -0.8619246905707953
exceeded: -0.8387901474464161
exceeded: -0.16817204926164828
exceeded: -3.163992578495664
exceeded: -0.4400613934631537
exceeded: -90.08392012297513
exceeded: -1.7206667106563311
exceeded: -14.884623642940804
exceeded: -0.21872955337116182
exceeded: -0.3506497958309723
exceeded: -0.10283880040152647
exceeded: -1.172905032939512
exceeded: -0.8878339634597594
exceeded: -1.7720350663796625
exceeded: -2.014392390415004
exceeded: -3.331052419838418
exceeded: -0.8167865280322234
exceeded: -0.8788135119049676
exceeded: -0.02097730381594694
exceeded: -1.

exceeded: -1.1943239421391496
exceeded: -1.2716795313906402
exceeded: -0.22236111314582405
exceeded: -0.9353403086162577
exceeded: -2.506194413256258
exceeded: -0.256825586473277
exceeded: -21.18003901101124
exceeded: -0.1182391468583542
exceeded: -33.45203833386024
exceeded: -8.957810721280369
exceeded: -0.8690792554848674
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.55e+03     |
|    ep_rew_mean          | -1.92e+03    |
| time/                   |              |
|    fps                  | 1123         |
|    iterations           | 21           |
|    time_elapsed         | 38           |
|    total_timesteps      | 43008        |
| train/                  |              |
|    approx_kl            | 0.0042290557 |
|    clip_fraction        | 0.0329       |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.58        |
|    explained_variance   | 0.101        |
|    learning_rate        | 0

exceeded: -0.0005388356814052321
exceeded: -3.3054851274431862
exceeded: -0.22668128152603484
exceeded: -0.17646220069107305
exceeded: -0.23613924454186974
exceeded: -0.06698314259484933
exceeded: -48.85120565965639
exceeded: -56.992781186679466
exceeded: -1.055783338565366
exceeded: -1.358796431723848
exceeded: -0.31027589942997674
exceeded: -2.7660607706373956
exceeded: -0.9714826410512549
exceeded: -1.5445313451257328
exceeded: -29.799156539125022
exceeded: -6.337680086678112
exceeded: -55.38249156170717
exceeded: -0.9898423119604011
exceeded: -0.16003893789645982
exceeded: -0.9182110900289887
exceeded: -0.08502086068732048
exceeded: -64.05063207490527
exceeded: -5.09423010831637
exceeded: -0.9658875300658589
exceeded: -5.516979478322907
exceeded: -3.4357984691299013
exceeded: -128.87087675893235
exceeded: -221.6270627879517
exceeded: -1.5051949500498305
exceeded: -0.5589183777193418
exceeded: -0.6625325932972725
exceeded: -1.3507438890313441
exceeded: -1.0435268405959752
exceeded: 

exceeded: -10.76327281426731
exceeded: -2.0253200324955625
exceeded: -0.11947783262930906
exceeded: -0.9473306681002903
exceeded: -0.38858280487739116
exceeded: -0.045472271970515205
exceeded: -0.820354524204393
exceeded: -0.3646552033326026
exceeded: -1.1831644971963962
exceeded: -148.40438204063705
exceeded: -1.0031478557216043
exceeded: -87.50667559012552
exceeded: -0.5656599580390843
exceeded: -0.4471006181512488
exceeded: -1.781272191154562
exceeded: -281.36031504883294
exceeded: -143.6206669322645
exceeded: -0.8748105420935813
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.79e+03    |
|    ep_rew_mean          | -1.06e+03   |
| time/                   |             |
|    fps                  | 1138        |
|    iterations           | 29          |
|    time_elapsed         | 52          |
|    total_timesteps      | 59392       |
| train/                  |             |
|    approx_kl            | 0.014943946 |

exceeded: -1.0728435090637467
exceeded: -1.5720266553548146
exceeded: -0.9251173544656289
exceeded: -0.8139117796428694
exceeded: -70.19913642705563
exceeded: -1.0015125985177762
exceeded: -2.1397312989439166
exceeded: -0.5167209829304613
exceeded: -2.357274437038915
exceeded: -3.8430848173880907
exceeded: -0.19156470086375957
exceeded: -1.375743737174933
exceeded: -1.277714777869858
exceeded: -1.344895702970025
exceeded: -0.4862095968155363
exceeded: -0.6480987002410378
exceeded: -0.03873646063829424
exceeded: -0.06090385077321833
exceeded: -0.0828146291188573
exceeded: -0.11349441053495637
exceeded: -0.6842080333506441
exceeded: -0.6669225953926127
exceeded: -0.2809455095156309
exceeded: -0.3492393530888618
exceeded: -0.3665190143546621
exceeded: -0.5389732772896071
exceeded: -0.7308781127602733
exceeded: -0.798484081979288
exceeded: -2.3329630369690877
exceeded: -0.21463429819668547
exceeded: -0.2266590580589625
exceeded: -0.11061575818844302
exceeded: -0.7168350211933462
exceeded: 

exceeded: -104.95030631694165
exceeded: -0.22472593738061708
exceeded: -2.501611647084458
exceeded: -35.23032520112133
exceeded: -1.0802457976418696
exceeded: -0.109085038853558
exceeded: -1.6057502598571798
exceeded: -0.6305786585684524
exceeded: -0.16112621821326883
exceeded: -0.2404538797710393
exceeded: -0.12105938183260631
exceeded: -3.704706916498033
exceeded: -0.3508372891974889
exceeded: -1.5066157160054026
exceeded: -0.18287569320647828
exceeded: -0.1516793452865439
exceeded: -0.5857343180978783
exceeded: -2.093267417077392
exceeded: -0.6688183201124955
exceeded: -0.3468868938046863
exceeded: -0.662034220432528
exceeded: -0.053595544148320726
exceeded: -0.3922653911879599
exceeded: -1.260537595595205
exceeded: -24.613428872140407
exceeded: -0.2722342113979531
exceeded: -0.00042227125258766573
exceeded: -17.57187331248816
exceeded: -13.796980064749942
exceeded: -3.378506008129376
exceeded: -0.46559958312628713
exceeded: -31.320076804421575
exceeded: -1.5778728220346676
exceeded

exceeded: -0.05889466639845382
exceeded: -78.24942425542588
exceeded: -0.261617598920322
exceeded: -0.29890256870490206
exceeded: -1.3297888755471226
exceeded: -4.984667582441926
exceeded: -7.435817830557849
exceeded: -0.23377591362199832
exceeded: -0.2021925307534243
exceeded: -0.15468698351115892
exceeded: -0.15892078139032498
exceeded: -0.2476448837057716
exceeded: -0.4997269683807597
exceeded: -0.08503139525276536
exceeded: -0.9410260600895408
exceeded: -0.44279594352434143
exceeded: -47.539678058609816
exceeded: -0.33284193456961936
exceeded: -0.7816731821930871
exceeded: -0.4464891126233592
exceeded: -0.05686501938229868
exceeded: -0.5216124908034364
exceeded: -0.7711345352931193
exceeded: -2.057903386665694
exceeded: -0.055300078190817906
exceeded: -0.15680160468943727
exceeded: -0.09819479925994895
exceeded: -0.3177689300917877
exceeded: -0.23315810998627867
exceeded: -4.068077696776112
exceeded: -0.7162636944000184
exceeded: -0.4223058657299678
exceeded: -1.6749949327763136
ex

exceeded: -3.67089684446785
exceeded: -31.00448467641899
exceeded: -0.2549322056018493
exceeded: -1.1497197002247537
exceeded: -66.10899255635005
exceeded: -6.494952996933686
exceeded: -43.22850541867788
exceeded: -8.260482858072336
exceeded: -1.5489470755134689
exceeded: -21.540003631105602
exceeded: -0.5631367958065334
exceeded: -18.891321032103466
exceeded: -1.324132548381499
exceeded: -0.9115037762310162
exceeded: -0.9655052727463019
exceeded: -1.760664946204874
exceeded: -0.4180553131960832
exceeded: -6.607281871511584
exceeded: -0.8339274835549193
exceeded: -0.365290774468482
exceeded: -0.9807846468745229
exceeded: -0.6124908762470369
exceeded: -0.4218116428841958
exceeded: -0.5750595780250876
exceeded: -0.019012247380204936
exceeded: -4.537208390965385
exceeded: -2.920572974884873
exceeded: -4.72989231886957
exceeded: -2.640962094186662
exceeded: -59.69546988170525
exceeded: -72.0886072835142
exceeded: -0.005677774101225248
-----------------------------------------
| rollout/   

exceeded: -96.41805312908363
exceeded: -0.6555695832043201
exceeded: -0.6221829104236896
exceeded: -0.43790800656384127
exceeded: -2.3477926484041283
exceeded: -0.8591795049087937
exceeded: -0.5502159369094344
exceeded: -3.557188497362595
exceeded: -0.09227453540461895
exceeded: -0.0541218918158865
exceeded: -0.0021611932144873537
exceeded: -0.1715154571816591
exceeded: -1.0868500462537138
exceeded: -0.643895388197121
exceeded: -0.2841985740936055
exceeded: -0.571247937244207
exceeded: -0.7471735694385695
exceeded: -0.6159208089452655
exceeded: -2.7030840917258363
exceeded: -121.80298898985026
exceeded: -0.17252796486081368
exceeded: -1.2044203558138404
exceeded: -1.9809937181812853
exceeded: -0.17291455875293205
exceeded: -0.6599674714831354
exceeded: -0.32468989416411054
exceeded: -0.6740584629482609
exceeded: -0.1994310847615088
exceeded: -0.1455173661311881
exceeded: -1.9720406129541352
exceeded: -1.2828722847062202
exceeded: -0.4545259679441689
exceeded: -0.7832242440561482
exceed

exceeded: -0.4138961775179711
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 8.75e+03     |
|    ep_rew_mean          | -222         |
| time/                   |              |
|    fps                  | 1131         |
|    iterations           | 50           |
|    time_elapsed         | 90           |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 0.0098698875 |
|    clip_fraction        | 0.0808       |
|    clip_range           | 0.2          |
|    entropy_loss         | -5.34        |
|    explained_variance   | 0.333        |
|    learning_rate        | 0.0003       |
|    loss                 | 85.4         |
|    n_updates            | 490          |
|    policy_gradient_loss | -0.0105      |
|    std                  | 0.919        |
|    value_loss           | 368          |
------------------------------------------
exceeded: -16.1856551219

exceeded: -38.42750167715898
exceeded: -0.13632197399786788
exceeded: -12.498399318475537
exceeded: -8.526608380144772
exceeded: -9.11989976240948
exceeded: -25.355430087743276
exceeded: -1.2649194282831253
exceeded: -1.8638397099534234
exceeded: -87.48220432589284
exceeded: -0.7792586909418722
exceeded: -1.2821037509407485
exceeded: -133.40307754619747
exceeded: -2.090317807255583
exceeded: -0.8347299425349054
exceeded: -0.1797879031511243
exceeded: -0.004590759378245972
exceeded: -0.561538416123887
exceeded: -19.0723903462224
exceeded: -7.15149540981381
exceeded: -0.36533676739792265
exceeded: -0.19857178661351949
exceeded: -4.867124311039868
exceeded: -2.5215366534716424
exceeded: -2.2012556699441936
exceeded: -0.31855686565760016
exceeded: -0.3163844496059898
exceeded: -0.16244924133263522
exceeded: -4.584172688283313
exceeded: -201.77644156536036
exceeded: -95.04124739265046
exceeded: -1.0842507440848195
exceeded: -0.972704113663435
exceeded: -0.20198188180590038
exceeded: -0.2150

exceeded: -13.211827905608404
exceeded: -0.4370207332135919
exceeded: -0.29274931880775124
exceeded: -0.5218247660719815
exceeded: -0.8707032879107743
exceeded: -3.935134550141563
exceeded: -112.21885883024903
exceeded: -0.4260590474893371
exceeded: -75.84336794636752
exceeded: -0.27575519225815437
exceeded: -78.73961280204634
exceeded: -0.40325854330845373
exceeded: -19.8835997502029
exceeded: -14.837056554654389
exceeded: -2.9817410487733853
exceeded: -0.16487954121965048
exceeded: -0.21074768950456335
exceeded: -1.6451381379433336
exceeded: -0.1846566427193222
exceeded: -1.951950818001047
exceeded: -1.6120893174769604
exceeded: -0.014594710309395785
exceeded: -1.4066013773166766
exceeded: -1.360315643190895
exceeded: -47.39566612962018
exceeded: -4.344215977172298
exceeded: -2.5019286454044645
exceeded: -1.945462374200106
exceeded: -0.4272776550445926
exceeded: -0.12879223808269769
exceeded: -1.8831095744054303
exceeded: -14.460595462668502
exceeded: -7.18763883468167
exceeded: -1.3

exceeded: -0.0032497478301172014
exceeded: -2.3766326349427906
exceeded: -2.572343230707478
exceeded: -0.45531474458893667
exceeded: -9.262517474974528
exceeded: -2.1220127011860073
exceeded: -3.2762610507769714
exceeded: -0.06404043799938805
exceeded: -34.12442724756302
exceeded: -90.53833034388077
exceeded: -0.7110025222333085
exceeded: -0.22854699438956713
exceeded: -1.4170324297883752
exceeded: -6.0768901991592275
exceeded: -55.7288872474243
exceeded: -45.08460822855682
exceeded: -0.15935079255029727
exceeded: -0.1502201763184109
exceeded: -0.06230949848409979
exceeded: -1.6368926238577783
exceeded: -1.863537535276263
exceeded: -0.15896746584304847
exceeded: -0.2909356245282592
exceeded: -1.393714839118817
exceeded: -1.8897535402981003
exceeded: -0.31929887425522435
exceeded: -0.556774536005129
exceeded: -0.3720747454118648
exceeded: -1.596372765632952
exceeded: -0.7378045131327681
exceeded: -6.145538543708973
exceeded: -4.074174356911813
exceeded: -0.28613094935385563
------------

exceeded: -0.0666961081738285
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.45e+03    |
|    ep_rew_mean          | 44.7        |
| time/                   |             |
|    fps                  | 1104        |
|    iterations           | 65          |
|    time_elapsed         | 120         |
|    total_timesteps      | 133120      |
| train/                  |             |
|    approx_kl            | 0.011790503 |
|    clip_fraction        | 0.12        |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.24       |
|    explained_variance   | 0.439       |
|    learning_rate        | 0.0003      |
|    loss                 | 99.5        |
|    n_updates            | 640         |
|    policy_gradient_loss | -0.0146     |
|    std                  | 0.902       |
|    value_loss           | 205         |
-----------------------------------------
exceeded: -2.75815947553229
exceeded: -4.15096

exceeded: -1.7981393659496958
exceeded: -0.13174503549297106
exceeded: -2.8547278405420684
exceeded: -37.9736771167757
exceeded: -0.3292757061053765
exceeded: -1.0008998342345605
exceeded: -0.7253363863245941
exceeded: -2.7833860580043126
exceeded: -90.70282934503132
exceeded: -1.216245105915698
exceeded: -20.118950354357782
exceeded: -1.611369560690557
exceeded: -0.33970944270521586
exceeded: -13.445819669802187
exceeded: -0.41989525682998696
exceeded: -0.051173855438313834
exceeded: -1.9818752082036715
exceeded: -0.3499408816142449
exceeded: -5.6427640511868
exceeded: -1.6334973674272766
exceeded: -0.4125760317683605
exceeded: -0.20512358043163254
exceeded: -32.32426233412945
exceeded: -1.990267312292885
exceeded: -1.0921969411230257
exceeded: -0.10573009730456853
exceeded: -2.064750440414808
exceeded: -21.974554227820136
exceeded: -2.0611816243667533
exceeded: -1.1980112124461386
exceeded: -1.101068580132409
exceeded: -0.5029475060802597
exceeded: -0.40567658563697256
exceeded: -23.

exceeded: -134.10948117503017
exceeded: -3.027315575483641
exceeded: -43.475904755311774
exceeded: -0.7450891272326723
exceeded: -0.5867989166326035
exceeded: -0.9518704476072004
exceeded: -2.4298252483378873
exceeded: -0.11052435695386671
exceeded: -0.3241759925646276
exceeded: -0.1833599894972712
exceeded: -8.00774690132845
exceeded: -39.903784961324064
exceeded: -18.974026394319615
exceeded: -3.1619889767739284
exceeded: -17.253581788000048
exceeded: -0.2084556348436341
exceeded: -2.9563342438115177
exceeded: -1.2697166448992485
exceeded: -0.3981765891757622
exceeded: -0.2780365304667781
exceeded: -0.5060918832732673
exceeded: -0.10970141251401061
exceeded: -0.38852498357749177
exceeded: -0.012145500572415289
exceeded: -0.4104722583348918
exceeded: -0.856007393792948
exceeded: -68.63628987519849
exceeded: -0.23916909285402824
exceeded: -23.949862155535836
exceeded: -87.43922985826727
exceeded: -0.30961912443710704
exceeded: -0.29737813186428314
exceeded: -0.07154522135388652
exceede

exceeded: -0.9219138807455979
exceeded: -4.313008579466706
exceeded: -0.37710562031479256
exceeded: -0.06318779588846755
exceeded: -0.030859804153160995
exceeded: -1.5344908712350989
exceeded: -0.8393436737066934
exceeded: -0.8075857936929337
exceeded: -0.30580738424868426
exceeded: -1.134093169006878
exceeded: -1.8505903342043908
exceeded: -25.06909877038918
exceeded: -1.3693278131995703
exceeded: -0.14262065544419344
exceeded: -1.2695663386233185
exceeded: -0.05422824435062998
exceeded: -12.673356912018527
exceeded: -0.609153976947796
exceeded: -0.9168080329776889
exceeded: -0.17067840148239227
exceeded: -0.2041281507880974
exceeded: -0.4443616172535306
exceeded: -1.0931358371267468
exceeded: -1.7925466614672416
exceeded: -0.1729765606146016
exceeded: -1.6091495700636684
exceeded: -0.03218294793166082
exceeded: -0.051229144579350766
exceeded: -0.1687498663871933
exceeded: -31.15150924998112
exceeded: -146.49610449550184
exceeded: -0.3151744736757644
exceeded: -0.1863980513947391
exce

exceeded: -0.251457647743548
exceeded: -0.4235206897008239
exceeded: -0.2388887485498731
exceeded: -21.171943122909553
exceeded: -0.9353638367847124
exceeded: -0.5304535358580278
exceeded: -1.3291612555254257
exceeded: -0.1440902914643186
exceeded: -0.6528204515613505
exceeded: -0.24077438982725033
exceeded: -48.432654821242
exceeded: -2.403082945011651
exceeded: -0.008669106614705067
exceeded: -1.6114776486915567
exceeded: -0.7381919317730495
exceeded: -1.619779340587256
exceeded: -2.6840890452064565
exceeded: -11.825692550586723
exceeded: -10.385640464468768
exceeded: -0.02985790126516393
exceeded: -0.18830304405369616
exceeded: -0.4448666834070414
exceeded: -11.798111028116132
exceeded: -1.161245890100879
exceeded: -0.2852858275028104
exceeded: -0.0946612792242544
exceeded: -1.5261255198631507
exceeded: -14.695656044631216
exceeded: -0.4064406595727026
exceeded: -1.4748993695108845
exceeded: -0.26645365010925215
exceeded: -15.689664566919166
exceeded: -0.07087251440774348
----------

exceeded: -1.1024722332868298
exceeded: -1.1680737402625156
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.88e+03    |
|    ep_rew_mean          | 315         |
| time/                   |             |
|    fps                  | 1086        |
|    iterations           | 81          |
|    time_elapsed         | 152         |
|    total_timesteps      | 165888      |
| train/                  |             |
|    approx_kl            | 0.013123816 |
|    clip_fraction        | 0.166       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.11       |
|    explained_variance   | 0.239       |
|    learning_rate        | 0.0003      |
|    loss                 | 22.4        |
|    n_updates            | 800         |
|    policy_gradient_loss | -0.0104     |
|    std                  | 0.879       |
|    value_loss           | 85.1        |
-----------------------------------------
exceeded: -43.65

exceeded: -0.42479214105407564
exceeded: -0.11449593954399051
exceeded: -0.015202683279818881
exceeded: -0.3355837114726481
exceeded: -0.2335518728755387
exceeded: -1.9916038840376096
exceeded: -2.458685533887862
exceeded: -7.765105244335815
exceeded: -1.4579098118178921
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.01e+04    |
|    ep_rew_mean          | 422         |
| time/                   |             |
|    fps                  | 1087        |
|    iterations           | 85          |
|    time_elapsed         | 160         |
|    total_timesteps      | 174080      |
| train/                  |             |
|    approx_kl            | 0.012916432 |
|    clip_fraction        | 0.136       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.11       |
|    explained_variance   | 0.447       |
|    learning_rate        | 0.0003      |
|    loss                 | 49.9        |
|    n_updates

exceeded: -112.14567263397726
exceeded: -210.26511842109133
exceeded: -0.012010354502079454
exceeded: -3.308141229350101
exceeded: -0.7439226027905831
exceeded: -0.6581441353796234
exceeded: -1.8562030290729923
exceeded: -17.62523396081868
exceeded: -0.017429842989570878
exceeded: -2.561589183477317
exceeded: -17.765724717466814
exceeded: -0.2346075657027487
exceeded: -1.9914749957136482
exceeded: -1.2468704278555327
exceeded: -4.0261934201019205
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.01e+04     |
|    ep_rew_mean          | 422          |
| time/                   |              |
|    fps                  | 1090         |
|    iterations           | 89           |
|    time_elapsed         | 167          |
|    total_timesteps      | 182272       |
| train/                  |              |
|    approx_kl            | 0.0076621175 |
|    clip_fraction        | 0.0798       |
|    clip_range           | 0.2  

exceeded: -1.4232524550283125
exceeded: -1.0692502355649152
exceeded: -0.9426810307888625
exceeded: -0.2765990361261527
exceeded: -0.014500587047896314
exceeded: -0.20997298523919056
exceeded: -9.238424840652954
exceeded: -1.7364846551440325
exceeded: -0.17183177760173735
exceeded: -111.85108441697464
exceeded: -0.12683013613668562
exceeded: -10.362482999134201
exceeded: -0.08723603079395573
exceeded: -0.31608692980355557
exceeded: -16.065148801103316
exceeded: -57.33492129502458
exceeded: -0.3102985831398631
exceeded: -0.8706571481001191
exceeded: -1.572492820296523
exceeded: -0.26280059313658144
exceeded: -33.93385311460424
exceeded: -0.49879894027206056
exceeded: -0.029116588443175506
exceeded: -0.22210896000727864
exceeded: -0.2556846410093933
exceeded: -0.054894187545861395
exceeded: -0.06142209812156611
exceeded: -0.2526537373210857
exceeded: -2.860507319132573
exceeded: -1.2690790728496042
exceeded: -1.0230414735666806
exceeded: -0.5575884056783406
exceeded: -46.90117614975583
e

exceeded: -0.9449566585626936
exceeded: -0.5114444230150754
exceeded: -1.0165938563481807
exceeded: -0.2961087873225668
exceeded: -0.4819849727927777
exceeded: -0.6648768303227386
exceeded: -1.7613624553232385
exceeded: -0.05271727533561239
exceeded: -4.032408864103067
exceeded: -0.5030438848608233
exceeded: -0.6943949518126145
exceeded: -3.1523210849859753
exceeded: -0.07101245259420572
exceeded: -0.4078616009858429
exceeded: -0.2598363591271473
exceeded: -1.5432047930465522
exceeded: -4.1997002639246075
exceeded: -0.9051489058326949
exceeded: -15.719316623166124
exceeded: -0.18919661622940798
exceeded: -1.395150719360121
exceeded: -0.5334992147554369
exceeded: -98.1302030410073
exceeded: -1.0184759444344724
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.03e+04    |
|    ep_rew_mean          | 619         |
| time/                   |             |
|    fps                  | 1080        |
|    iterations           | 9

exceeded: -140.4324867337311
exceeded: -0.03823960909601705
exceeded: -5.438039439832215
exceeded: -6.078854849548478
exceeded: -0.10974655304441368
exceeded: -0.4666888735462776
exceeded: -12.803569596907012
exceeded: -0.3078302861441265
exceeded: -3.428180482797583
exceeded: -5.075608395702051
exceeded: -47.29922930619958
exceeded: -55.148584668887715
exceeded: -13.154330988944196
exceeded: -20.33155428835716
exceeded: -3.4009734166258028
exceeded: -122.992846358337
exceeded: -1.1665156115591282
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.03e+04    |
|    ep_rew_mean          | 619         |
| time/                   |             |
|    fps                  | 1077        |
|    iterations           | 99          |
|    time_elapsed         | 188         |
|    total_timesteps      | 202752      |
| train/                  |             |
|    approx_kl            | 0.011714319 |
|    clip_fraction        | 0.107  

exceeded: -0.937716218663867
exceeded: -0.3080844467329188
exceeded: -18.470276479836322
exceeded: -0.08418864805839683
exceeded: -0.5220685040928368
exceeded: -0.07649087513600666
exceeded: -0.3004944873082404
exceeded: -3.643383997613663
exceeded: -1.295833266124218
exceeded: -0.4815439932929809
exceeded: -47.52964872688927
exceeded: -0.09561159839419975
exceeded: -0.02959053345841171
exceeded: -0.6443636977149237
exceeded: -0.5021121497671494
exceeded: -1.572270030942481
exceeded: -0.11799858980532585
exceeded: -0.5602827987897013
exceeded: -1.6585372932465023
exceeded: -0.8071299854753144
exceeded: -1.2507980554095721
exceeded: -0.0683709820793678
exceeded: -0.039426332962988093
exceeded: -0.7272951527304351
exceeded: -0.8770695491431477
exceeded: -0.5884719098771064
exceeded: -1.043042440825725
exceeded: -3.9181621500736625
exceeded: -1.502731265087069
exceeded: -1.1100707249553805
exceeded: -59.65273390549935
exceeded: -0.6201012490249633
exceeded: -0.46643796342059285
exceeded: 

exceeded: -0.5563025880440611
exceeded: -12.707575862387372
exceeded: -0.7514033817847087
exceeded: -0.05435807766871291
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.05e+04    |
|    ep_rew_mean          | 673         |
| time/                   |             |
|    fps                  | 1072        |
|    iterations           | 107         |
|    time_elapsed         | 204         |
|    total_timesteps      | 219136      |
| train/                  |             |
|    approx_kl            | 0.008718489 |
|    clip_fraction        | 0.107       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.94       |
|    explained_variance   | 0.0832      |
|    learning_rate        | 0.0003      |
|    loss                 | 68.6        |
|    n_updates            | 1060        |
|    policy_gradient_loss | -0.0102     |
|    std                  | 0.839       |
|    value_loss           | 1.62e+03   

exceeded: -0.8263820726103828
exceeded: -2.7345940156910093
exceeded: -0.9992044989196338
exceeded: -0.8469213302499345
exceeded: -0.18858456242762583
exceeded: -2.0410439997614
exceeded: -1.5344160507263913
exceeded: -0.3853519605911533
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.07e+04    |
|    ep_rew_mean          | 761         |
| time/                   |             |
|    fps                  | 1068        |
|    iterations           | 110         |
|    time_elapsed         | 210         |
|    total_timesteps      | 225280      |
| train/                  |             |
|    approx_kl            | 0.014270717 |
|    clip_fraction        | 0.0991      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.89       |
|    explained_variance   | 0.342       |
|    learning_rate        | 0.0003      |
|    loss                 | 129         |
|    n_updates            | 1090        |
|    p

exceeded: -11.717919078729174
exceeded: -0.19391225717854776
exceeded: -6.782028285803756
exceeded: -1.6520462032392034
exceeded: -3.4014219830084698
exceeded: -1.0394500410911853
exceeded: -4.0340054169451856
exceeded: -0.4807760821494841
exceeded: -0.3544055252250639
exceeded: -0.18751058541098753
exceeded: -0.1303176855825682
exceeded: -1.3808593495245165
exceeded: -0.035208552207842865
exceeded: -0.008103263411000799
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.07e+04    |
|    ep_rew_mean          | 761         |
| time/                   |             |
|    fps                  | 1068        |
|    iterations           | 114         |
|    time_elapsed         | 218         |
|    total_timesteps      | 233472      |
| train/                  |             |
|    approx_kl            | 0.014316531 |
|    clip_fraction        | 0.128       |
|    clip_range           | 0.2         |
|    entropy_loss         | -

exceeded: -236.88197856937148
exceeded: -89.38432737289122
exceeded: -0.16149657721915786
exceeded: -0.38517867441381515
exceeded: -0.601345962748932
exceeded: -45.859818811995794
exceeded: -0.23479903943858463
exceeded: -42.82786156182128
exceeded: -1.5332470937637224
exceeded: -9.914722763294613
exceeded: -0.7613081607686178
exceeded: -0.18443512565080916
exceeded: -0.3175371341530025
exceeded: -0.13680661382072887
exceeded: -0.7262546287218942
exceeded: -0.0882475179944381
exceeded: -0.8190058072382872
exceeded: -0.7000841866023131
exceeded: -0.16265247094289692
exceeded: -0.994448132027626
exceeded: -0.05183631822735894
exceeded: -0.4142655113202752
exceeded: -10.142776999875888
exceeded: -0.9249461986691052
exceeded: -0.3446535795168227
exceeded: -1.2933907175405166
exceeded: -1.5154120173387828
exceeded: -2.283893766622512
exceeded: -0.05402133164872162
exceeded: -0.08847170024910181
exceeded: -0.11001662839848937
exceeded: -0.0045179479671598405
exceeded: -0.7284474859332999
exc

exceeded: -0.8606850303772473
exceeded: -0.1539172496214657
exceeded: -0.4362116634937488
exceeded: -0.4304048240022676
exceeded: -2.049626739723024
exceeded: -0.23821530823938772
exceeded: -69.63669269353103
exceeded: -0.779995855962583
exceeded: -0.12559187544356354
exceeded: -0.7015179968312799
exceeded: -0.9321522280231166
exceeded: -0.5968466891646615
exceeded: -0.9966919296187664
exceeded: -0.7509910166797176
exceeded: -0.1849197883368201
exceeded: -0.11186435839229916
exceeded: -2.373021132602923
exceeded: -0.8107894886343534
exceeded: -0.40511660010013406
exceeded: -0.5580911162169634
exceeded: -50.87764167430434
exceeded: -4.503319830327746
exceeded: -16.5255425369902
exceeded: -0.8714897646657798
exceeded: -51.1302406006307
exceeded: -10.452702777270295
exceeded: -84.88139655305999
exceeded: -0.07624704863803673
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.09e+04    |
|    ep_rew_mean          | 1.02e+03    

exceeded: -0.1375756955122774
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.09e+04    |
|    ep_rew_mean          | 1.02e+03    |
| time/                   |             |
|    fps                  | 1068        |
|    iterations           | 127         |
|    time_elapsed         | 243         |
|    total_timesteps      | 260096      |
| train/                  |             |
|    approx_kl            | 0.015674971 |
|    clip_fraction        | 0.14        |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.71       |
|    explained_variance   | 0.146       |
|    learning_rate        | 0.0003      |
|    loss                 | 104         |
|    n_updates            | 1260        |
|    policy_gradient_loss | -0.00773    |
|    std                  | 0.79        |
|    value_loss           | 718         |
-----------------------------------------
exceeded: -0.7303564213838815
exceeded: -0.059

exceeded: -2.7104045630865676
exceeded: -0.18755387974451065
exceeded: -0.5823276918930667
exceeded: -30.806828186818723
exceeded: -37.689919845458235
exceeded: -0.9535695289961593
exceeded: -45.21062333931986
exceeded: -0.6096265266169703
exceeded: -0.3214956253584162
exceeded: -0.44684481417805405
exceeded: -1.1026612136011746
exceeded: -2.8651655745564004
exceeded: -1.3153147793054973
exceeded: -0.34475545375670386
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.09e+04    |
|    ep_rew_mean          | 1.07e+03    |
| time/                   |             |
|    fps                  | 1068        |
|    iterations           | 132         |
|    time_elapsed         | 253         |
|    total_timesteps      | 270336      |
| train/                  |             |
|    approx_kl            | 0.012805834 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.6

exceeded: -0.6181359815682654
exceeded: -0.23703573225201144
exceeded: -0.17353935122409203
exceeded: -1.5113597683250635
exceeded: -0.045283899610665784
exceeded: -0.0048607471395717096
exceeded: -1.1785184382542209
exceeded: -1.2244172922645071
exceeded: -7.31620367614172
exceeded: -0.9051886077799547
exceeded: -0.857302353320414
exceeded: -0.300208898067723
exceeded: -0.45573732047269894
exceeded: -0.4104221399462929
exceeded: -0.21916119151940733
exceeded: -0.0495702090071004
exceeded: -2.9362119903038035
exceeded: -1.2771451370358842
exceeded: -198.99900851849458
exceeded: -1.507286944791065
exceeded: -0.11133223876793913
exceeded: -1.2242560083371496
exceeded: -0.32574554687487867
exceeded: -1.1184612388812094
exceeded: -0.6047349883946074
exceeded: -2.6339976380326653
exceeded: -0.3965539382022555
exceeded: -0.009552870011458329
exceeded: -0.2631598538951805
exceeded: -7.24524832121639
exceeded: -1.228663643098453
exceeded: -2.338605070289033
exceeded: -22.472463360479118
exceed

exceeded: -0.33541353145919284
exceeded: -1.55162893322912
exceeded: -0.2500050254801755
exceeded: -17.188578308621523
exceeded: -36.894964169353976
exceeded: -0.8379192657905388
exceeded: -0.42892450688990325
exceeded: -0.05507990930418706
exceeded: -0.3452086367481835
exceeded: -0.6791876159290542
exceeded: -1.3828030591849187
exceeded: -31.906802199754104
exceeded: -24.730951150606103
exceeded: -1.8575588209163107
exceeded: -0.38831255350749416
exceeded: -0.9573777911137287
exceeded: -32.90651195503066
exceeded: -2.0884665425888747
exceeded: -0.5855389092668252
exceeded: -0.6481213129949781
exceeded: -2.911517633977136
exceeded: -1.5198079384042504
exceeded: -1.565548609738402
exceeded: -1.3672741186930095
exceeded: -87.06006256244527
exceeded: -0.16174845523644552
exceeded: -1.663837785216361
exceeded: -0.5246532132503485
exceeded: -1.1937889675617022
exceeded: -0.8033209990633874
exceeded: -127.67281110340669
exceeded: -4.545141595663046
exceeded: -0.24045429675745267
exceeded: -0

exceeded: -0.26080853337162735
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.11e+04    |
|    ep_rew_mean          | 1.16e+03    |
| time/                   |             |
|    fps                  | 1064        |
|    iterations           | 141         |
|    time_elapsed         | 271         |
|    total_timesteps      | 288768      |
| train/                  |             |
|    approx_kl            | 0.022532504 |
|    clip_fraction        | 0.196       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.65       |
|    explained_variance   | 0.147       |
|    learning_rate        | 0.0003      |
|    loss                 | 62.2        |
|    n_updates            | 1400        |
|    policy_gradient_loss | -0.0173     |
|    std                  | 0.779       |
|    value_loss           | 191         |
-----------------------------------------
exceeded: -1.6108123627075885
exceeded: -2.01

exceeded: -23.245982884320377
exceeded: -1.8425804233222165
exceeded: -0.4698806929444626
exceeded: -0.14328409084278448
exceeded: -2.1790559131298237
exceeded: -71.77644587875537
exceeded: -0.8716033612090837
exceeded: -1.6971767323347882
exceeded: -0.4250928780192045
exceeded: -0.617711047800353
exceeded: -1.6554799220329826
exceeded: -1.4430068764353474
exceeded: -0.09848976123113236
exceeded: -2.3612023207220547
exceeded: -0.9438018014210546
exceeded: -0.5956061650992157
exceeded: -2.110057194826096
exceeded: -1.4500923192388977
exceeded: -31.51064267087266
exceeded: -1.3490875700404379
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.11e+04     |
|    ep_rew_mean          | 1.16e+03     |
| time/                   |              |
|    fps                  | 1061         |
|    iterations           | 145          |
|    time_elapsed         | 279          |
|    total_timesteps      | 296960       |
| train/       

exceeded: -469.9764302787024
exceeded: -0.3899621633071929
exceeded: -46.080066768271344
exceeded: -1.3501169541370772
exceeded: -0.4554796630948923
exceeded: -0.13673061950302065
exceeded: -0.5579827510863749
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.12e+04   |
|    ep_rew_mean          | 1.23e+03   |
| time/                   |            |
|    fps                  | 1058       |
|    iterations           | 149        |
|    time_elapsed         | 288        |
|    total_timesteps      | 305152     |
| train/                  |            |
|    approx_kl            | 0.01246619 |
|    clip_fraction        | 0.152      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.49      |
|    explained_variance   | 0.199      |
|    learning_rate        | 0.0003     |
|    loss                 | 66.8       |
|    n_updates            | 1480       |
|    policy_gradient_loss | -0.0153    |
|    std   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.12e+04    |
|    ep_rew_mean          | 1.23e+03    |
| time/                   |             |
|    fps                  | 1055        |
|    iterations           | 153         |
|    time_elapsed         | 296         |
|    total_timesteps      | 313344      |
| train/                  |             |
|    approx_kl            | 0.012245355 |
|    clip_fraction        | 0.125       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.47       |
|    explained_variance   | 0.0458      |
|    learning_rate        | 0.0003      |
|    loss                 | 348         |
|    n_updates            | 1520        |
|    policy_gradient_loss | -0.0142     |
|    std                  | 0.748       |
|    value_loss           | 1.16e+03    |
-----------------------------------------
exceeded: -333.32221614482694
exceeded: -0.035338192898508175
exceeded: -2.4

exceeded: -14.595770962542687
exceeded: -0.8399918152786617
exceeded: -181.75393704637312
exceeded: -33.75089412552341
exceeded: -0.6742457081621935
exceeded: -0.08221414690340653
exceeded: -1.5045331169070728
exceeded: -23.064789005893374
exceeded: -0.8632566330702003
exceeded: -0.7663535129434018
exceeded: -80.92503026657722
exceeded: -31.405713011280746
exceeded: -39.84904098432394
exceeded: -19.051624549800188
exceeded: -19.54429398039496
exceeded: -0.2929183569082825
exceeded: -0.06785963226544431
exceeded: -186.86125123126968
exceeded: -2.1900579148782393
exceeded: -1.1008844347798639
exceeded: -2.291849784566628
exceeded: -0.34094405275604833
exceeded: -4.296388922937011
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.12e+04    |
|    ep_rew_mean          | 1.27e+03    |
| time/                   |             |
|    fps                  | 1049        |
|    iterations           | 159         |
|    time_elapsed  

exceeded: -0.6109163553426157
exceeded: -2.77826743692833
exceeded: -1.3844810932637612
exceeded: -1.1801984911396415
exceeded: -0.16122630507370328
exceeded: -0.9388172391582994
exceeded: -2.4712760671314578
exceeded: -0.2519958728950471
exceeded: -5.27705565005123
exceeded: -3.0022179928596255
exceeded: -9.7964483148224
exceeded: -28.04021737679086
exceeded: -0.06820574547611871
exceeded: -0.42515410144055243
exceeded: -0.5083231849373628
exceeded: -0.8162927797785084
exceeded: -1.5370489436269752
exceeded: -0.7710529958223991
exceeded: -0.42678466533073744
exceeded: -1.0847071137020057
exceeded: -0.6718064065721387
exceeded: -1.8854611000833574
exceeded: -2.5419364148120716
exceeded: -251.59366895017027
exceeded: -0.736380516884123
exceeded: -0.36188154368030656
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.13e+04    |
|    ep_rew_mean          | 1.48e+03    |
| time/                   |             |
|    fps      

exceeded: -0.19318311608794256
exceeded: -187.37534196983677
exceeded: -0.3946150137713301
exceeded: -0.08781105444795234
exceeded: -2.5073710683237396
exceeded: -53.140232166426394
exceeded: -0.3443651622513932
exceeded: -0.045008869854970046
exceeded: -1.0483541296240069
exceeded: -3.3124158899407012
exceeded: -0.3083647771499179
exceeded: -0.059978782272153114
exceeded: -0.07342587526323063
exceeded: -0.43625783042537036
exceeded: -0.982436743729011
exceeded: -0.5513141230991337
exceeded: -0.856575652211115
exceeded: -0.9913977322824978
exceeded: -0.3687758404303533
exceeded: -0.04845455296222034
exceeded: -0.5006074959902596
exceeded: -1.7382159242434378
exceeded: -0.42840149289924806
exceeded: -0.8042673164411458
exceeded: -0.08621373298275671
exceeded: -0.7432363964467436
exceeded: -0.1189981196643587
exceeded: -21.14381212456719
exceeded: -2.0480808808667317
exceeded: -0.43889608776747796
exceeded: -1.0780691729280003
exceeded: -180.0045066708793
exceeded: -0.44474009183648966
e

exceeded: -2.2999928413309263
exceeded: -3.7383784740575132
exceeded: -4.5510817407602815
exceeded: -20.11632036353365
exceeded: -121.94177145453999
exceeded: -0.17148969367561417
exceeded: -1.2118286636174855
exceeded: -1.1654025733165343
exceeded: -1.755245350071967
exceeded: -0.27700454460558954
exceeded: -0.3184451245397785
exceeded: -0.4100806775667082
exceeded: -0.5725927259659557
exceeded: -0.8752360759484167
exceeded: -1.1627915784810081
exceeded: -0.6685644479444596
exceeded: -0.42408891143025235
exceeded: -0.7118398160377999
exceeded: -4.584957224693883
exceeded: -0.9242186485035002
exceeded: -2.652847320046538
exceeded: -0.09044311408185346
exceeded: -0.16149352237389075
exceeded: -0.4006359631613953
exceeded: -0.3762011631425115
exceeded: -0.008806189187817748
exceeded: -0.1269759172106798
exceeded: -0.6403185536413007
exceeded: -19.028057257025615
exceeded: -0.7311892894658838
exceeded: -3.624206860380684
exceeded: -3.7377511132701913
exceeded: -0.2633641911241128
exceeded

exceeded: -91.5960206182224
exceeded: -0.5663048975344711
exceeded: -9.766503417462056
exceeded: -1.0386455865614237
exceeded: -0.613085424929864
exceeded: -4.0339424880417765
exceeded: -6.602057093585168
exceeded: -2.778713193869305
exceeded: -1.6011918424077687
exceeded: -2.736932772078312
exceeded: -0.030064566105432775
exceeded: -0.004667637445491254
exceeded: -154.0037657764071
exceeded: -0.26852604581343426
exceeded: -0.21891076584362038
exceeded: -0.8871509721683188
exceeded: -0.11846082016166119
exceeded: -0.6163417594951134
exceeded: -31.579105320817376
exceeded: -0.07154717149413305
exceeded: -0.2290924981949076
exceeded: -91.9021252348193
exceeded: -0.10439334600549352
exceeded: -0.3964482340349681
exceeded: -0.14597887465954568
exceeded: -1.0024616257018206
exceeded: -2.028614397924328
exceeded: -2.3830227196613727
exceeded: -1.0212726413435633
exceeded: -1.8030310518702888
exceeded: -0.09411678698626716
----------------------------------------
| rollout/                |  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.14e+04    |
|    ep_rew_mean          | 1.6e+03     |
| time/                   |             |
|    fps                  | 1042        |
|    iterations           | 182         |
|    time_elapsed         | 357         |
|    total_timesteps      | 372736      |
| train/                  |             |
|    approx_kl            | 0.028116237 |
|    clip_fraction        | 0.255       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.98       |
|    explained_variance   | 0.453       |
|    learning_rate        | 0.0003      |
|    loss                 | 15.9        |
|    n_updates            | 1810        |
|    policy_gradient_loss | -0.0149     |
|    std                  | 0.663       |
|    value_loss           | 32.4        |
-----------------------------------------
exceeded: -0.5296892387555103
exceeded: -0.6026241710504848
exceeded: -2.016

exceeded: -0.9565701368019206
exceeded: -9.519925599827772
exceeded: -13.533275254588702
exceeded: -35.73647038939934
exceeded: -4.1396058805722245
exceeded: -146.25032533003008
exceeded: -21.64963087258194
exceeded: -90.00531277177151
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.15e+04    |
|    ep_rew_mean          | 1.68e+03    |
| time/                   |             |
|    fps                  | 1041        |
|    iterations           | 187         |
|    time_elapsed         | 367         |
|    total_timesteps      | 382976      |
| train/                  |             |
|    approx_kl            | 0.014456157 |
|    clip_fraction        | 0.141       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.97       |
|    explained_variance   | 0.327       |
|    learning_rate        | 0.0003      |
|    loss                 | 298         |
|    n_updates            | 1860        |
|    pol

exceeded: -143.96542483288243
exceeded: -1.0881051454325716
exceeded: -0.1676925065063457
exceeded: -0.811931319576106
exceeded: -0.42435791097311115
exceeded: -3.2868584465850015
exceeded: -0.12706303193745216
exceeded: -2.2390758718024246
exceeded: -6.937098443887048e-05
exceeded: -0.41694344268840283
exceeded: -0.036865175910749885
exceeded: -0.3527583453879204
exceeded: -0.3272848251424103
exceeded: -2.8478642308160773
exceeded: -185.97093253732464
exceeded: -0.4733807149484205
exceeded: -0.44012989804148744
exceeded: -0.6764416451947699
exceeded: -20.709359504825706
exceeded: -0.4841624276785137
exceeded: -0.04728449171563281
exceeded: -7.07640548580585
exceeded: -162.51385060162727
exceeded: -32.65590980375114
exceeded: -0.737395990174638
exceeded: -0.2975865749675676
exceeded: -0.8612048099294649
exceeded: -0.3064587882807546
exceeded: -0.15243041111554714
exceeded: -0.5696668386488009
exceeded: -1.5400986957671066
exceeded: -0.1263755184087105
exceeded: -1.1504822586489525
exce

exceeded: -24.92979985360161
exceeded: -0.838877700059621
exceeded: -1.8519994092868965
exceeded: -0.9200662206499735
exceeded: -2.4975621358408744
exceeded: -10.609988117705
exceeded: -0.5107551822557814
exceeded: -0.8241770104505987
exceeded: -2.525165360704223
exceeded: -1.7647653189644574
exceeded: -0.0651978218577738
exceeded: -0.16492915743907766
exceeded: -2.4197890363326673
exceeded: -1.2480047146844289
exceeded: -0.7557655995182802
exceeded: -0.2866791388319548
exceeded: -2.0599112681191203
exceeded: -1.0848064334610943
exceeded: -0.22121852157175237
exceeded: -2.960429854050781
exceeded: -0.6226917722148688
exceeded: -1.3166948592708438
exceeded: -0.08526714809511254
exceeded: -0.7866335480893325
exceeded: -0.13648673416119295
exceeded: -1.9823793899428621
exceeded: -0.5989508517467055
exceeded: -0.12279970896142635
exceeded: -0.39054659264351893
exceeded: -1.249062986063299
exceeded: -0.9135495205562072
exceeded: -0.4346055680009038
exceeded: -0.7771844924919286
exceeded: -0

exceeded: -0.19644734540560535
exceeded: -9.085352662534657
exceeded: -54.82711105414903
exceeded: -2.3393357177872454
exceeded: -0.8198317993865929
exceeded: -0.47417064697799666
exceeded: -0.05304826578120532
exceeded: -0.472179124270185
exceeded: -0.9558899883547917
exceeded: -12.471200111624174
exceeded: -3.4128973093037054
exceeded: -178.60153307445694
exceeded: -1.1147288191231932
exceeded: -0.30680723897897083
exceeded: -0.6808738271562254
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 1.7e+03     |
| time/                   |             |
|    fps                  | 1043        |
|    iterations           | 199         |
|    time_elapsed         | 390         |
|    total_timesteps      | 407552      |
| train/                  |             |
|    approx_kl            | 0.013638587 |
|    clip_fraction        | 0.137       |
|    clip_range           | 0.2         |
|  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 1.7e+03     |
| time/                   |             |
|    fps                  | 1044        |
|    iterations           | 204         |
|    time_elapsed         | 400         |
|    total_timesteps      | 417792      |
| train/                  |             |
|    approx_kl            | 0.008206715 |
|    clip_fraction        | 0.0742      |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.73       |
|    explained_variance   | 0.173       |
|    learning_rate        | 0.0003      |
|    loss                 | 83.9        |
|    n_updates            | 2030        |
|    policy_gradient_loss | -0.00949    |
|    std                  | 0.62        |
|    value_loss           | 1.78e+03    |
-----------------------------------------
exceeded: -118.14834294146353
exceeded: -0.6520964613148361
exceeded: -0.193

exceeded: -0.4397262124150527
exceeded: -1.6340097859586264
exceeded: -161.29904997205216
exceeded: -0.0017291238221414024
exceeded: -0.7350955072116947
exceeded: -1.4314731745005573
exceeded: -3.4385818345091366
exceeded: -0.24180159428624895
exceeded: -43.23024651223428
exceeded: -1.5501848051773712
exceeded: -48.40482312764628
exceeded: -0.7145839292011171
exceeded: -52.597288648579486
exceeded: -0.06791951307909483
exceeded: -0.6055787349121766
exceeded: -0.046539135702168444
exceeded: -0.6570353308941818
exceeded: -0.37521520419758153
exceeded: -1.1177982412976875
exceeded: -1.138178942212343
exceeded: -12.37331362194415
exceeded: -0.4175369322637439
exceeded: -15.737386943225832
exceeded: -39.25128678131924
exceeded: -9.946676934434285
exceeded: -25.58700943312322
exceeded: -13.128295519271315
exceeded: -2.3387051308500544
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 1.76e

exceeded: -128.9905339250665
exceeded: -0.11723815750899133
exceeded: -1.087517194974496
exceeded: -0.2461468668305603
exceeded: -0.3956414900804431
exceeded: -1.4860481222240245
exceeded: -0.3584571272193851
exceeded: -0.1155070938608347
exceeded: -0.5117686367135138
exceeded: -0.3792175771376022
exceeded: -3.6085549071979157
exceeded: -2.554877430513813
exceeded: -2.0486746091387134
exceeded: -0.3598605595090457
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.17e+04  |
|    ep_rew_mean          | 1.82e+03  |
| time/                   |           |
|    fps                  | 1045      |
|    iterations           | 214       |
|    time_elapsed         | 419       |
|    total_timesteps      | 438272    |
| train/                  |           |
|    approx_kl            | 0.0324374 |
|    clip_fraction        | 0.296     |
|    clip_range           | 0.2       |
|    entropy_loss         | -3.6      |
|    explained_varianc

exceeded: -55.571057870932776
exceeded: -0.6026297654717
exceeded: -0.15501619046051762
exceeded: -1.072866882393844
exceeded: -0.7006719998648981
exceeded: -30.225283963812398
exceeded: -0.575077798401464
exceeded: -0.11771766874581578
exceeded: -1.4097547619996513
exceeded: -1.0244678328457766
exceeded: -0.909925013832289
exceeded: -3.6198489857013123
exceeded: -0.6797285268890747
exceeded: -0.19183240940850071
exceeded: -0.7803799756119985
exceeded: -1.0159572778751476
exceeded: -0.5479477454868221
exceeded: -0.8589626844412223
exceeded: -0.04937583605717239
exceeded: -109.3898812084081
exceeded: -1.1718448836435167
exceeded: -0.3108484817120632
exceeded: -21.835705546501618
exceeded: -0.1413683706197129
exceeded: -1.4205538029955402
exceeded: -2.6460558115135795
exceeded: -3.8416622758106693
exceeded: -22.05206273802459
exceeded: -2.0584674510424414
exceeded: -1.409227012024009
exceeded: -1.43651073556886
exceeded: -1.6288823266055061
exceeded: -137.74565824052627
exceeded: -21.209

exceeded: -80.13875279866751
exceeded: -0.2910943691464377
exceeded: -0.45453932874869496
exceeded: -21.09881688092599
exceeded: -0.4603650680553114
exceeded: -4.3954824559159125
exceeded: -29.569030991661286
exceeded: -3.636604575393789
exceeded: -163.05896263292152
exceeded: -0.27858105485278306
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.17e+04    |
|    ep_rew_mean          | 1.94e+03    |
| time/                   |             |
|    fps                  | 1041        |
|    iterations           | 225         |
|    time_elapsed         | 442         |
|    total_timesteps      | 460800      |
| train/                  |             |
|    approx_kl            | 0.023346718 |
|    clip_fraction        | 0.263       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.51       |
|    explained_variance   | 0.296       |
|    learning_rate        | 0.0003      |
|    loss                 | 2

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04    |
|    ep_rew_mean          | 2.02e+03    |
| time/                   |             |
|    fps                  | 1039        |
|    iterations           | 231         |
|    time_elapsed         | 455         |
|    total_timesteps      | 473088      |
| train/                  |             |
|    approx_kl            | 0.015331073 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.45       |
|    explained_variance   | 0.215       |
|    learning_rate        | 0.0003      |
|    loss                 | 73.3        |
|    n_updates            | 2300        |
|    policy_gradient_loss | -0.0157     |
|    std                  | 0.578       |
|    value_loss           | 145         |
-----------------------------------------
exceeded: -0.5441424897418662
exceeded: -5.952517856413322
exceeded: -0.2679

exceeded: -1.456548556187922
exceeded: -0.008072217416605219
exceeded: -0.5598185460422119
exceeded: -0.6005024303727678
exceeded: -3.673289498516698
exceeded: -2.3207726350279896
exceeded: -0.7589179052710855
exceeded: -12.76610253691854
exceeded: -0.056734052954626385
exceeded: -0.00515321196629946
exceeded: -0.14932783178117706
exceeded: -0.8772455571240225
exceeded: -0.10193171489134964
exceeded: -2.4193859638200537
exceeded: -0.6319457643621595
exceeded: -1.2943364371649992
exceeded: -1.1187066399199046
exceeded: -0.13654611494665606
exceeded: -0.004990740068856951
exceeded: -1.1609732744568648
exceeded: -1.3823291037145142
exceeded: -0.1806025705519147
exceeded: -2.34649784969177
exceeded: -3.147510171281594
exceeded: -0.020648440373084286
exceeded: -0.08901451720300085
exceeded: -1.0847017175784621
exceeded: -1.0487738840472844
exceeded: -0.017046331097148974
exceeded: -0.08629067553861343
exceeded: -61.02746606756821
exceeded: -9.320726148181585
exceeded: -4.591362945927375
exc

exceeded: -107.72580263950317
exceeded: -22.619641776074587
exceeded: -0.014408937410436374
exceeded: -2.5588500377027272
exceeded: -1.5320378824625738
exceeded: -0.8585239821229202
exceeded: -0.08927926491275451
exceeded: -5.65523108585023
exceeded: -7.536158095191249
exceeded: -12.275936937001187
exceeded: -32.4052842325646
exceeded: -4.989925399516381
exceeded: -1.1906505956620377
exceeded: -144.90866971312127
exceeded: -3.5462223072125534
exceeded: -0.21492141046233598
exceeded: -1.54318770109236
exceeded: -25.620516397369343
exceeded: -0.598351906192154
exceeded: -0.25334264852728267
exceeded: -2.6430116223001003
exceeded: -44.4722035024522
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.18e+04  |
|    ep_rew_mean          | 2.08e+03  |
| time/                   |           |
|    fps                  | 1037      |
|    iterations           | 241       |
|    time_elapsed         | 475       |
|    total_timesteps      

exceeded: -16.29513417792414
exceeded: -87.98233691624583
exceeded: -1.6069170809039162
exceeded: -0.2248059160770005
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04    |
|    ep_rew_mean          | 2.1e+03     |
| time/                   |             |
|    fps                  | 1037        |
|    iterations           | 245         |
|    time_elapsed         | 483         |
|    total_timesteps      | 501760      |
| train/                  |             |
|    approx_kl            | 0.027806845 |
|    clip_fraction        | 0.271       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.42       |
|    explained_variance   | -0.181      |
|    learning_rate        | 0.0003      |
|    loss                 | 36.3        |
|    n_updates            | 2440        |
|    policy_gradient_loss | -0.00686    |
|    std                  | 0.574       |
|    value_loss           | 119         |


exceeded: -3.7037952966028698
exceeded: -1.1083695928892012
exceeded: -0.6768045481662721
exceeded: -0.3374544602196058
exceeded: -0.14830361701195902
exceeded: -1.4754043507214125
exceeded: -0.914366766248338
exceeded: -22.054914314871304
exceeded: -0.8961036108243913
exceeded: -1.9003968499362132
exceeded: -1.0344515708883075
exceeded: -0.6850888664430964
exceeded: -2.044924984992362
exceeded: -0.8872346539423613
exceeded: -0.37529450350486415
exceeded: -0.08043380503171951
exceeded: -0.47615958382014356
exceeded: -1.5915036055509935
exceeded: -0.4063725410431413
exceeded: -0.5005081300995766
exceeded: -0.35882346621286537
exceeded: -1.031845985260769
exceeded: -0.24516205060779822
exceeded: -0.3140904798591173
exceeded: -1.8743049911979393
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04    |
|    ep_rew_mean          | 2.1e+03     |
| time/                   |             |
|    fps                  | 1037     

exceeded: -37.15783342891366
exceeded: -0.18015659156527822
exceeded: -0.9766551693250045
exceeded: -0.15808890556623012
exceeded: -1.0313256581240984
exceeded: -47.0513408337236
exceeded: -33.82009191467776
exceeded: -0.10132636151009133
exceeded: -0.12315885928423231
exceeded: -1.2915622183361888
exceeded: -186.7410227978496
exceeded: -1.0331935526093856
exceeded: -2.001596754471409
exceeded: -0.5258011685426591
exceeded: -0.46621748881346725
exceeded: -0.833569226806218
exceeded: -14.928139345250791
exceeded: -0.44665763835441624
exceeded: -1.3005819224494106
exceeded: -0.06945209482442596
exceeded: -43.07379412221743
exceeded: -4.218483324297251
exceeded: -9.154733924251792
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04    |
|    ep_rew_mean          | 2.13e+03    |
| time/                   |             |
|    fps                  | 1036        |
|    iterations           | 253         |
|    time_elapsed  

exceeded: -0.9788268455147717
exceeded: -0.9905534100529303
exceeded: -0.12715761043648713
exceeded: -0.9136439228352813
exceeded: -0.8414455023162797
exceeded: -0.17210338931398317
exceeded: -1.0833955666733326
exceeded: -28.746471765025596
exceeded: -2.68507071770421
exceeded: -1.7561153376059846
exceeded: -0.35968793864939147
exceeded: -1.4551239021750382
exceeded: -0.2844573510010748
exceeded: -0.5973178567678494
exceeded: -0.7427047356286209
exceeded: -0.30765957172609626
exceeded: -1.6226707046427866
exceeded: -0.22083952110847915
exceeded: -0.0595655453268253
exceeded: -2.1938277404815016
exceeded: -1.9558907319501928
exceeded: -1.2395711741751037
exceeded: -2.9787515928008963
exceeded: -0.6787889802953231
exceeded: -1.5293953094798018
exceeded: -135.60898803919588
exceeded: -0.9658647893536841
exceeded: -0.05891686687221205
exceeded: -0.48902809919364365
exceeded: -1.4744452590114199
exceeded: -0.6340679463809151
exceeded: -2.626442395409536
exceeded: -0.18554517718481767
excee

exceeded: -0.1747873930811147
exceeded: -0.2184318575612813
exceeded: -134.58642741870395
exceeded: -2.8358340571854725
exceeded: -12.160331668243083
exceeded: -2.0097581452064555
exceeded: -2.6429500293496
exceeded: -0.6723764686570681
exceeded: -0.3167920626847206
exceeded: -6.993170281306826
exceeded: -0.3314466510469227
exceeded: -1.2706164659151418
exceeded: -0.4120484810067621
exceeded: -0.3782661505685678
exceeded: -0.8139676645591342
exceeded: -0.6191144627242033
exceeded: -0.12809937229575413
exceeded: -0.5242224668820203
exceeded: -0.8875156531934789
exceeded: -0.2654677888488063
exceeded: -3.070757728760671
exceeded: -118.110221281525
exceeded: -1.2858150855947328
exceeded: -0.7220543681139352
exceeded: -0.18282377007827333
exceeded: -1.502541023949366
exceeded: -2.8168059020979914
exceeded: -0.2781725592318933
exceeded: -79.05563357883402
exceeded: -0.3169065607878319
exceeded: -5.695137355700993
exceeded: -0.9426331479693315
exceeded: -1.1121353510480865
exceeded: -0.67122

exceeded: -0.7958014054092173
exceeded: -0.09425750925248401
exceeded: -0.048233339242638756
exceeded: -1.4887175951900287
exceeded: -1.974583706981424
exceeded: -0.2988893269062196
exceeded: -1.4175888516370738
exceeded: -0.11336639541150868
exceeded: -0.26912177553181404
exceeded: -0.43606144335658514
exceeded: -4.020140366790371
exceeded: -0.3464489768596363
exceeded: -0.5576720853520155
exceeded: -1.5553192466256458
exceeded: -0.5015463140683513
exceeded: -1.059603320862683
exceeded: -0.14823268628498565
exceeded: -9.26773209220691
exceeded: -0.2669011237643249
exceeded: -3.653906156736511
exceeded: -0.3420086138827904
exceeded: -2.0575890607624054
exceeded: -0.28502219349574925
exceeded: -1.286957136188064
exceeded: -0.32802571167451505
exceeded: -0.7613182984017102
exceeded: -0.2825323428137752
exceeded: -30.607009472798346
exceeded: -1.2952044401370806
exceeded: -1.369621663840663
exceeded: -1.2696860087948736
exceeded: -1.8701486609340314
exceeded: -0.1661336088033199
exceeded:

exceeded: -22.229961808734295
exceeded: -1.4735355963716759
exceeded: -1.7151327707415585
exceeded: -0.8001555639962212
exceeded: -0.7543935753774782
exceeded: -0.25387425095174987
exceeded: -0.17281134631239062
exceeded: -0.7081365055167159
exceeded: -0.005055410401453396
exceeded: -0.03406136495568969
exceeded: -0.4400658801470775
exceeded: -0.19832322125782845
exceeded: -0.5257828689874258
exceeded: -0.4957373022184117
exceeded: -0.1695742366814828
exceeded: -0.9521150230611358
exceeded: -0.059348046111389594
exceeded: -1.7919567540758086
exceeded: -1.5278688644775258
exceeded: -0.46701438382265653
exceeded: -1.301547786217817
exceeded: -0.11081867374598793
exceeded: -1.2036710583558756
exceeded: -2.7966436977922093
exceeded: -0.8910646462652118
exceeded: -1.094919676809153
exceeded: -0.20708188256848406
exceeded: -0.1302961339848131
exceeded: -0.8815381576256015
exceeded: -3.008407327459117
exceeded: -2.1733498100628172
exceeded: -0.8866685556849816
exceeded: -1.4748916425264433
ex

exceeded: -0.14237778756066166
exceeded: -25.87163201422445
exceeded: -0.08914060123281822
exceeded: -0.38047414912321326
exceeded: -0.2519940449085514
exceeded: -7.3191986584082525
exceeded: -0.05437281138682429
exceeded: -2.5041453740598523
exceeded: -10.182663975118533
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.2e+04     |
|    ep_rew_mean          | 2.29e+03    |
| time/                   |             |
|    fps                  | 1035        |
|    iterations           | 275         |
|    time_elapsed         | 543         |
|    total_timesteps      | 563200      |
| train/                  |             |
|    approx_kl            | 0.012274327 |
|    clip_fraction        | 0.145       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.19       |
|    explained_variance   | 0.141       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.44e+03    |
|    n_update

exceeded: -2.20703560483602
exceeded: -2.0330179828204193
exceeded: -205.26088826212833
exceeded: -1.1628855499119313
exceeded: -0.9148523123979845
exceeded: -0.14193386216232246
exceeded: -1.3298330368735976
exceeded: -2.5208165324207714
exceeded: -2.6335520444268745
exceeded: -0.15823889466597862
exceeded: -17.344760717725936
exceeded: -0.8790878043354331
exceeded: -0.299362785525208
exceeded: -3.2156708991201475
exceeded: -0.13577775016560234
exceeded: -5.481547536277214
exceeded: -0.04934070660673627
exceeded: -1.1527578574852775
exceeded: -1.1422683044527582
exceeded: -0.4223033224063961
exceeded: -0.5433330332768618
exceeded: -1.428187460755069
exceeded: -0.17990854426076922
exceeded: -0.18280616865468416
exceeded: -0.13355778415146208
exceeded: -0.8080993255695581
exceeded: -1.7293740750672608
exceeded: -345.571127486973
exceeded: -0.35647297111403586
exceeded: -0.7213921696288288
exceeded: -0.04532892761649338
exceeded: -0.019729537627441573
exceeded: -0.6863873667319315
exceed

exceeded: -1.9901495940919625
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.2e+04     |
|    ep_rew_mean          | 2.31e+03    |
| time/                   |             |
|    fps                  | 1037        |
|    iterations           | 284         |
|    time_elapsed         | 560         |
|    total_timesteps      | 581632      |
| train/                  |             |
|    approx_kl            | 0.021214604 |
|    clip_fraction        | 0.235       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.09       |
|    explained_variance   | 0.0708      |
|    learning_rate        | 0.0003      |
|    loss                 | 296         |
|    n_updates            | 2830        |
|    policy_gradient_loss | -0.0164     |
|    std                  | 0.533       |
|    value_loss           | 760         |
-----------------------------------------
exceeded: -116.8389219518927
exceeded: -2.7591

exceeded: -46.03327841620334
exceeded: -0.4379978912659837
exceeded: -0.32022130930670323
exceeded: -7.355724524864087
exceeded: -3.9738221472466444
exceeded: -0.8724035954914645
exceeded: -0.1086888108070973
exceeded: -0.5268881362970202
exceeded: -4.556758587571538
exceeded: -1.0216492493064209
exceeded: -0.5567836383124514
exceeded: -1.4422494594317423
exceeded: -0.175676126920064
exceeded: -0.6641090480504431
exceeded: -0.8507058114060652
exceeded: -1.3242013365766883
exceeded: -0.3297438677531834
exceeded: -0.26422483176961215
exceeded: -8.659368285685042
exceeded: -3.353838709734368
exceeded: -0.5986603987717736
exceeded: -4.027020543911865
exceeded: -1.3298198907425538
exceeded: -1.085740929589465
exceeded: -78.23581547873074
exceeded: -0.45648533962825366
exceeded: -3.1526541784233246
exceeded: -0.6130144116420481
exceeded: -1.4783587139815093
exceeded: -3.557580582603803
exceeded: -23.577386319664345
exceeded: -0.503156732860686
exceeded: -0.46694309481152085
exceeded: -0.6686

exceeded: -0.32071378722082483
exceeded: -0.06578106866367489
exceeded: -0.7567110696184058
exceeded: -182.85745511136895
exceeded: -0.194233546064146
exceeded: -1.0370707222659472
exceeded: -1.0014781421186945
exceeded: -2.8956537031459333
exceeded: -16.91952191578345
exceeded: -0.32876274119556964
exceeded: -0.3311203135327929
exceeded: -2.6448494503034197
exceeded: -0.4755654953516741
exceeded: -35.8357500180778
exceeded: -0.5233992679552439
exceeded: -0.39258894801952127
exceeded: -0.9923558492138753
exceeded: -0.4105998945685915
exceeded: -0.5650524765108432
exceeded: -1.0350397419904598
exceeded: -1.856946294199418
exceeded: -1.971050568347592
exceeded: -0.18824163264798008
exceeded: -0.09024887379882914
exceeded: -0.48480088964846635
exceeded: -0.3505423427222384
exceeded: -0.046784885581342354
exceeded: -2.902979152604735
exceeded: -24.732943987675682
exceeded: -0.4832301361094115
exceeded: -0.4492719816421725
exceeded: -0.08170689456408914
exceeded: -0.08477182533861886
exceed

<stable_baselines3.ppo.ppo.PPO at 0x13eed75b0>

In [8]:
total_reward = 0
observation = env.reset()
done = False
while not done:
    action, _states = model.predict(observation)
        
    observation, reward, done, info = env.step(action)
        
    total_reward += reward
    
print(total_reward)

  return torch.cat([torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes))])


exceeded: -4.665130686013766
exceeded: -1.9536245346367898
exceeded: -7.877867497882596
exceeded: -192.71008086654976
exceeded: -1.5878856665312315
exceeded: -0.1412544469174888
exceeded: -0.2424055613866777
exceeded: -0.6162871322937709
exceeded: -2.208778564138499
exceeded: -0.18086723527172235
exceeded: -0.8025489212264398
exceeded: -0.4603388968658789
exceeded: -0.15715175486795135
exceeded: -2.219888236372896
exceeded: -0.5478824084612736
exceeded: -6.347802176238988
exceeded: -0.8073332115947769
exceeded: -0.5928143275961243
exceeded: -0.950266292882262
exceeded: -0.21829156851494824
exceeded: -26.79816321464013
exceeded: -5.578838775822996
exceeded: -0.4248945799279231
exceeded: -0.4880356308259536
exceeded: -0.2470708113930435
exceeded: -0.35435324953108827
exceeded: -0.6967848070966103
exceeded: -0.08367250716870611
exceeded: -0.8187467048732394
exceeded: -0.06905973139019997
exceeded: -1.2733695278473525
exceeded: -0.7335783723175836
exceeded: -0.504025034678973
exceeded: -0.

In [None]:
env.print_statistics()

In [None]:
model.learn(total_timesteps=100000)

In [None]:
env.precomputed_paths

In [None]:
today()