In [1]:
import torch
import torch.nn as nn
import math
import numpy as np
import datetime

np.random.seed(42)

import sys
print(sys.executable) # just to check which python

import gym
from gym import spaces

from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy

/usr/local/opt/python@3.9/bin/python3.9


In [2]:
class RequestType:
    def __init__(self, request_type, bandwidth, service_rate, arrival_rate, source, sink, distribution, switch_rate=None):
        # distribution is 1x2 if elastic and 1x1 if static
        
        self.type = request_type
        self.bw = bandwidth
        self.service_rate = service_rate
        self.arrival_rate = arrival_rate
        self.source = source
        self.sink = sink
        self.distribution = distribution
        self.switch_rate = switch_rate
        
        self.num_made = 0
        self.num_accepted = 0

class Request:
    def __init__(self, request_type, service_time, arrival_time, source, sink, transfer_rate, distribution=None, parent_elastic=None, bw_dist=None, request_type_template=None):
        self.type = request_type
        self.service_time = service_time
        self.arrival_time = arrival_time
        self.source = source
        self.sink = sink
        self.bw = transfer_rate
        self.request_type = request_type
        self.parent_elastic = parent_elastic
        self.accepted = None
        self.path = None
        self.bw_dist = bw_dist
        
        self.blueprint = request_type_template
        
        if request_type == "elastic":
            self.distribution = distribution
            self.scale_requests = []
                   
        if request_type_template is not None:
            request_type_template.num_made += 1
            
    def add_scale_request(self, req): 
        # we store related scale requests for elastic requests
        # not used if static request
        self.scale_requests.append(req)
            
    def get_encoding(self, nodes_in_environment):
        # as per our notes, this SHOULD return 1x5 tensor,
        # but we have one hot encodings INSIDE this tensor,
        # so we will flatten this and return, so the size will be
        # larger than 1x5
        
        # nodes_in_environment is a list of all the nodes in our graph
        # eg ["a", "b", "c"]
        
        # request is [one hot source, one hot destination, bw, service time, one hot type]
                
        one_hot_source = nn.functional.one_hot(torch.tensor([nodes_in_environment.index(self.source)]), num_classes=len(nodes_in_environment)).flatten()
        one_hot_dest   = nn.functional.one_hot(torch.tensor([nodes_in_environment.index(self.sink)]), num_classes=len(nodes_in_environment)).flatten()
    
        if self.request_type == "static":
            one_hot_type = torch.tensor([1, 0, 0])
        elif self.request_type == "elastic":
            one_hot_type = torch.tensor([0, 1, 0])
        elif self.request_type == "scale":
            one_hot_type = torch.tensor([0, 0, 1])
            
        encoding = torch.cat([one_hot_source, 
                             one_hot_dest,
                             torch.tensor([self.bw]), 
                             torch.tensor([self.service_time]),
                             one_hot_type])
        
        return encoding

In [3]:
class Link:
    def __init__(self, node_1, node_2, bw_capacity):
        self.serving_requests = []
        self.nodes = [node_1, node_2]
        self.total_bw = bw_capacity
        
    def reset(self):
        self.serving_requests = []
        
    def add_request(self, request_obj):
        self.serving_requests.append(request_obj)
        
    def remove_request(self, request_obj):
        self.serving_requests.remove(request_obj)
        
    def remaining_bw(self): 
        # subtracting bw being used from total bw capacity
        bw_being_used = 0
        for req in self.serving_requests:
            bw_being_used += req.bw
            
        return (self.total_bw - bw_being_used)

In [4]:
class Environment(gym.Env):
    # requests_in_service_encoder = nn.RNN(????, 7)
    metadata = {'render.modes': ['human']}
    
    def __init__(self, nodes, links, request_blueprints, use_RNN=False, sb3_compat=False):
        super(Environment, self).__init__()
                
        """
        nodes: list of strings where each string is just a name or identifier of a node
        links: list of tuples where in tuple t, t[0] is first node, t[1] is another node, and t[2] is bw capacity of the link
        request_blueprints: list of DeploymentRequest objects
        """
        self.nodes = nodes
        self.links = {}
        self.request_history = []
        self.E_history = []
        self.past_distributions = []
        self.request_blueprints = request_blueprints
        self.last_time = 0
        self.episode_timesteps = 600
        self.use_RNN = use_RNN
        self.sb3_compat = sb3_compat
        self.precomputed_paths = {}
        
        if sb3_compat:
            self.request_being_considered = None
        
        for link in links:
            if link[0] not in self.nodes or link[1] not in self.nodes:
                raise Exception("Node in link " + str(link) + " doesn't exist")
            
            link_obj = Link(*link)

            self.links[link[0] + link[1]] = link_obj
            self.links[link[1] + link[0]] = link_obj
            
        self.request_list = self.create_requests()
        self.request_queue = iter(self.request_list)
        
        # Setup gym-specific code
        env_encoding_size = self.get_encoding(increment_iterator=False).size()
        req_encoding_size = self.request_list[0].get_encoding(self.nodes).size()
        # print(env_encoding_size[0] + req_encoding_size[0])
        
        self.action_space = spaces.Box(low=0, high=math.inf,
                                      shape=(4,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-math.inf, high=math.inf,
                                      shape=(env_encoding_size[0] + req_encoding_size[0],), dtype=np.float32)
        
        # TODO, WRITE RNN logic
        
        #if use_RNN:
        #    self.requests_in_service_encoder = nn.RNN
        
    def precompute_paths(self):
        for req_type in self.request_blueprints:
            self.precomputed_paths[req_type.source + req_type.sink] = env.search(req_type.source, req_type.sink, [], [])
            
    def add_request(self, request, path=None): # we want to add this request to a link or path
        # path: a list of nodes that the request traverses including source and sink
        # if no path is specified, path is assumed to be [req.source, req.sink]
        
        if path is not None: 
            nodes = [[path[i], path[i + 1]] for i in range(len(path) - 1)]
            for node_pair in nodes:
                env.links[node_pair[0] + node_pair[1]].add_request(request)
        
        else:
            self.links[request.source + request.sink].add_request(request)
        
        request.accepted = True
        request.blueprint.num_accepted += 1
        self.request_history.append(request)
        # print(self.links[request.source + request.sink])
    
    def reset(self):
        for link in self.links.values():
            link.reset()
        self.request_history = []
        self.E_history = []
        self.past_distributions = []
        self.last_time = 0
        self.request_list = env.create_requests()
        self.request_queue = iter(self.request_list)
        
        return env.get_encoding()
        
    def reward(self, request, decision):
        base_rate = 1         # 1 when static
        type_bonus = 0.9      # 0.9 when static
        bw = request.bw
        if request.type == "elastic":
            #base_rate = request.bw
            type_bonus = 1.1                # 1.1 when elastic
            bw = np.array(request.bw_dist).dot(request.distribution)
            
        r = bw * base_rate * request.service_time * type_bonus
        
        # if remaining bandwidth on link(s) < 0, very "bad" reward
        if request.path is not None:
            path_length = len(request.path)
            
            r *= math.pow(0.9, path_length - 2)
            
            nodes = [[request.path[i], request.path[i + 1]] for i in range(len(request.path) - 1)]
            for node_pair in nodes:
                if self.links[node_pair[0] + node_pair[1]].remaining_bw() < 0:
                    print("exceeded: " + str(-r * 10))
                    return (-r * 10)
        else:
            # path is direct, so no decrease of reward needed
            remaining_bw = self.links[request.source + request.sink].remaining_bw()
            if remaining_bw < 0:
                print("exceeded: " + str(-r * 10))
                return (-r * 10)
        
        if decision == "accept":
            return r
        
        if decision == "reject":
            if request.type == "static" or request.type == "elastic":
                return 0
            elif request.type == "scale":
                if len(self.past_distributions) == 0:
                    return -1 * r
                
                else:
                    current_sum = torch.from_numpy(np.sum(self.past_distributions, axis=0))

                    average_past_distribution = current_sum / len(self.past_distributions)
                    current_req_distribution = torch.tensor(request.parent_elastic.distribution)

                    return -1 * r * math.exp(-nn.functional.kl_div(average_past_distribution, current_req_distribution))

                """
                past_distributions = []
                for req in self.request_history:
                    if req.request_type == "elastic":
                        past_distributions.append(req.distribution)
                
                average_past_distribution = torch.mean(past_distributions, dim=1)
                current_req_distribution = torch.tensor(request.distribution)
                
                if bool(average_past_distribution[0] < current_req_distribution[0]):
                    return -1 * r * math.exp(-nn.functional.kl_div(average_past_distribution, current_req_distribution))
                else:
                    return 0
                """
                
    def next_req(self):
        next_req = next(self.request_queue)
        if self.sb3_compat:
            self.request_being_considered = next_req
        return next_req
                
    def step(self, action, req=None):
        # what happens if we have two requests that come in on the same timestep but there is only enough bandwidth for one?
        # do we the decision on the second request with knowledge of the first request
        # essentially, after we accept the first request, will we submit an updated encoding of the network to the policy network?
 
        # actions is a Nx2 matrix where the first column in the request and second is the decision
        # decision is either "accept" or "reject"
        # this is given by our agent
        
        # if req is None, that means we are using sb3_compat=True and we can get the req from self.request_being_considered
                
        if req is None:
            req = self.request_being_considered
            
        if action[0] > 0.5:
            # accept request
            paths = env.precomputed_paths[req.source + req.sink]
            paths.sort(key=lambda x: len(x)) # sort by shortest path
            # select the path we are using
            path = paths[action[1:4].argmax()]
            
            self.add_request(req, path)
        
            reward = env.reward(req, "accept")
        elif action[0] < 0.5:
            # reject
            reward = env.reward(req, "reject")
        
        obs = env.get_encoding()
        
        done = req.arrival_time > 600
        info = {}
        
        return obs, reward, done, info
        
    def update_requests(self, current_time):
        # here, we remove expired requests and update E_history based off of the request stats
        
        for link in self.links.values():
            for request in link.serving_requests.copy():
                if (request.arrival_time + request.service_time) > self.last_time and (request.arrival_time + request.service_time) < current_time:
                    # request has expired, let's remove it from the links
                    for link in self.links.values():
                        if request in link.serving_requests:
                            link.remove_request(request)

                    if request.type == "elastic":
                        time_on_higher_bw = 0
                        for scale_req in request.scale_requests:
                            time_on_higher_bw += scale_req.service_time

                        time_on_lower_bw = request.service_time - time_on_higher_bw

                        # calculate E[history]
                        request_time = np.array([time_on_lower_bw, time_on_higher_bw])
                        request_bw = request.bw
                        result = (request_time / request_time.sum()).dot(request_bw)
                        self.past_distributions.append(request_time / request_time.sum())
                        self.E_history.append(result)

    def get_encoding(self, increment_iterator=True):
        links_processed = [] 
        # these will store links that we have already encoded so we don't encode them again
        
        current_encoding = []
        
        # h = torch.zeros(7) # assuming 7 for h0 size
        # last_out = None
        
        env_encoding = []
        
        if increment_iterator:
            next_req = self.next_req()

            while next_req.type == "scale":
                if not next_req.parent_elastic.accepted:
                    next_req = self.next_req()
                elif next_req.parent_elastic.accepted:
                    break
            
            """
            while next_req.type == "scale":
                if next_req.parent_elastic.accepted:
                    next_req.accepted = True # we must accept since we accepted elastic req
                    self.add_request(next_req, next_req.parent_elastic.path)
                next_req = self.next_req()
            """
            self.update_requests(next_req.arrival_time)

        for link in self.links.values():
            if link in links_processed:
                continue

                        
            # Commented because we don't want to encode any queue for phase 1
            
            # for req in link.serving_requests
                # request is [one hot source, one hot destination, bw, service time, one hot type]
                
                # one_hot_source = nn.functional.one_hot(torch.tensor([self.nodes.index(req.source)]), num_classes=len(self.nodes))
                # one_hot_dest   = nn.functional.one_hot(torch.tensor([self.nodes.index(req.sink)]), num_classes=len(self.nodes))

                # req_tensor = torch.Tensor([]) # mismatched dimensions??!
                # last_out, h = self.requests_in_service_encoder(req_tensor, h)

            # current_encoding.append(torch.cat(torch.Tensor([link.remaining_bw]), last_out))
            # torch.stack(current_encoding)
            
            # check implementation later
            
            env_encoding.append(link.remaining_bw())
            
            links_processed.append(link)
            
        if not increment_iterator:
            return torch.tensor(env_encoding)
        
        if self.sb3_compat:
            return torch.cat([torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes))])
        else:
            return torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes)), next_req
    
    def create_requests(self):
        requests = []
        
        for request_type in self.request_blueprints:
            arrival_times = []
            service_times = []
            last_arrival = 0
        
            while last_arrival < self.episode_timesteps: # we want to generate requests till we reach episode end
                last_arrival += np.random.exponential(request_type.arrival_rate)
                arrival_times.append(last_arrival)
                                
            for _ in arrival_times:
                service_times.append(np.random.exponential(request_type.service_rate))
                
            for arrival_time, service_time in zip(arrival_times, service_times):
                # start creating requests
                
                new_request = Request(request_type.type, service_time, arrival_time, request_type.source, request_type.sink, 
                                      request_type.bw[0], request_type.distribution, bw_dist=request_type.bw, request_type_template=request_type)
                requests.append(new_request)
                
                if request_type.type == "elastic": 
                    # we will start with the first bandwidth element as starting bw
                    # WE ASSUME that bw[0] < bw[1]
                    timesteps_from_deployment = 0
                    current_bw = request_type.bw[0]
                    while timesteps_from_deployment < service_time:
                        if current_bw == request_type.bw[0]:
                            # we want to generate a scale request to increase bw
                            scale_bw = request_type.bw[1] - current_bw
                            scale_service_time = np.random.exponential(request_type.switch_rate[1])
                            scale_request = Request("scale", scale_service_time, \
                                                    arrival_time + timesteps_from_deployment, request_type.source, \
                                                   request_type.sink, scale_bw, parent_elastic=new_request,
                                                   request_type_template=request_type)
                            requests.append(scale_request)
                            new_request.add_scale_request(scale_request)
                            
                            timesteps_from_deployment += scale_service_time
                            current_bw = request_type.bw[1] # request_type.bw[0] + scale_bw
                        elif current_bw == request_type.bw[1]:
                            # we want to go to lower bw and spend some time there
                            time_spent_on_lower_bw = np.random.exponential(request_type.switch_rate[0])
                            timesteps_from_deployment += time_spent_on_lower_bw
                            current_bw = request_type.bw[0]
                            
        # sort requests by arrival time
        requests.sort(key=lambda x: x.arrival_time)
        return requests
    
    def search(self, source, dest, visited_a, paths):
        visited_a.append(source)
        # print(visited_a)

        for link in set(env.links.values()):
            visited = visited_a.copy()
            if source in link.nodes:
                if dest in link.nodes:
                    visited.append(dest)
                    paths.append(visited)

                x = link.nodes.copy()
                x.remove(source)
                if x[0] not in visited:
                    self.search(x[0], dest, visited.copy(), paths)
        return paths
    
    def print_statistics(self):
        for req_type in self.request_blueprints:
            print(req_type.source + " | " +
                   req_type.sink + " | " + 
                 "BW: " + str(req_type.bw) + " | " +
                 "Arrival rate: " + str(req_type.arrival_rate) + " | " +
                 "Acceptance rate: " + str(req_type.num_accepted / req_type.num_made))
            
        resources_used = 0
        for req in self.request_list:
            if req.accepted == True:
                resources_used += req.bw * req.service_time
            
        print("BW used: " + str(resources_used))

In [5]:
env = Environment(["a", "b", "c", "d", "e", "f"], [["a", "b", 10], ["a", "c", 10], ["b", "d", 10], \
                                                   ["c", "d", 20], ["c", "e", 10], ["d", "f", 10], \
                                                   ["e", "f", 10]], \
                  [RequestType("static", [2], 0.5, 0.75, "a", "b", [1]), \
                  RequestType("static", [8], 1, 1.5, "a", "b", [1]), \
                  RequestType("elastic", [4, 9], 1, 1.5, "a", "b", [0.8, 0.2], switch_rate=[0.08, 0.02]), \
                  RequestType("static", [1], 1, 1.5, "c", "d", [1]), \
                  RequestType("static", [7], 0.5, 0.75, "c", "d", [1]), \
                  RequestType("elastic", [3, 13], 2, 3, "c", "d", [0.9, 0.1], switch_rate=[0.09, 0.01]), \
                  RequestType("static", [3], 0.5, 0.75, "e", "f", [1]), \
                   RequestType("static", [6], 1, 1.5, "e", "f", [1]), \
                    RequestType("elastic", [5, 8], 2, 3, "e", "f", [0.7, 0.3], switch_rate=[0.07, 0.03])],
                 sb3_compat=True)


                # self, request_type, bandwidth, service_rate, arrival_rate, source, sink, distribution, switch_rate=None
    
env.precompute_paths()

### Choose shortest viable path

In [6]:
def policy(env_encoding, next_req_encoding, next_req_obj):    
    # find all paths between source and sink
    paths = env.precomputed_paths[next_req_obj.source + next_req_obj.sink] 
    # (env.search(next_req_obj.source, next_req_obj.sink, [], []))
    paths.sort(key=lambda x: len(x)) # sort by shortest path
    selection = 0
    for path in paths:
        # check if this path works
        works = True
        nodes = [[path[i], path[i + 1]] for i in range(len(path) - 1)]
        for node_pair in nodes:
            if env.links[node_pair[0] + node_pair[1]].remaining_bw() < next_req_obj.bw:
                works = False
                
        if works:
            selection = paths.index(path)
            selection_one_hot = nn.functional.one_hot(torch.tensor([selection]), num_classes=3).flatten()
            next_req_obj.path = path
            return torch.cat([torch.tensor([1]), selection_one_hot])
        
    return torch.cat([torch.tensor([0]), torch.tensor([0,0,0])])

In [7]:
# ONLY WORKS WITH sb3_compat=False when creating env object

total_reward = 0
env_encoding, next_req_encoding, next_req_obj = env.reset()
done = False

while not done:
    decision = policy(env_encoding, next_req_encoding, next_req_obj)
    
    obs, reward, done, info = env.step(decision, next_req_obj)
    env_encoding, next_req_encoding, next_req_obj = obs
    
    total_reward += reward

print(total_reward)

  return torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes)), next_req


11929.026038920432


In [8]:
env.print_statistics()

a | b | BW: [2] | Arrival rate: 0.75 | Acceptance rate: 0.4870530209617756
a | b | BW: [8] | Arrival rate: 1.5 | Acceptance rate: 0.26838709677419353
a | b | BW: [4, 9] | Arrival rate: 1.5 | Acceptance rate: 0.3286232950642724
c | d | BW: [1] | Arrival rate: 1.5 | Acceptance rate: 0.5038659793814433
c | d | BW: [7] | Arrival rate: 0.75 | Acceptance rate: 0.38267148014440433
c | d | BW: [3, 13] | Arrival rate: 3 | Acceptance rate: 0.23015406327178642
e | f | BW: [3] | Arrival rate: 0.75 | Acceptance rate: 0.46183699870633893
e | f | BW: [6] | Arrival rate: 1.5 | Acceptance rate: 0.3296432964329643
e | f | BW: [5, 8] | Arrival rate: 3 | Acceptance rate: 0.3038709677419355
BW used: 12511.685723575798


### PPO

In [6]:
model = PPO(MlpPolicy, env, verbose=1, tensorboard_log="./multilink/")
model.learn(total_timesteps=600000)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Logging to ./multilink/PPO_1


  return torch.cat([torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes))])


exceeded: -62.715225413816114
exceeded: -22.424637363251733
exceeded: -0.12332758318444846
exceeded: -0.3450352357883517
exceeded: -0.6901326401488881
exceeded: -0.5279052227521029
exceeded: -0.4133610853583354
exceeded: -1.146456680822214
exceeded: -0.38904006699889715
exceeded: -4.214889860839603
exceeded: -12.625222562148
exceeded: -84.67413229508406
exceeded: -0.6491359368342537
exceeded: -4.834806473854401
exceeded: -22.052043944613576
exceeded: -22.584472706306833
exceeded: -8.072674616543821
exceeded: -110.08035188628551
exceeded: -3.292313398885808
exceeded: -19.771028245734414
exceeded: -2.1276617118515477
exceeded: -87.90445605214487
exceeded: -49.47555053300884
exceeded: -2.8088246771416507
exceeded: -33.659249112903126
exceeded: -331.4671474855761
exceeded: -0.09548062120440502
exceeded: -0.2117843945972941
exceeded: -0.2821957916096936
exceeded: -1.5238974218368
exceeded: -2.249007310992488
exceeded: -33.130551137988164
exceeded: -0.8817413470068909
exceeded: -7.1211914306

exceeded: -0.6986167230869702
exceeded: -0.001582432533624942
exceeded: -0.20367574859874463
exceeded: -5.183563998268359
exceeded: -0.8936690552156161
exceeded: -0.1919641568741627
exceeded: -0.20497515856084875
exceeded: -1.7740635722021392
exceeded: -28.121764986444635
exceeded: -0.5291415976009344
exceeded: -20.503488435368258
exceeded: -122.65944171314874
exceeded: -46.99809122965917
exceeded: -2.046137297366246
exceeded: -136.55598700152007
exceeded: -0.9324095306159232
exceeded: -0.5840975473288553
exceeded: -3.1892503393615095
exceeded: -0.41484272458489624
exceeded: -0.8405510622605016
exceeded: -0.12829503161128092
exceeded: -2.7225008528611854
exceeded: -0.6269693065815618
exceeded: -0.029546118166995393
exceeded: -1.155830798807151
exceeded: -0.44673454413942737
exceeded: -0.5274901707757201
exceeded: -0.4921643412203691
exceeded: -0.9077166765054099
exceeded: -1.2654820873080117
exceeded: -0.4285446794721681
exceeded: -1.2191512319257232
exceeded: -1.6376681600215934
excee

exceeded: -0.6954715942762651
exceeded: -1.3856071372875323
exceeded: -0.07643569125611355
exceeded: -0.353859457531601
exceeded: -10.383582793786406
exceeded: -19.732023624362604
exceeded: -24.409386443354965
exceeded: -7.2020534135370475
exceeded: -5.31336378827573
exceeded: -6.658071599020127
exceeded: -85.99838515660369
exceeded: -79.08714988041719
exceeded: -0.9155698965973276
exceeded: -2.1226304013536255
exceeded: -77.16808624485722
exceeded: -2.6049848093532924
exceeded: -0.16678823646013946
exceeded: -0.1735045823278361
exceeded: -0.6084642928095233
exceeded: -0.25426471649850824
exceeded: -0.07089190972241008
exceeded: -0.08237798169508534
exceeded: -1.0445521759835987
exceeded: -19.804582368581233
exceeded: -2.1372941062212307
exceeded: -0.29608106097064046
exceeded: -14.999190734725815
exceeded: -1.5697410486190977
exceeded: -1.2132983088061484
exceeded: -0.320236224343344
exceeded: -0.226535649531191
exceeded: -14.005739852843726
exceeded: -3.022925860504553
exceeded: -0.4

exceeded: -15.81289871147931
exceeded: -1.0402754131124579
exceeded: -26.66236415912351
exceeded: -91.95909097679152
exceeded: -0.6383833595851541
exceeded: -26.720928638336797
exceeded: -1.2486025842905544
exceeded: -0.25833784276455213
exceeded: -1.1746549230157026
exceeded: -1.1825472697125268
exceeded: -1.8352209572552862
exceeded: -0.651313266098112
exceeded: -43.5700617950277
exceeded: -0.09788915507922344
exceeded: -46.98608921164126
exceeded: -24.229262194883887
exceeded: -2.0475389431513076
exceeded: -0.38004126171284297
exceeded: -1.810315722543633
exceeded: -1.2061639276211
exceeded: -10.108140565677399
exceeded: -1.934781887396938
exceeded: -0.036934798884616774
exceeded: -21.467693392395045
exceeded: -27.903536782064325
exceeded: -0.13877811571219542
exceeded: -1.8193623143820827
exceeded: -0.763116719963529
exceeded: -0.8220109215883293
exceeded: -1.402862717105037
exceeded: -1.1230699445992618
exceeded: -0.569355768223351
exceeded: -0.8456207860948741
exceeded: -1.484890

exceeded: -43.346198574897244
exceeded: -11.342041068895973
exceeded: -2.9901906943461114
exceeded: -0.1254239000683645
exceeded: -3.469251519176521
exceeded: -0.937047501811427
exceeded: -1.2635856852615817
exceeded: -0.5517264646151009
exceeded: -65.07534161150426
exceeded: -0.36772494838160336
exceeded: -0.8753333673452977
exceeded: -2.1955990342564373
exceeded: -0.12567620354383308
exceeded: -0.24378802839098848
exceeded: -0.5511195681709934
exceeded: -97.70946786288128
exceeded: -1.5170423683617855
exceeded: -1.9158719735858236
exceeded: -0.2997179980435285
exceeded: -0.6068296263624261
exceeded: -8.399126783080064
exceeded: -1.3471944071854778
exceeded: -80.25650783951127
exceeded: -4.804550980713399
exceeded: -382.11737565166794
exceeded: -1.4136027550917158
exceeded: -2.4786788569178073
exceeded: -0.9537558474413375
exceeded: -8.886674326150857
exceeded: -0.4404942332326955
exceeded: -0.6786986863087524
exceeded: -0.2816622337144156
exceeded: -1.5708391459621693
exceeded: -0.78

exceeded: -87.69409691006254
exceeded: -123.27116610740862
exceeded: -8.959228421377517
exceeded: -0.6305749296319142
exceeded: -0.6088865493032656
exceeded: -0.8959884630271777
exceeded: -0.31561622048465
exceeded: -2.82140291372281
exceeded: -0.10580098755508095
exceeded: -0.00548878063499446
exceeded: -2.24475455829571
exceeded: -0.9142433836947692
exceeded: -1.6716617819487445
exceeded: -8.872894850527874
exceeded: -0.11791290853939632
exceeded: -0.9752616355347306
exceeded: -0.10097920028733372
exceeded: -0.2441627990357595
exceeded: -1.8149031707392123
exceeded: -0.6428350213797152
exceeded: -43.69670558175428
exceeded: -54.04032703492268
exceeded: -12.306890889454522
exceeded: -28.5106614065476
exceeded: -1.32440986047824
exceeded: -60.18017792514841
exceeded: -0.016862844624567986
exceeded: -8.403068403086642
exceeded: -27.967353414695086
exceeded: -8.290060751992582
exceeded: -0.05056240994985851
exceeded: -318.7751721503695
exceeded: -1.8912587492793946
exceeded: -0.409225370

exceeded: -2.5996668004954833
exceeded: -9.17590099524849
exceeded: -0.8158992865128639
exceeded: -1.9007674718553569
exceeded: -106.955892248279
exceeded: -1.0024037894979887
exceeded: -1.776645594323446
exceeded: -1.0675125828001495
exceeded: -82.23368304957256
exceeded: -1.8583199443762843
exceeded: -0.7230343896157954
exceeded: -0.35298266337062467
exceeded: -4.696135252007919
exceeded: -28.764438011236688
exceeded: -0.5030207012900414
exceeded: -1.3809889171031378
exceeded: -2.9530225443650537
exceeded: -1.2749564605169204
exceeded: -0.015682233948890012
exceeded: -0.9044861273578414
exceeded: -0.2631471450647352
exceeded: -6.746319080233584
exceeded: -100.66975828006976
exceeded: -15.181972748936559
exceeded: -62.16730470364247
exceeded: -0.24512137005225826
exceeded: -15.800189857566515
exceeded: -61.63469592915359
exceeded: -1.8259907652706548
exceeded: -0.3725001291333573
exceeded: -38.27537753462234
exceeded: -4.8649519784929005
exceeded: -1.371761084014561
exceeded: -1.07393

exceeded: -39.04402260053284
exceeded: -15.61199647218795
exceeded: -2.2210136405028718
exceeded: -6.899447696407042
exceeded: -6.915573355390906
exceeded: -89.36320451804379
exceeded: -10.327467313763243
exceeded: -5.618619622665718
exceeded: -78.22342118837233
exceeded: -6.37326296569476
exceeded: -105.94733662857382
exceeded: -83.65802766412791
exceeded: -10.03523726293032
exceeded: -7.98960955492435
exceeded: -3.2475410270374585
exceeded: -43.38532632816025
exceeded: -4.25380236914926
exceeded: -0.44618448024465884
exceeded: -29.916750852367255
exceeded: -0.44930185917045196
exceeded: -0.6531112376806905
exceeded: -1.1187209494270198
exceeded: -1.6774446139696886
exceeded: -0.5658608538554889
exceeded: -0.5223233891002209
exceeded: -0.02517959734984308
exceeded: -1.4428387090165993
exceeded: -0.15891187272615576
exceeded: -1.101747643070948
exceeded: -0.8350142653488593
exceeded: -0.8740607944415056
exceeded: -0.0484903334829355
exceeded: -0.4294037015412295
exceeded: -0.2549215602

exceeded: -174.604868970114
exceeded: -25.445110252222133
exceeded: -4.639967274934743
exceeded: -40.584615237293235
exceeded: -17.08336017268887
exceeded: -1.6102948047928558
exceeded: -5.07194323138069
exceeded: -96.29297931510523
exceeded: -2.2148850284542307
exceeded: -0.16867911464215513
exceeded: -1.1563983334476042
exceeded: -1.0940803360396734
exceeded: -1.2470786390503967
exceeded: -13.17721861451858
exceeded: -7.051174944545132
exceeded: -0.6711757320510342
exceeded: -18.42767312424678
exceeded: -0.008462281236479078
exceeded: -8.066930988076463
exceeded: -0.03783743439402882
exceeded: -0.43313934197882425
exceeded: -2.29897035186894
exceeded: -0.1004101792082217
exceeded: -1.2867256031527072
exceeded: -0.2993535410883576
exceeded: -0.7091128666352473
exceeded: -8.390613630452288
exceeded: -1.1396393954701565
exceeded: -5.391066937204667
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.5e+03     |
|    ep_rew_me

exceeded: -91.51993479560659
exceeded: -0.475834363956827
exceeded: -1.5377538471085632
exceeded: -15.994447124517679
exceeded: -0.3021447668491333
exceeded: -1.6357210342500301
exceeded: -0.0834555732930772
exceeded: -0.5803566897187339
exceeded: -0.06378925161026988
exceeded: -0.683636156872426
exceeded: -0.8326909749504814
exceeded: -0.2672250889471172
exceeded: -11.215235657154004
exceeded: -0.1330401615181664
exceeded: -0.09446968256896165
exceeded: -0.4255588398936386
exceeded: -0.2799916934838992
exceeded: -59.79647761059254
exceeded: -17.954012445399485
exceeded: -5.510084233435758
exceeded: -16.159245631191837
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.5e+03     |
|    ep_rew_mean          | -1.75e+03   |
| time/                   |             |
|    fps                  | 1030        |
|    iterations           | 26          |
|    time_elapsed         | 51          |
|    total_timesteps      | 53248    

exceeded: -0.23484496177549397
exceeded: -143.76229722446706
exceeded: -51.84190036695406
exceeded: -109.45916200255097
exceeded: -8.275956475528561
exceeded: -6.198404413270312
exceeded: -3.79332360004285
exceeded: -6.18497605158936
exceeded: -2.0099279980364955
exceeded: -1.0866940444881126
exceeded: -23.627726605452096
exceeded: -0.6236466685095527
exceeded: -3.251241257339159
exceeded: -0.18148327886324203
exceeded: -3.6066523947777904
exceeded: -32.84488245166388
exceeded: -63.31867254186771
exceeded: -0.7653274521196884
exceeded: -145.0113666447948
exceeded: -1.2113210755384434
exceeded: -43.59758180979375
exceeded: -16.65263352556339
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.73e+03     |
|    ep_rew_mean          | -1.28e+03    |
| time/                   |              |
|    fps                  | 1045         |
|    iterations           | 29           |
|    time_elapsed         | 56           |
|    to

exceeded: -75.08887395188509
exceeded: -26.556060517846603
exceeded: -30.021506028668185
exceeded: -0.7058778236667008
exceeded: -0.6634363938679091
exceeded: -0.9925859656556353
exceeded: -5.5482604999605645
exceeded: -2.3768066520831894
exceeded: -94.45407287057598
exceeded: -78.97327979208802
exceeded: -12.137223560989995
exceeded: -37.424558568306516
exceeded: -0.49690105471735424
exceeded: -0.10745246113480209
exceeded: -0.017034931984283797
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.97e+03     |
|    ep_rew_mean          | -1.37e+03    |
| time/                   |              |
|    fps                  | 1052         |
|    iterations           | 32           |
|    time_elapsed         | 62           |
|    total_timesteps      | 65536        |
| train/                  |              |
|    approx_kl            | 0.0070035057 |
|    clip_fraction        | 0.0455       |
|    clip_range           | 0.2  

exceeded: -6.074825005429797
exceeded: -0.7202846440037823
exceeded: -26.205926018271764
exceeded: -13.521321782899012
exceeded: -9.283901542605363
exceeded: -51.152358675201874
exceeded: -1.9701054973341714
exceeded: -0.4909236259473653
exceeded: -191.4829338680728
exceeded: -1.161809378935719
exceeded: -0.1160990930573045
exceeded: -0.7999159680905172
exceeded: -29.923288376119
exceeded: -0.5804354831137666
exceeded: -0.9061659310092909
exceeded: -0.19335927474175751
exceeded: -0.6036538381550944
exceeded: -15.14283572474148
exceeded: -0.0007975008223006876
exceeded: -1.0375642647036067
exceeded: -17.784971926699807
exceeded: -0.1580883683410041
exceeded: -0.6940187983597674
exceeded: -0.15290080784979646
exceeded: -1.6458696746575971
exceeded: -0.8686968549123176
exceeded: -0.6567767386750671
exceeded: -0.40259272927977147
exceeded: -0.5676683013425824
exceeded: -2.274413898850898
exceeded: -0.011019121492962566
exceeded: -0.5593640576137107
exceeded: -123.14360185118312
exceeded: -

exceeded: -6.071620841135595
exceeded: -100.79427463019613
exceeded: -31.725086508394927
exceeded: -1.1276913218374014
exceeded: -0.06750612524416758
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.24e+03    |
|    ep_rew_mean          | -1.14e+03   |
| time/                   |             |
|    fps                  | 1065        |
|    iterations           | 38          |
|    time_elapsed         | 73          |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.014457157 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.62       |
|    explained_variance   | 0.12        |
|    learning_rate        | 0.0003      |
|    loss                 | 70.2        |
|    n_updates            | 370         |
|    policy_gradient_loss | -0.0124     |
|    std                  | 0.985       |
|    value

exceeded: -23.371153249458278
exceeded: -0.046935471835875923
exceeded: -0.43195129513592306
exceeded: -22.596401981874973
exceeded: -0.1777563026825964
exceeded: -16.714161938805304
exceeded: -0.5822315583368097
exceeded: -26.130336025357323
exceeded: -1.4997943936249403
exceeded: -0.21379618775079856
exceeded: -0.5682954441789938
exceeded: -0.3460143587260179
exceeded: -0.3453495494599768
exceeded: -0.46873962567137606
exceeded: -2.1398720867597367
exceeded: -0.12310132428893808
exceeded: -2.2567466044223243
exceeded: -0.1511997208640135
exceeded: -1.2068893546945136
exceeded: -0.08175237137076069
exceeded: -0.15400319512464908
exceeded: -0.14142366407400647
exceeded: -0.4449599252938339
exceeded: -1.1801299064598234
exceeded: -38.63314700784769
exceeded: -0.17297630626231386
exceeded: -1.7953382360882066
exceeded: -1.157617498347822
exceeded: -1.6557502987453423
exceeded: -0.5714213005845346
exceeded: -0.20983281002633536
exceeded: -0.6924575362503483
exceeded: -2.2648700173964107
e

exceeded: -2.3301972294808233
exceeded: -1.7573244386966602
exceeded: -3.2602720547848003
exceeded: -0.25435936488615973
exceeded: -0.4070171277454609
exceeded: -0.08366505470236847
exceeded: -0.45695748892903654
exceeded: -0.46377642907145294
exceeded: -62.43745325127477
exceeded: -1.6660847274534638
exceeded: -2.2189975280780656
exceeded: -1.888569945703882
exceeded: -1.112815652070682
exceeded: -1.3052489315162072
exceeded: -3.4619174628701184
exceeded: -4.910566675241522
exceeded: -1.3256654814222348
exceeded: -0.6153650026261951
exceeded: -0.4451167446727746
exceeded: -0.6282432654792126
exceeded: -0.13465318133309026
exceeded: -2.873305441844158
exceeded: -9.865828711965724
exceeded: -0.42249574776464993
exceeded: -36.664852584176025
exceeded: -0.9502150641006153
exceeded: -2.948879543889977
exceeded: -1.5037888061669977
exceeded: -0.9075956088790649
exceeded: -67.33707110015831
exceeded: -1.0079700508395615
exceeded: -1.4048686105860073
exceeded: -0.12292268180317883
exceeded: -

exceeded: -0.6267381818369478
exceeded: -105.05726163855486
exceeded: -10.446462840903095
exceeded: -68.32209263243533
exceeded: -28.330387209464668
exceeded: -6.714539926193677
exceeded: -1.0264116283219007
exceeded: -29.483923846231402
exceeded: -1.3750822435728107
exceeded: -105.22682620105952
exceeded: -0.3545625648583262
exceeded: -1.9384183177968397
exceeded: -1.5709609267415492
exceeded: -1.8554461582922608
exceeded: -3.429437191402846
exceeded: -55.19212843402994
exceeded: -0.2184203647057758
exceeded: -0.04003758098769296
exceeded: -0.6165029895042866
exceeded: -1.7191110398471445
exceeded: -114.86693317139017
exceeded: -3.8815396785561473
exceeded: -1.9533207649158748
exceeded: -31.481572982430546
exceeded: -1.2080859260527497
exceeded: -0.4481924790549697
exceeded: -1.818516063181688
exceeded: -0.028472351909650626
exceeded: -0.8889553721478739
exceeded: -1.3910622400977766
exceeded: -2.565677222067577
exceeded: -85.09711266931816
exceeded: -0.009849285975473084
exceeded: -0

exceeded: -2.389044526798185
exceeded: -0.25808383463357276
exceeded: -16.438581581412155
exceeded: -0.8477846105982032
exceeded: -1.3601026100068248
exceeded: -0.6396470689122734
exceeded: -15.087282942029315
exceeded: -0.1907544083791461
exceeded: -19.38683589326175
exceeded: -183.49469060811717
exceeded: -28.274602405387
exceeded: -0.6141976334597614
exceeded: -4.934746992783654
exceeded: -0.45893263677482177
exceeded: -0.21721293926577334
exceeded: -49.33408928920085
exceeded: -0.5841595691233157
exceeded: -0.23507093289794803
exceeded: -0.1276438367623198
exceeded: -0.5090136933268651
exceeded: -0.8674660164889549
exceeded: -5.572583422266098
exceeded: -1.0423413670372141
exceeded: -0.9861342972212683
exceeded: -0.09156367553415656
exceeded: -0.12310600459047771
exceeded: -0.6108873427692709
exceeded: -0.23288441561386294
exceeded: -0.30180345489767424
exceeded: -0.6543589046557298
exceeded: -77.12128460735238
exceeded: -59.88334550985794
exceeded: -0.7732306584343113
exceeded: -1

exceeded: -0.31913092399801424
exceeded: -0.26841632704075213
exceeded: -0.7498434360406612
exceeded: -0.8651174161772451
exceeded: -0.030168861880994942
exceeded: -15.958606526810229
exceeded: -0.33071562604291943
exceeded: -0.35062888077759324
exceeded: -0.5882492265548409
exceeded: -0.26646213656236906
exceeded: -1.6151329084175925
exceeded: -0.05733521344587297
exceeded: -147.95096835295405
exceeded: -1.3808174396769732
exceeded: -1.5946055989886028
exceeded: -2.2820989324032106
exceeded: -111.16214610275313
exceeded: -22.09590114536615
exceeded: -24.270980321070724
exceeded: -0.000253254712683605
exceeded: -0.0717287746310862
exceeded: -0.7391160036084592
exceeded: -0.16906552427978572
exceeded: -9.134250854364547
exceeded: -1.6544449891696702
exceeded: -3.1081092289656276
exceeded: -0.7661536637857086
exceeded: -0.6708079745124023
exceeded: -0.5390125580061724
exceeded: -0.3626451560852283
exceeded: -1.6075588409232058
exceeded: -0.258423286538367
exceeded: -0.014011671824553831


exceeded: -4.672069914427631
exceeded: -137.3377907654783
exceeded: -0.5802048975925465
exceeded: -57.83611561597535
exceeded: -93.77154048590839
exceeded: -0.6948157650762294
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.2e+03     |
|    ep_rew_mean          | -221        |
| time/                   |             |
|    fps                  | 1058        |
|    iterations           | 61          |
|    time_elapsed         | 118         |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 0.011886936 |
|    clip_fraction        | 0.116       |
|    clip_range           | 0.2         |
|    entropy_loss         | -5.4        |
|    explained_variance   | 0.0496      |
|    learning_rate        | 0.0003      |
|    loss                 | 50.9        |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.0118     |
|    std                  

exceeded: -0.2441404359332988
exceeded: -0.0036024970878881734
exceeded: -1.3184568884072125
exceeded: -0.5039547957761517
exceeded: -1.333016015409373
exceeded: -0.8696848936133695
exceeded: -0.6291238679472593
exceeded: -0.7630557015605082
exceeded: -149.19802452082322
exceeded: -2.3774258511348494
exceeded: -0.03343351078887358
exceeded: -1.4674624443442694
exceeded: -1.7017045514576603
exceeded: -0.8590601320126856
exceeded: -0.28089027076982287
exceeded: -159.88390576301927
exceeded: -192.72480227089756
exceeded: -10.59242783000788
exceeded: -15.398718332924833
exceeded: -3.4665227281563773
exceeded: -0.26930858130298596
exceeded: -0.14074956745075026
exceeded: -47.37253668191065
exceeded: -0.11712025572494277
exceeded: -1.8442835661467705
exceeded: -2.397881757589504
exceeded: -1.289196810756912
exceeded: -15.481854652747781
exceeded: -2.0513624116678426
exceeded: -0.6144771845993612
exceeded: -1.1277750663887784
exceeded: -1.8820428523386024
exceeded: -1.1646364524995119
exceede

exceeded: -0.4450281239360563
exceeded: -41.02238210786769
exceeded: -133.59610321249974
exceeded: -138.3449599563853
exceeded: -10.089833315524594
exceeded: -262.3606897142289
exceeded: -64.37798435239435
exceeded: -34.70705064033475
exceeded: -3.6269093760317537
exceeded: -32.902500965140966
exceeded: -47.59456490942513
exceeded: -5.444386925706385
exceeded: -18.035311082917502
exceeded: -1.50786033461076
exceeded: -38.74483598304147
exceeded: -16.896757950753795
exceeded: -0.4292643636940964
exceeded: -0.8055696862210872
exceeded: -93.44959198418438
exceeded: -1.984083937087706
exceeded: -21.991674028218483
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.47e+03    |
|    ep_rew_mean          | -205        |
| time/                   |             |
|    fps                  | 1065        |
|    iterations           | 68          |
|    time_elapsed         | 130         |
|    total_timesteps      | 139264      |
| tr

exceeded: -1.5778837233102305
exceeded: -118.8021276417609
exceeded: -1.0087804428080391
exceeded: -0.24956076693076107
exceeded: -0.11114470173532812
exceeded: -0.34908443053895233
exceeded: -0.583199289835914
exceeded: -2.7601093504894676
exceeded: -31.53151214170687
exceeded: -52.53515068637771
exceeded: -0.5754830868938174
exceeded: -37.35968541512487
exceeded: -0.722451187882807
exceeded: -0.01622707440538987
exceeded: -1.7379013984825704
exceeded: -0.15723172507271566
exceeded: -0.47591276486840917
exceeded: -0.5143438333545811
exceeded: -1.2295952860263424
exceeded: -0.5351518556801126
exceeded: -1.1596374027229146
exceeded: -1.8282543074399558
exceeded: -0.9298015534695453
exceeded: -0.80626066431177
exceeded: -0.11260786296629968
exceeded: -1.343441309798852
exceeded: -2.7630700966359303
exceeded: -0.023781097214507448
exceeded: -0.8441592261251645
exceeded: -1.3679438437257616
exceeded: -0.03599411012224953
exceeded: -1.3943513037436528
exceeded: -1.7911960081221634
exceeded:

exceeded: -10.448145396860106
exceeded: -7.978330878819573
exceeded: -18.580671264504495
exceeded: -15.987518526207879
exceeded: -0.03171856469285017
exceeded: -1.1570707769870234
exceeded: -39.76895022227973
exceeded: -0.17961854263260205
exceeded: -0.3676664789126232
exceeded: -0.15772019518572375
exceeded: -0.015117274301748444
exceeded: -0.29318710663390374
exceeded: -0.26069863873209836
exceeded: -0.4586970930284555
exceeded: -0.07097376657435184
exceeded: -2.518887606087858
exceeded: -0.2854720564861749
exceeded: -0.01980363617052127
exceeded: -2.1328141904390097
exceeded: -16.739175760632115
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.73e+03    |
|    ep_rew_mean          | -4.04       |
| time/                   |             |
|    fps                  | 1063        |
|    iterations           | 75          |
|    time_elapsed         | 144         |
|    total_timesteps      | 153600      |
| train/        

exceeded: -0.1628945065404999
exceeded: -0.22287229844278303
exceeded: -0.8231105916514093
exceeded: -0.504394722953757
exceeded: -16.330686789675994
exceeded: -1.7210714809306205
exceeded: -11.354132292456292
exceeded: -1.5534937674685436
exceeded: -0.7367435271061689
exceeded: -46.25009055158101
exceeded: -0.899074609063707
exceeded: -0.9497091375850765
exceeded: -0.02148485031643345
exceeded: -0.012650628445802442
exceeded: -0.2721291600549597
exceeded: -1.3502134372111365
exceeded: -0.6975777886026095
exceeded: -0.626263612304787
exceeded: -27.898671420214036
exceeded: -1.0719772221532067
exceeded: -2.178619701321665
exceeded: -29.00857031719214
exceeded: -0.674073762144939
exceeded: -0.6525537235848654
exceeded: -0.8604486652662892
exceeded: -0.7353309233744034
exceeded: -7.385389790551199
exceeded: -2.097038936665833
exceeded: -0.1472968029174917
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 9.99e+03  |
|    ep_rew_mea

exceeded: -0.6460758005300532
exceeded: -7.853766771185535
exceeded: -3.4335707119564067
exceeded: -0.5789971318806053
exceeded: -0.4308869071192629
exceeded: -2.7237101718724004
exceeded: -44.34414403908975
exceeded: -1.078472334881834
exceeded: -7.402740591429705
exceeded: -0.11423979813419304
exceeded: -1.441893082433242
exceeded: -0.6849897805766575
exceeded: -26.036826832817745
exceeded: -14.103113574328145
exceeded: -9.72538297773161
exceeded: -26.67212919814436
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.99e+03    |
|    ep_rew_mean          | 258         |
| time/                   |             |
|    fps                  | 1054        |
|    iterations           | 84          |
|    time_elapsed         | 163         |
|    total_timesteps      | 172032      |
| train/                  |             |
|    approx_kl            | 0.015446512 |
|    clip_fraction        | 0.14        |
|    clip_range        

exceeded: -2.2921801329615685
exceeded: -2.6108778610645906
exceeded: -9.629263182756834
exceeded: -214.2460128982521
exceeded: -34.99258827061329
exceeded: -117.37302273466443
exceeded: -21.009832229983157
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.02e+04    |
|    ep_rew_mean          | 441         |
| time/                   |             |
|    fps                  | 1050        |
|    iterations           | 87          |
|    time_elapsed         | 169         |
|    total_timesteps      | 178176      |
| train/                  |             |
|    approx_kl            | 0.011622889 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.99       |
|    explained_variance   | 0.264       |
|    learning_rate        | 0.0003      |
|    loss                 | 610         |
|    n_updates            | 860         |
|    policy_gradient_loss | -0.0115  

exceeded: -0.4949540161666815
exceeded: -0.4066439156028065
exceeded: -0.7970082285446691
exceeded: -1.3658774950423846
exceeded: -2.5968119766286026
exceeded: -5.603377732974101
exceeded: -0.23064796134512117
exceeded: -0.5339385210933979
exceeded: -0.2666021851064548
exceeded: -0.3074524970586916
exceeded: -0.3628788549102522
exceeded: -0.6262436840986195
exceeded: -0.32390888497518944
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.04e+04    |
|    ep_rew_mean          | 681         |
| time/                   |             |
|    fps                  | 1051        |
|    iterations           | 92          |
|    time_elapsed         | 179         |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.014907024 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.95       |
|    explained_varian

exceeded: -126.8855046640787
exceeded: -10.20503900887164
exceeded: -74.43194517591878
exceeded: -3.0883496580216256
exceeded: -3.6121091422376894
exceeded: -0.9605543724257986
exceeded: -0.04281406674858226
exceeded: -0.6255550537234112
exceeded: -0.4102530978894441
exceeded: -1.033877825343
exceeded: -43.116921696193096
exceeded: -1.227067610620574
exceeded: -51.48321663831818
exceeded: -1.539737001850081
exceeded: -1.3389701325482428
exceeded: -11.755047615505397
exceeded: -1.2350163786934534
exceeded: -7.398811338590132
exceeded: -0.2464538752986949
exceeded: -1.8231437142613194
exceeded: -0.07370408373595135
exceeded: -0.3094041968092251
exceeded: -0.9368888486643387
exceeded: -0.35897460294077366
exceeded: -0.2836003096992893
exceeded: -0.6350485577135523
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.04e+04    |
|    ep_rew_mean          | 681         |
| time/                   |             |
|    fps          

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.06e+04    |
|    ep_rew_mean          | 789         |
| time/                   |             |
|    fps                  | 1051        |
|    iterations           | 99          |
|    time_elapsed         | 192         |
|    total_timesteps      | 202752      |
| train/                  |             |
|    approx_kl            | 0.006700741 |
|    clip_fraction        | 0.0666      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.87       |
|    explained_variance   | 0.46        |
|    learning_rate        | 0.0003      |
|    loss                 | 310         |
|    n_updates            | 980         |
|    policy_gradient_loss | -0.00812    |
|    std                  | 0.825       |
|    value_loss           | 444         |
-----------------------------------------
exceeded: -0.4870210594090857
exceeded: -14.826894947276354
exceeded: -2.206

exceeded: -10.249997887586455
exceeded: -0.08723775186221777
exceeded: -22.62418263342546
exceeded: -2.358653283596479
exceeded: -0.8191999731328385
exceeded: -0.15569112430898138
exceeded: -0.09539267024137328
exceeded: -21.246895664737142
exceeded: -2.59772147834152
exceeded: -0.10730595555883092
exceeded: -1.70264417011325
exceeded: -1.2740678221761557
exceeded: -0.08625963578119852
exceeded: -113.3482650681877
exceeded: -0.3860358283233224
exceeded: -0.23119030072150118
exceeded: -0.9483216885365354
exceeded: -1.2890590767635017
exceeded: -1.4534387393048336
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.06e+04    |
|    ep_rew_mean          | 789         |
| time/                   |             |
|    fps                  | 1048        |
|    iterations           | 105         |
|    time_elapsed         | 205         |
|    total_timesteps      | 215040      |
| train/                  |             |
|    approx

exceeded: -231.7572535306079
exceeded: -3.7308730917046296
exceeded: -0.5151494748533605
exceeded: -0.03974202722400608
exceeded: -2.3553976689508125
exceeded: -0.15021498517640422
exceeded: -2.003234439068462
exceeded: -0.0288172542650752
exceeded: -1.8686336925345393
exceeded: -0.09031178733272693
exceeded: -0.924662764096678
exceeded: -0.08127831553084824
exceeded: -0.3285373194015979
exceeded: -0.681563363075125
exceeded: -0.416925410473059
exceeded: -0.17264054630819198
exceeded: -0.8701229222639645
exceeded: -3.6649350735794917
exceeded: -0.6276238019645375
exceeded: -2.2309786044300237
exceeded: -0.5801435105659848
exceeded: -0.056966686241613156
exceeded: -0.3697297998699052
exceeded: -34.714962104155056
exceeded: -246.81593941279806
exceeded: -82.85283744926826
exceeded: -0.41122844295638883
exceeded: -0.2610970735675525
exceeded: -0.08203630522568213
exceeded: -2.4463147246200254
exceeded: -2.7613465465516036
exceeded: -26.296483614571446
exceeded: -0.6485565270652611
exceede

exceeded: -1.5503996950079877
exceeded: -5.506736687688479
exceeded: -11.3350464253268
exceeded: -70.99142542315337
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.09e+04   |
|    ep_rew_mean          | 1.09e+03   |
| time/                   |            |
|    fps                  | 1045       |
|    iterations           | 113        |
|    time_elapsed         | 221        |
|    total_timesteps      | 231424     |
| train/                  |            |
|    approx_kl            | 0.01146525 |
|    clip_fraction        | 0.143      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.76      |
|    explained_variance   | 0.648      |
|    learning_rate        | 0.0003     |
|    loss                 | 133        |
|    n_updates            | 1120       |
|    policy_gradient_loss | -0.0139    |
|    std                  | 0.804      |
|    value_loss           | 175        |
-----------------------

exceeded: -0.17898946324118564
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.1e+04     |
|    ep_rew_mean          | 1.24e+03    |
| time/                   |             |
|    fps                  | 1044        |
|    iterations           | 120         |
|    time_elapsed         | 235         |
|    total_timesteps      | 245760      |
| train/                  |             |
|    approx_kl            | 0.018113345 |
|    clip_fraction        | 0.162       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.64       |
|    explained_variance   | 0.303       |
|    learning_rate        | 0.0003      |
|    loss                 | 21.3        |
|    n_updates            | 1190        |
|    policy_gradient_loss | -0.0132     |
|    std                  | 0.777       |
|    value_loss           | 57.3        |
-----------------------------------------
-----------------------------------------
| r

exceeded: -0.31651465577093757
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.11e+04    |
|    ep_rew_mean          | 1.35e+03    |
| time/                   |             |
|    fps                  | 1040        |
|    iterations           | 125         |
|    time_elapsed         | 245         |
|    total_timesteps      | 256000      |
| train/                  |             |
|    approx_kl            | 0.008796629 |
|    clip_fraction        | 0.0931      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.59       |
|    explained_variance   | 0.0592      |
|    learning_rate        | 0.0003      |
|    loss                 | 56.2        |
|    n_updates            | 1240        |
|    policy_gradient_loss | -0.00911    |
|    std                  | 0.771       |
|    value_loss           | 484         |
-----------------------------------------
-----------------------------------------
| r

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.11e+04     |
|    ep_rew_mean          | 1.35e+03     |
| time/                   |              |
|    fps                  | 1042         |
|    iterations           | 128          |
|    time_elapsed         | 251          |
|    total_timesteps      | 262144       |
| train/                  |              |
|    approx_kl            | 0.0072726747 |
|    clip_fraction        | 0.0588       |
|    clip_range           | 0.2          |
|    entropy_loss         | -4.58        |
|    explained_variance   | 0.377        |
|    learning_rate        | 0.0003       |
|    loss                 | 209          |
|    n_updates            | 1270         |
|    policy_gradient_loss | -0.00741     |
|    std                  | 0.771        |
|    value_loss           | 546          |
------------------------------------------
exceeded: -1.0153554146424608
exceeded: -0.53484637682

exceeded: -0.6786852092375073
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.12e+04    |
|    ep_rew_mean          | 1.44e+03    |
| time/                   |             |
|    fps                  | 1040        |
|    iterations           | 133         |
|    time_elapsed         | 261         |
|    total_timesteps      | 272384      |
| train/                  |             |
|    approx_kl            | 0.009100221 |
|    clip_fraction        | 0.0881      |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.5        |
|    explained_variance   | 0.0372      |
|    learning_rate        | 0.0003      |
|    loss                 | 1.51e+03    |
|    n_updates            | 1320        |
|    policy_gradient_loss | -0.00945    |
|    std                  | 0.755       |
|    value_loss           | 912         |
-----------------------------------------
exceeded: -1.6280212866374122
exceeded: -2.547

exceeded: -4.1372829923263446
exceeded: -1.5428802322398893
exceeded: -0.0023938501467491947
exceeded: -2.0114977298846366
exceeded: -0.759345428012974
exceeded: -0.69479310500275
exceeded: -0.8461599720503743
exceeded: -1.1027691984054386
exceeded: -0.9782287674433341
exceeded: -0.5817677675356472
exceeded: -0.5182471453937383
exceeded: -1.3990747470905731
exceeded: -0.12369349414520286
exceeded: -2.528761134499817
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.12e+04   |
|    ep_rew_mean          | 1.5e+03    |
| time/                   |            |
|    fps                  | 1038       |
|    iterations           | 140        |
|    time_elapsed         | 275        |
|    total_timesteps      | 286720     |
| train/                  |            |
|    approx_kl            | 0.02269245 |
|    clip_fraction        | 0.192      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.45      |
|    e

exceeded: -101.55101803495482
exceeded: -14.400724317977565
exceeded: -2.5314931027968512
exceeded: -0.1652877730979446
exceeded: -1.1671413805900122
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.12e+04    |
|    ep_rew_mean          | 1.58e+03    |
| time/                   |             |
|    fps                  | 1041        |
|    iterations           | 145         |
|    time_elapsed         | 285         |
|    total_timesteps      | 296960      |
| train/                  |             |
|    approx_kl            | 0.013308456 |
|    clip_fraction        | 0.173       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.46       |
|    explained_variance   | 0.328       |
|    learning_rate        | 0.0003      |
|    loss                 | 29.1        |
|    n_updates            | 1440        |
|    policy_gradient_loss | -0.0117     |
|    std                  | 0.749       |
|    value

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.12e+04    |
|    ep_rew_mean          | 1.68e+03    |
| time/                   |             |
|    fps                  | 1041        |
|    iterations           | 150         |
|    time_elapsed         | 294         |
|    total_timesteps      | 307200      |
| train/                  |             |
|    approx_kl            | 0.018004116 |
|    clip_fraction        | 0.185       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.37       |
|    explained_variance   | 0.385       |
|    learning_rate        | 0.0003      |
|    loss                 | 13          |
|    n_updates            | 1490        |
|    policy_gradient_loss | -0.0152     |
|    std                  | 0.731       |
|    value_loss           | 59.9        |
-----------------------------------------
exceeded: -64.97513033355878
exceeded: -160.23151889454257
exceeded: -290.01

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.13e+04    |
|    ep_rew_mean          | 1.71e+03    |
| time/                   |             |
|    fps                  | 1041        |
|    iterations           | 156         |
|    time_elapsed         | 306         |
|    total_timesteps      | 319488      |
| train/                  |             |
|    approx_kl            | 0.015752632 |
|    clip_fraction        | 0.138       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.33       |
|    explained_variance   | 0.355       |
|    learning_rate        | 0.0003      |
|    loss                 | 63.2        |
|    n_updates            | 1550        |
|    policy_gradient_loss | -0.0147     |
|    std                  | 0.728       |
|    value_loss           | 325         |
-----------------------------------------
exceeded: -1.8267503482298406
exceeded: -0.5599462668637557
exceeded: -35.86

exceeded: -0.49966411285411994
exceeded: -0.1712394442191266
exceeded: -0.029521385233401067
exceeded: -0.43930623140959835
exceeded: -0.751758317501951
exceeded: -1.956421297274519
exceeded: -0.5875565780298277
exceeded: -4.040399983890343
exceeded: -0.21430751472132858
exceeded: -1.3382551859216794
exceeded: -0.8471120636513851
exceeded: -0.10337129327048858
exceeded: -12.938667724403537
exceeded: -0.16585656760762071
exceeded: -0.15423019281619726
exceeded: -0.06175507438917857
exceeded: -3.4303671653685064
exceeded: -2.7736100121660967
exceeded: -0.8963628184468067
exceeded: -0.4656199188207149
exceeded: -0.52084957372288
exceeded: -0.08015859172653168
exceeded: -2.211828508489892
exceeded: -0.6813991423257032
exceeded: -1.8936411283487957
exceeded: -0.6859605104720712
exceeded: -0.15201597332855857
exceeded: -0.20716888192035188
exceeded: -1.1093761908829152
exceeded: -2.623789047953986
exceeded: -0.28749251013629396
exceeded: -6.651764620459149
exceeded: -0.010736404182285211
exc

exceeded: -3.652228144207456
exceeded: -0.3393451117051896
exceeded: -0.3206649576572701
exceeded: -65.63136734148398
exceeded: -4.122731977180314
exceeded: -1.2501209134442792
exceeded: -0.07588226546992825
exceeded: -0.04133984536754818
exceeded: -1.165641411822481
exceeded: -129.86585890961564
exceeded: -1.8564813370076574
exceeded: -1.4223276801486588
exceeded: -0.6622834153885299
exceeded: -0.06222758562197953
exceeded: -0.7662235586201662
exceeded: -0.4710029298455132
exceeded: -0.19017610774095695
exceeded: -0.4375098409415918
exceeded: -1.035882543598797
exceeded: -34.94505594933643
exceeded: -112.86839671863164
exceeded: -0.13401641066214426
exceeded: -0.1205674891083585
exceeded: -120.70093499232301
exceeded: -0.7818277168280141
exceeded: -9.619508500887763
exceeded: -16.904974876469634
exceeded: -84.85184157931522
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.14e+04    |
|    ep_rew_mean          | 1.87e+03 

exceeded: -0.428384722061961
exceeded: -0.8506499052364701
exceeded: -2.1026913076236093
exceeded: -0.26922670852669467
exceeded: -0.5169428443614582
exceeded: -0.061473533788887036
exceeded: -0.8857971032682654
exceeded: -3.053222479211168
exceeded: -0.009090422484797493
exceeded: -0.9679911302742229
exceeded: -0.33809359971863195
exceeded: -1.2917590643345873
exceeded: -1.7225442146893208
exceeded: -1.1536073535316367
exceeded: -0.8126144083923558
exceeded: -1.590908694749622
exceeded: -1.4428635354456785
exceeded: -0.7981196480943673
exceeded: -31.357224094416072
exceeded: -0.016253695669202404
exceeded: -0.7773693929803077
exceeded: -0.39791566067844275
exceeded: -0.5707177166383053
exceeded: -0.4301011135935224
exceeded: -2.571240493263518
exceeded: -47.4657183646069
exceeded: -3.3272111354829006
exceeded: -2.714504971131229
exceeded: -0.221286780844137
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.14e+04    |
|  

exceeded: -112.08343231399746
exceeded: -1.0279042478348415
exceeded: -1.5411655233060775
exceeded: -69.70655682640692
exceeded: -0.34987269712092894
exceeded: -0.5790238080866741
exceeded: -0.27718158392949105
exceeded: -1.1288831437491202
exceeded: -0.15112027542897988
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.14e+04    |
|    ep_rew_mean          | 1.97e+03    |
| time/                   |             |
|    fps                  | 1057        |
|    iterations           | 178         |
|    time_elapsed         | 344         |
|    total_timesteps      | 364544      |
| train/                  |             |
|    approx_kl            | 0.027071632 |
|    clip_fraction        | 0.222       |
|    clip_range           | 0.2         |
|    entropy_loss         | -4.11       |
|    explained_variance   | 0.414       |
|    learning_rate        | 0.0003      |
|    loss                 | 34.5        |
|    n_updates

exceeded: -10.560640235208485
exceeded: -27.44491760022141
exceeded: -76.54026836712445
exceeded: -18.625875411984364
exceeded: -38.82937170340848
exceeded: -253.46901212795058
exceeded: -1.181738839925235
exceeded: -0.529025335559584
exceeded: -0.6165437243868122
exceeded: -0.19759270188524086
exceeded: -1.992371628561104
exceeded: -0.6420863641841632
exceeded: -0.19640169798497334
exceeded: -2.1415293896319634
exceeded: -0.4968191169118708
exceeded: -0.8504484472310724
exceeded: -0.4816282675487613
exceeded: -3.4712491206495977
exceeded: -0.029424255338678828
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.15e+04    |
|    ep_rew_mean          | 2e+03       |
| time/                   |             |
|    fps                  | 1061        |
|    iterations           | 184         |
|    time_elapsed         | 355         |
|    total_timesteps      | 376832      |
| train/                  |             |
|    approx_

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.15e+04   |
|    ep_rew_mean          | 2.05e+03   |
| time/                   |            |
|    fps                  | 1062       |
|    iterations           | 188        |
|    time_elapsed         | 362        |
|    total_timesteps      | 385024     |
| train/                  |            |
|    approx_kl            | 0.01567246 |
|    clip_fraction        | 0.131      |
|    clip_range           | 0.2        |
|    entropy_loss         | -4.04      |
|    explained_variance   | 0.422      |
|    learning_rate        | 0.0003     |
|    loss                 | 247        |
|    n_updates            | 1870       |
|    policy_gradient_loss | -0.0162    |
|    std                  | 0.675      |
|    value_loss           | 584        |
----------------------------------------
exceeded: -4.824993777380268
exceeded: -28.893618124330484
exceeded: -34.97696710390287
exceeded: 

exceeded: -10.38509610337911
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.15e+04    |
|    ep_rew_mean          | 2.02e+03    |
| time/                   |             |
|    fps                  | 1066        |
|    iterations           | 194         |
|    time_elapsed         | 372         |
|    total_timesteps      | 397312      |
| train/                  |             |
|    approx_kl            | 0.018524658 |
|    clip_fraction        | 0.161       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.98       |
|    explained_variance   | 0.267       |
|    learning_rate        | 0.0003      |
|    loss                 | 513         |
|    n_updates            | 1930        |
|    policy_gradient_loss | -0.0145     |
|    std                  | 0.668       |
|    value_loss           | 445         |
-----------------------------------------
exceeded: -68.35991148498788
exceeded: -11.8365

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.15e+04    |
|    ep_rew_mean          | 2.07e+03    |
| time/                   |             |
|    fps                  | 1069        |
|    iterations           | 200         |
|    time_elapsed         | 382         |
|    total_timesteps      | 409600      |
| train/                  |             |
|    approx_kl            | 0.024805032 |
|    clip_fraction        | 0.236       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.88       |
|    explained_variance   | 0.364       |
|    learning_rate        | 0.0003      |
|    loss                 | 33.7        |
|    n_updates            | 1990        |
|    policy_gradient_loss | -0.0154     |
|    std                  | 0.649       |
|    value_loss           | 56.7        |
-----------------------------------------
exceeded: -0.9964464509771552
exceeded: -2.1936947548821424
exceeded: -14.85

exceeded: -0.05862693300793713
exceeded: -0.37995550495220515
exceeded: -0.2753288573620305
exceeded: -0.3931042268001954
exceeded: -2.953508138789238
exceeded: -0.020100336706254485
exceeded: -0.11064024305159291
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.15e+04   |
|    ep_rew_mean          | 2.14e+03   |
| time/                   |            |
|    fps                  | 1072       |
|    iterations           | 206        |
|    time_elapsed         | 393        |
|    total_timesteps      | 421888     |
| train/                  |            |
|    approx_kl            | 0.01931335 |
|    clip_fraction        | 0.19       |
|    clip_range           | 0.2        |
|    entropy_loss         | -3.8       |
|    explained_variance   | 0.259      |
|    learning_rate        | 0.0003     |
|    loss                 | 72.9       |
|    n_updates            | 2050       |
|    policy_gradient_loss | -0.0154    |
|    st

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.15e+04    |
|    ep_rew_mean          | 2.18e+03    |
| time/                   |             |
|    fps                  | 1075        |
|    iterations           | 212         |
|    time_elapsed         | 403         |
|    total_timesteps      | 434176      |
| train/                  |             |
|    approx_kl            | 0.016115773 |
|    clip_fraction        | 0.192       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.8        |
|    explained_variance   | 0.622       |
|    learning_rate        | 0.0003      |
|    loss                 | 41.6        |
|    n_updates            | 2110        |
|    policy_gradient_loss | -0.0138     |
|    std                  | 0.639       |
|    value_loss           | 108         |
-----------------------------------------
exceeded: -62.77150126097492
exceeded: -18.30840614328911
------------------

exceeded: -45.53877745398333
exceeded: -12.107036985385713
exceeded: -16.325834470041244
exceeded: -46.201758551398804
exceeded: -3.7900490074058
exceeded: -92.52048120611016
exceeded: -0.5105509546689673
exceeded: -0.5747578079824862
exceeded: -0.25027901889594095
exceeded: -1.0279638132908218
exceeded: -2.0854918804203395
exceeded: -115.76742306579709
exceeded: -59.92902347748499
exceeded: -0.3537561318332798
exceeded: -28.516536972116494
exceeded: -0.11915384555183367
exceeded: -1.165284917267577
exceeded: -1.2915231304642936
exceeded: -0.15383897551769843
exceeded: -2.4793679459402225
exceeded: -0.9941098650411044
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 2.24e+03    |
| time/                   |             |
|    fps                  | 1077        |
|    iterations           | 218         |
|    time_elapsed         | 414         |
|    total_timesteps      | 446464    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 2.26e+03    |
| time/                   |             |
|    fps                  | 1079        |
|    iterations           | 223         |
|    time_elapsed         | 423         |
|    total_timesteps      | 456704      |
| train/                  |             |
|    approx_kl            | 0.024665918 |
|    clip_fraction        | 0.193       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.74       |
|    explained_variance   | 0.454       |
|    learning_rate        | 0.0003      |
|    loss                 | 257         |
|    n_updates            | 2220        |
|    policy_gradient_loss | -0.0116     |
|    std                  | 0.633       |
|    value_loss           | 747         |
-----------------------------------------
exceeded: -25.08810252908582
exceeded: -4.499733775686437
exceeded: -0.83639

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 2.27e+03    |
| time/                   |             |
|    fps                  | 1080        |
|    iterations           | 228         |
|    time_elapsed         | 431         |
|    total_timesteps      | 466944      |
| train/                  |             |
|    approx_kl            | 0.019926853 |
|    clip_fraction        | 0.206       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.74       |
|    explained_variance   | 0.234       |
|    learning_rate        | 0.0003      |
|    loss                 | 87.4        |
|    n_updates            | 2270        |
|    policy_gradient_loss | -0.00692    |
|    std                  | 0.633       |
|    value_loss           | 241         |
-----------------------------------------
-----------------------------------------
| rollout/                |       

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.17e+04    |
|    ep_rew_mean          | 2.35e+03    |
| time/                   |             |
|    fps                  | 1084        |
|    iterations           | 236         |
|    time_elapsed         | 445         |
|    total_timesteps      | 483328      |
| train/                  |             |
|    approx_kl            | 0.015525364 |
|    clip_fraction        | 0.152       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.63       |
|    explained_variance   | 0.46        |
|    learning_rate        | 0.0003      |
|    loss                 | 63.5        |
|    n_updates            | 2350        |
|    policy_gradient_loss | -0.0174     |
|    std                  | 0.62        |
|    value_loss           | 165         |
-----------------------------------------
exceeded: -0.5723192948173127
exceeded: -0.5778037354745555
exceeded: -1.027

exceeded: -25.29922208361798
exceeded: -0.054480720266185605
exceeded: -4.44035258453444
exceeded: -3.1515992740066223
exceeded: -1.42329800814599
exceeded: -0.8791440977537511
exceeded: -254.59374052371558
exceeded: -5.189484233429477
exceeded: -35.10639109031986
exceeded: -11.542008133957676
exceeded: -17.372484894233477
exceeded: -25.623496531534748
exceeded: -0.1500723052817711
exceeded: -9.626117508441602
exceeded: -157.5295659482183
exceeded: -0.5456656213053865
exceeded: -28.668072058646562
exceeded: -5.714260616290929
exceeded: -250.49243374696908
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.17e+04    |
|    ep_rew_mean          | 2.4e+03     |
| time/                   |             |
|    fps                  | 1086        |
|    iterations           | 242         |
|    time_elapsed         | 456         |
|    total_timesteps      | 495616      |
| train/                  |             |
|    approx_kl    

exceeded: -35.15626857188116
exceeded: -76.82059085599059
exceeded: -0.5992366976564069
exceeded: -0.349083223052803
exceeded: -0.12814384025085518
exceeded: -1.0237869321803998
exceeded: -0.38377684394550754
exceeded: -58.80222246051339
exceeded: -0.47479553502261906
exceeded: -0.832865789978728
exceeded: -0.8292944904790587
exceeded: -0.35335915422282754
exceeded: -0.2715612293266728
exceeded: -0.6900356265211077
exceeded: -2.323677199163423
exceeded: -1.9460422662810495
exceeded: -0.8090965309142102
exceeded: -0.438361305666864
exceeded: -129.79048600000542
exceeded: -18.705213636728722
exceeded: -5.643226562196136
exceeded: -252.70202940131105
exceeded: -1.085537740487901
exceeded: -3.829300075505241
exceeded: -1.9657971037029265
exceeded: -0.07946290435028885
exceeded: -1.9988440675311447
exceeded: -0.10662894229440023
exceeded: -1.2191014067288481
exceeded: -0.13064401603775133
exceeded: -32.65693992879616
exceeded: -0.6837247570825479
exceeded: -156.14503625367024
exceeded: -0.0

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.17e+04    |
|    ep_rew_mean          | 2.44e+03    |
| time/                   |             |
|    fps                  | 1090        |
|    iterations           | 253         |
|    time_elapsed         | 475         |
|    total_timesteps      | 518144      |
| train/                  |             |
|    approx_kl            | 0.017795594 |
|    clip_fraction        | 0.188       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.44       |
|    explained_variance   | 0.045       |
|    learning_rate        | 0.0003      |
|    loss                 | 31          |
|    n_updates            | 2520        |
|    policy_gradient_loss | -0.0121     |
|    std                  | 0.595       |
|    value_loss           | 337         |
-----------------------------------------
exceeded: -23.00033873655608
exceeded: -0.14192899159234507
exceeded: -1.653

exceeded: -119.80382434676493
exceeded: -11.460089606180837
exceeded: -2.3437514539398667
exceeded: -43.08549057676912
exceeded: -0.6994841724256249
exceeded: -1.6476535690690874
exceeded: -0.38299900458015945
exceeded: -0.7630251566338097
exceeded: -1.3833572587451461
exceeded: -0.31267102943494407
exceeded: -3.556283114710702
exceeded: -0.014929722450730274
exceeded: -1.5426783765428413
exceeded: -20.625832904989906
exceeded: -0.8751364030405098
exceeded: -0.09353313252367705
exceeded: -3.5117103023640444
exceeded: -0.4526129888958975
exceeded: -0.4659038081548168
exceeded: -0.16676644791470496
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.17e+04    |
|    ep_rew_mean          | 2.5e+03     |
| time/                   |             |
|    fps                  | 1092        |
|    iterations           | 259         |
|    time_elapsed         | 485         |
|    total_timesteps      | 530432      |
| train/          

exceeded: -7.305967244613409
exceeded: -18.054816079533335
exceeded: -8.489467800144684
exceeded: -7.459174124634238
exceeded: -91.33819781301713
exceeded: -35.34095891433234
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.17e+04    |
|    ep_rew_mean          | 2.56e+03    |
| time/                   |             |
|    fps                  | 1094        |
|    iterations           | 265         |
|    time_elapsed         | 495         |
|    total_timesteps      | 542720      |
| train/                  |             |
|    approx_kl            | 0.020953383 |
|    clip_fraction        | 0.213       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.36       |
|    explained_variance   | 0.567       |
|    learning_rate        | 0.0003      |
|    loss                 | 78.6        |
|    n_updates            | 2640        |
|    policy_gradient_loss | -0.0207     |
|    std                  |

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.17e+04    |
|    ep_rew_mean          | 2.6e+03     |
| time/                   |             |
|    fps                  | 1095        |
|    iterations           | 270         |
|    time_elapsed         | 504         |
|    total_timesteps      | 552960      |
| train/                  |             |
|    approx_kl            | 0.027373577 |
|    clip_fraction        | 0.221       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.34       |
|    explained_variance   | 0.673       |
|    learning_rate        | 0.0003      |
|    loss                 | 39          |
|    n_updates            | 2690        |
|    policy_gradient_loss | -0.0137     |
|    std                  | 0.579       |
|    value_loss           | 77.2        |
-----------------------------------------
exceeded: -1.0416938004743463
exceeded: -0.09630043997411653
exceeded: -0.32

exceeded: -10.800634352086394
exceeded: -0.17859339987543188
exceeded: -7.091782223036869
exceeded: -116.59262367372273
exceeded: -1.7959983920492495
exceeded: -0.32100165029449373
exceeded: -0.4320648926664281
exceeded: -0.004085344475589739
exceeded: -45.9281065895511
exceeded: -16.996008728783853
exceeded: -37.03738158686852
exceeded: -10.658418977369832
exceeded: -1.824517626887269
exceeded: -13.40310821108796
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04    |
|    ep_rew_mean          | 2.66e+03    |
| time/                   |             |
|    fps                  | 1097        |
|    iterations           | 276         |
|    time_elapsed         | 515         |
|    total_timesteps      | 565248      |
| train/                  |             |
|    approx_kl            | 0.031873755 |
|    clip_fraction        | 0.272       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.32   

exceeded: -10.198082014017942
exceeded: -1.3525306154152237
exceeded: -0.13928720510841464
exceeded: -2.4919377601319352
exceeded: -0.13095723094502945
exceeded: -3.4643372738884075
exceeded: -2.1358657120864692
exceeded: -1.1350430473074038
exceeded: -0.44745262187785134
exceeded: -81.31080092995809
exceeded: -12.744151401707871
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04    |
|    ep_rew_mean          | 2.66e+03    |
| time/                   |             |
|    fps                  | 1095        |
|    iterations           | 280         |
|    time_elapsed         | 523         |
|    total_timesteps      | 573440      |
| train/                  |             |
|    approx_kl            | 0.018475082 |
|    clip_fraction        | 0.176       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.31       |
|    explained_variance   | 0.251       |
|    learning_rate        | 0.0003    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04    |
|    ep_rew_mean          | 2.68e+03    |
| time/                   |             |
|    fps                  | 1095        |
|    iterations           | 285         |
|    time_elapsed         | 532         |
|    total_timesteps      | 583680      |
| train/                  |             |
|    approx_kl            | 0.019576252 |
|    clip_fraction        | 0.274       |
|    clip_range           | 0.2         |
|    entropy_loss         | -3.24       |
|    explained_variance   | 0.314       |
|    learning_rate        | 0.0003      |
|    loss                 | 23.2        |
|    n_updates            | 2840        |
|    policy_gradient_loss | -0.0087     |
|    std                  | 0.567       |
|    value_loss           | 57.4        |
-----------------------------------------
exceeded: -143.86829085844172
exceeded: -0.21511014985575058
exceeded: -6.36

exceeded: -104.5806473158205
exceeded: -0.5020633847981257
exceeded: -0.9849018443477753
exceeded: -1.6233285930403254
exceeded: -0.136318305400963
exceeded: -0.3771098094981884
exceeded: -0.0751728111796877
exceeded: -2.9728392765265284
exceeded: -0.8920757681507586
exceeded: -1.1257007328734994
exceeded: -1.6656291809607673
exceeded: -0.3520553507219826
exceeded: -0.17671463176730348
exceeded: -23.62754503121358
exceeded: -0.21054262372204838
exceeded: -0.29267218477324514
exceeded: -1.1728125426751321
exceeded: -15.502582342370733
exceeded: -0.2886866275803899
exceeded: -0.2747237040567503
exceeded: -58.01586211572478
exceeded: -2.669038073146803
exceeded: -21.923478381521537
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.18e+04  |
|    ep_rew_mean          | 2.73e+03  |
| time/                   |           |
|    fps                  | 1095      |
|    iterations           | 291       |
|    time_elapsed         | 543 

<stable_baselines3.ppo.ppo.PPO at 0x13ecbcaf0>

In [8]:
total_reward = 0
observation = env.reset()
done = False
while not done:
    action, _states = model.predict(observation)
        
    observation, reward, done, info = env.step(action)
        
    total_reward += reward
    
print(total_reward)

  return torch.cat([torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes))])


exceeded: -4.665130686013766
exceeded: -1.9536245346367898
exceeded: -7.877867497882596
exceeded: -192.71008086654976
exceeded: -1.5878856665312315
exceeded: -0.1412544469174888
exceeded: -0.2424055613866777
exceeded: -0.6162871322937709
exceeded: -2.208778564138499
exceeded: -0.18086723527172235
exceeded: -0.8025489212264398
exceeded: -0.4603388968658789
exceeded: -0.15715175486795135
exceeded: -2.219888236372896
exceeded: -0.5478824084612736
exceeded: -6.347802176238988
exceeded: -0.8073332115947769
exceeded: -0.5928143275961243
exceeded: -0.950266292882262
exceeded: -0.21829156851494824
exceeded: -26.79816321464013
exceeded: -5.578838775822996
exceeded: -0.4248945799279231
exceeded: -0.4880356308259536
exceeded: -0.2470708113930435
exceeded: -0.35435324953108827
exceeded: -0.6967848070966103
exceeded: -0.08367250716870611
exceeded: -0.8187467048732394
exceeded: -0.06905973139019997
exceeded: -1.2733695278473525
exceeded: -0.7335783723175836
exceeded: -0.504025034678973
exceeded: -0.

In [None]:
env.print_statistics()

In [9]:
model.learn(total_timesteps=6000000, reset_num_timesteps=False)

Logging to ./multilink/PPO_1
exceeded: -0.7130045855659766


  return torch.cat([torch.tensor(env_encoding), torch.tensor(next_req.get_encoding(self.nodes))])


exceeded: -1.3580365860941266
exceeded: -104.54600831189973
exceeded: -5.2336595267987285
exceeded: -1.8480526945021147
exceeded: -41.469908927527435
exceeded: -61.47097638466665
exceeded: -2.6656034657694496
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.24e+04 |
|    ep_rew_mean     | 5.09e+03 |
| time/              |          |
|    fps             | 1503088  |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 1802240  |
---------------------------------
exceeded: -1.25913437334013
exceeded: -0.474073222378072
exceeded: -7.724811168411957
exceeded: -1.8112713474509479
exceeded: -61.150143749719376
exceeded: -2.9456761703297945
exceeded: -0.4048069921136552
exceeded: -53.078010767226395
exceeded: -66.09842449695746
exceeded: -0.8066890364038517
exceeded: -0.5007794425420568
exceeded: -0.18517936080109001
exceeded: -189.06952276250257
exceeded: -0.9777572217914401
exceeded: -2.194525214265414
exceed

exceeded: -53.19841861387957
exceeded: -6.625230452081237
exceeded: -0.818083675473589
exceeded: -2.669695493230929
exceeded: -3.351282985038844
exceeded: -15.730492543216425
exceeded: -53.924626211286565
exceeded: -0.030834729533768562
exceeded: -15.066694547362223
exceeded: -0.3118603559079851
exceeded: -0.40436596304487404
exceeded: -1.9286910261518426
exceeded: -0.42958985615011147
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.24e+04    |
|    ep_rew_mean          | 5.12e+03    |
| time/                   |             |
|    fps                  | 122021      |
|    iterations           | 8           |
|    time_elapsed         | 14          |
|    total_timesteps      | 1816576     |
| train/                  |             |
|    approx_kl            | 0.054834764 |
|    clip_fraction        | 0.431       |
|    clip_range           | 0.2         |
|    entropy_loss         | 0.913       |
|    explained_variance

exceeded: -7.7283938772328735
exceeded: -146.5488243760577
exceeded: -9.199416015102242
exceeded: -3.4982087234123123
exceeded: -35.08083119420834
exceeded: -20.192663318176347
exceeded: -0.43137048291241775
exceeded: -30.28921651543596
exceeded: -1.327422778462191
exceeded: -4.836986539643281
exceeded: -21.569443521704596
exceeded: -2.957051180483639
exceeded: -35.2259815749584
exceeded: -2.849043024177158
exceeded: -12.354387399954422
exceeded: -4.340942795739472
exceeded: -0.3886030120799343
exceeded: -0.42078949055920306
exceeded: -2.0943356378198876
exceeded: -0.9043590055722943
exceeded: -0.5301852436802424
exceeded: -1.035529481641797
exceeded: -0.31828016619501753
exceeded: -1.05577435230524
exceeded: -0.13774356988791225
exceeded: -1.5576590639765961
exceeded: -107.18023932032003
exceeded: -0.3828482827178101
exceeded: -0.43115151176458155
exceeded: -2.4041985073448706
exceeded: -0.7519212023398452
----------------------------------------
| rollout/                |           

exceeded: -17.501389970532134
exceeded: -87.07244434901128
exceeded: -28.146008193636973
exceeded: -8.239068930033564
exceeded: -15.987357128140015
exceeded: -21.063281948881208
exceeded: -111.09693733905601
exceeded: -31.113302266023002
exceeded: -8.462995251024564
exceeded: -5.656552703094162
exceeded: -11.270789091333599
exceeded: -50.962824870619315
exceeded: -0.3009797619020711
exceeded: -0.2329947348866513
exceeded: -1.1050485267430346
exceeded: -102.82542232199862
exceeded: -30.34142390204266
exceeded: -1.8676064199945834
exceeded: -0.7171441552268533
exceeded: -18.47219422154335
exceeded: -0.18150756584773733
exceeded: -0.4512692172490174
exceeded: -0.03555016829660917
exceeded: -0.6159274462562254
exceeded: -3.5073994506525574
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.24e+04   |
|    ep_rew_mean          | 5.13e+03   |
| time/                   |            |
|    fps                  | 56021      |
|    ite

exceeded: -22.072500556288553
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.24e+04    |
|    ep_rew_mean          | 5.14e+03    |
| time/                   |             |
|    fps                  | 46590       |
|    iterations           | 22          |
|    time_elapsed         | 39          |
|    total_timesteps      | 1845248     |
| train/                  |             |
|    approx_kl            | 0.061078276 |
|    clip_fraction        | 0.414       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.07        |
|    explained_variance   | 0.0824      |
|    learning_rate        | 0.0003      |
|    loss                 | 338         |
|    n_updates            | 9000        |
|    policy_gradient_loss | -0.0152     |
|    std                  | 0.188       |
|    value_loss           | 232         |
-----------------------------------------
exceeded: -0.256488333186639
exceeded: -0.3357

exceeded: -6.5986478429376305
exceeded: -126.90641125520713
exceeded: -0.3045580816754056
exceeded: -3.0321239452931863
exceeded: -6.162241603673126
exceeded: -90.85945392135928
exceeded: -11.628402166095839
exceeded: -47.33320557370749
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.24e+04   |
|    ep_rew_mean          | 5.15e+03   |
| time/                   |            |
|    fps                  | 37216      |
|    iterations           | 28         |
|    time_elapsed         | 49         |
|    total_timesteps      | 1857536    |
| train/                  |            |
|    approx_kl            | 0.08098121 |
|    clip_fraction        | 0.47       |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.996      |
|    explained_variance   | 0.516      |
|    learning_rate        | 0.0003     |
|    loss                 | 35.2       |
|    n_updates            | 9060       |
|    policy_gradient_loss

exceeded: -5.326064837713051
exceeded: -5.823700129731558
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.24e+04   |
|    ep_rew_mean          | 5.17e+03   |
| time/                   |            |
|    fps                  | 31878      |
|    iterations           | 33         |
|    time_elapsed         | 58         |
|    total_timesteps      | 1867776    |
| train/                  |            |
|    approx_kl            | 0.05216089 |
|    clip_fraction        | 0.436      |
|    clip_range           | 0.2        |
|    entropy_loss         | 0.958      |
|    explained_variance   | 0.32       |
|    learning_rate        | 0.0003     |
|    loss                 | 36.9       |
|    n_updates            | 9110       |
|    policy_gradient_loss | -0.0227    |
|    std                  | 0.193      |
|    value_loss           | 88.1       |
----------------------------------------
exceeded: -0.6555987361740867
exceeded: 

exceeded: -4.889525875430234
exceeded: -19.252685235870945
exceeded: -8.5012933126249
exceeded: -6.76820026710847
exceeded: -3.6409737993880946
exceeded: -68.30133432290252
exceeded: -19.33640000564788
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.24e+04    |
|    ep_rew_mean          | 5.17e+03    |
| time/                   |             |
|    fps                  | 28643       |
|    iterations           | 37          |
|    time_elapsed         | 65          |
|    total_timesteps      | 1875968     |
| train/                  |             |
|    approx_kl            | 0.049035333 |
|    clip_fraction        | 0.35        |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.04        |
|    explained_variance   | 0.431       |
|    learning_rate        | 0.0003      |
|    loss                 | 94.8        |
|    n_updates            | 9150        |
|    policy_gradient_loss | -0.0211     |


-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.23e+04    |
|    ep_rew_mean          | 5.17e+03    |
| time/                   |             |
|    fps                  | 26018       |
|    iterations           | 41          |
|    time_elapsed         | 72          |
|    total_timesteps      | 1884160     |
| train/                  |             |
|    approx_kl            | 0.030400254 |
|    clip_fraction        | 0.297       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.08        |
|    explained_variance   | 0.198       |
|    learning_rate        | 0.0003      |
|    loss                 | 185         |
|    n_updates            | 9190        |
|    policy_gradient_loss | -0.0231     |
|    std                  | 0.188       |
|    value_loss           | 504         |
-----------------------------------------
exceeded: -4.658342737755095
exceeded: -45.57194707813393
exceeded: -0.03653

exceeded: -31.37060393038249
exceeded: -208.61321072664668
exceeded: -0.36104113282385186
exceeded: -0.9616858907574137
exceeded: -0.27126127202372247
exceeded: -0.1845672319961813
exceeded: -77.10334874728255
exceeded: -0.23215631092975048
exceeded: -0.14827591220110975
exceeded: -0.3859868383363846
exceeded: -0.707550637973614
exceeded: -0.8614226738523492
exceeded: -0.00616863519792372
exceeded: -0.4332080081741079
exceeded: -1.9159500010105057
exceeded: -0.8799368136330481
exceeded: -0.7591715874587959
exceeded: -0.20141748161958214
exceeded: -0.47160197811500437
exceeded: -0.07439672035763122
exceeded: -0.05842456121357653
exceeded: -0.18455509774714646
exceeded: -280.20223131964946
exceeded: -0.4322928278081998
exceeded: -0.1078170990927367
exceeded: -1.3426120280967209
exceeded: -0.3219924930745569
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.24e+04    |
|    ep_rew_mean          | 5.18e+03    |
| time/        

exceeded: -3.491088064917366
exceeded: -5.915510864732592
exceeded: -22.0515801855041
exceeded: -32.764524501573916
exceeded: -0.06549043033074639
exceeded: -1.542158669813381
exceeded: -1.5658505192622516
exceeded: -34.060866277830485
exceeded: -145.76513109610354
exceeded: -16.25343254741948
exceeded: -29.234260678364805
exceeded: -92.89784297974808
exceeded: -57.65236345972707
exceeded: -0.28946853858064286
exceeded: -28.457674746479146
exceeded: -0.15189456414296307
exceeded: -0.13229906461227212
exceeded: -0.5623087244690794
exceeded: -1.837068296279617
exceeded: -1.341455157689255
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.24e+04   |
|    ep_rew_mean          | 5.18e+03   |
| time/                   |            |
|    fps                  | 21995      |
|    iterations           | 49         |
|    time_elapsed         | 86         |
|    total_timesteps      | 1900544    |
| train/                  |          

exceeded: -14.853872019225305
exceeded: -17.837976710721286
exceeded: -3.97017446979058
exceeded: -2.0226026470741614
exceeded: -57.447663362001876
exceeded: -52.480670336838166
exceeded: -35.28472887192269
exceeded: -5.2097206800054465
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.2e+03    |
| time/                   |            |
|    fps                  | 20458      |
|    iterations           | 53         |
|    time_elapsed         | 93         |
|    total_timesteps      | 1908736    |
| train/                  |            |
|    approx_kl            | 0.12818485 |
|    clip_fraction        | 0.478      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.17       |
|    explained_variance   | 0.577      |
|    learning_rate        | 0.0003     |
|    loss                 | 33         |
|    n_updates            | 9310       |
|    policy_gradient_loss

exceeded: -1.8415045058850805
exceeded: -0.7622219894070859
exceeded: -0.1186623835942857
exceeded: -1.441456854367014
exceeded: -0.5720819585323416
exceeded: -0.7078074811232897
exceeded: -1.8918094716308562
exceeded: -0.6019202469191136
exceeded: -72.88194094996601
exceeded: -8.231103392167086
exceeded: -15.017536378552414
--------------------------------------
| rollout/                |          |
|    ep_len_mean          | 1.23e+04 |
|    ep_rew_mean          | 5.2e+03  |
| time/                   |          |
|    fps                  | 18813    |
|    iterations           | 58       |
|    time_elapsed         | 102      |
|    total_timesteps      | 1918976  |
| train/                  |          |
|    approx_kl            | 0.222866 |
|    clip_fraction        | 0.523    |
|    clip_range           | 0.2      |
|    entropy_loss         | 1.19     |
|    explained_variance   | 0.126    |
|    learning_rate        | 0.0003   |
|    loss                 | 49.7     |
|    n_upd

exceeded: -1.0626572534497665
exceeded: -19.79915973129006
exceeded: -0.6433885375441188
exceeded: -1.4089986821043095
exceeded: -0.5026773620060934
exceeded: -0.005973124490070267
exceeded: -57.55112107671237
exceeded: -1.8225943586987832
exceeded: -0.5857541732273077
exceeded: -7.091276575901935
exceeded: -5.807593943441257
exceeded: -1.3292851621595438
exceeded: -0.6438346377142862
exceeded: -2.3578190110523436
exceeded: -0.3770474208774828
exceeded: -1.3524649954675219
exceeded: -0.1529604917307673
exceeded: -47.37515553395792
exceeded: -0.49547378025330036
exceeded: -1.4683146842212
exceeded: -0.560555564090451
exceeded: -1.3576483727210509
exceeded: -1.1145927010103613
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.23e+04    |
|    ep_rew_mean          | 5.19e+03    |
| time/                   |             |
|    fps                  | 17438       |
|    iterations           | 63          |
|    time_elapsed     

exceeded: -169.27091046129775
exceeded: -47.160001180406404
exceeded: -4.024950139810745
exceeded: -0.8081497182953938
exceeded: -0.8397068522235264
exceeded: -49.066668858833665
exceeded: -0.27165352616761107
exceeded: -43.16365945126147
exceeded: -0.02328732347558314
exceeded: -7.1093131538323515
exceeded: -2.98110331058015
exceeded: -0.07744281727211627
exceeded: -2.211846298244851
exceeded: -12.759108643646503
exceeded: -113.52289653453282
exceeded: -6.391655716512419
exceeded: -64.44413314301742
exceeded: -70.74265349281373
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.23e+04    |
|    ep_rew_mean          | 5.19e+03    |
| time/                   |             |
|    fps                  | 16279       |
|    iterations           | 68          |
|    time_elapsed         | 119         |
|    total_timesteps      | 1939456     |
| train/                  |             |
|    approx_kl            | 0.054518715 |
|  

exceeded: -0.32404835950611677
exceeded: -1.2948586580666224
exceeded: -65.83548063709691
exceeded: -0.9388574746224778
exceeded: -1.6538502509303943
exceeded: -1.5505313540716645
exceeded: -0.2324156564037366
exceeded: -0.8987729449310593
exceeded: -3.4791848161737944
exceeded: -0.3071715736117649
exceeded: -0.14814707397017415
exceeded: -0.07846397479699396
exceeded: -0.12049174899933186
exceeded: -0.8351062631260042
exceeded: -0.9810823456776447
exceeded: -7.077473264628457
exceeded: -0.6485047167291452
exceeded: -56.53138848607996
exceeded: -0.7000634063064091
exceeded: -2.761648534940233
exceeded: -21.475954138439004
exceeded: -109.0472350370871
exceeded: -30.362067548265777
exceeded: -12.986776405708955
exceeded: -4.671321602843388
exceeded: -3.5141784978670803
exceeded: -15.433578801576678
exceeded: -19.849954968505948
exceeded: -1.85797574143188
exceeded: -9.992043201040108
-----------------------------------------
| rollout/                |             |
|    ep_len_mean     

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.2e+03    |
| time/                   |            |
|    fps                  | 14519      |
|    iterations           | 77         |
|    time_elapsed         | 134        |
|    total_timesteps      | 1957888    |
| train/                  |            |
|    approx_kl            | 0.09557651 |
|    clip_fraction        | 0.538      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.25       |
|    explained_variance   | 0.267      |
|    learning_rate        | 0.0003     |
|    loss                 | 34.8       |
|    n_updates            | 9550       |
|    policy_gradient_loss | -0.00411   |
|    std                  | 0.181      |
|    value_loss           | 93.8       |
----------------------------------------
exceeded: -8.761538743994212
exceeded: -9.088652938877061
exceeded: -4.865616922140576
exceeded: -

exceeded: -1.8995965010449012
exceeded: -0.8791535921154372
exceeded: -0.8153748663511364
exceeded: -0.1595675245807392
exceeded: -0.6332526140931625
exceeded: -0.5113046877782086
exceeded: -0.5697036281223136
exceeded: -0.8151328374997819
exceeded: -1.6803283217191987
exceeded: -0.5389641348272152
exceeded: -8.974957137660716
exceeded: -3.0123841386719934
exceeded: -0.9044000364673219
exceeded: -0.565927310894151
exceeded: -0.13614087254295357
exceeded: -12.737825914218021
exceeded: -0.17664433614635297
exceeded: -0.9344837138016251
exceeded: -1.7519055172198748
exceeded: -0.145373847211284
exceeded: -2.2874266419408418
exceeded: -0.5337806763656883
exceeded: -0.8238470305280822
exceeded: -58.458359050250934
exceeded: -2.0630343030390454
exceeded: -0.2586859601681678
exceeded: -0.4677954377689638
exceeded: -94.76511747850294
exceeded: -0.1470158907922548
exceeded: -1.5546786683896356
---------------------------------------
| rollout/                |           |
|    ep_len_mean      

exceeded: -0.620323846387263
exceeded: -22.923308866269295
exceeded: -2.2437274734048165
exceeded: -0.4479085387036114
exceeded: -0.19639929333331477
exceeded: -2.2691971392063714
exceeded: -0.2803579638447492
exceeded: -0.01622029079174043
exceeded: -0.8160871177859976
exceeded: -0.7732369343043267
exceeded: -5.777260274067006
exceeded: -13.666179201269703
exceeded: -56.31493310046494
exceeded: -14.546236723247205
exceeded: -33.269827892196425
exceeded: -39.99400202187421
exceeded: -12.467236794533687
exceeded: -135.9425846943637
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.23e+04    |
|    ep_rew_mean          | 5.21e+03    |
| time/                   |             |
|    fps                  | 12859       |
|    iterations           | 88          |
|    time_elapsed         | 154         |
|    total_timesteps      | 1980416     |
| train/                  |             |
|    approx_kl            | 0.058561444 |
|

exceeded: -7.336597761871733
exceeded: -12.624038187473122
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.21e+03   |
| time/                   |            |
|    fps                  | 12242      |
|    iterations           | 93         |
|    time_elapsed         | 162        |
|    total_timesteps      | 1990656    |
| train/                  |            |
|    approx_kl            | 0.06396812 |
|    clip_fraction        | 0.413      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.28       |
|    explained_variance   | 0.5        |
|    learning_rate        | 0.0003     |
|    loss                 | 73.6       |
|    n_updates            | 9710       |
|    policy_gradient_loss | -0.0127    |
|    std                  | 0.179      |
|    value_loss           | 163        |
----------------------------------------
exceeded: -135.77237262596122
exceeded:

exceeded: -217.67574913166047
exceeded: -23.122819304599474
exceeded: -0.5825301419972087
exceeded: -0.5222448688843249
exceeded: -0.28925039913129735
exceeded: -1.6670327150422093
exceeded: -1.5924727393109475
exceeded: -0.29676750578320965
exceeded: -1.3756843711209024
exceeded: -0.3867926071221916
exceeded: -1.9191247576728643
exceeded: -0.30595851362562
exceeded: -0.02586868542040358
exceeded: -1.1266660165846631
exceeded: -16.381632623705233
exceeded: -0.20229879744602836
exceeded: -1.7907578656104417
exceeded: -0.5984618281396245
exceeded: -0.7880821452765735
exceeded: -0.26508193798822877
exceeded: -2.308171720001699
exceeded: -1.355316174100967
exceeded: -2.711378834541627
exceeded: -0.3087405048079776
exceeded: -2.9311782281985144
exceeded: -1.2841166916967905
exceeded: -0.5549680094107248
exceeded: -0.5225293269397899
exceeded: -0.11472148217061201
exceeded: -0.021960615310900614
exceeded: -1.1516195737248391
exceeded: -0.772775675012325
exceeded: -0.3277811245314186
exceeded

exceeded: -58.141983352468344
exceeded: -37.625639212411095
exceeded: -0.23609502744583125
exceeded: -37.09008122216363
exceeded: -84.81279025789634
exceeded: -2.1372387199196847
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.21e+03   |
| time/                   |            |
|    fps                  | 11379      |
|    iterations           | 101        |
|    time_elapsed         | 176        |
|    total_timesteps      | 2007040    |
| train/                  |            |
|    approx_kl            | 0.08917367 |
|    clip_fraction        | 0.511      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.29       |
|    explained_variance   | 0.329      |
|    learning_rate        | 0.0003     |
|    loss                 | 37.7       |
|    n_updates            | 9790       |
|    policy_gradient_loss | -0.000772  |
|    std                  | 0.178      |
|

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.21e+03   |
| time/                   |            |
|    fps                  | 10997      |
|    iterations           | 105        |
|    time_elapsed         | 183        |
|    total_timesteps      | 2015232    |
| train/                  |            |
|    approx_kl            | 0.14050674 |
|    clip_fraction        | 0.497      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.34       |
|    explained_variance   | 0.389      |
|    learning_rate        | 0.0003     |
|    loss                 | 21.8       |
|    n_updates            | 9830       |
|    policy_gradient_loss | -0.00124   |
|    std                  | 0.176      |
|    value_loss           | 92.3       |
----------------------------------------
exceeded: -13.32750730188522
exceeded: -3.681926972759949
exceeded: -4.783163150811003
exceeded: -

exceeded: -3.487494625272751
exceeded: -8.171652868263772
exceeded: -0.1959560064343851
exceeded: -1.5337487607133673
exceeded: -31.68616117007382
exceeded: -1.0200354830474447
exceeded: -1.0069206334545393
exceeded: -0.29007766313479866
exceeded: -0.1211743077774573
exceeded: -0.7600374086924278
exceeded: -1.4126108254982597
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.2e+03    |
| time/                   |            |
|    fps                  | 10310      |
|    iterations           | 113        |
|    time_elapsed         | 197        |
|    total_timesteps      | 2031616    |
| train/                  |            |
|    approx_kl            | 0.07622369 |
|    clip_fraction        | 0.5        |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.38       |
|    explained_variance   | 0.249      |
|    learning_rate        | 0.0003     |
|    loss       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.2e+03    |
| time/                   |            |
|    fps                  | 9788       |
|    iterations           | 120        |
|    time_elapsed         | 209        |
|    total_timesteps      | 2045952    |
| train/                  |            |
|    approx_kl            | 0.06838904 |
|    clip_fraction        | 0.467      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.41       |
|    explained_variance   | 0.388      |
|    learning_rate        | 0.0003     |
|    loss                 | 38.8       |
|    n_updates            | 9980       |
|    policy_gradient_loss | -0.0066    |
|    std                  | 0.173      |
|    value_loss           | 96.3       |
----------------------------------------
exceeded: -28.436400914116394
exceeded: -17.317247518106605
exceeded: -0.9594102313495831
exceeded

exceeded: -0.29912442555077823
exceeded: -0.38606019338789543
exceeded: -0.7454734964883657
exceeded: -0.26189876378508703
exceeded: -0.07080140076604206
exceeded: -0.22401957395040167
exceeded: -0.6416708872815944
exceeded: -0.8844035796616596
exceeded: -1.4019971698153721
exceeded: -0.41201873890518736
exceeded: -2.608303522846658
exceeded: -0.9005361609803976
exceeded: -1.0027755796865536
exceeded: -1.223239800015208
exceeded: -21.487729211386622
exceeded: -3.8609576534032413
exceeded: -3.2350455621712566
exceeded: -37.13713775233947
exceeded: -16.076598309957774
exceeded: -29.788197139783442
exceeded: -7.935698286772496
exceeded: -16.088387585010764
exceeded: -53.06025039439885
exceeded: -52.275436295899326
exceeded: -11.506804959032541
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.23e+03   |
| time/                   |            |
|    fps                  | 9447       |
|  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.23e+03   |
| time/                   |            |
|    fps                  | 9067       |
|    iterations           | 131        |
|    time_elapsed         | 228        |
|    total_timesteps      | 2068480    |
| train/                  |            |
|    approx_kl            | 0.08337372 |
|    clip_fraction        | 0.506      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.41       |
|    explained_variance   | 0.483      |
|    learning_rate        | 0.0003     |
|    loss                 | 28.6       |
|    n_updates            | 10090      |
|    policy_gradient_loss | -0.00714   |
|    std                  | 0.172      |
|    value_loss           | 62.6       |
----------------------------------------
exceeded: -40.2404380790658
exceeded: -1.3183676785347587
exceeded: -1.3021819126392469
exceeded: 

exceeded: -5.578314684472948
exceeded: -49.27659526134197
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.23e+04   |
|    ep_rew_mean          | 5.24e+03   |
| time/                   |            |
|    fps                  | 8728       |
|    iterations           | 137        |
|    time_elapsed         | 238        |
|    total_timesteps      | 2080768    |
| train/                  |            |
|    approx_kl            | 0.05332125 |
|    clip_fraction        | 0.377      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.38       |
|    explained_variance   | 0.501      |
|    learning_rate        | 0.0003     |
|    loss                 | 37.9       |
|    n_updates            | 10150      |
|    policy_gradient_loss | -0.016     |
|    std                  | 0.174      |
|    value_loss           | 189        |
----------------------------------------
exceeded: -4.8803437601264585
exceeded: 

exceeded: -1.002022003878264
exceeded: -0.8258997587256767
exceeded: -1.3500475441476054
exceeded: -70.37712985929745
exceeded: -165.63587917413338
exceeded: -40.089175007222494
exceeded: -71.78084334762923
exceeded: -11.520313988220712
exceeded: -8.422721486644342
exceeded: -0.0702638007776572
exceeded: -92.47595878188663
exceeded: -103.96559895619743
exceeded: -57.567405229673845
exceeded: -44.45036663854928
exceeded: -46.94550922687668
exceeded: -12.32530723762392
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.23e+04    |
|    ep_rew_mean          | 5.24e+03    |
| time/                   |             |
|    fps                  | 8389        |
|    iterations           | 143         |
|    time_elapsed         | 249         |
|    total_timesteps      | 2093056     |
| train/                  |             |
|    approx_kl            | 0.057670012 |
|    clip_fraction        | 0.448       |
|    clip_range         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.22e+04    |
|    ep_rew_mean          | 5.23e+03    |
| time/                   |             |
|    fps                  | 7962        |
|    iterations           | 150         |
|    time_elapsed         | 264         |
|    total_timesteps      | 2107392     |
| train/                  |             |
|    approx_kl            | 0.072620936 |
|    clip_fraction        | 0.46        |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.47        |
|    explained_variance   | 0.268       |
|    learning_rate        | 0.0003      |
|    loss                 | 35.5        |
|    n_updates            | 10280       |
|    policy_gradient_loss | -0.00507    |
|    std                  | 0.17        |
|    value_loss           | 180         |
-----------------------------------------
exceeded: -10.82785053435902
exceeded: -0.9085914024227028
exceeded: -15.152

exceeded: -9.519972238809636
exceeded: -30.988730497855435
exceeded: -1.1306154970046283
exceeded: -66.58487436144136
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.22e+04   |
|    ep_rew_mean          | 5.24e+03   |
| time/                   |            |
|    fps                  | 7694       |
|    iterations           | 156        |
|    time_elapsed         | 275        |
|    total_timesteps      | 2119680    |
| train/                  |            |
|    approx_kl            | 0.04439597 |
|    clip_fraction        | 0.399      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.52       |
|    explained_variance   | 0.313      |
|    learning_rate        | 0.0003     |
|    loss                 | 250        |
|    n_updates            | 10340      |
|    policy_gradient_loss | -0.0103    |
|    std                  | 0.168      |
|    value_loss           | 526        |
---------------------

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.22e+04   |
|    ep_rew_mean          | 5.25e+03   |
| time/                   |            |
|    fps                  | 7460       |
|    iterations           | 162        |
|    time_elapsed         | 285        |
|    total_timesteps      | 2131968    |
| train/                  |            |
|    approx_kl            | 0.07953773 |
|    clip_fraction        | 0.535      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.47       |
|    explained_variance   | 0.398      |
|    learning_rate        | 0.0003     |
|    loss                 | 15.4       |
|    n_updates            | 10400      |
|    policy_gradient_loss | 0.0131     |
|    std                  | 0.17       |
|    value_loss           | 47.5       |
----------------------------------------
exceeded: -0.20045102810225254
exceeded: -0.6563853844424374
exceeded: -2.039775654804271
exceeded

exceeded: -24.95690490152017
exceeded: -0.8043664197207341
exceeded: -0.4670163894556234
exceeded: -0.1944806364966116
exceeded: -0.1659601526049957
exceeded: -0.7229281613184553
exceeded: -0.9668299599657422
exceeded: -0.3633816015074886
exceeded: -0.805822084768671
exceeded: -18.272282639249617
exceeded: -5.719688951129893
exceeded: -1.0328109084290282
exceeded: -0.9201181812795172
exceeded: -6.6466329849473516
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.22e+04   |
|    ep_rew_mean          | 5.27e+03   |
| time/                   |            |
|    fps                  | 7207       |
|    iterations           | 169        |
|    time_elapsed         | 297        |
|    total_timesteps      | 2146304    |
| train/                  |            |
|    approx_kl            | 0.60472876 |
|    clip_fraction        | 0.571      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.47       |
|    expl

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.22e+04   |
|    ep_rew_mean          | 5.27e+03   |
| time/                   |            |
|    fps                  | 7102       |
|    iterations           | 172        |
|    time_elapsed         | 303        |
|    total_timesteps      | 2152448    |
| train/                  |            |
|    approx_kl            | 0.07730685 |
|    clip_fraction        | 0.389      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.46       |
|    explained_variance   | 0.362      |
|    learning_rate        | 0.0003     |
|    loss                 | 129        |
|    n_updates            | 10500      |
|    policy_gradient_loss | -0.0182    |
|    std                  | 0.17       |
|    value_loss           | 189        |
----------------------------------------
exceeded: -0.7453483392233222
exceeded: -0.48068356484375063
exceeded: -0.2535217667571739
exceede

exceeded: -0.4795197253285133
exceeded: -0.8937478277030821
exceeded: -17.46772240339133
exceeded: -2.596596431968632
exceeded: -1.2049923019392703
exceeded: -3.4100090647373933
exceeded: -0.8322505863615137
exceeded: -0.7996531636348542
exceeded: -1.909351140207961
exceeded: -0.15451846045530718
exceeded: -0.7231912965538162
exceeded: -0.2984945403111557
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.22e+04   |
|    ep_rew_mean          | 5.27e+03   |
| time/                   |            |
|    fps                  | 6970       |
|    iterations           | 176        |
|    time_elapsed         | 309        |
|    total_timesteps      | 2160640    |
| train/                  |            |
|    approx_kl            | 0.05121419 |
|    clip_fraction        | 0.432      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.44       |
|    explained_variance   | 0.648      |
|    learning_rate        |

exceeded: -0.991345395103312
exceeded: -0.45032270005765285
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.22e+04    |
|    ep_rew_mean          | 5.28e+03    |
| time/                   |             |
|    fps                  | 6841        |
|    iterations           | 180         |
|    time_elapsed         | 316         |
|    total_timesteps      | 2168832     |
| train/                  |             |
|    approx_kl            | 0.057532478 |
|    clip_fraction        | 0.41        |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.43        |
|    explained_variance   | 0.44        |
|    learning_rate        | 0.0003      |
|    loss                 | 72          |
|    n_updates            | 10580       |
|    policy_gradient_loss | -0.0258     |
|    std                  | 0.17        |
|    value_loss           | 145         |
-----------------------------------------
exceeded: -46.53

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.22e+04   |
|    ep_rew_mean          | 5.28e+03   |
| time/                   |            |
|    fps                  | 6752       |
|    iterations           | 183        |
|    time_elapsed         | 322        |
|    total_timesteps      | 2174976    |
| train/                  |            |
|    approx_kl            | 0.18362215 |
|    clip_fraction        | 0.383      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.45       |
|    explained_variance   | 0.569      |
|    learning_rate        | 0.0003     |
|    loss                 | 139        |
|    n_updates            | 10610      |
|    policy_gradient_loss | -0.00565   |
|    std                  | 0.169      |
|    value_loss           | 423        |
----------------------------------------
exceeded: -0.1275965002286106
exceeded: -28.566917748678566
exceeded: -5.055198790018759
exceeded:

exceeded: -68.95671701186463
exceeded: -4.38425901255036
exceeded: -8.807149433805755
exceeded: -11.461217706488334
exceeded: -0.15461748532656058
exceeded: -0.04652984165787581
exceeded: -0.7871952371622393
exceeded: -15.304648342061054
exceeded: -126.99070616338203
exceeded: -0.8660324087172476
exceeded: -73.25338576365355
exceeded: -23.737919448609567
exceeded: -0.33509323022014287
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.22e+04    |
|    ep_rew_mean          | 5.27e+03    |
| time/                   |             |
|    fps                  | 6581        |
|    iterations           | 189         |
|    time_elapsed         | 332         |
|    total_timesteps      | 2187264     |
| train/                  |             |
|    approx_kl            | 0.078408614 |
|    clip_fraction        | 0.501       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.42        |
|    explained_variance 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.21e+04    |
|    ep_rew_mean          | 5.28e+03    |
| time/                   |             |
|    fps                  | 6470        |
|    iterations           | 193         |
|    time_elapsed         | 339         |
|    total_timesteps      | 2195456     |
| train/                  |             |
|    approx_kl            | 0.071625404 |
|    clip_fraction        | 0.467       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.45        |
|    explained_variance   | 0.611       |
|    learning_rate        | 0.0003      |
|    loss                 | 65.2        |
|    n_updates            | 10710       |
|    policy_gradient_loss | -0.0124     |
|    std                  | 0.17        |
|    value_loss           | 121         |
-----------------------------------------
exceeded: -2.204841510306632
exceeded: -0.05357011291216503
exceeded: -2.854

exceeded: -7.1066758799198
exceeded: -93.08160233129193
exceeded: -35.193317506407446
exceeded: -20.088649917896547
exceeded: -1.1682097706812904
exceeded: -0.503323873431176
exceeded: -3.3214992299009376
exceeded: -0.012507370868943192
exceeded: -3.2340222510438443
exceeded: -4.060486410284241
exceeded: -0.5042207236558894
exceeded: -0.039203201748494725
exceeded: -0.729897572861531
exceeded: -0.5863808642442577
exceeded: -0.4889373748086884
exceeded: -40.839956622923744
exceeded: -102.91594524265184
exceeded: -3.99119808675668
exceeded: -4.555190698142444
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.21e+04   |
|    ep_rew_mean          | 5.29e+03   |
| time/                   |            |
|    fps                  | 6287       |
|    iterations           | 200        |
|    time_elapsed         | 351        |
|    total_timesteps      | 2209792    |
| train/                  |            |
|    approx_kl            

exceeded: -0.14468549771120726
exceeded: -21.306782811715188
exceeded: -2.552872884124781
exceeded: -27.047316983771434
exceeded: -9.309586693638883
exceeded: -34.783953619161835
exceeded: -25.0374344247814
exceeded: -95.96130279065348
exceeded: -9.409170836228073
exceeded: -0.35556637692866405
exceeded: -0.6382768553653341
exceeded: -1.0646400884394855
exceeded: -0.4297288587557152
exceeded: -31.479275513615264
exceeded: -17.051634652989136
exceeded: -29.630750618516693
exceeded: -1.1375904950728986
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.21e+04   |
|    ep_rew_mean          | 5.3e+03    |
| time/                   |            |
|    fps                  | 6167       |
|    iterations           | 205        |
|    time_elapsed         | 359        |
|    total_timesteps      | 2220032    |
| train/                  |            |
|    approx_kl            | 0.29617375 |
|    clip_fraction        | 0.626      |
| 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.21e+04   |
|    ep_rew_mean          | 5.31e+03   |
| time/                   |            |
|    fps                  | 6050       |
|    iterations           | 210        |
|    time_elapsed         | 368        |
|    total_timesteps      | 2230272    |
| train/                  |            |
|    approx_kl            | 0.07367039 |
|    clip_fraction        | 0.389      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.43       |
|    explained_variance   | 0.348      |
|    learning_rate        | 0.0003     |
|    loss                 | 36.3       |
|    n_updates            | 10880      |
|    policy_gradient_loss | -0.0174    |
|    std                  | 0.17       |
|    value_loss           | 166        |
----------------------------------------
exceeded: -0.22371666581039448
exceeded: -2.0538914055356274
exceeded: -2.842638960003898
exceeded

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.21e+04   |
|    ep_rew_mean          | 5.34e+03   |
| time/                   |            |
|    fps                  | 5937       |
|    iterations           | 215        |
|    time_elapsed         | 377        |
|    total_timesteps      | 2240512    |
| train/                  |            |
|    approx_kl            | 0.67135864 |
|    clip_fraction        | 0.536      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.4        |
|    explained_variance   | 0.374      |
|    learning_rate        | 0.0003     |
|    loss                 | 99.5       |
|    n_updates            | 10930      |
|    policy_gradient_loss | 0.0537     |
|    std                  | 0.171      |
|    value_loss           | 676        |
----------------------------------------
exceeded: -104.0577895653169
exceeded: -0.9533576337194185
exceeded: -0.9087620555598535
exceeded:

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.21e+04    |
|    ep_rew_mean          | 5.33e+03    |
| time/                   |             |
|    fps                  | 5830        |
|    iterations           | 220         |
|    time_elapsed         | 386         |
|    total_timesteps      | 2250752     |
| train/                  |             |
|    approx_kl            | 0.056567654 |
|    clip_fraction        | 0.435       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.37        |
|    explained_variance   | 0.554       |
|    learning_rate        | 0.0003      |
|    loss                 | 51.4        |
|    n_updates            | 10980       |
|    policy_gradient_loss | -0.00911    |
|    std                  | 0.173       |
|    value_loss           | 157         |
-----------------------------------------
exceeded: -120.85276595766395
exceeded: -2.8046498291292514
exceeded: -148.7

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.21e+04   |
|    ep_rew_mean          | 5.35e+03   |
| time/                   |            |
|    fps                  | 5727       |
|    iterations           | 225        |
|    time_elapsed         | 394        |
|    total_timesteps      | 2260992    |
| train/                  |            |
|    approx_kl            | 0.06606102 |
|    clip_fraction        | 0.433      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.36       |
|    explained_variance   | 0.516      |
|    learning_rate        | 0.0003     |
|    loss                 | 28.4       |
|    n_updates            | 11030      |
|    policy_gradient_loss | -0.0143    |
|    std                  | 0.173      |
|    value_loss           | 116        |
----------------------------------------
exceeded: -1.0439269721743527
exceeded: -0.03549421346317597
exceeded: -0.7205850515600298
exceede

exceeded: -35.9942015392039
exceeded: -5.461949028539985
exceeded: -94.98717540548017
exceeded: -13.916118194239367
exceeded: -21.56516277431698
exceeded: -1.1884095806719912
exceeded: -60.70045441543952
exceeded: -0.1351742780277373
exceeded: -1.0279879997681391
exceeded: -72.39723181863485
exceeded: -0.8460926194346046
exceeded: -1.9251176053684622
exceeded: -0.8187286524755509
exceeded: -4.755719867528686
exceeded: -291.5357781082061
exceeded: -0.1270494576371351
exceeded: -0.07131772683172802
exceeded: -0.026472911025453225
exceeded: -0.33513435157895666
exceeded: -12.218600057531335
exceeded: -134.6858538192878
exceeded: -0.8513404909378881
exceeded: -0.32099113155889863
exceeded: -0.32685774986320804
exceeded: -0.3162838697680103
exceeded: -0.3081056044863044
exceeded: -0.9177927272376508
exceeded: -0.5393941666586519
exceeded: -0.700869719202366
exceeded: -0.10536951661217318
exceeded: -3.336701977934304
exceeded: -0.4901345567946503
exceeded: -1.705320658637486
exceeded: -0.350

exceeded: -27.19625531565153
exceeded: -55.84941453123086
exceeded: -1.6024142818811309
exceeded: -1.1724471215340908
exceeded: -1.1918534914733583
exceeded: -0.7409539945303621
exceeded: -2.8484495265777117
exceeded: -0.20194071910652203
exceeded: -2.5081900436322324
exceeded: -0.5667161113271093
exceeded: -1.0541681937881022
exceeded: -13.241118577163938
exceeded: -0.20595523484683884
exceeded: -3.2811166181230194
exceeded: -0.6066510629207299
exceeded: -0.03549531012557515
exceeded: -0.8161932407798079
exceeded: -0.3834115084549545
exceeded: -1.4570737672030367
exceeded: -0.2524001341485109
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.21e+04   |
|    ep_rew_mean          | 5.35e+03   |
| time/                   |            |
|    fps                  | 5539       |
|    iterations           | 235        |
|    time_elapsed         | 411        |
|    total_timesteps      | 2281472    |
| train/                  |   

exceeded: -2.1024598454589194
exceeded: -0.3494761888034142
exceeded: -41.60386531973547
exceeded: -0.43347131198343913
exceeded: -1.2006011137643064
exceeded: -2.0981656081858517
exceeded: -38.72947040817827
exceeded: -6.325998807088463
exceeded: -0.04872261336655881
exceeded: -2.3062483170469483
exceeded: -1.3646526757471205
exceeded: -1.3545773130126713
exceeded: -23.0929223575562
exceeded: -133.7131690332728
exceeded: -123.75188378004098
exceeded: -57.96050690279295
exceeded: -1.4840017260084017
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.2e+04    |
|    ep_rew_mean          | 5.34e+03   |
| time/                   |            |
|    fps                  | 5433       |
|    iterations           | 241        |
|    time_elapsed         | 422        |
|    total_timesteps      | 2293760    |
| train/                  |            |
|    approx_kl            | 0.06531705 |
|    clip_fraction        | 0.414      |
|  

exceeded: -0.121740158118824
exceeded: -78.9478204416845
exceeded: -0.7183491940746934
exceeded: -1.0599176889446398
exceeded: -2.4946791983359393
exceeded: -7.143440889160325
exceeded: -0.5810852788501887
exceeded: -0.7796208269322811
exceeded: -1.619159617005613
exceeded: -124.38382898194418
exceeded: -1.343190439579959
exceeded: -0.31182505213037126
exceeded: -1.8513246235516028
exceeded: -0.8983528529789475
exceeded: -0.06789749590601073
exceeded: -0.15487829411050122
exceeded: -0.2229028904623033
exceeded: -1.280081501200244
exceeded: -0.4784563241368553
exceeded: -2.2301348110565047
exceeded: -0.35316704895715956
exceeded: -0.17808122856495728
exceeded: -0.5066672957295661
exceeded: -1.026846101819489
exceeded: -0.4258008364042092
exceeded: -24.12353353081109
exceeded: -0.6416334436986638
exceeded: -0.5576030372994278
exceeded: -0.3442494213723719
exceeded: -0.46373301374831144
exceeded: -0.6089185907998372
exceeded: -0.6975920830445081
exceeded: -0.9939455870646008
exceeded: -2.

exceeded: -0.4681204905160142
exceeded: -0.4301783446991635
exceeded: -0.1897180254557485
exceeded: -0.5540319289850388
exceeded: -1.5209263650950393
exceeded: -0.040669062098350854
exceeded: -1.506770525049585
exceeded: -2.424529151973397
exceeded: -3.147327342869453
exceeded: -0.5547393483757093
exceeded: -108.63870711834053
exceeded: -19.755201343205172
exceeded: -113.91555914915685
exceeded: -13.63852259218929
exceeded: -4.255772047869581
exceeded: -2.026817292891711
exceeded: -13.262648687108655
exceeded: -0.08954756956235538
exceeded: -0.8357861395450683
exceeded: -1.093291861771185
exceeded: -0.23602617314494434
exceeded: -0.633718705523902
exceeded: -0.1305870004897565
exceeded: -1.1680093960806741
exceeded: -254.59875308123546
exceeded: -14.200982997399104
exceeded: -6.677113708507154
exceeded: -148.49922064149547
exceeded: -2.2170492806196194
exceeded: -1.3655677244589393
exceeded: -0.08765649099384201
exceeded: -4.734202889943767
exceeded: -0.26441906986683245
exceeded: -17.

exceeded: -1.3333650966270316
exceeded: -0.690171223818229
exceeded: -3.6753336747121783
exceeded: -29.929572939340673
exceeded: -2.4704729211177905
exceeded: -0.9512331103251247
exceeded: -0.651290685782432
exceeded: -0.1496819329885663
exceeded: -1.2224813379825723
exceeded: -1.5894606277433379
exceeded: -0.24840567532609054
exceeded: -1.1477165591801803
exceeded: -7.125469332163286
exceeded: -7.2931204507837
exceeded: -0.946516013740366
exceeded: -5.88769152768306
exceeded: -0.7228797104893893
exceeded: -3.184499940578005
exceeded: -0.1409630928180064
exceeded: -3.20843582065328
exceeded: -4.448738827860243
exceeded: -0.9063536761311133
exceeded: -4.473204506368587
exceeded: -0.37528839678434894
exceeded: -2.9331836196856487
exceeded: -0.5553262785966363
exceeded: -0.28866616437231757
exceeded: -0.6061814778268885
exceeded: -0.41598526614309933
exceeded: -1.064740576641274
exceeded: -0.1389095918595839
exceeded: -0.17816140964577623
exceeded: -0.49308268789857973
exceeded: -0.618763

exceeded: -24.76501037140008
exceeded: -46.70663168630779
exceeded: -50.78514741688191
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.2e+04     |
|    ep_rew_mean          | 5.34e+03    |
| time/                   |             |
|    fps                  | 5143        |
|    iterations           | 259         |
|    time_elapsed         | 453         |
|    total_timesteps      | 2330624     |
| train/                  |             |
|    approx_kl            | 0.073070765 |
|    clip_fraction        | 0.438       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.35        |
|    explained_variance   | 0.217       |
|    learning_rate        | 0.0003      |
|    loss                 | 72.5        |
|    n_updates            | 11370       |
|    policy_gradient_loss | -0.000938   |
|    std                  | 0.174       |
|    value_loss           | 153         |
-------------------------------

exceeded: -32.456254009515334
exceeded: -8.891615026083212
exceeded: -2.4599086837903386
exceeded: -2.3073104805849445
exceeded: -2.445198068744111
exceeded: -34.799677446681144
exceeded: -4.565195197818816
exceeded: -0.5428944821444059
exceeded: -0.5066086808076595
exceeded: -2.8327623430196827
exceeded: -21.343992432097103
exceeded: -0.5763341930823963
exceeded: -1.2870227114948476
exceeded: -21.834574796482947
exceeded: -55.761292390660145
exceeded: -56.92021425033025
exceeded: -58.59091333588201
exceeded: -35.629667504275275
exceeded: -53.10479292288382
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.2e+04     |
|    ep_rew_mean          | 5.33e+03    |
| time/                   |             |
|    fps                  | 5070        |
|    iterations           | 264         |
|    time_elapsed         | 461         |
|    total_timesteps      | 2340864     |
| train/                  |             |
|    approx_kl  

exceeded: -1.5851627602963028
exceeded: -1.0208142997812204
exceeded: -7.39573002547731
exceeded: -49.1171944449307
exceeded: -1.8887781931663905
exceeded: -28.99107772218242
exceeded: -1.208535139613177
exceeded: -3.5124729587221717
exceeded: -6.074187493939988
exceeded: -0.5287599946588515
exceeded: -8.488788410380392
exceeded: -2.021513019986739
exceeded: -10.860749196360684
exceeded: -0.663615975049303
exceeded: -2.0290787655647895
exceeded: -3.222408405221922
exceeded: -0.7028521368254905
exceeded: -2.159240101316096
exceeded: -0.01216940431024169
exceeded: -23.233836297308628
exceeded: -0.5502376478438746
exceeded: -0.05535043494675643
exceeded: -30.309852857390993
exceeded: -2.866788685069037
exceeded: -0.24536075755486234
exceeded: -0.9549769914886415
exceeded: -1.217861086347703
exceeded: -1.0368269106924841
exceeded: -0.7521958457084459
exceeded: -11.359922313107946
exceeded: -23.25147700197144
exceeded: -0.35917594595534313
exceeded: -4.711901290197521
exceeded: -0.733109288

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.2e+04    |
|    ep_rew_mean          | 5.33e+03   |
| time/                   |            |
|    fps                  | 4970       |
|    iterations           | 271        |
|    time_elapsed         | 473        |
|    total_timesteps      | 2355200    |
| train/                  |            |
|    approx_kl            | 0.13762403 |
|    clip_fraction        | 0.484      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.48       |
|    explained_variance   | 0.645      |
|    learning_rate        | 0.0003     |
|    loss                 | 19         |
|    n_updates            | 11490      |
|    policy_gradient_loss | -0.00263   |
|    std                  | 0.168      |
|    value_loss           | 63         |
----------------------------------------
exceeded: -0.130610057209735
exceeded: -0.5809732380145055
exceeded: -0.5424089179216449
exceeded:

exceeded: -1.7435275113188031
exceeded: -1.2771769394283723
exceeded: -0.38106517567167486
exceeded: -0.6489892578748642
exceeded: -0.04330040557752434
exceeded: -0.5817106219847503
exceeded: -1.748160534815372
exceeded: -0.5507308158435283
exceeded: -58.43774898458933
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.2e+04    |
|    ep_rew_mean          | 5.31e+03   |
| time/                   |            |
|    fps                  | 4878       |
|    iterations           | 278        |
|    time_elapsed         | 485        |
|    total_timesteps      | 2369536    |
| train/                  |            |
|    approx_kl            | 0.19389237 |
|    clip_fraction        | 0.579      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.46       |
|    explained_variance   | 0.561      |
|    learning_rate        | 0.0003     |
|    loss                 | 24.5       |
|    n_updates            | 11560

exceeded: -0.438243156740157
exceeded: -0.002841794296143468
exceeded: -2.16184723648072
exceeded: -42.488856483795686
exceeded: -0.7890419247481701
exceeded: -0.7412019024083698
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.2e+04     |
|    ep_rew_mean          | 5.31e+03    |
| time/                   |             |
|    fps                  | 4789        |
|    iterations           | 285         |
|    time_elapsed         | 497         |
|    total_timesteps      | 2383872     |
| train/                  |             |
|    approx_kl            | 0.096671864 |
|    clip_fraction        | 0.504       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.45        |
|    explained_variance   | 0.36        |
|    learning_rate        | 0.0003      |
|    loss                 | 24.5        |
|    n_updates            | 11630       |
|    policy_gradient_loss | 0.00171     |
|    std               

exceeded: -0.8163161019286364
exceeded: -2.8067816106056434
exceeded: -0.2658540734969838
exceeded: -0.13543207240326316
exceeded: -0.11360560760920667
exceeded: -0.08302319385509958
exceeded: -0.47634223396228603
exceeded: -1.0082489911065569
exceeded: -0.33813991320441505
exceeded: -0.27562722491634845
exceeded: -0.08190572773639959
exceeded: -1.0849983507401282
exceeded: -0.2782721029552688
exceeded: -2.1856034725048863
exceeded: -5.207639153154334
exceeded: -44.83624414368612
exceeded: -2.090972362948663
exceeded: -0.5548516987504005
exceeded: -0.3582657805397326
exceeded: -1.786736821070786
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.2e+04     |
|    ep_rew_mean          | 5.31e+03    |
| time/                   |             |
|    fps                  | 4725        |
|    iterations           | 290         |
|    time_elapsed         | 506         |
|    total_timesteps      | 2394112     |
| train/           

exceeded: -1.5814566711680131
exceeded: -10.01648240745142
exceeded: -0.03137837469901439
exceeded: -0.9116580035617982
exceeded: -0.12388234371525536
exceeded: -1.0257249715347587
exceeded: -0.5194029638218098
exceeded: -1.7324627741070378
exceeded: -0.796349786961157
exceeded: -2.406653022944658
exceeded: -9.579669929116363
exceeded: -9.361832769214447
exceeded: -0.23871127416680693
exceeded: -32.82185642784058
exceeded: -23.262599162081106
exceeded: -0.10132173088598755
exceeded: -0.467464345086501
exceeded: -1.378280310643677
exceeded: -0.3415810278959503
exceeded: -0.17885089609182736
exceeded: -1.4388436311414678
exceeded: -0.7023966782268791
exceeded: -0.5414793550602202
exceeded: -1.3072916389247569
exceeded: -0.7883783431898567
exceeded: -8.623848172191554
exceeded: -0.6289202646345995
exceeded: -0.6856851540554553
exceeded: -2.690962406932345
exceeded: -76.56009530282405
exceeded: -0.7875010875723493
-----------------------------------------
| rollout/                |       

exceeded: -0.7927823044442149
exceeded: -39.81317673170399
exceeded: -0.18726155010084997
exceeded: -0.05650810187804604
exceeded: -15.108711191221742
exceeded: -0.42117284903074365
exceeded: -125.75421367392751
exceeded: -1.5766902198700876
exceeded: -0.30203531150247165
exceeded: -0.07233273748482016
exceeded: -1.2550292864710757
exceeded: -0.10723431721328167
exceeded: -1.2320349818369867
exceeded: -2.4685727244363815
exceeded: -1.390425824758267
exceeded: -0.546470746925417
exceeded: -28.513118597591188
exceeded: -1.0996146467488916
exceeded: -1.394599552215671
exceeded: -0.387523121052542
exceeded: -0.2880231243429179
exceeded: -5.537767296017103
exceeded: -23.63168567607208
exceeded: -0.33537869124463593
exceeded: -28.805436282811495
exceeded: -2.0346151280520774
exceeded: -3.220060590699154
exceeded: -0.2611916654636961
exceeded: -0.9696941507952046
exceeded: -0.6400509591581606
exceeded: -67.87742995982964
exceeded: -0.8247183150914128
exceeded: -0.20770096044332345
exceeded: -

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.19e+04   |
|    ep_rew_mean          | 5.29e+03   |
| time/                   |            |
|    fps                  | 4533       |
|    iterations           | 306        |
|    time_elapsed         | 535        |
|    total_timesteps      | 2426880    |
| train/                  |            |
|    approx_kl            | 0.09028213 |
|    clip_fraction        | 0.47       |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.45       |
|    explained_variance   | 0.466      |
|    learning_rate        | 0.0003     |
|    loss                 | 36.5       |
|    n_updates            | 11840      |
|    policy_gradient_loss | -0.00933   |
|    std                  | 0.169      |
|    value_loss           | 98.4       |
----------------------------------------
exceeded: -25.002065662707484
exceeded: -36.83753883945824
exceeded: -69.74865202305463
exceeded: 

exceeded: -0.03100372510363604
exceeded: -0.3145744289076515
exceeded: -0.40812957836503155
exceeded: -26.39369334536907
exceeded: -3.7722638349256954
exceeded: -10.411343982998417
exceeded: -1.3376879343455759
exceeded: -11.788040571481744
exceeded: -199.8060583472129
exceeded: -0.4741023653073459
exceeded: -0.47158075996474935
exceeded: -22.37376600189098
exceeded: -57.296357403807974
exceeded: -0.6235047237373681
exceeded: -0.5987358432806874
exceeded: -28.01136438431831
exceeded: -66.54883513739081
exceeded: -54.56776746639541
exceeded: -3.9488285346705387
exceeded: -60.46184461413382
exceeded: -9.158263059034105
exceeded: -50.08308336982048
exceeded: -58.382933451396816
exceeded: -9.357930352023347
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.19e+04   |
|    ep_rew_mean          | 5.28e+03   |
| time/                   |            |
|    fps                  | 4457       |
|    iterations           | 312        |


exceeded: -52.56520266617492
exceeded: -0.4053898381710081
exceeded: -10.364570675777909
exceeded: -0.5153406410801491
exceeded: -0.5141341151357146
exceeded: -0.12077682904883551
exceeded: -0.14073657815189863
exceeded: -1.1491640449500102
exceeded: -1.294207265547898
exceeded: -9.446284172259162
exceeded: -0.2843511280228405
exceeded: -1.98511685089695
exceeded: -1.7854564744499903
exceeded: -3.186127011143425
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.19e+04   |
|    ep_rew_mean          | 5.28e+03   |
| time/                   |            |
|    fps                  | 4392       |
|    iterations           | 318        |
|    time_elapsed         | 558        |
|    total_timesteps      | 2451456    |
| train/                  |            |
|    approx_kl            | 0.09039074 |
|    clip_fraction        | 0.426      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.49       |
|    expla

exceeded: -183.7998161410069
exceeded: -0.5150662343503828
exceeded: -43.766978334086694
exceeded: -140.29500167445798
exceeded: -15.860468931917053
exceeded: -21.674055243512367
exceeded: -0.2832476045764636
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.19e+04   |
|    ep_rew_mean          | 5.28e+03   |
| time/                   |            |
|    fps                  | 4343       |
|    iterations           | 323        |
|    time_elapsed         | 566        |
|    total_timesteps      | 2461696    |
| train/                  |            |
|    approx_kl            | 0.14611626 |
|    clip_fraction        | 0.504      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.55       |
|    explained_variance   | 0.552      |
|    learning_rate        | 0.0003     |
|    loss                 | 29.5       |
|    n_updates            | 12010      |
|    policy_gradient_loss | 0.00166    |
|    std    

exceeded: -12.267390951651109
exceeded: -1.1874650534529976
exceeded: -81.36805812961234
exceeded: -0.18201848009340255
exceeded: -2.694538069235893
exceeded: -6.964040053833495
exceeded: -1.046375992267483
exceeded: -1.298174571001419
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.19e+04   |
|    ep_rew_mean          | 5.28e+03   |
| time/                   |            |
|    fps                  | 4286       |
|    iterations           | 329        |
|    time_elapsed         | 577        |
|    total_timesteps      | 2473984    |
| train/                  |            |
|    approx_kl            | 0.13761196 |
|    clip_fraction        | 0.553      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.57       |
|    explained_variance   | 0.654      |
|    learning_rate        | 0.0003     |
|    loss                 | 28.9       |
|    n_updates            | 12070      |
|    policy_gradient_loss 

exceeded: -18.44729358100472
exceeded: -9.25687327343
exceeded: -47.82773996504231
exceeded: -11.267860393621401
exceeded: -15.588152443325765
exceeded: -55.08984866453126
exceeded: -28.371453293626704
exceeded: -0.4879514538691779
exceeded: -0.22710713723867296
exceeded: -0.05368405846898617
exceeded: -0.17524760675583545
exceeded: -22.949320680338484
exceeded: -0.44316014225902656
exceeded: -9.377282273188355
exceeded: -0.709845629381588
exceeded: -0.5073220300286628
exceeded: -0.1361266958899107
exceeded: -0.3205170058961755
exceeded: -1.7502434553656672
exceeded: -0.0480080354031613
exceeded: -0.4842254532939421
exceeded: -0.5423890330408614
exceeded: -1.4255299292951622
exceeded: -0.4064671167854083
exceeded: -115.4720455620688
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.19e+04   |
|    ep_rew_mean          | 5.29e+03   |
| time/                   |            |
|    fps                  | 4240       |
|    iterat

exceeded: -50.91947619071172
exceeded: -0.5470177624574841
exceeded: -0.7311510453227822
exceeded: -0.009402188528559141
exceeded: -0.6717011233403025
exceeded: -0.0853082722494286
exceeded: -0.9744516430076962
exceeded: -0.02325799440558455
exceeded: -11.118335242556888
exceeded: -1.625115442036973
exceeded: -0.21728174786955487
exceeded: -1.1303738519675228
exceeded: -0.4894778691294981
exceeded: -1.0624941414487217
exceeded: -1.8503565464534342
exceeded: -0.09633910658066243
exceeded: -0.4186266482498223
exceeded: -0.7260707511093832
exceeded: -0.015164030956538127
exceeded: -0.2457484117489454
exceeded: -0.17591031216297182
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.19e+04   |
|    ep_rew_mean          | 5.3e+03    |
| time/                   |            |
|    fps                  | 4187       |
|    iterations           | 340        |
|    time_elapsed         | 596        |
|    total_timesteps      | 2496512 

exceeded: -0.775122136583276
exceeded: -0.2866389397277055
exceeded: -0.6779826760941126
exceeded: -0.7981743825093921
exceeded: -1.7133276379962479
exceeded: -0.870661011704065
exceeded: -28.96678487046124
exceeded: -7.92477685349146
exceeded: -24.435261664117668
exceeded: -11.552271194141913
exceeded: -16.12926777607146
exceeded: -37.00652494638311
exceeded: -0.28943605148878787
exceeded: -0.2343244237089739
exceeded: -19.659522493891536
exceeded: -0.8007300288081254
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.19e+04   |
|    ep_rew_mean          | 5.31e+03   |
| time/                   |            |
|    fps                  | 4144       |
|    iterations           | 345        |
|    time_elapsed         | 604        |
|    total_timesteps      | 2506752    |
| train/                  |            |
|    approx_kl            | 0.06494865 |
|    clip_fraction        | 0.352      |
|    clip_range           | 0.2   

exceeded: -89.38598532356374
exceeded: -25.169522002136087
exceeded: -24.12363645276631
exceeded: -52.650590547563965
exceeded: -22.558731081319408
exceeded: -3.8775905127141446
exceeded: -7.964769587893121
exceeded: -7.8690639961921995
exceeded: -67.86066695352636
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.19e+04    |
|    ep_rew_mean          | 5.31e+03    |
| time/                   |             |
|    fps                  | 4087        |
|    iterations           | 352         |
|    time_elapsed         | 616         |
|    total_timesteps      | 2521088     |
| train/                  |             |
|    approx_kl            | 0.074917085 |
|    clip_fraction        | 0.433       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.51        |
|    explained_variance   | 0.498       |
|    learning_rate        | 0.0003      |
|    loss                 | 31.5        |
|    n_updates      

exceeded: -0.3326343540721145
exceeded: -0.1559346320168532
exceeded: -0.5707386775674119
exceeded: -0.13798656925463648
exceeded: -31.138594815183108
exceeded: -4.479987034945342
exceeded: -1.1055867794153893
exceeded: -34.70662910884583
exceeded: -0.0702409099197958
exceeded: -0.11716212410092318
exceeded: -0.052099322725597336
exceeded: -0.7750537644914259
exceeded: -0.4287300898342749
exceeded: -0.054707857107778535
exceeded: -0.3626569711875382
exceeded: -0.4649078342185076
exceeded: -16.389819970149585
exceeded: -0.4711508623451772
exceeded: -0.18903675875186546
exceeded: -94.83434722232012
exceeded: -0.14870909402266985
exceeded: -2.622769371889982
exceeded: -53.8265201671428
exceeded: -3.8708882188601637
exceeded: -3.158594127964871
exceeded: -3.149621467694251
exceeded: -3.1396928723906004
exceeded: -1.1333852973304812
exceeded: -8.272636063119082
exceeded: -0.10505004299534848
exceeded: -0.02053151639835718
exceeded: -0.0854993213122854
exceeded: -0.8356512576722375
exceeded:

exceeded: -3.3178886165075143
exceeded: -2.3386896513934636
exceeded: -79.05622629507478
exceeded: -2.888122149178643
exceeded: -1.3377508256977546
exceeded: -40.54089752171117
exceeded: -0.45174202819738796
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.19e+04    |
|    ep_rew_mean          | 5.29e+03    |
| time/                   |             |
|    fps                  | 4016        |
|    iterations           | 361         |
|    time_elapsed         | 632         |
|    total_timesteps      | 2539520     |
| train/                  |             |
|    approx_kl            | 0.051366344 |
|    clip_fraction        | 0.43        |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.46        |
|    explained_variance   | 0.148       |
|    learning_rate        | 0.0003      |
|    loss                 | 129         |
|    n_updates            | 12390       |
|    policy_gradient_loss | -0.00906

exceeded: -3.4582257563998633
exceeded: -80.2543277365052
exceeded: -0.28501115962601237
exceeded: -0.4805669680740976
exceeded: -0.47835140340016835
exceeded: -0.7837149865801389
exceeded: -0.3657772184271576
exceeded: -14.334262946166783
exceeded: -97.10744352478011
exceeded: -2.0047243830604327
exceeded: -1.3435594773829418
exceeded: -1.2097078892094408
exceeded: -2.500416093549515
exceeded: -20.276896124124374
exceeded: -0.0051734491775689805
exceeded: -0.11039059528247935
exceeded: -0.12128358268888234
exceeded: -1.584769766042734
exceeded: -0.7747214344936919
exceeded: -6.927199899562356
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.18e+04   |
|    ep_rew_mean          | 5.3e+03    |
| time/                   |            |
|    fps                  | 3964       |
|    iterations           | 368        |
|    time_elapsed         | 644        |
|    total_timesteps      | 2553856    |
| train/                  |   

exceeded: -117.54487861420579
exceeded: -2.8875910734935633
exceeded: -8.56913975957667
exceeded: -11.971349712746047
exceeded: -3.6646968242666453
exceeded: -8.040934516226987
exceeded: -21.132418364181163
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.18e+04   |
|    ep_rew_mean          | 5.3e+03    |
| time/                   |            |
|    fps                  | 3922       |
|    iterations           | 374        |
|    time_elapsed         | 654        |
|    total_timesteps      | 2566144    |
| train/                  |            |
|    approx_kl            | 0.11372781 |
|    clip_fraction        | 0.532      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.46       |
|    explained_variance   | 0.461      |
|    learning_rate        | 0.0003     |
|    loss                 | 25.6       |
|    n_updates            | 12520      |
|    policy_gradient_loss | 0.00155    |
|    std      

exceeded: -2.1141402936857485
exceeded: -0.8315937481909967
exceeded: -0.4940469656101812
exceeded: -2.4501736345646967
exceeded: -3.2776980638940953
exceeded: -0.793643295905554
exceeded: -1.9840714043695418
exceeded: -0.09744469854778773
exceeded: -0.7121890486349814
exceeded: -1.9104720475451495
exceeded: -0.37767263960496594
exceeded: -0.16247593923801526
exceeded: -1.7379853539217813
exceeded: -0.5681498519820848
exceeded: -0.15172122152144876
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04    |
|    ep_rew_mean          | 5.3e+03     |
| time/                   |             |
|    fps                  | 3881        |
|    iterations           | 380         |
|    time_elapsed         | 664         |
|    total_timesteps      | 2578432     |
| train/                  |             |
|    approx_kl            | 0.092564225 |
|    clip_fraction        | 0.512       |
|    clip_range           | 0.2         |
|

exceeded: -11.711801170332322
exceeded: -10.874874040445
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.18e+04   |
|    ep_rew_mean          | 5.29e+03   |
| time/                   |            |
|    fps                  | 3846       |
|    iterations           | 385        |
|    time_elapsed         | 673        |
|    total_timesteps      | 2588672    |
| train/                  |            |
|    approx_kl            | 0.11436242 |
|    clip_fraction        | 0.539      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.43       |
|    explained_variance   | 0.627      |
|    learning_rate        | 0.0003     |
|    loss                 | 25.9       |
|    n_updates            | 12630      |
|    policy_gradient_loss | -0.0019    |
|    std                  | 0.172      |
|    value_loss           | 86         |
----------------------------------------
exceeded: -1.1543761024611339
exceeded: -

exceeded: -13.026386010021165
exceeded: -18.442460510990475
exceeded: -11.26523721147744
exceeded: -159.2208244762151
exceeded: -5.57281381139641
exceeded: -0.32443973646022384
exceeded: -28.39198010170996
exceeded: -9.877882955897652
exceeded: -92.14905606052825
exceeded: -1.307698639077548
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.18e+04   |
|    ep_rew_mean          | 5.29e+03   |
| time/                   |            |
|    fps                  | 3813       |
|    iterations           | 390        |
|    time_elapsed         | 681        |
|    total_timesteps      | 2598912    |
| train/                  |            |
|    approx_kl            | 0.12294066 |
|    clip_fraction        | 0.524      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.42       |
|    explained_variance   | 0.521      |
|    learning_rate        | 0.0003     |
|    loss                 | 30.2       |
|    n_upd

exceeded: -50.67899031647838
exceeded: -0.2927763730662103
exceeded: -0.5668727886309783
exceeded: -1.2051519257003578
exceeded: -0.052264666992330795
exceeded: -0.5500891546022623
exceeded: -84.60212095519181
exceeded: -3.2376188680085862
exceeded: -0.17369362062305704
exceeded: -0.3781579421634907
exceeded: -1.0050274631690679
exceeded: -0.4889615450103438
exceeded: -1.5910532626696192
exceeded: -0.37803677517599865
exceeded: -2.5687387260875205
exceeded: -0.6775368830580231
exceeded: -0.36238727602871734
exceeded: -1.3321913060147283
exceeded: -0.4748567796309122
exceeded: -0.2415003189185938
exceeded: -1.4815862178267332
exceeded: -0.04353443513758887
exceeded: -0.026838763693465825
exceeded: -0.42362278145378457
exceeded: -0.22603841302753153
exceeded: -1.7089567058845954
exceeded: -0.3320516418987899
exceeded: -0.1627451997308903
exceeded: -17.878355414933086
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.18e+04  

exceeded: -106.89407468405994
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.18e+04   |
|    ep_rew_mean          | 5.26e+03   |
| time/                   |            |
|    fps                  | 3743       |
|    iterations           | 401        |
|    time_elapsed         | 700        |
|    total_timesteps      | 2621440    |
| train/                  |            |
|    approx_kl            | 0.06383321 |
|    clip_fraction        | 0.43       |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.5        |
|    explained_variance   | -0.0749    |
|    learning_rate        | 0.0003     |
|    loss                 | 77.7       |
|    n_updates            | 12790      |
|    policy_gradient_loss | -0.0148    |
|    std                  | 0.167      |
|    value_loss           | 165        |
----------------------------------------
exceeded: -14.15292702823079
exceeded: -6.229239253748167
exceeded: 

exceeded: -0.06565645436644728
exceeded: -0.7787598387180538
exceeded: -1.1037625957152462
exceeded: -2.1341813549505346
exceeded: -87.0495243507125
exceeded: -1.9005224270768164
exceeded: -0.2302985806877404
exceeded: -0.7658908125133976
exceeded: -0.0013593315150354532
exceeded: -16.249622371102234
exceeded: -1.236151998048634
exceeded: -0.04468246023358446
exceeded: -0.11878969855857172
exceeded: -0.9980732922796367
exceeded: -0.34689929679553333
exceeded: -2.805993231616801
exceeded: -0.47631371810171746
exceeded: -1.7351652488148839
exceeded: -0.0005962912888918933
exceeded: -0.9926841312613949
exceeded: -2.0886792361969935
exceeded: -0.6390114989439097
exceeded: -0.4343723897359375
exceeded: -0.22752675957798812
exceeded: -0.8414377391183221
exceeded: -0.22230197038302713
exceeded: -3.16173322256597
exceeded: -0.8040504337925822
exceeded: -0.5509005944092576
exceeded: -2.324259353564841
exceeded: -2.541320958007794
exceeded: -152.81146926555107
exceeded: -0.12961684131831702
exce

exceeded: -0.24536302757214576
exceeded: -0.7139068202967953
exceeded: -0.07333886444913491
exceeded: -61.572215341777614
exceeded: -0.6051884571104362
exceeded: -1.6938671257660076
exceeded: -0.6866393671552224
exceeded: -1.4366982928370513
exceeded: -0.053566827113098306
exceeded: -1.2163968309984248
exceeded: -0.3869689051436165
exceeded: -45.48326865992968
exceeded: -0.9181879051875086
exceeded: -2.5390760150372174
exceeded: -1.30325081105215
exceeded: -0.4053004361024558
exceeded: -0.9017639829025432
exceeded: -0.34588972499172543
exceeded: -0.8290349086413851
exceeded: -1.7943879008474461
exceeded: -0.6167584905196548
exceeded: -0.3308828691309773
exceeded: -0.25449482314250627
exceeded: -0.35472784294164333
exceeded: -0.32652011035660516
exceeded: -3.3956950591719606
exceeded: -0.23135128579231637
exceeded: -0.5604230162844949
exceeded: -220.39283413241517
exceeded: -25.709217741426095
exceeded: -1.7336358121780169
exceeded: -0.6723545150708499
exceeded: -1.6048145200579857
exce

exceeded: -8.551237875813413
exceeded: -42.20395557131231
exceeded: -36.024234717985664
exceeded: -0.4608690933187626
exceeded: -0.41681685475608493
exceeded: -0.045220808218298617
exceeded: -2.286558514099955
exceeded: -0.9036276389329624
exceeded: -1.200651315561703
exceeded: -2.149138398954799
exceeded: -1.2897148230008348
exceeded: -6.741207879636569
exceeded: -0.5089043904350657
exceeded: -21.380624962425948
exceeded: -2.319263589531397
exceeded: -1.88415003470686
exceeded: -0.9657193442839678
exceeded: -1.8761466005263085
exceeded: -0.22877992442174727
exceeded: -36.86911641978463
exceeded: -0.026863868191842444
exceeded: -0.1572284626938593
exceeded: -0.40316571549014857
exceeded: -0.23543973991760683
exceeded: -12.471476055319277
exceeded: -0.06189087687490928
exceeded: -1.5251795116586426
exceeded: -0.4573723964388444
exceeded: -2.2932215131283398
exceeded: -0.12799085393284465
exceeded: -1.5991214774845155
exceeded: -0.8497976616365134
exceeded: -3.317443156809424
exceeded: -

exceeded: -0.6435650735870817
exceeded: -0.07628445138292314
exceeded: -1.8844790938161946
exceeded: -21.742137830740212
exceeded: -0.1882422228693385
exceeded: -54.44158776292085
exceeded: -0.5850613103831039
exceeded: -3.1484500595783205
exceeded: -10.941445994373593
exceeded: -0.3439883997934575
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.17e+04    |
|    ep_rew_mean          | 5.31e+03    |
| time/                   |             |
|    fps                  | 3614        |
|    iterations           | 423         |
|    time_elapsed         | 737         |
|    total_timesteps      | 2666496     |
| train/                  |             |
|    approx_kl            | 0.045832388 |
|    clip_fraction        | 0.374       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.5         |
|    explained_variance   | 0.411       |
|    learning_rate        | 0.0003      |
|    loss                 | 

exceeded: -0.9639312036964476
exceeded: -0.08748441901337606
exceeded: -0.7957928711776617
exceeded: -4.422186612217985
exceeded: -0.027253383692677394
exceeded: -47.139962368220914
exceeded: -3.4078560855955518
exceeded: -50.13113973530477
exceeded: -0.4528233308919145
exceeded: -0.48640475931329286
exceeded: -0.03037066771966908
exceeded: -1.3758229025273132
exceeded: -0.4675004252769798
exceeded: -1.992726601050536
exceeded: -1.0816338398889935
exceeded: -3.101638705857578
exceeded: -0.01999492360937507
exceeded: -0.18369893394072342
exceeded: -0.03496439586670085
exceeded: -0.6257323986670755
exceeded: -0.980365615119233
exceeded: -0.3986801193528056
exceeded: -1.3000700786682207
exceeded: -2.1187688123345714
exceeded: -0.11198250145327457
exceeded: -0.42121719245968686
exceeded: -1.431540259012005
exceeded: -1.0675191359767522
exceeded: -0.6433240217274059
exceeded: -9.25544756354313
exceeded: -0.5075925443519383
exceeded: -1.416397161228399
exceeded: -2.046638707701047
exceeded: 

exceeded: -4.019999140440865
exceeded: -82.36874900088766
exceeded: -2.4360476572525207
exceeded: -5.877884192640038
exceeded: -2.976876343414854
exceeded: -5.017303882124794
exceeded: -0.5292417704018377
exceeded: -280.68271727892443
exceeded: -0.3521152780194057
exceeded: -0.43982301365597054
exceeded: -0.6413398388866325
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.17e+04   |
|    ep_rew_mean          | 5.31e+03   |
| time/                   |            |
|    fps                  | 3553       |
|    iterations           | 434        |
|    time_elapsed         | 756        |
|    total_timesteps      | 2689024    |
| train/                  |            |
|    approx_kl            | 0.09305524 |
|    clip_fraction        | 0.518      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.49       |
|    explained_variance   | 0.499      |
|    learning_rate        | 0.0003     |
|    loss         

exceeded: -18.726606025894654
exceeded: -37.748028283797616
exceeded: -32.75055853460009
exceeded: -7.5277874882206275
exceeded: -231.93091013472753
exceeded: -0.03709475308968182
exceeded: -0.24145679667459727
exceeded: -0.23426432253044724
exceeded: -8.803100936248065
exceeded: -90.51310819718962
exceeded: -17.771552439186426
exceeded: -41.74528087888055
exceeded: -0.5477319933684027
exceeded: -0.5963744783120952
exceeded: -0.33004813537758687
exceeded: -0.41514290503574336
exceeded: -0.9522406487171653
exceeded: -0.10626047872764033
exceeded: -0.2667835670054616
exceeded: -122.74832047206075
exceeded: -0.46318409814186356
exceeded: -0.18825849591556132
exceeded: -1.081388813480869
exceeded: -0.8682221625306065
exceeded: -0.39107165391037535
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.17e+04   |
|    ep_rew_mean          | 5.31e+03   |
| time/                   |            |
|    fps                  | 3532       |


exceeded: -0.5227715029401594
exceeded: -0.3601268064032159
exceeded: -0.3836180887783714
exceeded: -1.5958342949041744
exceeded: -0.3992293547724076
exceeded: -0.09229285059289968
exceeded: -39.71936156142966
exceeded: -0.276516374130825
exceeded: -0.4053555438478923
exceeded: -109.88171499740949
exceeded: -6.672421574499669
exceeded: -0.2896609553854825
exceeded: -0.3757765390589123
exceeded: -0.5566026963267743
exceeded: -0.353405238570638
exceeded: -1.4197019301179503
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.17e+04   |
|    ep_rew_mean          | 5.29e+03   |
| time/                   |            |
|    fps                  | 3506       |
|    iterations           | 443        |
|    time_elapsed         | 772        |
|    total_timesteps      | 2707456    |
| train/                  |            |
|    approx_kl            | 0.04337819 |
|    clip_fraction        | 0.357      |
|    clip_range           | 0.2

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.17e+04   |
|    ep_rew_mean          | 5.29e+03   |
| time/                   |            |
|    fps                  | 3476       |
|    iterations           | 449        |
|    time_elapsed         | 782        |
|    total_timesteps      | 2719744    |
| train/                  |            |
|    approx_kl            | 0.05021904 |
|    clip_fraction        | 0.365      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.56       |
|    explained_variance   | 0.439      |
|    learning_rate        | 0.0003     |
|    loss                 | 103        |
|    n_updates            | 13270      |
|    policy_gradient_loss | -0.0149    |
|    std                  | 0.165      |
|    value_loss           | 262        |
----------------------------------------
exceeded: -0.0166694922168101
exceeded: -1.722408439237504
exceeded: -17.79885737338523
exceeded: 

exceeded: -17.175927522701993
exceeded: -19.068453127552868
exceeded: -2.4871972974247023
exceeded: -179.27938617509042
exceeded: -46.2650681200671
exceeded: -2.5795778067622797
exceeded: -0.45443378986026006
exceeded: -1.6095884695333618
exceeded: -0.772347595845845
exceeded: -0.2407662086153617
exceeded: -0.028694739071509802
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.17e+04  |
|    ep_rew_mean          | 5.29e+03  |
| time/                   |           |
|    fps                  | 3442      |
|    iterations           | 456       |
|    time_elapsed         | 794       |
|    total_timesteps      | 2734080   |
| train/                  |           |
|    approx_kl            | 0.1312549 |
|    clip_fraction        | 0.476     |
|    clip_range           | 0.2       |
|    entropy_loss         | 1.6       |
|    explained_variance   | 0.591     |
|    learning_rate        | 0.0003    |
|    loss                 | 59

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.17e+04   |
|    ep_rew_mean          | 5.3e+03    |
| time/                   |            |
|    fps                  | 3423       |
|    iterations           | 460        |
|    time_elapsed         | 801        |
|    total_timesteps      | 2742272    |
| train/                  |            |
|    approx_kl            | 0.05741197 |
|    clip_fraction        | 0.329      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.64       |
|    explained_variance   | 0.341      |
|    learning_rate        | 0.0003     |
|    loss                 | 355        |
|    n_updates            | 13380      |
|    policy_gradient_loss | -0.00805   |
|    std                  | 0.162      |
|    value_loss           | 433        |
----------------------------------------
exceeded: -0.23125534335603576
exceeded: -0.06942397155913546
exceeded: -0.9438867059730467
exceed

exceeded: -3.7826891228253428
exceeded: -2.2447693135522675
exceeded: -0.7054816510593621
exceeded: -0.020822166538508845
exceeded: -5.248029642859273
exceeded: -2.1587512268916607
exceeded: -1.6970605255992437
exceeded: -1.314087049485663
exceeded: -0.2046732109934415
exceeded: -2.3515083687861926
exceeded: -0.6198008699387957
exceeded: -1.2232775968359806
exceeded: -1.2224208631929578
exceeded: -0.4416330844116498
exceeded: -1.2852916673517414
exceeded: -0.2758472948201065
exceeded: -0.030254937021789944
exceeded: -0.7913259491473484
exceeded: -0.3963110485510685
exceeded: -3.5506404289129403
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.17e+04   |
|    ep_rew_mean          | 5.31e+03   |
| time/                   |            |
|    fps                  | 3403       |
|    iterations           | 464        |
|    time_elapsed         | 808        |
|    total_timesteps      | 2750464    |
| train/                  |  

exceeded: -38.742179900845485
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.17e+04   |
|    ep_rew_mean          | 5.31e+03   |
| time/                   |            |
|    fps                  | 3385       |
|    iterations           | 468        |
|    time_elapsed         | 814        |
|    total_timesteps      | 2758656    |
| train/                  |            |
|    approx_kl            | 0.08310592 |
|    clip_fraction        | 0.409      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.65       |
|    explained_variance   | 0.356      |
|    learning_rate        | 0.0003     |
|    loss                 | 171        |
|    n_updates            | 13460      |
|    policy_gradient_loss | -0.0183    |
|    std                  | 0.162      |
|    value_loss           | 306        |
----------------------------------------
exceeded: -9.923359927965896
exceeded: -3.7827179049883
exceeded: -1

exceeded: -12.91611634366375
exceeded: -46.39844859010043
exceeded: -13.509406448849909
exceeded: -0.9782655422435736
exceeded: -1.6127464669674367
exceeded: -1.3917604498481908
exceeded: -75.64690141875515
exceeded: -82.36199297506475
exceeded: -1.3204771723223987
exceeded: -0.2604829623399297
exceeded: -0.07331589598429572
exceeded: -0.3016876501224018
exceeded: -208.93370139823648
exceeded: -5.895434895425808
exceeded: -1.0883250403520237
exceeded: -48.47901801914266
exceeded: -0.9442964715998735
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.17e+04   |
|    ep_rew_mean          | 5.32e+03   |
| time/                   |            |
|    fps                  | 3362       |
|    iterations           | 473        |
|    time_elapsed         | 823        |
|    total_timesteps      | 2768896    |
| train/                  |            |
|    approx_kl            | 0.12679644 |
|    clip_fraction        | 0.465      |
|  

exceeded: -69.4451091379997
exceeded: -2.6550742632558233
exceeded: -16.80980259372458
exceeded: -66.3353944957537
exceeded: -73.8843116624968
exceeded: -13.165786788359885
exceeded: -4.2448707032044535
exceeded: -0.9281922838185596
exceeded: -4.707220479956129
exceeded: -44.06061281617829
exceeded: -1.8794666663423343
exceeded: -6.386564015189306
exceeded: -0.11395729520485635
exceeded: -0.4922809638455713
exceeded: -0.28564620640420624
exceeded: -1.7373970877793345
exceeded: -109.75032314772427
exceeded: -0.07014118355998986
exceeded: -1.7132612156993432
exceeded: -0.17780022399792525
exceeded: -0.21693704233897904
exceeded: -1.2611460138652288
exceeded: -26.978859279221172
exceeded: -0.4144854299795676
exceeded: -2.1949276538655167
exceeded: -1.6728141899988103
exceeded: -34.901601221212644
exceeded: -46.919811386054334
exceeded: -6.332613092843947
exceeded: -1.2761436502217283
exceeded: -6.434208730812298
exceeded: -131.4688717274838
exceeded: -47.877780789967794
exceeded: -0.57911

exceeded: -0.38783780177460214
exceeded: -0.09798892764175629
exceeded: -55.408581046801835
exceeded: -0.552648572734089
exceeded: -9.930044898270655
exceeded: -1.4371402222402538
exceeded: -38.252193117586096
exceeded: -12.797770292052874
exceeded: -38.83176215414311
exceeded: -0.1632778217624103
exceeded: -1.4723501963199206
exceeded: -2.2727888054280077
exceeded: -7.287145653963565
exceeded: -0.5383450956773645
exceeded: -25.03729580601114
exceeded: -18.7348983690153
exceeded: -1.0649921904367485
exceeded: -9.789546626690882
exceeded: -4.394595617139892
exceeded: -0.09729783687199448
exceeded: -1.2112991719015183
exceeded: -0.6396137570718924
exceeded: -0.35481451102779105
exceeded: -11.308031827994542
exceeded: -0.2725436658248365
exceeded: -0.20698491957765572
exceeded: -12.602723179788995
exceeded: -0.043030701517240116
exceeded: -1.5202685555071556
exceeded: -11.025503815643303
exceeded: -0.10827727510517696
exceeded: -0.3470561712930368
exceeded: -1.5286396413324534
exceeded: -

exceeded: -2.3856953670829513
exceeded: -0.22107592562896583
exceeded: -1.702316640322203
exceeded: -2.422828286872408
exceeded: -40.571012293834755
exceeded: -76.14510911615349
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 5.29e+03    |
| time/                   |             |
|    fps                  | 3300        |
|    iterations           | 487         |
|    time_elapsed         | 847         |
|    total_timesteps      | 2797568     |
| train/                  |             |
|    approx_kl            | 0.085761994 |
|    clip_fraction        | 0.462       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.57        |
|    explained_variance   | 0.216       |
|    learning_rate        | 0.0003      |
|    loss                 | 52.2        |
|    n_updates            | 13650       |
|    policy_gradient_loss | -0.0123     |
|    std                

exceeded: -0.7551190989319266
exceeded: -0.16309156299030592
exceeded: -0.22385090927481657
exceeded: -0.2273562328762038
exceeded: -1.8659492616674291
exceeded: -1.127645323890351
exceeded: -0.8379313567466184
exceeded: -1.2972228109436144
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 5.27e+03    |
| time/                   |             |
|    fps                  | 3279        |
|    iterations           | 491         |
|    time_elapsed         | 855         |
|    total_timesteps      | 2805760     |
| train/                  |             |
|    approx_kl            | 0.059159808 |
|    clip_fraction        | 0.418       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.55        |
|    explained_variance   | 0.527       |
|    learning_rate        | 0.0003      |
|    loss                 | 93          |
|    n_updates            | 13690       |
|  

exceeded: -8.347229427038446
exceeded: -35.79923505652262
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.16e+04   |
|    ep_rew_mean          | 5.27e+03   |
| time/                   |            |
|    fps                  | 3261       |
|    iterations           | 495        |
|    time_elapsed         | 862        |
|    total_timesteps      | 2813952    |
| train/                  |            |
|    approx_kl            | 0.16458786 |
|    clip_fraction        | 0.49       |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.57       |
|    explained_variance   | 0.279      |
|    learning_rate        | 0.0003     |
|    loss                 | 101        |
|    n_updates            | 13730      |
|    policy_gradient_loss | -0.0126    |
|    std                  | 0.165      |
|    value_loss           | 241        |
----------------------------------------
exceeded: -2.8468812984334235
exceeded: 

exceeded: -36.39428028924077
exceeded: -119.85802937745649
exceeded: -43.821100036462674
exceeded: -2.962774914425917
exceeded: -1.4547551494016728
exceeded: -1.4143810941856991
exceeded: -19.529508346296712
exceeded: -0.20638763074259683
exceeded: -2.680196635028715
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 5.27e+03    |
| time/                   |             |
|    fps                  | 3240        |
|    iterations           | 500         |
|    time_elapsed         | 871         |
|    total_timesteps      | 2824192     |
| train/                  |             |
|    approx_kl            | 0.093591794 |
|    clip_fraction        | 0.559       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.54        |
|    explained_variance   | 0.607       |
|    learning_rate        | 0.0003      |
|    loss                 | 41.9        |
|    n_updates    

exceeded: -0.9942097872519344
exceeded: -0.5488198686905971
exceeded: -1.4056344427925684
exceeded: -2.1903557474275734
exceeded: -1.3957590011748322
exceeded: -1.3400833283761446
exceeded: -11.124813824425079
exceeded: -90.46982437306029
exceeded: -2.8371989450277124
exceeded: -3.1984416692763635
exceeded: -17.717449750221174
exceeded: -18.95068212095768
exceeded: -15.52917041992751
exceeded: -78.13024760724274
exceeded: -1.0974628313923234
exceeded: -1.3278131248925416
exceeded: -94.45236904405724
exceeded: -0.7094854337633343
exceeded: -0.12190201239123254
exceeded: -0.8537430029159442
exceeded: -0.5922496856899362
exceeded: -0.0750591043337214
exceeded: -280.4809944765024
exceeded: -0.19608384639038592
exceeded: -64.38692184993067
exceeded: -0.1571752485333755
exceeded: -0.40178230279338234
exceeded: -2.445172575818493
exceeded: -1.1651882415709065
exceeded: -0.23516141788935488
exceeded: -0.645270350380446
exceeded: -1.0459011805276488
----------------------------------------
| ro

exceeded: -26.945549424886153
exceeded: -3.9478090132674892
exceeded: -65.88650673766531
exceeded: -3.1293954695348
exceeded: -10.230397477819729
exceeded: -31.52922441927586
exceeded: -38.15409896592683
exceeded: -70.14828726631563
exceeded: -72.73520588748379
exceeded: -0.5111807948101046
exceeded: -28.44393105469454
exceeded: -18.303703841231595
exceeded: -24.497246523392768
exceeded: -82.95110594754058
exceeded: -1.281703421622198
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.16e+04    |
|    ep_rew_mean          | 5.27e+03    |
| time/                   |             |
|    fps                  | 3197        |
|    iterations           | 511         |
|    time_elapsed         | 890         |
|    total_timesteps      | 2846720     |
| train/                  |             |
|    approx_kl            | 0.060680754 |
|    clip_fraction        | 0.397       |
|    clip_range           | 0.2         |
|    entropy_lo

exceeded: -0.4960277682716702
exceeded: -0.9035234219076661
exceeded: -8.312152507553566
exceeded: -0.5520511442485743
exceeded: -0.06473840702470066
exceeded: -0.5082532900461942
exceeded: -4.875606055320333
exceeded: -0.3207192308269408
exceeded: -2.7344419049742417
exceeded: -18.68597688394538
exceeded: -0.9060930346734046
exceeded: -0.4911679843887238
exceeded: -0.3164841241247155
exceeded: -0.18615178557210527
exceeded: -0.5048923279996246
exceeded: -0.33352975205858165
exceeded: -0.6162825509249337
exceeded: -0.5147377661906744
exceeded: -1.9273495067257418
exceeded: -0.18659392082370174
exceeded: -7.323465110923006
exceeded: -0.5256456266750754
exceeded: -0.5207505787000575
exceeded: -1.5492297030361468
exceeded: -0.26669881393760037
exceeded: -0.2092030100303627
exceeded: -1.1915975929377864
exceeded: -0.20484753042697224
exceeded: -26.188062929657004
exceeded: -0.6440338277979697
exceeded: -0.1106664634186714
exceeded: -0.5327804350196703
exceeded: -0.06872205477083333
exceede

exceeded: -78.2963520862933
exceeded: -2.467207193038789
exceeded: -23.17335698178821
exceeded: -99.77873220510233
exceeded: -5.092943794246589
exceeded: -11.046974049064902
exceeded: -14.535740816479233
exceeded: -7.177224506647275
exceeded: -1.7756228366513946
exceeded: -3.812206267423224
exceeded: -3.5498676337188355
exceeded: -12.802482842354205
exceeded: -35.91299236893364
exceeded: -0.510738807863751
exceeded: -0.8280495269662904
exceeded: -0.5042172115378741
exceeded: -1.4918924536892566
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.15e+04    |
|    ep_rew_mean          | 5.27e+03    |
| time/                   |             |
|    fps                  | 3160        |
|    iterations           | 521         |
|    time_elapsed         | 907         |
|    total_timesteps      | 2867200     |
| train/                  |             |
|    approx_kl            | 0.111387685 |
|    clip_fraction        | 0.552     

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.15e+04   |
|    ep_rew_mean          | 5.28e+03   |
| time/                   |            |
|    fps                  | 3140       |
|    iterations           | 526        |
|    time_elapsed         | 916        |
|    total_timesteps      | 2877440    |
| train/                  |            |
|    approx_kl            | 0.07046583 |
|    clip_fraction        | 0.431      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.66       |
|    explained_variance   | 0.443      |
|    learning_rate        | 0.0003     |
|    loss                 | 67.5       |
|    n_updates            | 14040      |
|    policy_gradient_loss | -0.025     |
|    std                  | 0.161      |
|    value_loss           | 128        |
----------------------------------------
exceeded: -39.84923004935234
exceeded: -13.330283581834026
exceeded: -21.788604423737148
exceeded:

exceeded: -19.752119185998435
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.15e+04    |
|    ep_rew_mean          | 5.3e+03     |
| time/                   |             |
|    fps                  | 3119        |
|    iterations           | 532         |
|    time_elapsed         | 926         |
|    total_timesteps      | 2889728     |
| train/                  |             |
|    approx_kl            | 0.060335547 |
|    clip_fraction        | 0.413       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.62        |
|    explained_variance   | 0.342       |
|    learning_rate        | 0.0003      |
|    loss                 | 82.4        |
|    n_updates            | 14100       |
|    policy_gradient_loss | -0.0162     |
|    std                  | 0.164       |
|    value_loss           | 171         |
-----------------------------------------
exceeded: -0.3937634887887558
exceeded: -0.496

exceeded: -0.6294549836059387
exceeded: -0.5613446656219611
exceeded: -1.8759600146926616
exceeded: -26.371906436218836
exceeded: -0.4742621953928567
exceeded: -0.1324540570135457
exceeded: -1.2257784828905713
exceeded: -0.17340963608390508
exceeded: -0.14439738922633705
exceeded: -0.03715640643301618
exceeded: -10.159099791508376
exceeded: -0.022956297632931933
exceeded: -0.00869654654312448
exceeded: -0.39909105463542693
exceeded: -1.345257749347453
exceeded: -2.2674680233296667
exceeded: -69.07035081917984
exceeded: -31.59904595520254
exceeded: -34.347635869124694
exceeded: -44.349583388980534
exceeded: -0.03704709535885327
exceeded: -1.6073213580167025
exceeded: -0.05261889826790183
exceeded: -0.3779196472239215
exceeded: -0.0392457618712346
exceeded: -1.9256305246291583
exceeded: -1.6332714741621341
exceeded: -0.2740815262375818
exceeded: -0.5164977058678246
exceeded: -1.2840854323652025
exceeded: -1.8309399179955912
exceeded: -25.492105346784815
exceeded: -0.0645091602416784
exce

exceeded: -2.9829721513841694
exceeded: -14.484916307414242
exceeded: -12.524940947166153
exceeded: -4.4294915167364035
exceeded: -1.967314282576588
exceeded: -10.971016096195672
exceeded: -90.5116277926841
exceeded: -3.152736162552372
exceeded: -35.9706237590773
exceeded: -0.2681129037235984
exceeded: -10.633967851356642
exceeded: -1.845083819694723
exceeded: -0.2517537673411681
exceeded: -0.26734017466369425
exceeded: -0.46996022777276314
exceeded: -5.375533123203474
exceeded: -7.533014520033518
exceeded: -1.0227242932492981
exceeded: -2.7986716264632205
exceeded: -2.6876000191036216
exceeded: -9.251774858000367
exceeded: -0.9898457281257613
exceeded: -234.59197705719563
exceeded: -0.3177074308775949
exceeded: -0.054739248932297625
exceeded: -149.91821131328095
exceeded: -0.6578676652496657
exceeded: -1.0910522087756642
exceeded: -2.441488792368802
exceeded: -2.7059336960082927
exceeded: -0.2000852491938903
exceeded: -2.1745162513432583
exceeded: -6.52467603725293
-------------------

exceeded: -7.156927758548307
exceeded: -6.240719045067458
exceeded: -8.452622652618375
exceeded: -39.71551460713189
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.15e+04   |
|    ep_rew_mean          | 5.3e+03    |
| time/                   |            |
|    fps                  | 3048       |
|    iterations           | 548        |
|    time_elapsed         | 958        |
|    total_timesteps      | 2922496    |
| train/                  |            |
|    approx_kl            | 0.15368246 |
|    clip_fraction        | 0.604      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.67       |
|    explained_variance   | 0.415      |
|    learning_rate        | 0.0003     |
|    loss                 | 25.6       |
|    n_updates            | 14260      |
|    policy_gradient_loss | 0.00425    |
|    std                  | 0.162      |
|    value_loss           | 61         |
-----------------------

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 1.15e+04  |
|    ep_rew_mean          | 5.29e+03  |
| time/                   |           |
|    fps                  | 3029      |
|    iterations           | 553       |
|    time_elapsed         | 968       |
|    total_timesteps      | 2932736   |
| train/                  |           |
|    approx_kl            | 0.2874355 |
|    clip_fraction        | 0.57      |
|    clip_range           | 0.2       |
|    entropy_loss         | 1.66      |
|    explained_variance   | 0.724     |
|    learning_rate        | 0.0003    |
|    loss                 | 44        |
|    n_updates            | 14310     |
|    policy_gradient_loss | 0.0222    |
|    std                  | 0.161     |
|    value_loss           | 107       |
---------------------------------------
exceeded: -8.777276719394854
exceeded: -1.3785738512604016
exceeded: -1.02515814040145
exceeded: -71.53311999830598
exce

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.14e+04   |
|    ep_rew_mean          | 5.3e+03    |
| time/                   |            |
|    fps                  | 3009       |
|    iterations           | 558        |
|    time_elapsed         | 977        |
|    total_timesteps      | 2942976    |
| train/                  |            |
|    approx_kl            | 0.07912907 |
|    clip_fraction        | 0.478      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.6        |
|    explained_variance   | 0.513      |
|    learning_rate        | 0.0003     |
|    loss                 | 31.9       |
|    n_updates            | 14360      |
|    policy_gradient_loss | -0.00647   |
|    std                  | 0.164      |
|    value_loss           | 84.6       |
----------------------------------------
exceeded: -0.22718916321248162
exceeded: -60.62023139061398
exceeded: -10.342079319665123
exceeded

exceeded: -114.63007940977376
exceeded: -0.22801334301187937
exceeded: -68.44855270907848
exceeded: -0.11560559082971542
exceeded: -0.2945716181714544
exceeded: -3.342006090326222
exceeded: -0.9840029655198197
exceeded: -9.568370598130246
exceeded: -1.6361617954769523
exceeded: -1.3139830690147656
exceeded: -0.07794465779652894
exceeded: -1.108130300797515
exceeded: -6.32537057364237
exceeded: -0.2166559623055968
exceeded: -0.8532105002902493
exceeded: -0.33764888660363473
exceeded: -2.198072531942357
exceeded: -0.4558356573591694
exceeded: -1.3991096600058381
exceeded: -1.79931466141404
exceeded: -0.7982232606206423
exceeded: -0.9172570783065609
exceeded: -0.17287931347383928
exceeded: -1.2917757316082508
exceeded: -0.5485846199959901
exceeded: -0.3747719658024651
exceeded: -0.5224511240487858
exceeded: -15.717311897716758
exceeded: -1.3534370825400568
exceeded: -2.8199342303713033
exceeded: -0.05864672273329347
exceeded: -0.6974557686073007
exceeded: -133.0309846479204
exceeded: -0.2

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.14e+04   |
|    ep_rew_mean          | 5.25e+03   |
| time/                   |            |
|    fps                  | 2976       |
|    iterations           | 566        |
|    time_elapsed         | 994        |
|    total_timesteps      | 2959360    |
| train/                  |            |
|    approx_kl            | 0.13603881 |
|    clip_fraction        | 0.611      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.66       |
|    explained_variance   | 0.431      |
|    learning_rate        | 0.0003     |
|    loss                 | 40.3       |
|    n_updates            | 14440      |
|    policy_gradient_loss | 0.00398    |
|    std                  | 0.161      |
|    value_loss           | 75.4       |
----------------------------------------
exceeded: -25.708791495062986
exceeded: -4.910184742682265
exceeded: -7.82476720493777
exceeded: -

exceeded: -21.50259539885955
exceeded: -4.4183638497808335
exceeded: -8.048129202844121
exceeded: -32.87231660735354
exceeded: -37.37172310391927
exceeded: -4.362152862953346
exceeded: -0.0038603781008240464
exceeded: -13.100788544037375
exceeded: -0.1410102061099264
exceeded: -57.07634834525357
exceeded: -1.156775547704022
exceeded: -18.180278074212907
exceeded: -0.009857908833886386
exceeded: -28.744150654728415
exceeded: -0.4878647908633119
exceeded: -11.188797576943797
exceeded: -0.07498987966512069
exceeded: -27.322313469241195
exceeded: -3.523832702107439
exceeded: -7.705510013624018
exceeded: -0.023813435036009124
exceeded: -0.07943610287803818
exceeded: -1.1426850114879044
exceeded: -0.24191007580600976
exceeded: -14.360044252519952
exceeded: -0.11586419902655783
exceeded: -1.3795629282078115
exceeded: -0.37104462526089066
exceeded: -0.6313637298612931
exceeded: -1.064725579501157
exceeded: -40.33767613859087
exceeded: -34.3966653068878
-----------------------------------------

exceeded: -0.8286670473830466
exceeded: -26.57008765751907
exceeded: -0.21901934689178298
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 1.14e+04   |
|    ep_rew_mean          | 5.24e+03   |
| time/                   |            |
|    fps                  | 2939       |
|    iterations           | 577        |
|    time_elapsed         | 1014       |
|    total_timesteps      | 2981888    |
| train/                  |            |
|    approx_kl            | 0.11664106 |
|    clip_fraction        | 0.563      |
|    clip_range           | 0.2        |
|    entropy_loss         | 1.71       |
|    explained_variance   | 0.301      |
|    learning_rate        | 0.0003     |
|    loss                 | 21.5       |
|    n_updates            | 14550      |
|    policy_gradient_loss | 0.0088     |
|    std                  | 0.16       |
|    value_loss           | 82.6       |
----------------------------------------
exceeded

exceeded: -0.8079441689787195
exceeded: -0.23028925114292537
exceeded: -1.3127338880158013
exceeded: -0.15289009314195068
exceeded: -0.04443659785317789
exceeded: -64.62211778202405
exceeded: -207.5355780431282
exceeded: -2.7516806913041303
exceeded: -0.43340945732342206
exceeded: -1.9495790268578974
exceeded: -4.968010757760755
exceeded: -0.0591541661040532
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.14e+04    |
|    ep_rew_mean          | 5.24e+03    |
| time/                   |             |
|    fps                  | 2927        |
|    iterations           | 581         |
|    time_elapsed         | 1021        |
|    total_timesteps      | 2990080     |
| train/                  |             |
|    approx_kl            | 0.057329718 |
|    clip_fraction        | 0.355       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.7         |
|    explained_variance   | 0.263       |
|    lear

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.14e+04    |
|    ep_rew_mean          | 5.24e+03    |
| time/                   |             |
|    fps                  | 2919        |
|    iterations           | 584         |
|    time_elapsed         | 1026        |
|    total_timesteps      | 2996224     |
| train/                  |             |
|    approx_kl            | 0.054764356 |
|    clip_fraction        | 0.382       |
|    clip_range           | 0.2         |
|    entropy_loss         | 1.72        |
|    explained_variance   | 0.693       |
|    learning_rate        | 0.0003      |
|    loss                 | 159         |
|    n_updates            | 14620       |
|    policy_gradient_loss | -0.0111     |
|    std                  | 0.159       |
|    value_loss           | 210         |
-----------------------------------------
exceeded: -6.185179722370544
exceeded: -2.0298609297102645
exceeded: -0.3312

KeyboardInterrupt: 

In [10]:
model.save("3M_trained")

In [None]:
today()