In [1]:
import gym 
from gym import Env
from gym.spaces import Discrete, Box, Dict, Tuple, MultiBinary, MultiDiscrete 
from gym.utils import seeding
import numpy as np
import random
import os
from stable_baselines3 import PPO, DQN, A2C, HER
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy

from typing import Final, Any
import math
from shapely.geometry.polygon import Polygon

import PIL.ImageDraw as ImageDraw
import PIL.Image as Image
import cv2

class ActivVisionEnv(Env):
    """Gym environment: a camera that slides along x and pans tries to keep moving squares in view.

    The world is a ``width`` x ``height`` canvas (comments in this file give sizes in mm) on
    which ``num_objects`` axis-aligned squares move and bounce.  The agent accelerates the
    camera left/right or changes the camera angle.  The per-step reward is the number of
    objects that overlap the camera's view quadrilateral, reduced the further the camera is
    panned away from angle 0.

    Observations are small RGB images (``_im_height`` x ``_im_width`` x 3, uint8) of the whole
    canvas showing the view outline and the objects.  Episodes last 3000 steps.

    Actions (``Discrete(5)``):
        0  accelerate the camera towards +x
        1  accelerate the camera towards -x
        2  increase the camera angle by ``_camera_per_step``
        3  decrease the camera angle by ``_camera_per_step``
        4  do nothing
    """

    # RGB colours used to draw the objects; cycled when there are more objects than colours.
    _COLORS = ((255, 0, 0), (0, 255, 0), (0, 0, 255), (120, 120, 0), (0, 120, 120), (120, 0, 120))

    def __init__(self, num_objects: int = 3,
                 simulation_frequency: float = 5,
                 width: int = 8000, height: int = 2000, max_velocity_player: int = 200, view: int = 78,
                 img_format: float = (1920 / 1080)):
        """Set up constants, spaces and the initial state.

        :param num_objects: number of moving squares in the scene
        :param simulation_frequency: simulation steps per second; one step advances 1/frequency s
        :param width: canvas width
        :param height: canvas height
        :param max_velocity_player: absolute limit for the camera's x velocity
        :param view: horizontal opening angle of the camera in degrees
        :param img_format: aspect ratio (width / height) of the camera image; the vertical
            opening angle is derived as ``view / img_format``
        """
        super().__init__()

        # consts
        self._object_min_size: Final = 100  # in mm
        # NOTE(review): min and max size are equal, so both size-limit branches in
        # progress_simulation fire on every step -- confirm this is intended.
        self._object_max_size: Final = 100  # in mm
        self._object_max_z_velocity: Final = 20  # in mm
        self._object_max_velocity: Final = 200  # in mm/s
        self._camera_height: Final = 390  # in mm
        self._camera_max_angle: Final = 20  # in degree
        self._velocity_player_per_step = 50  # max velocity change at each cycle
        self._camera_per_step = 1  # max camera angle change at each cycle
        self._player_max_z = width + 1
        self._player_min_z = 1
        self._player_pos_y = height - self._camera_height
        self.timer = 3000  # remaining steps in the episode
        self._random_force_enabled = False
        self._object_max_rnd_force_p_sec = 10  # in 1/percent from max
        self._player_min_x = 2000
        self._player_max_x = width - self._player_min_x

        # params
        self._num_objects = num_objects
        self._simulation_frequency = simulation_frequency
        self._width = width
        self._height = height
        self._max_velocity_player = max_velocity_player
        self._view = view
        self._img_format = img_format
        self._height_view = (1 / img_format) * view

        # size of the observation image (same aspect ratio as the canvas)
        self._im_width = 200
        self._im_height = round(self._im_width * (height / width))

        # five discrete actions, see the class docstring
        self.action_space = Discrete(5)
        # observations are RGB renderings of the scene
        self.observation_space = Box(low=0, high=255, shape=(self._im_height, self._im_width, 3), dtype=np.uint8)

        # states for player and objects as dictionaries
        self.player = {}
        self._objects = []

        # last reward, shown as text in the "human" rendering
        self._last_points = 0

        self.reset()

    def step(self, action):
        """Apply ``action``, advance the simulation one step and return the transition.

        :param action: integer in ``[0, 4]``, see the class docstring; any other value
            leaves the camera controls untouched
        :return: tuple ``(observation, reward, done, info)`` where ``observation`` is the
            "machine" rendering, ``done`` becomes True once the timer reaches 0 and
            ``info`` is an empty dict
        """
        player = self.player

        # Camera x velocity and angle each change by a fixed increment and are clamped to
        # their symmetric limits.  Movement along z ("go front"/"go back") is not wired to
        # any action.
        if action == 0:
            player["vel_x"] = min(player["vel_x"] + self._velocity_player_per_step,
                                  self._max_velocity_player)
        elif action == 1:
            player["vel_x"] = max(player["vel_x"] - self._velocity_player_per_step,
                                  -self._max_velocity_player)
        elif action == 2:
            player["angle"] = min(player["angle"] + self._camera_per_step,
                                  self._camera_max_angle)
        elif action == 3:
            player["angle"] = max(player["angle"] - self._camera_per_step,
                                  -self._camera_max_angle)

        # calc next frame
        self.progress_simulation()

        # reduce timer by one step
        self.timer -= 1

        # Reward: number of visible objects, scaled down in proportion to how far the
        # camera is panned.  The count is computed once per step (it is relatively costly).
        reward = self._add_points()
        if self._num_objects > 0:  # avoid dividing by zero for an empty scene
            reward *= (1 - (abs(player["angle"] / self._camera_max_angle)) * (1 / self._num_objects))

        done = self.timer <= 0

        # placeholder for info
        info = {}

        self._last_points = reward
        return self.render("machine"), reward, done, info

    @staticmethod
    def _square_corners(center_x, center_y, size, rounded=False):
        """Return the four corners of an axis-aligned square in ring order.

        The order is (x-, y-), (x+, y-), (x+, y+), (x-, y+), i.e. consecutive corners
        share an edge, which is what ``Polygon`` needs to describe a valid square.

        :param rounded: round every coordinate to the nearest integer
        """
        half = size / 2
        corners = [
            (center_x - half, center_y - half),
            (center_x + half, center_y - half),
            (center_x + half, center_y + half),
            (center_x - half, center_y + half),
        ]
        if rounded:
            corners = [(round(x), round(y)) for x, y in corners]
        return corners

    def progress_simulation(self):
        """Advance player and objects by one time step of ``1 / simulation_frequency`` seconds.

        Order of operations: move the player and clamp it to its allowed range, move the
        objects and bounce them off the canvas borders and size limits, swap velocities of
        overlapping objects, and finally (only if enabled) apply random velocity changes.
        """
        seconds_passed = 1 / self._simulation_frequency

        # move player and push it back into its constraints
        player = self.player
        player["pos_x"] = player["pos_x"] + seconds_passed * player["vel_x"]
        player["pos_z"] = player["pos_z"] + seconds_passed * player["vel_z"]
        player["pos_x"] = min(max(player["pos_x"], self._player_min_x), self._player_max_x)
        player["pos_z"] = min(max(player["pos_z"], self._player_min_z), self._player_max_z)

        # move objects; "size" is driven by vel_z
        for obj in self._objects:
            obj["pos_x"] = obj["pos_x"] + seconds_passed * obj["vel_x"]
            obj["pos_y"] = obj["pos_y"] + seconds_passed * obj["vel_y"]
            obj["size"] = obj["size"] + seconds_passed * obj["vel_z"]

        # collision with wall: reflect the velocity, move the object just inside the border
        # and stop its size change
        for obj in self._objects:
            if obj["pos_x"] - obj["size"] / 2 <= 0:
                obj["vel_x"] = -obj["vel_x"]
                obj["pos_x"] = obj["size"] / 2 + 1
                obj["vel_z"] = 0

            if obj["pos_x"] + obj["size"] / 2 >= self._width:
                obj["vel_x"] = -obj["vel_x"]
                obj["pos_x"] = self._width - obj["size"] / 2 - 1
                obj["vel_z"] = 0

            if obj["pos_y"] - obj["size"] / 2 <= 0:
                obj["vel_y"] = -obj["vel_y"]
                obj["pos_y"] = obj["size"] / 2 + 1
                obj["vel_z"] = 0

            if obj["pos_y"] + obj["size"] / 2 >= self._height:
                obj["vel_y"] = -obj["vel_y"]
                obj["pos_y"] = self._height - obj["size"] / 2 - 1
                obj["vel_z"] = 0

            # size limits: reverse the size change and step just inside the limit
            if obj["size"] <= self._object_min_size:
                obj["vel_z"] = -obj["vel_z"]
                obj["size"] = self._object_min_size + 1

            if obj["size"] >= self._object_max_size:
                obj["vel_z"] = -obj["vel_z"]
                obj["size"] = self._object_max_size - 1

        # Object collision: overlapping squares exchange their velocities.  Positions do not
        # change in this phase, so each square is built once.  The corners must be in ring
        # order: a bow-tie ordering yields a self-intersecting polygon that does not cover
        # the whole square.
        squares = [Polygon(self._square_corners(obj["pos_x"], obj["pos_y"], obj["size"]))
                   for obj in self._objects]
        count = len(squares)
        for i in range(count):
            for j in range(i + 1, count):
                if squares[i].intersects(squares[j]):
                    first, second = self._objects[i], self._objects[j]
                    for key in ("vel_x", "vel_y", "vel_z"):
                        first[key], second[key] = second[key], first[key]

        if self._random_force_enabled:
            # apply a random velocity change, limited to a fraction of the maximum velocity
            divisor = self._simulation_frequency * self._object_max_rnd_force_p_sec
            for obj in self._objects:
                dvx = random.randint(-self._object_max_velocity, self._object_max_velocity) / divisor
                dvy = random.randint(-self._object_max_velocity, self._object_max_velocity) / divisor
                dvz = random.randint(-self._object_max_z_velocity, self._object_max_z_velocity) / divisor

                # add the change and push the result back into the constraints
                obj["vel_x"] = max(-self._object_max_velocity,
                                   min(self._object_max_velocity, obj["vel_x"] + dvx))
                obj["vel_y"] = max(-self._object_max_velocity,
                                   min(self._object_max_velocity, obj["vel_y"] + dvy))
                obj["vel_z"] = max(-self._object_max_z_velocity,
                                   min(self._object_max_z_velocity, obj["vel_z"] + dvz))

    def _add_points(self):
        """Count the objects that are currently inside the camera view.

        An object counts when the area of its intersection with the view quadrilateral
        exceeds ``size / 4``.

        :return: number of visible objects (0 if the view area is not larger than 1)
        """
        corners_view = self._get_view()
        view_polygon = Polygon([corners_view["left_top"], corners_view["right_top"],
                                corners_view["right_bot"], corners_view["left_bot"]])

        # a degenerate view cannot contain anything
        if view_polygon.area <= 1:
            return 0

        visible = 0
        for obj in self._objects:
            size = obj["size"]
            object_polygon = Polygon(self._square_corners(obj["pos_x"], obj["pos_y"], size, rounded=True))
            # NOTE(review): an area is compared with a length (size / 4) here -- confirm
            # whether a fraction of the object's area (size ** 2) was intended.
            if view_polygon.intersection(object_polygon).area > (size / 4):
                visible += 1

        return visible

    def _get_view(self) -> Any:
        """
        calculates the four corner points of the view of the camera in the big canvas in points in mm from top left

        The left/right borders follow from the player's x position, its distance ``pos_z``
        and the half opening angle shifted by the camera angle.  The top/bottom extents are
        computed separately for the left and right side, so the view is in general a
        trapezoid.  All coordinates are rounded to integers.

        :return: dict of the four points, keys "left_top", "right_top", "left_bot", "right_bot",
            each an ``(x, y)`` tuple
        """
        deg_to_rad = math.pi / 180
        half_view = self._view / 2
        angle = self.player["angle"]
        pos_x = self.player["pos_x"]
        pos_y = self.player["pos_y"]
        pos_z = self.player["pos_z"]

        # calc left , right border
        # (a positive angle shrinks the right offset and grows the left one, i.e. the view
        # moves towards smaller x)
        right_border = pos_x + pos_z * math.tan((half_view - angle) * deg_to_rad)
        left_border = pos_x - pos_z * math.tan((half_view + angle) * deg_to_rad)

        # calc top and bot borders, thinking of distortion
        # NOTE(review): the left side uses (half_view - angle) here but (half_view + angle)
        # for its border above (and vice versa for the right side), and the distance term
        # divides by sin rather than cos -- kept as is, confirm the intended geometry.
        vertical_tan = math.tan((self._height_view / 2) * deg_to_rad)
        left_height_diff = vertical_tan * (pos_z / math.sin((half_view - angle) * deg_to_rad))
        right_height_diff = vertical_tan * (pos_z / math.sin((half_view + angle) * deg_to_rad))

        left_border = round(left_border)
        right_border = round(right_border)

        return {
            "left_top": (left_border, round(pos_y - left_height_diff)),
            "right_top": (right_border, round(pos_y - right_height_diff)),
            "left_bot": (left_border, round(pos_y + left_height_diff)),
            "right_bot": (right_border, round(pos_y + right_height_diff))
        }

    def _draw_object(self, draw, center_x, center_y, size, color, dot_radius, line_width):
        """Draw one object: a filled dot at its centre plus the outline of its square."""
        dot_x, dot_y = round(center_x), round(center_y)
        draw.ellipse([(dot_x - dot_radius, dot_y - dot_radius),
                      (dot_x + dot_radius, dot_y + dot_radius)], fill=color)

        outline = self._square_corners(center_x, center_y, size, rounded=True)
        outline.append(outline[0])  # close the outline
        draw.line(outline, fill=color, width=line_width)

    def _render_human(self):
        """Draw the scene at full canvas resolution and show it in an OpenCV window."""
        image = Image.new("RGB", (self._width, self._height))
        draw = ImageDraw.Draw(image)

        corners_view = self._get_view()
        view_outline = [corners_view["left_top"], corners_view["right_top"], corners_view["right_bot"],
                        corners_view["left_bot"], corners_view["left_top"]]

        # player position
        player_x, player_y = round(self.player["pos_x"]), round(self.player["pos_y"])
        draw.ellipse([(player_x - 20, player_y - 20), (player_x + 20, player_y + 20)], fill="white")

        draw.line(view_outline, fill="white", width=20)
        for index, obj in enumerate(self._objects):
            self._draw_object(draw, obj["pos_x"], obj["pos_y"], obj["size"],
                              self._COLORS[index % len(self._COLORS)], dot_radius=20, line_width=20)

        # PIL delivers RGB, OpenCV expects BGR
        open_cv_image = np.array(image)
        open_cv_image = open_cv_image[:, :, ::-1].copy()
        open_cv_image = cv2.resize(open_cv_image, (1200, round(1200 * (self._height / self._width))))
        font = cv2.FONT_HERSHEY_SIMPLEX

        # overlay the reward of the last step
        text = str("%.2f" % round(self._last_points, 2))
        cv2.putText(open_cv_image, text, (10, 50), font, 1, (0, 0, 255), 2, cv2.LINE_AA)
        cv2.imshow("simulation", open_cv_image)
        cv2.waitKey(1)

    def _render_machine(self):
        """Draw the scene scaled down to the observation size and return it as an array."""
        scaling = self._im_width / self._width

        image = Image.new("RGB", (self._im_width, self._im_height))
        draw = ImageDraw.Draw(image)

        corners_view = self._get_view()

        # scaling every coordinate down
        view_outline = [(corners_view[key][0] * scaling, corners_view[key][1] * scaling)
                        for key in ("left_top", "right_top", "right_bot", "left_bot", "left_top")]
        draw.line(view_outline, fill="white", width=1)

        for index, obj in enumerate(self._objects):
            # position and size are rounded to pixels before the outline is derived
            self._draw_object(draw,
                              round(obj["pos_x"] * scaling),
                              round(obj["pos_y"] * scaling),
                              round(obj["size"] * scaling),
                              self._COLORS[index % len(self._COLORS)], dot_radius=1, line_width=1)

        return np.array(image)

    def render(self, mode = "human"):
        """Render the current state.

        :param mode: "human" shows the full-size scene in an OpenCV window and returns
            None; "machine" returns the observation image as a uint8 array of shape
            ``(_im_height, _im_width, 3)``.  Any other mode does nothing and returns None.
        """
        if mode == "human":
            self._render_human()

        if mode == "machine":
            return self._render_machine()

    def reset(self):
        """Start a new episode and return the first observation.

        The timer is reset, the player is placed in the horizontal centre with zero
        velocity and angle, and the objects are laid out around the canvas centre with
        random velocities and sizes drawn from the ``random`` module.

        :return: the "machine" rendering of the initial state
        """
        self.points = 0.0
        self.timer = 3000

        self.player = {
            "pos_x": self._width / 2 * 1.0,
            "pos_y": self._height - self._camera_height * 1.0,
            "pos_z": 1200.0,
            "vel_x": 0.0,
            "vel_z": 0.0,
            "angle": 0
        }

        self._objects = []
        for i in range(0, self._num_objects):
            self._objects.append({
                "pos_x": (self._width / 2) + (i - math.floor(self._num_objects / 2)) * self._object_max_size,
                "pos_y": (self._height / 2) + i * self._object_max_size,
                "vel_x": random.randint(-self._object_max_velocity, self._object_max_velocity) * 0.5,
                "vel_y": random.randint(-self._object_max_velocity, self._object_max_velocity) * 0.3,
                "vel_z": random.randint(-self._object_max_z_velocity, self._object_max_z_velocity) * 0.3,
                "size": random.randint(self._object_min_size, self._object_max_size) * 1.0
            })

        return self.render("machine")

        # The notes below describe a flat state vector; the environment as written returns
        # image observations instead (see observation_space), so they do not describe what
        # step()/reset() return.
        # state:
        # 0 pos player x
        # 1 pos player z
        # 2 vel player x
        # 3 vel player z
        # 4 angle

        # 5 pos obj 1 x
        # 6 pos obj 1 y
        # 7 pos obj 1 z
        # 8 vel obj 1 x
        # 9 vel obj 1 y
        # 10 vel obj 1 z
        # the 5-10 in repeat for every object

In [2]:
from stable_baselines3.common.env_checker import check_env

# Create the environment with its default parameters and let Stable-Baselines3
# verify that it follows the Gym interface (warnings enabled).
env = ActivVisionEnv()
check_env(env, warn=True)

In [3]:
# Baseline: play a few episodes with uniformly sampled actions and print the
# summed reward of each episode.
episodes = 5
for episode in range(1, episodes + 1):
    state, done, score = env.reset(), False, 0

    while True:
        if done:
            break
        # env.render("machine")
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score = score + reward

    print('Episode:{} Score:{}'.format(episode, score))
env.close()

Episode:1 Score:985.4499999999982
Episode:2 Score:2180.8833333333355
Episode:3 Score:752.6500000000009
Episode:4 Score:1546.7833333333424
Episode:5 Score:1156.516666666669


In [4]:
env.close()

# PPO with a CNN policy on the image observations; training statistics are
# written as TensorBoard logs below Training/Logs.
log_path = os.path.join('Training', 'Logs')
model = PPO(
    "CnnPolicy",
    env,
    verbose=1,
    tensorboard_log=log_path,
)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [None]:
# Train for two million environment steps.
model.learn(total_timesteps=2_000_000)

Logging to Training\Logs\PPO_19
-----------------------------
| time/              |      |
|    fps             | 494  |
|    iterations      | 1    |
|    time_elapsed    | 4    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.71e+03    |
| time/                   |             |
|    fps                  | 183         |
|    iterations           | 2           |
|    time_elapsed         | 22          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.014572235 |
|    clip_fraction        | 0.202       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.6        |
|    explained_variance   | -0.00487    |
|    learning_rate        | 0.0003      |
|    loss                 | 0.704       |
|    n_updates            | 10          |
| 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03      |
|    ep_rew_mean          | 1.6e+03    |
| time/                   |            |
|    fps                  | 115        |
|    iterations           | 12         |
|    time_elapsed         | 212        |
|    total_timesteps      | 24576      |
| train/                  |            |
|    approx_kl            | 0.02392647 |
|    clip_fraction        | 0.255      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.44      |
|    explained_variance   | 0.73       |
|    learning_rate        | 0.0003     |
|    loss                 | 0.362      |
|    n_updates            | 110        |
|    policy_gradient_loss | 0.00128    |
|    value_loss           | 6.36       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03      |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.58e+03    |
| time/                   |             |
|    fps                  | 111         |
|    iterations           | 22          |
|    time_elapsed         | 405         |
|    total_timesteps      | 45056       |
| train/                  |             |
|    approx_kl            | 0.028305858 |
|    clip_fraction        | 0.226       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.26       |
|    explained_variance   | 0.758       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.509       |
|    n_updates            | 210         |
|    policy_gradient_loss | -0.00867    |
|    value_loss           | 8.14        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.65e+03    |
| time/                   |             |
|    fps                  | 108         |
|    iterations           | 32          |
|    time_elapsed         | 602         |
|    total_timesteps      | 65536       |
| train/                  |             |
|    approx_kl            | 0.051195562 |
|    clip_fraction        | 0.294       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.08       |
|    explained_variance   | 0.741       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.365       |
|    n_updates            | 310         |
|    policy_gradient_loss | -0.000118   |
|    value_loss           | 3.11        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.61e+03    |
| time/                   |             |
|    fps                  | 107         |
|    iterations           | 42          |
|    time_elapsed         | 798         |
|    total_timesteps      | 86016       |
| train/                  |             |
|    approx_kl            | 0.035113208 |
|    clip_fraction        | 0.315       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.11       |
|    explained_variance   | 0.687       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.259       |
|    n_updates            | 410         |
|    policy_gradient_loss | -0.00886    |
|    value_loss           | 2.96        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03      |
|    ep_rew_mean          | 1.58e+03   |
| time/                   |            |
|    fps                  | 107        |
|    iterations           | 52         |
|    time_elapsed         | 993        |
|    total_timesteps      | 106496     |
| train/                  |            |
|    approx_kl            | 0.06975053 |
|    clip_fraction        | 0.178      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.523     |
|    explained_variance   | 0.826      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.546      |
|    n_updates            | 510        |
|    policy_gradient_loss | -0.0194    |
|    value_loss           | 5.76       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.56e+03    |
| time/                   |             |
|    fps                  | 106         |
|    iterations           | 62          |
|    time_elapsed         | 1188        |
|    total_timesteps      | 126976      |
| train/                  |             |
|    approx_kl            | 0.045058038 |
|    clip_fraction        | 0.283       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1          |
|    explained_variance   | 0.874       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.32        |
|    n_updates            | 610         |
|    policy_gradient_loss | -0.0117     |
|    value_loss           | 5.3         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.6e+03     |
| time/                   |             |
|    fps                  | 106         |
|    iterations           | 72          |
|    time_elapsed         | 1384        |
|    total_timesteps      | 147456      |
| train/                  |             |
|    approx_kl            | 0.039280117 |
|    clip_fraction        | 0.337       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.24       |
|    explained_variance   | 0.847       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.246       |
|    n_updates            | 710         |
|    policy_gradient_loss | -0.0116     |
|    value_loss           | 5.09        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03      |
|    ep_rew_mean          | 1.62e+03   |
| time/                   |            |
|    fps                  | 106        |
|    iterations           | 82         |
|    time_elapsed         | 1584       |
|    total_timesteps      | 167936     |
| train/                  |            |
|    approx_kl            | 0.02768303 |
|    clip_fraction        | 0.32       |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.15      |
|    explained_variance   | 0.758      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.676      |
|    n_updates            | 810        |
|    policy_gradient_loss | -0.0104    |
|    value_loss           | 6          |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03      |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.64e+03    |
| time/                   |             |
|    fps                  | 105         |
|    iterations           | 92          |
|    time_elapsed         | 1784        |
|    total_timesteps      | 188416      |
| train/                  |             |
|    approx_kl            | 0.043115947 |
|    clip_fraction        | 0.365       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.2        |
|    explained_variance   | 0.798       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.384       |
|    n_updates            | 910         |
|    policy_gradient_loss | 0.00679     |
|    value_loss           | 6.26        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.64e+03    |
| time/                   |             |
|    fps                  | 105         |
|    iterations           | 102         |
|    time_elapsed         | 1983        |
|    total_timesteps      | 208896      |
| train/                  |             |
|    approx_kl            | 0.040280737 |
|    clip_fraction        | 0.331       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.21       |
|    explained_variance   | 0.804       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.297       |
|    n_updates            | 1010        |
|    policy_gradient_loss | -0.0114     |
|    value_loss           | 3.31        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.63e+03    |
| time/                   |             |
|    fps                  | 105         |
|    iterations           | 112         |
|    time_elapsed         | 2181        |
|    total_timesteps      | 229376      |
| train/                  |             |
|    approx_kl            | 0.028680293 |
|    clip_fraction        | 0.318       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.18       |
|    explained_variance   | 0.86        |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0707      |
|    n_updates            | 1110        |
|    policy_gradient_loss | -0.0171     |
|    value_loss           | 2.47        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03      |
|    ep_rew_mean          | 1.6e+03    |
| time/                   |            |
|    fps                  | 105        |
|    iterations           | 122        |
|    time_elapsed         | 2378       |
|    total_timesteps      | 249856     |
| train/                  |            |
|    approx_kl            | 0.02996321 |
|    clip_fraction        | 0.364      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.15      |
|    explained_variance   | 0.772      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.127      |
|    n_updates            | 1210       |
|    policy_gradient_loss | -0.00548   |
|    value_loss           | 3.96       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.59e+03    |
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 132         |
|    time_elapsed         | 2575        |
|    total_timesteps      | 270336      |
| train/                  |             |
|    approx_kl            | 0.031899184 |
|    clip_fraction        | 0.355       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.23       |
|    explained_variance   | 0.659       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.192       |
|    n_updates            | 1310        |
|    policy_gradient_loss | -0.0125     |
|    value_loss           | 4.27        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03 

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3e+03      |
|    ep_rew_mean          | 1.62e+03   |
| time/                   |            |
|    fps                  | 104        |
|    iterations           | 142        |
|    time_elapsed         | 2773       |
|    total_timesteps      | 290816     |
| train/                  |            |
|    approx_kl            | 0.04733389 |
|    clip_fraction        | 0.367      |
|    clip_range           | 0.2        |
|    entropy_loss         | -1.18      |
|    explained_variance   | 0.815      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.202      |
|    n_updates            | 1410       |
|    policy_gradient_loss | -0.00854   |
|    value_loss           | 4.91       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.63e+03    |
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 152         |
|    time_elapsed         | 2972        |
|    total_timesteps      | 311296      |
| train/                  |             |
|    approx_kl            | 0.037425704 |
|    clip_fraction        | 0.353       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.17       |
|    explained_variance   | 0.769       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.125       |
|    n_updates            | 1510        |
|    policy_gradient_loss | -0.0105     |
|    value_loss           | 4.7         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03       |
|    ep_rew_mean          | 1.63e+03    |
| time/                   |             |
|    fps                  | 104         |
|    iterations           | 162         |
|    time_elapsed         | 3170        |
|    total_timesteps      | 331776      |
| train/                  |             |
|    approx_kl            | 0.036602072 |
|    clip_fraction        | 0.337       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.16       |
|    explained_variance   | 0.887       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.181       |
|    n_updates            | 1610        |
|    policy_gradient_loss | -0.018      |
|    value_loss           | 2.76        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3e+03 

In [None]:
# Relative location used when saving the trained model:
# <Training>/<Saved Models>/<CNN_model>, joined with the platform's separator.
PPO_path = os.path.join(
    'Training',
    'Saved Models',
    'CNN_model',
)

In [None]:
# Persist the trained model by calling its save() method with PPO_path.
# NOTE(review): `model` is created in an earlier cell that is not visible here;
# presumably a stable-baselines3 PPO model (the training log above shows
# PPO-style metrics such as clip_fraction / approx_kl) — confirm.
model.save(PPO_path)

In [None]:
# Run a single evaluation episode of `model` on `env` with rendering enabled.
evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=1,
    render=True,
)