In [2]:
import tkinter as tk
from tkinter import *
import numpy as np
import copy
import random

import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Input, Flatten
from tensorflow.keras.losses import MSE
from tensorflow.keras.optimizers import Adam

from collections import deque, namedtuple
from gym import Env
from gym.spaces import Discrete, MultiDiscrete

import time
import random

  from pandas.core import (


# 1. Rubik's Cube GUI using Tkinter

In [59]:
# Tk colour name used to paint a cube cell, keyed by the integer stored in
# that cell of the cube array.
COLOR_KEY = dict(enumerate(('orange', 'green', 'white', 'blue', 'yellow', 'red')))


class RubiksCubeGUI:
    """3x3x3 Rubik's cube model with a Tkinter editor for designing a start state.

    The cube lives in ``self.cube`` as a ``(6, 3, 3)`` integer array whose
    entries are keys of ``COLOR_KEY``.  The move methods and the unfolded
    layout drawn by ``designCube`` use these face indices:

        0 = top, 1 = left, 2 = front, 3 = right, 4 = back, 5 = bottom

    ``self.actions`` maps an action id (0-11) to a zero-argument callable:
    even ids call a move with ``clockwise=True``, odd ids call the same move
    with ``clockwise=False``.  Every move mutates ``self.cube`` in place and
    returns it.
    """

    def __init__(self, isDesign=True, cube=None):
        """Create the Tk root window and the cube state.

        Args:
            isDesign: when true, ``designCube`` makes every non-centre sticker
                clickable so it can be recoloured.
            cube: optional initial state holding 54 values; it is reshaped to
                ``(6, 3, 3)``.  An ndarray is used in place (not copied), so
                moves also mutate the caller's array.  When omitted the cube
                starts solved.
        """
        self.root = tk.Tk()
        # Solved reference state: face ``i`` is filled with colour ``i``.
        self.cube_colors = np.array(
            [[[face] * 3 for _ in range(3)] for face in range(6)]
        )

        self.isDesign = isDesign
        if cube is not None:
            # asarray lets plain lists/tuples through as well as ndarrays.
            self.cube = np.asarray(cube).reshape((6, 3, 3))
        else:
            self.cube = np.copy(self.cube_colors)

        self.actions = {
            0: self.top,
            1: lambda: self.top(clockwise=False),
            2: self.bottom,
            3: lambda: self.bottom(clockwise=False),
            4: self.left,
            5: lambda: self.left(clockwise=False),
            6: self.right,
            7: lambda: self.right(clockwise=False),
            8: self.front,
            9: lambda: self.front(clockwise=False),
            10: self.back,
            11: lambda: self.back(clockwise=False),
        }

        # Colour applied by the next sticker click (2 is 'white' in COLOR_KEY).
        self.cur_colour = 2
        # cube_grid[face][row][col] -> Tk button; filled in by designCube().
        self.cube_grid = []

    def set_colour(self, c):
        """Select the colour id that subsequent sticker clicks will paint."""
        self.cur_colour = c

    def change_color(self, face, row, col):
        """Paint one sticker with the selected colour and recolour its button."""
        self.cube[face][row][col] = self.cur_colour
        self.cube_grid[face][row][col].config(bg=COLOR_KEY[self.cube[face][row][col]])

    def _repaint(self):
        """Recolour every editor button from the current contents of ``self.cube``."""
        for face in range(6):
            for row in range(3):
                for col in range(3):
                    self.cube_grid[face][row][col].config(bg=COLOR_KEY[self.cube[face][row][col]])

    def shuffleBoard(self):
        """Scramble the cube with ``shuffle`` and refresh the editor buttons."""
        self.shuffle()
        self._repaint()

    def saveBoard(self):
        """Close the editor window if each of the six colours appears exactly 9 times.

        When the colour counts are anything else, nothing happens and the
        window stays open.
        """
        counts = np.bincount(self.cube.ravel(), minlength=6)
        if np.array_equal(counts, [9] * 6):
            self.root.destroy()

    def designCube(self):
        """Build the editor window and run the Tk main loop.

        The window holds a row of colour-selection buttons, SHUFFLE and SAVE
        buttons, and the six faces laid out as an unfolded cross.  This call
        blocks inside ``mainloop()`` until the window is closed.
        """
        toolbar = tk.Frame(self.root)
        toolbar.pack()
        controls = tk.Frame(toolbar)
        controls.pack(side=tk.BOTTOM)

        # One selector per colour, in the order they appear left to right.
        for color_id in (5, 1, 3, 4, 2, 0):
            color_name = COLOR_KEY[color_id]
            selector = tk.Button(
                toolbar,
                text=color_name.capitalize(),
                fg=color_name,
                bg='black',
                # Bind color_id now; a bare closure would see only the last value.
                command=lambda c=color_id: self.set_colour(c),
            )
            selector.pack(side=tk.LEFT)

        shufflebutton = tk.Button(controls, text='SHUFFLE', fg='black', bg='white', command=self.shuffleBoard)
        shufflebutton.pack(side=tk.LEFT)

        savebutton = tk.Button(controls, text='SAVE', fg='black', bg='white', command=self.saveBoard)
        savebutton.pack(side=tk.LEFT)

        main_frame = tk.Frame(self.root)
        main_frame.pack(padx=10, pady=10)

        for face in range(6):
            face_frame = tk.Frame(main_frame)
            # Cross layout: face 0 above and face 5 below the second column,
            # faces 1-4 side by side in the middle row.
            if face == 0:
                face_frame.grid(row=0, column=1, padx=5, pady=5)
            elif face == 5:
                face_frame.grid(row=2, column=1, padx=5, pady=5)
            else:
                face_frame.grid(row=1, column=face - 1, padx=5, pady=5)

            squares = []
            for row in range(3):
                square_row = []
                for col in range(3):
                    square = tk.Frame(face_frame, width=50, height=50)
                    square.grid(row=row, column=col, padx=1, pady=1)
                    button = tk.Button(square, height=3, width=5, bg=COLOR_KEY[self.cube[face][row][col]])
                    button.pack()
                    # Centre stickers (row 1, col 1) are never made editable.
                    if self.isDesign and (row != 1 or col != 1):
                        button.config(command=lambda r=row, c=col, f=face: self.change_color(f, r, c))
                    square_row.append(button)
                squares.append(square_row)
            self.cube_grid.append(squares)

        self.root.mainloop()

    def rotate_face_clockwise(self, face):
        """Rotate the 3x3 grid of ``face`` a quarter turn clockwise, in place."""
        # rot90 returns a view of the slice being assigned to; copy first so
        # the assignment never reads cells it has already overwritten.
        self.cube[face] = np.rot90(self.cube[face], k=-1).copy()

    def rotate_face_counterclockwise(self, face):
        """Rotate the 3x3 grid of ``face`` a quarter turn counterclockwise, in place."""
        self.cube[face] = np.rot90(self.cube[face], k=1).copy()

    def top(self, clockwise=True):
        """Quarter-turn the top layer: face 0 plus row 0 of faces 1-4. Returns the cube."""
        if clockwise:
            self.rotate_face_clockwise(0)
            temp = np.copy(self.cube[1, 0, :])
            self.cube[1, 0, :] = self.cube[2, 0, :]
            self.cube[2, 0, :] = self.cube[3, 0, :]
            self.cube[3, 0, :] = self.cube[4, 0, :]
            self.cube[4, 0, :] = temp
        else:
            self.rotate_face_counterclockwise(0)
            temp = np.copy(self.cube[1, 0, :])
            self.cube[1, 0, :] = self.cube[4, 0, :]
            self.cube[4, 0, :] = self.cube[3, 0, :]
            self.cube[3, 0, :] = self.cube[2, 0, :]
            self.cube[2, 0, :] = temp
        return self.cube

    def bottom(self, clockwise=True):
        """Quarter-turn the bottom layer: face 5 plus row 2 of faces 1-4. Returns the cube.

        With ``clockwise=True`` the side rows cycle in the same direction as in
        ``top(clockwise=True)``, and the grid of face 5 is rotated
        counterclockwise to match.
        """
        if clockwise:
            self.rotate_face_counterclockwise(5)
            temp = np.copy(self.cube[1, 2, :])
            self.cube[1, 2, :] = self.cube[2, 2, :]
            self.cube[2, 2, :] = self.cube[3, 2, :]
            self.cube[3, 2, :] = self.cube[4, 2, :]
            self.cube[4, 2, :] = temp
        else:
            self.rotate_face_clockwise(5)
            temp = np.copy(self.cube[1, 2, :])
            self.cube[1, 2, :] = self.cube[4, 2, :]
            self.cube[4, 2, :] = self.cube[3, 2, :]
            self.cube[3, 2, :] = self.cube[2, 2, :]
            self.cube[2, 2, :] = temp
        return self.cube

    def left(self, clockwise=True):
        """Quarter-turn the left layer: face 1 plus column 0 of faces 0, 2, 5 and column 2 of face 4.

        The strip on face 4 is stored in the opposite direction to the others,
        hence the ``[::-1]`` whenever a strip enters or leaves it.  Returns the cube.
        """
        if clockwise:
            self.rotate_face_clockwise(1)
            temp = np.copy(self.cube[0, :, 0])
            self.cube[0, :, 0] = self.cube[4, ::-1, 2]
            self.cube[4, :, 2] = self.cube[5, ::-1, 0]
            self.cube[5, :, 0] = self.cube[2, :, 0]
            self.cube[2, :, 0] = temp
        else:
            self.rotate_face_counterclockwise(1)
            temp = np.copy(self.cube[0, :, 0])
            self.cube[0, :, 0] = self.cube[2, :, 0]
            self.cube[2, :, 0] = self.cube[5, :, 0]
            self.cube[5, :, 0] = self.cube[4, ::-1, 2]
            self.cube[4, :, 2] = temp[::-1]
        return self.cube

    def right(self, clockwise=True):
        """Quarter-turn the right layer: face 3 plus column 2 of faces 0, 2, 5 and column 0 of face 4.

        The strip on face 4 is stored in the opposite direction to the others,
        hence the ``[::-1]`` whenever a strip enters or leaves it.  Returns the cube.
        """
        if clockwise:
            self.rotate_face_clockwise(3)
            temp = np.copy(self.cube[0, :, 2])
            self.cube[0, :, 2] = self.cube[2, :, 2]
            self.cube[2, :, 2] = self.cube[5, :, 2]
            self.cube[5, :, 2] = self.cube[4, ::-1, 0]
            self.cube[4, :, 0] = temp[::-1]
        else:
            self.rotate_face_counterclockwise(3)
            temp = np.copy(self.cube[0, :, 2])
            self.cube[0, :, 2] = self.cube[4, ::-1, 0]
            self.cube[4, :, 0] = self.cube[5, ::-1, 2]
            # Front -> bottom keeps its orientation (the clockwise branch moves
            # bottom -> front unreversed); reversing here would make this move
            # fail to undo right(clockwise=True).
            self.cube[5, :, 2] = self.cube[2, :, 2]
            self.cube[2, :, 2] = temp
        return self.cube

    def front(self, clockwise=True):
        """Quarter-turn the front layer: face 2 plus the adjacent strips.

        The strips are row 2 of face 0, column 2 of face 1, row 0 of face 5
        and column 0 of face 3.  Returns the cube.
        """
        if clockwise:
            self.rotate_face_clockwise(2)
            temp = np.copy(self.cube[0, 2, :])
            self.cube[0, 2, :] = self.cube[1, ::-1, 2]
            self.cube[1, :, 2] = self.cube[5, 0, :]
            self.cube[5, 0, :] = self.cube[3, ::-1, 0]
            self.cube[3, :, 0] = temp
        else:
            self.rotate_face_counterclockwise(2)
            temp = np.copy(self.cube[0, 2, :])
            self.cube[0, 2, :] = self.cube[3, :, 0]
            self.cube[3, :, 0] = self.cube[5, 0, ::-1]
            self.cube[5, 0, :] = self.cube[1, :, 2]
            self.cube[1, :, 2] = temp[::-1]
        return self.cube

    def back(self, clockwise=True):
        """Quarter-turn the back layer: face 4 plus the adjacent strips.

        The strips are row 0 of face 0, column 2 of face 3, row 2 of face 5
        and column 0 of face 1.  Returns the cube.
        """
        if clockwise:
            self.rotate_face_clockwise(4)
            temp = np.copy(self.cube[0, 0, :])
            self.cube[0, 0, :] = self.cube[3, :, 2]
            self.cube[3, :, 2] = self.cube[5, 2, ::-1]
            self.cube[5, 2, :] = self.cube[1, :, 0]
            self.cube[1, :, 0] = temp[::-1]
        else:
            self.rotate_face_counterclockwise(4)
            temp = np.copy(self.cube[0, 0, :])
            self.cube[0, 0, :] = self.cube[1, ::-1, 0]
            self.cube[1, :, 0] = self.cube[5, 2, :]
            self.cube[5, 2, :] = self.cube[3, ::-1, 2]
            self.cube[3, :, 2] = temp
        return self.cube

    def shuffle(self):
        """Apply between 5 and 15 random moves to the cube and return it.

        Only the even action ids are drawn, i.e. the ``clockwise=True``
        variant of each of the six moves.
        """
        n_moves = random.randint(5, 15)
        for _ in range(n_moves):
            move = random.randint(0, 5)
            self.actions[2 * move]()

        return self.cube

# 2. Reinforcement Learning Environment

In [60]:
# Move budget per episode: CubeEnv sets `moves_length` to this value on
# construction and on reset(), decrements it on every step(), and ends the
# episode once it reaches zero.
cube_number = 100

In [61]:
class CubeEnv(Env):
    """Gym environment whose task is to return a scrambled Rubik's cube to the solved state.

    * Action space: ``Discrete(12)`` - the keys of ``RubiksCubeGUI.actions``.
    * Observation: the cube flattened to 54 integers in ``[0, 6)``.
    * Reward: 100 on the step that reaches the solved state, otherwise 0.
    * An episode ends when the cube is solved or after ``cube_number`` steps.

    Uses the 4-tuple ``step`` / single-value ``reset`` gym API.

    NOTE(review): ``reset()`` always starts from a freshly shuffled solved
    cube, so the state designed in the editor is only visible until the first
    reset - confirm that this is intended.
    """

    def __init__(self):
        """Open the cube editor, then build the read-only window used by ``render``.

        ``designCube()`` runs a Tk main loop, so construction blocks until the
        editor window has been closed.
        """
        # Moves that we can make
        self.action_space = Discrete(12)
        # One entry per sticker, six possible colours each
        self.observation_space = MultiDiscrete([6] * 54)

        self.myCube = RubiksCubeGUI()
        self.myCube.designCube()
        # Flat view of the cube left behind by the editor
        self.state = self.myCube.cube.reshape(54,)
        # Number of moves the agent may still make in the current episode
        self.moves_length = cube_number

        # Separate window holding one coloured frame per sticker.
        self.root = tk.Tk()
        self.cube_grid = []

        main_frame = tk.Frame(self.root)
        main_frame.pack(padx=10, pady=10)

        for face in range(6):
            face_frame = tk.Frame(main_frame)
            # Cross layout: face 0 above and face 5 below the second column,
            # faces 1-4 side by side in the middle row.
            if face == 0:
                face_frame.grid(row=0, column=1, padx=5, pady=5)
            elif face == 5:
                face_frame.grid(row=2, column=1, padx=5, pady=5)
            else:
                face_frame.grid(row=1, column=face - 1, padx=5, pady=5)

            squares = []
            for row in range(3):
                square_row = []
                for col in range(3):
                    square = tk.Frame(
                        face_frame,
                        width=50,
                        height=50,
                        bg=COLOR_KEY[self.myCube.cube[face][row][col]],
                    )
                    square.grid(row=row, column=col, padx=1, pady=1)
                    square_row.append(square)
                squares.append(square_row)
            self.cube_grid.append(squares)

    def step(self, action):
        """Apply one move and return ``(observation, reward, done, info)``.

        Args:
            action: action id in ``[0, 12)``; coerced with ``int`` so numpy
                integer scalars are accepted.

        Returns:
            A copy of the flattened cube, the reward (100 if solved, else 0),
            the ``done`` flag (solved or move budget exhausted) and an empty
            info dict.

        Raises:
            KeyError: if ``action`` is not a key of ``RubiksCubeGUI.actions``.
        """
        # The move mutates myCube.cube in place and returns it; self.state is
        # a flat view of that same array.
        self.state = self.myCube.actions[int(action)]().reshape(54,)
        self.moves_length -= 1

        solved = bool(np.array_equal(self.myCube.cube, self.myCube.cube_colors))
        reward = 100 if solved else 0
        done = solved or self.moves_length <= 0

        info = {}

        # Return a copy: the cube is mutated in place by later moves, so a
        # view would silently change observations the caller has stored.
        return self.state.copy(), reward, done, info

    def render(self, mode='human'):
        """Repaint the display window from the current state.

        Args:
            mode: accepted for compatibility with gym's ``render(mode=...)``
                call convention; not used.
        """
        state3d = self.state.reshape(6, 3, 3)
        for face in range(6):
            for row in range(3):
                for col in range(3):
                    self.cube_grid[face][row][col].configure(bg=COLOR_KEY[state3d[face][row][col]])
        # No main loop runs while an agent is stepping the environment, so
        # process pending Tk events here or the new colours never get drawn.
        self.root.update_idletasks()
        self.root.update()

    def reset(self):
        """Start a new episode from a shuffled solved cube and return the observation."""
        self.myCube.cube = np.copy(self.myCube.cube_colors)
        self.state = self.myCube.shuffle().reshape(54,)
        self.moves_length = cube_number
        # Copy for the same reason as in step(): the cube is mutated in place.
        return self.state.copy()
    

In [62]:
# Build the environment. CubeEnv() first opens the cube editor and only
# returns once that window has been closed.
env = CubeEnv()
# Scramble once, then check that the state is the flattened 54-sticker vector.
env.reset()
env.state.shape

(54,)

In [63]:
# Smoke-test the environment with a uniformly random policy.
N_RANDOM_EPISODES = 10
MAX_RANDOM_STEPS = 30  # per-episode cap for this test, on top of the env's own move budget

for episode in range(N_RANDOM_EPISODES):
    state = env.reset()
    done = False
    score = 0

    for _ in range(MAX_RANDOM_STEPS):
        action = env.action_space.sample()
        state, reward, done, _ = env.step(action)
        score += reward
        if done:
            break
    print(f'Episode: {episode + 1} Score: {score}')

# Paint the final cube state before handing control to Tk; otherwise the
# window still shows the colours it was built with, from before the rollouts.
env.render()
env.root.mainloop()

Episode: 1 Score: 0
Episode: 2 Score: 0
Episode: 3 Score: 0
Episode: 4 Score: 0
Episode: 5 Score: 0
Episode: 6 Score: 0
Episode: 7 Score: 0
Episode: 8 Score: 0
Episode: 9 Score: 0
Episode: 10 Score: 0


# 3.0 PPO

In [55]:
from stable_baselines3 import PPO

# PPO agent with the 'MlpPolicy' network, logging training statistics to stdout.
model = PPO(policy='MlpPolicy', env=env, verbose=1)

# Long-running cell: one million environment steps.
model.learn(total_timesteps=1_000_000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 6.14     |
|    ep_rew_mean     | 96       |
| time/              |          |
|    fps             | 388      |
|    iterations      | 1        |
|    time_elapsed    | 5        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 10.9         |
|    ep_rew_mean          | 92           |
| time/                   |              |
|    fps                  | 278          |
|    iterations           | 2            |
|    time_elapsed         | 14           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0029897685 |
|    clip_fraction        | 0.0349       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.685       |
|    explained_variance   | 0.876        |
|    learning_r

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 7.04       |
|    ep_rew_mean          | 95         |
| time/                   |            |
|    fps                  | 247        |
|    iterations           | 11         |
|    time_elapsed         | 91         |
|    total_timesteps      | 22528      |
| train/                  |            |
|    approx_kl            | 0.01146033 |
|    clip_fraction        | 0.0928     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.618     |
|    explained_variance   | 0.95       |
|    learning_rate        | 0.0003     |
|    loss                 | 16.2       |
|    n_updates            | 1080       |
|    policy_gradient_loss | -0.00201   |
|    value_loss           | 43.7       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.08        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5           |
|    ep_rew_mean          | 97          |
| time/                   |             |
|    fps                  | 244         |
|    iterations           | 21          |
|    time_elapsed         | 175         |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.013511406 |
|    clip_fraction        | 0.0927      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.536      |
|    explained_variance   | 0.983       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.956       |
|    n_updates            | 1180        |
|    policy_gradient_loss | -0.0122     |
|    value_loss           | 24.3        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.04  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 7.94       |
|    ep_rew_mean          | 94         |
| time/                   |            |
|    fps                  | 243        |
|    iterations           | 31         |
|    time_elapsed         | 260        |
|    total_timesteps      | 63488      |
| train/                  |            |
|    approx_kl            | 0.02751098 |
|    clip_fraction        | 0.116      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.461     |
|    explained_variance   | 0.983      |
|    learning_rate        | 0.0003     |
|    loss                 | 14.1       |
|    n_updates            | 1280       |
|    policy_gradient_loss | -0.0109    |
|    value_loss           | 31.6       |
----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.02       |
|    ep_rew_mean

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.08        |
|    ep_rew_mean          | 98          |
| time/                   |             |
|    fps                  | 244         |
|    iterations           | 41          |
|    time_elapsed         | 344         |
|    total_timesteps      | 83968       |
| train/                  |             |
|    approx_kl            | 0.030803446 |
|    clip_fraction        | 0.166       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.504      |
|    explained_variance   | 0.995       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0624      |
|    n_updates            | 1380        |
|    policy_gradient_loss | -0.0167     |
|    value_loss           | 9.51        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.02  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.04        |
|    ep_rew_mean          | 96          |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 51          |
|    time_elapsed         | 428         |
|    total_timesteps      | 104448      |
| train/                  |             |
|    approx_kl            | 0.004659141 |
|    clip_fraction        | 0.0617      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.521      |
|    explained_variance   | 0.95        |
|    learning_rate        | 0.0003      |
|    loss                 | 12.7        |
|    n_updates            | 1480        |
|    policy_gradient_loss | -0.00332    |
|    value_loss           | 58.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.04        |
|    ep_rew_mean          | 97          |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 61          |
|    time_elapsed         | 513         |
|    total_timesteps      | 124928      |
| train/                  |             |
|    approx_kl            | 0.010829462 |
|    clip_fraction        | 0.081       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.641      |
|    explained_variance   | 0.968       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.63        |
|    n_updates            | 1580        |
|    policy_gradient_loss | -0.00976    |
|    value_loss           | 47.8        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.02  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.12        |
|    ep_rew_mean          | 99          |
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 71          |
|    time_elapsed         | 599         |
|    total_timesteps      | 145408      |
| train/                  |             |
|    approx_kl            | 0.013488883 |
|    clip_fraction        | 0.118       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.586      |
|    explained_variance   | 0.984       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.542       |
|    n_updates            | 1680        |
|    policy_gradient_loss | -0.00995    |
|    value_loss           | 26          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.08  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.06         |
|    ep_rew_mean          | 99           |
| time/                   |              |
|    fps                  | 242          |
|    iterations           | 81           |
|    time_elapsed         | 685          |
|    total_timesteps      | 165888       |
| train/                  |              |
|    approx_kl            | 0.0034999156 |
|    clip_fraction        | 0.0446       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.378       |
|    explained_variance   | 0.945        |
|    learning_rate        | 0.0003       |
|    loss                 | 88.4         |
|    n_updates            | 1780         |
|    policy_gradient_loss | -0.00841     |
|    value_loss           | 92.4         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6           |
|    ep_rew_mean          | 96          |
| time/                   |             |
|    fps                  | 242         |
|    iterations           | 91          |
|    time_elapsed         | 769         |
|    total_timesteps      | 186368      |
| train/                  |             |
|    approx_kl            | 0.001616119 |
|    clip_fraction        | 0.00713     |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.52       |
|    explained_variance   | 0.95        |
|    learning_rate        | 0.0003      |
|    loss                 | 4.33        |
|    n_updates            | 1880        |
|    policy_gradient_loss | -0.00322    |
|    value_loss           | 27.1        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 8.92    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.02        |
|    ep_rew_mean          | 98          |
| time/                   |             |
|    fps                  | 243         |
|    iterations           | 101         |
|    time_elapsed         | 847         |
|    total_timesteps      | 206848      |
| train/                  |             |
|    approx_kl            | 0.028586645 |
|    clip_fraction        | 0.166       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.55       |
|    explained_variance   | 0.995       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0214     |
|    n_updates            | 1980        |
|    policy_gradient_loss | -0.0145     |
|    value_loss           | 11.5        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.14  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.94        |
|    ep_rew_mean          | 93          |
| time/                   |             |
|    fps                  | 244         |
|    iterations           | 111         |
|    time_elapsed         | 929         |
|    total_timesteps      | 227328      |
| train/                  |             |
|    approx_kl            | 0.002418165 |
|    clip_fraction        | 0.026       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.58       |
|    explained_variance   | 0.961       |
|    learning_rate        | 0.0003      |
|    loss                 | 2.37        |
|    n_updates            | 2080        |
|    policy_gradient_loss | -0.00789    |
|    value_loss           | 63.7        |
-----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 4.16      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.04        |
|    ep_rew_mean          | 99          |
| time/                   |             |
|    fps                  | 245         |
|    iterations           | 121         |
|    time_elapsed         | 1008        |
|    total_timesteps      | 247808      |
| train/                  |             |
|    approx_kl            | 0.030852087 |
|    clip_fraction        | 0.172       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.558      |
|    explained_variance   | 0.995       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.052      |
|    n_updates            | 2180        |
|    policy_gradient_loss | -0.0157     |
|    value_loss           | 9.3         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.98  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.04        |
|    ep_rew_mean          | 98          |
| time/                   |             |
|    fps                  | 246         |
|    iterations           | 131         |
|    time_elapsed         | 1089        |
|    total_timesteps      | 268288      |
| train/                  |             |
|    approx_kl            | 0.005383132 |
|    clip_fraction        | 0.054       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.565      |
|    explained_variance   | 0.991       |
|    learning_rate        | 0.0003      |
|    loss                 | 49.5        |
|    n_updates            | 2280        |
|    policy_gradient_loss | -0.00669    |
|    value_loss           | 12.4        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 5.08

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.02        |
|    ep_rew_mean          | 97          |
| time/                   |             |
|    fps                  | 245         |
|    iterations           | 141         |
|    time_elapsed         | 1175        |
|    total_timesteps      | 288768      |
| train/                  |             |
|    approx_kl            | 0.023233203 |
|    clip_fraction        | 0.173       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.687      |
|    explained_variance   | 1           |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0243      |
|    n_updates            | 2380        |
|    policy_gradient_loss | -0.0167     |
|    value_loss           | 0.13        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.06  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.02        |
|    ep_rew_mean          | 98          |
| time/                   |             |
|    fps                  | 245         |
|    iterations           | 151         |
|    time_elapsed         | 1259        |
|    total_timesteps      | 309248      |
| train/                  |             |
|    approx_kl            | 0.022004645 |
|    clip_fraction        | 0.165       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.728      |
|    explained_variance   | 0.978       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0539      |
|    n_updates            | 2480        |
|    policy_gradient_loss | -0.0126     |
|    value_loss           | 41.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6     

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 4.24         |
|    ep_rew_mean          | 98           |
| time/                   |              |
|    fps                  | 245          |
|    iterations           | 161          |
|    time_elapsed         | 1341         |
|    total_timesteps      | 329728       |
| train/                  |              |
|    approx_kl            | 0.0018157104 |
|    clip_fraction        | 0.0083       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.608       |
|    explained_variance   | 0.935        |
|    learning_rate        | 0.0003       |
|    loss                 | 71.4         |
|    n_updates            | 2580         |
|    policy_gradient_loss | -0.00459     |
|    value_loss           | 128          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.16         |
|    ep_rew_mean          | 100          |
| time/                   |              |
|    fps                  | 246          |
|    iterations           | 171          |
|    time_elapsed         | 1423         |
|    total_timesteps      | 350208       |
| train/                  |              |
|    approx_kl            | 0.0065688086 |
|    clip_fraction        | 0.0587       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.194       |
|    explained_variance   | 0.942        |
|    learning_rate        | 0.0003       |
|    loss                 | 64.1         |
|    n_updates            | 2680         |
|    policy_gradient_loss | -0.0168      |
|    value_loss           | 31.2         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.88        |
|    ep_rew_mean          | 99          |
| time/                   |             |
|    fps                  | 244         |
|    iterations           | 181         |
|    time_elapsed         | 1514        |
|    total_timesteps      | 370688      |
| train/                  |             |
|    approx_kl            | 0.003920226 |
|    clip_fraction        | 0.0547      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.402      |
|    explained_variance   | 0.707       |
|    learning_rate        | 0.0003      |
|    loss                 | 183         |
|    n_updates            | 2780        |
|    policy_gradient_loss | -0.0128     |
|    value_loss           | 315         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.02  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.06       |
|    ep_rew_mean          | 100        |
| time/                   |            |
|    fps                  | 244        |
|    iterations           | 191        |
|    time_elapsed         | 1597       |
|    total_timesteps      | 391168     |
| train/                  |            |
|    approx_kl            | 0.13369846 |
|    clip_fraction        | 0.181      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.136     |
|    explained_variance   | 0.923      |
|    learning_rate        | 0.0003     |
|    loss                 | 3.1        |
|    n_updates            | 2880       |
|    policy_gradient_loss | -0.028     |
|    value_loss           | 15.2       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.14        |
|    ep_rew_m

-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 24.7          |
|    ep_rew_mean          | 82            |
| time/                   |               |
|    fps                  | 244           |
|    iterations           | 201           |
|    time_elapsed         | 1680          |
|    total_timesteps      | 411648        |
| train/                  |               |
|    approx_kl            | 0.00097227725 |
|    clip_fraction        | 0.00869       |
|    clip_range           | 0.2           |
|    entropy_loss         | -0.156        |
|    explained_variance   | 0.31          |
|    learning_rate        | 0.0003        |
|    loss                 | 102           |
|    n_updates            | 2980          |
|    policy_gradient_loss | -0.00417      |
|    value_loss           | 250           |
-------------------------------------------
-----------------------------------------
| rollout/                |       

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.12       |
|    ep_rew_mean          | 100        |
| time/                   |            |
|    fps                  | 245        |
|    iterations           | 211        |
|    time_elapsed         | 1763       |
|    total_timesteps      | 432128     |
| train/                  |            |
|    approx_kl            | 0.01961341 |
|    clip_fraction        | 0.0369     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.0774    |
|    explained_variance   | 0.986      |
|    learning_rate        | 0.0003     |
|    loss                 | 0.221      |
|    n_updates            | 3080       |
|    policy_gradient_loss | -0.0103    |
|    value_loss           | 4.24       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.1         |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.24        |
|    ep_rew_mean          | 99          |
| time/                   |             |
|    fps                  | 244         |
|    iterations           | 221         |
|    time_elapsed         | 1849        |
|    total_timesteps      | 452608      |
| train/                  |             |
|    approx_kl            | 0.046087585 |
|    clip_fraction        | 0.11        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.17       |
|    explained_variance   | 0.985       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.0899      |
|    n_updates            | 3180        |
|    policy_gradient_loss | -0.022      |
|    value_loss           | 11          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.2   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.02        |
|    ep_rew_mean          | 98          |
| time/                   |             |
|    fps                  | 245         |
|    iterations           | 231         |
|    time_elapsed         | 1930        |
|    total_timesteps      | 473088      |
| train/                  |             |
|    approx_kl            | 0.051572453 |
|    clip_fraction        | 0.109       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.179      |
|    explained_variance   | 0.989       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.0324     |
|    n_updates            | 3280        |
|    policy_gradient_loss | -0.0241     |
|    value_loss           | 9.4         |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.48

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.12        |
|    ep_rew_mean          | 99          |
| time/                   |             |
|    fps                  | 245         |
|    iterations           | 241         |
|    time_elapsed         | 2010        |
|    total_timesteps      | 493568      |
| train/                  |             |
|    approx_kl            | 0.025756337 |
|    clip_fraction        | 0.132       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.55       |
|    explained_variance   | 0.993       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.228       |
|    n_updates            | 3380        |
|    policy_gradient_loss | -0.0173     |
|    value_loss           | 8.09        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.08  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 4.24         |
|    ep_rew_mean          | 98           |
| time/                   |              |
|    fps                  | 245          |
|    iterations           | 251          |
|    time_elapsed         | 2090         |
|    total_timesteps      | 514048       |
| train/                  |              |
|    approx_kl            | 0.0026425438 |
|    clip_fraction        | 0.0265       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.177       |
|    explained_variance   | 0.936        |
|    learning_rate        | 0.0003       |
|    loss                 | 14.2         |
|    n_updates            | 3480         |
|    policy_gradient_loss | -0.0139      |
|    value_loss           | 62.4         |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.3          |
|    ep_rew_mean          | 100          |
| time/                   |              |
|    fps                  | 246          |
|    iterations           | 261          |
|    time_elapsed         | 2171         |
|    total_timesteps      | 534528       |
| train/                  |              |
|    approx_kl            | 0.0068762973 |
|    clip_fraction        | 0.0371       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.132       |
|    explained_variance   | 0.924        |
|    learning_rate        | 0.0003       |
|    loss                 | 11.5         |
|    n_updates            | 3580         |
|    policy_gradient_loss | -0.00935     |
|    value_loss           | 21.5         |
------------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mea

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13.9       |
|    ep_rew_mean          | 90         |
| time/                   |            |
|    fps                  | 246        |
|    iterations           | 271        |
|    time_elapsed         | 2251       |
|    total_timesteps      | 555008     |
| train/                  |            |
|    approx_kl            | 0.19385165 |
|    clip_fraction        | 0.128      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.138     |
|    explained_variance   | 0.889      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.0525    |
|    n_updates            | 3680       |
|    policy_gradient_loss | -0.0315    |
|    value_loss           | 0.0199     |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 16.1        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.06        |
|    ep_rew_mean          | 100         |
| time/                   |             |
|    fps                  | 246         |
|    iterations           | 281         |
|    time_elapsed         | 2334        |
|    total_timesteps      | 575488      |
| train/                  |             |
|    approx_kl            | 0.060684033 |
|    clip_fraction        | 0.0874      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.157      |
|    explained_variance   | 0.978       |
|    learning_rate        | 0.0003      |
|    loss                 | -0.00846    |
|    n_updates            | 3780        |
|    policy_gradient_loss | -0.0173     |
|    value_loss           | 16.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.4   

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5.52       |
|    ep_rew_mean          | 97         |
| time/                   |            |
|    fps                  | 177        |
|    iterations           | 291        |
|    time_elapsed         | 3357       |
|    total_timesteps      | 595968     |
| train/                  |            |
|    approx_kl            | 0.01062127 |
|    clip_fraction        | 0.0637     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.381     |
|    explained_variance   | 0.926      |
|    learning_rate        | 0.0003     |
|    loss                 | 54         |
|    n_updates            | 3880       |
|    policy_gradient_loss | -0.0159    |
|    value_loss           | 125        |
----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 3.66         |
|    ep_re

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 40.2         |
|    ep_rew_mean          | 71           |
| time/                   |              |
|    fps                  | 179          |
|    iterations           | 301          |
|    time_elapsed         | 3438         |
|    total_timesteps      | 616448       |
| train/                  |              |
|    approx_kl            | 0.0069118994 |
|    clip_fraction        | 0.0825       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.492       |
|    explained_variance   | 0.558        |
|    learning_rate        | 0.0003       |
|    loss                 | 163          |
|    n_updates            | 3980         |
|    policy_gradient_loss | 0.00399      |
|    value_loss           | 275          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 5.48        |
|    ep_rew_mean          | 97          |
| time/                   |             |
|    fps                  | 181         |
|    iterations           | 311         |
|    time_elapsed         | 3517        |
|    total_timesteps      | 636928      |
| train/                  |             |
|    approx_kl            | 0.010407655 |
|    clip_fraction        | 0.0769      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.386      |
|    explained_variance   | 0.956       |
|    learning_rate        | 0.0003      |
|    loss                 | 7.43        |
|    n_updates            | 4080        |
|    policy_gradient_loss | -0.013      |
|    value_loss           | 32          |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.16    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.82        |
|    ep_rew_mean          | 100         |
| time/                   |             |
|    fps                  | 182         |
|    iterations           | 321         |
|    time_elapsed         | 3598        |
|    total_timesteps      | 657408      |
| train/                  |             |
|    approx_kl            | 0.006486493 |
|    clip_fraction        | 0.0294      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.123      |
|    explained_variance   | 0.836       |
|    learning_rate        | 0.0003      |
|    loss                 | 94.8        |
|    n_updates            | 4180        |
|    policy_gradient_loss | -0.00894    |
|    value_loss           | 219         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.6   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.94        |
|    ep_rew_mean          | 100         |
| time/                   |             |
|    fps                  | 184         |
|    iterations           | 331         |
|    time_elapsed         | 3679        |
|    total_timesteps      | 677888      |
| train/                  |             |
|    approx_kl            | 0.013870187 |
|    clip_fraction        | 0.143       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.232      |
|    explained_variance   | 0.492       |
|    learning_rate        | 0.0003      |
|    loss                 | 15.3        |
|    n_updates            | 4280        |
|    policy_gradient_loss | -0.019      |
|    value_loss           | 49          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.72  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 23.7         |
|    ep_rew_mean          | 85           |
| time/                   |              |
|    fps                  | 185          |
|    iterations           | 341          |
|    time_elapsed         | 3758         |
|    total_timesteps      | 698368       |
| train/                  |              |
|    approx_kl            | 0.0019223891 |
|    clip_fraction        | 0.0235       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.158       |
|    explained_variance   | 0.483        |
|    learning_rate        | 0.0003       |
|    loss                 | 118          |
|    n_updates            | 4380         |
|    policy_gradient_loss | -0.00937     |
|    value_loss           | 314          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.48        |
|    ep_rew_mean          | 99          |
| time/                   |             |
|    fps                  | 187         |
|    iterations           | 351         |
|    time_elapsed         | 3838        |
|    total_timesteps      | 718848      |
| train/                  |             |
|    approx_kl            | 0.010502168 |
|    clip_fraction        | 0.0667      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.205      |
|    explained_variance   | 0.869       |
|    learning_rate        | 0.0003      |
|    loss                 | 13.6        |
|    n_updates            | 4480        |
|    policy_gradient_loss | -0.0164     |
|    value_loss           | 50.9        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.52  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.28       |
|    ep_rew_mean          | 100        |
| time/                   |            |
|    fps                  | 188        |
|    iterations           | 361        |
|    time_elapsed         | 3917       |
|    total_timesteps      | 739328     |
| train/                  |            |
|    approx_kl            | 0.00369605 |
|    clip_fraction        | 0.0393     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.185     |
|    explained_variance   | 0.802      |
|    learning_rate        | 0.0003     |
|    loss                 | 43.8       |
|    n_updates            | 4580       |
|    policy_gradient_loss | -0.0123    |
|    value_loss           | 124        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.1         |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.06        |
|    ep_rew_mean          | 100         |
| time/                   |             |
|    fps                  | 190         |
|    iterations           | 371         |
|    time_elapsed         | 3997        |
|    total_timesteps      | 759808      |
| train/                  |             |
|    approx_kl            | 0.026609086 |
|    clip_fraction        | 0.111       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.286      |
|    explained_variance   | 0.967       |
|    learning_rate        | 0.0003      |
|    loss                 | 17.5        |
|    n_updates            | 4680        |
|    policy_gradient_loss | -0.0128     |
|    value_loss           | 60.4        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.1   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.58        |
|    ep_rew_mean          | 95          |
| time/                   |             |
|    fps                  | 191         |
|    iterations           | 381         |
|    time_elapsed         | 4077        |
|    total_timesteps      | 780288      |
| train/                  |             |
|    approx_kl            | 0.004371036 |
|    clip_fraction        | 0.0622      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.273      |
|    explained_variance   | 0.89        |
|    learning_rate        | 0.0003      |
|    loss                 | 154         |
|    n_updates            | 4780        |
|    policy_gradient_loss | -0.0121     |
|    value_loss           | 131         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 7.7     

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.1         |
|    ep_rew_mean          | 100         |
| time/                   |             |
|    fps                  | 192         |
|    iterations           | 391         |
|    time_elapsed         | 4166        |
|    total_timesteps      | 800768      |
| train/                  |             |
|    approx_kl            | 0.027465628 |
|    clip_fraction        | 0.0363      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.0635     |
|    explained_variance   | 0.969       |
|    learning_rate        | 0.0003      |
|    loss                 | 0.00734     |
|    n_updates            | 4880        |
|    policy_gradient_loss | 0.00161     |
|    value_loss           | 5.79        |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 5.36    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 9.2         |
|    ep_rew_mean          | 96          |
| time/                   |             |
|    fps                  | 193         |
|    iterations           | 401         |
|    time_elapsed         | 4246        |
|    total_timesteps      | 821248      |
| train/                  |             |
|    approx_kl            | 0.004600615 |
|    clip_fraction        | 0.0167      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.143      |
|    explained_variance   | 0.596       |
|    learning_rate        | 0.0003      |
|    loss                 | 42.5        |
|    n_updates            | 4980        |
|    policy_gradient_loss | -0.00385    |
|    value_loss           | 250         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 8.34  

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 2.7          |
|    ep_rew_mean          | 100          |
| time/                   |              |
|    fps                  | 194          |
|    iterations           | 411          |
|    time_elapsed         | 4326         |
|    total_timesteps      | 841728       |
| train/                  |              |
|    approx_kl            | 0.0010169756 |
|    clip_fraction        | 0.00991      |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.1         |
|    explained_variance   | 0.802        |
|    learning_rate        | 0.0003       |
|    loss                 | 37.3         |
|    n_updates            | 5080         |
|    policy_gradient_loss | -0.00543     |
|    value_loss           | 171          |
------------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 7.52         |
|    ep_rew_mean          | 97           |
| time/                   |              |
|    fps                  | 195          |
|    iterations           | 421          |
|    time_elapsed         | 4408         |
|    total_timesteps      | 862208       |
| train/                  |              |
|    approx_kl            | 0.0034720106 |
|    clip_fraction        | 0.0393       |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.229       |
|    explained_variance   | 0.785        |
|    learning_rate        | 0.0003       |
|    loss                 | 65.8         |
|    n_updates            | 5180         |
|    policy_gradient_loss | -0.011       |
|    value_loss           | 172          |
------------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_m

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13.2       |
|    ep_rew_mean          | 91         |
| time/                   |            |
|    fps                  | 196        |
|    iterations           | 431        |
|    time_elapsed         | 4490       |
|    total_timesteps      | 882688     |
| train/                  |            |
|    approx_kl            | 0.21100211 |
|    clip_fraction        | 0.146      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.123     |
|    explained_variance   | -3.51      |
|    learning_rate        | 0.0003     |
|    loss                 | -0.000364  |
|    n_updates            | 5280       |
|    policy_gradient_loss | -0.0243    |
|    value_loss           | 1.23       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.1        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.16        |
|    ep_rew_mean          | 100         |
| time/                   |             |
|    fps                  | 197         |
|    iterations           | 441         |
|    time_elapsed         | 4579        |
|    total_timesteps      | 903168      |
| train/                  |             |
|    approx_kl            | 0.004410336 |
|    clip_fraction        | 0.0512      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.27       |
|    explained_variance   | 0.585       |
|    learning_rate        | 0.0003      |
|    loss                 | 34.4        |
|    n_updates            | 5380        |
|    policy_gradient_loss | -0.00379    |
|    value_loss           | 119         |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.12    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.7         |
|    ep_rew_mean          | 99          |
| time/                   |             |
|    fps                  | 198         |
|    iterations           | 451         |
|    time_elapsed         | 4659        |
|    total_timesteps      | 923648      |
| train/                  |             |
|    approx_kl            | 0.012123969 |
|    clip_fraction        | 0.163       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.373      |
|    explained_variance   | 0.981       |
|    learning_rate        | 0.0003      |
|    loss                 | 5.11        |
|    n_updates            | 5480        |
|    policy_gradient_loss | -0.0305     |
|    value_loss           | 26          |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 2.36  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 2.18       |
|    ep_rew_mean          | 100        |
| time/                   |            |
|    fps                  | 199        |
|    iterations           | 461        |
|    time_elapsed         | 4739       |
|    total_timesteps      | 944128     |
| train/                  |            |
|    approx_kl            | 0.03286409 |
|    clip_fraction        | 0.067      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.105     |
|    explained_variance   | 0.898      |
|    learning_rate        | 0.0003     |
|    loss                 | 2.96       |
|    n_updates            | 5580       |
|    policy_gradient_loss | -0.023     |
|    value_loss           | 9.54       |
----------------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 21.7      |
|    ep_rew_mean   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 23.7        |
|    ep_rew_mean          | 82          |
| time/                   |             |
|    fps                  | 200         |
|    iterations           | 471         |
|    time_elapsed         | 4818        |
|    total_timesteps      | 964608      |
| train/                  |             |
|    approx_kl            | 0.004298671 |
|    clip_fraction        | 0.0379      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.269      |
|    explained_variance   | 0.841       |
|    learning_rate        | 0.0003      |
|    loss                 | 26.3        |
|    n_updates            | 5680        |
|    policy_gradient_loss | -0.00983    |
|    value_loss           | 79.2        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 26.3

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.32        |
|    ep_rew_mean          | 95          |
| time/                   |             |
|    fps                  | 201         |
|    iterations           | 481         |
|    time_elapsed         | 4896        |
|    total_timesteps      | 985088      |
| train/                  |             |
|    approx_kl            | 0.006028797 |
|    clip_fraction        | 0.0757      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.381      |
|    explained_variance   | 0.899       |
|    learning_rate        | 0.0003      |
|    loss                 | 40.1        |
|    n_updates            | 5780        |
|    policy_gradient_loss | -0.00931    |
|    value_loss           | 119         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.4   

<stable_baselines3.ppo.ppo.PPO at 0x1c8afa8a710>

In [58]:
# Persist the model object held in `model` (the preceding cell's output shows a
# stable_baselines3 PPO instance) under the relative name 'Rubiks_Cube_solver',
# i.e. relative to the kernel's current working directory.
# NOTE(review): Stable-Baselines3 presumably writes a zip archive and appends
# ".zip" to this name — confirm before hard-coding the path in a later load call.
model.save('Rubiks_Cube_solver')

In [65]:
from stable_baselines3.common.evaluation import evaluate_policy

# Roll the current `model` out on `env` for 10 evaluation episodes and keep the
# two values evaluate_policy returns: the mean and the standard deviation of
# the per-episode reward.
# NOTE(review): the output recorded for this cell is "0.0 +/- 0.00", while the
# training log above reports ep_rew_mean between roughly 71 and 100. The
# execution counters are also out of order (this cell ran after cells that
# follow the save step), so `env` may no longer be the environment the model
# was trained on — verify with Restart Kernel -> Run All.
mean_reward, std_reward = evaluate_policy(
    model=model,
    env=env,
    n_eval_episodes=10,
)

# Report the mean as-is and the spread rounded to two decimals.
print(f"Mean reward: {mean_reward} +/- {std_reward:.2f}")

Mean reward: 0.0 +/- 0.00
