In [1]:
import pygame

import pygame_menu
import pygame.freetype
import sys
import random
import math
import numpy as np

from gym import Env
from gym.spaces import Discrete, Box


from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

from tensorflow.keras.backend import clear_session
from tensorflow.keras.layers import LeakyReLU
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam


pygame 2.0.1 (SDL 2.0.14, Python 3.6.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
import sys
import os
import os.path

#Import local modules
#The working directory is appended to the module search path before the
#notebook-based game modules below are imported.
sys.path.append(os.getcwd())
from ipynb.fs.full.Games.SpaceInvaders import Space_Invaders
from ipynb.fs.full.Games.Asteroids import Asteroids
#Initialise pygame; as the last expression of the cell its return value is
#what the notebook echoes underneath.
pygame.init()

(8, 0)

In [3]:
class Custom_OpenAI_Env(Env):
    """Gym environment that wraps one of the local pygame games.

    The wrapped game object is only used through get_state(colour=...),
    execute_action(action), update(), calculate_reward(), render(), reset()
    and its ``done`` attribute.
    """

    def __init__(self, screen_width, screen_height, game,
                 action_space = None, colour = False):
        """Store the game and record its initial state.

        Args:
            screen_width: width used in the observation space shape.
            screen_height: height used in the observation space shape.
            game: the game instance this environment drives.
            action_space: gym space of valid actions; Discrete(5) if omitted.
            colour: flag forwarded to game.get_state() on every state query.
        """
        # Assign action and observation space. The default space is created
        # per instance rather than once in the signature.
        self.action_space = Discrete(5) if action_space is None else action_space
        self.observation_space = Box(0, 255, shape=(1, screen_width, screen_height, ))

        #Initialise the game
        self.game = game

        #Assign colour, get the initial game state and record it
        self.colour = colour
        self.state = self.game.get_state(colour = self.colour)
        self.start_state = self.state

        print("initialisation complete")

        self.delay = 1000

    def step(self, action):
        """Apply one action and return (state, reward, done, info)."""
        # Apply action
        self.game.execute_action(action)
        #Call the update loop before getting the state
        self.game.update()
        self.state = self.game.get_state(colour = self.colour)
        #Calculate step-based reward
        reward = self.game.calculate_reward()
        done = self.game.done
        # Set placeholder for info
        info = {}
        # Return step information
        return self.state, reward, done, info

    def render(self, mode = 'human'):
        """Draw the current frame; ``mode`` is accepted but not used."""
        self.game.render()

    def reset(self):
        """Restart the game and return its first observation."""
        #Restart the game
        self.game.reset()
        #Query the state the same way step() does so that both return
        #observations produced with the same colour setting.
        self.state = self.game.get_state(colour = self.colour)
        return self.state
    

In [4]:
#Used as the first dimension of the network input shape and as the
#window_length of the agent's replay memory.
WINDOW_LENGTH = 3
#Game menu class controlling the functionality of the entire framework
class Game_Menu:

    def __init__(self, width, height):
        self.menu = None
        self.surface = None
        self.game = None
        self.icon_surface = None
        
        #Set standard network parameters
        self.learning_rates = [0.0001, 0.000001, 0.0000001]
        self.episodes = [1, 10, 100]
        self.steps = [3000, 50000, 100000]

        #Standard parameter indices
        self.learning_rate = 0
        self.episode = 0
        self.step = 0
        self.visualize = False

        #Asteroid specific parameters
        self.scales = [[800,600, 1.0], [200,200, 0.65], [150,150, 0.45]]
        self.intensity_rates = [0, 1, 3]
        self.player_speeds = [1, 2, 3]

        #Scale, player speed and colour are common to both games
        self.scale = 0
        self.intensity = 0
        self.player_speed = 0
        self.homogenous_controls = False
        self.colour = False
        
        #Space invaders specific parameters
        self.enemy_speeds = [0.5, 1.0, 2.0]
        self.enemy_speed = 0
        
        #Transfer mode 
        self.transfer = -1
        self.test_env = -1
        
        #Initialise pygame
        pygame.display.init()
        self.surface = pygame.display.set_mode((800, 600))
        self.icon_surface = pygame.image.load(os.path.join(os.getcwd(), "Dependencies/Resources", "Masterslogo.png"))
        pygame.display.set_icon(self.icon_surface)
        pygame.display.set_caption("Master's Project")
        
        #Start the main menu
        self.main_menu()
        
    def reset_parameters(self):
        #Reset all adjustable variables when returning to the main menu.
        self.enemy_speed = 0
        self.scale = 0
        self.intensity = 0
        self.player_speed = 0
        self.homogenous_controls = False
        self.colour = False
        self.learning_rate = 0
        self.episode = 0
        self.step = 0
        self.visualize = False
        
    def main_menu(self):
        self.reset_parameters()
        self.surface = pygame.display.set_mode((800, 600))
        #Disable any menu if it exists
        if self.menu:
            self.menu.disable()
        #Initialise the main menu interface
        self.menu = pygame_menu.Menu(600, 800, 'Main Menu',
                         theme=pygame_menu.themes.THEME_DARK)
        self.menu.add_button('Space Invaders', self.start_space_invaders)
        self.menu.add_button('Space Invaders - Training', self.set_game_mode)
        self.menu.add_button('Asteroids', self.start_asteroids)
        self.menu.add_button('Asteroids - Training', self.set_standard_parameters)
        self.menu.add_button('Transfer Learning', self.transfer_menu)
        self.menu.add_button('Test - Space Invaders', self.set_test_space_invaders)
        self.menu.add_button('Test - Asteroids', self.set_test_asteroids)
        self.menu.add_button('Quit', pygame_menu.events.EXIT)
        
        self.menu.mainloop(self.surface)

    def transfer_menu(self):
        #Transfer menu, always disable because this will never be the first
        #menu created
        self.menu.disable()
        self.menu = pygame_menu.Menu(600, 800, 'Transfer Learning',
                               theme=pygame_menu.themes.THEME_DARK)
        self.menu.add_button('Space Invaders -> Asteroids', self.set_transfer_SA)
        self.menu.add_button('Asteroids -> Space Invaders', self.set_transfer_AS)
        self.menu.add_button('Back', self.main_menu)
        
        self.menu.mainloop(self.surface)
    
    
    #Common DQN network settings
    def set_standard_param(self, arg, param):
        #Learning rate 
        if param == 0:
            print("changing learning rate")
            if self.learning_rate < 2:
                self.learning_rate += 1
            else:
                self.learning_rate = 0
        #Steps
        elif param == 1:
            if self.step < 2:
                self.step += 1
            else:
                self.step = 0
        #Episodes
        elif param == 2:
            if self.episode < 2:
                self.episode += 1
            else:
                self.episode = 0
        #Visualize
        elif param == 3:
            self.visualize = 1 if self.visualize == 0 else 1 
            
    def set_game_mode(self):
        self.set_standard_parameters(1)
        
    def set_test_asteroids(self):
        self.test_env = 0
        self.test_model()
    
    def set_test_space_invaders(self):
        self.test_env = 1
        self.test_model()
        
    #Set transfer mode: Space invaders to Asteroids
    def set_transfer_SA(self):
        self.transfer = 0
        self.set_standard_parameters()
        
    #Set transfer mode: Asteroids to Space Invaders
    def set_transfer_AS(self):
        self.transfer = 1
        self.set_standard_parameters(1)
    
    #Setup menu for standard network parameters
    def set_standard_parameters(self, gamemode = 0):
        print("standard params called")
        self.menu.disable()
        self.gamemode = gamemode
        self.menu = pygame_menu.Menu(600, 800, 'Select Variables',
                               theme=pygame_menu.themes.THEME_DARK)

        self.menu.add_selector('Learning rate :', [('1e-1', 0),
                                                   ('1e-2', 0),
                                                   ('1e-3', 0)],
                                                   onchange=self.set_standard_param)
        self.menu.add_selector('Steps :', [('1000', 1),
                                           ('5000', 1),
                                           ('10000', 1),],
                                           onchange=self.set_standard_param)
        self.menu.add_selector('Episodes: ', [('1', 2),
                                           ('5', 2),
                                           ('10', 2),],
                                           onchange=self.set_standard_param)
        self.menu.add_selector('Visualize', [('False', 3), ('True', 3)],
                               onchange=self.set_standard_param)
        self.menu.add_button('Back', self.main_menu)
        
        #Continue based on what game mode is selected
        if gamemode == 0:
            self.menu.add_button('Continue - Asteroids', self.set_asteroids_parameters)
        else:
            self.menu.add_button('Continue - Space Invaders', self.set_space_invaders_parameters)
        
        self.menu.mainloop(self.surface)
    
    def set_mode_parameter(self, arg, param):
        #0 scale, 1 intensity, 2, player speed, 3 homo controls, 4 colours, 5 enemy speed
        if param == 0:
            self.scale += 1 if self.scale < 2 else 0
        if param == 1:
            self.intensity += 1 if self.intensity < 2 else 0        
        if param == 2:
            self.player_speed += 1 if self.player_speed < 2 else 0        
        if param == 3:
            self.homogenous_controls = True if self.homogenous_controls == False else False
        if param == 4:
            self.colour = True if self.colour == False else False
        if param == 5:
            self.enemy_speed += 1 if self.enemy_speed < 2 else 0
            
    #Menu for setting asteroids game mode specific parameters
    def set_asteroids_parameters(self):
        self.menu.disable()
        self.menu = pygame_menu.Menu(600, 800, 'Select Gameplay Variables',
                               theme=pygame_menu.themes.THEME_DARK)

        self.menu.add_selector('Scale :', [('(800, 600)', 0), ('(400, 300)', 0),('(200, 150)', 0)],
                               onchange=self.set_mode_parameter)
        self.menu.add_selector('Intensity:', [('0.1', 1), ('0.5', 1),('1.0', 1)],
                               onchange=self.set_mode_parameter)
        self.menu.add_selector('Player Speed :', [('1', 2), ('1.5', 2),('3', 2)],
                               onchange=self.set_mode_parameter)
        self.menu.add_selector('Homogenous Controls: ', [('False', 3), ('True', 3)],
                        onchange=self.set_mode_parameter)
        self.menu.add_selector('Colour Input: ', [('False', 4), ('True', 4)],
                        onchange=self.set_mode_parameter)
        self.menu.add_button('Start Training', self.start_asteroids_training)
        self.menu.add_button('Back', self.main_menu)
        
        self.menu.mainloop(self.surface)
    
    #Menu for setting Space invaders game mode specific parameters
    def set_space_invaders_parameters(self):
        self.menu.disable()
        self.menu = pygame_menu.Menu(600, 800, 'Select Gameplay Variables',
                               theme=pygame_menu.themes.THEME_DARK)

        self.menu.add_selector('Scale :', [('(800, 600)', 0), ('(300, 300)', 0),
                                          ('(150, 150)', 0)],
                               onchange=self.set_mode_parameter)
        self.menu.add_selector('Enemy Speed:', [('0.5', 5), ('1.0', 5),
                                          ('2.0', 5)],
                               onchange=self.set_mode_parameter)
        self.menu.add_selector('Player Speed :', [('1', 2), ('1.5', 2),
                                          ('3', 2)],
                               onchange=self.set_mode_parameter)
        self.menu.add_selector('Colour Input: ', [('False', 4), ('True', 4)],
                        onchange=self.set_mode_parameter)
        
        self.menu.add_button('Start Training', self.start_space_invaders_training)
        self.menu.add_button('Back', self.main_menu)
        
        self.menu.mainloop(self.surface)

    #Start asteroids as a normal player
    def start_asteroids(self):
        self.game = Asteroids(800, 600, self.surface, False, 0.125)

    #Start space invaders as a normal player
    def start_space_invaders(self):
        self.game = Space_Invaders(800, 600, self.surface, False, 1.0)
        
    def start_asteroids_training(self):
        self.menu.disable()
        print("starting asteroid training")
        training_game = Asteroids(self.scales[self.scale][0], self.scales[self.scale][1], 
                                  self.surface, True, self.scales[self.scale][2], #add player speed
                                  player_speed = self.player_speeds[self.player_speed],
                                  player_rtspeed = self.player_speeds[self.player_speed], 
                                  intensity_modifier = self.intensity_rates[self.intensity],
                                  has_colour = self.colour)
        
        self.game = Custom_OpenAI_Env(self.scales[self.scale][0], self.scales[self.scale][1],
                                       training_game)
        self.build_model()
        self.train_model()
        if self.transfer == -1:
            self.save("asteroids")
        else:
            self.save("Space-Asteroids-Transfer")
        self.main_menu()
    
    def start_space_invaders_training(self):
        self.menu.disable()
        print("Starting space invaders training")
        training_game = Space_Invaders(self.scales[self.scale][0], self.scales[self.scale][1]
                                       , self.surface, True, self.scales[self.scale][2],
                                       enemy_speed = self.enemy_speeds[self.enemy_speed],
                                       player_speed = self.player_speeds[self.player_speed],
                                       game_intensity_modifier = self.intensity_rates[self.intensity],
                                       homogenous_controls = self.homogenous_controls,
                                       has_colour = self.colour)
        
        #Assign new action space based on homogenous control setting
        action_space= Discrete(5)
        #if self.homogenous_controls:
        #    action_space = Discrete(5)
        self.game = Custom_OpenAI_Env(self.scales[self.scale][0], self.scales[self.scale][1],
                                     training_game,
                                     action_space = action_space)
        self.build_model()
        self.train_model()
        if self.transfer == -1:
            self.save("space-invaders")
        else:
            self.save("Asteroids-Space-Transfer")
        self.main_menu()
        
    def build_model(self, metrics = ['mae']):
        print("building model")
        #Initialise state/action arrays
        states = self.game.observation_space.shape
        states = (WINDOW_LENGTH, states[1], states[2])
        print(states)
        self.actions = self.game.action_space.n
        
        #Initialise the DRL model based on transfer setting
        #if self.transfer == -1:
        self.model = self.build_network(states, self.actions)

        if self.transfer == 0:
            print("loading space invaders original model.")
            if self.load("space-invaders") != False:
                print("found space invaders")
                self.load("space-invaders")
                #self.load("space-invaders")
            else:
                print("calling this cause why not?")
                self.model = self.build_network(states, self.actions)
                
        elif self.transfer == 1:
            print("loading asteroids original model")
            if self.load("asteroids") != False:
                self.load("asteroids")
            else:
                self.model = self.build_network(states, self.actions)

        
        self.dqn = self.build_agent(self.model, self.actions)
        self.dqn.compile(Adam(beta_1 = 0.99, lr=self.learning_rates[self.learning_rate]), metrics=metrics)
        
    def train_model(self, episodes = 5, verbose = 1):
        #Initialise the DQN agent
        #Debug info
        print("parameters: ", "\nlearning rate: ", self.learning_rates[self.learning_rate],
             "\nepisodes: ",  self.episodes[self.episode],
              "\nsteps: ", self.steps[self.step],
              "\nvisualize: ", self.visualize,
              "\nscale: ", self.scales[self.scale],
              "\nintensity (asteroids only): ", self.intensity_rates[self.intensity],
              "\nplayer speed : ", self.player_speeds[self.player_speed],
              "\nhomogenous controls (space invaders only): ", self.homogenous_controls,
              "\ncolour: ", self.colour)

        print("beginning training")
        #Fit with openAI gym
        self.dqn.fit(self.game, nb_steps=self.steps[self.step], visualize=self.visualize, verbose=2)

        print("training complete")
        self.test_model()

    
    def test_model(self):
        #Test with scores
        action_space= Discrete(5)
        
        if self.test_env == 1:
            training_game = Space_Invaders(self.scales[2][0], self.scales[2][1]
                                    , self.surface, True, self.scales[2][2],
                                    homogenous_controls = self.homogenous_controls)
            self.game = Custom_OpenAI_Env(self.scales[2][0], self.scales[2][1],
                                         training_game, colour=self.colour,
                                         action_space = action_space)
        elif self.test_env == 0:

            training_game = Asteroids(self.scales[2][0], self.scales[2][1], 
                                self.surface, True, self.scales[2][2],
                                intensity_modifier = self.intensity_rates[self.intensity])
            self.game = Custom_OpenAI_Env(self.scales[2][0], self.scales[2][1],
                                           training_game, colour=self.colour)
        
        if self.test_env != -1:
            self.build_model()
            
        scores = self.dqn.test(self.game, nb_episodes=100, visualize=True)
        print(np.mean(scores.history['episode_reward']))
        
        #if self.test_env == -1:
        #    self.main_menu()
        
    def save(self, name='default-model'):
        #was dqn now model
        self.dqn.save_weights(name + ".hdf5", overwrite=True)
        
    def load(self, name):
        #Load existing weights into the blank model 
        if os.path.isfile(name + '.hdf5'):
            return self.model.load_weights(name + '.hdf5')
        print("file not found, creating default model.")
        return False
    
    def build_network(self, states, actions):
        model = Sequential()
        print("shape : ", len(states), actions)
        #Convolutional layers
        model.add(Conv2D(16, (10,10), activation='relu', input_shape=states, padding='same'))
        model.add(MaxPooling2D((2, 2), padding = 'same'))
        model.add(LeakyReLU(alpha=0.1))
        model.add(Conv2D(32, (5,5), activation='relu', input_shape=states, padding='same'))
        model.add(MaxPooling2D((2, 2), padding = 'same'))
        model.add(LeakyReLU(alpha=0.1))
        model.add(Flatten())
        #Fully connected layers
        model.add(Dense(256, activation='relu'))
        model.add(LeakyReLU(alpha=0.1))
        model.add(Dense(32, activation='relu'))
        model.add(LeakyReLU(alpha=0.1))
        model.add(Dense(32, activation='relu'))
        model.add(LeakyReLU(alpha=0.1))
        model.add(Dense(16, activation='relu'))
        model.add(LeakyReLU(alpha=0.1))
        model.add(Dense(self.actions, activation='relu'))
        #Debug summary of the model built
        print(model.summary())
        return model

    #Function to create DQN model 
    def build_agent(self, model, actions):
        policy = BoltzmannQPolicy()
        #policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
        memory = SequentialMemory(limit=1000, window_length=WINDOW_LENGTH)
        dqn = DQNAgent(model=model, memory=memory, policy=policy,
                      enable_dueling_network=True, dueling_type='avg', 
                       nb_actions=actions, nb_steps_warmup=1000
                      )
        return dqn

In [None]:
#Constructing the menu opens the pygame window and immediately runs the main
#menu loop, so this cell stays busy while the menu is in use.
Game = Game_Menu(800,600)

standard params called
changing learning rate
changing learning rate
Starting space invaders training
initialisation complete
building model
(3, 150, 150)
shape :  3 5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 3, 150, 16)        240016    
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 2, 75, 16)         0         
_________________________________________________________________
leaky_re_lu (LeakyReLU)      (None, 2, 75, 16)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 2, 75, 32)         12832     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 1, 38, 32)         0         
_________________________________________________________________
leaky_re_lu_1 (Leaky

  8210/100000: episode: 22, duration: 37.483s, episode steps: 229, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.895 [2.000, 4.000],  loss: 0.621430, mae: 7.032332, mean_q: 12.274657
  8786/100000: episode: 23, duration: 93.731s, episode steps: 576, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.488 [1.000, 4.000],  loss: 0.637991, mae: 6.924659, mean_q: 12.168443
  9016/100000: episode: 24, duration: 37.542s, episode steps: 230, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.839 [2.000, 4.000],  loss: 0.812476, mae: 7.311093, mean_q: 12.515945
  9237/100000: episode: 25, duration: 36.081s, episode steps: 221, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.824 [1.000, 4.000],  loss: 0.934108, mae: 7.222859, mean_q: 12.685339
  9539/100000: episode: 26, duration: 49.166s, episo

 33437/100000: episode: 57, duration: 376.011s, episode steps: 2258, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.151 [1.000, 4.000],  loss: 0.747612, mae: 6.487613, mean_q: 12.904465
 33780/100000: episode: 58, duration: 57.152s, episode steps: 343, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.743 [0.000, 4.000],  loss: 0.673948, mae: 5.220836, mean_q: 11.323541
 34000/100000: episode: 59, duration: 36.506s, episode steps: 220, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.855 [2.000, 4.000],  loss: 0.792949, mae: 5.626153, mean_q: 11.736444
 34288/100000: episode: 60, duration: 47.550s, episode steps: 288, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.753 [1.000, 4.000],  loss: 1.037877, mae: 5.873625, mean_q: 12.299480
 34645/100000: episode: 61, duration: 58.865s, epi

 54778/100000: episode: 92, duration: 54.564s, episode steps: 329, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.796 [0.000, 4.000],  loss: 0.804757, mae: 6.180227, mean_q: 12.093407
 57031/100000: episode: 93, duration: 373.208s, episode steps: 2253, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.146 [1.000, 4.000],  loss: 0.605429, mae: 6.369172, mean_q: 12.762829
 57403/100000: episode: 94, duration: 62.106s, episode steps: 372, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.774 [1.000, 4.000],  loss: 0.607281, mae: 5.136412, mean_q: 11.291678
 57792/100000: episode: 95, duration: 64.842s, episode steps: 389, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.753 [2.000, 4.000],  loss: 0.702244, mae: 5.667836, mean_q: 11.874360
 58164/100000: episode: 96, duration: 61.756s, epi

 70333/100000: episode: 127, duration: 81.933s, episode steps: 497, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.437 [1.000, 4.000],  loss: 0.780588, mae: 6.095946, mean_q: 11.739261
 70606/100000: episode: 128, duration: 45.110s, episode steps: 273, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.700 [1.000, 4.000],  loss: 0.734181, mae: 6.253467, mean_q: 11.877147
 70792/100000: episode: 129, duration: 30.773s, episode steps: 186, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.763 [2.000, 4.000],  loss: 0.845699, mae: 6.304060, mean_q: 11.896191
 71165/100000: episode: 130, duration: 61.618s, episode steps: 373, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.697 [1.000, 4.000],  loss: 0.637113, mae: 6.068664, mean_q: 11.737287
 71663/100000: episode: 131, duration: 82.082s, 

 87662/100000: episode: 162, duration: 46.587s, episode steps: 282, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.550 [0.000, 4.000],  loss: 0.672537, mae: 5.782821, mean_q: 11.822300
 88213/100000: episode: 163, duration: 91.007s, episode steps: 551, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.290 [1.000, 4.000],  loss: 0.646173, mae: 5.904868, mean_q: 11.978782
 90164/100000: episode: 164, duration: 321.047s, episode steps: 1951, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.026 [1.000, 4.000],  loss: 0.548917, mae: 6.219874, mean_q: 12.795177
 90334/100000: episode: 165, duration: 28.303s, episode steps: 170, steps per second:   6, episode reward:  0.000, mean reward:  0.000 [ 0.000,  0.000], mean action: 3.471 [2.000, 4.000],  loss: 0.580765, mae: 5.564011, mean_q: 11.672005
 90538/100000: episode: 166, duration: 33.873s

Episode 33: reward: 0.000, steps: 461
Episode 34: reward: 0.000, steps: 220
Episode 35: reward: 0.000, steps: 234
Episode 36: reward: 0.000, steps: 272
Episode 37: reward: 0.000, steps: 1909
Episode 38: reward: 0.000, steps: 532
Episode 39: reward: 0.000, steps: 491
Episode 40: reward: 0.000, steps: 173
Episode 41: reward: 0.000, steps: 593
Episode 42: reward: 0.000, steps: 292
Episode 43: reward: 0.000, steps: 291
Episode 44: reward: 0.000, steps: 334
Episode 45: reward: 0.000, steps: 663
Episode 46: reward: 0.000, steps: 361
Episode 47: reward: 0.000, steps: 249
Episode 48: reward: 0.000, steps: 256
Episode 49: reward: 0.000, steps: 374
Episode 50: reward: 0.000, steps: 266
Episode 51: reward: 0.000, steps: 622
Episode 52: reward: 0.000, steps: 176
Episode 53: reward: 0.000, steps: 1759
Episode 54: reward: 0.000, steps: 460
Episode 55: reward: 0.000, steps: 275
Episode 56: reward: 0.000, steps: 531
Episode 57: reward: 0.000, steps: 271
Episode 58: reward: 0.000, steps: 545
Episode 59