# Ship Sim Gym

Quick overview of training on a custom environment as it is slightly different from a normal one. 

Most cells contain code that is simply imported from corresponding py files. It is usually possible to call those scripts directly via their main hooks as well as long as you call them as modules `python -m ...`

If you change something in the scripts themselves and would like to see the cell updated here as well, uncomment the first line in the cell that does the IPython magic called %load. 

# The Ship Game

This is a basic top down ship simulator built with pygame for the graphics and uses pymunk for physics.

In [1]:
# %load ../ship_gym/game.py
import os
import random
import sys
import time

import numpy as np
import pygame

from pymunk import Vec2d, Transform
import pymunk as pm
import pymunk.pygame_util

from ship_gym import game_map
from ship_gym.config import GameConfig
from ship_gym.models import GameObject, Ship, PolyEnv, LiDAR

N_GOALS = 5
DEFAULT_BOUNDS = (500, 500)


class ShipGame(object):

    ships = list()
    goals = list()

    frame_counter = 0
    base_dt = 0.1
    colliding = False
    observe_mode = False
    record = False

    def __init__(self, game_config=None):

        if game_config is None:
            game_config = GameConfig

        self.speed = game_config.SPEED
        self.fps = game_config.FPS
        self.bounds = game_config.BOUNDS
        self.screen = pygame.display.set_mode(self.bounds)
        self.clock = pygame.time.Clock()
        self.goal_reached = False
        self.colliding = False

        self.debug_mode = game_config.DEBUG

        pygame.init()
        pygame.display.set_caption("Ship Sim Gym")
        pygame.key.set_repeat(10, 10)

        print("-"*30)
        print("SHIP GAME INITIALIZED")
        print("DEBUG MODE = ", self.debug_mode)
        print("GAME SPEED = ", self.speed)
        print("GAME FPS   = ", self.fps)
        print("-"*30, "\n")

        self.reset()

    def gen_level(self):
        """
        Generate a level on the fly by calling game map gen river poly function wrapping them in a GeoMap object
        and adding the generated pymunk primitives (shapes and bodies) to the game space
        :return:
        """
        poly = game_map.gen_river_poly(self.bounds)

        self.level = PolyEnv(poly, self.bounds)

        for body, shape in zip(self.level.bodies, self.level.shapes):
            self.space.add(body, shape)

    def invert_p(self, p):
        """Because in screen Y=0 is at the top or some shit like that """
        return Vec2d(p[0], self.bounds[1] - p[1])

    def add_goal(self, x, y):
        """Add a ball to the given space at a random position """
        self.total_goals += 1

        mass = 1
        radius = 5
        inertia = pm.moment_for_circle(mass, 0, radius, (0,0))
        body = pm.Body(mass, inertia)

        body.position = x, y
        shape = pm.Circle(body, radius, (0,0))
        shape.color = pygame.color.THECOLORS["green"]
        self.space.add(body, shape)
        shape.collision_type = 2

        goal = GameObject(body, shape)
        self.goals.append(goal)

        return goal

    def add_player_ship(self, x, y, width, height, color):
        """
        Call this after you have created the level!
        Creates a new Ship instance and adds a shape and body to the pymunk space
        :param self:
        :param x:
        :param y:
        :param width:
        :param height:
        :param color:
        :return:
        """

        ship = Ship(x, y, width, height, color)
        ship.add_lidar(self.level.shapes)

        self.space.add(ship.body, ship.shape)

        return ship

    def add_ship(self, x, y, width, height, color):
        """
        Creates a new Ship instance and adds a shape and body to the pymunk space
        :param self:
        :param x:
        :param y:
        :param width:
        :param height:
        :param color:
        :return:
        """
        ship = Ship(x, y, width, height, color)
        self.space.add(ship.body, ship.shape)

        return ship

    def get_screen(self):
        """
        Returns the game's screen space buffer as a 3D (color) array
        :return:
        """
        return pygame.surfarray.array3d(self.screen)

    def handle_discrete_action(self, action):
        """
        Handle discrete actions: It is possible to move forward, rotate left and right and do nothing.
        Moving backwards is not possible, but is easy to add if needed. See the player definition in models for this.
        :param action: integer value to indicate the action to take
        """
        if action == 0:
            self.player.move_forward()
        elif action == 1:
            self.player.rotate(-5)
        elif action == 2:
            self.player.rotate(+5)
        elif action == 3:
            pass

    def handle_input(self):
        """
        Maps key inputs to actions (via handle_discrete_action) and other utility functions such as quit
        """

         # Handle key strokes
        for event in pygame.event.get():

            # print(event.key)
            if event.type == pygame.QUIT:
                sys.exit(0)

            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_ESCAPE or event.key == pygame.K_q:
                    sys.exit(0)

                elif event.key == pygame.K_w:
                    self.handle_discrete_action(0)
                    print("W pressed. ")
                elif event.key == pygame.K_s:
                    print("S pressed")
                    self.handle_discrete_action(1)
                elif event.key == pygame.K_a:
                    self.handle_discrete_action(2)
                    print("A pressed. ", self.player.rudder_angle)
                elif event.key == pygame.K_d:
                    print("D pressed")
                    self.handle_discrete_action(3)


    def update(self):
        """
        The main update loop, resets certain event states, handles input, sensor routines and updates the game's
        pymunk space
        """
        self.colliding = False
        self.goal_reached = False
        self.handle_input()
        self.player.query_sensors()
        self.space.step(self.speed * self.base_dt)
        self.clock.tick(self.fps)

    def render(self):
        """
        The main render loop clears the screen and draws primitives if requested
        """
        self.screen.fill((0, 0, 200))
        if self.debug_mode:
            options = pm.pygame_util.DrawOptions(self.screen)
            options.flags = pymunk.SpaceDebugDrawOptions.DRAW_SHAPES
            self.space.debug_draw(options)

            res = self.player.lidar.query_results
            for r in res:
                if r is not None and r.shape is None:
                    p = r.point
                    p = self.invert_p(p)
                    p = (round(p.x), round(p.y))

                    # Green circle indicating the rays did not hot anything
                    pygame.draw.circle(self.screen, (0, 255, 0), p, 10)
                else:
                    p = r.point
                    p = self.invert_p(p)
                    p = (round(p.x), round(p.y))

                    # Red circle
                    pygame.draw.circle(self.screen, (255, 0, 0), p, 10)

        p = self.invert_p(self.player.position)

        pygame.draw.circle(self.screen, (255, 255, 0), (round(p.x), round(p.y)), 10)
        pygame.display.flip()

        self.frame_counter += 1


    def collide_ship(self, arbiter, space, data):
        """
        Ship collision callback for when the player ship hits another ship. All params are ignored at this point
        :param arbiter:
        :param space:
        :param data:
        :return:
        """
        self.colliding = True
        return True

    def collide_goal(self, arbiter, space, data):
        """
        Ship collision callback for when the player ship hits a goal object. All params are ignored at this point
        :param arbiter:
        :param space:
        :param data:
        :return:
        """
        shape = arbiter.shapes[1]
        space.remove(shape, shape.body)

        self.goal_reached = True
        self.goals = [g for g in self.goals if g.body is not shape.body]

        return False


    def reset(self):
        """
        Reset the game. Create the environment, the player and the goals
        :param spawn_point:
        :return:
        """
        self.total_goals = 0
        self.ships = list()
        self.goals = list()
        self.space = pm.Space()
        self.space.damping = 0.4
        self.create_environment()
        self.gen_goal_path(N_GOALS)

        spawn_point = Vec2d(self.bounds[0] / 2, 25)
        self.player = self.add_player_ship(spawn_point.x, spawn_point.y, 2, 3, pygame.color.THECOLORS["white"])
        self.player.shape.collision_type = 0
        self.setup_collision_handlers()

    def add_default_traffic(self):
        """
        Add some simple static traffic to the game
        :return:
        """
        self.ships.append(self.add_ship(100, 200, 1, 1, pygame.color.THECOLORS["black"]))
        self.ships.append(self.add_ship(300, 200, 1.5, 2, pygame.color.THECOLORS["black"]))
        self.ships.append(self.add_ship(400, 350, 1, 3, pygame.color.THECOLORS["black"]))

    def setup_collision_handlers(self):
        """
        Add collision handlers to the game space for goal and obstacle interactions.
        """
        h = self.space.add_collision_handler(0, 1)
        h.begin = self.collide_ship

        goal_agent_col = self.space.add_collision_handler(0, 2)
        goal_agent_col.begin = self.collide_goal

        self.space.add_collision_handler(0, 3)

    def gen_goal_path(self, n):
        """
        Generate a path of goals by sampling somewhat randomly the coordinate space. To avoid complete randomness
        where it is hard to even see a path, I kind of use a jittery approach where delta_y is computed according to
        the game bounds and incremented, and randomly jittered. The X position is determined by taking a jittered
        point close to the midline and doing a segment query to the left and right on environmental level shapes (see
        ShapeFilter). These points are then used as extreme points between which the X value is determined according
        to some tolerance value.
        :param n: number of goals to generate
        """

        y_delta = self.bounds[1] / (n+1)
        x_middle = self.bounds[0] / 2
        x_jitter = 50
        y_jitter = 20

        tolerance = 60
        filter = pymunk.ShapeFilter(mask=pymunk.ShapeFilter.ALL_MASKS ^ 0b1) # This has not been properly tested!

        for i in range(1, n+1):
            y = y_delta*i + random.randint(-y_jitter, y_jitter)
            try:
                left_ret = self.space.segment_query((self.bounds[0]/2, y), (0, y), 10, filter)[0]
                right_ret = self.space.segment_query((self.bounds[0] / 2, y), (self.bounds[0], y), 10, filter)[0]

                x = np.random.uniform(left_ret.point.x + tolerance, right_ret.point.x - tolerance)
                self.add_goal(x, y)

            except Exception as e:
                x = x_middle * i + random.randint(-x_jitter, x_jitter)
                self.add_goal(x, y)


    def closest_goal(self):
        """
        Return the goal with the smallest Euclidean distance to the player. Returns None if there are no goals left.
        :return:
        """
        if len(self.goals):
            min_goal = self.goals[0]
            min_distance = min_goal.body.position.get_distance(self.player.body.position)
            for goal in self.goals[1:]:

                dist = goal.body.position.get_distance(self.player.body.position)
                if dist < min_distance:
                    min_distance = dist
                    min_goal = goal

            return min_goal
        return None

    def create_environment(self):
        """
        The hook for creating the environment. Replace the call for gen_level
        :return:
        """
        self.gen_level()


def main():

    import os

    cwd = os.getcwd()
    gc = GameConfig
    gc.SPEED = 1
    gc.FPS = 30
    gc.DEBUG = True

    g = ShipGame()

    while True:
        g.update()
        g.render()



if __name__ == '__main__':
    main()


pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html
Loading chipmunk for Darwin (64bit) [/Users/gerard/miniconda3/envs/ship-sim-gym-3.6/lib/python3.6/site-packages/pymunk/libchipmunk.dylib]


ModuleNotFoundError: No module named 'ship_gym'

## OpenAI Gym Environment

This creates a wrapper for the ship game I made

The way you create an OpenAI gym is by defining a class that overrides the gym.Env class and override a few properties:

```
action_space
observation_space
```

and a few functions:

```
step()
reset()
render()
```

In [12]:
# Environment / Gym

DEFAULT_STATE_VAL = -1
STEP_PENALTY = -0.01

class ShipEnv(Env):

    metadata = {'render.modes': ['human', 'rgb_array']}
    action_space = Discrete(5)
    reward_range = (-1, 1)

    # TODO: Derive the discrete actions
    def __init__(self, game_config, env_config):

        # TODO: Should add some basic sanity checks (max_steps > 0 etc.)
        self.last_action = None
        self.last_action = None
        self.reward = 0
        self.cumulative_reward = 0
        self.step_count = 0
        self.env_config = env_config

        self.game = ShipGame(game_config)
        self.episodes_count = -1 # Because the first reset will increment it to 0
        self.n_states = 2 + 1 + 1 + 2 + self.game.player.lidar.n_beams
        self.states_history = self.n_states * self.env_config.HISTORY_SIZE

        if self.env_config.HISTORY_SIZE < 1:
            raise ValueError("history_size must be greater than zero")
        self.observation_space = Box(low=0, high=max(self.game.bounds), shape=(self.states_history,), dtype=np.uint8)

        # print(" *** SHIP-GYM INITIALIZED *** ")

    def seed(self, seed=None):
        """
        Small but extremely important function, this makes sure that every environment you create is slightly different
        otherwise parallelization is useless since the states will be exactly the same!
        """
        self.np_random, seed = seeding.np_random(seed)
        np.random.seed(seed)
        return [seed]

    def determine_reward(self):

        if self.game.colliding:
            self.reward = -1.0
        if self.game.goal_reached:
            self.reward = 1.0

        # TODO: Code duplication with is_done()
        elif self.game.player.x < 0 or self.game.player.x > self.game.bounds[0]:
            self.reward = -1
        elif self.game.player.y < 0 or self.game.player.y > self.game.bounds[1]:
            self.reward = -1
        else:
            self.reward = STEP_PENALTY  # Small penalty

    def _normalized_coords(self, x, y):
        return x / self.game.bounds[0], y / self.game.bounds[1]

    def __add_states(self):
        '''
        Push back some new state information for the current timestep onto the FIFO queue for all history timesteps
        it keeps track of.

        Layout of a single time step state is like this:

        Px Py R Gx Gy L1 L2 ... Ln

        Where
        P is the player position
        A is the player angle
        R is the rudder angle
        G is the nearest goal position
        L are the lidar values
        N is the number of rays lidar uses


        :return: the complete history buffer of states extended with the most recent one
        '''

        states = self.n_states * [-1]
        goal = self.game.closest_goal()
        goal_pos = [-1, -1]
        player = self.game.player

        if goal:
            goal_pos = [goal.body.position.x, goal.body.position.y]
        states[:6] = [player.x, player.y, player.rudder_angle, player.body.angle, goal_pos[0], goal_pos[1]]

        lidar_vals = self.game.player.lidar.vals

        states[6:] = lidar_vals
        self.states.extend(states)

    def is_done(self):
        if self.game.colliding:
            # print("OOPS --- COLLISION")
            return True
        elif len(self.game.goals) == 0:
            print("ALL GOALS REACHED! -- CUMULATIVE REWARD = ", self.cumulative_reward)
            return True

        player = self.game.player
        if player.x < 0 or player.x > self.game.bounds[0]:
            print("X out of bounds")
            return True
        elif player.y < 0 or player.y > self.game.bounds[1]:
            print("Y out of bounds")
            return True

        if self.step_count >= self.env_config.MAX_STEPS:
            print("MAX STEPS")
            return True

        return False

    def step(self, action):
        assert self.action_space.contains(action), "%r (%s) invalid" % (action, type(action))

        self.game.handle_action(action)
        self.game.update()
        self.game.render()

        self.determine_reward()
        self.cumulative_reward += self.reward
        self.__add_states()
        self.step_count += 1

        done = self.is_done()

        return np.array(self.states), self.reward, done, {}

    def render(self, mode='human', close=False):
        """
        This could be a rendered frame or just some stats that are used for debugging
        """
        out = sys.stdout

        if self.last_action is not None:
            out.write(f'action={self.last_action}, cum_reward={self.cumulative_reward}')

        return

    def reset(self):
        self.game.reset()

        self.last_action = None
        self.reward = 0
        self.cumulative_reward = 0
        self.step_count = 0
        self.episodes_count += 1

        n = self.n_states * self.env_config.HISTORY_SIZE
        self.states = deque([DEFAULT_STATE_VAL] * n, maxlen=n)
        self.__add_states()

        return np.array(self.states)


In [13]:
game_config = GameConfig
game_config.FPS = 1000
game_config.SPEED = 30
game_config.BOUNDS = (1000, 1000)

def make_env(rank, game_config, env_config, seed=0):
        """
        Utility function for multiprocessed env.

        :param n_goals:
        :param env_id: (str) the environment ID
        :param num_env: (int) the number of environment you wish to have in subprocesses
        :param seed: (int) the inital seed for RNG
        :param rank: (int) index of the subprocess
        """

        def _init():
            env_config = EnvConfig
            env = ShipEnv(game_config, env_config)
            return env

        return _init

## Random Agent

A simple baseline to compare against is a random agent

In [15]:
gc = GameConfig
gc.FPS = 10
gc.SPEED = 10
gc.BOUNDS = (400,400)

env = ShipEnv(game_config=gc, env_config=EnvConfig)
env.reset()

rewards = list()

for _ in range(1):

    episode_reward = 0
    for _ in range(1000):
        env.render()

        states, reward, done, _ = env.step(env.action_space.sample()) # take a random action
        # ret = env.step(0) # take a random action

        episode_reward += reward
        
        print("Reward = ", reward)
        print("States = ", states)
        
        if done == True:
            print(f"AGENT IS DONE. TOTAL REWARD = {episode_reward}")
            rewards.append(episode_reward)
            env.reset()
            break

Init game at speed =  10
Init game at fps =  10
Reward =  -0.01
States =  [200.          25.           0.           0.         187.19246125
  69.66666667  -1.          -1.          -1.          -1.
  -1.          -1.          -1.          -1.          -1.
  -1.         200.          25.         -10.           0.
 187.19246125  69.66666667  -1.          -1.          -1.
  -1.          -1.          -1.          -1.          -1.
  -1.          -1.        ]
Reward =  -0.01
States =  [200.          25.         -10.           0.         187.19246125
  69.66666667  -1.          -1.          -1.          -1.
  -1.          -1.          -1.          -1.          -1.
  -1.         200.          25.           0.           0.
 187.19246125  69.66666667  -1.          -1.          -1.
  -1.          -1.          -1.          -1.          -1.
  -1.          -1.        ]
Reward =  -0.01
States =  [200.          25.           0.           0.         187.19246125
  69.66666667  -1.          -1.         

In [16]:
game_config = GameConfig
game_config.FPS = 1000
game_config.SPEED = 30
game_config.BOUNDS = (1000, 1000)

def env_creator(env_config):

    env_config = EnvConfig
    env = ShipEnv(game_config, env_config)

    return env

register_env("ShipGym-v1", env_creator)

pbt = PopulationBasedTraining(
time_attr="time_total_s",
reward_attr="episode_reward_mean",
perturbation_interval=300, # 5 mins
resample_probability=0.25,

# Specifies the mutations of these hyperparams
hyperparam_mutations={
    "lambda": lambda: random.uniform(0.9, 1.0),
    "clip_param": lambda: random.uniform(0.01, 0.5),
    "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
    "num_sgd_iter": lambda: random.randint(1, 30),
    "sgd_minibatch_size": lambda: random.randint(128, 16384),
    "train_batch_size": lambda: random.randint(2000, 160000),
})

ray.init()

n_goals = 5
reward_done = .9*n_goals

run_experiments(
{
    "pbt_ship_sim": {
        "run": "PPO",
        "env": "ShipGym-v1",
        "num_samples": 12, # Repeat the experiment this many times
        "checkpoint_at_end" : True,
        "checkpoint_freq" : 10,
        "config": {
            "kl_coeff": 1.0,
            "num_workers": multiprocessing.cpu_count() - 1,
            "num_gpus": 1,
            
            # These params are tuned from a fixed starting value.
            "lambda": 0.95,
            "clip_param": 0.2,
            "lr" : 5.0e-4,
            "num_sgd_iter":
                lambda spec: random.choice([10, 20, 30]),
            "sgd_minibatch_size":
                lambda spec: random.choice([128, 512, 2048]),
            "train_batch_size":
                lambda spec: random.choice([10000, 20000, 40000])
        },
    },
}, scheduler=pbt) # Reference the scheduler

NameError: name 'register_env' is not defined

## Training Visualisation

rllib has very nice Tensorboard integration.

[http://localhost:6006/](http://localhost:6006/)

Insert TensorBoard image

## Evaluate

Let's look at some models we have saved, change the res_dir to whatever directory you are using to store the rllib log data and models in

In [57]:
import glob
import json
import pprint
import pandas as pd
import csv

pp = pprint.PrettyPrinter(indent=4)

res_dir = "/Users/gerard/Desktop/pbt_ship_sim_v2"

def read_csvs():
    result_csvs = glob.glob(os.path.join(res_dir, "*/progress.csv"))

    print(f"{len(result_csvs)} CSV files found!")
    dfs = []

    for res_csv in result_csvs:

        log_dir = res_csv.split("/")[-2]

        df_res = pd.read_csv(res_csv, delimiter=",", header=0, quotechar='"', quoting=csv.QUOTE_ALL)
        df_res['log_dir'] = log_dir
        dfs.append(df_res)

    frame = pd.concat(dfs, axis=0, ignore_index=True)

# params = [json.load(open(p)) for p in glob.glob(os.path.join(res_dir, "*/params.json"))]
# results = [json.load(open(p)) for p in glob.glob(os.path.join(res_dir, "*/result.json"))]



paths = list()
mean_rewards = list()

cols = []

for res in glob.glob(os.path.join(res_dir, "*/result.json")):
    
    try:
        f = open(res)
        for line in f:
            j = json.loads(line)

            path = "/".join(res.split('/')[:-1])
            paths.append(path)
        
            
            mean_rewards.append(j['episode_reward_mean'])
            
    except Exception as e:
        print("Could not read")
        print(e)
        
df = pd.DataFrame({'mean_reward':mean_rewards, 'path':paths})
df.sort_values(by='mean_reward', ascending=False, inplace=True)

# pp.pprint()

In [58]:
df['path'][0]

'/Users/gerard/Desktop/pbt_ship_sim_v2/PPO_ShipGym-v1_1_num_sgd_iter=30,sgd_minibatch_size=2048,train_batch_size=40000_2018-12-05_17-41-41woi3j510'