# In [3]:
from gym import spaces
from gym.utils import seeding
import gym
import math
import numpy as np


# Default gravitational acceleration used by FireSolution
# (presumably m/s^2 -- TODO confirm units).
GRAVITY = 9.8
# Default maximum distance of the firing range; FireSolution derives its
# launch velocity from this value (presumably meters -- TODO confirm units).
MAX_RANGE = 3500
# Default radius around a position within which a shot counts as landing on
# it (same distance units as MAX_RANGE).
MAX_RADIUS = 25

# Bounds of the launch angle, in degrees. Actions are expressed as a fraction
# of HIGH_ANGLE, so the action interval is [LOW_ANGLE / HIGH_ANGLE, 1.0].
HIGH_ANGLE = 60.0
LOW_ANGLE = 20.0


class FireSolution:
    """Drag-free ballistic model for a projectile fired over level ground.

    The launch velocity is fixed at construction time so that the farthest
    reachable distance (attained at a 45 degree launch angle) equals
    ``_range``.
    """

    def __init__ (self, _g=GRAVITY, _range=MAX_RANGE, _radius=MAX_RADIUS):
        """Store the model parameters and derive the launch velocity.

        Args:
            _g: gravitational acceleration.
            _range: maximum distance the projectile can travel.
            _radius: tolerance radius around a position that still counts
                as landing on it.
        """
        self.g = _g
        self.range = _range
        self.radius = _radius
        # The maximum range on level ground is v**2 / g, hence
        # v = sqrt(range * g).
        self.velocity = math.sqrt(float(self.range) * self.g)


    @classmethod
    def deg_to_rad (cls, degree):
        """Convert an angle from degrees to radians.

        Declared as a classmethod so it can be called on the class or on
        an instance; it does not read any instance state.
        """
        return math.radians(degree)


    def calc_dist (self, theta):
        """Return the horizontal distance travelled for launch angle `theta`.

        Args:
            theta: launch angle in radians, measured from the horizontal.

        Returns:
            The level-ground range ``v**2 / g * sin(2 * theta)``.
        """
        # The range formula depends on sin(2*theta), not sin(theta): the
        # distance peaks at 45 degrees and is symmetric around it.
        return self.velocity**2.0 / self.g * math.sin(2.0 * theta)


class Projectile (gym.Env):
    """Gym environment: choose a launch angle to land a shot on a target.

    The state is ``[target_location, last_impact_position]``. An action is a
    one-element array holding the launch angle as a fraction of
    ``HIGH_ANGLE``. An episode ends once a shot lands within
    ``self.fire.radius`` of the target.
    """
    metadata = {"render.modes": ["human"]}
    reward_range = (-100.0, 100.0)


    def __init__ (self):
        """Build the ballistic model and spaces, then start a first episode."""
        self.fire = FireSolution()

        # Actions are launch angles normalized by HIGH_ANGLE.
        low = np.float32(LOW_ANGLE / HIGH_ANGLE)
        high = np.float32(HIGH_ANGLE / HIGH_ANGLE)
        self.action_space = spaces.Box(low, high, shape=(1,), dtype=np.float32)

        # Positions are rounded to integers in [0, range] inclusive, so the
        # discrete spaces need range + 1 values.
        n_positions = self.fire.range + 1
        self.observation_space = spaces.Tuple((spaces.Discrete(n_positions), spaces.Discrete(n_positions),))

        self.np_random = None
        self.reset()


    def reset (self):
        """Start a new episode with a fresh random target.

        Returns:
            The initial observation ``[target_location, 0]``.
        """
        # Seed only when no generator exists yet; re-seeding here on every
        # call would throw away a seed set earlier through `seed()`.
        if self.np_random is None:
            self.seed()

        half_range = self.fire.range / 2

        # The target is placed in the far half of the range.
        target = round(self.np_random.random() * float(half_range) + half_range)
        self.state = [ target, 0 ]

        self.reward = -100.0
        self.done = False
        self.info = {}

        # Return the real state (as a copy), not a random sample of the
        # observation space.
        return list(self.state)


    def step (self, action):
        """Fire one shot at the angle encoded by `action`.

        Args:
            action: indexable whose first element is the launch angle as a
                fraction of ``HIGH_ANGLE``.

        Returns:
            ``[observation, reward, done, info]``. The reward is -100.0 when
            the shot lands within `radius` of the origin, 100.0 on a hit
            (which also ends the episode), and otherwise a value that grows
            as the miss distance shrinks.
        """
        if self.done:
            print("episode done")
            return [list(self.state), self.reward, self.done, self.info]

        degree = float(action[0] * HIGH_ANGLE)
        loc = self.state[0]

        theta = self.fire.deg_to_rad(degree)
        pos = round(self.fire.calc_dist(theta))
        delta = abs(loc - pos)

        self.state[1] = pos
        self.info["degree"] = degree
        self.info["theta"] = round(theta, 3)
        self.info["delta"] = delta

        self.render()

        if pos <= self.fire.radius:
            # The shot landed within `radius` of the origin.
            self.reward = -100.0
        elif delta <= self.fire.radius:
            self.reward = 100.0
            self.done = True
        else:
            # Partial credit: the smaller the miss, the larger the reward.
            self.reward = round(100.0 * float(abs(loc - delta)) / float(self.fire.range))

        # Hand out a copy so that later steps do not mutate observations
        # the caller has stored.
        return [list(self.state), self.reward, self.done, self.info]


    def render (self, mode="human"):
        """Print the current state to stdout."""
        print("location:", self.state)


    def close (self):
        """Release resources; this environment holds none."""
        pass


    def seed (self, seed=None):
        """Create the random generator for this environment.

        Args:
            seed: optional seed forwarded to `seeding.np_random`.

        Returns:
            A one-element list holding the seed reported by
            `seeding.np_random`.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]