In [2]:
%matplotlib notebook
import numpy as np
import ipywidgets as widgets
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.widgets import Button
import matplotlib.lines as mlines
import warnings

In [3]:
def segment_intersection(segment_1, segment_2):
    """Return the point where two 2-D line segments cross, or ``None``.

    Parameters
    ----------
    segment_1, segment_2 : array-like of shape (2, 2)
        Each row is one ``(x, y)`` endpoint of the segment.

    Returns
    -------
    tuple or None
        ``(xi, yi)`` when the supporting lines meet in exactly one point and
        that point lies inside the axis-aligned bounding box of *both*
        segments.  ``None`` when the segments are parallel (this includes
        collinear / overlapping segments), when either segment has zero
        length, or when the lines cross outside either segment.
    """
    # Matthew Herndon: function borrowed from previous project of mine
    segment_1 = np.asarray(segment_1, dtype=float)
    segment_2 = np.asarray(segment_2, dtype=float)

    def _slope(segment):
        # dy/dx; +/-inf for a vertical segment, nan for a zero-length one.
        dx, dy = segment[1] - segment[0]
        with np.errstate(divide="ignore", invalid="ignore"):
            return np.divide(dy, dx)

    m1 = _slope(segment_1)
    m2 = _slope(segment_2)

    # A zero-length segment has no direction, so no unique crossing exists.
    if np.isnan(m1) or np.isnan(m2):
        return None

    # Parallel lines never meet in a single point.  This covers both-vertical,
    # both-horizontal, and equal finite slopes; the last case used to reach a
    # singular matrix inversion and raise LinAlgError.
    if (np.isinf(m1) and np.isinf(m2)) or m1 == m2:
        return None

    if np.isinf(m1):
        # segment_1 is vertical: x is fixed, evaluate line 2 at that x.
        xi = segment_1[0, 0]
        yi = segment_2[0, 1] + (xi - segment_2[0, 0]) * m2
    elif np.isinf(m2):
        # segment_2 is vertical: x is fixed, evaluate line 1 at that x.
        xi = segment_2[0, 0]
        yi = segment_1[0, 1] + (xi - segment_1[0, 0]) * m1
    else:
        # Each line satisfies  y - m*x = y0 - m*x0 ; solve the 2x2 system for
        # the unknown vector (y, x).
        coefficients = np.array([[1.0, -m1], [1.0, -m2]])
        constants = np.array([
            segment_1[0, 1] - m1 * segment_1[0, 0],
            segment_2[0, 1] - m2 * segment_2[0, 0],
        ])
        try:
            yi, xi = np.linalg.solve(coefficients, constants)
        except np.linalg.LinAlgError:
            # Numerically singular (slopes indistinguishable): treat as parallel.
            return None

    # The crossing of the infinite lines only counts if it lies on both
    # segments.  No tolerance is applied (the 1% fudge factor that an earlier
    # version of this function described was disabled), so the bounds are
    # exact.  A nan coordinate fails every comparison and is rejected too.
    for segment in (segment_1, segment_2):
        x_low, y_low = segment.min(axis=0)
        x_high, y_high = segment.max(axis=0)
        if not (x_low <= xi <= x_high and y_low <= yi <= y_high):
            return None

    return xi, yi

class RayCollideable:
    """Base class for objects that a ray can be tested against and drawn.

    Stores a reference position (``xc``, ``yc``) and a colour; subclasses
    must provide ``compute_intersection`` and ``render_to_axis``.
    """

    def __init__(self, xc, yc, color='b'):
        """Remember the reference position and the drawing colour."""
        self._xc, self._yc, self._color = xc, yc, color

    def compute_intersection(self, x, y, slope):
        """Intersect a ray starting at ``(x, y)`` with this object.

        Must be overridden.  Note that the subclasses in this file name the
        third parameter ``angle`` and treat it as degrees.
        """
        raise NotImplementedError

    def render_to_axis(self, ax):
        """Draw this object on ``ax``.  Must be overridden."""
        raise NotImplementedError

class Segment(RayCollideable):
    """Straight line segment described by its centre and its extent.

    The segment runs from ``(xc - dx/2, yc - dy/2)`` to
    ``(xc + dx/2, yc + dy/2)``; ``xc``, ``yc`` and ``color`` are forwarded to
    ``RayCollideable``.
    """

    # Length given to the query ray so that it behaves as "infinite" relative
    # to the size of the world.
    RAY_LENGTH = 100
    # Inclusive range of accepted ray angles, in degrees.  Every angle in this
    # range has a negative cosine, i.e. the ray points toward negative x.
    MIN_ANGLE = 100
    MAX_ANGLE = 260

    def __init__(self, dx, dy, *args, **kwargs):
        """Store the x/y extent; remaining arguments go to the base class."""
        super().__init__(*args, **kwargs)
        self._dx = dx
        self._dy = dy

    def _endpoints(self):
        """Return the two endpoints as ``((x0, y0), (x1, y1))``."""
        half_dx, half_dy = self._dx / 2, self._dy / 2
        return (
            (self._xc - half_dx, self._yc - half_dy),
            (self._xc + half_dx, self._yc + half_dy),
        )

    def compute_intersection(self, x, y, angle):
        """Intersect a ray from ``(x, y)`` at ``angle`` degrees with this segment.

        Returns whatever ``segment_intersection`` returns for the segment and
        a ray of length ``RAY_LENGTH``: an ``(xi, yi)`` pair, or ``None`` when
        there is no hit.

        Raises
        ------
        ValueError
            If ``angle`` is outside ``[MIN_ANGLE, MAX_ANGLE]``.
        """
        if angle < self.MIN_ANGLE or angle > self.MAX_ANGLE:
            raise ValueError('Angle is invalid!')

        theta = np.deg2rad(angle)
        segment = np.array(self._endpoints())
        # make the ray hugely long with respect to worldspace
        ray = np.array([
            [x, y],
            [x + self.RAY_LENGTH * np.cos(theta), y + self.RAY_LENGTH * np.sin(theta)],
        ])
        return segment_intersection(segment, ray)

    def render_to_axis(self, ax):
        """Add the segment to ``ax`` as a ``Line2D`` in its colour; return ``ax``."""
        (x0, y0), (x1, y1) = self._endpoints()
        ax.add_line(mlines.Line2D([x0, x1], [y0, y1], color=self._color))
        return ax

class Box(RayCollideable):
    """Axis-aligned rectangle centred on ``(xc, yc)``, built from four ``Segment`` edges."""

    def __init__(self, width, height, *args, **kwargs):
        """Create the box and its four edges.

        ``width`` and ``height`` are the full side lengths; the remaining
        arguments (``xc``, ``yc``, optional ``color``) go to ``RayCollideable``.
        """
        super().__init__(*args, **kwargs)
        self._width = width
        self._height = height

        half_width, half_height = width / 2, height / 2
        # (dx, dy, xc, yc) for each edge, in the order left, right, top, bottom.
        edge_specs = (
            (0, height, self._xc - half_width, self._yc),
            (0, height, self._xc + half_width, self._yc),
            (width, 0, self._xc, self._yc + half_height),
            (width, 0, self._xc, self._yc - half_height),
        )
        # Pass the box colour on to the edges; previously it was dropped and
        # every box was drawn in the default colour regardless of ``color``.
        self._edges = [
            Segment(dx, dy, xc, yc, color=self._color)
            for dx, dy, xc, yc in edge_specs
        ]

    def compute_intersection(self, x, y, angle):
        """Return the edge hit closest to ``(x, y)``, or ``None`` if no edge is hit.

        Each edge is queried with the same ``(x, y, angle)``; any exception an
        edge raises (e.g. for an invalid angle) propagates.
        """
        hits = []
        for edge in self._edges:
            point = edge.compute_intersection(x, y, angle)
            if point is not None:
                hits.append(point)
        if not hits:
            return None
        # Squared distance orders the hits identically to Euclidean distance.
        return min(hits, key=lambda p: (x - p[0]) ** 2 + (y - p[1]) ** 2)

    def render_to_axis(self, ax):
        """Draw all four edges on ``ax`` and return ``ax``."""
        for edge in self._edges:
            edge.render_to_axis(ax)
        return ax

# Smoke-test instances: a diagonal segment and a unit box, both centred on the origin.
s = Segment(dx=5, dy=5, xc=0, yc=0)
b = Box(width=1, height=1, xc=0, yc=0)


In [16]:
class EnvironmentState:
    """2-D aiming environment: an agent on the right aims a ray at a target on the left.

    The agent sits at ``x = AGENT_X`` and can move vertically and rotate its
    aiming angle.  The target is a short vertical ``Segment`` at
    ``x = TARGET_X``; ``Box`` obstructions may lie in between.  After every
    action the aiming ray is traced and the result is stored in
    ``_last_step_reward``: ``1`` if the first thing hit is the target, ``-1``
    if it is an obstruction, ``0`` if nothing is hit.
    """

    REGION_WIDTH = 2
    REGION_HEIGHT = 2
    AGENT_X = 0.9
    TARGET_X = -0.9
    # Vertical distance moved by one position action.
    POSITION_STEP = 0.05
    # Degrees turned by one aim action.
    AIM_STEP = 3
    # Inclusive range of aiming angles (degrees).  Must stay within the range
    # accepted by Segment.compute_intersection, which raises ValueError outside
    # [100, 260].
    MIN_ANGLE = 100
    MAX_ANGLE = 260
    # Grid of allowed y positions, rounded to 2 decimals.
    Y_POSITIONS = np.round(
        np.arange(-REGION_HEIGHT/2, REGION_HEIGHT/2 + POSITION_STEP, POSITION_STEP), 2)

    def __init__(self):
        """Create an environment with the agent and target at y = 0 and no obstructions."""
        self._agent_state_index = (0,0)
        self._last_step_reward = 0

        self._agent_position_x = self.AGENT_X
        self._agent_position_y = 0
        self._agent_aiming_angle = 180

        self._closest_intersection = None

        self._target = Segment(0, self.REGION_HEIGHT/10, self.TARGET_X, 0)
        self._obstructions = []

    def randomize(self, NUMBER_OBSTRUCTIONS = 4, seed=None):
        """Start a new random layout and redraw.

        Parameters
        ----------
        NUMBER_OBSTRUCTIONS : int
            Number of 0.2 x 0.2 boxes to place in the middle third of the
            region (x) at any height (y).
        seed : int or None
            If given, seeds NumPy's global random state *before* anything is
            drawn, so the whole layout (obstructions, agent position, target
            position) is reproducible.

        Raises
        ------
        ValueError
            If ``seed`` is neither ``None`` nor an integer.
        """
        # Validate and apply the seed first.  It used to be applied after the
        # agent and target positions had already been drawn, which left those
        # two unseeded.
        if seed is not None:
            if not isinstance(seed, (int, np.integer)):
                raise ValueError('Random seed must be an integer')
            np.random.seed(seed)

        # Obstructions are drawn first so that a given seed still produces the
        # same obstruction layout as before.
        self._obstructions.clear()
        for _ in range(NUMBER_OBSTRUCTIONS):
            x = np.random.rand()*self.REGION_WIDTH/3 - self.REGION_WIDTH/6
            y = self.REGION_HEIGHT*np.random.rand() - self.REGION_HEIGHT/2
            self._obstructions.append(Box(0.2, 0.2, x, y))

        self._agent_aiming_angle = 180
        self._agent_position_y = np.random.choice(self.Y_POSITIONS)
        self._target = Segment(0, self.REGION_HEIGHT/10, self.TARGET_X,
                               np.random.choice(self.Y_POSITIONS))

        # Without this the drawing kept using the intersection point from the
        # previous layout, which made the aiming angle look as if it had not
        # been reset.
        self._refresh_after_reset()
        self.draw()

    def initialize(self):
        """Put the agent back at its start position and angle (no redraw).

        The target and obstructions are left untouched.
        """
        self._agent_position_x = self.AGENT_X
        self._agent_position_y = 0
        self._agent_aiming_angle = 180
        self._refresh_after_reset()

    def _refresh_after_reset(self):
        """Clear the last reward and re-trace the ray for the current state."""
        self._last_step_reward = 0
        self._closest_intersection = self._cast_ray()[1]

    def _cast_ray(self):
        """Trace the aiming ray; return ``(index, point)`` of the nearest hit.

        ``index`` is 0 for the target and ``i + 1`` for ``_obstructions[i]``.
        Returns ``(None, None)`` when nothing is hit.  On equal distances the
        lowest index wins.
        """
        nearest_index, nearest_point, nearest_distance = None, None, np.inf
        for index, obj in enumerate([self._target] + self._obstructions):
            point = obj.compute_intersection(
                self._agent_position_x,
                self._agent_position_y,
                self._agent_aiming_angle
            )
            if point is None:
                continue
            distance = np.sqrt(
                (self._agent_position_x - point[0])**2 +
                (self._agent_position_y - point[1])**2
            )
            if distance < nearest_distance:
                nearest_index, nearest_point, nearest_distance = index, point, distance
        return nearest_index, nearest_point

    def compute_reward(self):
        """Trace the ray, store the nearest hit in ``_closest_intersection``, return the reward.

        Returns ``1`` if the nearest hit is the target, ``-1`` if it is an
        obstruction, ``0`` if there is no hit.
        """
        index, point = self._cast_ray()
        self._closest_intersection = point
        if index is None:
            return 0
        # index 0 is the target; anything else is an obstruction
        return 1 if index == 0 else -1

    def action_wait(self):
        """Do nothing, then step."""
        self.step()

    def action_pos_up(self):
        """Move the agent up by ``POSITION_STEP`` unless that leaves the region, then step."""
        # Round back onto the 2-decimal grid so repeated steps do not
        # accumulate float error; an exact ``==`` test against the boundary
        # could be missed and let the agent leave the region.
        new_y = np.round(self._agent_position_y + self.POSITION_STEP, 2)
        if new_y <= self.REGION_HEIGHT/2:
            self._agent_position_y = new_y
        self.step()

    def action_pos_down(self):
        """Move the agent down by ``POSITION_STEP`` unless that leaves the region, then step."""
        new_y = np.round(self._agent_position_y - self.POSITION_STEP, 2)
        if new_y >= -self.REGION_HEIGHT/2:
            self._agent_position_y = new_y
        self.step()

    def action_aim_up(self):
        """Decrease the aiming angle by ``AIM_STEP`` (ray tilts toward +y), then step.

        The angle is left unchanged if the result would drop below
        ``MIN_ANGLE``.  (The guard used to test the upper limit, so the angle
        could reach 99 and make the next ray trace raise ``ValueError``.)
        """
        new_angle = self._agent_aiming_angle - self.AIM_STEP
        if new_angle >= self.MIN_ANGLE: # Keep environment from moving to an invalid angle
            self._agent_aiming_angle = new_angle
        self.step()

    def action_aim_down(self):
        """Increase the aiming angle by ``AIM_STEP`` (ray tilts toward -y), then step.

        The angle is left unchanged if the result would exceed ``MAX_ANGLE``.
        """
        new_angle = self._agent_aiming_angle + self.AIM_STEP
        if new_angle <= self.MAX_ANGLE: # Keep environment from moving to an invalid angle
            self._agent_aiming_angle = new_angle
        self.step()

    def step(self):
        """Recompute the reward for the current state and redraw."""
        self._last_step_reward = self.compute_reward()
        self.draw()

    def take_action(self, action):
        """Take an action based on an integer input from 0 to 4.

        0 = wait, 1 = position up, 2 = position down, 3 = aim up, 4 = aim down.
        Any other value is ignored (no step is taken).
        """
        if action == 0:
            self.action_wait()
        elif action == 1:
            self.action_pos_up()
        elif action == 2:
            self.action_pos_down()
        elif action == 3:
            self.action_aim_up()
        elif action == 4:
            self.action_aim_down()

    def draw(self, draw_aiming_ray=False):
        """Redraw the scene on the current matplotlib axes.

        Draws the agent (red circle), the target, the obstructions and the
        aiming ray.  The ray ends at ``_closest_intersection`` when there is
        one and otherwise extends far beyond the region.
        ``draw_aiming_ray`` is accepted but not used.
        """
        ax = plt.gca()
        ax.clear()
        plt.title('Step ; reward={}'.format(self._last_step_reward))
        ax.set_xbound([-self.REGION_WIDTH/2,  self.REGION_WIDTH/2])
        ax.set_ybound([-self.REGION_HEIGHT/2, self.REGION_HEIGHT/2])

        agent = plt.Circle(
            (self._agent_position_x, self._agent_position_y),
            self.REGION_WIDTH/50,
            color='r'
        )
        ax.add_artist(agent)

        for obj in [self._target] + self._obstructions:
            obj.render_to_axis(ax)

        if self._closest_intersection is not None:
            rx, ry = self._closest_intersection
        else:
            # Diagonal of the region.  ``**`` is exponentiation; the previous
            # ``^`` was bitwise XOR and produced the wrong length.
            span = np.sqrt(self.REGION_WIDTH**2 + self.REGION_HEIGHT**2)
            theta = np.deg2rad(self._agent_aiming_angle)
            rx = self._agent_position_x + 10*span*np.cos(theta)
            ry = self._agent_position_y + 10*span*np.sin(theta)
        l = mlines.Line2D(
            [self._agent_position_x, rx],
            [self._agent_position_y, ry]
        )
        ax.add_line(l)
        

In [17]:
# Build the environment and a small manual control panel: four buttons stacked
# to the left of an output area that hosts the matplotlib figure.
env = EnvironmentState()

apup, apdown, aaup, aadown = (
    widgets.Button(description=label)
    for label in ('Pos+', 'Pos-', 'Aim+', 'Aim-')
)

canvas = widgets.Output()
buttons = widgets.VBox(children=[apup, apdown, aaup, aadown])
all_widgets = widgets.HBox(children=[buttons, canvas])
display(all_widgets)


def _env_action(action_name):
    """Return a click handler that calls the named method on the current ``env``."""
    def _handler(_button):
        # ``env`` is looked up at click time, so rebinding it later is honoured.
        return getattr(env, action_name)()
    return _handler


apup.on_click(_env_action('action_pos_up'))
apdown.on_click(_env_action('action_pos_down'))
aaup.on_click(_env_action('action_aim_up'))
aadown.on_click(_env_action('action_aim_down'))

# Create the figure inside the output widget and render the initial state.
with canvas:
    plt.figure(figsize=(4, 4))
    env.draw()
    plt.show()

HBox(children=(VBox(children=(Button(description='Pos+', style=ButtonStyle()), Button(description='Pos-', styl…

In [14]:
# New random layout with no obstructions (the 0 is the obstruction count, not a seed).
env.randomize(NUMBER_OBSTRUCTIONS=0)