In [22]:
from keras.models import Sequential, load_model
from keras.optimizers import Adam
from keras.layers import Dense, Flatten, Dropout
from keras.callbacks import TensorBoard, EarlyStopping
from keras.initializers import RandomUniform

from collections import deque

import random
import numpy as np

from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [23]:
# Corner coordinates of the bridge, each stored as an (x, y) pair.
bridge_start_west = np.array([-3727.41, 1668.2])
bridge_start_east = np.array([-3735.6, 1668.89])

bridge_end_west = np.array([-3734.51, 1579.9])
bridge_end_east = np.array([-3742.36, 1580.42])

# Midpoints of the end edge and of the start edge; the latter is where
# every episode begins.
end_avg = (bridge_end_west + bridge_end_east) / 2
init_pos = (bridge_start_west + bridge_start_east) / 2

# Absolute x- and y-extent of the east edge, and the angle whose tangent is
# their ratio (x-extent over y-extent).
bridge_a, bridge_b = np.abs(bridge_end_east - bridge_start_east)
bridge_angle = np.arctan(bridge_a / bridge_b)

start_angle = bridge_angle

# Slope (k) and intercept (m) of the line through the two end corners,
# i.e. the goal line y = k*x + m.
_goal_dx, _goal_dy = bridge_end_west - bridge_end_east
k = _goal_dy / _goal_dx
m = bridge_end_west[1] - (k * bridge_end_west[0])

# Length of one move: half the x-distance between the two start corners.
step_size = np.abs(bridge_start_west[0] - bridge_start_east[0]) / 2

In [35]:
def is_goal(x, y):
    """Tell whether (x, y) has crossed the goal line.

    The goal line is ``y = k*x + m``, built from the two bridge-end corners.
    Returns True only when ``y`` is strictly below the line at this ``x``.
    """
    return y < (k * x) + m

In [25]:
def is_within_bridge(x, y):
    """Return True when (x, y) is contained in the bridge quadrilateral.

    The quadrilateral is spanned by the four bridge corner points, visited in
    the order start-west, start-east, end-east, end-west.
    """
    corners = [bridge_start_west, bridge_start_east, bridge_end_east, bridge_end_west]
    return Polygon(corners).contains(Point(x, y))

In [26]:
def get_right_edge():
    """Return ``(slope, intercept)`` of the line through the two west corners.

    The line passes through ``bridge_start_west`` and ``bridge_end_west`` and
    is expressed as ``y = slope * x + intercept``.
    """
    delta_x, delta_y = bridge_start_west - bridge_end_west
    slope = delta_y / delta_x
    intercept = bridge_end_west[1] - (slope * bridge_end_west[0])

    return slope, intercept
    
# Compute the edge-line slope/intercept once; get_edge_dist reads these globals.
k_r, m_r = get_right_edge()

In [27]:
def get_edge_dist(x, y):
    """Return the perpendicular distance from (x, y) to the edge line.

    The edge line is ``y = k_r*x + m_r``. The distance is found by
    intersecting it with the perpendicular line through (x, y) and measuring
    the Euclidean distance to that intersection point.
    """
    # Line through (x, y) at right angles to the edge.
    perp_slope = -1 / k_r
    perp_intercept = y - (perp_slope * x)

    # Foot of the perpendicular: where the two lines meet.
    foot_x = (perp_intercept - m_r) / (k_r - perp_slope)
    foot_y = (k_r * foot_x) + m_r

    return np.sqrt((foot_x - x) ** 2 + (foot_y - y) ** 2)

In [28]:
def get_new_angle(action):
    """Map a discrete action to a heading angle in radians.

    Action 0 keeps ``bridge_angle``; action 1 subtracts pi/4 from it and
    action 2 adds pi/4 to it. Any other action raises ``KeyError``.
    """
    quarter_pi = np.pi / 4
    offsets = {
        0: 0.0,
        1: -quarter_pi,
        2: quarter_pi,
    }

    return bridge_angle + offsets[action]

In [29]:
def do_action(action, x, y):
    """Move one ``step_size`` from (x, y) along the heading chosen by ``action``.

    The heading comes from ``get_new_angle``. Its sine component is subtracted
    from ``x`` and its cosine component from ``y``. Returns the new
    ``(x, y)`` tuple.
    """
    heading = get_new_angle(action)

    along_y = step_size * np.cos(heading)
    along_x = step_size * np.sin(heading)

    return x - along_x, y - along_y

In [30]:
# Network dimensions: the state is (x, y, edge distance) and there are three
# discrete actions (see get_new_angle).
state_size = 3
action_size = 3

# Rewards handed out by step().
terminate_reward = 1000   # new position satisfies is_goal()
step_reward = -0.01       # new position is still on the bridge
stuck_reward = -1000      # move left the bridge and was rejected

In [31]:
def step(action, p_x, p_y):
    """Apply ``action`` from position (p_x, p_y) and score the outcome.

    The candidate position is classified in this order:

    * goal reached  -> move accepted, ``terminate_reward``, episode done;
    * on the bridge -> move accepted, ``step_reward``;
    * otherwise     -> move rejected (position unchanged), ``stuck_reward``.

    Returns ``(state, reward, done, p_x, p_y)`` where ``state`` has shape
    ``[1, state_size]`` and holds the resulting x, y and edge distance.
    """
    cand_x, cand_y = do_action(action, p_x, p_y)

    if is_goal(cand_x, cand_y):
        reward, done = terminate_reward, True
        p_x, p_y = cand_x, cand_y
    elif is_within_bridge(cand_x, cand_y):
        reward, done = step_reward, False
        p_x, p_y = cand_x, cand_y
    else:
        # Off the bridge: keep the previous position and penalise the move.
        reward, done = stuck_reward, False

    observation = [p_x, p_y, get_edge_dist(p_x, p_y)]
    state = np.reshape(observation, [1, state_size])

    return state, reward, done, p_x, p_y
    

In [32]:
def reset():
    """Place the player at ``init_pos`` and build the initial state.

    Returns ``(state, p_x, p_y)`` where ``state`` has shape
    ``[1, state_size]`` and holds x, y and the edge distance at the start.
    """
    start_x, start_y = init_pos

    observation = [start_x, start_y, get_edge_dist(start_x, start_y)]
    state = np.reshape(observation, [1, state_size])

    return state, start_x, start_y
    

In [33]:
class Agent:
    """Epsilon-greedy Q-learning agent backed by a small dense Keras network.

    Transitions are stored in a bounded replay memory; ``replay`` fits the
    network on a random sample of them and then decays the exploration rate.
    """

    def __init__(self, state_size, action_size, discount, eps, eps_decay, eps_min, l_rate, decay_linear,
                 path="my_model.h5", mem_size=2000):
        """Store the hyper-parameters and build the network.

        Args:
            state_size: number of input features of the network.
            action_size: number of discrete actions (network outputs).
            discount: discount factor applied to the best next-state value.
            eps: initial exploration probability.
            eps_decay: amount subtracted from ``eps`` (linear mode) or factor
                multiplied into it (non-linear mode) on every ``decay`` call.
            eps_min: lower bound for ``eps``.
            l_rate: learning rate passed to the Adam optimizer.
            decay_linear: True for subtractive decay, False for multiplicative.
            path: file the model is written to by ``save_model``.
            mem_size: maximum number of transitions kept in replay memory.
        """
        self.path = path
        self.state_size = state_size
        self.action_size = action_size
        self.mem = deque(maxlen=mem_size)
        self.discount = discount
        self.eps = eps
        self.eps_decay = eps_decay
        self.decay_linear = decay_linear
        self.eps_min = eps_min
        self.l_rate = l_rate
        self.model = self.init_model()

    def save_model(self):
        """Write the current network to ``self.path``."""
        self.model.save(self.path)

    def init_model(self):
        """Build and compile the network: two 30-unit ReLU layers, linear output."""
        model = Sequential()
        model.add(Dense(30, input_dim=self.state_size, activation='relu'))
        model.add(Dense(30, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        # NOTE(review): newer Keras releases name this argument `learning_rate`;
        # `lr` is kept to match the Keras version this notebook was written for.
        model.compile(loss='mse', optimizer=Adam(lr=self.l_rate))

        return model

    def action(self, state):
        """Pick an action: random with probability ``eps``, else the network's argmax."""
        if np.random.rand() <= self.eps:
            return random.randrange(self.action_size)

        actions = self.model.predict(state)

        return np.argmax(actions[0])

    def remember(self, state, action, reward, next_state, terminal):
        """Append one transition to the replay memory (oldest entries fall out)."""
        self.mem.append((state, action, reward, next_state, terminal))

    def replay(self, batch_size):
        """Fit the network on up to ``batch_size`` remembered transitions, then decay ``eps``.

        When fewer than ``batch_size`` transitions are stored, all of them are
        used; otherwise a random sample is drawn. Each transition is fitted
        individually, in order.
        """
        if len(self.mem) < batch_size:
            batch = self.mem
        else:
            batch = random.sample(self.mem, batch_size)

        for state, action, reward, next_state, terminal in batch:
            target = reward

            if not terminal:
                # Bootstrap from the best predicted value of the next state.
                target += self.discount * np.amax(self.model.predict(next_state)[0])

            # Only the taken action's output is moved towards the target; the
            # other outputs keep their current predictions.
            target_f = self.model.predict(state)
            target_f[0][action] = target

            self.model.fit(state, target_f, epochs=1, verbose=0)

        self.decay()

    def decay(self):
        """Reduce ``eps`` by one decay step without dropping below ``eps_min``."""
        if self.eps > self.eps_min:
            if self.decay_linear:
                decayed = self.eps - self.eps_decay
            else:
                decayed = self.eps * self.eps_decay

            # Clamp: a single step may otherwise overshoot the floor.
            self.eps = max(self.eps_min, decayed)
            

In [None]:
# Training budget: number of episodes and the step cap per episode.
episodes = 25000
steps = 200

agent = Agent(
            state_size = state_size,
            action_size = action_size,
            discount = 0.98,
            eps = 1,
            # Linear decay is applied once per episode (one replay() call per
            # episode), so tie the decrement to the episode budget.
            eps_decay = 1 / episodes,
            eps_min = 0.001,
            l_rate = 0.0001,
            decay_linear = True
            )

#Train agent
# Running totals over the whole run.
goalCounter = 0
goalAvg = -1
stepCounter = 0
# Totals for the current 1000-episode reporting window.
epGoalCounter = 0
epGoalAvg = -1
epStepCounter = 0

# range() excludes its upper bound, so use `+ 1` to really run `episodes`
# episodes of at most `steps` steps each.
for ep in range(1, episodes + 1):

    state, player_x, player_y = reset()

    for st in range(1, steps + 1):

        action = agent.action(state)

        next_state, reward, done, player_x, player_y = step(action, player_x, player_y)

        agent.remember(state, action, reward, next_state, done)

        state = next_state

        if done:
            goalCounter += 1
            epGoalCounter += 1
            epStepCounter += st
            epGoalAvg = epStepCounter / epGoalCounter
            break

    # One training pass (and one epsilon decay) per episode.
    agent.replay(64)

    if ep % 1000 == 0:
        # Checkpoint and report, then start a fresh reporting window.
        agent.save_model()
        print("episode {} done, found goal {} times with and avg step of {}, total goals: {}".format(ep, epGoalCounter, epGoalAvg, goalCounter))
        epGoalCounter = 0
        epGoalAvg = -1
        epStepCounter = 0


agent.save_model()
print("summary: total goals found: {}, an avg of {} episodes reached the goal".format(goalCounter, goalCounter/episodes))

episode 1000 done, found goal 1000 times with and avg step of 33.401, total goals: 1000
episode 2000 done, found goal 1000 times with and avg step of 32.761, total goals: 2000
episode 3000 done, found goal 1000 times with and avg step of 32.019, total goals: 3000
