In [1]:
import sys

import numpy as np
import pygame
import random
import math

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [6]:
# COLORS
BLACK = (0, 0, 0)
WHITE = (200, 200, 200)
BLUE = (30, 144, 255)
RED = (255, 0, 0)
GREEN = (0, 255, 0)
YELLOW = (255, 255, 0)

In [7]:
class Player:
    def __init__(self,x,y,limit):
        self.x = x
        self.y = y
        self.limit = limit
        self.color = BLUE

    def move(self, direction):
        # UP
        if direction == 0 and self.y > 0:
            self.y -= 1
        # DOWN
        elif direction == 1 and self.y < self.limit:
            self.y += 1
        # LEFT
        elif direction == 2 and self.x > 0:
            self.x -= 1
        # RIGHT
        elif direction == 3 and self.x < self.limit:
            self.x += 1

class Goal:
    def __init__(self,x,y):
        self.x = x
        self.y = y
        self.color = RED

class Gridworld:
    def __init__(self,rows, cols):
        self.rows = 10
        self.cols = 10

        self.WINDOW_SIZE = 400
        self.BLOCS_PER_ROW = 10
        self.BLOCK_SIZE = self.WINDOW_SIZE / self.BLOCS_PER_ROW
        self.PLAYER_SIZE = self.BLOCK_SIZE
        self.PLAYER_X = 4
        self.PLAYER_Y = 4
        self.GOAL_X = 8
        self.GOAL_Y = 8
        self.TURN_TIME = 100

        self.player = Player(self.PLAYER_X,self.PLAYER_Y,self.BLOCS_PER_ROW-1)
        self.goal = Goal(self.GOAL_X,self.GOAL_Y)
        self.action_space = 4
        self.state = None
        self.steps_taken = 0
        self.screen = pygame.display.set_mode((self.WINDOW_SIZE, self.WINDOW_SIZE))

    def step(self, action):
        self.steps_taken += 1

        # print("Step: %s - Action %s" % (self.steps_taken, action))

        self.player.move(action)
        self.state = (self.player.x, self.player.y, self.goal.x, self.goal.y)

        reward = 0
        done = False
        info = {"direction": action}
        if self.player.x == self.goal.x and self.player.y == self.goal.y:
            reward = 1
            done = True

        return np.array(self.state), reward, done, info

    def reset(self):
        self.player = Player(self.PLAYER_X,self.PLAYER_Y,self.BLOCS_PER_ROW-1)
        self.goal = Goal(self.GOAL_X,self.GOAL_Y)
        self.state = (self.player.x, self.player.y, self.goal.x, self.goal.y)
        self.steps_taken = 0

        return np.array(self.state)

    def draw_grid(self):
        for row in range(self.BLOCS_PER_ROW):
            for col in range(self.BLOCS_PER_ROW):
                rect = pygame.Rect(self.BLOCK_SIZE * col,
                                   self.BLOCK_SIZE * row,
                                   self.BLOCK_SIZE,
                                   self.BLOCK_SIZE)
                pygame.draw.rect(self.screen, WHITE, rect, 1)

    def draw_rect(self, x, y, color):
        rect = pygame.Rect(x * self.BLOCK_SIZE, y * self.BLOCK_SIZE, self.PLAYER_SIZE, self.PLAYER_SIZE)
        pygame.draw.rect(self.screen, color, rect)

    def render(self, q_table):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()

        # Reset backdrop
        self.screen.fill(BLACK)
        self.draw_grid()

        # Q_table values
        # self.draw_highest_Q_value(Q_table)
        # self.render_table(q_table)

        # Draw Player & Target
        self.draw_rect(self.goal.x, self.goal.y, self.goal.color)
        self.draw_rect(self.player.x, self.player.y, self.player.color)
        pygame.display.flip()
        pygame.time.wait(self.TURN_TIME)

In [8]:
class GridWorldAgent:
    def __init__(self, rows, cols, min_lr=0.1, min_er=0.1):
        self.min_learning_rate = min_lr
        self.min_exploration_rate = min_er
        self.discount = 0.95
        self.decay = 25
        self.actions = {"up":0, "right":1, "down":2, "left":3}
        self.action_space = [0,1,2,3]
        self.q_table = np.zeros((rows, cols, len(self.action_space)))

    def select_action(self, state, n):
        decayed_er = self.decay_rate(n, self.min_exploration_rate)
        if np.random.random() < decayed_er:
            return random.randint(0, len(self.action_space)-1)
        else:
            return np.argmax(self.q_table[state])

    # Decays the learning rate so that the model learns more at the beginning.
    def decay_rate(self, n, min_value):
        return max(min_value, min(1.0, 1.0 - math.log10((n + 1) / self.decay)))

    def update_q_table(self, state, action, reward, new_state, lr):
        future_optimal_value = np.max(self.q_table[new_state])
        learned_value = reward + self.discount * future_optimal_value
        old_value = self.q_table[state][action]
        self.q_table[state][action] = (1-lr)*old_value + lr*learned_value


In [9]:
training_episodes = 1000

# Get Game environment
env = Gridworld(rows=10, cols=10)
agent = GridWorldAgent(rows=env.rows, cols=env.cols,min_lr=0.01,min_er=0.1)

# Initial Q-table
q_table = np.zeros((10, 10) + (env.action_space,))

# Training
print("Training Started...")
for episode in range(training_episodes):

    lr = agent.decay_rate(episode,agent.min_learning_rate)

    print("Episode %s" % episode)
    current_state = env.reset()
    done = False
    for t in range(100):
        # Decide known action or random action.
        action = agent.select_action(current_state, episode)

        # Increment environment
        obs, reward, done, info = env.step(action)
        new_state = obs

        # Update Q-Table
        agent.update_q_table(current_state,action,reward,new_state,lr)
        # Update state
        current_state = new_state

        if episode > 800:
            env.render(q_table)

        if done:
            print("Success after %s steps" % t)
            break
    #print("Steps taken: %dt" % env.steps_taken)

print("Training Complete!")

Training Started...
Episode 0


TypeError: update_q_table() missing 1 required positional argument: 'lr'