In [128]:
import random

In [339]:
from copy import copy
from collections import defaultdict

class Car():
    def __init__(self, environment):
        self.environment = environment
        self._reset()
        self.Q = defaultdict(float)
        self.returns_sum = defaultdict(float)
        self.returns_count = defaultdict(float)
        self.epsilon = 0.1
        self.discount_factor = 1.0
        self.actions = []
        for i in [-1, 0, 1]:
            for j in [-1, 0, 1]:
                if i == 0 and j == 0: continue
                self.actions.append((i, j))
    
    def __str__(self):
        return "#"
    
    def _reset(self):
        self.speed = (0, 0)
        self.position = self.environment.select_start_position()
    
    def select_action(self):
        actions = self.actions
        speed = self.speed
        actions = list(filter(lambda action: speed[0] + action[0] < 5, actions))
        actions = list(filter(lambda action: speed[0] + action[0] > -5, actions))
        actions = list(filter(lambda action: speed[1] + action[1] < 5, actions))
        actions = list(filter(lambda action: speed[1] + action[1] > -5, actions))
            
        #if self.speed[0] == 0:
        #    actions = list(filter(lambda action: action[0] >= 0, actions))
        #if self.speed[1] == 0:
        #    actions = list(filter(lambda action: action[1] >= 0, actions))
        
        return random.choice(actions)
    
    def play(self):
        steps = []
        rewards = []
        while True:
            new_position, reward, action = self.step()
            steps.append(((self.position, self.speed), action))
            rewards.append(reward)
            self.position = new_position
            
            if self.environment.is_finish(new_position):
                print("finish!")
                break
        return steps, rewards
    
    def step(self):
        action = self.select_action()
        self.speed = (self.speed[0] + action[0], self.speed[1] + action[1])
        new_position = (self.position[0] + self.speed[0], self.position[1] + self.speed[1])
        new_position, path = self.environment.move_to(self, new_position)
        
        if self.environment.is_start(new_position):
            self.speed = (0, 0)
        
        return new_position, self.reward(new_position), action
    
    def reward(self, new_position):
        if self.environment.is_finish(new_position):
            return 0
        return -1
        
    def move(self):
        self.speed = self.select_speed()
        new_position = (self.car.position[0] + self.speed[0], self.car.position[1] + self.speed[1])

from enum import Enum
class Status(Enum):
    STOPPED = "stopped"
    RUNNING = "running"
    
class Environment():
    def __init__(self, filename='racetrack1_32x17.txt'):    
        self.scenario = self._load_scenario(filename)
        self.finish = "f"
        self.start = "s"
        self.track = " "
        self.wall = "+"
        self.waypoint = "·"
        self.status = Status.STOPPED
        self.car = Car(self)
        self.path = []
    
    def _load_scenario(self, filename):
        with open(filename) as f:
            scenario = list(map(lambda line: [c for c in line], f.read().split("\n")))
        return scenario
    
    def __str__(self):
        top_nums = "  " + "".join(map(lambda n: str(n%10), range(0, len(self.scenario[0]))))
        buffer = top_nums + "\n  " + "-"*len(self.scenario[0])
        buffer += "\n"
        
        for row_index, row in enumerate(self.scenario):
            buffer += str(row_index % 10) + "|"
            for col_index, col in enumerate(self.scenario[row_index]):
                if self.car.position == (row_index, col_index):
                    buffer += str(self.car)
                    continue
                if any(filter(lambda waypoint: waypoint == (row_index, col_index), self.path)):
                    buffer += self.waypoint
                    continue
                
                buffer += self.scenario[row_index][col_index]
            
            buffer += "\n"
            
        return buffer
    
    def select_start_position(self):
        starts = []
        for row_index, row in enumerate(self.scenario):
            for col_index, col in enumerate(self.scenario[row_index]):
                if self.scenario[row_index][col_index] == self.start:
                    starts.append((row_index, col_index))
        return random.choice(starts)
    
    def move_to(self, car, new_position):
        position = car.position
        increment_v = 1 if new_position[0] - position[0] >= 0 else -1
        increment_h = 1 if new_position[1] - position[1] >= 0 else -1
        
        path = [position]
        while position != new_position:
            if abs(new_position[0] - position[0]) >= abs(new_position[1] - position[1]):
                position = (position[0] + increment_v, position[1])
            else:
                position = (position[0], position[1] + increment_h)
                
            path.append(position)
            if self.is_wall(position):
                return self.select_start_position(), path
            elif self.is_finish(position):
                return position, path
                
        return new_position, path
    
    def is_track(self, position):
        if self.scenario[position[0]][position[1]] == self.track:
            return True
        
        return False
    
    def is_wall(self, position):
        if position[0] < 0 or position[0] > len(self.scenario)-1:
            return True
        elif position[1] < 0 or position[1] > len(self.scenario[position[0]])-1:
            return True
        elif self.scenario[position[0]][position[1]] == self.wall:
            return True
        else:
            return False
    
    def is_start(self, position):
        if self.scenario[position[0]][position[1]] == self.start:
            return True
        return False
    
    def is_finish(self, position):
        if self.scenario[position[0]][position[1]] == self.finish:
            return True
        return False


env = Environment("racetrack2_30x32.txt")
car = env.car

In [346]:
car._reset()
steps, rewards = car.play()
print(len(steps), sum(rewards))

    

finish!
24 -23


In [347]:
steps

[(((29, 1), (-1, -1)), (-1, -1)),
 (((28, 0), (0, 0)), (0, -1)),
 (((29, 21), (0, 0)), (0, -1)),
 (((29, 20), (0, 0)), (0, 1)),
 (((29, 21), (0, 0)), (0, -1)),
 (((29, 20), (0, 0)), (1, 0)),
 (((29, 7), (0, 0)), (1, 0)),
 (((29, 17), (-1, 0)), (-1, 0)),
 (((28, 17), (-2, 0)), (-1, 0)),
 (((26, 17), (-1, -1)), (1, -1)),
 (((25, 16), (-1, 0)), (0, 1)),
 (((24, 16), (-1, -1)), (0, -1)),
 (((23, 15), (0, 0)), (1, 1)),
 (((23, 15), (-1, -1)), (-1, -1)),
 (((22, 14), (-1, 0)), (0, 1)),
 (((21, 14), (-2, 1)), (-1, 1)),
 (((19, 15), (-3, 2)), (-1, 1)),
 (((16, 17), (-3, 1)), (0, -1)),
 (((13, 18), (-2, 2)), (1, 1)),
 (((11, 20), (-2, 1)), (0, -1)),
 (((9, 21), (-1, 2)), (1, 1)),
 (((8, 23), (0, 3)), (1, 1)),
 (((8, 26), (-1, 3)), (-1, 0)),
 (((7, 29), (0, 4)), (1, 1))]