In [1]:
import pygame
from pygame.constants import K_RIGHT, K_LEFT, K_UP, K_DOWN, K_SPACE, QUIT, KEYDOWN
from ple.games import base
import numpy as np
import sys

class Map(pygame.sprite.Sprite):
    """Tile grid of the game world: islands, open water and fish.

    ``tileMap`` is a row-major list of lists (``tileMap[row][column]``) holding
    one of the tile codes ``ISLAND``, ``WATER`` or ``FISH``.  Columns follow
    ``lon`` and rows follow ``lat``.
    """

    # Tile codes stored in ``tileMap``.
    ISLAND = 0
    WATER = 1
    FISH = 2

    def __init__(self, lon, lat, SCREEN_WIDTH, SCREEN_HEIGHT):
        """Store the grid axes and derive the tile size from the screen size.

        Args:
            lon: list of longitude values, one per grid column.
            lat: list of latitude values, one per grid row.
            SCREEN_WIDTH: screen width used to size the tiles.
            SCREEN_HEIGHT: screen height used to size the tiles.
        """
        pygame.sprite.Sprite.__init__(self)

        self.gridLon = lon
        self.gridLat = lat
        self.gridSize = (len(lon), len(lat))
        self.tileMap = None

        # useful game dimensions
        self.MAPWIDTH = len(lon)
        self.MAPHEIGHT = len(lat)
        self.TILESIZE = min(SCREEN_WIDTH/self.MAPWIDTH, SCREEN_HEIGHT/self.MAPHEIGHT)

        # Tile images, loaded on the first draw() and reused afterwards.
        self._textures = None

    def draw(self, screen):
        """Blit every tile of ``tileMap`` onto ``screen``."""
        if self._textures is None:
            # Load the images once instead of hitting the disk on every frame.
            self._textures = {
                self.ISLAND: pygame.image.load('island.png'),
                self.WATER: pygame.image.load('water.png'),
                self.FISH: pygame.image.load('fish.png'),
            }
        textures = self._textures

        for row in range(self.MAPHEIGHT):
            for column in range(self.MAPWIDTH):
                # draw the image for this tile at its pixel position
                screen.blit(textures[self.tileMap[row][column]],
                            (column*self.TILESIZE, row*self.TILESIZE))

    def computeTileMap(self, island):
        """Reset ``tileMap`` to water everywhere except the given island tiles.

        Args:
            island: iterable of ``(lon, lat)`` positions that are island tiles.
        """
        self.tileMap = None
        n_cols, n_rows = self.gridSize
        tilemap = [[self.WATER for _ in range(n_cols)] for _ in range(n_rows)]
        for pos in island:
            col, row = self.getIdx(pos)
            tilemap[row][col] = self.ISLAND
        self.tileMap = tilemap

    def getIdx(self, pos):
        """Return the ``(column, row)`` grid index of a ``(lon, lat)`` position."""
        return (self.gridLon.index(pos[0]), self.gridLat.index(pos[1]))

    def updateTileMap(self):
        """Randomly spawn and remove fish in one quadrant of the grid.

        Only columns in the second half and rows in the first half of the grid
        are touched.  A water tile becomes fish with probability 1/101; a fish
        tile stays fish with probability 80/100.
        """
        # Integer division replaces np.int(x/2): the np.int alias was removed
        # in NumPy 1.24 and the result is the same for non-negative sizes.
        first_col = self.gridSize[0] // 2
        last_row = self.gridSize[1] // 2

        spawn_pool = [self.FISH]*1 + [self.WATER]*100
        stay_pool = [self.FISH]*80 + [self.WATER]*20

        for col in range(first_col, self.gridSize[0]):
            for row in range(last_row):
                if self.tileMap[row][col] == self.WATER:
                    self.tileMap[row][col] = np.random.choice(spawn_pool)
                elif self.tileMap[row][col] == self.FISH:
                    self.tileMap[row][col] = np.random.choice(stay_pool)

class Bird(pygame.sprite.Sprite):
    """The player: a bird that moves over the grid and dives for fish.

    Energy starts at 100; every move attempt costs 1 and every dive costs 25.
    """

    _WHITE = (255, 255, 255)
    _BLACK = (0, 0, 0)
    _RED = (255, 0, 0)
    _PURPLE = (50, 50, 50)

    # Fixed pixel layout of the energy bar (left, top, width, height).
    _BAR_LEFT = 733.4
    _BAR_TOP = 200
    _BAR_WIDTH = 50
    _BAR_HEIGHT = 200

    def __init__(self, pos):
        """Create a bird at ``pos``, a ``(lon, lat)`` pair (copied into an array)."""
        pygame.sprite.Sprite.__init__(self)
        self.position = np.array(pos)
        self.nbMove = 0
        self.nbDive = 0
        self.nbFish = 0
        self.catch = False
        # Images, font and static label, loaded on the first draw().
        self._assets = None

    def getLife(self):
        """Return the remaining energy (may be negative)."""
        return 100 - self.nbMove - 25*self.nbDive

    # Each move counts as one move even when the bird is blocked by the edge.
    def moveRight(self, Map):
        """Increase longitude by one unless already at the maximum."""
        if self.position[0] < max(Map.gridLon):
            self.position[0] += 1
        self.nbMove += 1

    def moveLeft(self, Map):
        """Decrease longitude by one unless already at the minimum."""
        if self.position[0] > min(Map.gridLon):
            self.position[0] -= 1
        self.nbMove += 1

    def moveUp(self, Map):
        """Increase latitude by one unless already at the maximum."""
        if self.position[1] < max(Map.gridLat):
            self.position[1] += 1
        self.nbMove += 1

    def moveDown(self, Map):
        """Decrease latitude by one unless already at the minimum."""
        if self.position[1] > min(Map.gridLat):
            self.position[1] -= 1
        self.nbMove += 1

    def dive(self, Map):
        """Dive at the current tile; a fish tile (code 2) is caught and turns to water (code 1).

        Always counts as a dive.  On a catch, ``nbFish`` is incremented and
        ``catch`` is set to True; the caller is expected to reset ``catch``.
        """
        self.nbDive += 1
        col, row = Map.getIdx(self.position)
        if Map.tileMap[row][col] == 2:
            Map.tileMap[row][col] = 1
            self.nbFish += 1
            self.catch = True

    def _loadAssets(self):
        """Load the images and font used by draw() and pre-render the static label."""
        player = pygame.image.load('bird.png').convert()
        # NOTE(review): 1000 is outside the usual 0-255 alpha range; kept as in
        # the original code - confirm the intended value.
        player.set_alpha(1000)
        player.set_colorkey(self._WHITE)
        font = pygame.font.Font('FreeSansBold.ttf', 18)
        return {
            'player': player,
            'legend': pygame.image.load('fish_legend.png'),
            'font': font,
            'energy_label': font.render("Energy", True, self._WHITE, self._BLACK),
        }

    def draw(self, Map, screen):
        """Draw the bird, the fish counter and the energy bar on ``screen``."""
        if self._assets is None:
            # Load once instead of re-reading the files on every frame.
            self._assets = self._loadAssets()
        assets = self._assets

        # display the player at the correct position
        col, row = Map.getIdx(self.position)
        screen.blit(assets['player'], (col*Map.TILESIZE, row*Map.TILESIZE))

        # display score, one tile to the right of the grid
        hud_left = (Map.gridSize[0] + 1)*Map.TILESIZE
        screen.blit(assets['legend'], (hud_left, Map.TILESIZE))
        score = assets['font'].render(str(self.nbFish), True, self._WHITE, self._BLACK)
        screen.blit(score, (hud_left, 2 * Map.TILESIZE))

        # display health bar; clamp so a negative energy never yields a
        # rectangle with negative height.
        life = max(0, self.getLife())
        bar_bottom = self._BAR_TOP + self._BAR_HEIGHT
        pygame.draw.rect(screen, self._PURPLE,
                         (self._BAR_LEFT, self._BAR_TOP, self._BAR_WIDTH, self._BAR_HEIGHT))
        pygame.draw.rect(screen, self._RED,
                         (self._BAR_LEFT, bar_bottom - 2*life, self._BAR_WIDTH, 2*life))

        screen.blit(assets['energy_label'], (self._BAR_LEFT, 170))

class RLBird(base.PyGameWrapper):
    """PLE game wrapper: a bird flies over a tile map and dives to catch fish.

    Actions are the four arrow keys (move) and space (dive).  The game is over
    when the bird's energy reaches zero or below.
    """

    def __init__(self, width, height, lon, lat, init_bird_position, init_island_position):
        """Register the actions and remember the initial layout.

        Args:
            width, height: screen size in pixels.
            lon, lat: lists of grid coordinates (columns and rows).
            init_bird_position: starting ``(lon, lat)`` of the bird.
            init_island_position: iterable of ``(lon, lat)`` island tiles.
        """
        actions = {
            "left": K_LEFT,
            "right": K_RIGHT,
            "down": K_DOWN,
            "up": K_UP,
            "dive": K_SPACE
        }

        base.PyGameWrapper.__init__(self, width, height, actions=actions)

        # Drawing specific
        self.width = width
        self.height = height

        # Game specific
        self.lon = lon
        self.lat = lat
        self.init_bird_position = init_bird_position
        self.init_island_position = init_island_position

    def _handle_player_events(self):
        """Apply all pending pygame events to the game state.

        Every key press first advances the fish simulation, then performs the
        matching bird action.  A dive adds the "win" reward to the score when a
        fish was caught and the "loss" reward otherwise.
        """
        for event in pygame.event.get():
            if event.type == QUIT:
                # end the game and close the window
                pygame.quit()
                sys.exit()
            elif event.type == KEYDOWN:
                self.map.updateTileMap()
                if event.key == K_RIGHT:
                    self.bird.moveRight(self.map)
                elif event.key == K_LEFT:
                    self.bird.moveLeft(self.map)
                elif event.key == K_UP:
                    self.bird.moveUp(self.map)
                elif event.key == K_DOWN:
                    self.bird.moveDown(self.map)
                elif event.key == K_SPACE:
                    self.bird.dive(self.map)
                    # Use the wrapper's own reward table (the one PLE adjusts
                    # through reward_values) rather than a notebook global.
                    outcome = "win" if self.bird.catch else "loss"
                    self.score += self.rewards[outcome]

                self.bird.catch = False

    def init(self):
        """(Re)start a game: fresh map, fresh bird, zero score."""
        # Set Map
        m = Map(self.lon, self.lat, self.width, self.height)
        m.computeTileMap(self.init_island_position)
        m.updateTileMap()
        self.map = m

        # Set Bird
        self.bird = Bird(self.init_bird_position)

        # Set Score
        self.score = 0

    def getGameState(self):
        """Return the bird's ``(column, row)`` index and the tile map as an array."""
        state = {
            "bird_idx": self.map.getIdx(self.bird.position),
            "map": np.array(self.map.tileMap)
        }
        return state

    def getScore(self):
        """Return the accumulated score."""
        return self.score

    def game_over(self):
        """Return True once the bird has no energy left."""
        return self.bird.getLife() <= 0

    def step(self, dt):
        """Advance one frame: process input, then redraw map and bird.

        ``dt`` is accepted for the wrapper interface but not used.
        """
        # -------------- update game
        self._handle_player_events()

        # -------------- update drawing
        self.map.draw(self.screen)
        self.bird.draw(self.map, self.screen)

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html
couldn't import doomish
Couldn't import doom


# RLBird Training

## Game Overview

Run the following cell and play by pressing:
* up, down, right, left: movement on the grid
* space: dive

**Objective**: Catch two fish before running out of energy

In [2]:
import pygame
from pygame.constants import *
from ple import PLE
#from rlbird import RLBird
import numpy as np

# Grid axes: 20 longitudes (west to east) and 20 latitudes listed north to south.
lon = list(range(-40, -20))
lat = list(reversed(range(-10, 10)))

# List of coordinates for islands
island = [(-38, -9), (-37, -9), (-36, -8)]
birdStart = [-36, -8]

SCREEN_WIDTH = 800
SCREEN_HEIGHT = 700

# Reward table; only "loss" and "win" are non-zero.
rewards = dict(positive=0.0, negative=0.0, tick=0.0, loss=-1.0, win=1.0)

# run.py
pygame.init()
game = RLBird(
    width=SCREEN_WIDTH,
    height=SCREEN_HEIGHT,
    lon=lon,
    lat=lat,
    init_bird_position=birdStart,
    init_island_position=island,
)
game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32)
game.init()

# Main loop: restart when the game is over, then advance and show one frame.
# It only stops when the game itself exits on a window-close event.
dt = 0
while True:
    if game.game_over():
        game.reset()

    game.step(dt)
    pygame.display.update()

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


## Playing through PLE

Pygame Learning Environment - PLE

In [4]:
def UserPolicy(state):
    """Block until the user presses an action key and return that key.

    Args:
        state: current game state; accepted for the policy interface, not used.

    Returns:
        One of K_RIGHT, K_LEFT, K_UP, K_DOWN or K_SPACE.
    """
    action_keys = (K_RIGHT, K_LEFT, K_UP, K_DOWN, K_SPACE)
    while True:
        # wait() sleeps until an event arrives instead of spinning on get().
        event = pygame.event.wait()
        if event.type == pygame.KEYDOWN and event.key in action_keys:
            return event.key
                
def NaivePolicy(state):
    """Dive when the bird is on a fish tile, otherwise move in a random direction.

    Args:
        state: dict with ``'bird_idx'`` as a ``(column, row)`` pair and
            ``'map'`` as a row-major tile grid where 2 marks a fish.

    Returns:
        K_SPACE on a fish tile, else one of K_UP, K_DOWN, K_RIGHT, K_LEFT
        chosen uniformly at random.
    """
    # bird_idx is (column, row) but the map is indexed [row][column];
    # indexing with the raw pair would look at the transposed cell.
    col, row = state['bird_idx']
    if state['map'][row][col] == 2:
        return K_SPACE
    return np.random.choice([K_UP, K_DOWN, K_RIGHT, K_LEFT])

In [5]:
# Play a few games through PLE with the naive policy and record each game's total reward.
game = RLBird(
    width=SCREEN_WIDTH,
    height=SCREEN_HEIGHT,
    lon=lon,
    lat=lat,
    init_bird_position=birdStart,
    init_island_position=island,
)

p = PLE(
    game,
    fps=1000,
    frame_skip=1,
    num_steps=1,
    force_fps=True,
    display_screen=True,
    reward_values=rewards,
)
p.init()
reward = 0
nb_games = 10
cumulated = np.zeros((nb_games))

for i in range(nb_games):
    p.reset_game()
    while not p.game_over():
        state = game.getGameState()
        action = NaivePolicy(state)
        reward = p.act(action)
        if reward == 1:
            print("FISH!")
        cumulated[i] += reward

print(cumulated)

SystemExit: 

## Initialize Q policy

In [6]:
import itertools

# Function to define bird-observed states
def getBirdState(gameState, vis):
    """Build the bird's local observation from the full game state.

    Args:
        gameState: dict with ``'bird_idx'`` as a ``(column, row)`` pair and
            ``'map'`` as a 2-D array indexed ``[row, column]`` where 2 marks
            a fish.
        vis: side length of the square window centred on the bird.  Expected
            to be odd: the window spans ``vis // 2`` tiles on each side.

    Returns:
        A tuple of ``vis**2 + 2`` values: the flattened window (2 where a fish
        is visible, 1 elsewhere, including tiles outside the map), followed by
        two 0/1 flags - whether the bird's row is below half the row count and
        whether its column is above half the column count.
    """
    # Integer division replaces np.int(vis/2); the np.int alias was removed in
    # NumPy 1.24.
    half = vis // 2
    window = np.ones((vis, vis))
    col, row = gameState['bird_idx']
    n_rows, n_cols = gameState['map'].shape

    for d_row in range(-half, half + 1):
        for d_col in range(-half, half + 1):
            r, c = row + d_row, col + d_col
            # Rows are checked against the row count and columns against the
            # column count, so non-square maps are handled as well.
            if 0 <= r < n_rows and 0 <= c < n_cols and gameState['map'][r, c] == 2:
                window[half + d_row, half + d_col] = 2

    state_map = list(window.reshape(vis**2))
    state_pos = [int(row < n_rows // 2), int(col > n_cols // 2)]
    return tuple(state_map + state_pos)

# Epsilon greedy algorithm for exploring state-space (movement only)
def epsilon_greedy(q, epsilon, state):
    """Pick an action key for ``state`` with an epsilon-greedy rule.

    With probability ``epsilon`` a random action other than the greedy one is
    drawn; otherwise the action with the highest Q-value is used.

    Args:
        q: policy object exposing ``listStates``, ``Q``, ``listAction`` and
            ``dictAction``.
        epsilon: exploration probability.
        state: a state present in ``q.listStates``.

    Returns:
        The entry of ``q.dictAction`` for the selected action index.
    """
    row = q.listStates.index(state)
    chosen = np.argmax(q.Q[row][:])

    explore = np.random.rand() <= epsilon
    if explore:
        # explore among every action except the greedy one
        candidates = list(q.listAction)
        candidates.remove(chosen)
        chosen = np.random.choice(candidates)

    return q.dictAction[chosen]


# Class Policy
class Policy:
    """Tabular action-value function with state/action lookup helpers.

    ``Q`` is indexed ``[state index, action index]``; ``listStates`` gives the
    state order and ``dictAction`` maps action indices to action keys.
    """

    def __init__(self, Q, listStates, listAction, dictAction):
        self.Q, self.listStates = Q, listStates
        self.listAction, self.dictAction = listAction, dictAction

    def index(self, state, action):
        """Return the ``(state index, action index)`` pair addressing ``Q``."""
        row = self.listStates.index(state)
        # reverse lookup: find the action index whose key is ``action``
        action_ids = list(self.dictAction.keys())
        action_keys = list(self.dictAction.values())
        column = action_ids[action_keys.index(action)]
        return (row, column)

    def get(self, state, action):
        """Return the Q-value stored for ``(state, action)``."""
        return self.Q[self.index(state, action)]

    def update(self, Eligibility, reward, state, a, state_new, a_new):
        """Apply one SARSA-style update to the entry for ``(state, a)``.

        The temporal-difference error is scaled by the learning rate and by
        the eligibility of ``state``, both read from ``Eligibility``.
        """
        q_next = self.get(state_new, a_new)
        q_current = self.get(state, a)
        td_error = reward + Eligibility.gamma * q_next - q_current
        increment = Eligibility.alpha * Eligibility.get(state) * td_error
        self.Q[self.index(state, a)] += increment
        
# Class Eligibility
class Eligibility:
    """Per-state eligibility trace bundled with the learning hyper-parameters.

    Holds the trace vector (one entry per state in ``listStates``) together
    with ``gamma``, ``lbd`` and ``alpha``.
    """

    def __init__(self, eligibility, listStates, gamma, lbd, alpha):
        self.eligibility, self.listStates = eligibility, listStates
        self.gamma, self.lbd, self.alpha = gamma, lbd, alpha

    def index(self, state):
        """Return the position of ``state`` in ``listStates``."""
        return self.listStates.index(state)

    def get(self, state):
        """Return the current trace value of ``state``."""
        position = self.index(state)
        return self.eligibility[position]

    def update(self, state):
        """Decay every trace by ``gamma * lbd`` and set the trace of ``state`` to 1."""
        position = self.index(state)
        # builds a new vector; the previous one is left untouched
        decayed = self.gamma * self.lbd * self.eligibility
        decayed[position] = 1
        self.eligibility = decayed

In [7]:
# Side length of the square window the bird observes.
vis = 3

# Action indices 0..4 and the key each one triggers.
listAction = range(5)
# Every observable state: vis**2 window cells (1 or 2) followed by two 0/1 flags.
listState = tuple(
    tuple(list(item) + list(pos))
    for item in itertools.product(range(1, 3), repeat=vis**2)
    for pos in itertools.product(range(2), range(2))
)
dictAction = dict(zip(listAction, [K_RIGHT, K_LEFT, K_DOWN, K_UP, K_SPACE]))


# parameter definition
gamma = 0.9
alpha = 0.1
epsilon = 0.05
lbd = 0.9

# initialize matrix Q and eligibility
Q = np.zeros((len(listState), len(listAction)))
eligibility = np.zeros(len(listState))

## Q basic: dive when there is a fish under the bird, otherwise move
# The bird's own tile is the centre of the flattened window, index vis**2 // 2
# (4 for a 3x3 window).  The previous "+ 1" pointed at the tile next to it, and
# np.int no longer exists in NumPy >= 1.24.
center = vis**2 // 2
# enumerate() gives the row directly; listState.index(s) rescanned the tuple
# for every state.
for idx, s in enumerate(listState):
    if s[center] == 2:
        Q[idx][:] = [0, 0, 0, 0, 1]
    else:
        Q[idx][:] = [0.25, 0.25, 0.25, 0.25, 0]


# Create Policy Eligibility objects
q = Policy(Q, listState, listAction, dictAction)
eligibility = Eligibility(eligibility, listState, gamma, lbd, alpha)

In [8]:
import pickle
# import data
# NOTE: pickle.load can execute arbitrary code contained in the file; only load
# files that were written by this notebook's SAVE cell.
# The "with" blocks close each file handle once it has been read.
with open("Eligibilitysarsa", 'rb') as file:
    eligibility = pickle.load(file)

with open("Qsarsa", 'rb') as file:
    q = pickle.load(file)

### SARSA

cf. [RL Courses](https://github.com/erachelson/RLclass/blob/master/RL2%20-%20Online%20Value%20Function%20Prediction.ipynb)

In [9]:
## TRAINING GAME

game = RLBird(width=SCREEN_WIDTH, height=SCREEN_HEIGHT, lon = lon, lat = lat, init_bird_position = birdStart,\
              init_island_position = island)
p = PLE(game, fps=100, frame_skip=1, num_steps=1, force_fps=False, display_screen=True, reward_values = rewards)
# Set up the environment once; each episode only needs reset_game().
p.init()


# parameters of training
nb_games = 10000
cumulated = np.zeros((100)) # total reward of each game in the current window
count = 0                   # number of games recorded in the current window


for kk in range(nb_games):
    reward = 0.0
    p.reset_game()
    gameState = game.getGameState()
    state = getBirdState(gameState, vis)
    a = epsilon_greedy(q, epsilon, state)

    while(not p.game_over()):
        # observe r, s and s'
        reward = p.act(a)
        gameState_new = game.getGameState()
        state_new = getBirdState(gameState_new, vis)

        # choose a' from s', then update the trace and Q for (s, a)
        a_new = epsilon_greedy(q, epsilon, state_new)
        eligibility.update(state)
        q.update(eligibility, reward, state, a, state_new, a_new)

        a = a_new
        state = state_new

        cumulated[count] += reward
    count += 1

    # Report after the window is full, so the mean really covers the last
    # len(cumulated) games, then start a new window.
    if count == len(cumulated):
        # printing mean on last 100 games
        print('Moyenne sur les 100 derniers jeux:')
        print((np.mean(cumulated)))
        cumulated = np.zeros((100))
        count = 0
        # reducing epsilon
        epsilon = epsilon/1.1

SystemExit: 

In [9]:
## SAVE
import pickle
# Write the learned Q policy and the eligibility object to disk, one file each.
for target, obj in (('Qsarsa', q), ('Eligibilitysarsa', eligibility)):
    with open(target, 'wb') as f:
        pickle.dump(obj, f)

### Q-Learning

cf. [RL Courses](https://github.com/erachelson/RLclass/blob/master/RL3%20-%20Control%20problems%2C%20model-free%20Policy%20Optimization.ipynb)

### Deep Q-Learning
cf. [RL Courses](https://github.com/erachelson/RLclass/blob/master/RL4%20-%20Deep%20Reinforcement%20Learning.ipynb)