In [None]:
import os

# NOTE: the star import stays ahead of the explicit imports below so that the
# explicit names (PLE, Image, imageio, ...) win over anything it may export.
from rlbirdv1 import *
from ple import PLE
from IPython.display import Image
import imageio
import mdptoolbox

# Grid coordinates: x runs 0..2, y runs 2..0.
x = list(range(3))
y = list(reversed(range(3)))

# Island cells and the bird's starting cell, given as coordinate tuples.
island = [(0, 0)]
birdStart = (0, 0)

# Window size in pixels: three tiles wider than the grid, exactly as tall.
TILESIZE = 40
SCREEN_WIDTH = (len(x) + 3) * TILESIZE
SCREEN_HEIGHT = len(y) * TILESIZE

# Reward values handed to both RLBird and PLE.
reward = {'win': 1, 'lose': 0}

# Keys the agent may press; a Q-table column index is a position in this list.
listAction = [K_LEFT, K_RIGHT, K_DOWN, K_UP, K_s, K_d]

# Passed to RLBird as nbStep.
N = 20

# Build and initialise the game (this part was labelled "run.py").
pygame.init()
game = RLBird(
    width=SCREEN_WIDTH,
    height=SCREEN_HEIGHT,
    x=x,
    y=y,
    init_bird_position=birdStart,
    island_position=island,
    energyMax=10,
    catchMax=2,
    costMove=-1,
    costDive=-1,
    gainFish=2,
    factorFishFly=0.75,
    nbStep=N,
    reward=reward,
)
game.init()

# Snapshot of the initial fish map so every episode can restart from it.
saveFishMap = game.map.fishMap.copy()

### Q-Learning 

In [None]:
# --- Exploration schedule consumed by epsilon() ---------------------------
INITIAL_EPSILON = 1          # epsilon at the start of the linear decay
FINAL_EPSILON = 0.001        # epsilon once the decay is over
INITIAL_EXPLORATION = 2000   # below this step epsilon() returns 1
EXPLORATION_STEPS = 8000     # from this step on epsilon() returns FINAL_EPSILON

# --- Learning constants ---------------------------------------------------
NUMBER_GAMES = 10000         # number of training episodes
GAMMA = 0.99                 # discount factor in the Q-learning target
ALPHA = 0.001                # step size applied in the Q-table update
LEARNING_RATE = 0.01         # NOTE(review): not referenced in the visible cells

# Size of the score window averaged for the progress print-out.
INTERMEDIATE_SCORE = 100

def epsilon(step):
    """
    Exploration rate for the exploration/exploitation trade-off.

    Piecewise schedule driven by the module-level constants:
      * step < INITIAL_EXPLORATION   -> 1 (always explore)
      * step < EXPLORATION_STEPS     -> linear interpolation from
        INITIAL_EPSILON towards FINAL_EPSILON
      * otherwise                    -> FINAL_EPSILON
    """
    if step >= EXPLORATION_STEPS:
        return FINAL_EPSILON
    if step < INITIAL_EXPLORATION:
        return 1

    # Linear decay: per-step slope times the steps elapsed since decay began.
    slope = (FINAL_EPSILON - INITIAL_EPSILON)/(EXPLORATION_STEPS-INITIAL_EXPLORATION)
    elapsed = step-INITIAL_EXPLORATION
    return INITIAL_EPSILON + slope * elapsed
    
# Epsilon-greedy action selection over every action column of the Q-table
def epsilon_greedy(game, Q, k, state):
    """
    Choose an action index for `state` with an epsilon-greedy rule.

    Parameters
    ----------
    game  : object exposing `listStates.state2idx(state)`, used to map the
            state to its row in `Q`.
    Q     : 2-D table indexed as Q[state_index][action_index].
    k     : step counter forwarded to `epsilon()` to get the exploration rate.
    state : current game state, in whatever form `state2idx` accepts.

    Returns
    -------
    The greedy action (argmax of the state's Q row), or, with probability
    epsilon(k), an action drawn uniformly from the *other* actions. The
    greedy action is deliberately excluded from the exploratory draw.
    """
    idx = game.listStates.state2idx(state)
    greedy = np.argmax(Q[idx][:])

    if np.random.rand() <= epsilon(k):            # random action
        # The number of actions comes from the Q-table itself rather than a
        # hard-coded 6, so the function follows the action list's length.
        others = [i for i in range(len(Q[idx])) if i != greedy]
        if others:                                # nothing to explore with one action
            return np.random.choice(others)
    return greedy

In [None]:
# Wrap the game in PLE. `reward` must still be the {'win', 'lose'} dict here,
# so the loop below uses its own names and never rebinds `reward` or `x`.
p = PLE(game, fps=1000, frame_skip=1, num_steps=1, force_fps=True, display_screen=True, reward_values = reward)
p.init()

# Q-learning
Q = np.zeros((game.listStates.size, len(listAction)))      # action-value table
count = np.zeros((game.listStates.size, len(listAction)))  # to track update frequencies
score = np.zeros(INTERMEDIATE_SCORE)                       # final scores of the latest games

for k in range(NUMBER_GAMES):
    # Start a fresh episode from the saved fish map.
    p.init()
    p.reset_game()
    game.updateFishMap(saveFishMap.copy())

    while(not p.game_over()):
        state = game.getGameState()
        s_idx = game.listStates.state2idx(state)
        a = epsilon_greedy(game, Q, k, state)
        step_reward = p.act(listAction[int(a)])
        state_new = game.getGameState()
        s_idx_new = game.listStates.state2idx(state_new)

        # One-step Q-learning update.
        # NOTE(review): the target bootstraps from Q[s_idx_new] even when the
        # episode has just ended - confirm terminal rows stay at zero.
        td_target = step_reward + GAMMA*np.max(Q[s_idx_new][:])
        Q[s_idx][a] = Q[s_idx][a] + ALPHA * (td_target - Q[s_idx][a])
        count[s_idx][a] += 1

    # Store this game's score *before* reporting, so each printed mean covers
    # exactly the last INTERMEDIATE_SCORE games (no stale slot).
    score[k % INTERMEDIATE_SCORE] = game.getScore()
    if((k+1)%INTERMEDIATE_SCORE==0):
        print('Mean Score: ', np.mean(score))
pygame.quit()

In [None]:
## View current policy
# Plays one episode greedily with respect to Q, saving one screenshot per
# step and assembling the frames into ./results/RL.gif.
p.init()
p.reset_game()
game.updateFishMap(saveFishMap.copy())

# The screenshots and the GIF go here; create the folder if it is missing.
os.makedirs('./results', exist_ok=True)

images = []
dt = 0
while(not p.game_over()):
    state = game.getGameState()
    idx = game.listStates.state2idx(state)
    a = np.argmax(Q[idx][:])
    # The step reward is not needed here; leaving it unassigned keeps the
    # global `reward` dict from being overwritten.
    p.act(listAction[int(a)])

    pic = './results/screenshot'+str(dt)+'.jpeg'
    pygame.image.save(game.screen, pic)
    images.append(imageio.imread(pic))

    dt +=1

imageio.mimsave('./results/RL.gif', images)
pygame.quit()

In [None]:
# Read the rendered animation from disk and show it inline.
with open('./results/RL.gif','rb') as gif_file:
    gif_bytes = gif_file.read()
display(Image(gif_bytes))