In [None]:
from rlbirdv2 import *
from ple import PLE
from IPython.display import Image
import imageio
import mdptoolbox
from keras.models import Sequential, load_model
from keras.layers import Conv2D, Flatten, Dense, Activation
from keras.optimizers import Adam
from keras import backend as kerasBackend
from ple import PLE
import pickle

# Grid coordinates handed to RLBird as `x` and `y`; `y` is listed in descending order (2, 1, 0).
x = list(range(3))
y = list(range(3))
y.reverse()

# List of coordinates for islands
island = [(0, 0)]
birdStart = (0, 0)

# Screen size in pixels, derived from the tile size.
# NOTE(review): the width adds three tiles beyond len(x) -- presumably room for a side panel; confirm in RLBird.
TILESIZE = 40
SCREEN_WIDTH = TILESIZE * (len(x) + 3)
SCREEN_HEIGHT = TILESIZE * len(y)

# Reward configuration passed both to RLBird and, later, to PLE as `reward_values`.
reward = {'win': 1,
          'lose': 0}

# import specific Game -------------------------
# The context manager closes the file handle as soon as the object is loaded.
# NOTE: pickle.load can execute arbitrary code -- only load fish maps from a trusted source.
with open("../RLv1/fish2", 'rb') as file:
    fish = pickle.load(file)
# ----------------------------------------------

# Build and initialise the game, then install a copy of the loaded fish map.
pygame.init()
game = RLBird(width=SCREEN_WIDTH, height=SCREEN_HEIGHT, x=x, y=y,
              init_bird_position=birdStart, island_position=island,
              energyMax=25, catchMax=2, costMove=-2, costDive=-3, factorFishFly=0.25,
              reward=reward)
game.init()
game.updateFishMap(fish.copy())

### Human Policy 

In [None]:
# Wrap the game in a PLE environment with an on-screen display.
pygame.init()
p = PLE(
    game,
    fps=100,
    frame_skip=1,
    num_steps=1,
    force_fps=True,
    display_screen=True,
    reward_values=reward,
)

# Start a fresh episode on a copy of the loaded fish map.
p.init()
p.reset_game()
game.updateFishMap(fish.copy())

# Step the game directly (not through p.act) until the episode ends.
# NOTE(review): presumably game.step reads the player's input itself -- confirm in RLBird.
dt = 0
while not p.game_over():
    game.step(dt)
    pygame.display.update()
    dt += 1

print(game.getScore())
pygame.quit()

In [None]:
# Inspect the object unpickled from ../RLv1/fish2 (the one passed to game.updateFishMap).
fish

### Q-Learning: A model-free approach


In [None]:
# True: start from an all-zero Q-table. False: load the pickled table from the 'Qlearning' file.
COMPUTE_NEW_POLICY = False

# Exploration schedule read by epsilon(): 1 below INITIAL_EXPLORATION, a linear ramp from
# INITIAL_EPSILON to FINAL_EPSILON up to EXPLORATION_STEPS, and FINAL_EPSILON afterwards.
# With both thresholds equal (1000) the linear branch is never reached: the value drops
# from 1 straight to FINAL_EPSILON at step 1000.
INITIAL_EXPLORATION = 1000
EXPLORATION_STEPS = 1000
INITIAL_EPSILON = 1
FINAL_EPSILON = 1e-3

# Q-learning hyper-parameters
GAMMA = 0.99          # discount factor applied to the best next-state value
ALPHA = 0.01          # step size of the tabular Q update
NUMBER_GAMES = 3000   # number of training games

# Length of the score buffer and number of games between printed mean scores
INTERMEDIATE_SCORE = 100

def epsilon(step):
    """
    Exploration rate for the exploration/exploitation trade-off.

    Returns 1 while `step` is below INITIAL_EXPLORATION, FINAL_EPSILON once `step`
    is no longer below EXPLORATION_STEPS, and a linear interpolation from
    INITIAL_EPSILON to FINAL_EPSILON in between. The four constants are read from
    module scope at call time.
    """
    # Warm-up phase
    if step < INITIAL_EXPLORATION:
        return 1

    # Past the end of the schedule
    if not (step < EXPLORATION_STEPS):
        return FINAL_EPSILON

    # Linear ramp between the two thresholds
    slope = (FINAL_EPSILON - INITIAL_EPSILON)/(EXPLORATION_STEPS-INITIAL_EXPLORATION)
    return INITIAL_EPSILON + slope * (step-INITIAL_EXPLORATION)
    
# Epsilon-greedy action selection on the tabular Q-values (movement only)
def epsilon_greedy(game, Q, k, state):
    """
    Pick an action index for `state`.

    The greedy action is the argmax of the Q-row that `game.listStates.state2idx`
    maps `state` to. With probability epsilon(k) it is replaced by a uniformly
    drawn action from the remaining ones, so an exploratory move never repeats
    the greedy choice.
    """
    row = game.listStates.state2idx(state)
    greedy = np.argmax(Q[row])

    explore = np.random.rand() <= epsilon(k)
    if not explore:
        return greedy

    # Draw among every action index except the greedy one.
    candidates = list(range(len(game.listAction)))
    candidates.remove(greedy)
    return np.random.choice(candidates)

In [None]:
# Q-learning: start from a blank table or resume from the saved one

if COMPUTE_NEW_POLICY:
    # Action-value table indexed as Q[state index][action index], initialised to zero.
    Q = np.zeros((game.listStates.size, len(game.listAction)))
else:
    # The context manager closes the file handle once the table is loaded.
    # NOTE: pickle.load can execute arbitrary code -- only load a table you saved yourself.
    with open("Qlearning", 'rb') as file:
        Q = pickle.load(file)

# Buffer for the scores of the most recent INTERMEDIATE_SCORE games (progress reporting)
score = np.zeros(INTERMEDIATE_SCORE)

# Train for NUMBER_GAMES episodes, updating Q in place after every action.
# Loop-local names (`s_idx`, `step_reward`) are deliberately distinct from the notebook's
# `x` grid list and `reward` configuration dict so those are not overwritten.
for k in range(NUMBER_GAMES):
    p.init()
    p.reset_game()
    game.updateFishMap(fish.copy())

    while not p.game_over():
        state = game.getGameState()
        s_idx = game.listStates.state2idx(state)
        # The exploration schedule is driven by the game index k, not by the step count.
        a = epsilon_greedy(game, Q, k, state)
        step_reward = p.act(game.dictAction[game.listAction[a]])

        if p.game_over():
            # Terminal transition: there is no successor state to bootstrap from.
            target = step_reward
        else:
            s_idx_new = game.listStates.state2idx(game.getGameState())
            target = step_reward + GAMMA * np.max(Q[s_idx_new])

        Q[s_idx][a] += ALPHA * (target - Q[s_idx][a])

    # Record the score first, then report, so the printed mean covers exactly the
    # last INTERMEDIATE_SCORE games including the one just played.
    score[k % INTERMEDIATE_SCORE] = game.getScore()
    if (k + 1) % INTERMEDIATE_SCORE == 0:
        print('Mean Score after ', k+1, ' Games : ', np.mean(score))
pygame.quit()


# save specific Policy -------------------------
# Overwrites the 'Qlearning' file with the table as it stands after this training run.
with open('Qlearning', 'wb') as f:
    pickle.dump(Q, f)

In [None]:
## View current policy
# Play one greedy episode with the tabular Q-values and record it as an animated GIF.
p.init()
p.reset_game()
game.updateFishMap(fish.copy())

images = []
dt = 0
while not p.game_over():
    state = game.getGameState()
    idx = game.listStates.state2idx(state)
    a = np.argmax(Q[idx])
    # The step reward is not needed here; it is intentionally not bound to `reward`,
    # which would overwrite the reward configuration dict of the same name.
    p.act(game.dictAction[game.listAction[a]])

    # Save the current frame to disk and read it back as an image array for the GIF.
    pic = './results/screenshot'+str(dt)+'.jpeg'
    pygame.image.save(game.screen, pic)
    images.append(imageio.imread(pic))

    dt += 1

imageio.mimsave('./results/RL.gif', images)
pygame.quit()

In [None]:
# Read the recorded episode from disk and render it inline.
with open('./results/RL.gif', 'rb') as gif_file:
    gif_bytes = gif_file.read()
display(Image(data=gif_bytes))

### Q-Learning : An agent-based approach


### Deep Q-Learning

In [None]:
def createDQN(game):
    """
    Create the deep Q-network: a fully connected net that maps a state vector
    to one Q-value per action.

    Parameters
    ----------
    game : object providing `listStates.all` (the size of its second dimension
        is used as the input width) and `listAction` (its length is the number
        of outputs).

    Returns
    -------
    A compiled keras `Sequential` model (Adam optimiser, mean-squared-error loss).

    Notes
    -----
    The module-level LEARNING_RATE is read when this function is called, so it
    must be defined before the call.
    """
    dqn = Sequential()
    dqn.add(Dense(units=20, input_dim=game.listStates.all.shape[1], activation='relu'))
    # Q-values are regression targets, so the output layer is linear. A ReLU here
    # would clamp every prediction to >= 0 and stop gradients for any output whose
    # pre-activation goes negative.
    dqn.add(Dense(units=len(game.listAction), activation='linear'))

    # `lr` is kept as in the rest of this notebook's keras usage.
    dqn.compile(optimizer=Adam(lr=LEARNING_RATE), loss='mean_squared_error')
    return dqn


def epsilonGreedy(dqn, game, x, step):
    """
    Epsilon-greedy action for the network policy.

    The greedy action is the argmax of `dqn.predict` on a batch holding the single
    state `x`. With probability epsilon(step) a different action is drawn uniformly
    from the remaining action indices instead.
    """
    greedy = np.argmax(dqn.predict(np.array([x])))

    explore = np.random.rand() < epsilon(step)
    if not explore:
        return greedy

    # Exploratory move: any action index except the greedy one.
    candidates = list(range(len(game.listAction)))
    candidates.remove(greedy)
    return np.random.choice(candidates)


In [None]:
# Exploration schedule for the DQN run.
# These assignments rebind the module-level names that epsilon() reads, so every
# later call to epsilon() -- from any cell -- uses these values.
# The training loop below passes the game index as the step argument.
INITIAL_EXPLORATION = 2000
EXPLORATION_STEPS = 8000
INITIAL_EPSILON = 1
FINAL_EPSILON = 1e-3

# Constants
GAMMA = 0.99
NUMBER_GAMES = 10000

# Step size of the Adam optimiser; read by createDQN when it compiles the network
LEARNING_RATE = 0.01

# Build a new, untrained network (nothing is loaded from disk here)
dqn = createDQN(game)

# Fresh score buffer for this run, so the first reported mean does not include
# values left in `score` by an earlier cell.
score = np.zeros(INTERMEDIATE_SCORE)

# Online deep Q-learning: one gradient step on a single transition after every action.
# `step_reward` is used instead of `reward` so the reward configuration dict is not overwritten.
for k in range(NUMBER_GAMES):
    p.init()
    p.reset_game()
    game.updateFishMap(fish.copy())

    while not p.game_over():
        state = game.getGameState()
        # The exploration schedule is driven by the game index k.
        a = epsilonGreedy(dqn, game, state, k)
        step_reward = p.act(game.dictAction[game.listAction[a]])

        state_new = game.getGameState()
        # Current predictions for `state`; only the entry of the action taken is replaced
        # below, so the other outputs contribute zero error to the training step.
        q = dqn.predict(np.array([state]))

        if p.game_over():
            # Terminal transition: no bootstrap term.
            update = step_reward
        else:
            q_new = dqn.predict(np.array([state_new]))
            update = step_reward + GAMMA * np.max(q_new)
        q[0, a] = update

        dqn.train_on_batch(np.array([state]), q)

    # Record the score first, then report, so the printed mean covers exactly the
    # last INTERMEDIATE_SCORE games including the one just played.
    score[k % INTERMEDIATE_SCORE] = game.getScore()
    if (k + 1) % INTERMEDIATE_SCORE == 0:
        print('Mean Score after ', k+1, ' Games : ', np.mean(score))
pygame.quit()

In [None]:
## View current policy
# Play one greedy episode with the trained network and record it as an animated GIF.
p.init()
p.reset_game()
# Use the same fish map as every other cell; `saveFishMap` is not defined anywhere
# in this notebook.
game.updateFishMap(fish.copy())

images = []
dt = 0
while not p.game_over():
    state = game.getGameState()
    # Stored under its own name so the tabular `Q` table is not overwritten.
    q_values = dqn.predict(np.array([state]))
    a = np.argmax(q_values)
    # The step reward is not needed here and is not bound to `reward`, which
    # would overwrite the reward configuration dict of the same name.
    p.act(game.dictAction[game.listAction[a]])

    # Save the current frame to disk and read it back as an image array for the GIF.
    pic = './results/screenshot'+str(dt)+'.jpeg'
    pygame.image.save(game.screen, pic)
    images.append(imageio.imread(pic))

    dt += 1

imageio.mimsave('./results/DeepRL.gif', images)
pygame.quit()

In [None]:
# Read the recorded DQN episode from disk and render it inline.
with open('./results/DeepRL.gif', 'rb') as gif_file:
    gif_bytes = gif_file.read()
display(Image(data=gif_bytes))