In [1]:
from keras.models import load_model
from keras.models import Sequential, clone_model
from keras.layers import Dense, InputLayer
from keras.optimizers import Adam
from keras.callbacks import CSVLogger, TensorBoard
import keras.backend as K
import random
import gc
import time
import numpy as np
import os.path

Using TensorFlow backend.


In [3]:
import json
from mini_pacman import PacmanGame

# Build the game environment from the settings stored in 'test_params.json':
# the file's 'params' entry is expanded into keyword arguments of PacmanGame.
with open('test_params.json', 'r') as file:
    read_params = json.load(file)
    game_params = read_params['params']
    # The environment is created while the file is still open; `env`,
    # `game_params` and `read_params` all remain available to later cells.
    env = PacmanGame(**game_params)

In [None]:
def get_state(obs):
    """Flatten an observation into a single list of coordinates.

    Args:
        obs: mapping with a ``"player"`` entry holding one ``(x, y)`` pair
            and ``"monsters"``, ``"diamonds"`` and ``"walls"`` entries, each
            an iterable of ``(x, y)`` pairs.

    Returns:
        list: the player's x and y, followed by the x and y of every
        monster, then every diamond, then every wall, in the order given.
    """
    player_x, player_y = obs["player"]
    flat = [player_x, player_y]
    # Group order is fixed so each feature keeps the same position in the vector.
    for group in ("monsters", "diamonds", "walls"):
        for col, row in obs[group]:
            flat.extend((col, row))
    return flat

In [6]:
# Take one initial observation only to measure the length of the flattened
# state vector; that length fixes the network's input shape.
obs = env.reset()
array_obs = np.array(get_state(obs))

input_shape = array_obs.shape
# Width of the network's output layer (one Q-value per action slot).
nb_actions = 9

In [7]:
# Fully connected Q-network: flattened state in, one linear Q-value per
# action out. The layer sizes must match the architecture the weights file
# was saved from, otherwise loading the weights will not line up.
model = Sequential([
    Dense(units=32, input_shape=input_shape, activation="relu"),
    Dense(units=64, activation="relu"),
    Dense(units=64, activation="relu"),
    Dense(units=512, activation="relu"),
    Dense(nb_actions, activation="linear"),
])
model.compile(optimizer=Adam(), loss="mse")

In [8]:
# Restore saved weights into the network built above. The relative path is
# resolved against the current working directory.
# NOTE(review): presumably produced by a separate training notebook — confirm.
model.load_weights('pacman1.h5')

In [9]:
def test_dqn(env, n_games, model, nb_actions, eps=0.05, render=False, sleep_time=0.01):
    """Play ``n_games`` episodes with an epsilon-greedy policy and return the scores.

    Args:
        env: game environment providing ``reset()``, ``make_action(action)``
            and ``render()``. Observations are dicts that include the keys
            ``'possible_actions'``, ``'end_game'`` and ``'reward'``.
        n_games: number of episodes to play.
        model: object whose ``predict`` maps a batch of state vectors to
            per-action Q-values.
        nb_actions: accepted for call compatibility; its value is never
            read, because the legal moves come from each observation.
        eps: exploration probability handed to ``epsilon_greedy``.
        render: when true, draw the environment after every move.
        sleep_time: pause in seconds after each rendered frame.

    Returns:
        list: total reward of every episode, in the order played.
    """
    episode_scores = []
    for _ in range(n_games):
        obs = env.reset()
        total_reward = 0
        finished = False
        while not finished:
            # predict() works on batches, so wrap the single state and unwrap row 0.
            batch = np.array([get_state(obs)])
            q_values = model.predict(batch)[0]
            legal_moves = obs['possible_actions']
            chosen = epsilon_greedy(q_values, eps, legal_moves)
            # The observation returned by the move also carries its outcome.
            obs = env.make_action(chosen)
            finished = obs['end_game']
            total_reward += obs['reward']
            if render:
                env.render()
                time.sleep(sleep_time)
        episode_scores.append(total_reward)
    return episode_scores

In [10]:
def epsilon_greedy(q_values, epsilon, poss_actions):
    """Choose one of ``poss_actions`` with an epsilon-greedy rule.

    Args:
        q_values: array-like of per-action values; the value of action id
            ``a`` is read from index ``a - 1`` (action ids are 1-based).
        epsilon: probability of picking a uniformly random legal action
            instead of the greedy one.
        poss_actions: non-empty sequence of the action ids that are legal
            in the current state.

    Returns:
        An element of ``poss_actions``: a random one with probability
        ``epsilon``, otherwise the legal action with the highest Q-value
        (the first such action on ties).
    """
    if random.random() < epsilon:
        # Explore: any legal action, chosen uniformly.
        return random.choice(poss_actions)

    # Exploit: compare only the Q-values of legal actions. asarray lets
    # callers pass a plain list as well as the array returned by predict().
    legal_q = np.asarray(q_values)[np.asarray(poss_actions) - 1]
    best = int(np.argmax(legal_q))
    # Return the caller's own element so both branches yield the same kind
    # of object rather than a NumPy scalar in one and a Python value in the other.
    return poss_actions[best]

In [11]:
# Play 10 rendered games with a 1% exploration rate and keep each game's score.
scores = test_dqn(env, 10, model, nb_actions, eps=0.01, render=True)

In [12]:
# Done with the rendered games.
# NOTE(review): presumably this tears down the render window — confirm against mini_pacman.
env.close()

In [13]:
# Show the per-game scores collected by test_dqn.
scores

[827, 2200, 625, 730, 933, 742, 2, 4, 1695, 104]

In [14]:
def DQN_strategy(obs):
    """Return the action the loaded Q-network picks for one observation.

    Uses the module-level ``model`` and a fixed exploration rate of 0.01.

    Args:
        obs: observation dict accepted by ``get_state`` that also carries
            a ``"possible_actions"`` entry listing the legal action ids.

    Returns:
        The action selected by ``epsilon_greedy`` among the legal ones.
    """
    exploration_rate = 0.01
    # predict() works on batches: wrap the single state, then take row 0.
    batch = np.array([get_state(obs)])
    action_values = model.predict(batch)[0]
    legal_moves = obs["possible_actions"]
    return epsilon_greedy(action_values, exploration_rate, legal_moves)



In [15]:
# Run the package's own benchmark with the DQN policy as the strategy callback;
# the printed summary below reports the average and median score and the log file written.
from mini_pacman import test
test(DQN_strategy)

Your average score is 747.756, median is 541.5, saved log to 'test_pacman_log.json'. Do not forget to upload it for submission!


541.5

In [16]:
# Show the working directory; relative paths used above ('test_params.json',
# 'pacman1.h5') are resolved against it.
# NOTE(review): the output exposes a local absolute path — consider clearing it before sharing.
import os
os.getcwd()

'/Users/joshuagoldberg/PycharmProjects/Advanced-Machine-Learning/Reinforced-Learning/project'