In [1]:
import gym
import argparse
import numpy as np
import atari_py
from game_models.ddqn_game_model import DDQNTrainer, DDQNSolver
from game_models.ge_game_model import GETrainer, GESolver
from gym_wrappers import MainGymWrapper

Using TensorFlow backend.


In [2]:
# Observation geometry handed to the game models: a stack of consecutive
# frames, each one a square image of FRAME_SIZE x FRAME_SIZE pixels.
FRAMES_IN_OBSERVATION, FRAME_SIZE = 4, 84
INPUT_SHAPE = (FRAMES_IN_OBSERVATION,) + (FRAME_SIZE,) * 2

In [3]:
class Atari:
    """Drive an Atari gym environment with one of the available game models.

    Constructing an instance builds the wrapped environment, selects the game
    model for ``game_mode`` and immediately runs the main loop until one of
    the configured limits is reached.
    """

    def __init__(self, game_name="MsPacman", game_mode="ddqn_training", render=True,
                 total_step_limit=5000000, total_run_limit=None, clip=True):
        """Create the environment and the game model, then run the main loop.

        Args:
            game_name: Gym game name without the version suffix.
            game_mode: One of "ddqn_training", "ddqn_testing", "ge_training",
                "ge_testing".
            render: Whether to call ``env.render()`` on every step.
            total_step_limit: Stop after this many environment steps across
                all runs (``None`` disables the limit).
            total_run_limit: Stop after this many runs (``None`` disables the
                limit).
            clip: Whether the reward handed to the model is reduced to its
                sign (-1, 0 or 1).

        Raises:
            ValueError: If ``game_mode`` is not recognized.
        """
        env_name = game_name + "Deterministic-v4"  # Handles frame skipping (4) at every iteration
        env = MainGymWrapper.wrap(gym.make(env_name))
        game_model = self._game_model(game_mode, game_name, env.action_space.n)
        self._main_loop(game_model, env, render, total_step_limit, total_run_limit, clip)

    def _main_loop(self, game_model, env, render, total_step_limit, total_run_limit, clip):
        """Play runs until a run or step limit is hit.

        For every step the model picks an action, the transition is stored via
        ``game_model.remember`` and ``game_model.step_update`` is called with
        the global step counter. At the end of each run the raw (unclipped)
        score, the number of steps and the run index are passed to
        ``game_model.save_run``.

        Returns ``None`` when a limit is reached instead of calling
        ``exit(0)``, so that the surrounding notebook kernel (or script) is
        not terminated.
        """
        if isinstance(game_model, GETrainer):
            game_model.genetic_evolution(env)

        run = 0
        total_step = 0
        while True:
            if total_run_limit is not None and run >= total_run_limit:
                print ("Reached total run limit of: " + str(total_run_limit))
                return

            run += 1
            current_state = env.reset()
            step = 0
            score = 0
            while True:
                if total_step_limit is not None and total_step >= total_step_limit:
                    print ("Reached total step limit of: " + str(total_step_limit))
                    return
                total_step += 1
                step += 1

                if render:
                    env.render()

                action = game_model.move(current_state)
                next_state, reward, terminal, info = env.step(action)

                # The reported score always uses the raw game reward so logged
                # scores stay comparable whether or not clipping is enabled.
                score += reward

                # np.sign returns a new value; it must be captured, otherwise
                # the unclipped reward is what ends up in the model's memory.
                stored_reward = np.sign(reward) if clip else reward
                game_model.remember(current_state, action, stored_reward, next_state, terminal)
                current_state = next_state

                game_model.step_update(total_step)

                if terminal:
                    game_model.save_run(score, step, run)
                    break

    def _game_model(self, game_mode,game_name, action_space):
        """Return the game model instance matching ``game_mode``.

        Args:
            game_mode: "ddqn_training", "ddqn_testing", "ge_training" or
                "ge_testing".
            game_name: Name forwarded to the model constructor.
            action_space: Number of actions forwarded to the model constructor.

        Raises:
            ValueError: If ``game_mode`` is none of the supported modes.
        """
        if game_mode == "ddqn_training":
            return DDQNTrainer(game_name, INPUT_SHAPE, action_space)
        elif game_mode == "ddqn_testing":
            return DDQNSolver(game_name, INPUT_SHAPE, action_space)
        elif game_mode == "ge_training":
            return GETrainer(game_name, INPUT_SHAPE, action_space)
        elif game_mode == "ge_testing":
            return GESolver(game_name, INPUT_SHAPE, action_space)
        else:
            # Fail loudly here: silently returning None only defers the error
            # to the first use of the model inside the main loop.
            raise ValueError(
                "Unrecognized mode: " + repr(game_mode)
                + ". Expected one of: ddqn_training, ddqn_testing, ge_training, ge_testing"
            )

In [None]:
# Entry point: constructing Atari builds the environment and game model and
# runs the main loop from inside __init__, so this call blocks until a run or
# step limit is reached.
if __name__ == "__main__":
    Atari()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 32, 20, 20)        8224      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 64, 9, 9)          32832     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 64, 7, 7)          36928     
_________________________________________________________________
flatten_1 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               1606144   
_________________________________________________________________
dense_2 (Dense)              (None, 9)                 4617      
Total params: 1,688,745
Trainable params: 1,688,745
Non-trainable para



score: (min: 160.0, avg: 233.0, max: 420.0
{"metric": "score", "value": 233.0}
step: (min: 420, avg: 483.8, max: 623
{"metric": "step", "value": 483.8}
{"metric": "run", "value": 10}
score: (min: 110.0, avg: 287.0, max: 570.0
{"metric": "score", "value": 287.0}
step: (min: 350, avg: 553.5, max: 1112
{"metric": "step", "value": 553.5}
{"metric": "run", "value": 20}
score: (min: 100.0, avg: 205.0, max: 260.0
{"metric": "score", "value": 205.0}
step: (min: 308, avg: 429.1, max: 541
{"metric": "step", "value": 429.1}
{"metric": "run", "value": 30}
score: (min: 80.0, avg: 213.0, max: 350.0
{"metric": "score", "value": 213.0}
step: (min: 257, avg: 452.3, max: 562
{"metric": "step", "value": 452.3}
{"metric": "run", "value": 40}
score: (min: 140.0, avg: 247.0, max: 460.0
{"metric": "score", "value": 247.0}
step: (min: 365, avg: 456.5, max: 619
{"metric": "step", "value": 456.5}
{"metric": "run", "value": 50}
score: (min: 190.0, avg: 243.0, max: 330.0
{"metric": "score", "value": 243.0}
step: 

score: (min: 140.0, avg: 226.0, max: 310.0
{"metric": "score", "value": 226.0}
step: (min: 415, avg: 455.1, max: 498
{"metric": "step", "value": 455.1}
{"metric": "run", "value": 220}
loss: (min: 0.06031094118952751, avg: 0.5066355365663767, max: 5
{"metric": "loss", "value": 0.5066355365663767}
accuracy: (min: 0.125, avg: 0.731, max: 1.0
{"metric": "accuracy", "value": 0.731}
q: (min: 17.509830650389194, avg: 19.880434002838435, max: 25.727834396362304
{"metric": "q", "value": 19.880434002838435}
score: (min: 120.0, avg: 340.0, max: 960.0
{"metric": "score", "value": 340.0}
step: (min: 314, avg: 473.6, max: 710
{"metric": "step", "value": 473.6}
{"metric": "run", "value": 230}
loss: (min: 0.051236219704151154, avg: 0.48204935326427223, max: 5
{"metric": "loss", "value": 0.48204935326427223}
accuracy: (min: 0.1875, avg: 0.7126875, max: 0.96875
{"metric": "accuracy", "value": 0.7126875}
q: (min: 17.810149142742155, avg: 19.814474192754925, max: 22.19192151963711
{"metric": "q", "value":

score: (min: 170.0, avg: 284.0, max: 860.0
{"metric": "score", "value": 284.0}
step: (min: 310, avg: 478, max: 922
{"metric": "step", "value": 478}
{"metric": "run", "value": 360}
loss: (min: 0.07854633033275604, avg: 0.5257129151150585, max: 5
{"metric": "loss", "value": 0.5257129151150585}
accuracy: (min: 0.25, avg: 0.72321875, max: 1.0
{"metric": "accuracy", "value": 0.72321875}
q: (min: 19.598334756493568, avg: 22.4117410822621, max: 27.721631413102152
{"metric": "q", "value": 22.4117410822621}
score: (min: 120.0, avg: 317.0, max: 670.0
{"metric": "score", "value": 317.0}
step: (min: 309, avg: 510.1, max: 746
{"metric": "step", "value": 510.1}
{"metric": "run", "value": 370}
loss: (min: 0.06990277767181396, avg: 0.5178087099790573, max: 5
{"metric": "loss", "value": 0.5178087099790573}
accuracy: (min: 0.25, avg: 0.73434375, max: 0.96875
{"metric": "accuracy", "value": 0.73434375}
q: (min: 19.85899344563484, avg: 22.444011097690165, max: 27.723588709831237
{"metric": "q", "value": 2

{"metric": "epsilon", "value": 0.7988224705886545}
{"metric": "total_step", "value": 240000}
score: (min: 230.0, avg: 327.0, max: 710.0
{"metric": "score", "value": 327.0}
step: (min: 369, avg: 481.8, max: 653
{"metric": "step", "value": 481.8}
{"metric": "run", "value": 500}
loss: (min: 0.07999137043952942, avg: 0.6873189784362912, max: 5
{"metric": "loss", "value": 0.6873189784362912}
accuracy: (min: 0.28125, avg: 0.72775, max: 0.96875
{"metric": "accuracy", "value": 0.72775}
q: (min: 21.351781058311463, avg: 25.42564649503857, max: 32.889790917038916
{"metric": "q", "value": 25.42564649503857}
loss: (min: 0.11066913604736328, avg: 0.7456215901970863, max: 5
{"metric": "loss", "value": 0.7456215901970863}
accuracy: (min: 0.1875, avg: 0.72884375, max: 0.96875
{"metric": "accuracy", "value": 0.72884375}
q: (min: 22.903690137267112, avg: 26.29466358428359, max: 34.357260356545446
{"metric": "q", "value": 26.29466358428359}
score: (min: 220.0, avg: 376.0, max: 1090.0
{"metric": "score", 

loss: (min: 0.12870299816131592, avg: 0.7634107094407082, max: 5
{"metric": "loss", "value": 0.7634107094407082}
accuracy: (min: 0.34375, avg: 0.74228125, max: 1.0
{"metric": "accuracy", "value": 0.74228125}
q: (min: 23.592006927728654, avg: 27.812523840531707, max: 38.6573181772232
{"metric": "q", "value": 27.812523840531707}
score: (min: 150.0, avg: 285.0, max: 420.0
{"metric": "score", "value": 285.0}
step: (min: 248, avg: 426.2, max: 627
{"metric": "step", "value": 426.2}
{"metric": "run", "value": 640}
loss: (min: 0.1194612979888916, avg: 0.7354951766133309, max: 5
{"metric": "loss", "value": 0.7354951766133309}
accuracy: (min: 0.28125, avg: 0.73525, max: 0.9375
{"metric": "accuracy", "value": 0.73525}
q: (min: 22.98300682067871, avg: 27.80999831817329, max: 36.14722489356995
{"metric": "q", "value": 27.80999831817329}
score: (min: 110.0, avg: 411.0, max: 860.0
{"metric": "score", "value": 411.0}
step: (min: 275, avg: 481, max: 709
{"metric": "step", "value": 481}
{"metric": "run"

score: (min: 220.0, avg: 444.0, max: 960.0
{"metric": "score", "value": 444.0}
step: (min: 313, avg: 474.2, max: 671
{"metric": "step", "value": 474.2}
{"metric": "run", "value": 770}
loss: (min: 0.12845808267593384, avg: 1.1030530622005463, max: 5
{"metric": "loss", "value": 1.1030530622005463}
accuracy: (min: 0.25, avg: 0.7265, max: 0.96875
{"metric": "accuracy", "value": 0.7265}
q: (min: 26.146194429397582, avg: 30.985076965546906, max: 37.77287447094917
{"metric": "q", "value": 30.985076965546906}
score: (min: 220.0, avg: 476.0, max: 1260.0
{"metric": "score", "value": 476.0}
step: (min: 362, avg: 499.9, max: 817
{"metric": "step", "value": 499.9}
{"metric": "run", "value": 780}
loss: (min: 0.21455399692058563, avg: 1.1180245050638915, max: 5
{"metric": "loss", "value": 1.1180245050638915}
accuracy: (min: 0.375, avg: 0.73584375, max: 1.0
{"metric": "accuracy", "value": 0.73584375}
q: (min: 26.02001447439194, avg: 31.0880698263523, max: 45.98306575000286
{"metric": "q", "value": 31.