In [None]:
from __future__ import print_function
import os, sys, StringIO, time, threading, collections
# Make the parent directory importable so the local KerasTools package is found.
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import keras
import KerasTools as KT
import numpy as np

import ipywidgets
import skimage
# `import skimage` alone does not guarantee that the `transform` submodule is
# loaded on every scikit-image release; import it explicitly because
# GamePlot.plot_frame calls skimage.transform.resize.
import skimage.transform
import matplotlib.pyplot as plt

In [None]:
# Flag polled by GamePlot.run(): the notebook cells clear it before training
# and set it afterwards to make the plotting thread exit.
stopEvent = threading.Event()

# One-slot mailbox from GameStore.game_over() to GamePlot.run(). With maxlen=1
# an append discards whatever game is still waiting, so only the newest
# finished game is ever replayed.
currentGame = collections.deque(maxlen=1)
class GamePlot(threading.Thread):
    """Background thread that replays finished games in an ipywidgets image.

    Games are taken from the module-level ``currentGame`` deque; each game is
    an iterable of 2-D frames that are upscaled and shown one after another.
    The thread exits when the module-level ``stopEvent`` is set, or when a
    newer ``GamePlot`` has been created (so that a plotter left over from a
    previous cell cannot keep running and steal games after ``stopEvent`` is
    cleared again).

    Parameters
    ----------
    factor : float
        Width/height ratio of the displayed image; the image is
        ``FRAME_HEIGHT`` pixels high and ``int(factor * FRAME_HEIGHT)`` wide.
    """

    # Pixel height of the rendered image.
    FRAME_HEIGHT = 256
    # Seconds to wait between two frames of the same game (also the polling
    # interval while no game is available).
    FRAME_DELAY = 0.1
    # Extra pause in seconds after the last frame of a game.
    GAME_PAUSE = 0.5
    # Most recently constructed plotter; older plotters stop once replaced.
    _active = None

    def __init__(self, factor=1.0):
        """Create the image widget, display it and draw a blank first frame."""
        threading.Thread.__init__(self, name="GamePlot")
        # Daemon thread: a plotter that was never told to stop (e.g. because
        # training raised) must not keep the interpreter alive at shutdown.
        self.daemon = True
        self.factor = factor
        self.imbuf = StringIO.StringIO()
        self.img = ipywidgets.Image(width=int(factor*self.FRAME_HEIGHT), height=self.FRAME_HEIGHT)
        # `display` is not imported in this notebook; it is the global that
        # IPython provides in notebook sessions.
        display(self.img)
        self.plot_frame(np.zeros((10,10), np.uint8))
        # From now on this instance is the only plotter allowed to consume games.
        GamePlot._active = self

    def run(self):
        """Replay games until ``stopEvent`` is set or a newer plotter exists."""
        while not stopEvent.is_set() and GamePlot._active is self:
            try:
                game = currentGame.pop()
            except IndexError:
                # No finished game waiting yet.
                time.sleep(self.FRAME_DELAY)
            else:
                for frame in game:
                    self.plot_frame(frame)
                    time.sleep(self.FRAME_DELAY)
                time.sleep(self.GAME_PAUSE)

    def plot_frame(self, frame):
        """Upscale ``frame`` (nearest neighbour) and push it to the widget as PNG."""
        f = skimage.transform.resize(frame, (self.FRAME_HEIGHT, int(self.factor*self.FRAME_HEIGHT)),
                                     order=0, mode='constant', anti_aliasing=False)
        # Rewind AND truncate the reused buffer: without the truncate a PNG
        # shorter than the previous one would be followed by stale bytes.
        self.imbuf.seek(0)
        self.imbuf.truncate()
        # Force PNG so the bytes match the widget's default image format
        # regardless of matplotlib's configured default save format.
        plt.imsave(self.imbuf, f, vmin=0.0, vmax=1.0, cmap='nipy_spectral', format='png')
        # getvalue() is the public accessor for the buffer content.
        self.img.value = self.imbuf.getvalue()
        
class GameStore:
    """Training callback that records game frames and hands them to a plotter.

    On construction a ``GamePlot`` thread is created and started. Frames are
    collected through ``game_start`` / ``game_frame`` and the finished game is
    published to the module-level ``currentGame`` deque by ``game_over``.

    Parameters
    ----------
    factor : float
        Passed through to ``GamePlot`` (width/height ratio of the display).
    """

    def __init__(self, factor=1.0):
        # Start with an empty recording so game_frame()/game_over() are safe
        # even if they are called before the first game_start().
        self.gamestore = []
        self.plotter = GamePlot(factor)
        self.plotter.start()

    def game_start(self, frame):
        """Begin a new recording whose first frame is ``frame``."""
        self.gamestore = [frame]

    def game_frame(self, frame):
        """Append ``frame`` to the current recording."""
        self.gamestore.append(frame)

    def game_over(self):
        """Publish the current recording for replay; empty recordings are skipped."""
        if self.gamestore:
            # Publish a snapshot so later appends to self.gamestore cannot
            # modify a list the plotting thread may be iterating over.
            currentGame.append(list(self.gamestore))

In [None]:
# Catch game with a small fully-connected network.
grid_size = 10
nb_frames = 10

# The stack of nb_frames grid_size x grid_size frames is flattened into one vector.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=(nb_frames, grid_size, grid_size)))
model.add(keras.layers.Dense(100, activation='relu'))
# Three linear outputs -- presumably one value per game action (confirm against KT.qlearn.catch).
model.add(keras.layers.Dense(3))
# RMSprop is the canonical class name; the lowercase alias is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.catch.Catch(grid_size)
agent = KT.qlearn.agent.Agent(model=model, memory_size=20000, nb_frames = nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=10, train_interval=32,
                epsilon=[0.5, 0.01], epsilon_rate=0.5, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always stop the plotting thread, even if training fails or is interrupted.
    stopEvent.set()

In [None]:
# Snake game with per-frame convolutions followed by a GRU over the frame axis.
grid_size = 10
nb_frames = 10
conv_filters = 8

# Side length of the per-frame feature map after the layers below:
# MaxPooling2D(2) halves the grid, the unpadded 3x3 convolution trims 2 more.
# For grid_size = 10 this is 3, i.e. 3*3*8 features per frame.
conv_side = grid_size // 2 - 2

model = keras.models.Sequential()
# Add a trailing channel axis so every frame is a single-channel image.
model.add(keras.layers.Reshape((nb_frames, grid_size, grid_size, 1), input_shape=(nb_frames, grid_size, grid_size)))
model.add(keras.layers.TimeDistributed(keras.layers.Conv2D(4, (3,3), padding='same')))
model.add(keras.layers.TimeDistributed(keras.layers.MaxPooling2D(2)))
model.add(keras.layers.TimeDistributed(keras.layers.Conv2D(conv_filters, (3,3))))
# Flatten each frame's feature map; derived from grid_size instead of a hard-coded 3*3*8.
model.add(keras.layers.Reshape((nb_frames, conv_side*conv_side*conv_filters,)))
model.add(keras.layers.GRU(nb_frames))
# Three linear outputs -- presumably one value per game action (confirm against KT.qlearn.snake).
model.add(keras.layers.Dense(3))
# RMSprop is the canonical class name; the lowercase alias is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.snake.Snake(grid_size)
agent = KT.qlearn.agent.Agent(model=model, memory_size=20000, nb_frames = nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=10, train_interval=32,
                epsilon=[0.5, 0.01], epsilon_rate=0.5, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always stop the plotting thread, even if training fails or is interrupted.
    stopEvent.set()

In [None]:
# Snake game with a plain CNN that treats the stacked frames as image channels.
grid_size = 10
nb_frames = 4

model = keras.models.Sequential()
# Move the frame axis last: (frames, rows, cols) -> (rows, cols, frames).
model.add(keras.layers.Permute((2,3,1), input_shape=(nb_frames, grid_size, grid_size)))
model.add(keras.layers.Conv2D(16, (3, 3), activation='relu'))
model.add(keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
# Three linear outputs -- presumably one value per game action (confirm against KT.qlearn.snake).
model.add(keras.layers.Dense(3, activation='linear'))
# RMSprop is the canonical class name; the lowercase alias is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.snake.Snake(grid_size, max_turn=64)
agent = KT.qlearn.agent.Agent(model=model, memory_size=65536, nb_frames = nb_frames)

stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=100, train_interval=128,
                epsilon=[0.5, 0.01], epsilon_rate=0.1, gamma=0.8, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always stop the plotting thread, even if training fails or is interrupted.
    stopEvent.set()

In [None]:
# Tromis game: a shared per-frame CNN encoder followed by a SimpleRNN over the frames.
width = 5
height = 8
nb_frames = 8

# Per-frame encoder: two padded 3x3 convolutions, then a global max over the
# spatial axes, giving one 32-value vector per frame.
inpc = keras.layers.Input(shape=(height, width, 3))
conv1 = keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same')(inpc)
conv2 = keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
gpool = keras.layers.GlobalMaxPooling2D()(conv2)
convm = keras.models.Model(inputs=inpc, outputs=gpool)
convm.summary()

# The frame axis is left as None, so the sequence length is not fixed by the model.
inp = keras.layers.Input(shape=(None, height, width, 3))
x = keras.layers.TimeDistributed(convm)(inp)
x = keras.layers.SimpleRNN(32, return_sequences=False)(x)
# Five linear outputs -- presumably one value per game action (confirm against KT.qlearn.tromis).
act = keras.layers.Dense(5, activation='linear')(x)

model = keras.models.Model(inputs=inp, outputs=act)
# RMSprop is the canonical class name; the lowercase alias is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), 'logcosh')
model.summary()

game = KT.qlearn.tromis.Tromis(width=width,height=height, max_turn=250)
agent = KT.qlearn.agent.Agent(model=model, memory_size=65536*4, nb_frames = nb_frames)
stopEvent.clear()
try:
    # The display is scaled to the board's width/height ratio.
    agent.train(game, batch_size=256, epochs=100, train_interval=128,
                epsilon=[0.5, 0.0], epsilon_rate=0.10,
                gamma=0.9, reset_memory=False, callbacks=[GameStore(factor=width/float(height))])
finally:
    # Always stop the plotting thread, even if training fails or is interrupted.
    stopEvent.set()