In [None]:
from __future__ import print_function
import os, sys, StringIO, time, threading, collections
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import keras
import KerasTools as KT
import numpy as np

import ipywidgets
import skimage
import matplotlib.pyplot as plt

In [None]:
# Shared flag: once set, every running GamePlot thread leaves its polling loop.
stopEvent = threading.Event()

# Single-slot hand-off between the training callback and the plot thread.
# With maxlen=1 an appended game displaces any older game that has not been
# picked up yet, so the plotter always replays the most recent one.
currentGame = collections.deque(maxlen=1)
class GamePlot(threading.Thread):
    """Background thread that replays recorded games in an ipywidgets Image.

    The thread polls the module-level ``currentGame`` deque for a finished
    game (an iterable of frames), renders every frame into ``self.img`` and
    keeps polling until the module-level ``stopEvent`` is set.
    """

    def __init__(self):
        """Create the image widget, display it and draw a blank placeholder."""
        threading.Thread.__init__(self, name="GamePlot")
        # Reusable in-memory buffer that receives each encoded frame.
        self.imbuf = StringIO.StringIO()
        self.img = ipywidgets.Image(width=256, height=256)
        display(self.img)
        # All-zero frame so the widget shows something before the first game.
        self.plot_frame(np.zeros((10,10,3), np.uint8))

    def run(self):
        """Replay games taken from ``currentGame`` until ``stopEvent`` is set."""
        while not stopEvent.is_set():
            try:
                game = currentGame.pop()
            except IndexError:
                # Nothing recorded yet: back off briefly instead of spinning.
                time.sleep(0.1)
            else:
                for frame in game:
                    self.plot_frame(frame)
                    time.sleep(0.1)
                # Short pause between consecutive replays.
                time.sleep(0.5)

    def plot_frame(self, frame):
        """Upscale ``frame`` to 256x256 and push it into the image widget.

        Args:
            frame: image array accepted by ``skimage.transform.resize``.
        """
        # A bare ``import skimage`` does not guarantee that the ``transform``
        # submodule is loaded on scikit-image releases without lazy loading,
        # so import it explicitly here.
        import skimage.transform

        # order=0 (nearest neighbour) keeps the grid cells as sharp blocks.
        f = skimage.transform.resize(frame, (256, 256), order=0, mode='constant', anti_aliasing=False)
        # Empty the buffer before writing: merely rewinding it would leave the
        # tail of a previous, longer image behind the new one.
        self.imbuf.seek(0)
        self.imbuf.truncate()
        plt.imsave(self.imbuf, f)
        # getvalue() is the public accessor for the complete buffer contents.
        self.img.value = self.imbuf.getvalue()
        
class GameStore:
    """Callback object that records the frames of a game and hands every
    finished game to a GamePlot thread through the ``currentGame`` deque.

    The hooks are presumably invoked by the training agent in the order
    game_start -> game_frame* -> game_over (verify against KerasTools).
    """

    def __init__(self):
        """Create the plotting thread and start it immediately."""
        self.plotter = GamePlot()
        self.plotter.start()

    def game_start(self, frame):
        """Begin a new recording whose first entry is ``frame``."""
        recording = []
        recording.append(frame)
        self.gamestore = recording

    def game_frame(self, frame):
        """Add ``frame`` to the recording started by ``game_start``."""
        self.gamestore += [frame]

    def game_over(self):
        """Publish the current recording for the plot thread to replay."""
        finished = self.gamestore
        currentGame.append(finished)

In [None]:
grid_size = 10
hidden_size = 100  # not referenced by the model defined in this cell
nb_frames = 8

# Fully connected network: the stack of nb_frames grids is flattened and fed
# through two small ReLU layers; the final layer has 3 linear outputs
# (presumably one Q-value per action -- confirm against KerasTools).
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=(nb_frames, grid_size, grid_size)))
model.add(keras.layers.Dense(16, activation='relu'))
model.add(keras.layers.Dense(16, activation='relu'))
model.add(keras.layers.Dense(3))
# Use the canonical RMSprop class; the lowercase ``rmsprop`` name is only a
# legacy alias that is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.catch.Catch(grid_size)
agent = KT.qlearn.agent.Agent(model=model, memory_size=20000, nb_frames=nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=50, train_interval=32,
                epsilon=[0.5, 0.01], epsilon_rate=0.5, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always release the GamePlot thread, even when training raises or the
    # cell is interrupted; otherwise the thread keeps polling forever.
    stopEvent.set()

In [None]:
grid_size = 10
hidden_size = 100  # not referenced by the model defined in this cell
nb_frames = 8

# Recurrent network: every frame is flattened to a vector of
# grid_size*grid_size values and the nb_frames vectors are read as a
# sequence by a GRU; the final layer has 3 linear outputs.
model = keras.models.Sequential()
model.add(keras.layers.Reshape((nb_frames, grid_size*grid_size), input_shape=(nb_frames, grid_size, grid_size)))
model.add(keras.layers.GRU(16))
model.add(keras.layers.Dense(3))
# Use the canonical RMSprop class; the lowercase ``rmsprop`` name is only a
# legacy alias that is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.catch.Catch(grid_size)
agent = KT.qlearn.agent.Agent(model=model, memory_size=20000, nb_frames=nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=10, train_interval=32,
                epsilon=[0.5, 0.01], epsilon_rate=0.5, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always release the GamePlot thread, even when training raises or the
    # cell is interrupted; otherwise the thread keeps polling forever.
    stopEvent.set()

In [None]:
grid_size = 10
hidden_size = 100  # not referenced by the model defined in this cell
nb_frames = 8

# Convolutional + recurrent network.
model = keras.models.Sequential()
# Move the frame axis last so the nb_frames frames act as channels of one
# grid_size x grid_size image.
model.add(keras.layers.Permute((2,3,1), input_shape=(nb_frames, grid_size, grid_size)))
model.add(keras.layers.SeparableConv2D(8, (3, 3), activation='relu', padding='same'))
model.add(keras.layers.SeparableConv2D(16, (3, 3), activation='relu', padding='same'))
# Bring the 16 feature maps to the front and flatten each map, so the GRU
# reads a sequence of 16 vectors of length grid_size*grid_size.
model.add(keras.layers.Permute((3,1,2)))
model.add(keras.layers.Reshape((16, (grid_size)*(grid_size))))
model.add(keras.layers.GRU(8))
model.add(keras.layers.Dense(3))
# Use the canonical RMSprop class; the lowercase ``rmsprop`` name is only a
# legacy alias that is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.snake.Snake(grid_size, max_turn=256)
agent = KT.qlearn.agent.Agent(model=model, memory_size=500000, nb_frames=nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=50, train_interval=32,
                epsilon=[0.5, 0.01], epsilon_rate=0.5, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always release the GamePlot thread, even when training raises or the
    # cell is interrupted; otherwise the thread keeps polling forever.
    stopEvent.set()

In [None]:
grid_size = 10
hidden_size = 100  # not referenced by the model defined in this cell
nb_frames = 8

# Convolutional LSTM network: a trailing channel axis of size 1 is added so
# the frame sequence has the 5-D layout ConvLSTM2D expects.
model = keras.models.Sequential()
model.add(keras.layers.Reshape((nb_frames, grid_size, grid_size, 1), input_shape=(nb_frames, grid_size, grid_size)))
model.add(keras.layers.ConvLSTM2D(8, (3, 3), activation='relu'))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(3))
# Use the canonical RMSprop class; the lowercase ``rmsprop`` name is only a
# legacy alias that is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.snake.Snake(grid_size)
agent = KT.qlearn.agent.Agent(model=model, memory_size=20000, nb_frames=nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=10, train_interval=32,
                epsilon=[0.5, 0.01], epsilon_rate=0.5, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always release the GamePlot thread, even when training raises or the
    # cell is interrupted; otherwise the thread keeps polling forever.
    stopEvent.set()

In [None]:
grid_size = 10
nb_frames = 8

# Convolutional network with a dense head.
model = keras.models.Sequential()
# Move the frame axis last so the nb_frames frames act as channels of one
# grid_size x grid_size image.
model.add(keras.layers.Permute((2,3,1), input_shape=(nb_frames, grid_size, grid_size)))
model.add(keras.layers.SeparableConv2D(16, (3, 3), activation='relu', padding='same'))
model.add(keras.layers.SeparableConv2D(32, (3, 3), activation='relu', padding='same'))
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(3))
# Use the canonical RMSprop class; the lowercase ``rmsprop`` name is only a
# legacy alias that is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.snake.Snake(grid_size, max_turn=256)
agent = KT.qlearn.agent.Agent(model=model, memory_size=500000, nb_frames=nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=50, train_interval=32,
                epsilon=[0.5, 0.01], epsilon_rate=0.5, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always release the GamePlot thread, even when training raises or the
    # cell is interrupted; otherwise the thread keeps polling forever.
    stopEvent.set()

In [None]:
grid_size = 10
nb_frames = 1

# Per-frame convolutions followed by a recurrent layer. The model input is a
# variable-length sequence of grid_size x grid_size images with 3 channels
# (presumably RGB frames produced by the game -- confirm against KerasTools).
model = keras.models.Sequential()
model.add(keras.layers.TimeDistributed(keras.layers.Conv2D(16, (3, 3), activation='relu'), input_shape=(None, grid_size, grid_size, 3)))
model.add(keras.layers.TimeDistributed(keras.layers.Conv2D(32, (3, 3), activation='relu')))
# Two unpadded 3x3 convolutions shrink each side by 4, hence (grid_size-4).
model.add(keras.layers.TimeDistributed(keras.layers.Reshape((32*(grid_size-4)*(grid_size-4),))))
model.add(keras.layers.SimpleRNN(64, return_sequences=False))
model.add(keras.layers.Dense(3))
# Use the canonical RMSprop class; the lowercase ``rmsprop`` name is only a
# legacy alias that is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.snake.Snake(grid_size, max_turn=64)
agent = KT.qlearn.agent.Agent(model=model, memory_size=65536, nb_frames=nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=10, train_interval=128,
                epsilon=[0.5, 0.01], epsilon_rate=0.1, gamma=0.8, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always release the GamePlot thread, even when training raises or the
    # cell is interrupted; otherwise the thread keeps polling forever.
    stopEvent.set()


In [None]:
# Continues with the `model`, `game` and `agent` objects created by an
# earlier cell; none of them is defined here.
model.save('snake.h5')
# Allow longer games for this second training round.
game.max_turn = 256
stopEvent.clear()
try:
    # epsilon=0.0 is passed as a single value instead of a [start, end] pair.
    agent.train(game, batch_size=256, epochs=5, train_interval=128,
                epsilon=0.0, gamma=0.95, reset_memory=False, callbacks=[GameStore()])
finally:
    # Always release the GamePlot thread, even when training raises or the
    # cell is interrupted; otherwise the thread keeps polling forever.
    stopEvent.set()

In [None]:
grid_size = 10
nb_frames = 1

# Per-frame feature extractor: two unpadded 3x3 convolutions (each side
# shrinks by 4 in total) whose output is flattened into one vector.
inpc = keras.layers.Input(shape=(grid_size, grid_size, 3))
conv1 = keras.layers.Conv2D(16, (3, 3), activation='relu')(inpc)
conv2 = keras.layers.Conv2D(32, (3, 3), activation='relu')(conv1)
resh = keras.layers.Reshape((32*(grid_size-4)*(grid_size-4),))(conv2)
convm = keras.models.Model(inputs=inpc, outputs=resh)
convm.summary()

# Sequence model: apply the extractor to every frame of a variable-length
# sequence, summarise the sequence with a SimpleRNN and map the result to
# 3 linear outputs.
inp = keras.layers.Input(shape=(None, grid_size, grid_size, 3))
x = keras.layers.TimeDistributed(convm)(inp)
x = keras.layers.SimpleRNN(64, return_sequences=False)(x)
act = keras.layers.Dense(3, activation='linear')(x)

model = keras.models.Model(inputs=inp, outputs=act)
# Use the canonical RMSprop class; the lowercase ``rmsprop`` name is only a
# legacy alias that is not available in every Keras release.
model.compile(keras.optimizers.RMSprop(), "logcosh")
model.summary()

game = KT.qlearn.snake.Snake(grid_size, max_turn=64)
agent = KT.qlearn.agent.Agent(model=model, memory_size=65536, nb_frames=nb_frames)
stopEvent.clear()
try:
    agent.train(game, batch_size=256, epochs=5, train_interval=128,
                epsilon=[0.5, 0.01], epsilon_rate=0.1, gamma=0.8, reset_memory=False,
                callbacks=[GameStore()])
finally:
    # Always release the GamePlot thread, even when training raises or the
    # cell is interrupted; otherwise the thread keeps polling forever.
    stopEvent.set()
