In [None]:
import os, sys, time, threading, collections
from io import BytesIO

#module_path = os.path.abspath(os.path.join('..'))
#if module_path not in sys.path:
#    sys.path.append(module_path)

import keras
#import KerasTools as KT
import numpy as np

import agent, memory, callbacks, catch, snake, tromis

import ipywidgets
import skimage
import matplotlib.pyplot as plt

import tensorflow as tf
# Only show TensorFlow errors. `tf.logging` exists in TensorFlow 1.x only; on
# 2.x the same API lives under `tf.compat.v1.logging`, so look it up in a way
# that works on both instead of failing in the very first cell.
_tf_logging = getattr(tf, "logging", None) or tf.compat.v1.logging
_tf_logging.set_verbosity(_tf_logging.ERROR)

In [None]:
# State shared between the training cells and the GamePlot thread.
# Set to ask a running GamePlot thread to stop; cleared before each training run.
stopEvent = threading.Event()
# One-slot mailbox for the most recently finished game: with maxlen=1, appending
# a new game evicts any game that has not been picked up yet.
currentGame = collections.deque(maxlen=1)
class GamePlot(threading.Thread):
    """Background thread that replays recorded games in an ``ipywidgets.Image``.

    The thread polls the module-level ``currentGame`` deque for a game (an
    iterable of frames), renders it frame by frame into the widget, and exits
    once the module-level ``stopEvent`` is set.

    Parameters
    ----------
    ratio : float
        Width/height ratio of the rendered image. The image is 256 pixels
        high and ``int(ratio * 256)`` pixels wide.
    """

    def __init__(self, ratio=1.0):
        # Daemon thread: a display-only helper must never keep the process
        # alive if the stop event is, for whatever reason, never set.
        threading.Thread.__init__(self, name="GamePlot", daemon=True)
        self.ratio = ratio
        self.imbuf = BytesIO()
        self.img = ipywidgets.Image(width=int(ratio*256), height=256)
        display(self.img)
        # Show a blank board until the first game arrives.
        self.plot_frame(np.zeros((10,10), np.uint8))

    def run(self):
        """Replay games from ``currentGame`` until ``stopEvent`` is set."""
        while not stopEvent.is_set():
            try:
                game = currentGame.pop()
            except IndexError:
                # Nothing to show yet; Event.wait sleeps like time.sleep but
                # wakes up immediately when a stop is requested.
                stopEvent.wait(0.1)
                continue
            for frame in game:
                # Stop in the middle of a replay as well. Otherwise a thread
                # that is still replaying when the next cell clears the event
                # would never notice the stop request and keep running.
                if stopEvent.is_set():
                    return
                self.plot_frame(frame)
                stopEvent.wait(0.1)
            # Short pause between two games.
            stopEvent.wait(0.5)

    def plot_frame(self, frame):
        """Upscale ``frame`` to the widget size and show it in the widget.

        ``frame`` is passed to ``skimage.transform.resize`` (order 0, i.e.
        nearest-neighbour) and then saved with ``plt.imsave`` using the
        ``nipy_spectral`` colormap and a fixed 0..1 value range.
        """
        # Import the submodule explicitly: a bare `import skimage` does not
        # guarantee that `skimage.transform` has been loaded.
        from skimage.transform import resize

        scaled = resize(frame, (256, int(self.ratio*256)), order=0,
                        mode='constant', anti_aliasing=False)
        # Reuse the buffer, but drop the previous image completely first.
        # Rewinding without truncating would leave stale trailing bytes
        # whenever the new encoded image is shorter than the previous one.
        self.imbuf.seek(0)
        self.imbuf.truncate()
        plt.imsave(self.imbuf, scaled, vmin=0.0, vmax=1.0, cmap='nipy_spectral')
        self.img.value = self.imbuf.getvalue()

class GameStore(callbacks.Callback):
    """Callback that records the frames of each game and publishes finished games.

    Creating an instance also creates and starts a ``GamePlot`` thread, which
    displays whatever is published to the module-level ``currentGame`` deque.
    The ``game_*`` hooks are presumably invoked by the agent during training
    (not visible in this notebook; see the ``callbacks`` module).

    Parameters
    ----------
    ratio : float
        Forwarded to ``GamePlot`` as the width/height ratio of the display.
    """

    def __init__(self, ratio=1.0):
        # Frames of the game in progress. Initialised here so game_frame() and
        # game_over() are safe even if they are called before game_start().
        self.gamestore = []
        # Replace whatever is left in the one-slot deque by an empty game so
        # the new plotter does not start by replaying a stale game.
        currentGame.append([])
        self.plotter = GamePlot(ratio)
        self.plotter.start()

    def game_start(self, frame):
        """Begin recording a new game with its first frame."""
        self.gamestore = [frame]

    def game_frame(self, frame):
        """Append one frame to the game being recorded."""
        self.gamestore.append(frame)

    def game_over(self):
        """Publish the recorded game for display."""
        # Publish a snapshot: the plotter thread iterates over this list, so it
        # must not change if more frames are recorded afterwards.
        currentGame.append(list(self.gamestore))

In [None]:
# Catch: fully-connected network with one linear output per game action.
grid_size = 10
nb_frames = 1

game = catch.Catch(grid_size)

# The input has a leading frame axis; the Reshape below drops it, which is only
# shape-compatible while nb_frames == 1.
inp = keras.layers.Input(shape=(nb_frames, grid_size, grid_size, 3))
#gray = keras.layers.Lambda(lambda t:t[...,0]*0.3 + t[...,1]*0.6 + t[...,2]*0.1)(inp)
resz = keras.layers.Reshape((grid_size, grid_size, 3))(inp)
flat = keras.layers.Flatten()(resz)
x = keras.layers.Dense(32, activation='relu')(flat)
act = keras.layers.Dense(game.nb_actions, activation='linear')(x)

model = keras.models.Model(inputs=inp, outputs=act)
# Use the canonical class name; the lowercase `rmsprop` alias is not available
# in every Keras version.
model.compile(optimizer=keras.optimizers.RMSprop(), loss='logcosh')
model.summary()

m = memory.UniqMemory(memory_size=65536)
a = agent.Agent(model=model, mem=m, num_frames = nb_frames)

stopEvent.clear()
try:
    a.train(game, batch_size=256, epochs=20, train_interval=32, episodes=256,
            epsilon=[0.5, 0.0], epsilon_rate=0.1,
            gamma=0.98, reset_memory=False, callbacks = [GameStore()])
finally:
    # Always stop the GamePlot thread started by GameStore, even when training
    # fails or is interrupted; otherwise it keeps running in the background.
    stopEvent.set()

In [None]:
# Snake: one convolution followed by a dense layer, one linear output per action.
grid_size = 10
nb_frames = 1

game = snake.Snake(grid_size, max_turn=64)

# The input has a leading frame axis; the Reshape below drops it, which is only
# shape-compatible while nb_frames == 1.
inp = keras.layers.Input(shape=(nb_frames, grid_size, grid_size, 3))
#gray = keras.layers.Lambda(lambda t:t[...,0]*0.3 + t[...,1]*0.6 + t[...,2]*0.1)(inp)
#perm = keras.layers.Permute((2,3,1))(gray)
resz = keras.layers.Reshape((grid_size, grid_size, 3))(inp)
conv = keras.layers.Conv2D(64, 3, activation='relu', padding='same')(resz)
flat = keras.layers.Flatten()(conv)
x = keras.layers.Dense(64, activation='elu')(flat)
act = keras.layers.Dense(game.nb_actions, activation='linear')(x)

model = keras.models.Model(inputs=inp, outputs=act)
# Use the canonical class name; the lowercase `rmsprop` alias is not available
# in every Keras version.
model.compile(optimizer=keras.optimizers.RMSprop(), loss='logcosh')
model.summary()

m = memory.UniqMemory(memory_size=65536)
a = agent.Agent(model=model, mem=m, num_frames = nb_frames)

stopEvent.clear()
try:
    a.train(game, batch_size=32, epochs=50, train_interval=32, episodes=256,
            epsilon=[0.5, 0.0], epsilon_rate=0.1,
            gamma=0.95, reset_memory=False, callbacks = [GameStore()])
finally:
    # Always stop the GamePlot thread started by GameStore, even when training
    # fails or is interrupted; otherwise it keeps running in the background.
    stopEvent.set()

In [None]:
# Tromis: two strided convolutions followed by a dense layer, one linear output
# per action. The board is not square, so the display ratio is passed along.
width, height, nb_frames = 6, 9, 1

game = tromis.Tromis(width=width, height=height, max_turn=128)

# Disabled alternative: per-frame conv model wrapped in TimeDistributed + RNN.
#inpc = keras.layers.Input(shape=(height, width, 3))
#conv1 = keras.layers.Conv2D(32, 3, activation='relu', strides=2, padding='same')(inpc)
#conv2 = keras.layers.Conv2D(64, 3, activation='relu', strides=2, padding='same')(conv1)
#gpool = keras.layers.GlobalMaxPooling2D()(conv2)
#flt = keras.layers.Flatten()(gpool)
#convm = keras.models.Model(inputs=inpc, outputs=gpool)
#convm.summary()

#inp = keras.layers.Input(shape=(nb_frames, height, width, 3))
#x = keras.layers.TimeDistributed(convm)(inp)
#x = keras.layers.SimpleRNN(128, return_sequences=False, activation='elu')(x)
#act = keras.layers.Dense(5, activation='linear')(x)

# The input has a leading frame axis; the Reshape below drops it, which is only
# shape-compatible while nb_frames == 1.
inp = keras.layers.Input(shape=(nb_frames, height, width, 3))
resz = keras.layers.Reshape((height, width, 3))(inp)
conv1 = keras.layers.Conv2D(16, 3, strides=2, activation='relu', padding='same')(resz)
# Chain the second convolution onto the first. Previously it was applied to
# `resz` again, which silently dropped the 16-filter layer from the model.
conv2 = keras.layers.Conv2D(32, 3, strides=2, activation='relu', padding='same')(conv1)
flat = keras.layers.Flatten()(conv2)
x = keras.layers.Dense(64, activation='elu')(flat)
act = keras.layers.Dense(game.nb_actions, activation='linear')(x)


model = keras.models.Model(inputs=inp, outputs=act)
# Use the canonical class name; the lowercase `rmsprop` alias is not available
# in every Keras version.
model.compile(optimizer=keras.optimizers.RMSprop(), loss='logcosh')
model.summary()

m = memory.UniqMemory(memory_size=65536)
a = agent.Agent(model=model, mem=m, num_frames = nb_frames)

stopEvent.clear()
try:
    a.train(game, batch_size=64, epochs=50, train_interval=32, episodes=256,
            epsilon=[0.5, 0.0], epsilon_rate=0.1,
            gamma=0.95, reset_memory=False, callbacks = [GameStore(ratio=width/height)])
finally:
    # Always stop the GamePlot thread started by GameStore, even when training
    # fails or is interrupted; otherwise it keeps running in the background.
    stopEvent.set()