In [None]:
from google.colab import files

# Open Colab's upload widget; the call returns once the upload has finished.
uploaded = files.upload()
# Per the Colab docs the result maps each uploaded file name to its raw
# contents, so report only names and sizes instead of dumping the bytes into
# the cell output.  The function-call form of print works on Python 2 and 3.
for filename in sorted(uploaded):
    print('{}: {} bytes'.format(filename, len(uploaded[filename])))

In [None]:
import keras
import qlearn
import numpy as np
import time
# Shell escape: long-format listing of the current working directory.
!ls -l

In [None]:
def build_dqn(nb_actions):
    """Build and compile the recurrent convolutional Q-network.

    A small per-frame CNN (two 3x3 convolutions followed by global max
    pooling) is wrapped in ``TimeDistributed`` so it runs on every frame of
    the input sequence.  The resulting feature sequence is reduced by a
    ``SimpleRNN`` to its last state and mapped through two dense layers to
    one linear output per action.

    The summaries of both the per-frame CNN and the full model are printed
    as a side effect.

    Args:
        nb_actions: Number of units in the final linear output layer.

    Returns:
        The compiled ``keras.models.Model`` (RMSprop optimizer with default
        settings, ``'logcosh'`` loss).
    """
    # Per-frame feature extractor.  Spatial size is left unspecified; the
    # global max pooling collapses it, leaving one value per conv2 filter (32).
    # NOTE(review): shape=(None, None, 3) assumes channels-last frames with
    # 3 channels -- confirm against what the qlearn games emit.
    inpc = keras.layers.Input(shape=(None, None, 3))
    conv1 = keras.layers.Conv2D(16, (3, 3), activation='relu', padding='same')(inpc)
    conv2 = keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(conv1)
    gpool = keras.layers.GlobalMaxPooling2D()(conv2)
    convm = keras.models.Model(inputs=inpc, outputs=gpool)
    convm.summary()

    # Sequence model: the extra leading None is the sequence axis that
    # TimeDistributed iterates over; only the final RNN state is kept.
    inp = keras.layers.Input(shape=(None, None, None, 3))
    x = keras.layers.TimeDistributed(convm)(inp)
    x = keras.layers.SimpleRNN(64, return_sequences=False)(x)
    x = keras.layers.Dense(128, activation='relu')(x)
    act = keras.layers.Dense(nb_actions, activation='linear')(x)

    model = keras.models.Model(inputs=inp, outputs=act)
    # Use the RMSprop class rather than the lowercase ``rmsprop`` alias: the
    # alias only exists in old standalone Keras, the class exists everywhere.
    model.compile(optimizer=keras.optimizers.RMSprop(), loss='logcosh')
    model.summary()

    return model

class Callback(object):
    """Base class for training hooks; every hook here is a no-op.

    Subclasses override only the hooks they need.  Each default
    implementation ignores its arguments and returns ``None``.
    """

    def game_start(self, frame):
        """Hook receiving a single ``frame`` argument; does nothing."""
        return None

    def game_frame(self, frame):
        """Hook receiving a single ``frame`` argument; does nothing."""
        return None

    def game_over(self):
        """Hook taking no arguments; does nothing."""
        return None

    def epoch_end(self, *args):
        """Hook accepting any positional arguments; does nothing."""
        return None

class History(Callback):
    """Callback that appends one row of training statistics per epoch to a log file."""

    def __init__(self, name):
        """Create the log file and write its column header.

        The file is named ``<name>-<timestamp>.log``, where the timestamp is
        the current UTC time formatted as ``YYYYMMDD_HHMMSS``.

        Args:
            name: Prefix used for the log file name.
        """
        now = time.gmtime()
        # The first six struct_time fields are year, month, day, hour, minute, second.
        self.timestamp = "{:04d}{:02d}{:02d}_{:02d}{:02d}{:02d}".format(*now[:6])
        self.filename = '{}-{}.log'.format(name, self.timestamp)
        header = 'Epoch, Epsilon,    Loss, Win Ratio, Avg Score, Max Score,   Memory\n'
        with open(self.filename, 'w+') as log:
            log.write(header)

    def epoch_end(self, *args):
        """Append one formatted statistics row to the log file.

        Args:
            *args: Exactly nine positional values, in order: model, name,
                epoch, epsilon, loss, win_ratio, avg_score, max_score,
                memory.  The first two are not written to the log.

        Raises:
            ValueError: If ``args`` does not contain exactly nine values.
        """
        _model, _name, epoch, epsilon, loss, win_ratio, avg_score, max_score, memory = args
        fields = (epoch, epsilon, loss, win_ratio, avg_score, max_score, memory)
        with open(self.filename, 'a') as log:
            log.write('{:> 5d}, {:>7.2f}, {:>7.4f}, {:>9.2%}, {:>9.2f}, {:>9.2f}, {:>8d}\n'.format(*fields))
       
class Checkpoint(Callback):
    """Callback that saves the model to an ``.h5`` file every ``interval`` epochs."""

    def __init__(self, interval=1):
        """Store the save interval.

        Args:
            interval: The model is saved on epochs that are exact multiples
                of this value (default 1, i.e. every epoch).
        """
        self.interval = interval

    def epoch_end(self, *args):
        """Save the model as ``<name>_<epoch>.h5`` when the epoch is due.

        Args:
            *args: Exactly nine positional values, in order: model, name,
                epoch, epsilon, loss, win_ratio, avg_score, max_score,
                memory.  Only the first three are used.

        Raises:
            ValueError: If ``args`` does not contain exactly nine values.
        """
        model, name, epoch, _epsilon, _loss, _win_ratio, _avg_score, _max_score, _memory = args
        # Skip epochs that are not a multiple of the configured interval.
        if epoch % self.interval != 0:
            return
        # The epoch number is zero-padded to three digits in the file name.
        model.save('{}_{:03d}.h5'.format(name, epoch))


In [None]:
# Train a freshly built network on qlearn's Catch game.
grid_size = 10
nb_frames = 4

game = qlearn.Catch(grid_size=grid_size)
model = build_dqn(game.nb_actions)
agent = qlearn.Agent(
    model=model,
    memory_size=65536,
    nb_frames=nb_frames
)
# Checkpoint(1) saves the model after every epoch; History logs per-epoch
# statistics to a file named after the game.
# NOTE(review): epsilon=[0.5, 0.0] with epsilon_rate presumably describes an
# exploration schedule -- confirm against qlearn.Agent.train.
agent.train(
    game,
    batch_size=256,
    epochs=20,
    train_interval=128,
    epsilon=[0.5, 0.0],
    epsilon_rate=0.25,
    gamma=0.95,
    reset_memory=False,
    callbacks=[Checkpoint(1), History(game.name)]
)

In [None]:
# Train a freshly built network on qlearn's Snake game.
grid_size = 10
nb_frames = 12

game = qlearn.Snake(grid_size=grid_size, max_turn=128)
model = build_dqn(game.nb_actions)
agent = qlearn.Agent(
    model=model,
    memory_size=65536,
    nb_frames=nb_frames
)
# Checkpoint(10) saves the model on every tenth epoch; History logs per-epoch
# statistics to a file named after the game.
agent.train(
    game,
    batch_size=256,
    epochs=100,
    train_interval=128,
    epsilon=[0.5, 0.0],
    epsilon_rate=0.1,
    gamma=0.95,
    reset_memory=False,
    callbacks=[Checkpoint(10), History(game.name)]
)

In [None]:
# Continue training on qlearn's Tromis game from a previously saved model.
width = 5
height = 8
nb_frames = 8

game = qlearn.Tromis(width=width, height=height, max_turn=512)
# Load the saved 'Tromis_500.h5' model rather than building a new network
# with build_dqn(game.nb_actions).
model = keras.models.load_model('Tromis_500.h5')
agent = qlearn.Agent(
    model=model,
    memory_size=65536,
    nb_frames=nb_frames
)
# Checkpoint(10) saves the model on every tenth epoch; History logs per-epoch
# statistics to a file named after the game.
# NOTE(review): initial_epoch=501 / epochs=550 presumably resumes the epoch
# counter after the loaded checkpoint -- confirm against qlearn.Agent.train.
agent.train(
    game,
    batch_size=256,
    initial_epoch=501,
    epochs=550,
    train_interval=128,
    epsilon=0.0,
    gamma=0.90,
    reset_memory=False,
    callbacks=[Checkpoint(10), History(game.name)]
)

In [None]:
# List the working directory, then pack every file matching Tromis* into an
# xz-compressed tar archive (-c create, -v verbose, -J xz, -f output file).
!ls
!tar -cvJf tromis_550.txz Tromis*

In [None]:
# Send the archive to the browser for download; `files` is the
# google.colab.files module imported in the upload cell.
files.download('tromis_550.txz')

In [None]:
# Persist the agent's memory contents with NumPy; np.save appends ".npy" to
# the given name, producing 'Tromis_epoch100_mem.npy'.
# NOTE(review): the file name says "epoch100" but the training cell above runs
# to epoch 550 -- confirm the intended label.
# NOTE(review): the structure of agent.memory.memory is not visible here;
# verify that it converts cleanly to an array.
np.save('Tromis_epoch100_mem',agent.memory.memory)

In [None]:
# Pack every file matching Tromis* into an xz-compressed tar archive, then
# list the directory with human-readable sizes.
!tar -cvJf tromis.txz Tromis*
!ls -lh