In [1]:
import numpy as np
import tensorflow as tf
from datetime import datetime
from keras.models import Sequential, load_model
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.utils.training_utils import multi_gpu_model
from tqdm import tqdm

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Read in the card texts and the two character/index lookup tables.
# The lookup tables are stored as .npy files wrapping a Python dict (hence
# `.item()`), which NumPy has to unpickle; NumPy >= 1.16.3 refuses to do so
# unless allow_pickle=True is passed explicitly.
# NOTE(review): unpickling can execute arbitrary code -- only load files you
# produced yourself. The card-text array may not need pickling at all
# (allow_pickle is harmless for plain string arrays) -- TODO confirm.
cardtext = [list(text) for text in np.load('data/card_texts_new.npy', allow_pickle=True)]
c2i = np.load('data/c2i.npy', allow_pickle=True).item()  # character -> index
i2c = np.load('data/i2c.npy', allow_pickle=True).item()  # index -> character

In [3]:
# Shuffle the cards reproducibly.
# np.random.seed is a function and has to be *called*; assigning a value to it
# seeds nothing and replaces the function with an int for the whole session.
np.random.seed(1337)
indices = list(np.random.permutation(len(cardtext)))
cardtext = [cardtext[i] for i in indices]

In [4]:
# ---- hyperparameters ----

# network shape
VOCAB_SIZE = len(c2i)    # one embedding row / output unit per known character
EMBEDDING_SIZE = 128     # width of the character embedding
HIDDEN_SIZE = 192        # units in every LSTM layer
HIDDEN_LAYERS = 3        # how many LSTM layers are stacked
DROP_RATE = 0.2          # dropout rate

# batching
WINDOW_SIZE = 100        # characters of context per training sample
CARDS_PER_BATCH = 5      # cards concatenated into one generator batch
BATCH_SIZE = 64          # mini-batch size handed to model.fit

# schedule
START_EPOCH = 0          # first epoch index of the training loop
NUM_EPOCHS = 1000        # epoch index at which the training loop stops
OUT_INCREMENT = 100      # save weights and write samples every n batches

In [5]:
def cardGenerator(cardtext, windowsize, cards_per_batch, c2i=None, debug=False):
    """Yield an endless stream of next-character training batches.

    Cards are visited in a random order that is reshuffled every time it is
    exhausted. Each batch appends ``cards_per_batch`` cards to a
    ``windowsize``-long context and slides a window over the result, giving
    one (context, next element) pair per element of the appended cards.

    The very first context is the tail of the first card (in shuffled order)
    that has at least ``windowsize`` elements; that card, and any shorter
    cards in front of it, are not used as training cards during the first
    pass. Every later batch uses the tail of the previous batch as context,
    so consecutive batches form one continuous text.

    Parameters
    ----------
    cardtext : sequence of sequences
        One entry per card; each entry holds that card's characters.
    windowsize : int
        Context length, must be >= 1.
    cards_per_batch : int
        Number of cards appended per yielded batch.
    c2i : mapping, optional
        Lookup applied to every character when ``debug`` is false. When
        omitted, the module-level ``c2i`` is used (looked up on the first
        ``next()`` call rather than when this function is defined).
    debug : bool
        When true, characters are passed through without encoding.

    Yields
    ------
    (x, y) : tuple of numpy.ndarray
        ``x`` stacks the context windows (shape ``(n, windowsize)``) and
        ``y`` holds the element following each window (shape ``(n, 1)``),
        where ``n`` is the total length of the cards in the batch.

    Raises
    ------
    ValueError
        On the first ``next()`` call if ``windowsize`` is smaller than 1, if
        no card has at least ``windowsize`` elements, or if encoding is
        requested but no ``c2i`` mapping can be found.
    """
    if windowsize < 1:
        raise ValueError('windowsize must be at least 1')
    if not debug and c2i is None:
        # late-bind to the notebook-level mapping
        c2i = globals().get('c2i')
        if c2i is None:
            raise ValueError('a c2i mapping is required unless debug=True')

    def encode(chars):
        return list(chars) if debug else [c2i[c] for c in chars]

    num_cards = len(cardtext)
    order = np.random.permutation(num_cards)

    # warm-up: seed the context with the tail of the first long-enough card
    cursor = next((pos for pos, card in enumerate(order)
                   if len(cardtext[card]) >= windowsize), None)
    if cursor is None:
        raise ValueError('no card has at least windowsize={} elements'.format(windowsize))
    sequence = encode(cardtext[order[cursor]][-windowsize:])
    cursor += 1  # `cursor` always points at the next card that was not used yet

    while True:
        # append the next cards, reshuffling whenever the order runs out
        for _ in range(cards_per_batch):
            if cursor >= num_cards:
                order = np.random.permutation(num_cards)
                cursor = 0
            sequence += encode(cardtext[order[cursor]])
            cursor += 1

        # one (window, following element) pair per appended element;
        # y gets a trailing axis for sparse_categorical_crossentropy
        n_windows = len(sequence) - windowsize
        x = np.asarray([sequence[k:k + windowsize] for k in range(n_windows)])
        y = np.asarray(sequence[windowsize:])[:, np.newaxis]
        yield x, y

        # the tail of this batch is the context for the next one
        sequence = sequence[-windowsize:]

In [6]:
# endless batch generator over the cards
getbatch = cardGenerator(
    cardtext,
    windowsize=WINDOW_SIZE,
    cards_per_batch=CARDS_PER_BATCH,
)

In [7]:
# Build the template model on the CPU:
# embedding -> dropout -> stacked LSTMs -> softmax over the vocabulary.
with tf.device("/cpu:0"):
    layer_stack = [
        Embedding(VOCAB_SIZE, EMBEDDING_SIZE, input_shape=(WINDOW_SIZE, )),
        Dropout(DROP_RATE),
    ]
    # every LSTM except the last hands its full output sequence to the next layer
    layer_stack.extend(LSTM(HIDDEN_SIZE, return_sequences=True)
                       for _ in range(HIDDEN_LAYERS-1))
    layer_stack.append(LSTM(HIDDEN_SIZE))
    layer_stack.append(Dense(VOCAB_SIZE, activation='softmax'))
    model = Sequential(layer_stack)

# wrap the template for data-parallel use on 2 GPUs
model = multi_gpu_model(model, gpus=2)

In [8]:
# Configure training; the sparse loss takes integer class indices as targets.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# predict 'Ⓔ'

def predict(startchars='random', temperature=0.5, maxlen=800):
    """Generate text with the global ``model``, one character at a time.

    Relies on the notebook-level ``model``, ``c2i``, ``i2c`` and
    ``WINDOW_SIZE``.

    Parameters
    ----------
    startchars : str
        ``'none'``   -- fill the initial window with the 'Ⓔ' character;
        ``'random'`` -- fill it with random lowercase letters followed by
        one 'Ⓔ';
        any other value is used as literal seed text: it is cut to its first
        ``WINDOW_SIZE`` characters, left-padded with 'Ⓔ' for the model input,
        and also placed at the start of the generated output.
    temperature : float or ``'random'``
        Sampling temperature; ``'random'`` draws one from ``[0, 1)`` per
        call. Values <= 0 select the most likely character every time.
    maxlen : int
        Number of characters to generate.

    Returns
    -------
    list of str
        The decoded text, split at every '·' and after every 'Ⓔ'. Each piece
        is also printed.
    """
    tmp = np.random.random() if temperature == 'random' else temperature

    # starting sequence
    seq_out = []
    if startchars == 'none':
        seq_in = [c2i['Ⓔ']] * WINDOW_SIZE
    elif startchars == 'random':
        alpha = [a for a in 'abcdefghijklmnopqrstuvwxyz' if a in c2i]
        seq_in = [c2i[alpha[np.random.randint(0, len(alpha))]]
                  for _ in range(WINDOW_SIZE - 1)]
        seq_in.append(c2i['Ⓔ'])
    else:
        seed = list(startchars)[:WINDOW_SIZE]
        seq_out = [c2i[c] for c in seed]
        padding = ['Ⓔ'] * (WINDOW_SIZE - len(seed))
        seq_in = [c2i[c] for c in padding + seed]

    # Temperature sampling: p_i ** (1/T), renormalised (same as dividing the
    # logits by T before the softmax).
    #   low  T -> sharper distribution: conservative, repetitive
    #   high T -> flatter distribution: more diverse, more mistakes
    # https://stackoverflow.com/questions/37246030/how-to-change-the-temperature-of-a-softmax-output-in-keras/37254117#37254117
    def sample(a, temperature=tmp):
        # float64 so that the renormalised vector sums to 1 within the
        # tolerance np.random.multinomial checks for
        probs = np.asarray(a, dtype=np.float64)
        if temperature <= 0:
            # T -> 0 is the greedy limit; also avoids dividing by zero
            return int(np.argmax(probs))
        # Dividing by the maximum first pins the largest term at exactly 1,
        # so small temperatures cannot underflow every entry to 0 (-> NaN).
        scaled = (probs / probs.max()) ** (1.0 / temperature)
        scaled /= scaled.sum()
        return int(np.argmax(np.random.multinomial(1, scaled)))

    for _ in range(maxlen):
        # predict the distribution over the next character
        window = np.array(seq_in).reshape((1, WINDOW_SIZE))
        pred_out = model.predict(window)
        # draw the next index, keep it for decoding ...
        idx = sample(pred_out[0])
        seq_out.append(idx)
        # ... and slide the input window forward by one
        seq_in.append(idx)
        seq_in.pop(0)

    # decode final sequence
    card_char = ''.join([i2c[int(i)] for i in seq_out])
    card_char = card_char.replace('·', '|')
    card_char = card_char.replace('Ⓔ', 'Ⓔ\n|')
    card_text = card_char.split('|')

    for piece in card_text:
        print(piece)

    return card_text

In [None]:
# Restore weights from a saved checkpoint; training then continues at epoch
# index 6, which the training loop prints as "epoch 7".
CHECKPOINT_PATH = 'model/v4-multi2-modelweights-epoch6-cards5000-2018-02-05 17:27:15.520281.h5'
model.load_weights(CHECKPOINT_PATH)
START_EPOCH = 6

In [None]:
# Training loop.
# An "epoch" here is only approximate: it is len(cardtext) // CARDS_PER_BATCH
# generator batches, i.e. roughly one pass over the cards.
# Every OUT_INCREMENT batches the weights are saved and three samples are
# printed and written to a text file.
# NOTE(review): the timestamps put spaces and colons into the file names,
# which some file systems reject.
batches_per_epoch = len(cardtext) // CARDS_PER_BATCH
for epoch_idx in range(START_EPOCH, NUM_EPOCHS):
    for batch in range(batches_per_epoch):
        print('epoch', epoch_idx+1, 'batch',batch+1, str(datetime.now()))
        # get batch
        x_batch, y_batch = next(getbatch)

        # fit to card batch
        model.fit(x_batch, y_batch, batch_size=BATCH_SIZE, epochs=1, verbose=0)

        if batch % OUT_INCREMENT == 0 and batch > 0:
            cards_seen = CARDS_PER_BATCH*(batch)
            model.save_weights('model/v4-multi2-modelweights-epoch{}-cards{}-{}.h5'.format(epoch_idx+1, cards_seen, str(datetime.now())))

            # three samples, each with random seed text and random temperature
            samples = [predict(startchars='random', temperature='random')
                       for _ in range(3)]

            filename = 'sample/v4-multi2-cardsamples-epoch{}-cards{}-{}.txt'.format(epoch_idx+1, cards_seen, str(datetime.now()))
            print("\nwriting file", filename)
            # explicit UTF-8: the samples contain non-ASCII characters such as
            # 'Ⓔ', which a non-UTF-8 default locale encoding cannot write
            with open(filename, 'w', encoding='utf-8') as f:
                for sample_lines in samples:
                    for line in sample_lines:
                        f.write(line + '\n')
                f.write('\n')

            print('***************************************************************')

epoch 7 batch 1 2018-02-05 19:27:12.710068
epoch 7 batch 2 2018-02-05 19:27:20.174926
epoch 7 batch 3 2018-02-05 19:27:26.792809
epoch 7 batch 4 2018-02-05 19:27:30.272326
epoch 7 batch 5 2018-02-05 19:27:35.306334
epoch 7 batch 6 2018-02-05 19:27:39.697476
epoch 7 batch 7 2018-02-05 19:27:44.956613
epoch 7 batch 8 2018-02-05 19:27:49.305318
epoch 7 batch 9 2018-02-05 19:27:52.763251
epoch 7 batch 10 2018-02-05 19:27:57.119972


In [None]:
# # todo: just save to json one time
# model.save('model/100test_model.h5')
# print("saved model to disk\n")
# model.save_weights('model/100test_model_weights.h5')
# print("saved model weights to disk\n")

In [None]:
# # load model
# model.load_weights('model/100-modelweights-epoch71-batch99.h5')
# START_EPOCH = 25

In [None]:
# Generate one sample with random seed text and a random temperature;
# predict() prints the pieces and the cell also displays the returned list.
predict(startchars='random', temperature='random')