In [None]:
import numpy as np

from keras.models import Sequential, load_model
from keras.layers import Embedding, LSTM, Dense, Dropout
from tqdm import tqdm

In [None]:
# read in data
# c2i / i2c are dictionaries stored inside .npy files (hence the .item() call),
# which means they are pickled objects.  numpy refuses to unpickle by default
# (allow_pickle=False since numpy 1.16.3), so the flag has to be set explicitly.
# SECURITY: unpickling can execute arbitrary code - only load files you trust.
# card_texts.npy may or may not be an object array; the flag is harmless if not.
cardtext = [list(x) for x in np.load('data/card_texts.npy', allow_pickle=True)]
c2i = np.load('data/c2i.npy', allow_pickle=True).item()  # character -> integer index
i2c = np.load('data/i2c.npy', allow_pickle=True).item()  # integer index -> character

In [None]:
# test - randomize!
# Seed numpy's global RNG so the shuffle (and therefore the 100-card subset)
# is reproducible.  seed() has to be *called*: assigning to np.random.seed
# replaces the function with an int and seeds nothing.
np.random.seed(1337)
indices = list(np.random.permutation(len(cardtext)))
cardtext = [cardtext[i] for i in indices]
# keep only the first 100 shuffled cards for this test run
cardtext = cardtext[:100]

In [None]:
# set parameters

# --- network shape ---
VOCAB_SIZE = len(c2i)   # number of characters (one output class each)
EMBEDDING_SIZE = 256    # width of the character embedding
HIDDEN_SIZE = 256       # lstm feature vector
HIDDEN_LAYERS = 2       # number of stacked lstm layers
DROP_RATE = 0.25        # dropout applied after the embedding

# --- data windowing ---
WINDOW_SIZE = 5         # context length (characters seen per prediction)

# --- training schedule ---
START_EPOCH = 0         # first epoch index of the training loop
NUM_EPOCHS = 200        # upper bound (exclusive) of the epoch index
OUT_INCREMENT = 10      # printout after n EPOCHS - and save

In [None]:
# batch generator
def cardGenerator(cardtext, windowsize, strt='Ⓢ', pad='⎕', c2i=c2i):
    """Yield one (x, y) training batch per card, forever, in shuffled order.

    Each card is prefixed with `windowsize` copies of `strt`, mapped to
    integer indices through `c2i`, and cut into sliding windows: every
    window of `windowsize` indices is an input row and the index that
    follows it is the target.

    Parameters:
        cardtext:   sequence of cards, each an iterable of characters.
        windowsize: number of context characters per input row.
        strt:       character used to left-pad every card.
        pad:        accepted but not used by this generator.
        c2i:        mapping from character to integer index.

    Yields:
        (x, y) where x holds one window per row and y holds the matching
        next-character indices as a column (extra axis added for
        sparse_categorical_crossentropy).

    The card order is a random permutation that is redrawn each time every
    card has been yielded once.
    """
    order = list(np.random.permutation(len(cardtext)))
    cursor = 0
    current = order[cursor]

    while True:
        # left-pad with the start symbol, then append the card itself
        symbols = [strt] * windowsize
        symbols += cardtext[current]

        # characters -> integer indices
        encoded = [c2i[sym] for sym in symbols]

        # sliding windows and the character that follows each of them
        n_windows = len(encoded) - windowsize
        contexts = [encoded[s:s + windowsize] for s in range(n_windows)]
        targets = [encoded[s + windowsize] for s in range(n_windows)]

        yield (np.asarray(contexts), np.array(targets)[:, np.newaxis])

        # move on to the next card; reshuffle once the pass is complete
        cursor += 1
        if cursor >= len(cardtext):
            order = np.random.permutation(len(cardtext))
            cursor = 0
        current = order[cursor]

In [None]:
# one shared generator instance; each next(getbatch) returns the batch for one card
getbatch = cardGenerator(cardtext=cardtext, windowsize=WINDOW_SIZE)

In [None]:
# define model
# embedding -> dropout -> HIDDEN_LAYERS stacked stateful LSTMs -> softmax over characters
model = Sequential()
model.add(Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_SIZE,
                    batch_input_shape=(1, WINDOW_SIZE)))
model.add(Dropout(DROP_RATE))
# every LSTM except the last hands its full output sequence to the next layer
for return_seq in [True] * (HIDDEN_LAYERS - 1) + [False]:
    model.add(LSTM(HIDDEN_SIZE, return_sequences=return_seq, stateful=True))
model.add(Dense(VOCAB_SIZE, activation='softmax'))

In [None]:
# compile
# sparse loss: targets are integer character indices, not one-hot vectors
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# predict 'Ⓢ'

def predict(startchars='none', temperature=1.0, maxlen=300):
    """Generate text one character at a time with the global `model`.

    Parameters:
        startchars:  'none'   - start from a window made only of 'Ⓢ';
                     'random' - start from 'Ⓢ' padding plus one random
                                lowercase letter that exists in `c2i`;
                     any other string - use its first WINDOW_SIZE characters
                                as the seed (left-padded with 'Ⓢ').
        temperature: softmax temperature used when sampling, or 'random' to
                     draw one uniformly from [0, 1).  A value <= 0 selects
                     the most probable character every step (greedy).
        maxlen:      number of characters to generate after the seed.

    Returns:
        The generated text split on '|', with every 'Ⓝ' replaced by the
        first field.  Each field is also printed.

    Relies on the notebook globals `model`, `c2i`, `i2c` and `WINDOW_SIZE`.
    """
    seq_out = []

    if temperature == 'random':
        tmp = np.random.random()
    else:
        tmp = temperature

    # starting sequence
    if startchars == 'none':
        seq_in = [c2i['Ⓢ'] for _ in range(WINDOW_SIZE)]

    elif startchars == 'random':
        seq_in = [c2i['Ⓢ'] for _ in range(WINDOW_SIZE - 1)]
        alpha = [a for a in 'abcdefghijklmnopqrstuvwxyz' if a in c2i]
        first = c2i[alpha[np.random.randint(0, len(alpha))]]
        seq_in.append(first)
        seq_out = [first]

    else:
        s = list(startchars)[:WINDOW_SIZE]
        seq_out = [c2i[c] for c in s]
        # left-pad short seeds with the start symbol up to the window size
        s = ['Ⓢ'] * (WINDOW_SIZE - len(s)) + s
        seq_in = [c2i[c] for c in s]

    # softmax temperature
    # probabilities are re-weighted as p ** (1 / temperature) and renormalised
    # high temp = flatter distribution = more diverse, more mistakes
    # low temp  = sharper distribution = more conservative
    # https://stackoverflow.com/questions/37246030/how-to-change-the-temperature-of-a-softmax-output-in-keras/37254117#37254117
    def sample(a, temperature=tmp):
        # float64 keeps the renormalised vector within the tolerance that
        # np.random.multinomial enforces on sum(pvals); float32 rounding was
        # what caused the "sum > 1" error the old workaround papered over.
        probs = np.asarray(a, dtype=np.float64)

        # temperature <= 0 cannot be used as a divisor: decode greedily
        if temperature <= 0:
            return int(np.argmax(probs))

        # p ** (1/T) computed in log space so tiny temperatures do not
        # underflow every entry to zero (which would give 0/0 = NaN)
        with np.errstate(divide='ignore'):
            logits = np.log(probs) / temperature
        logits -= logits.max()
        weights = np.exp(logits)
        weights /= weights.sum()

        return int(np.argmax(np.random.multinomial(1, weights, 1)))

    # The LSTM layers are stateful and training resets them after every card,
    # so generation also starts from a clean state instead of inheriting
    # whatever the previous fit/predict call left behind.
    model.reset_states()

    for _ in range(maxlen):

        # predict next char
        pred_out = model.predict(np.array(seq_in).reshape((1, WINDOW_SIZE)))
        # sample an index from the (temperature-adjusted) distribution
        idx = sample(pred_out[0])
        # save index for decoding
        seq_out.append(idx)
        # slide the input window: add the new index, remove the earliest
        seq_in.append(idx)
        seq_in.pop(0)

    # decode final sequence
    card_char = ''.join(i2c[int(i)] for i in seq_out)
    fields = card_char.split('|')
    # 'Ⓝ' is substituted with the first field; build a new list because
    # str.replace returns a new string and rebinding a loop variable
    # would silently discard it
    first_field = fields[0]
    card_text = [f.replace('Ⓝ', first_field) for f in fields]

    for f in card_text:
        print(f)

    return card_text

In [None]:
# # load model
# model.load_weights('model/temp-modelweights-epoch26-batch957.h5')
# START_EPOCH = 25

In [None]:
# epoch thru all cards
# Each "batch" is the full set of sliding windows of one card, fed to the
# stateful model one window at a time (batch_size=1, unshuffled).
for epoch_idx in range(START_EPOCH, NUM_EPOCHS):

    # print("epoch", epoch_idx, "of", NUM_EPOCHS)

    for batch in tqdm(range(len(cardtext))):

        # get batch (one card)
        x_batch, y_batch = next(getbatch)

        # fit to card batch
        r = model.fit(x_batch, y_batch,
                      epochs=1, batch_size=1, shuffle=False,
                      verbose=0)

        # reset state so the next card does not inherit this card's LSTM state
        model.reset_states()

        # if batch % OUT_INCREMENT == 0 and batch > 0:

    # checkpoint, report and sample on every OUT_INCREMENT-th epoch index (never on index 0)
    if epoch_idx % OUT_INCREMENT == 0 and epoch_idx > 0:
        model.save_weights('model/100-modelweights-epoch{}-batch{}.h5'.format(epoch_idx+1, batch))
        # r is the History of the last card fitted in this epoch; print its
        # metrics dict rather than the object, whose repr carries no numbers
        print("EPOCH:", epoch_idx+1, "card #:", batch, "of", len(cardtext), r.history)
        predict()

In [None]:
# # todo: just save to json one time
# write the complete model to one file ...
model.save(filepath='model/100test_model.h5')
print("saved model to disk\n")
# ... and the weights on their own to a second file
model.save_weights(filepath='model/100test_model_weights.h5')
print("saved model weights to disk\n")

In [None]:
# # load model
# model.load_weights('model/100-modelweights-epoch71-batch99.h5')
# START_EPOCH = 25

In [None]:
# sample one card: random first letter, random softmax temperature
predict('random', 'random')