# decoding

use the model to predict some new cards!

In [1]:
import numpy as np
from keras.models import Model, load_model
from keras.layers import Input, Embedding, LSTM, Dense, Dropout
from keras.callbacks import ModelCheckpoint
import h5py

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# restrict GPU usage here
import os

# Comma-separated list of GPU ids this process is allowed to see;
# "0" exposes only the first device.
VISIBLE_GPUS = "0"
os.environ["CUDA_VISIBLE_DEVICES"] = VISIBLE_GPUS

In [3]:
# read in data
# c2i / i2c are mappings (char -> index, index -> char) that were stored with
# np.save, i.e. pickled inside a 0-d object array; .item() unwraps them.
# NumPy >= 1.16.3 refuses to unpickle by default, so allow_pickle=True has to
# be passed explicitly or np.load raises ValueError.
# NOTE(security): unpickling can execute arbitrary code -- only load .npy
# files that you generated yourself.
c2i = np.load('data/c2i.npy', allow_pickle=True).item()
i2c = np.load('data/i2c.npy', allow_pickle=True).item()
# ycards is only used below for its second dimension (the maximum card length)
ycards = np.load('data/ycards.npy')

## hyperparameters

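although batch size and epochs aren't needed for decoding (and dropout has no effect at prediction time), we still need the variables that define the model's size so the architecture can be rebuilt exactly as it was trained.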

In [4]:
# Values copied from the training notebook: the network rebuilt below has to
# have the same shape as the one whose weights are loaded.
DROP_RATE = 0.33               # fraction passed to every Dropout layer

EMBEDDING_SIZE = 400           # width of each character embedding vector
HIDDEN_SIZE = 800              # units in each LSTM (and the hidden Dense layer)
MAX_Y_LEN = ycards.shape[1]    # longest card, in characters
VOCAB_SIZE = len(c2i)          # number of distinct characters

## decoding

In [5]:
# Rebuild the full-sequence language model so the saved weights can be loaded
# into its layers. There is no encoder in this notebook: both LSTMs are called
# without an explicit initial state here.
# The layers are kept in their own variables because the single-step
# generation model built further down calls the very same layer objects.
decoder_input  = Input(shape=(MAX_Y_LEN, ), name='lm_input')
decoder_embed  = Embedding(VOCAB_SIZE, EMBEDDING_SIZE, 
                           mask_zero=True, trainable=True, name='lm_emb')
# return_state=True makes each LSTM return (sequence, h, c); the states are
# what the generation loop feeds back in step by step.
decoder_lstm1  = LSTM(HIDDEN_SIZE, 
                      return_sequences=True, 
                      return_state=True, 
                      name='lm_lstm1')
decoder_lstm2  = LSTM(HIDDEN_SIZE, 
                      return_sequences=True, 
                      return_state=True, 
                      name='lm_lstm2')

# per-timestep head: hidden ReLU layer, then a softmax over the vocabulary
decoder_dense_1  = Dense(HIDDEN_SIZE, activation='relu', name='lm_dns_1')
decoder_dense_2  = Dense(VOCAB_SIZE, activation='softmax', name='lm_dns_final')

# wire the layers together, with dropout after every stage
x = decoder_embed(decoder_input)
x = Dropout(DROP_RATE)(x)
# h1/c1 and h2/c2 are the final LSTM states; they are not part of this model's outputs
x, h1, c1 = decoder_lstm1(x)
x = Dropout(DROP_RATE)(x)
x, h2, c2 = decoder_lstm2(x)
x = Dropout(DROP_RATE)(x)
x = decoder_dense_1(x)
x = Dropout(DROP_RATE)(x)
x = decoder_dense_2(x)

# input: a sequence of MAX_Y_LEN character indices; output: a softmax per position
model = Model(decoder_input, x)

In [6]:
# Load the trained weights into the layers of `model`; the generation model
# built below calls these same layer objects, so it shares the loaded weights.
model.load_weights('model/weights_final.h5')

In [7]:
# Build a single-step version of the network for generation: it takes one
# character index plus explicit (h, c) states for both LSTMs, and the same
# layer objects (and therefore weights) as `model` are applied to it.
# No Dropout layers are inserted on this path.

# this input is for the previously-predicted character
decoder_input  = Input(shape=(1, ))
# these inputs are the recurrent states
decoder_state_input_h1 = Input(shape=(HIDDEN_SIZE,))
decoder_state_input_c1 = Input(shape=(HIDDEN_SIZE,))
decoder_states_inputs1 = [decoder_state_input_h1, decoder_state_input_c1]
decoder_state_input_h2 = Input(shape=(HIDDEN_SIZE,))
decoder_state_input_c2 = Input(shape=(HIDDEN_SIZE,))
decoder_states_inputs2 = [decoder_state_input_h2, decoder_state_input_c2]

# we reuse the embedding layer
x = decoder_embed(decoder_input)
# each LSTM starts from the states passed in and returns its updated states,
# which the caller feeds back in on the next step
x, dh1, dc1 = decoder_lstm1(x, initial_state=decoder_states_inputs1)
decoded_states1 = [dh1, dc1]
x, dh2, dc2 = decoder_lstm2(x, initial_state=decoder_states_inputs2)
decoded_states2 = [dh2, dc2]

# same dense head as the training-shaped model: x ends up as a softmax over the vocabulary
x = decoder_dense_1(x)
x = decoder_dense_2(x)

In [8]:
# Inputs, in order: previous character, then (h, c) for LSTM 1 and LSTM 2.
gen_inputs = [decoder_input] + decoder_states_inputs1 + decoder_states_inputs2
# Outputs, in order: next-character distribution, then the updated (h, c) pairs.
gen_outputs = [x] + decoded_states1 + decoded_states2

gen_model = Model(inputs=gen_inputs, outputs=gen_outputs)

In [9]:
# print the layer-by-layer structure of the generation model as a sanity check
gen_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
lm_emb (Embedding)              multiple             39600       input_1[0][0]                    
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 800)          0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 800)          0                                            
__________________________________________________________________________________________________
lm_lstm1 (

## decode function

we initialize the states randomly, and start our sequence with the SOS character. until we reach a set length or an end-of-sequence character, we generate a probability distribution over the next character, sample a character randomly according to that distribution (we won't use a greedy or beam-search method because we *want* a degree of 'wackiness' in this case), and feed that character (along with the previous LSTM states) *back* into the model to generate another character, etc.

the *temperature* scales the softmax distribution, allowing for more or less randomness in the network predictions. a temperature of 1 is unscaled, a temperature above one means that the relative probabilities are closer (and thus the network is more 'random'), while temperatures below 1 make the network more confident (and thus more 'conservative').

In [10]:
def _sample_with_temperature(probs, temperature):
    """Draw one index from `probs` after temperature scaling.

    Mathematically this samples from ``probs ** (1 / temperature)``
    renormalised to sum to 1. The work is done in float64 and in log space so
    that the renormalised vector really sums to 1 within the tolerance
    ``np.random.multinomial`` enforces (float32 model outputs otherwise trip
    its "sum(pvals[:-1]) > 1.0" check) and so that small temperatures do not
    underflow every entry to zero.
    """
    probs = np.asarray(probs, dtype=np.float64)
    # log(0) -> -inf is fine here: exp(-inf) is exactly 0 again below
    with np.errstate(divide='ignore'):
        scaled = np.log(probs) / temperature
    scaled -= scaled.max()          # largest entry becomes exp(0) == 1
    weights = np.exp(scaled)
    weights /= weights.sum()
    # multinomial with n=1 returns a one-hot vector; argmax recovers the index
    return int(np.argmax(np.random.multinomial(1, weights)))


def decode_sequence(temperature=1.0, maxlen=256, debug=False):
    """Generate one character sequence by sampling from `gen_model`.

    Starts from random LSTM states and the start character 'Ⓢ', then
    repeatedly predicts a distribution over the next character, samples from
    it with the given temperature and feeds the sampled character and the
    updated states back into the model.

    Parameters
    ----------
    temperature : float
        Must be > 0. 1.0 samples from the unscaled softmax, values below 1
        sharpen the distribution, values above 1 flatten it.
    maxlen : int
        Length budget. Generation stops once more than ``2 * maxlen``
        characters have been produced, so at most ``2 * maxlen + 1``
        characters are returned.
    debug : bool
        Print the input, state shapes and greedy prediction at every step.

    Returns
    -------
    list of str
        The sampled characters in order. The start character is not included;
        the end character 'Ⓔ' is included when it was sampled.

    Raises
    ------
    ValueError
        If `temperature` is not positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive, got %r' % (temperature,))

    # randomize input state vectors: one [h, c] pair per LSTM layer,
    # each of shape (1, HIDDEN_SIZE)
    states1 = [np.random.random(HIDDEN_SIZE).reshape(1, -1) for _ in range(2)]
    states2 = [np.random.random(HIDDEN_SIZE).reshape(1, -1) for _ in range(2)]

    # the first character fed to the model is the start character
    prev_token = c2i['Ⓢ']

    # Sampling loop for a batch of sequences
    # (to simplify, here we assume a batch of size 1).
    decoded_sentence = []
    while True:
        # explicit (batch=1, timesteps=1) input for the single-step model
        token_input = np.array([[prev_token]])
        if debug:
            print('inp:', [token_input])
            print('st1:', np.shape(states1))
            print('st2:', np.shape(states2))
        output_tokens, h1, c1, h2, c2 = gen_model.predict([token_input] + states1 + states2)
        if debug:
            print('out:', output_tokens.shape)
            print('max:', i2c[prev_token], '=>', i2c[np.argmax(output_tokens)])
            print()

        # Update states
        states1 = [h1, c1]
        states2 = [h2, c2]

        # Sample a token with temperature
        sampled_token_index = _sample_with_temperature(np.squeeze(output_tokens), temperature)
        sampled_char = i2c[sampled_token_index]
        decoded_sentence.append(sampled_char)

        # Exit condition: either hit max length
        # or find stop character.
        if len(decoded_sentence) > maxlen*2 or sampled_char == 'Ⓔ':
            break

        # the sampled character is the next step's input
        prev_token = sampled_token_index

    return decoded_sentence

In [11]:
def generate(temperature=1):
    """Sample one card with `decode_sequence` and print it, one field per line.

    The end marker 'Ⓔ' is stripped, both '·' and '|' act as field separators,
    and every 'Ⓝ' placeholder is replaced by the first field (the card name in
    the examples below). Nothing is returned.
    """
    raw_text = ''.join(decode_sequence(temperature=temperature))
    fields = raw_text.replace('Ⓔ', '').replace('·', '|').split('|')
    card_name = fields[0]
    for field in fields:
        print(field.replace('Ⓝ', card_name))

## examples

here we generate some cards with different temperature settings

i did cheat here to generate a 'well-formed' card

In [66]:
# temperature below 1: sharper distribution, more conservative output
generate(temperature=0.25)

illanter's spike
②Ⓖ
R
sorcery
create a 1/1 white knight creature token with haste. exile it at the beginning of the next end step.


In [62]:
# temperature 1: sample from the model's softmax without rescaling
generate(temperature=1.0)

oning-archive
③Ⓦ
U
creature
angel
3
3
flying
②: target creature loses all abilities until your next upkeep.


In [56]:
# temperature above 1: flatter distribution, more random output
generate(temperature=1.5)

warmingpear's rapler
⑦
C
artifact
creature
hydra
4
2
ⒼⒼ: target creature loses double strike until end of turn.


In [51]:
# much higher temperature: the distribution is flattened a lot, so the text degrades
generate(temperature=3)

wumppaiiter
①ⓌⓌ
M
creature
dwarfⒷx
4
2
quit,btins ↷, phoucu
x't.
rix
③ⒷⓇg: /19 Ⓤ5.thoslc
wumppaiiters . ③
Ⓦw5yismwayp"ucs—②
④, sacrifice ↷w eyf.,ctrezibmr—dr
 unlity i
