In [1]:
import requests

cards = requests.get("https://c2.scryfall.com/file/scryfall-bulk/oracle-cards/oracle-cards-20201024210538.json").json()

In [2]:
def splice_reminders(otext):
    while True:
        start_index = otext.find('(')
        if start_index == -1:
            break
        end_index = otext.find(')')
        otext = otext[0:start_index] + otext[end_index+1:]
    return otext

def extract_data(card):
    if card['layout'] in ["art_series", "double_faced_token", "transform", "split", "adventure", "modal_dfc", "flip"]:
        return None
    if card['set'] in ["unh", "ugl", "ust", "und", 'wc00', 'wc01', 'wc02', 'wc03', 'wc04', 'wc97', 'wc98', 'wc99', 'h17', 'ptg', 'cmb1', 'mznr']:
        #print(card["name"])
        return None
    if "oracle_text" not in card:
        print(card)
    if card["type_line"] == "Card":
        return None
    stats = [
        card["type_line"],
        splice_reminders(card["oracle_text"])
    ]
    if "mana_cost" in card:
        stats.append(card["mana_cost"])
    if "power" in card:
        stats.append(f"power: {card['power']}")
        stats.append(f"toughness: {card['toughness']}")
    if "loyalty" in card:
        stats.append(f"loyalty: {card['loyalty']}")
    
    text = "|".join(stats)
    if(len(text) > 200):
        return None
    return text.replace(card['name'], "$").lower()

In [3]:
card_strings = [extract_data(c) for c in cards if extract_data(c) is not None]


In [4]:
longest = list(reversed(sorted([(len(s), s) for s in card_strings], key=lambda x: x[0])))
print(longest[:5])

[(200, 'enchantment|whenever a player casts a creature spell, destroy all reflections. then that player creates an x/x white reflection creature token, where x is the converted mana cost of that spell.|{2}{w}'), (200, 'sorcery|this spell costs {3} less to cast if it targets a creature whose controller has eight or more cards in their graveyard.\ngain control of target creature with converted mana cost x.|{x}{u}{u}{u}'), (200, 'sorcery|\nsearch your library for a card with converted mana cost less than or equal to the number of lands you control, reveal it, and put it into your hand. then shuffle your library.|{2/b}{2/b}{2/b}'), (200, 'enchantment|landfall — whenever a land enters the battlefield under your control, choose one —\n• create a 1/1 white kor ally creature token.\n• creatures you control get +1/+1 until end of turn.|{3}{w}'), (200, 'artifact|{3}, {t}: choose a number greater than 0 and a color. target opponent reveals their hand. if that opponent reveals exactly the chosen n

In [5]:
tokens = sorted(set([c for s in card_strings for c in s]))
tokens.insert(0, '<PAD>')

In [6]:
import keras

Using TensorFlow backend.


In [8]:
import numpy as np
from keras_transformer import *
from keras import layers
from keras_layer_normalization import LayerNormalization
from keras_multi_head import MultiHeadAttention
from keras_position_wise_feed_forward import FeedForward
from keras_pos_embd import TrigPosEmbedding
from keras_embed_sim import EmbeddingRet, EmbeddingSim



In [88]:
def get_model(token_num,
              embed_dim,
              encoder_num,
              decoder_num,
              head_num,
              hidden_dim,
              attention_activation=None,
              feed_forward_activation=gelu,
              dropout_rate=0.0,
              use_same_embed=True,
              embed_weights=None,
              embed_trainable=None,
              trainable=True):
    """Get full model without compilation.
    :param token_num: Number of distinct tokens.
    :param embed_dim: Dimension of token embedding.
    :param encoder_num: Number of encoder components.
    :param decoder_num: Number of decoder components.
    :param head_num: Number of heads in multi-head self-attention.
    :param hidden_dim: Hidden dimension of feed forward layer.
    :param attention_activation: Activation for multi-head self-attention.
    :param feed_forward_activation: Activation for feed-forward layer.
    :param dropout_rate: Dropout rate.
    :param use_same_embed: Whether to use the same token embedding layer. `token_num`, `embed_weights` and
                           `embed_trainable` should be lists of two elements if it is False.
    :param embed_weights: Initial weights of token embedding.
    :param embed_trainable: Whether the token embedding is trainable. It will automatically set to False if the given
                            value is None when embedding weights has been provided.
    :param trainable: Whether the layers are trainable.
    :return: Keras model.
    """
    if not isinstance(token_num, list):
        token_num = [token_num, token_num]
    encoder_token_num, decoder_token_num = token_num

    if not isinstance(embed_weights, list):
        embed_weights = [embed_weights, embed_weights]
    encoder_embed_weights, decoder_embed_weights = embed_weights
    if encoder_embed_weights is not None:
        encoder_embed_weights = [encoder_embed_weights]
    if decoder_embed_weights is not None:
        decoder_embed_weights = [decoder_embed_weights]

    if not isinstance(embed_trainable, list):
        embed_trainable = [embed_trainable, embed_trainable]
    encoder_embed_trainable, decoder_embed_trainable = embed_trainable
    if encoder_embed_trainable is None:
        encoder_embed_trainable = encoder_embed_weights is None
    if decoder_embed_trainable is None:
        decoder_embed_trainable = decoder_embed_weights is None

    
    encoder_embed_layer = decoder_embed_layer = EmbeddingRet(
        input_dim=encoder_token_num,
        output_dim=embed_dim,
        mask_zero=False,
        weights=encoder_embed_weights,
        trainable=encoder_embed_trainable,
        name='Token-Embedding',
    )
    
    encoder_input = keras.layers.Input(shape=(None,), name='Encoder-Input')
    encoder_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Encoder-Embedding',
    )(encoder_embed_layer(encoder_input)[0])
    encoded_layer = get_encoders(
        encoder_num=encoder_num,
        input_layer=encoder_embed,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
    )
    
    resh = layers.Permute((1, 0))(encoded_layer)
    print(resh.shape)
    flat = layers.GlobalAveragePooling1D()(resh)
    encoded_layer_dense = layers.Dense(12, activation='relu', name='compr')(flat)
    
    decoder_input = keras.layers.Input(shape=(None,), name='Decoder-Input')
    decoder_embed, decoder_embed_weights = decoder_embed_layer(decoder_input)
    decoder_embed = TrigPosEmbedding(
        mode=TrigPosEmbedding.MODE_ADD,
        name='Decoder-Embedding',
    )(decoder_embed)
    decoded_layer = get_decoders(
        decoder_num=decoder_num,
        input_layer=decoder_embed,
        encoded_layer=encoded_layer_dense,
        head_num=head_num,
        hidden_dim=hidden_dim,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        trainable=trainable,
    )
    output_layer = EmbeddingSim(
        trainable=trainable,
        name='Decoder-Output',
    )([decoded_layer, decoder_embed_weights])
    return keras.models.Model(inputs=[encoder_input, decoder_input], outputs=output_layer)

In [89]:
model = get_model(
    token_num=len(tokens),
    embed_dim=12,
    encoder_num=3,
    decoder_num=2,
    head_num=3,
    hidden_dim=120,
    attention_activation='relu',
    feed_forward_activation='relu',
    dropout_rate=0.05,
    embed_weights=np.random.random((len(tokens), 12)),
)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)
model.summary()

(?, ?, ?)


TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'

In [40]:
def encode_card_string(tokens, card_string, pad_length=200):
    return [tokens.index(c) for c in card_string] + [0] * (pad_length - len(card_string))

def encode_card_string_nested(tokens, card_string, pad_length=200):
    return [[tokens.index(c)] for c in card_string] + [[0]] * (pad_length - len(card_string))

def decode_card_string(tokens, enc_card_string):
    return "".join([tokens[i] for i in enc_card_string])

In [64]:
print(max([len(s) for s in card_strings]))

inp = [encode_card_string(tokens, s) for s in card_strings]
outp = [encode_card_string_nested(tokens, s) for s in card_strings]

inp2 = np.squeeze(np.asarray(inp))
outp2 = np.asarray(outp)
print(inp2.shape)

200
(16209, 200)


In [65]:
model.fit(
    x=[inp2, inp2],
    y=outp2,
    epochs=1,
)

r = model.predict([np.asarray(inp[0]), np.asarray(inp[0])])



Epoch 1/1


In [66]:
r2 = np.argmax(r, axis=2)

"".join([tokens[i[0]] for i in r2 if i[0] < len(tokens)])

"areipace|asilon\niasieiisiuneapped iplayersican'eiuneapimoreiehanieroipermanenesidurin\nieheiriuneapiseepsr|}f}mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm"

In [67]:
extractors = keras.Model(inputs=model.inputs,
                        outputs=model.get_layer("compr").output)

ex2 = keras.Model(inputs=model.inputs,
                        outputs=model.get_layer("Encoder-3-FeedForward-Norm").output)

In [68]:
foo = extractors.predict([np.asarray(inp[0]), np.asarray(inp[0])])
foo2 = ex2.predict([np.asarray(inp[0]), np.asarray(inp[0])])

In [69]:
foo2.shape

(200, 1, 12)