In [1]:
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np

from keras.models import Model
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.layers import Input
from keras.layers.core import Activation, Dropout, Dense
from keras.layers import Flatten, LSTM, Embedding

In [2]:
cards = pd.read_json("data/cards.json")
cards

Unnamed: 0,name,rules_text,colors,color_identity,flavour_text,type_line,power,toughness,set
0,Static Orb,"As long as CARDNAME is untapped, players can't...","[0, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 0, 1]",,Artifact,,,7ed
1,Sensory Deprivation,Enchant creature\nEnchanted creature gets -3/-0.,"[0, 1, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 0]",,Enchantment — Aura,,,m14
2,Road of Return,Choose one —\n• Return target permanent card f...,"[0, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 1, 0]",,Sorcery,,,c19
3,Storm Crow,Flying (This creature can't be blocked except ...,"[0, 1, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 0]",,Creature — Bird,1,2,9ed
4,Walking Sponge,tap: Target creature loses your choice of fly...,"[0, 1, 0, 0, 0, 0]","[0, 1, 0, 0, 0, 0]",,Creature — Sponge,1,1,ulg
...,...,...,...,...,...,...,...,...,...
24444,Quarry Beetle,"When CARDNAME enters the battlefield, you may ...","[0, 0, 0, 0, 1, 0]","[0, 0, 0, 0, 1, 0]",,Creature — Insect,4,5,hou
24445,Devoted Hero,,"[1, 0, 0, 0, 0, 0]","[1, 0, 0, 0, 0, 0]",,Creature — Elf Soldier,1,2,s99
24446,Without Weakness,Target creature you control gains indestructib...,"[0, 0, 1, 0, 0, 0]","[0, 0, 1, 0, 0, 0]",,Instant,,,hou
24447,Firesong and Sunspeaker,Red instant and sorcery spells you control hav...,"[1, 0, 0, 1, 0, 0]","[1, 0, 0, 1, 0, 0]",,Legendary Creature — Minotaur Cleric,4,6,2x2


In [3]:
import spacy

en = spacy.load('en_core_web_sm')
stopwords = en.Defaults.stop_words

def text_preprocess(input_text):
    # remove all stop words
    input_text = ' '.join([word for word in input_text.split() if word not in stopwords])

    input_text = ''.join([char for char in input_text if char.isalnum() or char == '/' or char == ' '])

    return input_text

X = []
Y = []

# input_text = type_line + rules_text (if not None) + power / toughness (if not None)
for index, card in cards.iterrows():

    input_text = card['type_line']
    if card['rules_text'] is not None:
        input_text += '\n' + card['rules_text']
    if card['power'] is not None:
        input_text += '\n' + card['power'] + '/' + card['toughness']

    input_text = text_preprocess(input_text)
    
    X.append(input_text)
    Y.append(card["color_identity"])

In [4]:
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.10)

In [5]:
nr_of_targets= 5

# Remove colorless from array and convert from int to float32
y_train = [np.asarray(y)[0:nr_of_targets].astype('float32').ravel() for y in y_train] 
y_test = [np.asarray(y)[0:nr_of_targets].astype('float32').ravel() for y in y_test]

# Create tensors for tensorflow
y_train = tf.convert_to_tensor(y_train)
y_test = tf.convert_to_tensor(y_test)

In [6]:
# Tokenize text
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(x_train)

x_train = tokenizer.texts_to_sequences(x_train)
x_test = tokenizer.texts_to_sequences(x_test)

In [7]:
vocab_size = len(tokenizer.word_index) + 2
maxlen = 50

# Pad vectors with short text and shorten vectors with long text
x_train = pad_sequences(x_train, maxlen=maxlen)
x_test = pad_sequences(x_test, maxlen=maxlen)

In [8]:
from numpy import array
from numpy import asarray
from numpy import zeros

# https://keras.io/examples/nlp/pretrained_word_embeddings/
# using pretrained word embeddings from https://nlp.stanford.edu/projects/glove/
embeddings_index = {}
with open('data/glove.6B.100d.txt', encoding="utf8") as f:
    for line in f:
        word, coefs = line.split(maxsplit=1)
        coefs = np.fromstring(coefs, "f", sep=" ")
        embeddings_index[word] = coefs

print("Found %s word vectors." % len(embeddings_index))

hits = 0
misses = 0

# Prepare embedding matrix
embedding_matrix = np.zeros((vocab_size, 100))
for word, i in tokenizer.word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # Words not found in embedding index will be all-zeros.
        # This includes the representation for "padding" and "OOV"
        embedding_matrix[i] = embedding_vector
        hits += 1
    else:
        misses += 1
print("Converted %d words (%d misses)" % (hits, misses))

Found 400000 word vectors.
Converted 2343 words (540 misses)


In [9]:
# Create model
deep_inputs = Input(shape=(maxlen,))
embedding_layer = Embedding(vocab_size, 100, weights=[embedding_matrix], trainable=False)(deep_inputs)
LSTM_Layer_1 = LSTM(128)(embedding_layer)
dense_layer_1 = Dense(nr_of_targets, activation='sigmoid')(LSTM_Layer_1)
model = Model(inputs=deep_inputs, outputs=dense_layer_1)

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

In [10]:
# Fit our model
history = model.fit(x_train, y_train, batch_size=128, epochs=15, verbose=1, validation_split=0.1)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [11]:
# Model scores
score = model.evaluate(x_test, y_test, verbose=1)
print("Test Score:", score[0])
print("Test Accuracy:", score[1])

Test Score: 0.2661699950695038
Test Accuracy: 0.6736196279525757


In [12]:
# Get predictions for our test set
predictions = model.predict(x_test)



In [13]:
# Show some prediction samples
import random

for i in random.sample(range(len(predictions)), 10):
    formatted_y_test = ["%.3f"%item for item in y_test[i]]
    formatted_predictions = ["%.3f"%item for item in predictions[i]]
    
    print("Colors:           [   w   ,    u   ,    b   ,    r   ,    g   ]")
    print("Actual colors:   ", formatted_y_test)
    print("Predicted colors:", formatted_predictions)
    print()

Colors:           [   w   ,    u   ,    b   ,    r   ,    g   ]
Actual colors:    ['0.000', '0.000', '1.000', '1.000', '0.000']
Predicted colors: ['0.484', '0.005', '0.858', '0.004', '0.018']

Colors:           [   w   ,    u   ,    b   ,    r   ,    g   ]
Actual colors:    ['1.000', '0.000', '0.000', '0.000', '0.000']
Predicted colors: ['0.972', '0.084', '0.017', '0.017', '0.004']

Colors:           [   w   ,    u   ,    b   ,    r   ,    g   ]
Actual colors:    ['0.000', '1.000', '0.000', '0.000', '0.000']
Predicted colors: ['0.081', '0.771', '0.020', '0.034', '0.016']

Colors:           [   w   ,    u   ,    b   ,    r   ,    g   ]
Actual colors:    ['0.000', '0.000', '0.000', '1.000', '0.000']
Predicted colors: ['0.014', '0.000', '0.014', '0.999', '0.008']

Colors:           [   w   ,    u   ,    b   ,    r   ,    g   ]
Actual colors:    ['1.000', '0.000', '0.000', '0.000', '0.000']
Predicted colors: ['0.989', '0.002', '0.002', '0.040', '0.002']

Colors:           [   w   ,    u   