In [1]:
import numpy as np
import re
from collections import Counter

In [2]:
# Load every per-card attribute array from disk and turn each one into a
# plain Python list; the lists are indexed side by side further down.
(setnm, idnum, names, mcost, rarty,
 types, subtp, ctext, power, tough) = [
    list(np.load(npy_path))
    for npy_path in (
        'data//numpy//setnm.npy',
        'data//numpy//idnum.npy',
        'data//numpy//names.npy',
        'data//numpy//mcost.npy',
        'data//numpy//rarty.npy',
        'data//numpy//types.npy',
        'data//numpy//subtp.npy',
        'data//numpy//ctext_orig.npy',
        'data//numpy//power.npy',
        'data//numpy//tough.npy',
    )
]

### encode the rarities as capital letters

In [3]:
# Keep only the first character of every rarity name, then list the
# distinct one-letter codes that occur.
rletr = [rarity_name[0] for rarity_name in rarty]
list(set(rletr))

['M', 'R', 'U', 'S', 'C']

### encode the types, subtypes as lower-cased words separated by a symbol '·'

In [4]:
# Lower-case each space-separated word of a type line and glue the words
# back together with '·' (one encoded string per card).
typss = [
    '·'.join(word.lower() for word in str(type_line).split(' '))
    for type_line in types
]

In [5]:
# Inspect the distinct encoded type strings.
set(typss)

{'artifact',
 'artifact·creature',
 'creature',
 'enchantment',
 'enchantment·artifact',
 'enchantment·creature',
 'instant',
 'planeswalker',
 'sorcery',
 'tribal·artifact',
 'tribal·enchantment',
 'tribal·instant',
 'tribal·sorcery'}

In [6]:
# Build one lower-cased, '·'-separated subtype string per card.
# 'PAD' entries are skipped wherever they occur.  Joining only the remaining
# entries means a 'PAD' sitting in front of a real subtype can no longer
# leave a stray leading '·' (the old loop chose the separator by looking at
# the next element only).  Cards whose string ends up empty get '⌧'.
subts = []
for row in subtp:
    kept = [subtype.lower() for subtype in row if subtype != 'PAD']
    subts.append('·'.join(kept) or '⌧')

### remove reminder text
strip all text between parentheses

In [7]:
# Peek at the first ten card texts before the reminder text is removed.
ctext[:10]

['first strike (this creature deals combat damage before creatures without first strike.)|when Ⓝ enters the battlefield, you gain 1 life for each card in your graveyard.',
 'flying|when Ⓝ enters the battlefield, you gain 3 life.',
 "target creature gets +3/+3 and gains flying until end of turn. (it can't be blocked except by creatures with flying or reach.)",
 'whenever a creature enters the battlefield under your control, you gain life equal to its toughness.',
 "defender (this creature can't attack.)|flying",
 'artifact and enchantment spells your opponents cast cost ② more to cast.|sacrifice Ⓝ: destroy target artifact or enchantment.',
 "flying (this creature can't be blocked except by creatures with flying or reach.)|when Ⓝ enters the battlefield, destroy target enchantment.",
 'ⓍⓌ, ↷: Ⓝ deals x damage to target attacking or blocking creature.',
 'prevent the next 1 damage that would be dealt to target creature or player this turn.|draw a card.',
 "double target player's life total

In [8]:
# Strip reminder text: each parenthesised span plus the whitespace character
# in front of it.  [^)]+ stops at the first closing parenthesis, so every
# reminder is removed on its own.  The previous greedy [\S\s]+ ran from the
# first '(' to the LAST ')' and therefore also deleted any rules text that
# sat between two reminders on the same card.
reminder_text = re.compile(r'\s\([^)]+\)')

ctext_new = []
for text in ctext:
    new_text = reminder_text.sub('', text)
    # '|' is used as the field separator when the card strings are
    # assembled, so abilities are separated with '·' instead
    new_text = new_text.replace('|', '·')
    ctext_new.append(new_text)

In [9]:
# Same ten cards after reminder-text removal and '|' -> '·' replacement.
ctext_new[:10]

['first strike·when Ⓝ enters the battlefield, you gain 1 life for each card in your graveyard.',
 'flying·when Ⓝ enters the battlefield, you gain 3 life.',
 'target creature gets +3/+3 and gains flying until end of turn.',
 'whenever a creature enters the battlefield under your control, you gain life equal to its toughness.',
 'defender·flying',
 'artifact and enchantment spells your opponents cast cost ② more to cast.·sacrifice Ⓝ: destroy target artifact or enchantment.',
 'flying·when Ⓝ enters the battlefield, destroy target enchantment.',
 'ⓍⓌ, ↷: Ⓝ deals x damage to target attacking or blocking creature.',
 'prevent the next 1 damage that would be dealt to target creature or player this turn.·draw a card.',
 "double target player's life total. shuffle Ⓝ into its owner's library."]

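### combine items into string with '|' separator
[names|mcost|rletr|typss|subts|ctext_new|power|tough]
power/tough are only added when both are present (neither is '⌧')
every card string ends with the EOF symbol Ⓔ; the START symbol Ⓢ is not part of the card strings (it is only added to the vocabulary further below)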

In [10]:
# Assemble one '|'-separated record per card and terminate it with 'Ⓔ'.
cardtext = []
for i, name in enumerate(names):
    fields = [name, mcost[i], rletr[i], typss[i], subts[i], ctext_new[i]]

    # power and toughness are appended only when neither is the '⌧' marker
    missing_stats = power[i] == '⌧' or tough[i] == '⌧'
    if not missing_stats:
        fields.extend([power[i], tough[i]])

    cardtext.append('|'.join(fields) + 'Ⓔ')

In [11]:
# Show the first ten assembled card records, one per line.
print('\n'.join(cardtext[k] for k in range(10)))

ancestor's chosen|⑤ⓌⓌ|U|creature|human·cleric|first strike·when Ⓝ enters the battlefield, you gain 1 life for each card in your graveyard.|4|4Ⓔ
angel of mercy|④Ⓦ|U|creature|angel|flying·when Ⓝ enters the battlefield, you gain 3 life.|3|3Ⓔ
angelic blessing|②Ⓦ|C|sorcery|⌧|target creature gets +3/+3 and gains flying until end of turn.Ⓔ
angelic chorus|③ⓌⓌ|R|enchantment|⌧|whenever a creature enters the battlefield under your control, you gain life equal to its toughness.Ⓔ
angelic wall|①Ⓦ|C|creature|wall|defender·flying|0|4Ⓔ
aura of silence|①ⓌⓌ|U|enchantment|⌧|artifact and enchantment spells your opponents cast cost ② more to cast.·sacrifice Ⓝ: destroy target artifact or enchantment.Ⓔ
aven cloudchaser|③Ⓦ|C|creature|bird·soldier|flying·when Ⓝ enters the battlefield, destroy target enchantment.|2|2Ⓔ
ballista squad|③Ⓦ|U|creature|human·rebel|ⓍⓌ, ↷: Ⓝ deals x damage to target attacking or blocking creature.|2|2Ⓔ
bandage|Ⓦ|C|instant|⌧|prevent the next 1 damage that would be dealt to target creatur

### save cardtexts

In [None]:
# np.save('data/card_texts.npy', cardtext)

### analysis & testing

In [12]:
# Character count of every assembled card record.
txtlens = list(map(len, cardtext))

In [13]:
# Histogram of record lengths as (length, count) pairs, longest first.
lencnts = Counter(txtlens)
zips = lencnts.most_common()
# every length occurs once as a Counter key, so ordering the tuples
# themselves orders them by length
x = sorted(zips, reverse=True)

In [14]:
# x

In [15]:
# Share of cards whose record is longer than the cut-off `idx`.
# The printed number is a fraction between 0 and 1.
idx = 250
over = sum(count for length, count in x if length > idx)
undr = sum(count for length, count in x if length <= idx)
print("% of cards over chosen idx:", over/(over+undr))

% of cards over chosen idx: 0.061505434967799096


In [16]:
# Print the longest card record (the first one, if several share the maximum).
print(max(cardtext, key=len))

dance of the dead|①Ⓑ|U|enchantment|aura|enchant creature card in a graveyard·when Ⓝ enters the battlefield, if it's on the battlefield, it loses "enchant creature card in a graveyard" and gains "enchant creature put onto the battlefield with Ⓝ." put enchanted creature card onto the battlefield tapped under your control and attach Ⓝ to it. when Ⓝ leaves the battlefield, that creature's controller sacrifices it.·enchanted creature gets +1/+1 and doesn't untap during its controller's untap step.·at the beginning of the upkeep of enchanted creature's controller, that player may pay ①Ⓑ. if he or she does, untap that creature.Ⓔ


## generate batches

START: Ⓢ  PAD: ⎕

In [17]:
# Collect every character of every card record in one flat list.
vocab = [ch for card in cardtext for ch in card]

# Vocabulary layout: PAD symbol at index 0, then the characters from most to
# least frequent (most_common() already returns them in that order), and the
# START symbol at the very end.
by_frequency = [ch for ch, _count in Counter(vocab).most_common()]
vocabset = ['⎕'] + by_frequency
vocabset.append('Ⓢ')

# Lookup tables in both directions: character -> index and index -> character.
c2i = {ch: pos for pos, ch in enumerate(vocabset)}
i2c = dict(enumerate(vocabset))

In [18]:
# Sanity check: index assigned to the space character, and the symbol
# stored at index 0.
c2i[' '], i2c[0]

(1, '⎕')

In [19]:
# save
# np.save('c2i.npy', c2i)
# np.save('i2c.npy', i2c)

In [177]:
def cardGenerator(cardtext, windowsize, cards_per_batch, c2i=None, debug=False):
    """Yield endless (x, y) batches of sliding windows over shuffled cards.

    Every batch is built from one "warm-up" card, of which only the last
    `windowsize` symbols are kept, followed by `cards_per_batch` complete
    cards.  A window of `windowsize` symbols slides over that sequence one
    position at a time: each window becomes a row of `x`, and the symbol
    that directly follows it becomes the matching row of `y`.

    Cards are drawn from a random permutation of `cardtext`; once the
    permutation is used up a fresh one is generated, so the generator never
    runs past the end of the index list and no card is skipped between
    batches.

    Parameters
    ----------
    cardtext : sequence of str
        The card records to sample from.
    windowsize : int
        Number of symbols per input window.
    cards_per_batch : int
        Number of complete cards appended after the warm-up tail.
    c2i : mapping, optional
        Character -> integer index table.  When omitted (and `debug` is
        false) the module-level `c2i` is looked up when the first batch is
        requested, so a vocabulary rebuilt after this function was defined
        is still picked up.
    debug : bool
        If true, batches contain the raw characters instead of indices and
        `c2i` is not used.

    Yields
    ------
    (x, y) : tuple of numpy arrays
        `x` has one row of `windowsize` symbols per window; `y` has shape
        (number of windows, 1) and holds the symbol following each window.

    Raises
    ------
    ValueError
        When the first batch is requested and `cardtext` is empty.
    """
    n_cards = len(cardtext)
    if n_cards == 0:
        raise ValueError("cardtext must contain at least one card")

    if not debug and c2i is None:
        # late lookup of the notebook-level table (the parameter shadows it)
        c2i = globals()['c2i']

    def encode(text):
        """Return the card text as a list of characters (debug) or indices."""
        if debug:
            return list(text)
        return [c2i[ch] for ch in text]

    def card_indices():
        """Endless stream of card positions, reshuffled after each full pass."""
        while True:
            yield from np.random.permutation(n_cards)

    next_index = card_indices()

    while True:
        # warm-up: only the tail of one card, so the first window of the
        # batch ends where a card ends
        sequence = encode(cardtext[next(next_index)][-windowsize:])
        for _ in range(cards_per_batch):
            sequence += encode(cardtext[next(next_index)])

        # slide by index instead of popping from the front of the list
        n_windows = max(len(sequence) - windowsize, 0)
        x = [np.array(sequence[start:start + windowsize])
             for start in range(n_windows)]

        # targets get a trailing axis, as needed for
        # sparse_categorical_crossentropy
        y = np.array(sequence[windowsize:])[:, np.newaxis]
        yield (np.asarray(x), y)

In [178]:
# stress test : batch of 5000 x 2000 = over all cards at least once
# itr = cardGenerator(cardtext, 5, 5000, debug=True)
# for i in range(2000):
#     x, y = next(itr)

In [179]:
# Debug-mode generator (raw characters instead of indices): windows of 10
# symbols, 3 complete cards per batch.
itr = cardGenerator(cardtext, 10, 3, debug=True)

In [180]:
# Pull one batch and print every window next to its target symbol.
x, y = next(itr)
print(x.shape, y.shape)
for window, target in zip(x, y):
    print(window, target)

(500, 10) (500, 1)
['s' ' ' '3' ' ' 'l' 'i' 'f' 'e' '.' 'Ⓔ'] ['r']
[' ' '3' ' ' 'l' 'i' 'f' 'e' '.' 'Ⓔ' 'r'] ['i']
['3' ' ' 'l' 'i' 'f' 'e' '.' 'Ⓔ' 'r' 'i'] ['p']
[' ' 'l' 'i' 'f' 'e' '.' 'Ⓔ' 'r' 'i' 'p'] ['t']
['l' 'i' 'f' 'e' '.' 'Ⓔ' 'r' 'i' 'p' 't'] ['i']
['i' 'f' 'e' '.' 'Ⓔ' 'r' 'i' 'p' 't' 'i'] ['d']
['f' 'e' '.' 'Ⓔ' 'r' 'i' 'p' 't' 'i' 'd'] ['e']
['e' '.' 'Ⓔ' 'r' 'i' 'p' 't' 'i' 'd' 'e'] [' ']
['.' 'Ⓔ' 'r' 'i' 'p' 't' 'i' 'd' 'e' ' '] ['s']
['Ⓔ' 'r' 'i' 'p' 't' 'i' 'd' 'e' ' ' 's'] ['u']
['r' 'i' 'p' 't' 'i' 'd' 'e' ' ' 's' 'u'] ['r']
['i' 'p' 't' 'i' 'd' 'e' ' ' 's' 'u' 'r'] ['v']
['p' 't' 'i' 'd' 'e' ' ' 's' 'u' 'r' 'v'] ['i']
['t' 'i' 'd' 'e' ' ' 's' 'u' 'r' 'v' 'i'] ['v']
['i' 'd' 'e' ' ' 's' 'u' 'r' 'v' 'i' 'v'] ['o']
['d' 'e' ' ' 's' 'u' 'r' 'v' 'i' 'v' 'o'] ['r']
['e' ' ' 's' 'u' 'r' 'v' 'i' 'v' 'o' 'r'] ['|']
[' ' 's' 'u' 'r' 'v' 'i' 'v' 'o' 'r' '|'] ['②']
['s' 'u' 'r' 'v' 'i' 'v' 'o' 'r' '|' '②'] ['Ⓤ']
['u' 'r' 'v' 'i' 'v' 'o' 'r' '|' '②' 'Ⓤ'] ['|']
['r' 'v' 'i' 'v' 'o' 

to-do, network notes:

create window-size, pad with START tokens
add EOF token (and remove power/toughness if null?)
in network, encode name with LSTM(s) then copy state(s) to output network
(effectively a seq-to-seq)
generator needs to PAD TO FIXED BATCH SIZE (see length chosen above)
use batch-training and reset_state