# Pokemon TCG card generator

- Downloads and saves card data from pokemontcg.io with python sdk, reformats it as YAML
- Uses keras lstm example to generate card data

## Load card data

In [2]:
# imports
import yaml, json, os, random
from pokemontcgsdk import Card, Type, Subtype

In [4]:
data_dir = '/home/ubuntu/fastai-data/pokemon'

In [12]:
alphabet = 'abcdefghijklmnopqrstuvwxyz'
type_char = 'cddryifglmpw'
subtypes = Subtype.all()
types = Type.all()

In [79]:
with open(os.path.join(data_dir,'cards.json')) as f:
     cards = json.load(f)
        
print(cards[0])

{'rarity': 'Common', 'name': 'Shroomish', 'resistances': None, 'ancient_trait': None, 'subtype': 'Basic', 'types': ['Grass'], 'hp': '60', 'ability': None, 'attacks': [{'text': "Your opponent's Active Pokémon is now Asleep.", 'name': 'Spore', 'cost': ['Colorless'], 'convertedEnergyCost': 1, 'damage': ''}], 'weaknesses': [{'type': 'Fire', 'value': 'x2'}], 'supertype': 'Pokémon', 'text': None, 'retreat_cost': ['Colorless']}


In [85]:
def type_to_char(t_list):
    if t_list:
        return ''.join([type_char[types.index(t)] for t in t_list if t in types])
    else:
        return ''

def singlify(text, name=None):
    if text:
        text = ''.join(text) if isinstance(text, list) else text
        if name:
            text = text.replace(name, '@')
        return text
    else:
        return ''

with open(os.path.join(data_dir,'cards.txt'), 'w+') as f:
    for card in cards:
        lines = ['\n']
        lines.append('|'.join([card['supertype'][0],
                alphabet[subtypes.index(card['subtype'])] if card['subtype'] else '',
                card['rarity'][0] if card['rarity'] else '',
                type_to_char(card['types']),
                type_char[types.index(card['weaknesses'][0]['type'])]+('^'*int(card['weaknesses'][0]['value'][1]) if '0' in card['weaknesses'][0]['value'] else 'x') if card['weaknesses'] else '',     
                type_char[types.index(card['resistances'][0]['type'])]+('^'*int(card['resistances'][0]['value'][1]) if '0' in card['resistances'][0]['value'] else 'x') if card['resistances'] else '',     
                '^'*(int(card['hp'])//10) if card['hp'] and card['hp'].isdigit() else '',
                type_to_char(card['retreat_cost']),
                singlify(card['name']), singlify(card['text'],name=card['name'])]))
        if card['ability']:
            lines.append('|'.join(['x', card['ability']['name'], singlify(card['ability']['text'],name=card['name'])]))
        if card['ancient_trait']:
            lines.append('|'.join(['y', card['ancient_trait']['name'], singlify(card['ancient_trait']['text'],name=card['name'])]))
        if card['attacks'] and card['attacks']:
            for attack in card['attacks']:
                lines.append('|'.join(['z', type_to_char(attack['cost']) if 'cost' in attack else '', str(attack['damage']), singlify(attack['name']), singlify(attack['text'],name=card['name'])]))
        for line in lines:
            f.write(line+'\n')
            

## Preprocessing

- code is mostly copied from [fastai](https://github.com/fastai/courses/blob/master/deeplearning1/nbs/char-rnn.ipynb)

In [3]:
from theano.sandbox import cuda

 https://github.com/Theano/Theano/wiki/Converting-to-the-new-gpu-back-end%28gpuarray%29

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [44]:
# imports
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import Adam
import numpy as np

In [91]:
# load text
path = os.path.join(data_dir,'cards.txt')
text = open(path).read().lower()[:]
print('corpus length:', len(text))

corpus length: 2162780


In [92]:
# check text
!tail {path} -n26


P|k|R|y|mx|d^^|^^^^^^^^^^^^^^^^^^^^|cc|Sylveon-GX|When your Pokémon-GX is Knocked Out, your opponent takes 2 Prize cards.
z|y||Magical Ribbon|Search your deck for up to 3 cards and put them into your hand. Then, shuffle your deck.
z|ycc|110|Fairy Wind|
z|ycc||Plea-GX|Put 2 of your opponent's Benched Pokémon and all cards attached to them into your opponent’s hand. (You can’t use more than 1 GX attack in a game.)


P|k|R|r|yx||^^^^^^^^^^^^^^^^^^^^^^^^|cc|Kommo-o-GX|When your Pokémon-GX is Knocked Out, your opponent takes 2 Prize cards.
z|c|30|Adamantine Press|During your opponent's next turn, this Pokémon takes 30 less damage from attacks (after applying Weakness and Resistance).
z|licc|130|Shred|This attack's damage isn't affected by any effects on your opponent's Active Pokémon.
z|licc|240|Ultra Uppercut-GX|(You can’t use more than 1 GX attack in a game.)


P|k|R|c|ix||^^^^^^^^^^^^^^^^^^|cc|Drampa-GX|When your Pokémon-GX is Knocked Out, your opponent takes 2 Prize cards.

In [93]:
chars = sorted(list(set(text)))
vocab_size = len(chars)
print('total chars:', vocab_size)

total chars: 79


In [94]:
print(''.join(chars))


 !"#&'()*+,-./0123456789:;?@[]^_abcdefghijklmnopqrstuvwxyz{|}~ ×éαβδωݎ—’•↓−♀♂＋


In [95]:
# create character embeddings
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

idx = [char_indices[c] for c in text]

## Create Model

In [96]:
maxlen = 40
sentences = []
next_chars = []
for i in range(len(idx)-maxlen+1):
    sentences.append(idx[i: i + maxlen])
    next_chars.append(idx[i+1: i+maxlen+1])

In [97]:
print('nb sequences:', len(sentences))
print('nb chars:', len(next_chars))

nb sequences: 2162741
nb chars: 2162741


In [98]:
sentences = np.concatenate([[np.array(o)] for o in sentences[:-2]])
next_chars = np.concatenate([[np.array(o)] for o in next_chars[:-2]])

In [101]:
n_fac = 24

In [100]:
model=Sequential([
        Embedding(vocab_size, n_fac, input_length=maxlen),
        LSTM(units=512, input_shape=(n_fac,),return_sequences=True, dropout=0.2, recurrent_dropout=0.2,
             implementation=2),
        Dropout(0.2),
        LSTM(512, return_sequences=True, dropout=0.2, recurrent_dropout=0.2,
             implementation=2),
        Dropout(0.2),
        TimeDistributed(Dense(vocab_size)),
        Activation('softmax')
    ])
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(), metrics=['acc'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 40, 24)            1896      
_________________________________________________________________
lstm_3 (LSTM)                (None, 40, 512)           1099776   
_________________________________________________________________
dropout_3 (Dropout)          (None, 40, 512)           0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 40, 512)           2099200   
_________________________________________________________________
dropout_4 (Dropout)          (None, 40, 512)           0         
_________________________________________________________________
time_distributed_2 (TimeDist (None, 40, 79)            40527     
_________________________________________________________________
activation_2 (Activation)    (None, 40, 79)            0         
Total para

## Train Model

In [225]:
from numpy.random import choice
import random

def print_example(length=800, temperature=1.2, mult=1.7):
    seed_len=40
    path = os.path.join(data_dir,'cards.txt')
    text = open(path).read().lower()[:]
    ind = random.randint(0,len(text)-seed_len-1)
    seed_string = text[ind:ind+seed_len]
    for i in range(length):
        if (seed_string.split('\n')[-1].count('|') == 8 or
        seed_string.startswith(('x','y')) and seed_string.split('\n')[-1].count('|') == 1 or
        seed_string.startswith('z') and seed_string.split('\n')[-1].count('|') == 3):
            temp = temperature * mult # make names more creative
        else:
            temp = temperature
        x=np.array([char_indices[c] for c in seed_string[-40:]])[np.newaxis,:]
        preds = model.predict(x, verbose=0)[0][-1]
        preds = np.log(preds) / temp
        exp_preds = np.exp(preds)
        preds = exp_preds / np.sum(exp_preds)
        next_char = choice(chars, p=preds)
        seed_string = seed_string + next_char
    print(seed_string[seed_len:])

In [211]:
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, LambdaCallback
import h5py

def print_callback(logs, epoch):
    print_example()

weight_path="weights-{epoch:02d}-{acc:.2f}.hdf5"
checkpoint = ModelCheckpoint(os.path.join(data_dir, weight_path),
                             monitor='acc', verbose=1, save_best_only=True, mode='max')
reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.2,
                              patience=5, min_lr=0.000000001)
printer = LambdaCallback(on_epoch_end=print_callback)

callbacks_list = [printer, checkpoint, reduce_lr]

In [212]:
num_epochs = 50
history = model.fit(sentences,
                    np.expand_dims(next_chars,-1),
                    batch_size=256,
                    epochs=num_epochs,
                    callbacks=callbacks_list)

Epoch 1/50
   1536/2162739 [..............................] - ETA: 3364s - loss: 0.4351 - acc: 0.8523

KeyboardInterrupt: 

## Process Output

In [260]:
%%capture generated_cards
print_example(length=300000, temperature=1.1, mult=2.4)

In [261]:
with open(os.path.join(data_dir,'cards_generated.txt'), 'w+') as f:
    f.write(generated_cards.stdout)

At this point I redid the prior stuff with a premade tensorflow rnn model.

In [44]:
alphabet = 'abcdefghijklmnopqrstuvwxyz'
type_char = 'cddryifglmpw'
subtypes = Subtype.all()
types = Type.all()
supertypes = ('Pokémon', 'Energy', 'Trainer')
rarities = ('Common', 'Uncommon', 'Rare')
old_names = [c.name.lower() for c in Card.all()]

def char_to_type(chars):
    if chars and len(chars) > 0:
        return [types[type_char.index(char)] for char in chars]
    else:
        return None

cards = []
card = None
with open(os.path.join(data_dir,'cards_generated_tf.txt')) as f:
    for line in f:
        line = line.split('|')
        if line[0] in ('P','E','T'):
            if card and card['name'].lower().rstrip() not in old_names:
                cards.append(card)
            try:
                card = {'supertype': supertypes[('P','E','T').index(line[0])],
                        'subtype': subtypes[alphabet.index(line[1])] if line[1] else None,
                        'rarity': [r for r in rarities if r.startswith(line[2].upper())][0] if line[2] else None,
                        'types': char_to_type(line[3]),
                        'weaknesses':
                        {'type': types[type_char.index(line[4][0])], 'value': '×2' if line[4][1] == 'x' else '-'+str(len(line[4])-1)+'0'} if line[4] else None,
                        'resistances':
                        {'type': types[type_char.index(line[5][0])], 'value': '×2' if line[5][1] == 'x' else '-'+str(len(line[5])-1)+'0'} if line[5] else None,
                        'hp': len(line[6])*10 if line[6] else None,
                        'retreat_cost': char_to_type(line[7]),
                        'name': line[8].rstrip(), 'text': line[9].replace('@',line[8]).rstrip() if len(line) > 9 else None}
            except:
                card = None
                print('Skipped card')
        elif line[0] == 'x' and card and card['supertype'] == 'Pokémon':
            try:
                card['ability'] = {'name':line[1].rstrip(), 'text':line[2].replace('@',card['name']).rstrip() if len(line) > 2 else None}
            except:
                print('Skipped ability')
        elif line[0] == 'y' and card and card['supertype'] == 'Pokémon':
            try:
                card['ancient_trait'] = {'name':line[1].rstrip(), 'text':line[2].replace('@',card['name']).rstrip() if len(line) > 2 else None}
            except:
                print('Skipped trait')
        elif line[0] == 'z' and card and card['supertype'] == 'Pokémon':
            try:
                card.setdefault('attacks', []).append({'cost': char_to_type(line[1]),
                                                       'damage': line[2],
                                                       'name': line[3].rstrip(),
                                                       'text': line[4].replace('@',card['name']).rstrip() if len(line) > 4 else None})
            except:
                print('Skipped attack')
                                     

Skipped attack
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped attack
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped attack
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card
Skipped card


In [45]:
class ExplicitDumper(yaml.SafeDumper):
    def ignore_aliases(self, data):
        return True
    
with open('cards_generated_tf.yml', 'w+') as f:
     yaml.dump(cards, f, allow_unicode=True, Dumper=ExplicitDumper, default_flow_style=False)

In [46]:
from IPython.display import FileLink
FileLink('cards_generated_tf.yml')