In [1]:
from gensim.models.poincare import PoincareModel

In [2]:
import json

In [3]:
# Load the Pokedex: a dict keyed by species id (e.g. 'bulbasaur') whose values are
# per-species attribute dicts. The context manager closes the file handle
# deterministically, and the explicit UTF-8 encoding avoids depending on the
# platform's default text encoding.
with open('../pokedex.json', 'r', encoding='utf-8') as pokedex_file:
    mons = json.load(pokedex_file)

In [4]:
# Inspect a single record to see which fields a Pokedex entry carries
# (types, abilities, eggGroups, evos, ... are used for the edges built later).
mons['bulbasaur']

{'num': 1,
 'species': 'Bulbasaur',
 'types': ['Grass', 'Poison'],
 'genderRatio': {'M': 0.875, 'F': 0.125},
 'baseStats': {'hp': 45,
  'atk': 49,
  'def': 49,
  'spa': 65,
  'spd': 65,
  'spe': 45},
 'abilities': {'0': 'Overgrow', 'H': 'Chlorophyll'},
 'heightm': 0.7,
 'weightkg': 6.9,
 'color': 'Green',
 'evos': ['ivysaur'],
 'eggGroups': ['Monster', 'Grass']}

In [5]:
# Pokedex fields that become feature nodes in the graph.
# 'evos' and 'prevo' are also supported by the builder but are currently left out.
features_to_embed = ['types', 'abilities', 'eggGroups']


def _feature_values(raw):
    """Return the individual values stored under one Pokedex field as a list.

    Dict-valued fields (e.g. 'abilities', keyed by slot) contribute their values,
    list-valued fields (e.g. 'types', 'eggGroups', 'evos') contribute their items,
    and scalar fields (e.g. 'prevo') contribute the single value.
    """
    if isinstance(raw, dict):
        return list(raw.values())
    if isinstance(raw, (list, tuple)):
        return list(raw)
    return [raw]


def build_feature_tuples(mons, features_to_embed, include_missing=False):
    """Build (pokemon_node, feature_node) edges for the Poincare model.

    Parameters
    ----------
    mons : dict
        Mapping of species id -> attribute dict, as loaded from the Pokedex JSON.
    features_to_embed : list of str
        Names of the attribute fields to turn into feature nodes.
    include_missing : bool, default False
        When True, a species lacking a field gets an edge to "<field>_none".
        The default (False) skips missing fields, which is what the original
        loop effectively did: it built a "_none" tuple (without the "mon_"
        prefix) but never appended it.

    Returns
    -------
    list of (str, str)
        Edges of the form ("mon_<species>", "<field>_<value>").
    """
    edges = []
    for mon, features in mons.items():
        mon_node = "mon_" + mon
        for feature in features_to_embed:
            if feature not in features:
                if include_missing:
                    edges.append((mon_node, feature + "_none"))
                continue
            for value in _feature_values(features[feature]):
                edges.append((mon_node, feature + f"_{value}"))
    return edges


tuples = build_feature_tuples(mons, features_to_embed)

In [6]:
import pandas as pd
# Load the precomputed (pokemon, move) table from a tab-separated file.
# NOTE(review): the recorded head() output shows an 'Unnamed: 0' column, which
# looks like a row index saved with the file - consider index_col=0; confirm.
moves_tuples = pd.read_csv("./20191005_mon_moves_features.tsv", sep="\t")

In [7]:
# Preview the first rows to check the column layout (mon, feature, feature_val, ...).
moves_tuples.head()

Unnamed: 0,mon,feature_complex,feature,feature_val
0,bulbasaur,move_sleeppowder,move,sleeppowder
1,bulbasaur,move_gigadrain,move,gigadrain
2,bulbasaur,move_hiddenpowerfire,move,hiddenpowerfire
3,bulbasaur,move_hiddenpowerice,move,hiddenpowerice
4,bulbasaur,move_sludgebomb,move,sludgebomb


In [8]:
# Add one ("mon_<species>", "move_<move>") edge per row of the moves table.
# NOTE: this extends `tuples` in place, so re-running the cell appends the
# move edges a second time.
move_edges = [
    ('mon_' + mon_name, 'move_' + move_name)
    for mon_name, move_name in zip(moves_tuples['mon'], moves_tuples['feature_val'])
]
tuples.extend(move_edges)

In [9]:
# Set up a Poincare embedding over the (pokemon, feature) edge list.
# `negative` is gensim's negative-sample count (set to 2 here); all other
# hyperparameters are left at the library defaults.
m = PoincareModel(tuples, negative=2)

In [10]:
# Fit the embedding for 50 passes over the edge list.
m.train(epochs=50)

In [11]:
# Sanity check: similarity score between the Bulbasaur and Oddish nodes.
m.kv.similarity('mon_bulbasaur', 'mon_oddish')

0.1291160586248858

In [12]:
# Closest 100 nodes to Oddish, keeping only Pokemon nodes (feature and move
# nodes share the same embedding space and are filtered out by prefix).
[(node, dist) for node, dist in m.kv.most_similar('mon_oddish', 100) if node.startswith("mon_")]

[('mon_lurantistotem', 4.830909068069064),
 ('mon_fomantis', 5.104510225092127),
 ('mon_vileplume', 5.201827266967495),
 ('mon_steenee', 5.228508995819073),
 ('mon_bounsweet', 5.341918457636445),
 ('mon_gloom', 5.515837626302728),
 ('mon_necturine', 5.554905652410323)]

In [13]:
# Closest 100 nodes to Bulbasaur, keeping only Pokemon nodes.
# NOTE(review): in the recorded output the query node itself is returned at
# distance 0.0, unlike the other queries - drop it if only neighbours are wanted.
[(node, dist) for node, dist in m.kv.most_similar('mon_bulbasaur', 100) if node.startswith("mon_")]

[('mon_bulbasaur', 0.0),
 ('mon_lurantistotem', 4.712798368759483),
 ('mon_lurantis', 4.8749980257024355),
 ('mon_fomantis', 4.953152018367086),
 ('mon_steenee', 5.017066729938283)]

In [16]:
# Closest 1000 nodes to Incineroar, keeping only Pokemon nodes. The larger
# cut-off leaves more Pokemon in the list once feature/move nodes are removed.
[(node, dist) for node, dist in m.kv.most_similar('mon_incineroar', 1000) if node.startswith("mon_")]

[('mon_torracat', 3.769602462451072),
 ('mon_litten', 4.022447589715846),
 ('mon_pokestarwhitedoor', 4.6301716598215155),
 ('mon_tepig', 4.707009527281012),
 ('mon_darmanitanzen', 5.1208781162837385),
 ('mon_darumaka', 5.188067923426434),
 ('mon_pokestarmonster', 5.267268077778372),
 ('mon_pignite', 5.27842269566542),
 ('mon_smokomodo', 5.316435833756997),
 ('mon_gumshoostotem', 5.518023859791526),
 ('mon_typenull', 5.595402021149531),
 ('mon_lycanrocdusk', 5.654033817209377),
 ('mon_combusken', 5.72976830247726),
 ('mon_pokestargiant', 5.7689596381318795),
 ('mon_pokestarblackbelt', 5.784048442682212),
 ('mon_pikachupopstar', 5.795248865468854),
 ('mon_raticatealolatotem', 5.8263245417778675),
 ('mon_melmetal', 5.849351167918341),
 ('mon_poipole', 5.867865035290514),
 ('mon_pokestarhumanoid', 5.889360241514911),
 ('mon_pokestartransport', 5.895422802409413),
 ('mon_keldeoresolute', 5.916341631754636),
 ('mon_pikachuphd', 5.920949965449266),
 ('mon_litleo', 5.922031772480731),
 ('mon_p

In [17]:
# Collect the embedding vector of every Pokemon node, in vocabulary order.
mon_vecs = []
for node in m.kv.vocab:
    if node.startswith('mon_'):
        mon_vecs.append(m.kv.get_vector(node))

In [18]:
# Label each vector with its node name; the same prefix filter over the same
# vocabulary keeps the labels aligned with the rows of `mon_vecs`.
mon_nodes = [node for node in m.kv.vocab if node.startswith('mon_')]
mon_vecs_df = pd.DataFrame(mon_vecs, index=mon_nodes)

In [19]:
# Export the Pokemon vectors as a tab-separated file in the working directory.
# `index` is left at its default, so the node names are presumably written as
# the first column - confirm against the output file.
mon_vecs_df.to_csv("20191005_mon_vecs_type-egg-ability-moves.txt", sep="\t")

In [20]:
# Put the full edge list (feature edges plus move edges) into a two-column frame.
tuples_df = pd.DataFrame(tuples)

In [21]:
# Name the columns after the tuple layout: ("mon_<species>", "<feature>_<value>").
tuples_df.columns = ['mon_str', 'feature_category']

In [22]:
# Check the end of the frame: the last rows are the move edges appended last.
tuples_df.tail()

Unnamed: 0,mon_str,feature_category
15740,mon_voodoom,move_taunt
15741,mon_voodoom,move_painsplit
15742,mon_voodoom,move_substitute
15743,mon_voodoom,move_hiddenpowerice
15744,mon_voodoom,move_vacuumwave


In [23]:
# Recover the bare species id by taking the text after the "mon_" marker.
tuples_df['mon'] = [node.split("mon_")[1] for node in tuples_df['mon_str']]

In [24]:
# The feature kind (types / abilities / eggGroups / move) is the text before
# the first underscore of the feature node name.
tuples_df['feature'] = tuples_df['feature_category'].map(lambda label: label.partition("_")[0])

In [25]:
# Confirm the derived 'mon' and 'feature' columns look right.
tuples_df.head()

Unnamed: 0,mon_str,feature_category,mon,feature
0,mon_bulbasaur,types_Grass,bulbasaur,types
1,mon_bulbasaur,types_Poison,bulbasaur,types
2,mon_bulbasaur,abilities_Overgrow,bulbasaur,abilities
3,mon_bulbasaur,abilities_Chlorophyll,bulbasaur,abilities
4,mon_bulbasaur,eggGroups_Monster,bulbasaur,eggGroups


In [26]:
# The feature value is everything after the FIRST underscore of the feature
# node name. Splitting only once (maxsplit=1) keeps values that themselves
# contain an underscore intact; an unbounded split would silently truncate
# them at the second underscore.
tuples_df['feature_val'] = tuples_df['feature_category'].apply(lambda x: x.split("_", 1)[1])

In [27]:
# Export the tidy (mon, feature, feature_val) table as a tab-separated file.
# NOTE(review): index=None presumably suppresses the row index the same way
# index=False does - confirm, and prefer the explicit boolean.
tuples_df[['mon', 'feature', 'feature_val']].to_csv("./20191005_mon_types-abilities-egg-moves_features.tsv", sep="\t", index=None)

In [28]:
# Final look at the frame with all derived columns in place.
tuples_df.head()

Unnamed: 0,mon_str,feature_category,mon,feature,feature_val
0,mon_bulbasaur,types_Grass,bulbasaur,types,Grass
1,mon_bulbasaur,types_Poison,bulbasaur,types,Poison
2,mon_bulbasaur,abilities_Overgrow,bulbasaur,abilities,Overgrow
3,mon_bulbasaur,abilities_Chlorophyll,bulbasaur,abilities,Chlorophyll
4,mon_bulbasaur,eggGroups_Monster,bulbasaur,eggGroups,Monster


In [None]:
# Persist the trained model to disk so the embedding can be reloaded later.
# NOTE(review): this cell has no execution count in the recorded notebook, so
# the model file may not have been written in that session.
m.save("./20191005_poincare_embeddings_model_w_features_n_moves.model")