In [9]:
import requests
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import os
import networkx as nx


### Initial data load

First, we get the names of all the Pokémon, and append these to a list.

In [163]:
data = requests.get('https://pokeapi.co/api/v2/pokemon?limit=1000')

In [164]:
data2 = data.json()

In [165]:
data3 = data2['results']

In [166]:
pokemons = []
# get the name of the pokemon
for i in range(len(data3)):
    pokemons.append(data3[i]['name'])


In [191]:
pokemons

['bulbasaur',
 'ivysaur',
 'venusaur',
 'charmander',
 'charmeleon',
 'charizard',
 'squirtle',
 'wartortle',
 'blastoise',
 'caterpie',
 'metapod',
 'butterfree',
 'weedle',
 'kakuna',
 'beedrill',
 'pidgey',
 'pidgeotto',
 'pidgeot',
 'rattata',
 'raticate',
 'spearow',
 'fearow',
 'ekans',
 'arbok',
 'pikachu',
 'raichu',
 'sandshrew',
 'sandslash',
 'nidoran-f',
 'nidorina',
 'nidoqueen',
 'nidoran-m',
 'nidorino',
 'nidoking',
 'clefairy',
 'clefable',
 'vulpix',
 'ninetales',
 'jigglypuff',
 'wigglytuff',
 'zubat',
 'golbat',
 'oddish',
 'gloom',
 'vileplume',
 'paras',
 'parasect',
 'venonat',
 'venomoth',
 'diglett',
 'dugtrio',
 'meowth',
 'persian',
 'psyduck',
 'golduck',
 'mankey',
 'primeape',
 'growlithe',
 'arcanine',
 'poliwag',
 'poliwhirl',
 'poliwrath',
 'abra',
 'kadabra',
 'alakazam',
 'machop',
 'machoke',
 'machamp',
 'bellsprout',
 'weepinbell',
 'victreebel',
 'tentacool',
 'tentacruel',
 'geodude',
 'graveler',
 'golem',
 'ponyta',
 'rapidash',
 'slowpoke',
 '

### Data Set Creation

Second, we must request the API for each Pokémon, to get the abilities, types, egg groups, moves and Pokédex entries.

In [301]:
def data_scrape():
    temp_dict = {
        'pokemon': [],
        'abilities': [], 
        'types': [], 
        'egg_groups': [], 
        'moves': [],
        'pokedex_entry': []
    }
    
    for i, name in tqdm(enumerate(pokemons)):
        r = requests.get('https://pokeapi.co/api/v2/pokemon/' + str(i+1)).json()
        # append the name of the pokemon
        temp_dict['pokemon'].append(name)

        # append the abilities of the pokemon
        abilities = [r['abilities'][j]['ability']['name'] for j in range(len(r['abilities']))]
        temp_dict['abilities'].append(abilities)

        # append the types of the pokemon
        types = [r['types'][i]['type']['name'] for i in range(len(r['types']))]
        temp_dict['types'].append(types)

        # append the moves of the pokemon
        moves = [r['moves'][j]['move']['name'] for j in range(len(r['moves']))]
        temp_dict['moves'].append(moves)

        # make new request to get the egg groups and pokedex entry
        r = requests.get('https://pokeapi.co/api/v2/pokemon-species/' + str(i+1)).json()

        # append the egg groups of the pokemon
        egg_groups = [r['egg_groups'][j]['name'] for j in range(len(r['egg_groups']))]
        temp_dict['egg_groups'].append(egg_groups)

        # append the pokedex entry of the pokemon
        entry = r['flavor_text_entries'][0]['flavor_text'].replace('\n', ' ').replace('\f', ' ') if len(r['flavor_text_entries']) > 0 else None
        temp_dict['pokedex_entry'].append(entry)
        

    print('Done!')

    return temp_dict
    

In [395]:
if not os.path.exists('pokemon.csv'):     
    print('Scraping data...')
    #poke_dict = data_scrape()



### Make the dataframe

In [303]:
poke_df = pd.DataFrame(poke_dict) if not os.path.exists('pokemon.csv') else pd.read_csv('pokemon.csv')

In [305]:
poke_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   pokemon        1000 non-null   object
 1   abilities      1000 non-null   object
 2   types          1000 non-null   object
 3   egg_groups     1000 non-null   object
 4   moves          1000 non-null   object
 5   pokedex_entry  905 non-null    object
dtypes: object(6)
memory usage: 47.0+ KB


In [308]:
# save to csv
poke_df.to_csv('pokemon.csv', index=False)

In [306]:
# Remove null values
poke_df_clean = poke_df[poke_df['pokedex_entry'].notnull()]

In [307]:
poke_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 905 entries, 0 to 904
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   pokemon        905 non-null    object
 1   abilities      905 non-null    object
 2   types          905 non-null    object
 3   egg_groups     905 non-null    object
 4   moves          905 non-null    object
 5   pokedex_entry  905 non-null    object
dtypes: object(6)
memory usage: 49.5+ KB


In [309]:
# save to csv again
poke_df_clean.to_csv('pokemon_clean.csv', index=False)

In [317]:
poke_df_clean.columns

Index(['pokemon', 'abilities', 'types', 'egg_groups', 'moves',
       'pokedex_entry'],
      dtype='object')

First, we get all the unique abilities, types, egg groups and moves.

In [322]:
def find_unique(df, col):
    vals = df[col].values
    all_vals = [item for sublist in vals for item in sublist]
    unique_vals = list(set(all_vals))
    return unique_vals

In [325]:
unique_abilities = find_unique(poke_df_clean, 'abilities')
unique_types = find_unique(poke_df_clean, 'types')
unique_egg_groups = find_unique(poke_df_clean, 'egg_groups')
unique_moves = find_unique(poke_df_clean, 'moves')

In [327]:
print('Number of unique abilities: ', len(unique_abilities))
print('Number of unique types: ', len(unique_types))
print('Number of unique egg groups: ', len(unique_egg_groups))
print('Number of unique moves: ', len(unique_moves))

Number of unique abilities:  249
Number of unique types:  18
Number of unique egg groups:  15
Number of unique moves:  747


In [407]:
# Time to get the text entries for each ability, and move
def get_text_entries(attribute, unique_vals):
    temp_dict = {
        attribute: [],
        'text_entry': []
    }

    for i, val in tqdm(enumerate(unique_vals)):
        r = requests.get('https://pokeapi.co/api/v2/' + attribute + '/' + val).json()
        
        # check if the text entry exists in english
        if len(r['effect_entries']) == 0:
            for j in range(len(r['flavor_text_entries'])):
                if r['flavor_text_entries'][j]['language']['name'] == 'en':
                    temp_dict[attribute].append(val)
                    temp_dict['text_entry'].append(r['flavor_text_entries'][j]['flavor_text'].replace('\n', ' ').replace('\f', ' '))
                    break
        else:
            for j in range(len(r['effect_entries'])):
                if r['effect_entries'][j]['language']['name'] == 'en':
                    temp_dict[attribute].append(val)
                    temp_dict['text_entry'].append(r['effect_entries'][j]['effect'].replace('\n', ' ').replace('\f', ' '))
                    break

    return temp_dict

In [408]:
ability_dict = get_text_entries('ability', unique_abilities)

249it [00:17, 14.05it/s]


In [411]:
move_dict = get_text_entries('move', unique_moves)

747it [01:17,  9.60it/s]


In [412]:
# Make dataframes
ability_df = pd.DataFrame(ability_dict)
move_df = pd.DataFrame(move_dict)

In [414]:
# save to csv
ability_df.to_csv('ability.csv', index=False)
move_df.to_csv('move.csv', index=False)

We might experience that not all abilities nor moves have text, and we will check that here.

In [422]:
# find the missing abilities
missing_abilities = [x for x in unique_abilities if x not in ability_df['ability'].values.tolist()]

# find the missing moves
missing_moves = [x for x in unique_moves if x not in move_df['move'].values.tolist()]


In [424]:
# print number of missing abilities and moves
print('Number of missing abilities: ', len(missing_abilities))
print('Number of missing moves: ', len(missing_moves))

Number of missing abilities:  1
Number of missing moves:  29


In [425]:
# update the unique abilities and moves lists to not include the missing ones
unique_abilities = [x for x in unique_abilities if x not in missing_abilities]
unique_moves = [x for x in unique_moves if x not in missing_moves]

# print number of unique abilities and moves
print('Number of unique abilities: ', len(unique_abilities))
print('Number of unique moves: ', len(unique_moves))

Number of unique abilities:  248
Number of unique moves:  718


This concludes the data collection part of this project. To briefly summarise the data, we have:

In [427]:
# summarize the data
print('Number of pokemon: ', len(poke_df_clean))
print('Number of unique abilities: ', len(unique_abilities))
print('Number of unique types: ', len(unique_types))
print('Number of unique egg groups: ', len(unique_egg_groups))
print('Number of unique moves: ', len(unique_moves))

Number of pokemon:  905
Number of unique abilities:  248
Number of unique types:  18
Number of unique egg groups:  15
Number of unique moves:  718


## Time to make the graph

In [3]:
# load the dataframes
poke_df_clean = pd.read_csv('pokemon_clean.csv')
ability_df = pd.read_csv('ability.csv')
move_df = pd.read_csv('move.csv')

In [4]:
poke_df_clean.head()

Unnamed: 0,pokemon,abilities,types,egg_groups,moves,pokedex_entry
0,bulbasaur,"['overgrow', 'chlorophyll']","['grass', 'poison']","['monster', 'plant']","['razor-wind', 'swords-dance', 'cut', 'bind', ...",A strange seed was planted on its back at birt...
1,ivysaur,"['overgrow', 'chlorophyll']","['grass', 'poison']","['monster', 'plant']","['swords-dance', 'cut', 'bind', 'vine-whip', '...","When the bulb on its back grows large, it appe..."
2,venusaur,"['overgrow', 'chlorophyll']","['grass', 'poison']","['monster', 'plant']","['swords-dance', 'cut', 'bind', 'vine-whip', '...",The plant blooms when it is absorbing solar en...
3,charmander,"['blaze', 'solar-power']",['fire'],"['monster', 'dragon']","['mega-punch', 'fire-punch', 'thunder-punch', ...","Obviously prefers hot places. When it rains, s..."
4,charmeleon,"['blaze', 'solar-power']",['fire'],"['monster', 'dragon']","['mega-punch', 'fire-punch', 'thunder-punch', ...","When it swings its burning tail, it elevates t..."


In [5]:
move_df.head()

Unnamed: 0,move,text_entry
0,last-resort,Inflicts regular damage. This move can only b...
1,foresight,Resets the target's evasion to normal and prev...
2,vital-throw,Inflicts regular damage. Ignores accuracy and...
3,thunderbolt,Inflicts regular damage. Has a $effect_chance...
4,gastro-acid,The target's ability is disabled as long as it...


In [6]:
ability_df.head()

Unnamed: 0,ability,text_entry
0,poison-heal,"If this Pokémon is poisoned, it will heal for ..."
1,ice-body,This Pokémon heals for 1/16 of its maximum HP ...
2,fur-coat,Halves damage from physical attacks.
3,insomnia,This Pokémon cannot be asleep. This causes re...
4,flame-body,Whenever a move makes contact with this Pokémo...


In [150]:
G = nx.Graph()

In [151]:
# make an edgelist for the pokemon
# two pokemon are connected if they share an egg group
def make_edgelist():
    edges = []
    for i in tqdm(range(len(poke_df_clean))):
        for j in range(i+1, len(poke_df_clean)):
            if len(set(eval(poke_df_clean['egg_groups'].iloc[i])).intersection(set(eval(poke_df_clean['egg_groups'].iloc[j])))) > 0:
                num_shared_moves = len(set(eval(poke_df_clean['moves'].iloc[i])).intersection(set(eval(poke_df_clean['moves'].iloc[j]))))
                edges.append((poke_df_clean['pokemon'].iloc[i], poke_df_clean['pokemon'].iloc[j], num_shared_moves))
    
    return edges


In [152]:
edgelist = make_edgelist()


100%|██████████| 905/905 [00:45<00:00, 19.79it/s] 


In [154]:
def update_graph(G, edgelist):
    G.clear()
    G.add_weighted_edges_from(edgelist)
    print('Number of nodes: ', G.number_of_nodes())
    print('Number of edges: ', G.number_of_edges())

In [155]:
update_graph(G, edgelist)

Number of nodes:  904
Number of edges:  64310


In [156]:
# add types and abilities as node attributes
types = [eval(t) for t in poke_df_clean['types'].values]
type_dict = dict(zip(poke_df_clean['pokemon'], types))

abilities = [eval(a) for a in poke_df_clean['abilities'].values]
ability_dict = dict(zip(poke_df_clean['pokemon'], abilities))

nx.set_node_attributes(G, type_dict, 'typing')
nx.set_node_attributes(G, ability_dict, 'abilities')

In [159]:
group = type_dict
nx.set_node_attributes(G, group, 'group')

In [176]:
# save the graph to pickle
import pickle
with open('pokemon_graph.pickle', 'wb') as f:
    pickle.dump(G, f)

In [177]:
import netwulf as nw
nw.visualize(G)

(None, None)