In [51]:
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import os
import networkx as nx
import pickle


### Initial data load

First, we get the names of all the Pokémon, and append these to a list.

In [44]:
data = requests.get('https://pokeapi.co/api/v2/pokemon?limit=1000')

In [45]:
data2 = data.json()

In [46]:
data3 = data2['results']

In [47]:
pokemons = []
# get the name of the pokemon
for i in range(len(data3)):
    pokemons.append(data3[i]['name'])

In [48]:
pokemons

['bulbasaur',
 'ivysaur',
 'venusaur',
 'charmander',
 'charmeleon',
 'charizard',
 'squirtle',
 'wartortle',
 'blastoise',
 'caterpie',
 'metapod',
 'butterfree',
 'weedle',
 'kakuna',
 'beedrill',
 'pidgey',
 'pidgeotto',
 'pidgeot',
 'rattata',
 'raticate',
 'spearow',
 'fearow',
 'ekans',
 'arbok',
 'pikachu',
 'raichu',
 'sandshrew',
 'sandslash',
 'nidoran-f',
 'nidorina',
 'nidoqueen',
 'nidoran-m',
 'nidorino',
 'nidoking',
 'clefairy',
 'clefable',
 'vulpix',
 'ninetales',
 'jigglypuff',
 'wigglytuff',
 'zubat',
 'golbat',
 'oddish',
 'gloom',
 'vileplume',
 'paras',
 'parasect',
 'venonat',
 'venomoth',
 'diglett',
 'dugtrio',
 'meowth',
 'persian',
 'psyduck',
 'golduck',
 'mankey',
 'primeape',
 'growlithe',
 'arcanine',
 'poliwag',
 'poliwhirl',
 'poliwrath',
 'abra',
 'kadabra',
 'alakazam',
 'machop',
 'machoke',
 'machamp',
 'bellsprout',
 'weepinbell',
 'victreebel',
 'tentacool',
 'tentacruel',
 'geodude',
 'graveler',
 'golem',
 'ponyta',
 'rapidash',
 'slowpoke',
 '

### Data Set Creation

Second, we must request the API for each Pokémon, to get the abilities, types, egg groups, moves and Pokédex entries.

In [49]:
def data_scrape():
    temp_dict = {
        'pokemon': [],
        'abilities': [], 
        'types': [], 
        'egg_groups': [], 
        'moves': [],
        'pokedex_entry': []
    }
    
    for i, name in tqdm(enumerate(pokemons)):
        r = requests.get('https://pokeapi.co/api/v2/pokemon/' + str(i+1)).json()
        # append the name of the pokemon
        temp_dict['pokemon'].append(name)

        # append the abilities of the pokemon
        abilities = [r['abilities'][j]['ability']['name'] for j in range(len(r['abilities']))]
        temp_dict['abilities'].append(abilities)

        # append the types of the pokemon
        types = [r['types'][i]['type']['name'] for i in range(len(r['types']))]
        temp_dict['types'].append(types)

        # append the moves of the pokemon
        moves = [r['moves'][j]['move']['name'] for j in range(len(r['moves']))]
        temp_dict['moves'].append(moves)

        # make new request to get the egg groups and pokedex entry
        r = requests.get('https://pokeapi.co/api/v2/pokemon-species/' + str(i+1)).json()

        # append the egg groups of the pokemon
        egg_groups = [r['egg_groups'][j]['name'] for j in range(len(r['egg_groups']))]
        temp_dict['egg_groups'].append(egg_groups)

        # append the pokedex entry of the pokemon
        entry = r['flavor_text_entries'][0]['flavor_text'].replace('\n', ' ').replace('\f', ' ') if len(r['flavor_text_entries']) > 0 else None
        temp_dict['pokedex_entry'].append(entry)
        

    print('Done!')

    return temp_dict
    

In [50]:
if not os.path.exists('pokemon.pickle'):     
    print('Scraping data...')
    poke_dict = data_scrape()


Scraping data...


1000it [09:52,  1.69it/s]

Done!





### Make the dataframe

In [53]:
poke_df = pd.DataFrame(poke_dict) if not os.path.exists('pokemon.pickle') else pd.read_pickle('pokemon.pickle')

poke_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   pokemon        1000 non-null   object
 1   abilities      1000 non-null   object
 2   types          1000 non-null   object
 3   egg_groups     1000 non-null   object
 4   moves          1000 non-null   object
 5   pokedex_entry  905 non-null    object
dtypes: object(6)
memory usage: 47.0+ KB


In [57]:
# save to pickle
poke_df.to_pickle('pokemon.pickle') if not os.path.exists('pokemon.pickle') else print('File already exists')

File already exists


In [60]:
# Remove null values
poke_df_clean = poke_df.dropna()
poke_df_clean['pokemon'] = poke_df_clean['pokemon'].str.capitalize()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  poke_df_clean['pokemon'] = poke_df_clean['pokemon'].str.capitalize()


In [61]:
poke_df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 905 entries, 0 to 904
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   pokemon        905 non-null    object
 1   abilities      905 non-null    object
 2   types          905 non-null    object
 3   egg_groups     905 non-null    object
 4   moves          905 non-null    object
 5   pokedex_entry  905 non-null    object
dtypes: object(6)
memory usage: 49.5+ KB


In [62]:
# save to csv again
poke_df_clean.to_pickle('pokemon_clean.pickle') if not os.path.exists('pokemon_clean.pickle') else print('File already exists')

In [63]:
poke_df_clean.columns

Index(['pokemon', 'abilities', 'types', 'egg_groups', 'moves',
       'pokedex_entry'],
      dtype='object')

First, we get all the unique abilities, types, egg groups and moves.

In [65]:
def find_unique(df, col):
    vals = df[col].values
    all_vals = [item for sublist in vals for item in sublist]
    unique_vals = list(set(all_vals))
    return unique_vals

In [75]:
unique_abilities = find_unique(poke_df_clean, 'abilities')
unique_types = find_unique(poke_df_clean, 'types')
unique_egg_groups = find_unique(poke_df_clean, 'egg_groups')
unique_moves = find_unique(poke_df_clean, 'moves')

In [76]:
print('Number of unique abilities: ', len(unique_abilities))
print('Number of unique types: ', len(unique_types))
print('Number of unique egg groups: ', len(unique_egg_groups))
print('Number of unique moves: ', len(unique_moves))

Number of unique abilities:  249
Number of unique types:  18
Number of unique egg groups:  15
Number of unique moves:  747


In [77]:
# Time to get the text entries for each ability, and move
def get_text_entries(attribute, unique_vals):
    temp_dict = {
        attribute: [],
        'text_entry': []
    }

    for i, val in tqdm(enumerate(unique_vals)):
        r = requests.get('https://pokeapi.co/api/v2/' + attribute + '/' + val).json()
        
        # check if the text entry exists in english
        if len(r['effect_entries']) == 0:
            for j in range(len(r['flavor_text_entries'])):
                if r['flavor_text_entries'][j]['language']['name'] == 'en':
                    temp_dict[attribute].append(val)
                    temp_dict['text_entry'].append(r['flavor_text_entries'][j]['flavor_text'].replace('\n', ' ').replace('\f', ' '))
                    break
        else:
            for j in range(len(r['effect_entries'])):
                if r['effect_entries'][j]['language']['name'] == 'en':
                    temp_dict[attribute].append(val)
                    temp_dict['text_entry'].append(r['effect_entries'][j]['effect'].replace('\n', ' ').replace('\f', ' '))
                    break

    return temp_dict

In [79]:
if not os.path.exists('ability_dict.pickle'):
    ability_dict = get_text_entries('ability', unique_abilities)
    ability_df = pd.DataFrame(ability_dict)
    ability_df.to_pickle('ability_dict.pickle')
else:
    ability_df = pd.read_pickle('ability_dict.pickle')

249it [02:22,  1.75it/s]


In [80]:
if not os.path.exists('move_dict.pickle'):
    move_dict = get_text_entries('move', unique_moves)
    move_df = pd.DataFrame(move_dict)
    move_df.to_pickle('move_dict.pickle')
else:
    move_df = pd.read_pickle('move_dict.pickle')

747it [07:07,  1.75it/s]


We might experience that not all abilities nor moves have text, and we will check that here.

In [81]:
# find the missing abilities
missing_abilities = [x for x in unique_abilities if x not in ability_df['ability'].values.tolist()]

# find the missing moves
missing_moves = [x for x in unique_moves if x not in move_df['move'].values.tolist()]


In [82]:
# print number of missing abilities and moves
print('Number of missing abilities: ', len(missing_abilities))
print('Number of missing moves: ', len(missing_moves))

Number of missing abilities:  1
Number of missing moves:  29


In [83]:
# update the unique abilities and moves lists to not include the missing ones
unique_abilities = [x for x in unique_abilities if x not in missing_abilities]
unique_moves = [x for x in unique_moves if x not in missing_moves]

# print number of unique abilities and moves
print('Number of unique abilities: ', len(unique_abilities))
print('Number of unique moves: ', len(unique_moves))

Number of unique abilities:  248
Number of unique moves:  718


This concludes the data collection part of this project. To briefly summarise the data, we have:

In [84]:
# summarize the data
print('Number of pokemon: ', len(poke_df_clean))
print('Number of unique abilities: ', len(unique_abilities))
print('Number of unique types: ', len(unique_types))
print('Number of unique egg groups: ', len(unique_egg_groups))
print('Number of unique moves: ', len(unique_moves))

Number of pokemon:  905
Number of unique abilities:  248
Number of unique types:  18
Number of unique egg groups:  15
Number of unique moves:  718


## Time to make the graph

In [85]:
# load the dataframes
poke_df_clean = pd.read_pickle('pokemon_clean.pickle')
ability_df = pd.read_pickle('ability_dict.pickle')
move_df = pd.read_pickle('move_dict.pickle')

In [86]:
poke_df_clean.head()

Unnamed: 0,pokemon,abilities,types,egg_groups,moves,pokedex_entry
0,Bulbasaur,"[overgrow, chlorophyll]","[grass, poison]","[monster, plant]","[razor-wind, swords-dance, cut, bind, vine-whi...",A strange seed was planted on its back at birt...
1,Ivysaur,"[overgrow, chlorophyll]","[grass, poison]","[monster, plant]","[swords-dance, cut, bind, vine-whip, headbutt,...","When the bulb on its back grows large, it appe..."
2,Venusaur,"[overgrow, chlorophyll]","[grass, poison]","[monster, plant]","[swords-dance, cut, bind, vine-whip, headbutt,...",The plant blooms when it is absorbing solar en...
3,Charmander,"[blaze, solar-power]",[fire],"[monster, dragon]","[mega-punch, fire-punch, thunder-punch, scratc...","Obviously prefers hot places. When it rains, s..."
4,Charmeleon,"[blaze, solar-power]",[fire],"[monster, dragon]","[mega-punch, fire-punch, thunder-punch, scratc...","When it swings its burning tail, it elevates t..."


In [87]:
move_df.head()

Unnamed: 0,move,text_entry
0,agility,Raises the user's Speed by two stages.
1,sketch,Permanently replaces itself with the target's ...
2,snatch,The next time a Pokémon uses a beneficial move...
3,moonlight,Heals the user for half its max HP. During su...
4,leer,Lowers the target's Defense by one stage.


In [88]:
ability_df.head()

Unnamed: 0,ability,text_entry
0,poison-heal,"If this Pokémon is poisoned, it will heal for ..."
1,bulletproof,"Protects against bullet, ball, and bomb-based ..."
2,dry-skin,This Pokémon takes 1/8 of its maximum HP in da...
3,poison-touch,This Pokémon's contact moves have a 30% chance...
4,rough-skin,Whenever a move makes contact with this Pokémo...


In [89]:
G = nx.Graph()

In [94]:
# make an edgelist for the pokemon
# two pokemon are connected if they share an egg group
def make_edgelist():
    edges = []
    for i in tqdm(range(len(poke_df_clean))):
        for j in range(i+1, len(poke_df_clean)):
            if len(set(poke_df_clean['egg_groups'].iloc[i]).intersection(set(poke_df_clean['egg_groups'].iloc[j]))) > 0:
                num_shared_moves = len(set(poke_df_clean['moves'].iloc[i]).intersection(set(poke_df_clean['moves'].iloc[j])))
                edges.append((poke_df_clean['pokemon'].iloc[i], poke_df_clean['pokemon'].iloc[j], num_shared_moves))
    
    return edges


In [95]:
edgelist = make_edgelist()


100%|██████████| 905/905 [00:14<00:00, 62.10it/s] 


In [96]:
def update_graph(G, edgelist):
    G.clear()
    G.add_weighted_edges_from(edgelist)
    print('Number of nodes: ', G.number_of_nodes())
    print('Number of edges: ', G.number_of_edges())

In [97]:
update_graph(G, edgelist)

Number of nodes:  904
Number of edges:  64310


In [99]:
# add types and abilities as node attributes
types = [t for t in poke_df_clean['types'].values]
type_dict = dict(zip(poke_df_clean['pokemon'], types))

abilities = [a for a in poke_df_clean['abilities'].values]
ability_dict = dict(zip(poke_df_clean['pokemon'], abilities))

egg_groups = [e for e in poke_df_clean['egg_groups'].values]
egg_group_dict = dict(zip(poke_df_clean['pokemon'], egg_groups))

nx.set_node_attributes(G, type_dict, 'typing')
nx.set_node_attributes(G, ability_dict, 'abilities')
nx.set_node_attributes(G, egg_group_dict, 'egg_groups')

In [100]:
def set_group(graph, group_dict):
    nx.set_node_attributes(graph, group_dict, 'group')

set_group(G, type_dict)

In [101]:
# save the graph to pickle
import pickle
with open('pokemon_graph.pickle', 'wb') as f:
    pickle.dump(G, f)

In [102]:
import netwulf as nw
nw.visualize(G)

(None, None)

## Find communities

In [103]:
import community as community_louvain

# find the best partition
partition = community_louvain.best_partition(G)

In [104]:
set_group(G, partition)

In [105]:
from bs4 import BeautifulSoup

In [157]:
def make_number(num):
    if num < 10:
        return '00' + str(num)
    elif num < 100:
        return '0' + str(num)
    else:
        return str(num)


def get_pokemon_data(episode, names):
    r = requests.get('https://bulbapedia.bulbagarden.net/wiki/JN' + episode).text
    soup = BeautifulSoup(r, 'html.parser')
    elems = soup.find_all('a', href=True)
    episode_pokemon = []

    for name in names:
        for elem in elems:
            if name in elem.text:
                text = elem.text
                episode_pokemon.append(text)

    unique_pokemon = list(set(episode_pokemon))

    # remove elements that are not single words
    unique_pokemon = [p for p in unique_pokemon if len(p.split()) == 1]

    # remove nature names
    unique_pokemon.remove('Nature')
    return unique_pokemon


# get episode plots
def get_episode_plot(episode):
    r = requests.get('https://bulbapedia.bulbagarden.net/wiki/JN' + episode).text
    soup = BeautifulSoup(r, 'html.parser')
    elems = soup.find_all('p')
    plot = ''
    for i in range(1,len(elems)):
        if "Who's That Pokémon?" in elems[i].text:
            break
        plot += elems[i].text

    #plot = plot.replace('\n ', ' ')
    plot = plot.replace('\n', ' ')

    # remove trailing whitespace
    plot = plot.strip()
    
    return plot

In [158]:
episode_numbers = [make_number(i) for i in range(1, 137)]
names = poke_df_clean['pokemon'].values.tolist()

In [159]:
def gather_pokemon_data(episode_numbers, names):
    episode_dict = {}
    for episode in tqdm(episode_numbers):
        episode_pokemon = get_pokemon_data(episode, names)
        plot = get_episode_plot(episode)
        episode_dict[episode] = []
        episode_dict[episode].append(episode_pokemon)
        episode_dict[episode].append(plot)
    return episode_dict

In [160]:
if not os.path.exists('episode_dict.pickle'):
    episode_dict = gather_pokemon_data(episode_numbers, names)
    episode_df = pd.DataFrame.from_dict(episode_dict, orient='index', columns=['pokemon', 'plot'])
    episode_df.to_pickle('episode_dict.pickle')
else:
    episode_df = pd.read_pickle('episode_dict.pickle')
    

100%|██████████| 136/136 [02:43<00:00,  1.20s/it]


In [161]:
episode_df.head()

Unnamed: 0,pokemon,plot
1,"[Kakuna, Chansey, Mankey, Bellsprout, Pikachu,...","In Pallet Town, a young Ash Ketchum is beside ..."
2,"[Kakuna, Pidgeotto, Bisharp, Blastoise, Mankey...",Ash is attending the opening of Professor Ceri...
3,"[Pikachu, Yamper, Oddish, Meowth, Poliwag, Pel...",Ash and Goh are embarking on their first full ...
4,"[Zweilous, Turtwig, Nickit, Pikachu, Sneasel, ...",Ash and Goh head to the Galar region to see a ...
5,"[Goomy, Braviary, Seedot, Grubbin, Munchlax, P...",It’s time for research fellows Ash and Goh to ...


In [164]:
len(find_unique(episode_df, 'pokemon'))

677

In [179]:
# make a dataframe with only the pokemon that appear in the anime
if not os.path.exists('anime_pokemon.pickle'):
    anime_pokemon = find_unique(episode_df, 'pokemon')
    anime_pokemon_df = poke_df_clean[poke_df_clean['pokemon'].isin(anime_pokemon)].reset_index(drop=True)
    anime_pokemon_df.to_pickle('anime_pokemon.pickle')
else:
    anime_pokemon_df = pd.read_pickle('anime_pokemon.pickle')

In [180]:
anime_pokemon_df

Unnamed: 0,pokemon,abilities,types,egg_groups,moves,pokedex_entry
0,Bulbasaur,"[overgrow, chlorophyll]","[grass, poison]","[monster, plant]","[razor-wind, swords-dance, cut, bind, vine-whi...",A strange seed was planted on its back at birt...
1,Ivysaur,"[overgrow, chlorophyll]","[grass, poison]","[monster, plant]","[swords-dance, cut, bind, vine-whip, headbutt,...","When the bulb on its back grows large, it appe..."
2,Venusaur,"[overgrow, chlorophyll]","[grass, poison]","[monster, plant]","[swords-dance, cut, bind, vine-whip, headbutt,...",The plant blooms when it is absorbing solar en...
3,Charmander,"[blaze, solar-power]",[fire],"[monster, dragon]","[mega-punch, fire-punch, thunder-punch, scratc...","Obviously prefers hot places. When it rains, s..."
4,Charmeleon,"[blaze, solar-power]",[fire],"[monster, dragon]","[mega-punch, fire-punch, thunder-punch, scratc...","When it swings its burning tail, it elevates t..."
...,...,...,...,...,...,...
663,Eternatus,[pressure],"[poison, dragon]",[no-eggs],"[fly, body-slam, take-down, flamethrower, hype...",むねの　コアが　ガラルちほうの だいちから　わきだす　エネルギーを きゅうしゅうして　かつど...
664,Kubfu,[inner-focus],[fighting],[no-eggs],"[mega-punch, fire-punch, ice-punch, thunder-pu...",きびしい　たんれんを　つみ　わざを みがく。　たいとくした　わざによって しんかしたときに　...
665,Zarude,[leaf-guard],"[dark, grass]",[no-eggs],"[mega-punch, scratch, swords-dance, bind, vine...",むれを　つくり　みつりんで　くらす。 とても　こうげきてきで　もりにすむ ポケモンたちから　...
666,Regieleki,[transistor],[electric],[no-eggs],"[body-slam, take-down, thrash, hyper-beam, thu...",でんきエネルギーの　かたまり。 からだの　リングを　はずすと　ひめた ちからが　ときはなたれ...
