In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

In [2]:
request = requests.get('https://pokemondb.net/pokedex/all')

In [3]:
soup = BeautifulSoup(request.text, 'html.parser')

In [4]:
names = set()
for tag in soup.find_all(class_ = 'ent-name'):
    names.add(tag.text.lower())

In [62]:
bad_names = ['tapu lele','mr. mime','flabébé','nidoran♂',"sirfetch'd",'tapu fini',
             'mime jr.','tapu bulu','type: null','nidoran♀','tapu koko','mr. rime']
good_names = ['tapu-lele','mr-mime','flabebe','nidoran-m','sirfetchd','tapu-fini',
              'mime-jr','tapu-bulu','type-null','nidoran-f','tapu-koko','mr-rime']

In [61]:
for bad,good in zip(bad_names,good_names):
    names.remove(bad)
    names.add(good)

KeyError: 'tapu lele'

In [6]:
len(names) # number of pokemon 898

898

In [7]:
import time

replace_dict = {
#     '<img alt="Physical"':'physical<img alt="Physical"',
#     '<img alt="Special"':'special<img alt="Special"',
#     '<img alt="Status"':'status<img alt="Status"',
    '<img class="img-fixed" src="https://img.pokemondb.net/images/icons/move-physical.png" width="30" height="20" alt="Physical" title="Physical" loading="lazy">':'physical',
    '<img class="img-fixed" src="https://img.pokemondb.net/images/icons/move-special.png" width="30" height="20" alt="Special" title="Special" loading="lazy">':'special',
    '<img class="img-fixed" src="https://img.pokemondb.net/images/icons/move-status.png" width="30" height="20" alt="Status" title="Status" loading="lazy">':'status'
}

def get_move_data(names):
    data = {}
    for i,name in enumerate(names):
        if i != 0 and i % 50 == 0:
            print(i)
            time.sleep(10)
        url = 'https://pokemondb.net/pokedex/' + name
        request = requests.get(url)
        text = request.text
        for key, val in replace_dict.items():
            text = text.replace(key,val)
        move_tables = pd.read_html(text, attrs={'class':'data-table'})
        data[name] = move_tables
    return data

In [8]:
move_data = get_move_data(names)

50
100
150
200
250
300
350
400
450
500
550
600
650
700
750
800
850


In [15]:
# print bad names (names with no data) if there are any
for key,val in move_data.items():
    if not len(val):
        print(key,val)

In [16]:
# print names with any NaNs in Cat. column
for name, tables in move_data.items():
    for table in tables:
        if table['Cat.'].isna().sum() != 0:
            print(name)

In [44]:
final_move_data = {}
for name,tables in move_data.items():
    for i,table in enumerate(tables):
        if i == 0:
            temp_table = table.copy()
        else:
            temp_table = pd.concat([temp_table,table.copy()],axis=0,ignore_index=True)
    combined_table = temp_table.drop_duplicates(ignore_index=True)
    combined_table['Name'] = name
    final_move_data[name] = combined_table

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  combined_table['Name'] = name


In [49]:
list(final_move_data.items())[3][1]

Unnamed: 0,Lv.,Move,Type,Cat.,Power,Acc.,TM,TR,Name
0,1.0,Poison Gas,Poison,status,—,90,,,salandit
1,1.0,Scratch,Normal,physical,40,100,,,salandit
2,5.0,Smog,Poison,special,30,70,,,salandit
3,10.0,Ember,Fire,special,40,100,,,salandit
4,15.0,Poison Fang,Poison,physical,50,100,,,salandit
...,...,...,...,...,...,...,...,...,...
104,,Poison Jab,Poison,physical,80,100,84.0,,salandit
105,,Swagger,Normal,status,—,85,87.0,,salandit
106,,Sleep Talk,Normal,status,—,—,88.0,,salandit
107,,Substitute,Normal,status,—,—,90.0,,salandit


In [50]:
# combine all pokemon tables into one
for i,(name,table) in enumerate(final_move_data.items()):
    if i == 0:
        temp_table = table.copy()
    else:
        temp_table = pd.concat([temp_table,table.copy()],axis=0,ignore_index=True) 

In [53]:
temp_table.isna().sum()

Lv.        69525
Move           0
Type           0
Cat.           0
Power          0
Acc.           0
Name           0
TM         46224
Method     84511
TR         75850
Details    87614
dtype: int64

In [69]:
names_list = list(names.copy())
bad_names_list = list(names.copy())
for bad,good in zip(bad_names,good_names):
    idx = bad_names_list.index(good)
    bad_names_list[idx] = bad

In [72]:
for a, b in zip(names_list, bad_names_list):
    print(a,b)

scatterbug scatterbug
zangoose zangoose
volcarona volcarona
salandit salandit
eldegoss eldegoss
ekans ekans
drizzile drizzile
nidoran-m nidoran♂
pyroar pyroar
growlithe growlithe
zubat zubat
togekiss togekiss
trubbish trubbish
simipour simipour
pidove pidove
clefairy clefairy
applin applin
venonat venonat
azelf azelf
piloswine piloswine
cherubi cherubi
inkay inkay
greedent greedent
polteageist polteageist
dracovish dracovish
snorlax snorlax
rhyperior rhyperior
swoobat swoobat
virizion virizion
cutiefly cutiefly
dragalge dragalge
moltres moltres
sealeo sealeo
absol absol
mantyke mantyke
furfrou furfrou
spinda spinda
remoraid remoraid
rayquaza rayquaza
darmanitan darmanitan
eevee eevee
slurpuff slurpuff
corvisquire corvisquire
crawdaunt crawdaunt
talonflame talonflame
gurdurr gurdurr
hoppip hoppip
natu natu
scolipede scolipede
gliscor gliscor
arrokuda arrokuda
seedot seedot
gible gible
quilladin quilladin
shelmet shelmet
aegislash aegislash
honedge honedge
lunala lunala
piplup piplup
emo

In [78]:
temp_table['Alt. Name'] = temp_table['Name'].replace({good: bad for good, bad in zip(names_list, bad_names_list)})

In [82]:
temp_table[temp_table['Name'] == 'nidoran-m']

Unnamed: 0,Lv.,Move,Type,Cat.,Power,Acc.,Name,TM,Method,TR,Details,Alt. Name
546,1.0,Leer,Normal,status,—,100,nidoran-m,,,,,nidoran♂
547,1.0,Poison Sting,Poison,physical,15,100,nidoran-m,,,,,nidoran♂
548,5.0,Peck,Flying,physical,35,100,nidoran-m,,,,,nidoran♂
549,10.0,Focus Energy,Normal,status,—,—,nidoran-m,,,,,nidoran♂
550,15.0,Fury Attack,Normal,physical,15,85,nidoran-m,,,,,nidoran♂
...,...,...,...,...,...,...,...,...,...,...,...,...
635,,Toxic Spikes,Poison,status,—,—,nidoran-m,,,54.0,,nidoran♂
636,,Poison Jab,Poison,physical,80,100,nidoran-m,,,57.0,,nidoran♂
637,,Earth Power,Ground,special,90,100,nidoran-m,,,67.0,,nidoran♂
638,,Drill Run,Ground,physical,80,95,nidoran-m,,,87.0,,nidoran♂


In [79]:
temp_table.to_csv('../data/move_data.csv')