In [1]:
# Import Dependencies
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Data Source: BulbaPedia
url = 'https://bulbapedia.bulbagarden.net/wiki/List_of_Pokémon_by_National_Pokédex_number'

In [3]:
# Create soup response to find tables
pkmn_tables=[]
while len(pkmn_tables) < 1:
    response=requests.get(url)
    soup=BeautifulSoup(response.text,'html.parser')
    pkmn_tables=soup.find_all('table')[1:8]

In [5]:
# merge pkmn tables from all generations
all_pkmn_df=pd.DataFrame({})
for gen in range(0,7):
#     trim header row to enable merge
    gen_df = pd.read_html(pkmn_tables[gen].prettify())[0][1:]
    all_pkmn_df=all_pkmn_df.append(gen_df)
# reindex combined table
count=len(all_pkmn_df)
indices=[i for i in range(count)]
all_pkmn_df.index=indices

In [6]:
all_pkmn_df

Unnamed: 0,0,1,2,3,4,5
0,#001,#001,,Bulbasaur,Grass,Poison
1,#002,#002,,Ivysaur,Grass,Poison
2,#003,#003,,Venusaur,Grass,Poison
3,#004,#004,,Charmander,Fire,
4,#005,#005,,Charmeleon,Fire,
5,#006,#006,,Charizard,Fire,Flying
6,#007,#007,,Squirtle,Water,
7,#008,#008,,Wartortle,Water,
8,#009,#009,,Blastoise,Water,
9,#010,#010,,Caterpie,Bug,


In [7]:
# drop alolans and alternate formes
all_pkmn_df = all_pkmn_df.drop_duplicates(subset=[3],keep="first").reset_index()
all_pkmn_df

Unnamed: 0,index,0,1,2,3,4,5
0,0,#001,#001,,Bulbasaur,Grass,Poison
1,1,#002,#002,,Ivysaur,Grass,Poison
2,2,#003,#003,,Venusaur,Grass,Poison
3,3,#004,#004,,Charmander,Fire,
4,4,#005,#005,,Charmeleon,Fire,
5,5,#006,#006,,Charizard,Fire,Flying
6,6,#007,#007,,Squirtle,Water,
7,7,#008,#008,,Wartortle,Water,
8,8,#009,#009,,Blastoise,Water,
9,9,#010,#010,,Caterpie,Bug,


In [8]:
# Drop regional pokedex number and picture column
nat_pkmn_df=all_pkmn_df[[1,3,4,5]]
# Remove pound from national pokedex numbers
pkmn_count=len(nat_pkmn_df)
for form in range(pkmn_count):
    nat_pkmn_df[1][form]=nat_pkmn_df[1][form][1:]
# Rename Columns
nat_pkmn_df=nat_pkmn_df.rename(columns={1:'Nat_Dex',3:'Name',4:'Type1',5:'Type2'})

nat_pkmn_df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,Nat_Dex,Name,Type1,Type2
0,001,Bulbasaur,Grass,Poison
1,002,Ivysaur,Grass,Poison
2,003,Venusaur,Grass,Poison
3,004,Charmander,Fire,
4,005,Charmeleon,Fire,
5,006,Charizard,Fire,Flying
6,007,Squirtle,Water,
7,008,Wartortle,Water,
8,009,Blastoise,Water,
9,010,Caterpie,Bug,


In [9]:
types = sorted(["Normal", "Fire", "Water", "Electric", "Grass", "Ice", "Fighting", "Poison", "Ground", 
 "Flying", "Psychic", "Bug", "Rock", "Ghost", "Dragon", "Dark", "Steel", "Fairy"])
print(types)

['Bug', 'Dark', 'Dragon', 'Electric', 'Fairy', 'Fighting', 'Fire', 'Flying', 'Ghost', 'Grass', 'Ground', 'Ice', 'Normal', 'Poison', 'Psychic', 'Rock', 'Steel', 'Water']


In [10]:
# Create dataframe of binaries for each Pokemon's types
typesdf = []
for i in range(pkmn_count):
#     Clean binaries for each pokemon
    pkmntype = {type:0 for type in types}
    type1 = nat_pkmn_df["Type1"][i]
    pkmntype[type1]=1
    type2 = nat_pkmn_df["Type2"][i]
#     if there is no secondary type, the class appears as float
    if type(type2)==type('str'):
        pkmntype[type2]=1
    typesdf.append(pkmntype)

typesdf = pd.DataFrame(typesdf)
typesdf.head()

Unnamed: 0,Bug,Dark,Dragon,Electric,Fairy,Fighting,Fire,Flying,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
9,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
pokedex_df=nat_pkmn_df[["Nat_Dex","Name"]].merge(typesdf,left_index=True,right_index=True)
pokedex_df

Unnamed: 0,Nat_Dex,Name,Bug,Dark,Dragon,Electric,Fairy,Fighting,Fire,Flying,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
0,001,Bulbasaur,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
1,002,Ivysaur,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
2,003,Venusaur,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0
3,004,Charmander,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,005,Charmeleon,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
5,006,Charizard,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0
6,007,Squirtle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
7,008,Wartortle,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
8,009,Blastoise,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
9,010,Caterpie,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
# Save data to csv
pokedex_df.to_csv('Data/Pokedex.csv')