# Bulbapedia Wiki Scraping
## Teng, Lance Ricco L.

### Import libraries

In [1]:
import requests
from bs4 import BeautifulSoup

### Prepare variables

In [2]:
# Bulbapedia page listing Pokémon by National Pokédex number.
# The "%C3%A9" sequences are the percent-encoded "é" in "Pokémon".
URL = (
    "https://bulbapedia.bulbagarden.net"
    "/wiki/List_of_Pok%C3%A9mon_by_National_Pok%C3%A9dex_number"
)

### Load the page

In [3]:
# Fetch the listing page.
# - timeout: without one, requests waits indefinitely on a stalled
#   connection and the cell never finishes.
# - raise_for_status(): stop here on a 4xx/5xx response instead of letting
#   the later cells try to parse an error page.
page = requests.get(URL, timeout=30)
page.raise_for_status()

In [4]:
# Debug aid: dump the raw response body to eyeball what was fetched.
# `page.content` is the undecoded payload, which is why non-ASCII characters
# show up as \x.. escapes in the cell output.
print(page.content)

="background: #e6e6ff; padding: 2px 10px;"> <a href="/wiki/List_of_Pok%C3%A9mon_by_evolution_family" title="List of Pok\xc3\xa9mon by evolution family">evolution family</a> (<a href="/wiki/List_of_Pok%C3%A9mon_by_evolution_family_in_Pok%C3%A9mon_GO" title="List of Pok\xc3\xa9mon by evolution family in Pok\xc3\xa9mon GO">GO</a>) \xe2\x80\xa2 <a href="/wiki/List_of_Pok%C3%A9mon_that_are_not_part_of_an_evolutionary_line" title="List of Pok\xc3\xa9mon that are not part of an evolutionary line">no evolution family</a> \xe2\x80\xa2 <a href="/wiki/List_of_Pok%C3%A9mon_with_branched_evolutions" title="List of Pok\xc3\xa9mon with branched evolutions">branched</a> \xe2\x80\xa2 <a href="/wiki/List_of_cross-generational_evolution_families" title="List of cross-generational evolution families">cross-generation</a> \xe2\x80\xa2 <a href="/wiki/List_of_Pok%C3%A9mon_that_evolve_at_or_above_a_certain_level" title="List of Pok\xc3\xa9mon that evolve at or above a certain level">levels</a>\n</td></tr>\n<t

### Parse HTML data

In [5]:
# Build the parse tree from the raw response body using Python's built-in
# 'html.parser' backend.
# NOTE(review): the extraction cell further down reads rows and cells by
# fixed positions in `.contents`; that presumably depends on how this
# particular parser keeps whitespace nodes -- confirm before switching parsers.
soup = BeautifulSoup(page.content, 'html.parser')

### Find all tables that contain Pokemon details

In [6]:
# Get main content <div>
poke_content = soup.find(id='mw-content-text')
if poke_content is None:
    # find() returns None when nothing matches. Fail with a clear message
    # here rather than with an AttributeError on the next line.
    raise RuntimeError(
        "Could not find the element with id 'mw-content-text' in the fetched page"
    )

# Get all table elements nested anywhere inside the main content element
poke_tables = poke_content.find_all('table')

### Prepare variables

In [7]:
# Generation II to VIII are the tables at index 2 to 8 of poke_tables
# (both bounds are treated as inclusive by the extraction loop).
gen_table_start = 2
gen_table_end = 8

# Within a table, the first Pokemon begins at child index 3 and Pokemon
# appear every other child after that.
info_start = 3

# Base URL of the Bulbapedia website, prepended to the relative links
# scraped from the tables.
base_url = 'https://bulbapedia.bulbagarden.net'

### Extract Pokemon details into a per-generation dictionary

In [20]:
# Result: maps "gen<index>" (index = position of the table in poke_tables)
# to a list with one dict per Pokemon row of that table.
all_gen_json = {}

selected_tables = poke_tables[gen_table_start:gen_table_end + 1]
for index, gen_list in enumerate(selected_tables, start=gen_table_start):
    gen_json = []

    # Pokemon rows are every second child of the table, from info_start on.
    for poke_info in gen_list.contents[info_start::2]:
        # Fields are read from fixed child positions of the row:
        # 1 -> kdex, 3 -> ndex, 7 -> name (with link), 9 / 11 -> types.
        cells = poke_info.contents

        kdex = cells[1].text.strip()
        ndex = cells[3].text.strip()

        name_cell = cells[7]
        name = name_cell.text.strip()
        # The link is the name cell's child at position 1; its href is
        # site-relative, so prefix it with the base URL.
        url = base_url + name_cell.contents[1].get('href')

        # The first type is always read; a second one only on longer rows.
        types = [cells[9].text.strip()]
        if len(cells) > 10:
            types.append(cells[11].text.strip())

        gen_json.append({
            "kdex": kdex,
            "ndex": ndex,
            "name": name,
            "types": types,
            "url": url,
        })

    all_gen_json[f"gen{index}"] = gen_json

# Bare expression so the notebook displays the result.
all_gen_json
    

'Fighting'],
   'url': 'https://bulbapedia.bulbagarden.net/wiki/Kommo-o_(Pok%C3%A9mon)'},
  {'kdex': '#285',
   'ndex': '#785',
   'name': 'Tapu Koko',
   'types': ['Electric', 'Fairy'],
   'url': 'https://bulbapedia.bulbagarden.net/wiki/Tapu_Koko_(Pok%C3%A9mon)'},
  {'kdex': '#286',
   'ndex': '#786',
   'name': 'Tapu Lele',
   'types': ['Psychic', 'Fairy'],
   'url': 'https://bulbapedia.bulbagarden.net/wiki/Tapu_Lele_(Pok%C3%A9mon)'},
  {'kdex': '#287',
   'ndex': '#787',
   'name': 'Tapu Bulu',
   'types': ['Grass', 'Fairy'],
   'url': 'https://bulbapedia.bulbagarden.net/wiki/Tapu_Bulu_(Pok%C3%A9mon)'},
  {'kdex': '#288',
   'ndex': '#788',
   'name': 'Tapu Fini',
   'types': ['Water', 'Fairy'],
   'url': 'https://bulbapedia.bulbagarden.net/wiki/Tapu_Fini_(Pok%C3%A9mon)'},
  {'kdex': '#289',
   'ndex': '#789',
   'name': 'Cosmog',
   'types': ['Psychic'],
   'url': 'https://bulbapedia.bulbagarden.net/wiki/Cosmog_(Pok%C3%A9mon)'},
  {'kdex': '#290',
   'ndex': '#790',
   'name': 'Cos

### Save to JSON file

In [21]:
import json
import os

# Directory the JSON file is written to. With the empty string,
# os.path.join(filepath, name) is just `name`, i.e. a path relative to the
# current working directory.
filepath = ''

In [22]:
# Write the scraped data to <filepath>/pokedex.json.
# ensure_ascii=False keeps non-ASCII characters as-is instead of \uXXXX
# escapes, so the file is opened explicitly as UTF-8; indent=4 pretty-prints.
output_path = os.path.join(filepath, 'pokedex.json')
with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(all_gen_json, f, ensure_ascii=False, indent=4)