## Get PokemonDB page and parse pokemon

We start by fetching the National Pokédex page, which lists every Pokémon. We count the entries on that page so we know how many IDs there are, and can then request the detail page for each ID.

In [32]:
import json
from pathlib import Path

import requests
from bs4 import BeautifulSoup

# Root of the site; hrefs scraped from pages are appended to this.
POKEMON_DB_BASE_URL = "https://pokemondb.net"
# Prefix for individual pokemon pages; a zero-padded ID is appended to it.
POKEDEX_BASE_URL = f"{POKEMON_DB_BASE_URL}/pokedex/"

# Fetch the national pokedex listing. Fail fast on a timeout or an HTTP error
# status: parsing an error page would most likely find no infocards and
# silently leave pokemon_count at 0.
national_pokedex = requests.get(f"{POKEDEX_BASE_URL}national", timeout=10)
national_pokedex.raise_for_status()

# Using the "infocard" class to identify pokemon in the list
pokemon_count = len(
    BeautifulSoup(national_pokedex.text, "html.parser").find_all('div', attrs={"class": "infocard"})
)


In [39]:
def getExtraImages(poke_soup, timeout=10):
    """Collect image URLs from a pokemon's "Additional artwork" page.

    Looks in ``poke_soup`` for an ``<a>`` whose text is exactly
    "Additional artwork", fetches the page it links to (the href is appended
    to ``POKEMON_DB_BASE_URL``) and returns the ``src`` of every ``<img>``
    found inside that page's ``<main>`` element.

    Args:
        poke_soup: Parsed pokemon detail page (a BeautifulSoup object).
        timeout: Seconds to wait for the artwork page before giving up.

    Returns:
        A list of image ``src`` strings. The list is empty when the link is
        missing or has no href, when the request fails or returns an HTTP
        error status, or when the fetched page has no ``<main>`` element.
    """
    link = poke_soup.find('a', string="Additional artwork")

    # bs4 treats unknown Python attributes on a Tag as child-tag lookups, so
    # hasattr(link, 'href') is always true. Check the HTML attribute instead.
    href = link.get('href') if link is not None else None
    if not href:
        return []

    try:
        response = requests.get(f"{POKEMON_DB_BASE_URL}{href}", timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException:
        # Extra artwork is optional; a failed fetch must not abort the scrape.
        return []

    main = BeautifulSoup(response.text, "html.parser").main
    if main is None:
        return []

    # Skip <img> tags that carry no src (img['src'] would raise KeyError).
    return [img['src'] for img in main.find_all('img') if img.get('src')]
    

## Loop through pokemon IDs

For each ID, we load that Pokémon's page and extract the key details: its height, its weight, its types, the link to its main image, and links to any additional artwork.

In [37]:


def _readVitalStat(poke_soup, label, unit_suffix):
    """Return the value next to ``label``, cut off at ``unit_suffix``.

    Finds the text node equal to ``label``, walks up two parents to the
    enclosing row and reads that row's first ``<td>``. Returns None when the
    label or the cell cannot be found.
    """
    label_node = poke_soup.find(string=label)
    if label_node is None:
        return None

    header_cell = label_node.parent
    row = header_cell.parent if header_cell is not None else None
    value_cell = row.td if row is not None else None
    if value_cell is None:
        return None

    return value_cell.text.split(unit_suffix)[0]


def _parsePokemonPage(poke_soup):
    """Build the data record for one parsed pokemon page."""
    first_table = poke_soup.table
    type_links = (
        first_table.find_all('a', {'class': "type-icon"})
        if first_table is not None
        else []
    )

    sprites = []
    img_link = poke_soup.find('a', {'data-title': True})
    if img_link is not None and img_link.get('href'):
        sprites.append(img_link['href'])
    sprites.extend(getExtraImages(poke_soup))

    return {
        'types': [t.text for t in type_links],
        'height_meters': _readVitalStat(poke_soup, "Height", '\xa0m'),
        'weight_kg': _readVitalStat(poke_soup, "Weight", '\xa0kg'),
        'sprites': sprites,
    }


def getPokemonData(num_to_fetch=None, timeout=10):
    """Scrape the detail page of pokemon 1..``num_to_fetch``.

    Args:
        num_to_fetch: How many pokemon to fetch, starting at ID 1. When None
            (the default) the current value of the module-level
            ``pokemon_count`` is used. It is looked up at call time, so
            re-running the cell that computes it takes effect without
            re-defining this function.
        timeout: Seconds to wait for each pokemon page before giving up.

    Returns:
        A dict keyed by the zero-padded ID (``"001"``, ``"002"``, ...). Each
        value is a dict with:
          * ``types``: list of type names (text of the "type-icon" links in
            the page's first table),
          * ``height_meters`` / ``weight_kg``: the text before the unit, as a
            string, or None if the row is missing,
          * ``sprites``: list of image links (main image first, then whatever
            ``getExtraImages`` returns).
        Pokemon whose page cannot be fetched are reported and left out.
    """
    if num_to_fetch is None:
        num_to_fetch = pokemon_count

    pokemon = {}
    for dig in range(1, num_to_fetch + 1):
        index = f"{dig:03d}"

        try:
            pokemon_page = requests.get(POKEDEX_BASE_URL + index, timeout=timeout)
            # Treat HTTP error statuses like connection errors: an error page
            # has none of the expected markup and is not worth parsing.
            pokemon_page.raise_for_status()
        except requests.exceptions.RequestException as exc:
            # Best-effort scrape: report the miss instead of hiding it.
            print(f"Skipping pokemon {index}: {exc}")
            continue

        poke_soup = BeautifulSoup(pokemon_page.text, 'html.parser')
        pokemon[index] = _parsePokemonPage(poke_soup)

    return pokemon




In [40]:
data = getPokemonData()

# Create the output directory first so that a long scrape is not lost to a
# FileNotFoundError when the "data" folder does not exist yet.
output_path = Path("data/pokemon_data.json")
output_path.parent.mkdir(parents=True, exist_ok=True)

with output_path.open("w", encoding="utf-8") as f:
    json.dump(data, f)