In [None]:
import requests
from bs4 import BeautifulSoup
import os
import time

def get_all_pokemon_with_names():
    url = "https://pokeapi.co/api/v2/pokemon-species?limit=1025"
    response = requests.get(url)
    data = response.json()

    pokemon_list = []

    for idx, entry in enumerate(data['results'], start=1):
        species_url = entry['url']
        species_data = requests.get(species_url).json()

        display_name = species_data['names'][0]['name']  
        for name_entry in species_data['names']:
            if name_entry['language']['name'] == 'en':
                display_name = name_entry['name']
                break

            filename_name = display_name.replace(" ", "_").replace("'", "%27")      

        pokemon_list.append({
            'name': display_name,          
            'slug': entry['name'],         
            'dex': f'{idx:04}',           
            'id': idx,
            'filename': filename_name      
        })

    return pokemon_list

pokemon_list = get_all_pokemon_with_names()

In [25]:
pokemon_list[82]

{'name': 'Farfetch’d',
 'slug': 'farfetchd',
 'dex': '0083',
 'id': 83,
 'filename': 'Farfetch’d'}

In [None]:
BASE_URL = "https://archives.bulbagarden.net"
CATEGORY_URL = "https://archives.bulbagarden.net/wiki/Category:{}"
IMAGE_NAME_TEMPLATE = "Menu HOME {:04}.png"
OUTPUT_FOLDER = "national2"

os.makedirs(OUTPUT_FOLDER, exist_ok=True)

headers = {
    "User-Agent": "Mozilla/5.0 (compatible; ImageScraperBot/1.0)"
}

def get_image_url(category_page_url, target_image_name):
    """Search for the target image name on a given category page"""
    while category_page_url:
        res = requests.get(category_page_url, headers=headers)
        soup = BeautifulSoup(res.text, "html.parser")
        
        thumbs = soup.select(".gallerytext a")
        for a in thumbs:
            if a.text.strip() == target_image_name:
                return BASE_URL + a['href']

        next_page = soup.select_one("a:contains('next page')")
        if next_page:
            category_page_url = BASE_URL + next_page['href']
        else:
            return None

def download_image(image_page_url, output_path):
    """Download the full-size image from its detail page"""
    res = requests.get(image_page_url, headers=headers)
    soup = BeautifulSoup(res.text, "html.parser")
    full_image = soup.select_one(".fullImageLink a")
    if full_image:
        img_url = full_image['href']
        if not img_url.startswith("http"):
            img_url = BASE_URL + img_url
        img_data = requests.get(img_url, headers=headers).content
        with open(output_path, "wb") as f:
            f.write(img_data)
        return True
    return False

for pokemon in pokemon_list:
    dex_num = int(pokemon["dex"])
    image_name = IMAGE_NAME_TEMPLATE.format(dex_num)
    category_page_url = CATEGORY_URL.format(pokemon["filename"])
    output_path = os.path.join(OUTPUT_FOLDER, f"image_{pokemon['id']}.png")

    try:
        print(f"Searching image for {pokemon['name']}...")
        img_page_url = get_image_url(category_page_url, image_name)
        if img_page_url:
            success = download_image(img_page_url, output_path)
            if success:
                print(f"Saved: {output_path}")
            else:
                print(f"Failed to download image from: {img_page_url}")
        else:
            print(f"Image '{image_name}' not found in {pokemon['filename']}")
    except Exception as e:
        print(f"Error with {pokemon['name']}: {e}")
    
    time.sleep(1) 

print("Done.")

Searching image for Bulbasaur...
Saved: national2\image_1.png
Searching image for Ivysaur...
Saved: national2\image_2.png
Searching image for Venusaur...
Saved: national2\image_3.png
Searching image for Charmander...
Saved: national2\image_4.png
Searching image for Charmeleon...
Saved: national2\image_5.png
Searching image for Charizard...
Saved: national2\image_6.png
Searching image for Squirtle...
Saved: national2\image_7.png
Searching image for Wartortle...
Saved: national2\image_8.png
Searching image for Blastoise...
Saved: national2\image_9.png
Searching image for Caterpie...
Saved: national2\image_10.png
Searching image for Metapod...
Saved: national2\image_11.png
Searching image for Butterfree...
Saved: national2\image_12.png
Searching image for Weedle...
Saved: national2\image_13.png
Searching image for Kakuna...
Saved: national2\image_14.png
Searching image for Beedrill...
Saved: national2\image_15.png
Searching image for Pidgey...
Saved: national2\image_16.png
Searching image

In [11]:
import requests
from bs4 import BeautifulSoup
import os
import re

# Starting URL
base_url = "https://archives.bulbagarden.net"
start_url = "/wiki/Category:Type_icons"

# Folder to save images
save_folder = "type_icons"
os.makedirs(save_folder, exist_ok=True)

current_url = start_url

while current_url:
    # Request page
    print(f"Scraping page: {base_url + current_url}")
    response = requests.get(base_url + current_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find all image links ending with 'Sleep.png'
    for link in soup.find_all('a', href=True):
        href = link['href']
        if href.endswith("Sleep.png"):
            file_page_url = base_url + href

            # Visit the file page
            file_page = requests.get(file_page_url)
            file_soup = BeautifulSoup(file_page.text, 'html.parser')

            # Find the actual .png URL
            img = file_soup.find('a', href=re.compile(r'\.png$'))
            if img:
                img_url = img['href']
                if img_url.startswith("//"):
                    img_url = "https:" + img_url

                print(f"Downloading {img_url}")

                # Download the image
                img_data = requests.get(img_url).content
                img_name = img_url.split('/')[-1]
                with open(os.path.join(save_folder, img_name), 'wb') as handler:
                    handler.write(img_data)

    # Find link to the next page
    next_link = soup.find('a', text="next page")
    if next_link:
        current_url = next_link['href']
    else:
        current_url = None  # No more pages


Scraping page: https://archives.bulbagarden.net/wiki/Category:Type_icons
Downloading https://archives.bulbagarden.net/media/upload/2/24/Bug_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/2/24/Bug_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/1/18/Dark_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/1/18/Dark_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/8/83/Dragon_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/8/83/Dragon_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/4/4c/Electric_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/4/4c/Electric_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/2/20/Fairy_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/2/20/Fairy_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/e/ed/Fighting_icon_Sleep.pn

  next_link = soup.find('a', text="next page")


Scraping page: https://archives.bulbagarden.net/w/index.php?title=Category:Type_icons&filefrom=FireIC+XD.png#mw-category-media
Downloading https://archives.bulbagarden.net/media/upload/3/3c/Flying_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/3/3c/Flying_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/e/e3/Ghost_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/e/e3/Ghost_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/e/ef/Grass_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/e/ef/Grass_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/2/2b/Ground_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/2/2b/Ground_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/d/d8/Ice_icon_Sleep.png
Downloading https://archives.bulbagarden.net/media/upload/d/d8/Ice_icon_Sleep.png
Scraping page: https://archives.b