In [6]:
import requests
from bs4 import BeautifulSoup
import re
import os
import time
from urllib.parse import urljoin

# Scraper configuration: wiki root, the category listing to start from,
# and the local folder that receives the downloaded sprites.
base_url = "https://archives.bulbagarden.net"
start_url = f"{base_url}/w/index.php?title=Category:Scarlet_and_Violet_menu_sprites"
output_dir = "sprites"

# Make sure the download folder is present (a no-op when it already exists).
os.makedirs(output_dir, exist_ok=True)

def get_soup(url, timeout=30):
    """Fetch a page and return it parsed as a BeautifulSoup document.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout `requests` waits indefinitely, so a single stalled
            connection would hang the whole scrape.

    Returns:
        The response body parsed with the "html.parser" backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within `timeout`.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return BeautifulSoup(response.text, "html.parser")

def parse_images(soup):
    """Collect the sprite entries listed on a category page.

    Every link inside the generated category listing is inspected; links whose
    text names a 'Menu SV NNNN[-Form].png' file are kept.

    Returns:
        A list of (file_page_url, local_filename) tuples, where the local
        filename has the form 'Sprite_NNNN[-Form].png'.
    """
    entries = []

    for anchor in soup.select("div.mw-category-generated li a"):
        # The listing shows 'Menu SV 0001.png'; normalise it to 'Menu_SV_0001.png'.
        label = anchor.get_text().replace(" ", "_")

        found = re.search(r"Menu_SV_(\d{4}(?:-[\w]+)?)\.png", label)
        if not found:
            continue

        page_url = urljoin(base_url, anchor.get("href"))
        entries.append((page_url, f"Sprite_{found.group(1)}.png"))

    return entries

def get_direct_image_url(image_page_url):
    """Resolve a file-description page to the URL of the image itself.

    Returns the absolute URL taken from the page's full-image link, or None
    when the page has no such link.
    """
    anchor = get_soup(image_page_url).select_one("div.fullImageLink a")
    if not anchor:
        return None
    return urljoin(base_url, anchor.get("href"))

def download_image(img_url, dest_path, timeout=30):
    """Download an image to `dest_path` unless that file already exists.

    Args:
        img_url: Direct URL of the image file.
        dest_path: Where to store the image on disk.
        timeout: Seconds to wait for the server; without it a stalled
            connection would block forever.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within `timeout`.
        OSError: If the file cannot be written.
    """
    if os.path.exists(dest_path):
        print(f"Already exists: {dest_path}")
        return

    response = requests.get(img_url, timeout=timeout)
    response.raise_for_status()

    # Write to a sibling ".part" file and rename it into place afterwards, so an
    # interrupted write never leaves a truncated image that a later run would
    # mistake for a finished download.
    partial_path = f"{dest_path}.part"
    try:
        with open(partial_path, "wb") as f:
            f.write(response.content)
        os.replace(partial_path, dest_path)
    except BaseException:
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise

    print(f"Downloaded: {dest_path}")
    time.sleep(1)  # be polite

def find_next_page(soup):
    """Return the absolute URL behind the first 'next page' link, or None.

    Link text is compared case-insensitively after stripping whitespace.
    """
    return next(
        (
            urljoin(base_url, anchor.get("href"))
            for anchor in soup.select("a")
            if anchor.get_text(strip=True).lower() == "next page"
        ),
        None,
    )

def scrape_all(max_pages=10):
    """Walk the category listing page by page and download every sprite.

    Starting at `start_url`, each listing page is parsed for sprite entries,
    every sprite not yet present in `output_dir` is resolved to its direct
    image URL and downloaded, and the 'next page' link is followed.

    Args:
        max_pages: Upper bound on the number of listing pages to process.
    """
    current_url = start_url
    pages_scraped = 0
    while current_url and pages_scraped < max_pages:
        print(f"\nProcessing page: {current_url}")
        soup = get_soup(current_url)
        for image_page_url, filename in parse_images(soup):
            dest_path = os.path.join(output_dir, filename)
            # Check the disk first: resolving the direct URL costs one HTTP
            # request per sprite, which is wasted when the file is already here.
            if os.path.exists(dest_path):
                print(f"Already exists: {dest_path}")
                continue
            img_url = get_direct_image_url(image_page_url)
            if img_url:
                download_image(img_url, dest_path)
        current_url = find_next_page(soup)
        pages_scraped += 1

    # Make it visible when the page limit, not the end of the listing, stopped us.
    if current_url:
        print(f"Stopped after {pages_scraped} page(s); raise max_pages to continue from: {current_url}")

# Entry point: start the scrape with the default page limit when this code is
# executed directly. In a notebook kernel `__name__` is "__main__", so running
# the cell triggers the download.
if __name__ == "__main__":
    scrape_all()


Processing page: https://archives.bulbagarden.net/w/index.php?title=Category:Scarlet_and_Violet_menu_sprites
Downloaded: sprites\Sprite_0001.png
Downloaded: sprites\Sprite_0002.png
Downloaded: sprites\Sprite_0003.png
Downloaded: sprites\Sprite_0004.png
Downloaded: sprites\Sprite_0005.png
Downloaded: sprites\Sprite_0006.png
Downloaded: sprites\Sprite_0007.png
Downloaded: sprites\Sprite_0008.png
Downloaded: sprites\Sprite_0009.png
Downloaded: sprites\Sprite_0023.png
Downloaded: sprites\Sprite_0024.png
Downloaded: sprites\Sprite_0025-Alola.png
Downloaded: sprites\Sprite_0025-Hoenn.png
Downloaded: sprites\Sprite_0025-Kalos.png
Downloaded: sprites\Sprite_0025-Original.png
Downloaded: sprites\Sprite_0025-Partner.png
Downloaded: sprites\Sprite_0025-Sinnoh.png
Downloaded: sprites\Sprite_0025-Unova.png
Downloaded: sprites\Sprite_0025-World.png
Downloaded: sprites\Sprite_0025.png
Downloaded: sprites\Sprite_0026-Alola.png
Downloaded: sprites\Sprite_0026.png
Downloaded: sprites\Sprite_0027-Alola.

In [7]:
import os
import shutil
import re

sprites_dir = "sprites"
national_dir = os.path.join(sprites_dir, "national")


def organize_sprites(sprites_dir="sprites", national_name="national"):
    """Sort downloaded sprites into one sub-folder per form.

    Files named 'Sprite_NNNN.png' go to `<sprites_dir>/<national_name>`;
    files named 'Sprite_NNNN-Form.png' go to `<sprites_dir>/Form`. Target
    folders are created on demand. Anything else in `sprites_dir` (other
    files, sub-folders) is left untouched.

    Args:
        sprites_dir: Folder containing the downloaded sprite files.
        national_name: Name of the sub-folder for sprites without a form suffix.

    Returns:
        A list of (filename, target_dir) tuples, one per moved file, in
        alphabetical filename order.
    """
    national_dir = os.path.join(sprites_dir, national_name)
    os.makedirs(national_dir, exist_ok=True)

    # fullmatch: the whole name must fit the pattern, so look-alikes such as
    # 'Sprite_0001.png.bak.png' are not swept up.
    pattern = re.compile(r"Sprite_(\d{4})(?:-([\w]+))?\.png")

    moved = []
    # Sorted so the order of moves (and of the log) does not depend on the OS.
    for filename in sorted(os.listdir(sprites_dir)):
        src_path = os.path.join(sprites_dir, filename)
        match = pattern.fullmatch(filename)
        if not match or not os.path.isfile(src_path):
            continue

        form = match.group(2)
        target_dir = os.path.join(sprites_dir, form) if form else national_dir
        os.makedirs(target_dir, exist_ok=True)

        shutil.move(src_path, os.path.join(target_dir, filename))
        print(f"Moved: {filename} → {target_dir}")
        moved.append((filename, target_dir))

    return moved


moved_sprites = organize_sprites(sprites_dir)

Moved: Sprite_0001.png → sprites\national
Moved: Sprite_0002.png → sprites\national
Moved: Sprite_0003.png → sprites\national
Moved: Sprite_0004.png → sprites\national
Moved: Sprite_0005.png → sprites\national
Moved: Sprite_0006.png → sprites\national
Moved: Sprite_0007.png → sprites\national
Moved: Sprite_0008.png → sprites\national
Moved: Sprite_0009.png → sprites\national
Moved: Sprite_0023.png → sprites\national
Moved: Sprite_0024.png → sprites\national
Moved: Sprite_0025-Alola.png → sprites\Alola
Moved: Sprite_0025-Hoenn.png → sprites\Hoenn
Moved: Sprite_0025-Kalos.png → sprites\Kalos
Moved: Sprite_0025-Original.png → sprites\Original
Moved: Sprite_0025-Partner.png → sprites\Partner
Moved: Sprite_0025-Sinnoh.png → sprites\Sinnoh
Moved: Sprite_0025-Unova.png → sprites\Unova
Moved: Sprite_0025-World.png → sprites\World
Moved: Sprite_0025.png → sprites\national
Moved: Sprite_0026-Alola.png → sprites\Alola
Moved: Sprite_0026.png → sprites\national
Moved: Sprite_0027-Alola.png → sprite