# http://www.videogamesprites.net/SuperMarioBros1/

In [1]:
import os
from requests import get
from bs4 import BeautifulSoup
from tqdm import tqdm

In [2]:
# base URL of the sprite site; get_image_links prepends it to every route
root = 'http://www.videogamesprites.net/SuperMarioBros1'
# bare expression so the notebook echoes the value as the cell output
root

'http://www.videogamesprites.net/SuperMarioBros1'

In [3]:
def get_image_links(route: str) -> list:
    """
    Return the URLs of all images on the page at the given route.

    Args:
        route: the route to the index page, relative to the module-level
            ``root`` URL (e.g. 'Items/index.html')

    Returns:
        a list of string URLs, one for each <img> tag on the page that has
        a non-empty ``src`` attribute, resolved against the page's own URL

    """
    # imported here so the function works without touching the import cell
    from urllib.parse import urljoin
    # generate the index page from the route
    index = '{}/{}'.format(root, route)
    # query the page and parse it as HTML
    soup = BeautifulSoup(get(index).text, 'html.parser')
    # resolve each image source against the index URL: a plain relative src
    # lands next to the index page, while absolute, root-relative and '../'
    # sources are handled correctly instead of being glued onto the
    # directory; <img> tags with a missing or empty src are skipped
    return [
        urljoin(index, img['src'])
        for img in soup.find_all('img')
        if img.get('src')
    ]

In [4]:
def query_save(image_url: str, image_dir: str) -> None:
    """
    Fetch an image from the given URL and save it in a directory.

    The file is named after the last '/'-separated segment of the URL.

    Args:
        image_url: the url of the image to fetch
        image_dir: the name of the directory to store the image in
            (created, including parents, if it does not exist)

    Returns:
        None

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status

    """
    # make the build directory if it doesn't exist; exist_ok avoids the
    # check-then-create race of a separate os.path.exists test
    os.makedirs(image_dir, exist_ok=True)
    # fetch the image and stop on an HTTP error status, so that an error
    # page is never written to disk under an image file name
    response = get(image_url)
    response.raise_for_status()
    # write the bytes to disk
    image_path = os.path.join(image_dir, image_url.split('/')[-1])
    with open(image_path, 'wb') as img_file:
        img_file.write(response.content)

In [5]:
def save_all_images(route: str, output_dir: str) -> None:
    """
    Save all the images found at the given route to the output directory.

    Images are stored under output_dir in a sub-directory that mirrors the
    directory part of the route (the route minus its last segment).

    Args:
        route: the route to fetch the image URLs from
        output_dir: the base directory to save images to

    Returns:
        None

    """
    # fetch all the links to the images
    image_urls = get_image_links(route)
    # generate the directory to save images to
    output_dir = '/'.join([output_dir] + route.split('/')[:-1])
    # fetch all the images and save them to disk; the context manager
    # closes the progress bar even when a download raises part-way through
    with tqdm(image_urls, unit='image') as progress:
        for image_url in progress:
            # show the URL currently being fetched next to the bar
            progress.set_postfix(image_url=image_url)
            # query the image and save it in the output directory
            query_save(image_url, output_dir)

In [6]:
# index pages to scrape, written as routes relative to the root URL
pages = [
    'Characters/Mario/index.html',
    'Characters/Luigi/index.html',
    'Characters/Other/index.html',
    'Items/index.html',
    'Scenery/index.html',
    'Enemies/Overworld/index.html',
    'Enemies/Underground/index.html',
    'Enemies/Castle/index.html',
]

In [7]:
# download the images of every listed page, using 'smb1' as the base output directory
for page in pages:
    save_all_images(page, 'smb1')

100%|██████████| 44/44 [00:16<00:00,  2.62image/s, image_url=http://www.videogamesprites.net/SuperMarioBros1/Characters/Mario/Fiery Mario - Swim6.gif]   
100%|██████████| 44/44 [00:15<00:00,  2.83image/s, image_url=http://www.videogamesprites.net/SuperMarioBros1/Characters/Luigi/Fiery Luigi - Swim6.gif]   
100%|██████████| 2/2 [00:00<00:00,  2.78image/s, image_url=http://www.videogamesprites.net/SuperMarioBros1/Characters/Other/Princess Toadstool.gif]
100%|██████████| 20/20 [00:05<00:00,  3.67image/s, image_url=http://www.videogamesprites.net/SuperMarioBros1/Items/Mushroom Cursor.gif]         
100%|██████████| 20/20 [00:05<00:00,  3.47image/s, image_url=http://www.videogamesprites.net/SuperMarioBros1/Scenery/Giant Mushroom.gif]   
100%|██████████| 25/25 [00:08<00:00,  3.02image/s, image_url=http://www.videogamesprites.net/SuperMarioBros1/Enemies/Overworld/Bullet Bill - Grey.gif]        
100%|██████████| 8/8 [00:02<00:00,  3.39image/s, image_url=http://www.videogamesprites.net/SuperMari