# Web Scraping for PNG Images at https://www.pngmart.com

In [3]:
import requests
from bs4 import BeautifulSoup

In [44]:
def get_urls_from_sitemap(site_map):
    """
    Extract image-related URLs from a sitemap XML document.

    Parameters:
    - site_map (str): URL of the sitemap.

    Returns:
    - list: Text of every <loc> entry containing the substring "image",
      with surrounding whitespace removed.

    Raises:
    - requests.RequestException: If the sitemap cannot be fetched, the
      request times out, or the server answers with an HTTP error status.
    """
    # A timeout keeps one unresponsive server from hanging the whole crawl.
    response = requests.get(site_map, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a sitemap
    # and silently returning an empty list.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "xml")

    # <loc> text may carry whitespace/newlines from pretty-printed XML;
    # strip it so the URL is usable as-is by callers.
    locations = (loc.text.strip() for loc in soup.find_all("loc"))
    return [url for url in locations if "image" in url]

def save_image_from_url(image_url, file_name):
    """
    Download the image linked from a page and save it to disk.

    The page at image_url is fetched and parsed as HTML; the href of the
    first <a class="download"> element is used as the image file URL.

    Parameters:
    - image_url (str): URL of the page that carries the download link.
    - file_name (str): Directory to save the image in (despite the name,
      this is a directory path). It is created if it does not exist.

    Saves the image as "<last segment of image_url>-<last segment of the
    download URL>" inside that directory.

    Raises:
    - ValueError: If the page has no <a class="download"> link with an href.
    - requests.RequestException: If either request fails, times out, or
      returns an HTTP error status.
    - OSError: If the directory cannot be created or the file written.
    """
    import os

    page = requests.get(image_url, timeout=30)
    page.raise_for_status()
    soup = BeautifulSoup(page.text, "html.parser")

    # Extract the image source URL; report a missing link explicitly rather
    # than failing with "'NoneType' object is not subscriptable".
    download_link = soup.find("a", {"class": "download"})
    if download_link is None or not download_link.get("href"):
        raise ValueError(f"No download link found on page: {image_url}")
    image_source = download_link["href"]

    image = requests.get(image_source, timeout=30)
    # Avoid writing an HTML error body to disk under an image filename.
    image.raise_for_status()

    # Create a unique filename for the saved image. A trailing slash on the
    # page URL would otherwise leave the id part empty.
    image_id = image_url.rstrip("/").split("/")[-1]
    image_name = image_source.split("/")[-1]
    image_title = image_id + "-" + image_name

    # Make sure the target directory exists so a first run does not fail.
    os.makedirs(file_name, exist_ok=True)

    # Save the image to the specified directory
    with open(f"{file_name}/{image_title}", "wb") as f:
        f.write(image.content)

def get_images_from_site_map_url(site_map_url, file_name, limit=100):
    """
    Download images from URLs specified in a sitemap up to a specified limit.

    The document at site_map_url is treated as a sitemap index: every <loc>
    entry in it is fetched as a nested sitemap, and each image URL found
    there is passed to save_image_from_url.

    Parameters:
    - site_map_url (str): URL of the sitemap index.
    - file_name (str): Name of the directory to save the images.
    - limit (int | None): Maximum number of images to download. A value of
      zero or less downloads nothing; None means no limit.

    Raises:
    - requests.RequestException: If the sitemap index cannot be fetched,
      the request times out, or it returns an HTTP error status.
    - Any exception raised by get_urls_from_sitemap or save_image_from_url.
    """
    unlimited = limit is None

    # Check before any request so that limit=0 (or a negative limit) really
    # downloads nothing; an equality test performed only after a download
    # would never match and the whole site would be fetched.
    if not unlimited and limit <= 0:
        return

    response = requests.get(site_map_url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "xml")

    # Extract URLs of individual sitemaps
    site_maps = [loc.text.strip() for loc in soup.find_all("loc")]

    count = 0
    for site_map in site_maps:
        # Download and save each image listed in the current sitemap
        for image_url in get_urls_from_sitemap(site_map):
            save_image_from_url(image_url, file_name)
            count += 1

            # Returning leaves both loops at once when the limit is reached.
            if not unlimited and count >= limit:
                return

In [45]:
# Root sitemap of the site; each <loc> entry in it is treated as a nested
# sitemap URL by get_images_from_site_map_url.
site_map_url = "https://www.pngmart.com/sitemap.xml"

In [47]:
# Download into the "images" directory, using the default limit of 100.
get_images_from_site_map_url(site_map_url, "images")