In [7]:
import requests
from bs4 import BeautifulSoup
import os
from urllib.parse import urljoin

def scrape_images(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    img_tags = soup.find_all('img')

    if not os.path.exists('images'):
        os.makedirs('images')

    for img in img_tags:
        img_url = img.get('src')
        if img_url:
            if img_url.startswith('/'):
                img_url = urljoin(url, img_url)
            if img_url.startswith(('http', 'https')):
                try:
                    # Adding timeout to avoid long wait
                    img_data = requests.get(img_url, timeout=10).content
                    img_name = os.path.join('images', img_url.split('/')[-1])
                    with open(img_name, 'wb') as img_file:
                        img_file.write(img_data)
                    print(f'Image saved: {img_name}')
                except Exception as e:
                    print(f"Failed to retrieve {img_url}: {e}")
            else:
                print(f'Skipping invalid URL: {img_url}')
        else:
            print('Image tag has no src attribute.')

url = 'https://clubpickleballusa.com/'
scrape_images(url)


Image saved: images/tr?id=1947038898787191&ev=PageView&noscript=1
Skipping invalid URL: data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%20203%2065'%3E%3C/svg%3E
Image saved: images/Club-Pickleball-USA-1.png
Skipping invalid URL: data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%2035%2024'%3E%3C/svg%3E
Image saved: images/Group-122.png
Skipping invalid URL: data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%20203%2065'%3E%3C/svg%3E
Image saved: images/Club-Pickleball-USA-1.png
Skipping invalid URL: data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%20399%20315'%3E%3C/svg%3E
Image saved: images/Home-Freedom.png
Skipping invalid URL: data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%2073%208'%3E%3C/svg%3E
Image saved: images/Capa_1-1.png
Skipping invalid URL: data:image/svg+xml,%3Csvg%20xmlns='http://www.w3.org/2000/svg'%20viewBox='0%200%203

In [9]:
import requests
from bs4 import BeautifulSoup
import os
import base64
from urllib.parse import urljoin

def scrape_images(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    img_tags = soup.find_all('img')

    if not os.path.exists('images'):
        os.makedirs('images')

    for img in img_tags:
        img_url = img.get('src')
        if img_url:
            if img_url.startswith('/'):
                img_url = urljoin(url, img_url)

            # Handling base64-encoded data URLs
            if img_url.startswith('data:image'):
                try:
                    header, encoded = img_url.split(',', 1)
                    file_ext = header.split('/')[1].split(';')[0]  # Get file extension
                    img_data = base64.b64decode(encoded)  # Decode base64 data

                    img_name = os.path.join('images', f'image_{img_tags.index(img)}.{file_ext}')
                    with open(img_name, 'wb') as img_file:
                        img_file.write(img_data)
                    print(f'Image saved (base64): {img_name}')
                except Exception as e:
                    print(f"Failed to decode base64 image: {e}")
            elif img_url.startswith(('http', 'https')):
                try:
                    img_data = requests.get(img_url, timeout=10).content
                    img_name = os.path.join('images', img_url.split('/')[-1])
                    with open(img_name, 'wb') as img_file:
                        img_file.write(img_data)
                    print(f'Image saved: {img_name}')
                except Exception as e:
                    print(f"Failed to retrieve {img_url}: {e}")
            else:
                print(f'Skipping invalid URL: {img_url}')
        else:
            print('Image tag has no src attribute.')

url = 'https://clubpickleballusa.com/'
scrape_images(url)


Image saved: images/tr?id=1947038898787191&ev=PageView&noscript=1
Failed to decode base64 image: Incorrect padding
Image saved: images/Club-Pickleball-USA-1.png
Failed to decode base64 image: Incorrect padding
Image saved: images/Group-122.png
Failed to decode base64 image: Incorrect padding
Image saved: images/Club-Pickleball-USA-1.png
Image saved (base64): images/image_7.svg+xml
Image saved: images/Home-Freedom.png
Failed to decode base64 image: Invalid base64-encoded string: number of data characters (65) cannot be 1 more than a multiple of 4
Image saved: images/Capa_1-1.png
Image saved (base64): images/image_11.svg+xml
Image saved: images/Group-133-1.png
Image saved (base64): images/image_13.svg+xml
Image saved: images/Group-134-1.png
Image saved (base64): images/image_15.svg+xml
Image saved: images/USA_Step3-1.png
Failed to decode base64 image: Invalid base64-encoded string: number of data characters (65) cannot be 1 more than a multiple of 4
Image saved: images/Capa_1-1.png
Faile