In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import requests
import time
import os
import re

# Page whose images will be collected
url = 'https://www.governorsindh.com/'  # Replace with the target website URL

# Start Chrome with a driver service built from ChromeDriverManager().install()
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service)

# Load the page in the browser
driver.get(url)

# Fixed pause before the HTML is read, so the page has time to finish loading
time.sleep(10)  # Increase time if needed

# Parse the HTML as the browser currently holds it
rendered_html = driver.page_source
soup = BeautifulSoup(rendered_html, 'html.parser')

# Extract images from <img> tags
img_tags = soup.find_all('img')
img_urls = []
for img in img_tags:
    # Prefer the regular source, then the lazy-loading attribute
    src = img.get('src') or img.get('data-src')
    if not src:
        # srcset holds whitespace-separated "URL descriptor" candidates
        # divided by commas; keep only the URL of the first candidate
        srcset_tokens = (img.get('srcset') or '').split()
        src = srcset_tokens[0].rstrip(',') if srcset_tokens else None
    if not src:
        # Skip tags without any source: urljoin(url, None) would return the
        # page URL itself, which would then be downloaded as an "image"
        continue
    img_urls.append(requests.compat.urljoin(url, src.strip()))

# Collect image URLs referenced through url(...) in inline styles that
# mention background-image
css_url_pattern = re.compile(r'url\(["\']?(.*?)["\']?\)')
css_background_images = []
for tag in soup.find_all(True):  # True matches every tag
    inline_css = tag.get('style')
    if not inline_css or 'background-image' not in inline_css:
        continue
    for css_ref in css_url_pattern.findall(inline_css):
        # Resolve each reference against the page URL
        css_background_images.append(requests.compat.urljoin(url, css_ref))

# Combine image URLs, dropping repeats while keeping first-seen order so the
# same image is not downloaded and saved more than once
all_image_urls = list(dict.fromkeys(img_urls + css_background_images))

# Create the directory the images are saved into; exist_ok avoids the
# check-then-create race and makes re-running the script safe
os.makedirs('images', exist_ok=True)

# Download and save each image, then close the browser.
# The browser is closed in a finally block so it is released even when the
# download loop is interrupted.
i = 1  # number used for the next saved file; advances only after a successful save
try:
    for image_url in all_image_urls:
        try:
            print(f"Downloading: {image_url}")  # Debugging line to see which image is being downloaded
            # A timeout keeps one unresponsive server from stalling the whole run
            response = requests.get(image_url, timeout=30)

            # Check if the response is valid
            if response.status_code != 200:
                print(f"Failed to fetch {image_url}: Status code {response.status_code}")
                continue

            # Take the extension from the URL path only, so query strings and
            # dots in the host name cannot end up in the file name
            url_path = requests.compat.urlparse(image_url).path
            ext = os.path.splitext(url_path)[1].lstrip('.')
            if not ext.isalnum():
                ext = 'jpg'  # Default extension when the path has no usable one
            file_path = os.path.join('images', f'image{i}.{ext}')

            # Open a file to write the image content
            with open(file_path, 'wb') as file:
                file.write(response.content)

            print(f'Downloaded: {file_path}')
            i += 1
        except (requests.RequestException, OSError, ValueError) as e:
            # Best effort: report the failure and move on to the next image
            print(f"Failed to download {image_url}: {e}")

    # Print the number of images scraped
    print("All images scraped:", i - 1)
finally:
    # Close the browser
    driver.quit()


Downloading: https://www.governorsindh.com/_next/static/media/logo.9ff76f62.png
Downloaded: images\image1.png
Downloading: https://www.governorsindh.com/_next/static/media/cover.1d863e39.png
Downloaded: images\image2.png
Downloading: https://www.governorsindh.com/_next/static/media/imageWebsite.5c6ae62f.jpg
Downloaded: images\image3.jpg
Downloading: https://www.governorsindh.com/_next/static/media/imageWebsite2.a102c7b5.jpg
Downloaded: images\image4.jpg
Downloading: https://www.governorsindh.com/_next/static/media/imageWebsite3.b845fe78.jpg
Downloaded: images\image5.jpg
Downloading: https://www.governorsindh.com/_next/static/media/slideShow8.4b79537a.jpg
Downloaded: images\image6.jpg
Downloading: https://www.governorsindh.com/_next/static/media/slideShow1.6f890b58.jpg
Downloaded: images\image7.jpg
Downloading: https://www.governorsindh.com/_next/static/media/slideShow3.0006489a.jpg
Downloaded: images\image8.jpg
Downloading: https://www.governorsindh.com/_next/static/media/slideShow2.ce