In [2]:
import os
import re
import time
from urllib.parse import parse_qs, urljoin, urlparse

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# Setup ChromeDriver
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

# Website URL
url = 'https://panaverse-dao-staging.vercel.app/'

# Open the website
driver.get(url)

# Wait for the page to load (increase time if needed)
time.sleep(15)

# Get the page source after rendering
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Extract images from <img> tags
img_tags = soup.find_all('img')
img_urls = [requests.compat.urljoin(url, img.get('src') or img.get('data-src') or img.get('srcset')) for img in img_tags]

# Extract images from CSS background
css_background_images = []
for element in soup.find_all(True):
    style = element.get('style')
    if style and 'background-image' in style:
        matches = re.findall(r'url\(["\']?(.*?)["\']?\)', style)
        css_background_images.extend([requests.compat.urljoin(url, match) for match in matches])

# Combine image URLs
all_image_urls = img_urls + css_background_images

# Create a directory to save images
if not os.path.exists('images'):
    os.makedirs('images')

# Download and save each image
i = 1
for image_url in all_image_urls:
    try:
        print(f"Downloading: {image_url}")
        response = requests.get(image_url)
        
        if response.status_code == 200:
            # Determine the file extension from URL and handle query parameters
            ext = image_url.split('.')[-1].split('?')[0]
            if ext not in ['jpg', 'jpeg', 'png', 'gif']:
                ext = 'jpg'
            file_path = os.path.join('images', f'image{i}.{ext}')
            
            # Open a file to write the image content
            with open(file_path, 'wb') as file:
                file.write(response.content)
            
            print(f'Downloaded: {file_path}')
            i += 1
        else:
            print(f"Failed to fetch {image_url}: Status code {response.status_code}")
    except Exception as e:
        print(f"Failed to download {image_url}: {e}")

# Print the number of images scraped
print("All images scraped:", i - 1)

# Close the browser
driver.quit()


Downloading: https://panaverse-dao-staging.vercel.app/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fpanaverse-logo.ecb63f00.png&w=2048&q=70
Downloaded: images\image1.jpg
Downloading: https://panaverse-dao-staging.vercel.app/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fbackground-lines.707f7a64.png&w=3840&q=40
Downloaded: images\image2.jpg
Downloading: https://panaverse-dao-staging.vercel.app/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fhero-poster.bb823ade.png&w=3840&q=75
Downloaded: images\image3.jpg
Downloading: https://panaverse-dao-staging.vercel.app/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fmetaverse.7cd9f7b9.png&w=3840&q=30
Downloaded: images\image4.jpg
Downloading: https://panaverse-dao-staging.vercel.app/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fai.2469eb41.png&w=3840&q=30
Downloaded: images\image5.jpg
Downloading: https://panaverse-dao-staging.vercel.app/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Fcloud.4b9a7f40.png&w=3840&q=30
Downloaded: images\image6.jpg
Downloading: https: