Image Crawling

In [None]:
import os
import time

import requests
from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# Settings
search_url = "https://unsplash.com/s/photos/Indochinese-tiger"  # Unsplash search results page to crawl
download_folder = r""  # Your download path; leave empty to use the current working directory
if not download_folder:
    # os.makedirs("") raises FileNotFoundError, so an unset path falls back
    # to the directory the script is run from.
    download_folder = os.getcwd()
os.makedirs(download_folder, exist_ok=True)
file_name = ""  # Prefix for saved files; images are written as <file_name>_<index>.jpg
max_images = 700  # Upper bound on the number of image URLs collected and downloaded
scroll_pause_time = 2  # Seconds to sleep after each scroll

# Start a visible Chrome session and navigate to the search page.
print("Launching Chrome...")
options = webdriver.ChromeOptions()
# To run without a browser window, enable headless mode:
# options.add_argument("--headless")
options.add_argument("--log-level=3")

# webdriver_manager's install() supplies the chromedriver path handed to Service.
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

print("Opening Unsplash search page...")
driver.get(search_url)

# Scroll and load images
# Repeatedly scroll to the bottom of the page, then harvest the `src` of every
# <img> whose URL contains "images.unsplash.com". URLs are kept in a set so an
# image seen on several passes is only recorded once.
print("Scrolling to load images...")
image_urls = set()
scroll_count = 0
max_scrolls = 20  # Hard cap on scroll attempts so the loop ends even if too few images turn up

while len(image_urls) < max_images and scroll_count < max_scrolls:
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(scroll_pause_time)

    for img in driver.find_elements(By.TAG_NAME, "img"):
        if len(image_urls) >= max_images:
            break
        try:
            src = img.get_attribute("src")
        except StaleElementReferenceException:
            # The page replaced this node between find_elements() and the
            # attribute read; skip it rather than abort the whole crawl. If it
            # is still on the page it will be seen again on the next pass.
            continue
        if src and "images.unsplash.com" in src:
            image_urls.add(src)

    scroll_count += 1
    print(f"Scrolled {scroll_count} times - found {len(image_urls)} images.")

# The browser is only needed to collect URLs. Closing it before the download
# phase releases Chrome even if the downloads are interrupted part-way.
driver.quit()

# Download images
print(f"✅ Found {len(image_urls)} images. Starting download...")
request_timeout = 30  # Seconds before a stalled request is abandoned instead of hanging the run

# One Session reuses the underlying connection across requests to the same host.
with requests.Session() as session:
    # sorted() gives a stable index -> URL mapping; plain set iteration order
    # can differ between runs, which would shuffle file names on a re-run.
    for idx, url in enumerate(sorted(image_urls)[:max_images]):
        file_path = os.path.join(download_folder, f"{file_name}_{idx}.jpg")
        try:
            response = session.get(url, timeout=request_timeout)
            # Without this check an HTTP error body would be saved as a .jpg.
            response.raise_for_status()
            with open(file_path, "wb") as f:
                f.write(response.content)
        except (requests.RequestException, OSError) as e:
            # Best effort: report the failure and carry on with the next image.
            print(f"Failed to download image {idx}: {e}")
        else:
            print(f"Downloaded: {file_name}_{idx}.jpg")

print("\n✅ All done. Check your folder at:")
print(download_folder)
