In [None]:
import os
import time
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager
from duckduckgo_search import DDGS

# Search settings. When targeting a different subject, change `query` and
# `output_dir` together so images land in a matching folder.
query = "Airbus A380"
output_dir = "a380_images"

# Download limits
num_images = 1000  # Total number of images to collect
batch_size = 100  # Images requested per fetch round

# URLs seen so far; shared by the fetch/download helpers to avoid repeats
downloaded_urls = set()

# Make sure the destination folder exists before anything is written to it
os.makedirs(output_dir, exist_ok=True)

# Build the Selenium Chrome driver used for scraping
def init_driver():
    """Create and return a Chrome WebDriver configured with headless flags.

    The chromedriver executable path comes from
    ``ChromeDriverManager().install()``.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_flags = (
        "--headless",  # Run in background
        "--disable-gpu",
        "--no-sandbox",
        "--log-level=3",  # Suppress logs
    )
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

# Function to fetch image URLs from Google Images
def fetch_google_images(search_query, max_links=100, max_idle_scrolls=3):
    """Collect up to ``max_links`` image URLs from a Google Images result page.

    The page is scrolled repeatedly (END key) and the ``src`` of every
    ``<img>`` element is inspected. URLs already present in the module-level
    ``downloaded_urls`` set are skipped, but this function does NOT add to
    that set: a URL only counts as downloaded once ``download_images`` has
    actually saved it.

    Args:
        search_query: Free-text search terms; URL-encoded before use.
        max_links: Maximum number of URLs to return. Values <= 0 return an
            empty list without starting a browser.
        max_idle_scrolls: Stop after this many consecutive scrolls that
            produced no new URL, so an exhausted page cannot loop forever.

    Returns:
        A list of unique image URLs (at most ``max_links`` of them).
    """
    # Local import so this block does not depend on the file's import header.
    from urllib.parse import quote_plus

    if max_links <= 0:
        return []  # Nothing requested; do not launch a browser for it.

    image_urls = set()
    idle_scrolls = 0
    driver = init_driver()
    try:
        # Encode the query so spaces and reserved characters survive the URL.
        driver.get(
            f"https://www.google.com/search?tbm=isch&q={quote_plus(search_query)}"
        )

        while len(image_urls) < max_links and idle_scrolls < max_idle_scrolls:
            driver.find_element(By.TAG_NAME, "body").send_keys(Keys.END)
            time.sleep(2)  # Allow images to load

            found_before = len(image_urls)
            for img in driver.find_elements(By.CSS_SELECTOR, "img"):
                src = img.get_attribute("src")
                # Keep only real http(s) links (skips inline data: URIs) that
                # have not been saved by an earlier download pass.
                if src and src.startswith("http") and src not in downloaded_urls:
                    image_urls.add(src)
                    if len(image_urls) >= max_links:
                        break

            # Track scrolls that surfaced nothing new; reset on any progress.
            if len(image_urls) == found_before:
                idle_scrolls += 1
            else:
                idle_scrolls = 0
    finally:
        # Always release the browser, even if scraping raised.
        driver.quit()

    return list(image_urls)

# Query DuckDuckGo image search and return the image links it reports
def fetch_duckduckgo_images(search_query, max_links=100):
    """Return image URLs from a DuckDuckGo image search.

    Args:
        search_query: Search terms passed to ``DDGS.images``.
        max_links: Passed through as ``max_results``.

    Returns:
        A list with the ``"image"`` value of every result whose value
        contains ``"http"``.
    """
    with DDGS() as ddgs:
        # Materialise inside the ``with`` so the session is still open.
        hits = list(ddgs.images(search_query, max_results=max_links))

    links = []
    for hit in hits:
        if "http" in hit["image"]:
            links.append(hit["image"])
    return links

# Function to download images
def download_images(image_urls, start_idx):
    """Download each new URL into ``output_dir`` as ``A380_<n>.jpg``.

    URLs already in the module-level ``downloaded_urls`` set are skipped.
    Each image is streamed to a temporary ``.part`` file and renamed only
    after the transfer finishes, so a failed download never leaves a
    truncated ``.jpg`` behind. Failures are reported and skipped; they do
    not consume a file index.

    Args:
        image_urls: Iterable of image URLs to try.
        start_idx: Index used in the file name of the first saved image.

    Returns:
        The number of images successfully saved by this call.
    """
    count = start_idx
    for image_url in image_urls:
        if image_url in downloaded_urls:
            continue  # Skip duplicates

        final_path = os.path.join(output_dir, f"A380_{count}.jpg")
        partial_path = final_path + ".part"
        try:
            # The context manager releases the streamed connection in all cases.
            with requests.get(image_url, stream=True, timeout=10) as response:
                if response.status_code != 200:
                    print(
                        f"❌ Failed to download {image_url}: "
                        f"HTTP {response.status_code}"
                    )
                    continue
                with open(partial_path, "wb") as file:
                    for chunk in response.iter_content(1024):
                        file.write(chunk)
            os.replace(partial_path, final_path)
        except Exception as e:
            # Deliberately broad: one bad URL must not abort the whole batch.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            print(f"❌ Failed to download {image_url}: {e}")
            continue

        print(f"Downloaded {count}/{num_images}: {image_url}")
        downloaded_urls.add(image_url)
        count += 1

    return count - start_idx  # Return number of images downloaded

# Start downloading images.
# Each round tries Google first, then tops up from DuckDuckGo. The loop ends
# when the target is reached OR when a full round saves nothing new; without
# that second condition, exhausted sources would make it spin forever.
downloaded = 0
while downloaded < num_images:
    downloaded_before_round = downloaded
    batch_needed = min(batch_size, num_images - downloaded)

    # Get images from Google
    google_images = fetch_google_images(query, max_links=batch_needed)
    downloaded += download_images(google_images, downloaded + 1)

    # If not enough, get images from DuckDuckGo
    if downloaded < num_images:
        extra_needed = num_images - downloaded
        duckduckgo_images = fetch_duckduckgo_images(query, max_links=extra_needed)
        downloaded += download_images(duckduckgo_images, downloaded + 1)

    if downloaded == downloaded_before_round:
        # Neither source produced a new image this round; give up.
        print("⚠️ No new images found; stopping early.")
        break

    if downloaded < num_images:
        time.sleep(5)  # Pause to avoid getting blocked

if downloaded >= num_images:
    print("✅ Download completed!")
else:
    print(f"Stopped after {downloaded}/{num_images} images.")


Downloaded 1/2000: https://upload.wikimedia.org/wikipedia/commons/thumb/0/09/A6-EDY_A380_Emirates_31_jan_2013_jfk_(8442269364)_(cropped).jpg/1200px-A6-EDY_A380_Emirates_31_jan_2013_jfk_(8442269364)_(cropped).jpg
Downloaded 2/2000: https://wallpapercave.com/wp/Y2pDp3h.jpg
Downloaded 3/2000: https://cdn.britannica.com/57/93557-050-1F497F11/Airbus-A380.jpg
Downloaded 4/2000: https://images8.alphacoders.com/695/695562.jpg
Downloaded 5/2000: https://wallup.net/wp-content/uploads/2019/09/357383-airbus-a380-airliner-plane-airplane-transport-61.jpg
Downloaded 6/2000: https://images.aircharterservice.com/global/aircraft-guide/group-charter/airbus-a380-1.jpg
Downloaded 7/2000: https://static1.simpleflyingimages.com/wordpress/wp-content/uploads/2023/01/emirates-airbus-a380-800-a6-eoa-3.jpg
Downloaded 8/2000: https://imgproc.airliners.net/photos/airliners/3/1/5/2548513.jpg?v=v40
Downloaded 9/2000: https://4.bp.blogspot.com/-17nsyJiztIM/WRF3S7KzbEI/AAAAAAAABYs/Hl_ILUjG5l886tjQyIIF547Arv79v2tNQCLcB/

KeyboardInterrupt: 

7.5.3
