In [1]:
import os
import requests
from duckduckgo_search import DDGS
from tqdm import tqdm



In [2]:
def download_duckduckgo_images(query, save_folder, num_images=100):
    # Create save folder if not exists
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    # Fetch image URLs from DuckDuckGo
    urls = []
    with DDGS() as ddgs:
        for result in ddgs.images(query, max_results=num_images):
            urls.append(result["image"])

    print(f"Found {len(urls)} images for '{query}'.")

    # Download images
    for i, url in tqdm(enumerate(urls, 1), total=len(urls), desc="Downloading"):
        try:
            response = requests.get(url, stream=True, timeout=10)
            response.raise_for_status()
            ext = url.split('.')[-1].split('?')[0]  # Get file extension
            ext = ext if ext.lower() in ["jpg", "jpeg", "png"] else "jpg"  # Default to jpg
            file_path = os.path.join(save_folder, f"{query}_{i}.{ext}")

            with open(file_path, "wb") as file:
                for chunk in response.iter_content(1024):
                    file.write(chunk)
        except requests.exceptions.RequestException as e:
            print(f"Failed to download {url}: {e}")

    print(f"Downloaded {i} images to '{save_folder}'.")

In [5]:
query = "faces"
save_folder = "./data/test"
download_duckduckgo_images(query, save_folder)

Found 100 images for 'faces'.


Downloading:  12%|███▌                          | 12/100 [00:07<00:34,  2.55it/s]

Failed to download https://www.ukmodels.co.uk/wp-content/uploads/2015/05/shutterstock_141020404.jpg: 403 Client Error: Forbidden for url: https://www.ukmodels.co.uk/wp-content/uploads/2015/05/shutterstock_141020404.jpg
Failed to download https://3.bp.blogspot.com/-CY_AD0wD3mI/ToyNCmDEUnI/AAAAAAAABtk/1Vrl1nC4bac/s1600/Most_Beautiful_Face.jpg: HTTPSConnectionPool(host='3.bp.blogspot.com', port=443): Max retries exceeded with url: /-CY_AD0wD3mI/ToyNCmDEUnI/AAAAAAAABtk/1Vrl1nC4bac/s1600/Most_Beautiful_Face.jpg (Caused by SSLError(SSLError(1, '[SSL: WRONG_VERSION_NUMBER] wrong version number (_ssl.c:1129)')))


Downloading:  50%|███████████████               | 50/100 [00:30<00:33,  1.51it/s]

Failed to download http://www.authenticityassociates.com/wp-content/uploads/2012/08/EmotionsAreEnergy1.jpg: 403 Client Error: Forbidden for url: http://www.authenticityassociates.com/wp-content/uploads/2012/08/EmotionsAreEnergy1.jpg
Failed to download https://www.crushpixel.com/big-static11/preview4/collage-faces-women-with-different-717006.jpg: 403 Client Error: Forbidden for url: https://www.crushpixel.com/big-static11/preview4/collage-faces-women-with-different-717006.jpg


Downloading:  68%|████████████████████▍         | 68/100 [00:41<00:21,  1.51it/s]

Failed to download https://tty-art.com/file/2020/01/Generated-Faces-by-AI-Young-WoMen-V1-003-1.jpg: 403 Client Error: Forbidden for url: https://tty-art.com/file/2020/01/Generated-Faces-by-AI-Young-WoMen-V1-003-1.jpg


Downloading:  76%|██████████████████████▊       | 76/100 [00:46<00:17,  1.39it/s]

Failed to download https://familiescourse.com/wp-content/uploads/2022/05/Faces.jpg: 403 Client Error: Forbidden for url: https://familiescourse.com/wp-content/uploads/2022/05/Faces.jpg


Downloading:  89%|██████████████████████████▋   | 89/100 [00:51<00:03,  3.42it/s]

Failed to download https://singersroom.com/wp-content/uploads/2023/03/Best-The-Faces-Songs-of-All-Time.jpg: 403 Client Error: Forbidden for url: https://singersroom.com/wp-content/uploads/2023/03/Best-The-Faces-Songs-of-All-Time.jpg


Downloading: 100%|█████████████████████████████| 100/100 [00:54<00:00,  1.82it/s]

Downloaded 100 images to './data/test'.



