In [None]:
from fastai.vision.all import *

In [17]:
from ddgs import DDGS # Uses the updated package name
import time
from ddgs.exceptions import RatelimitException

# --- Configuration ---
path = Path('data')    # root directory; one sub-folder per search key is created under it
target_count = 100     # passed as max_results to every image search
pause_time = 5.0       # seconds to sleep after each search
# ---------------------

# Folder name -> search query used to fill that folder.
searches = {
    'toilet': 'bathroom with toilet photo',
    'no-toilet': 'bathroom sink and shower only interior',
    # Results of this query are meant for manual filtering into the two folders above.
    'bathroom': 'hotel bathroom',
}

print("Starting image download process with DuckDuckGo...")

# For every (folder, query) pair: search for image URLs, download them into
# path/<folder>, then pause before issuing the next search.
for folder, query in searches.items():
    dest = path/folder
    dest.mkdir(exist_ok=True, parents=True)

    print(f"\nSearching for: '{query}'")

    # 1. Collect the image URLs
    try:
        with DDGS() as ddgs:
            results = ddgs.images(query=query, max_results=target_count)
    except RatelimitException as e:
        print(f"!!! RATELIMIT HIT for {folder}. Please wait 10 minutes and try again.")
        print(f"Error details: {e}")
        # Abort the remaining searches; the `else` clause below is skipped.
        break

    # Keep only results that actually carry an image URL.
    urls = [r['image'] for r in results if 'image' in r]

    # 2. Download the Images
    print(f"Found {len(urls)} URLs. Downloading to {dest}...")

    download_images(dest, urls=urls)

    print(f"Finished downloading for '{folder}'. Pausing {pause_time}s...")
    time.sleep(pause_time)  # pause between searches
else:
    # Runs only when the loop was not interrupted by `break`, so a rate-limited
    # run no longer claims that every download completed.
    print("\n--- All downloads complete! ---")

# --- Verification and Cleaning ---
# Find all image files in the data directory
fns = get_image_files(path)
print(f"\nTotal images downloaded: {len(fns)}")

# Verify the images and unlink (delete) the ones that failed verification.
failed = verify_images(fns)
# Delete first and report afterwards, so the "removed" message is only printed
# once the files are really gone. A plain loop is used (instead of
# `failed.map(...)` as the last expression) so the cell does not echo a list
# of `None` return values.
for bad_file in failed:
    bad_file.unlink()
print(f"Failed/Corrupt images removed: {len(failed)}")

Starting image download process with DuckDuckGo...

Searching for: 'hotel bathroom'
Found 100 URLs. Downloading to data\bathroom...
Finished downloading for 'bathroom'. Pausing 5.0s...

--- All downloads complete! ---

Total images downloaded: 303
Failed/Corrupt images removed: 3


(#3) [None,None,None]