In [None]:
#!/usr/bin/env python3

import os
import json
import time
import requests
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm

# Default configurations used by main()
# JSON file read by download_all_images; each entry is accessed via its
# 'login' and 'image_url' keys.
DEFAULT_JSON_FILE = 'amman_user_images.json'
# Directory the images are written to, one "<login>.jpg" file per entry.
DEFAULT_OUTPUT_DIR = 'amman_profile_images'
# Size of the thread pool used for concurrent downloads.
DEFAULT_MAX_WORKERS = 5
# Seconds each worker sleeps after a successful download, to avoid
# overwhelming the server.
DEFAULT_DELAY = 0.1  # seconds between downloads


def download_image(url, file_path, delay=0.1, timeout=30):
    """Download an image from a URL and save it to a file.

    The response body is streamed into a temporary ``<name>.part`` file next
    to the destination and only moved into place once the transfer has
    finished, so an interrupted download never leaves a truncated image at
    ``file_path``.

    Args:
        url (str): URL of the image to download
        file_path (str or Path): Path where the image will be saved
        delay (float): Seconds to sleep after a successful download to avoid
            overwhelming servers
        timeout (float): Seconds to wait for the server (connect / read)
            before giving up on the request

    Returns:
        bool: True if download was successful, False otherwise
    """
    partial_path = None
    try:
        target_path = Path(file_path)
        partial_path = target_path.with_name(target_path.name + ".part")

        # The context manager releases the connection even if streaming fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            with open(partial_path, 'wb') as image_file:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        image_file.write(chunk)

        # Publish the file only after it has been written completely.
        os.replace(partial_path, target_path)

        # Add a small delay to avoid overwhelming the server
        time.sleep(delay)
        return True
    except Exception as e:
        # Worker boundary: a single failed image must not abort the batch,
        # so report the reason and signal failure through the return value.
        print(f"\nError downloading {url}: {e}")
        if partial_path is not None:
            try:
                partial_path.unlink()
            except OSError:
                # Nothing was written, or the file is already gone.
                pass
        return False


def download_all_images(json_file, output_dir, max_workers=5, delay=0.1):
    """Download all images from the JSON file.

    The JSON file is expected to hold a list of objects with ``login`` and
    ``image_url`` keys. Each image is stored as ``<output_dir>/<login>.jpg``.
    Entries whose target file already exists are skipped and counted as
    successes; entries without a ``login`` or ``image_url`` are counted as
    failures instead of aborting the run.

    Args:
        json_file (str): Path to the JSON file with image URLs
        output_dir (str): Directory to save downloaded images (created,
            including missing parent directories, if it doesn't exist)
        max_workers (int): Maximum number of concurrent downloads
        delay (float): Seconds each worker waits after a successful download

    Returns:
        tuple: (success_count, fail_count), where success_count includes
            images that were already present on disk
    """
    # Create output directory (and any missing parents) if it doesn't exist
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Load JSON data; JSON text is UTF-8 regardless of the platform locale
    with open(json_file, 'r', encoding='utf-8') as f:
        user_data = json.load(f)

    print(f"Found {len(user_data)} users in the JSON file")

    # Track success and failures
    success_count = 0
    fail_count = 0

    # Collect (login, url, destination) for every image that must be fetched
    tasks = []

    for user in user_data:
        is_mapping = isinstance(user, dict)
        login = user.get('login') if is_mapping else None
        if not login:
            print(f"Skipping entry without a login: {user!r}")
            fail_count += 1
            continue

        output_file = output_path / f"{login}.jpg"

        # Skip if image already exists
        if output_file.exists():
            print(f"Image for {login} already exists, skipping...")
            success_count += 1
            continue

        image_url = user.get('image_url')
        if not image_url:
            print(f"No image URL for {login}, skipping...")
            fail_count += 1
            continue

        tasks.append((login, image_url, output_file))

    # Download images with progress bar
    print(f"Downloading {len(tasks)} images...")

    # Nothing to fetch: skip the thread pool and the progress bar entirely
    if tasks:
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            # Map each future back to its login for failure reporting
            futures = {
                executor.submit(download_image, url, file_path, delay): login
                for login, url, file_path in tasks
            }

            with tqdm(total=len(tasks), desc="Downloading", unit="img") as progress_bar:
                for future in as_completed(futures):
                    login = futures[future]
                    try:
                        success = future.result()
                    except Exception as exc:
                        # Keep the batch going even if a worker raised
                        print(f"\nUnexpected error for {login}: {exc}")
                        success = False

                    if success:
                        success_count += 1
                    else:
                        fail_count += 1
                        print(f"\nFailed to download image for {login}")

                    progress_bar.update(1)

    print("\nDownload summary:")
    print(f"- Total users: {len(user_data)}")
    print(f"- Successfully downloaded: {success_count}")
    print(f"- Failed: {fail_count}")
    print(f"\nImages saved to: {os.path.abspath(output_dir)}")

    return success_count, fail_count


def main():
    """Run the image downloader using the module-level default settings."""
    print("Starting profile image downloader...")
    download_all_images(
        json_file=DEFAULT_JSON_FILE,
        output_dir=DEFAULT_OUTPUT_DIR,
        max_workers=DEFAULT_MAX_WORKERS,
        delay=DEFAULT_DELAY,
    )
    print("Done!")

# Start the download run as soon as this code is executed.
main()

Starting profile image downloader...
Found 772 users in the JSON file
Downloading 772 images...


Downloading: 100%|██████████| 772/772 [01:26<00:00,  8.94img/s]


Download summary:
- Total users: 772
- Successfully downloaded: 772
- Failed: 0

Images saved to: /Users/yacoub/hackation_ai/42-face-detection-ai/amman_profile_images
Done!



