<a href="https://colab.research.google.com/github/AURORARISE/MSSP-6070/blob/main/week10_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Week 9 – Flickr Image Downloader (Improved)

This notebook contains an improved version of the Flickr image downloader script.
It is refactored for better readability, error handling, and easier reuse in a Jupyter environment.

In [None]:
from pathlib import Path
from typing import List
from urllib.parse import quote, urlparse

import requests

# Browser-like User-Agent header value; get_feed_json sends it with the
# Flickr feed request. The three adjacent literals are concatenated into a
# single string.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/120.0 Safari/537.36"
)

def build_flickr_url(query: str) -> str:
    """Construct the URL of the Flickr public image feed for the given keywords.

    The keywords are percent-encoded before being placed in the query string,
    so characters such as ``&``, ``=``, ``#``, spaces or non-ASCII letters
    cannot break the URL or inject extra parameters. Commas are left as-is
    so that several tags can still be passed as ``"cats,dogs"``.

    Parameters
    ----------
    query : str
        Tags / keywords to search for.

    Returns
    -------
    str
        Fully formatted Flickr public feed URL requesting plain JSON
        (``format=json&nojsoncallback=1``).
    """
    base_url = "https://www.flickr.com/services/feeds/photos_public.gne"
    # safe="," keeps the tag separator readable; everything else that is not
    # URL-safe gets %XX-escaped.
    encoded_tags = quote(query, safe=",")
    return f"{base_url}?format=json&nojsoncallback=1&tags={encoded_tags}"

def get_feed_json(url: str) -> dict:
    """Fetch the Flickr feed at ``url`` and return its decoded JSON body.

    The request is sent with the module-level ``USER_AGENT`` header and a
    15-second timeout.

    Parameters
    ----------
    url : str
        Feed URL to request.

    Returns
    -------
    dict
        The response body parsed as JSON.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (4xx or 5xx).
    """
    print(f"[INFO] Fetching Flickr feed: {url}")
    response = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=15)
    response.raise_for_status()
    return response.json()

def extract_image_urls(feed_json: dict) -> List[str]:
    """Extract image URLs from Flickr's feed JSON.

    Each item is expected to have a ``media`` field containing an ``m`` key,
    which is a medium-sized image link. Items without a usable link are
    skipped rather than raising.

    Parameters
    ----------
    feed_json : dict
        Decoded feed document; image entries are read from its ``items`` key.

    Returns
    -------
    List[str]
        The non-empty ``media["m"]`` values, in feed order. Empty if the
        feed has no items.
    """
    # `or` (rather than a .get default) also covers keys that are present
    # but explicitly null, which a default argument would let through.
    items = feed_json.get("items") or []
    image_urls: List[str] = []
    for item in items:
        media = item.get("media") or {}
        img_url = media.get("m")
        if img_url:
            image_urls.append(img_url)
    return image_urls

def download_image(url: str, folder: Path, index: int) -> Path | None:
    """Download a single image and save it to the specified folder.

    The file is named after the last segment of the URL's path; the query
    string and fragment are ignored. If the path has no final segment
    (e.g. the URL ends in ``/`` or has no path at all), the name falls back
    to ``image_<index>.jpg``. An existing file with the same name is
    overwritten.

    Parameters
    ----------
    url : str
        Image URL.
    folder : Path
        Output directory. Created (including parents) if it does not exist.
    index : int
        Position of the image in the batch; used in log output and as the
        fallback filename if the URL has no basename.

    Returns
    -------
    Path | None
        Path of the saved file, or ``None`` if anything went wrong. Errors
        are printed instead of raised so that one bad image does not abort
        a whole batch.
    """
    try:
        print(f"[INFO] Downloading image {index}: {url}")
        # Send the same User-Agent as the feed request for consistency.
        res = requests.get(url, headers={"User-Agent": USER_AGENT}, timeout=20)
        res.raise_for_status()

        # Parse the URL properly instead of treating it as a filesystem
        # path: only the path component contributes to the filename, so a
        # bare "https://host" no longer yields "host" as the file name.
        filename = urlparse(url).path.rsplit("/", 1)[-1]
        if not filename:
            filename = f"image_{index}.jpg"

        folder.mkdir(parents=True, exist_ok=True)
        file_path = folder / filename
        file_path.write_bytes(res.content)

        print(f"[OK] Saved to {file_path}")
        return file_path
    except Exception as e:  # noqa: BLE001 - deliberate best-effort per image
        print(f"[ERROR] Failed to download {url}: {e}")
        return None


In [None]:
def run_flickr_downloader(query: str, max_images: int | None = None, output_dir: str | None = None):
    """Run the Flickr downloader for a given query.

    This function is notebook-friendly: instead of using ``input()``,
    it takes ``query`` as an argument.

    Parameters
    ----------
    query : str
        Search term, e.g. "cats" or "sunset".
    max_images : int, optional
        If provided, limit the number of images to download.
    output_dir : str, optional
        Custom output directory. If omitted, a folder named
        ``flickr_<query>`` will be created in the current directory.
    """
    query = query.strip()
    if not query:
        raise ValueError("Query cannot be empty.")

    folder_name = output_dir or f"flickr_{query.replace(' ', '_')}"
    folder = Path(folder_name)
    print(f"[INFO] Images will be saved to folder: {folder}")

    feed_url = build_flickr_url(query)
    try:
        feed_json = get_feed_json(feed_url)
    except Exception as e:  # noqa: BLE001
        print(f"[ERROR] Failed to fetch Flickr feed: {e}")
        return []

    image_urls = extract_image_urls(feed_json)
    print(f"[INFO] Found {len(image_urls)} image URLs in the feed.")
    if not image_urls:
        print("No images found. Try another keyword.")
        return []

    if max_images is not None:
        image_urls = image_urls[:max_images]
        print(f"[INFO] Limiting download to {len(image_urls)} images.")

    saved_paths: list[Path] = []
    for idx, img_url in enumerate(image_urls, start=1):
        saved = download_image(img_url, folder, idx)
        if saved is not None:
            saved_paths.append(saved)

    print("[DONE] All possible images have been processed.")
    return saved_paths


In [None]:
# Example usage (uncomment and run in a real notebook environment):
# downloaded_files = run_flickr_downloader("cats", max_images=5)
# downloaded_files