# In [None]:
import requests
import json
import time
import os
import re
from urllib.parse import urlparse
import logging

# Set up logging to file and console.
# Records at INFO level and above are written both to "wikiart_log.txt"
# (path relative to the current working directory) and to a StreamHandler,
# each formatted as "<timestamp> - <level> - <message>".
# NOTE: with the level set to INFO, logger.debug(...) calls are not emitted.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("wikiart_log.txt"),
        logging.StreamHandler()
    ]
)
# Module-level logger used by the functions in this file.
logger = logging.getLogger(__name__)

# Root URL that get_api_data() prefixes to every request path.
base_url = "https://www.wikiart.org/en"

def get_api_data(path, params=None, retries=3, delay=2):
    """Fetch JSON data from the WikiArt API with retry logic.

    Connection-level failures (``requests.RequestException``) and HTTP 429
    responses are retried up to ``retries`` times with a fixed pause between
    attempts. Any other non-200 status, or a 200 response whose body is not
    valid JSON, is treated as permanent and ends the call immediately.

    Args:
        path: Endpoint path appended to the module-level ``base_url``.
        params: Optional mapping of query-string parameters.
        retries: Maximum number of attempts to make.
        delay: Seconds to sleep between attempts.

    Returns:
        The decoded JSON payload on success, otherwise ``None``.
    """
    url = f"{base_url}/{path}"
    for attempt in range(retries):
        is_last_attempt = attempt >= retries - 1

        # Keep the try body limited to the call that can actually raise.
        try:
            logger.info("Requesting %s with params %s", url, params)
            response = requests.get(url, params=params, timeout=10)
        except requests.RequestException as e:
            logger.error(
                "Request error on attempt %d/%d: %s", attempt + 1, retries, e
            )
            if is_last_attempt:
                return None
            time.sleep(delay)
            continue

        logger.info("Response status: %s", response.status_code)

        if response.status_code == 200:
            try:
                data = response.json()
            except ValueError:
                # Every JSON decode error that response.json() can raise
                # (json, simplejson, or requests' own wrapper) derives from
                # ValueError, so this catches all of them.
                logger.error("Response is not JSON: %s...", response.text[:100])
                return None
            # Serialising the full payload is expensive; only do it when the
            # debug record would actually be emitted.
            if logger.isEnabledFor(logging.DEBUG):
                logger.debug(
                    "Response JSON: %s...", json.dumps(data, indent=4)[:500]
                )
            return data

        if response.status_code == 429:
            if is_last_attempt:
                # No further attempt will follow, so sleeping would only
                # delay the caller.
                logger.error(
                    "Rate limit hit on final attempt %d/%d, giving up.",
                    attempt + 1,
                    retries,
                )
                return None
            logger.warning("Rate limit hit, retrying after %ss...", delay)
            time.sleep(delay)
            continue

        logger.error(
            "HTTP error %s: %s...", response.status_code, response.text[:100]
        )
        return None

    # Reached only when ``retries`` is zero or negative.
    return None

def modify_image_url(image_url):
    """Remove ``!Large.jpg`` from the URL, if present, to get higher resolution.

    Args:
        image_url: Image URL as found in the API response. May be ``None``
            or another non-string value when the field is missing or null.

    Returns:
        The URL with every ``!Large.jpg`` occurrence removed. Non-string
        input is returned unchanged so callers can apply their own
        "no image" handling instead of hitting an ``AttributeError``.
    """
    if not isinstance(image_url, str):
        return image_url
    return image_url.replace("!Large.jpg", "")

def sanitize_filename(value):
    """Return ``value`` rendered as a string that is safe to use in a filename.

    The value is converted with ``str()``. Every character that is not a word
    character, hyphen, underscore, dot or space becomes an underscore, and
    then each remaining space becomes an underscore as well.
    """
    unsafe_chars = re.compile(r'[^\w\-_\. ]')
    without_unsafe = unsafe_chars.sub('_', str(value))
    # Splitting on a single space and joining with '_' maps every space to
    # exactly one underscore, including consecutive spaces.
    return '_'.join(without_unsafe.split(' '))

def download_image(image_url, save_path):
    """Download an image to the specified path.

    The body is streamed into a temporary ``<save_path>.part`` file and moved
    to ``save_path`` only once the transfer has completed. A failed or
    interrupted download therefore never leaves a truncated file that the
    "already exists" check would skip on the next run.

    Args:
        image_url: URL of the image to fetch.
        save_path: Destination file path. If it already exists the download
            is skipped.

    Returns:
        None. Success and failure are reported through the logger; errors
        derived from ``Exception`` are logged rather than raised.
    """
    if os.path.exists(save_path):
        logger.info(f"File already exists: {save_path}, skipping download.")
        return

    temp_path = f"{save_path}.part"
    try:
        # Using the response as a context manager releases the streamed
        # connection even when the transfer fails part-way.
        with requests.get(image_url, stream=True, timeout=10) as response:
            if response.status_code != 200:
                logger.error(
                    f"Failed to download {image_url}: HTTP {response.status_code}"
                )
                return
            with open(temp_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Publish the file only after every byte has been written.
        os.replace(temp_path, save_path)
        logger.info(f"Downloaded: {save_path}")
    except Exception as e:
        logger.error(f"Error downloading {image_url}: {e}")
    finally:
        # Runs for KeyboardInterrupt too, which ``except Exception`` does not
        # catch. After a successful os.replace the temp file is already gone.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                # Best-effort cleanup: a leftover .part file is harmless.
                logger.warning(
                    f"Could not remove partial file {temp_path}: {cleanup_error}"
                )



# In [None]:

def main(artist_url="jeffrey-smart", output_dir="wikiart_output"):
    """Fetch an artist's paintings, save their metadata and download the images.

    Steps:
        1. Create ``<output_dir>/images_<artist_url>``.
        2. Request the painting list from the ``PaintingsByArtist`` endpoint.
        3. Rewrite each image URL via ``modify_image_url``.
        4. Save the painting list to ``<output_dir>/paintings_<artist_url>.json``.
        5. Download every image to
           ``<artist>+<contentId>+<title><extension>`` in the images directory.

    Args:
        artist_url: Artist slug as it appears in the WikiArt URL.
        output_dir: Directory that receives the JSON file and images folder.

    Raises:
        SystemExit: With code 0 when interrupted by the user (Ctrl-C).
    """
    try:
        # Create output directories
        images_dir = os.path.join(output_dir, f"images_{artist_url}")
        os.makedirs(images_dir, exist_ok=True)
        logger.info(f"Created/using directory: {images_dir}")

        # Fetch all paintings for the artist (this API endpoint returns all in one response)
        logger.info(f"Fetching paintings for artist '{artist_url}'...")
        params = {"artistUrl": artist_url, "json": 2}
        data = get_api_data("App/Painting/PaintingsByArtist", params=params)

        # Check if data is valid
        if data is None:
            logger.error("No data returned from API.")
            return

        # The response should be a list of paintings
        if not isinstance(data, list):
            logger.error(f"Unexpected response format: {json.dumps(data, indent=4)[:200]}...")
            return
        all_paintings = data

        if not all_paintings:
            logger.info("No paintings found.")
            return

        logger.info(f"Total paintings retrieved: {len(all_paintings)}")

        # Modify image URLs to get original size
        for item in all_paintings:
            image_url = item.get("image")
            if image_url:
                item["image"] = modify_image_url(image_url)
            elif "image" not in item:
                logger.warning(f"No 'image' field in painting: {item.get('title', 'Unknown')}")
            # A present-but-empty/null image is left untouched here; the
            # download loop below reports and skips it.

        # Save JSON
        json_path = os.path.join(output_dir, f"paintings_{artist_url}.json")
        try:
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(all_paintings, f, indent=4)
            logger.info(f"Saved JSON to: {json_path}")
        except Exception as e:
            # Failing to save metadata should not prevent the image downloads.
            logger.error(f"Error saving JSON to {json_path}: {e}")

        # Download images
        for item in all_paintings:
            image_url = item.get("image")
            if not image_url:
                logger.warning(f"Skipping download for painting (no image URL): {item.get('title', 'Unknown')}")
                continue
            artist_name = sanitize_filename(item.get("artistName", "UnknownArtist"))
            content_id = sanitize_filename(item.get("contentId", "UnknownID"))
            title = sanitize_filename(item.get("title", "UnknownTitle"))
            # Take the extension from the URL path only, ignoring any query string.
            parsed_url = urlparse(image_url)
            file_extension = os.path.splitext(parsed_url.path)[1] or ".jpg"
            filename = f"{artist_name}+{content_id}+{title}{file_extension}"
            save_path = os.path.join(images_dir, filename)
            download_image(image_url, save_path)

        logger.info(f"Total paintings processed: {len(all_paintings)}")

    except KeyboardInterrupt:
        logger.info("Program interrupted by user. Exiting gracefully...")
        # The ``exit`` builtin is injected by the ``site`` module and is not
        # guaranteed to exist; raising SystemExit works everywhere.
        raise SystemExit(0) from None
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("An unexpected error occurred: %s", e)


# In [None]:
# Entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()