In [1]:
import requests
import csv
import os

# --- Scrape configuration -------------------------------------------------
# Search term; also used as the image sub-directory name and as the
# ArtifactType value written to the CSV.
artifact = "sculpture"
# Inclusive year range an object's begin/end dates must fall inside.
date_start = 100
date_end = 1500

# Set up directories: images are stored under <root>/images/<artifact>/.
root_dir = "artifact_dataset"
images_dir = os.path.join(root_dir, "images", artifact)
os.makedirs(images_dir, exist_ok=True)


def csv_needs_header(path):
    """Return True if the CSV at *path* is missing or empty.

    Checking the size rather than mere existence means a zero-byte file
    (for example one left behind by an interrupted run) still receives a
    header row instead of starting directly with data rows.
    """
    try:
        return os.path.getsize(path) == 0
    except OSError:
        # Missing (or unreadable) file: treat it as needing a header.
        return True


# CSV file setup (the file is opened in append mode, so the header must only
# be written when the file has no content yet).
csv_filename = os.path.join(root_dir, "metadata.csv")
write_header = csv_needs_header(csv_filename)

def is_object_in_csv(object_id, csv_path=None, image_dir=None):
    """Return True if the metadata CSV already lists the image for *object_id*.

    A row matches when its fourth column (``Image``) equals
    ``os.path.join(image_dir, f"{object_id}.jpg")``.

    Args:
        object_id: Identifier used to build the expected image file name.
        csv_path: CSV file to scan. Defaults to the module-level
            ``csv_filename``.
        image_dir: Directory component of the expected image path. Defaults
            to the module-level ``images_dir``.

    Returns:
        True if a matching row exists; False otherwise, including when the
        file is missing, empty, or contains only a header.
    """
    if csv_path is None:
        csv_path = csv_filename
    if image_dir is None:
        image_dir = images_dir

    # Build the path once instead of once per row.
    expected_image = os.path.join(image_dir, f"{object_id}.jpg")

    try:
        with open(csv_path, "r", newline="", encoding="utf-8") as csv_file:
            csv_reader = csv.reader(csv_file)
            # Skip the header row. The default avoids StopIteration when the
            # file is still empty (e.g. the writer has not flushed yet).
            next(csv_reader, None)
            # Rows with fewer than four columns (blank or truncated lines)
            # cannot match and must not raise IndexError.
            return any(
                len(row) > 3 and row[3] == expected_image
                for row in csv_reader
            )
    except FileNotFoundError:
        # No metadata file yet means nothing has been recorded.
        return False

# Scrape the MET collection API: search for `artifact`, filter each hit by
# name and date range, download its primary image and append one metadata
# row per saved object. The CSV is opened in append mode so repeated runs
# accumulate rows; one HTTP session is reused for all requests.
with open(csv_filename, "a", newline="", encoding="utf-8") as csv_file, \
        requests.Session() as session:
    csv_writer = csv.writer(csv_file)
    if write_header:
        csv_writer.writerow(["ArtifactType", "Name", "Age", "Image"])
        # Flush so is_object_in_csv(), which re-opens the file for reading,
        # sees the header rather than an empty file.
        csv_file.flush()

    BASE_SEARCH_URL = "https://collectionapi.metmuseum.org/public/collection/v1/search"
    BASE_OBJECT_URL = "https://collectionapi.metmuseum.org/public/collection/v1/objects/"
    # Without a timeout a single stalled connection would hang the run.
    request_timeout = 30  # seconds
    # Set to an int (e.g. 10) to stop early for a quick demonstration run.
    max_objects = None
    # Name/title are lower-cased below, so compare against a lower-cased term.
    search_term = artifact.lower()

    # MET API search for objects matching the artifact type.
    params = {"hasImages": "true", "q": artifact}
    search_response = session.get(
        BASE_SEARCH_URL, params=params, timeout=request_timeout
    )
    # Fail with a clear HTTP error instead of an opaque JSON decode error.
    search_response.raise_for_status()
    search_data = search_response.json()
    # "objectIDs" can be null when nothing matches; normalise to a list so
    # the loop below simply does nothing.
    objectIDs = search_data.get("objectIDs") or []
    print(f"Total objects found: {search_data.get('total', 0)}")
    if not objectIDs:
        print(f"No {artifact} objects found.")

    count = 0
    for objectID in objectIDs:
        # One failed lookup must not abort the whole run.
        try:
            object_response = session.get(
                BASE_OBJECT_URL + str(objectID), timeout=request_timeout
            )
            object_response.raise_for_status()
            obj_data = object_response.json()
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching object {objectID}: {e}")
            continue

        # Check if the artifact type appears in objectName or title
        # (case-insensitive). `or ""` guards against JSON null values.
        object_name = (obj_data.get("objectName") or "").lower()
        title = (obj_data.get("title") or "").lower()
        if search_term not in object_name and search_term not in title:
            continue

        # Filter by date range: the object's whole span must lie inside
        # [date_start, date_end].
        begin_date = obj_data.get("objectBeginDate")
        end_date = obj_data.get("objectEndDate")
        if begin_date is None or end_date is None:
            continue
        if begin_date < date_start or end_date > date_end:
            continue

        # Get primary image URL
        image_url = obj_data.get("primaryImage")
        if not image_url:
            continue

        name = obj_data.get("title") or "No Title"

        # Skip objects already recorded; the cheap file check runs first.
        image_filename = os.path.join(images_dir, f"{objectID}.jpg")
        if os.path.exists(image_filename) or is_object_in_csv(objectID):
            print(f"Skipping {artifact}: {name} (already exists).")
            continue

        try:
            image_response = session.get(image_url, timeout=request_timeout)
        except requests.RequestException as e:
            print(f"Error downloading image for object {objectID}: {e}")
            continue
        if image_response.status_code != 200:
            continue

        try:
            with open(image_filename, "wb") as img_file:
                img_file.write(image_response.content)
        except OSError as e:
            print(f"Error downloading image for object {objectID}: {e}")
            # Remove a partially written file; otherwise the existence check
            # above would make every later run skip this object.
            if os.path.exists(image_filename):
                os.remove(image_filename)
            continue

        # Write metadata to CSV and flush immediately so the row is visible
        # to is_object_in_csv() and survives an interrupted run.
        age_str = f"{begin_date}-{end_date} AD"
        csv_writer.writerow([artifact, name, age_str, image_filename])
        csv_file.flush()
        print(f"Saved {artifact}: {name} ({age_str})")
        count += 1

        if max_objects is not None and count >= max_objects:
            break

    print(f"Finished {artifact} scraping. Total {count} objects saved.")


Total objects found: 28417
Saved sculpture: Sculpture (1000-1199 AD)
Saved sculpture: Sculpture of a Wise Man (from a Group with the Adoration of the Magi) (1172-1203 AD)
Saved sculpture: Buddha Protected by a Seven-headed Naga (1167-1233 AD)
Saved sculpture: Virgin and Child (from an group with the Adoration of the Magi) (1172-1203 AD)
Saved sculpture: Wise Man (from a group with the Adoration of the Magi) (1172-1203 AD)
Saved sculpture: Joseph (from a group with the Adoration of the Magi) (1172-1203 AD)
Saved sculpture: Sculpture of an Enthroned King (1230-1235 AD)
Saved sculpture: Lectern for the Reading of the Gospels with the Eagle of Saint John the Evangelist (1301-1301 AD)
Saved sculpture: Sculpture of Moses with Tablets of the Law (1167-1173 AD)
Saved sculpture: Column Statue of a King (1150-1160 AD)
Saved sculpture: Table Base with Jonah Swallowed and Cast Up by the Big Fish (300-325 AD)
Saved sculpture: Sculpture of a Kneeling Knight or King (1150-1250 AD)
Saved sculpture: Th