In [1]:
import requests
import csv
import os

# --- Configuration -----------------------------------------------------------

# Artifact types to search for; each one is used as the search query, as the
# required substring of objectName/title, and as the image sub-folder name.
artifact_types = ["coin", "sculpture", "inscriptions"]

# Inclusive year range an object's begin/end dates must fall entirely within
date_start = 1000
date_end = 1500

# MET Collection API endpoints
BASE_SEARCH_URL = "https://collectionapi.metmuseum.org/public/collection/v1/search"
BASE_OBJECT_URL = "https://collectionapi.metmuseum.org/public/collection/v1/objects/"

# Seconds to wait on any single HTTP request before giving up on it
REQUEST_TIMEOUT = 30

# Maximum number of saved objects per artifact type (kept small for demonstration)
MAX_PER_TYPE = 10

# Output locations
root_dir = "artifact_dataset"
images_dir = os.path.join(root_dir, "images")
csv_filename = os.path.join(root_dir, "metadata.csv")

# Create the root folder, the images folder and one sub-folder per artifact type
os.makedirs(root_dir, exist_ok=True)
os.makedirs(images_dir, exist_ok=True)
for artifact in artifact_types:
    os.makedirs(os.path.join(images_dir, artifact), exist_ok=True)


# --- Helpers -----------------------------------------------------------------

def fetch_json(session, url, params=None):
    """GET ``url`` and return the decoded JSON object, or ``None`` on failure.

    Args:
        session: ``requests.Session`` used to issue the request.
        url: Absolute URL to fetch.
        params: Optional dict of query-string parameters.

    Returns:
        The decoded JSON payload when it is a dict; ``None`` when the request
        fails, times out, returns an HTTP error status, the body is not valid
        JSON, or the payload is not a JSON object. Failures are printed, never
        raised, so one bad response cannot abort the whole scrape.
    """
    try:
        response = session.get(url, params=params, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on requests versions where
        # the decode error is not a RequestException subclass.
        print(f"Request to {url} failed: {e}")
        return None
    return payload if isinstance(payload, dict) else None


# --- Scrape ------------------------------------------------------------------

# A single Session reuses the underlying connection across the many requests.
with requests.Session() as session:
    with open(csv_filename, "w", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(["ArtifactType", "Name", "Age", "Image"])

        for artifact in artifact_types:
            print(f"Processing artifact type: {artifact}")

            # NOTE(review): the search endpoint also documents dateBegin/dateEnd
            # parameters that might pre-filter server-side and cut the number of
            # per-object requests -- confirm their semantics before relying on them.
            search_params = {"hasImages": "true", "q": artifact}
            search_data = fetch_json(session, BASE_SEARCH_URL, params=search_params)
            # "objectIDs" may be missing or null; treat both as "no results".
            objectIDs = (search_data or {}).get("objectIDs") or []
            if not objectIDs:
                print(f"No objects found for {artifact}.")
                continue

            count = 0  # number of objects saved so far for this artifact type
            for objectID in objectIDs:
                obj_data = fetch_json(session, BASE_OBJECT_URL + str(objectID))
                if obj_data is None:
                    continue

                # The artifact type must appear in the object's name or title
                # (case-insensitive). `or ""` guards against explicit nulls.
                object_name = (obj_data.get("objectName") or "").lower()
                title = (obj_data.get("title") or "").lower()
                if artifact not in object_name and artifact not in title:
                    continue

                # Both dates are required for the range filter
                begin_date = obj_data.get("objectBeginDate")
                end_date = obj_data.get("objectEndDate")
                if begin_date is None or end_date is None:
                    continue

                # Keep only objects dated entirely inside [date_start, date_end]
                if begin_date < date_start or end_date > date_end:
                    continue

                # A primary image is required
                image_url = obj_data.get("primaryImage")
                if not image_url:
                    continue

                # Download the image and save it under the artifact's folder
                image_filename = os.path.join(images_dir, artifact, f"{objectID}.jpg")
                try:
                    image_response = session.get(image_url, timeout=REQUEST_TIMEOUT)
                    if image_response.status_code != 200:
                        print(
                            f"Skipping object {objectID}: image request returned "
                            f"HTTP {image_response.status_code}"
                        )
                        continue
                    with open(image_filename, "wb") as img_file:
                        img_file.write(image_response.content)
                except (requests.RequestException, OSError) as e:
                    print(f"Error downloading image for object {objectID}: {e}")
                    continue

                # Record metadata only after the image was written successfully
                name = obj_data.get("title") or "No Title"
                age_str = f"{begin_date}-{end_date} AD"
                csv_writer.writerow([artifact, name, age_str, image_filename])
                print(f"Saved {artifact}: {name} ({age_str})")

                count += 1
                if count >= MAX_PER_TYPE:
                    break

print(f"Scraping complete. Metadata saved to '{csv_filename}' and images stored in '{images_dir}'.")


Processing artifact type: coin
Saved coin: Coin (1074-1124 AD)
Saved coin: “Augustalis” of Frederick II Hohenstaufen (r. 1215–50) (1230-1250 AD)
Saved coin: Dinar of Tughril (r. 1040–63) (1027-1077 AD)
Saved coin: Coin (1105-1155 AD)
Saved coin: Coin (1275-1299 AD)
Saved coin: Coin (1000-1199 AD)
Saved coin: Coin (1100-1199 AD)
Saved coin: Coin (1172-1244 AD)
Saved coin: Coin (1100-1199 AD)
Saved coin: Coin (1200-1299 AD)
Saved coin: Coin (1035-1095 AD)
Saved coin: Coin (1192-1242 AD)
Saved coin: Coin (1100-1299 AD)
Saved coin: Coin (1409-1459 AD)
Saved coin: Coin (1294-1344 AD)
Saved coin: Coin (1300-1399 AD)
Saved coin: Coin (1300-1399 AD)
Saved coin: Coin (1206-1360 AD)
Saved coin: Coin (1100-1399 AD)
Saved coin: Coin (1100-1199 AD)
Processing artifact type: sculpture
Saved sculpture: Sculpture (1000-1199 AD)
Saved sculpture: Sculpture of a Wise Man (from a Group with the Adoration of the Magi) (1172-1203 AD)
Saved sculpture: Buddha Protected by a Seven-headed Naga (1167-1233 AD)
Sa

KeyboardInterrupt: 