In [14]:
import requests
import csv
import os

# --- Configuration -----------------------------------------------------------
artifact = "coin"
date_start = 100
date_end = 1500
# Optional cap on how many objects are saved (None = no limit); useful for demos.
max_objects = None
# Seconds to wait on each HTTP request. requests has no default timeout, so
# without this a single stalled connection would block the cell forever.
request_timeout = 30

BASE_SEARCH_URL = "https://collectionapi.metmuseum.org/public/collection/v1/search"
BASE_OBJECT_URL = "https://collectionapi.metmuseum.org/public/collection/v1/objects/"

# Set up directories
root_dir = "artifact_dataset"
images_dir = os.path.join(root_dir, "images", artifact)
os.makedirs(images_dir, exist_ok=True)

# CSV file setup (append mode if already exists). The header is also written
# when the file exists but is empty (e.g. left behind by an aborted run).
csv_filename = os.path.join(root_dir, "metadata.csv")
write_header = not os.path.exists(csv_filename) or os.path.getsize(csv_filename) == 0

# Lower-cased once so the substring filter below is truly case-insensitive.
artifact_key = artifact.lower()

count = 0
# A single Session reuses the underlying connection across the thousands of
# per-object requests instead of reconnecting for each one.
with requests.Session() as session:
    with open(csv_filename, "a", newline="", encoding="utf-8") as csv_file:
        csv_writer = csv.writer(csv_file)
        if write_header:
            csv_writer.writerow(["ArtifactType", "Name", "Age", "Image"])

        # MET API search for objects matching the artifact keyword
        params = {"hasImages": "true", "q": artifact}
        search_response = session.get(BASE_SEARCH_URL, params=params, timeout=request_timeout)
        # Nothing useful can be done without the ID list, so fail loudly here.
        search_response.raise_for_status()
        search_data = search_response.json()
        # `or []` covers both a missing key and an explicit null value.
        objectIDs = search_data.get("objectIDs") or []
        print(f"Total objects found: {search_data.get('total', 0)}")
        if not objectIDs:
            # No exit() here: in a notebook it ends the kernel session rather
            # than just this cell. The loop below simply has nothing to do.
            print(f"No {artifact} objects found.")

        for objectID in objectIDs:
            if max_objects is not None and count >= max_objects:
                break

            # One bad object (network error, HTTP error, non-JSON body) must
            # not abort the whole scrape; report it and move on.
            try:
                object_response = session.get(
                    BASE_OBJECT_URL + str(objectID), timeout=request_timeout
                )
                object_response.raise_for_status()
                obj_data = object_response.json()
            except (requests.RequestException, ValueError) as e:
                print(f"Error fetching metadata for object {objectID}: {e}")
                continue

            # Check if the artifact type appears in objectName or title
            # (case-insensitive). `or ""` guards against null field values.
            object_name = (obj_data.get("objectName") or "").lower()
            title = (obj_data.get("title") or "").lower()
            if artifact_key not in object_name and artifact_key not in title:
                continue

            # Filter by date range: the object's whole date span must lie
            # inside [date_start, date_end].
            begin_date = obj_data.get("objectBeginDate")
            end_date = obj_data.get("objectEndDate")
            if begin_date is None or end_date is None:
                continue
            if begin_date < date_start or end_date > date_end:
                continue

            # Get primary image URL
            image_url = obj_data.get("primaryImage")
            if not image_url:
                continue

            try:
                image_response = session.get(image_url, timeout=request_timeout)
                if image_response.status_code != 200:
                    continue
                image_filename = os.path.join(images_dir, f"{objectID}.jpg")
                with open(image_filename, "wb") as img_file:
                    img_file.write(image_response.content)
            except (requests.RequestException, OSError) as e:
                print(f"Error downloading image for object {objectID}: {e}")
                continue

            # Write metadata to CSV (only after the image was saved successfully)
            name = obj_data.get("title") or "No Title"
            age_str = f"{begin_date}-{end_date} AD"
            csv_writer.writerow([artifact, name, age_str, image_filename])
            print(f"Saved {artifact}: {name} ({age_str})")
            count += 1

        if objectIDs:
            print(f"Finished {artifact} scraping. Total {count} objects saved.")


Total objects found: 5052
Saved coin: Coin (920-970 AD)
Saved coin: Coin (958-1008 AD)
Saved coin: Coin (755-815 AD)
Saved coin: Coin (1074-1124 AD)
Saved coin: Coin (758-808 AD)
Saved coin: Girdle with Coins and Medallions (583-583 AD)
Saved coin: Pectoral with Coins and Pseudo-Medallion (539-550 AD)
Saved coin: “Augustalis” of Frederick II Hohenstaufen (r. 1215–50) (1230-1250 AD)
Saved coin: Dinar of Tughril (r. 1040–63) (1027-1077 AD)
Saved coin: Coin (765-815 AD)
Saved coin: Coin (932-1062 AD)
Saved coin: Coin (400-1399 AD)
Saved coin: Coin (400-1399 AD)
Saved coin: Coin (800-1099 AD)
Saved coin: Coin (1105-1155 AD)
Saved coin: Coin (400-1399 AD)
Saved coin: Coin (400-1399 AD)
Saved coin: Coin (733-793 AD)
Saved coin: Coin (400-1399 AD)
Saved coin: Coin (1275-1299 AD)
Saved coin: Coin (700-899 AD)
Saved coin: Coin (400-1399 AD)
Saved coin: Coin (1000-1199 AD)
Saved coin: Coin (733-793 AD)
Saved coin: Coin (800-1099 AD)
Saved coin: Coin (733-793 AD)
Saved coin: Coin (400-1399 AD)
Sa