In [None]:
%pip install requests

In [None]:
# Parameters
departement_id = 9  # value sent as `departmentIds` to the Met Collection API
qualite_image = "small"  # "original" or "small"

# Fail fast: this value is only consumed by the image download cell, which runs
# after the (long) metadata scraping cells.
if qualite_image not in ("original", "small"):
    raise ValueError("❌ 'qualite_image' doit être 'original' ou 'small'")

# File in which the list of object IDs of the department is stored.
json_objects = f"json_objects/met_dep{departement_id}_list.json"

In [None]:
import requests
import json

import os

# API URL listing the objects of the department selected in the parameters cell.
url = f"https://collectionapi.metmuseum.org/public/collection/v1/objects?departmentIds={departement_id}"

# Session carrying browser-like headers; it is also used by get_artwork_data
# in a later cell.
session = requests.Session()
session.headers.update({
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    "Accept": "application/json,text/plain,*/*",
    "Accept-Language": "en-US,en;q=0.9",
})

# Send the request through the session (so the headers above are actually used)
# and with a timeout so the cell cannot hang forever.
response = session.get(url, timeout=30)

# Only save the payload when the request succeeded.
if response.status_code == 200:
    data = response.json()

    # Make sure the target directory exists before writing into it.
    os.makedirs(os.path.dirname(json_objects) or ".", exist_ok=True)

    # Save the payload to a local JSON file.
    with open(json_objects, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=4)

    print(f"✅ Données sauvegardées dans {json_objects}")
else:
    print(f"❌ Erreur lors de la récupération des données : {response.status_code}")


In [None]:
import requests
import json
import time
import os
import requests, json, time

# Load the object IDs from the list file written by the previous cell.
with open(json_objects, "r", encoding="utf-8") as f:
    data = json.load(f)

# `or []` also covers a key that is present with a null value, which
# `.get(key, [])` would hand back as None and break the loop below.
object_ids = data.get("objectIDs") or []

# Records fetched during this run only.
results = []

# Output file; it also serves as the resume point between runs.
data_json = f"json_data/met_dep{departement_id}_datas.json"
existing_data = []

if os.path.exists(data_json):
    with open(data_json, "r", encoding="utf-8") as f_out:
        try:
            existing_data = json.load(f_out)
        except json.JSONDecodeError as e:
            # Restart from scratch, but say so instead of silently dropping progress.
            print(f"⚠️ {data_json} illisible ({e}), reprise à zéro.")
            existing_data = []

# IDs already present in the output file; entries without an "objectID" are
# ignored rather than aborting the whole cell with a KeyError.
processed_ids = {
    item["objectID"]
    for item in existing_data
    if isinstance(item, dict) and "objectID" in item
}

# Fetch the metadata of a single artwork and keep only the fields used later.

def get_artwork_data(object_id, http_session=None):
    """Fetch one object record from the Met Collection API.

    Args:
        object_id: Identifier inserted into the object endpoint URL.
        http_session: Optional object exposing ``get(url, timeout=...)``.
            When omitted, the notebook-level ``session`` is used.

    Returns:
        A dict with the keys ``objectID``, ``artist``, ``title``, ``date``,
        ``image`` and ``image_small``, or ``None`` when the request fails,
        the status is not 200, the body looks like HTML, or the body is not
        a JSON object. Every failure is reported with ``print``.
    """
    client = session if http_session is None else http_session
    url = f"https://collectionapi.metmuseum.org/public/collection/v1/objects/{object_id}"
    try:
        response = client.get(url, timeout=10)
    except requests.RequestException as e:
        print(f"❌ Requête échouée pour {object_id}: {e}")
        return None

    if response.status_code != 200:
        print(f"❌ HTTP {response.status_code} pour {object_id}")
        return None

    text = response.text.strip()
    # A body starting with "<" is treated as an HTML page rather than JSON.
    if text.startswith("<"):
        print(f"⚠️ Bloqué ou HTML reçu pour {object_id}: {text[:100]!r}")
        return None

    try:
        obj = response.json()
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; catching the base class
        # also covers decoders that raise a different ValueError subclass.
        print(f"⚠️ JSON invalide pour {object_id}: {text[:100]!r}")
        return None

    # Valid JSON that is not an object (list, number, ...) has no fields to read.
    if not isinstance(obj, dict):
        print(f"⚠️ JSON invalide pour {object_id}: {text[:100]!r}")
        return None

    return {
        "objectID": object_id,
        # `or` makes the fallback apply to empty/None values too, not only to
        # missing keys.
        "artist": obj.get("artistDisplayName") or "Inconnu",
        "title": obj.get("title") or "Sans titre",
        "date": obj.get("objectDate") or "Date inconnue",
        "image": obj.get("primaryImage"),
        "image_small": obj.get("primaryImageSmall")
    }


# Fetch every object ID that is not in the output file yet, checkpointing
# progress to disk along the way.
CHECKPOINT_EVERY = 25  # new records accumulated between two writes of the file


def _save_progress(records, path):
    """Write ``records`` to ``path`` as JSON via a temporary file.

    The data is written to ``<path>.tmp`` and then moved over ``path`` with
    ``os.replace``, so an interruption during the dump does not leave a
    truncated JSON file at ``path``.
    """
    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f_out:
        json.dump(records, f_out, ensure_ascii=False, indent=4)
    os.replace(tmp_path, path)


total_ids = len(object_ids)
unsaved = 0  # records appended since the last write

try:
    for i, object_id in enumerate(object_ids):
        if object_id in processed_ids:
            print(f"⏭️  {object_id} déjà traité.")
            continue

        print(f"🔍 Traitement {i + 1}/{total_ids} : {object_id}")
        artwork = get_artwork_data(object_id)

        if artwork:
            existing_data.append(artwork)
            processed_ids.add(object_id)
            results.append(artwork)
            unsaved += 1

            # Rewriting the whole file after each record grows quadratically
            # with the number of records, so only do it periodically.
            if unsaved >= CHECKPOINT_EVERY:
                _save_progress(existing_data, data_json)
                unsaved = 0

        time.sleep(0.1)  # short pause so the API is not flooded
finally:
    # Also runs when the cell is interrupted, so pending records are kept.
    if unsaved:
        _save_progress(existing_data, data_json)

print(f"✅ Sauvegarde terminée dans {data_json}")


In [None]:
import os
import requests
import json
import time

# Validate the requested quality first, so an invalid value does not leave a
# stray directory behind before the error is raised.
if qualite_image not in ["original", "small"]:
    raise ValueError("❌ 'qualite_image' doit être 'original' ou 'small'")

# One folder per department and per image quality.
image_folder = f"images/{departement_id}/{qualite_image}"
os.makedirs(image_folder, exist_ok=True)

# Metadata file produced by the scraping cell.
data_json = f"json_data/met_dep{departement_id}_datas.json"

with open(data_json, "r", encoding="utf-8") as f:
    data = json.load(f)

def download_image(url, path):
    """Download ``url`` and store it at ``path``, reporting the outcome with print.

    The body is streamed into ``<path>.part`` and moved to ``path`` only once
    the download has completed, so a failure mid-transfer does not leave a
    truncated file at ``path``.

    Args:
        url: Address of the image to download.
        path: Destination file path.

    Returns:
        None. Errors are printed, not raised (best-effort download).
    """
    tmp_path = f"{path}.part"
    try:
        # The context manager releases the connection even on early exit;
        # the timeout prevents a stalled server from blocking the loop.
        with requests.get(url, stream=True, timeout=30) as r:
            if r.status_code != 200:
                print(f"⚠️ Erreur de téléchargement ({r.status_code}) : {url}")
                return
            with open(tmp_path, "wb") as f:
                for chunk in r.iter_content(8192):
                    f.write(chunk)
        os.replace(tmp_path, path)
        print(f"✅ Image enregistrée : {path}")
    except Exception as e:
        # Deliberately broad: one failed image must not stop the whole batch.
        print(f"❌ Erreur : {e}")
        # Remove the partial file, if any.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

from urllib.parse import urlparse

# Record field holding the URL for the requested quality (loop-invariant).
url_key = "image" if qualite_image == "original" else "image_small"
total_items = len(data)

for i, item in enumerate(data):
    object_id = item["objectID"]
    image_url = item.get(url_key)

    print(f"🔍 Téléchargement {i + 1}/{total_items} : {object_id}")

    if not image_url:
        print(f"⏭️  Pas d’image pour {object_id}")
        continue

    # Take the extension from the URL path only, so a query string or fragment
    # containing a dot cannot be mistaken for the file extension.
    ext = os.path.splitext(urlparse(image_url).path)[-1]
    filename = f"{object_id}{ext}"
    image_path = os.path.join(image_folder, filename)

    # Skip files that are already on disk.
    if os.path.exists(image_path):
        print(f"🟡 Image déjà présente : {filename}")
        continue

    # Download the image.
    download_image(image_url, image_path)

    # Pause between downloads; skipped items above do not wait.
    time.sleep(2)
    
