In [8]:
from pathlib import Path

# Project root used to build the data folder paths. "." is relative to the
# kernel's current working directory, so the resolved path is printed to check
# where it actually points.
# NOTE(review): the recorded output of this cell resolves to the ".idea"
# folder, not the repository root — confirm the working directory before
# running cells that build paths from a relative BASE_DIR.
BASE_DIR = Path(".")  # adjust if needed
print(BASE_DIR.resolve())

C:\Users\phusson\Desktop\Git_Arterm\index_oeuvres\.idea


In [None]:
import json
from pathlib import Path

# Project root, relative to the kernel's working directory; adjust if needed.
BASE_DIR = Path(".")

# Folder holding the JSON records of each entry type, keyed by entry type.
FOLDERS = dict(
    peinture=BASE_DIR.joinpath("data", "entry_peinture"),
    architecture=BASE_DIR.joinpath("data", "entry_architecture"),
)

def process_peinture(file_path):
    """Migrate a painting record from ``holding_institution`` to ``location``.

    The JSON file at ``file_path`` is read (it is not written here) and a new
    dict is returned in which the top-level ``holding_institution`` entry is
    removed and re-expressed as a ``location`` entry of type
    ``"holding_institution"``.

    The ``location`` entry is placed right after ``dateCreated``. When the
    record has no ``dateCreated`` key, it is appended at the end instead, so
    the institution data is never silently dropped. Records without a
    non-empty ``holding_institution`` keep their remaining keys unchanged.

    Args:
        file_path: path of the JSON record to read.

    Returns:
        dict: the migrated record, in its new key order.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    holding = data.pop("holding_institution", None)

    # Build the new structure once; None means "nothing to migrate".
    location = None
    if holding:
        location = {
            "type": "holding_institution",
            "institution": {
                "name": holding.get("name", ""),
                "place": holding.get("place", ""),
                "inventory_number": holding.get("inventory_number", ""),
                # the legacy key is upper-case "URL", the new one lower-case
                "url": holding.get("URL", "")
            }
        }

    new_data = {}

    for key, value in data.items():
        new_data[key] = value

        # insert right after dateCreated
        if key == "dateCreated" and location is not None:
            new_data["location"] = location

    # Fallback when dateCreated is absent: keep the institution data anyway.
    if location is not None and "location" not in new_data:
        new_data["location"] = location

    return new_data



def process_architecture(file_path):
    """Migrate the ``location`` entry of an architecture record.

    The JSON file at ``file_path`` is read (it is not written here). A legacy
    flat ``location`` dict (``city`` / ``country`` / ``coordinates``) is
    wrapped into the new structure ``{"type": "place", "place": {...}}``.

    The record is returned untouched when ``location`` is missing or empty,
    already carries a ``type`` key (already migrated), or is not a dict at
    all (e.g. a plain string), which would otherwise crash on ``.get``.

    Args:
        file_path: path of the JSON record to read.

    Returns:
        dict: the record, with ``location`` migrated when applicable.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    loc = data.get("location")

    # skip files that were already migrated, and non-dict values
    if loc and isinstance(loc, dict) and "type" not in loc:
        data["location"] = {
            "type": "place",
            "place": {
                "city": loc.get("city", ""),
                "country": loc.get("country", ""),
                "coordinates": loc.get("coordinates", {
                    "latitude": "",
                    "longitude": ""
                })
            }
        }

    return data


def process_folder(folder, processor):
    """Apply ``processor`` to every ``*.json`` file of ``folder``, in place.

    Each file is passed to ``processor`` (which receives the file path and
    returns the new record) and is then overwritten with that record,
    serialized as indented UTF-8 JSON. One confirmation line is printed per
    file.

    Args:
        folder: ``pathlib.Path`` of the directory to scan (non-recursive).
        processor: callable taking a file path and returning a
            JSON-serializable object.

    Raises:
        TypeError / ValueError: if a processor result cannot be serialized;
            the file being processed is left untouched in that case.
    """
    for json_file in folder.glob("*.json"):
        new_data = processor(json_file)

        # Serialize before opening: opening in "w" mode truncates the file,
        # so a serialization error after that point would destroy the record.
        payload = json.dumps(new_data, ensure_ascii=False, indent=2)

        with open(json_file, "w", encoding="utf-8") as f:
            f.write(payload)

        print(f"✔ {json_file.name} mis à jour")


# Entry point: migrate the painting records, then the architecture records.
# process_folder() overwrites each JSON file in place with the processor's
# result.
if __name__ == "__main__":
    process_folder(FOLDERS["peinture"], process_peinture)
    process_folder(FOLDERS["architecture"], process_architecture)


In [3]:
import json
from pathlib import Path

# Folder of the painting records. BASE_DIR is not defined in this cell: it
# must come from a cell that was run beforehand.
PEINTURE_DIR = BASE_DIR / "data" / "entry_peinture"

def reorder_location_after_datecreated(file_path):
    """Move the ``location`` entry of a record right after ``dateCreated``.

    The JSON file at ``file_path`` is read and, when it qualifies, rewritten
    in place with the same content but with ``location`` positioned just
    after ``dateCreated`` (or at the end when ``dateCreated`` is absent).

    A file qualifies only when its ``location`` is a dict whose ``type`` is
    ``"holding_institution"``. Missing, empty, non-dict (e.g. a plain string)
    or differently typed locations are skipped and the file is left untouched.

    Args:
        file_path: path of the JSON record to process.

    Returns:
        bool: True when the file was rewritten, False when it was skipped.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # check that location exists, is a dict, and has the expected type
    location = data.get("location")
    if not isinstance(location, dict) or location.get("type") != "holding_institution":
        return False

    new_data = {}

    for key, value in data.items():
        if key == "location":
            continue  # re-inserted further down

        new_data[key] = value

        if key == "dateCreated":
            new_data["location"] = location

    # safety net: dateCreated did not exist
    if "location" not in new_data:
        new_data["location"] = location

    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(new_data, f, ensure_ascii=False, indent=2)

    return True


# Reorder every painting record and report how many files were rewritten.
# glob() on a directory that does not exist yields nothing instead of failing,
# so a wrong PEINTURE_DIR would otherwise only show up as a silent "0".
if not PEINTURE_DIR.is_dir():
    print(f"⚠ dossier introuvable : {PEINTURE_DIR.resolve()}")

count = 0
for json_file in PEINTURE_DIR.glob("*.json"):
    if reorder_location_after_datecreated(json_file):
        count += 1

print(f"✔ {count} notices peinture corrigées (ordre de location)")


✔ 0 notices peinture corrigées (ordre de location)


In [21]:
import json

# Canonical shape of an architecture record. normalize_dict() walks the keys
# of its template in order, so the key order below is the key order of the
# normalized output, and the values are the defaults inserted for missing keys.
# For list fields, normalize_dict() uses only the first dict as the template
# of every item; the whole list is what gets inserted when the field is absent.
# NOTE(review): the same literal is defined again in the cell that runs the
# architecture normalization — keep both in sync or drop one.
ARCHITECTURE_TEMPLATE = {
    "id": "",
    "QID_wikidata": "",
    "entry_type": "architecture",
    "title": "",
    "creator": [
        {
            "xml_id": "",
            "role": ""
        }
    ],
    "dateCreated": {
        "startYear": "",
        "endYear": "",
        "text": ""
    },
    "typology": "",
    # new-style location: a typed wrapper around the place details
    "location": {
        "type": "place",
        "place": {
            "city": "",
            "country": "",
            "coordinates": {
                "latitude": "",
                "longitude": ""
            }
        }
    },
    "related_works": [
        {
            "link_type": "",
            "xml_id_work": ""
        }
    ],
    "bibliography": [
        {
            "zotero_key": "",
            "location": ""
        }
    ],
    "illustrations": [
        {
            "id": 0,
            "url": "",
            "copyright": "",
            "caption": "",
            "storage": ""
        }
    ],
    "description": "",
    "commentary": "",
    # default history: one "created" and one "modified" placeholder entry
    "history": [
        {
            "date": "",
            "type": "created",
            "author": ""
        },
        {
            "date": "",
            "type": "modified",
            "author": ""
        }
    ]
}


In [19]:
import json
from pathlib import Path

# Absolute project root; the data folders below are derived from it.
BASE_DIR = Path("C:/Users/phusson/Desktop/Git_Arterm/index_oeuvres")

# Folder holding the JSON records of each entry type, keyed by entry type.
FOLDERS = dict(
    peinture=BASE_DIR.joinpath("data", "entry_peinture"),
    architecture=BASE_DIR.joinpath("data", "entry_architecture"),
)

# Canonical shape of an architecture record, used by process_architecture()
# through normalize_dict(). The template's keys are walked in order, so the
# key order below is the key order of the normalized output, and the values
# are the defaults inserted for missing keys.
# For list fields, only the first dict serves as the template of every item;
# the whole list is what gets inserted when the field is absent.
ARCHITECTURE_TEMPLATE = {
    "id": "",
    "QID_wikidata": "",
    "entry_type": "architecture",
    "title": "",
    "creator": [
        {
            "xml_id": "",
            "role": ""
        }
    ],
    "dateCreated": {
        "startYear": "",
        "endYear": "",
        "text": ""
    },
    "typology": "",
    # new-style location: a typed wrapper around the place details
    "location": {
        "type": "place",
        "place": {
            "city": "",
            "country": "",
            "coordinates": {
                "latitude": "",
                "longitude": ""
            }
        }
    },
    "related_works": [
        {
            "link_type": "",
            "xml_id_work": ""
        }
    ],
    "bibliography": [
        {
            "zotero_key": "",
            "location": ""
        }
    ],
    "illustrations": [
        {
            "id": 0,
            "url": "",
            "copyright": "",
            "caption": "",
            "storage": ""
        }
    ],
    "description": "",
    "commentary": "",
    # default history: one "created" and one "modified" placeholder entry
    "history": [
        {
            "date": "",
            "type": "created",
            "author": ""
        },
        {
            "date": "",
            "type": "modified",
            "author": ""
        }
    ]
}


def normalize_dict(data: dict, template: dict) -> dict:
    """Rebuild ``data`` with the keys, key order and defaults of ``template``.

    Only the keys of ``template`` are kept, in the template's order; keys of
    ``data`` that the template does not list are dropped. A key missing from
    ``data`` receives a deep copy of the template's default, so the result
    never shares mutable objects with the template.

    Nested dicts are normalized recursively. For a list whose template is a
    list starting with a dict, every dict item is normalized against that
    first dict; items that are not dicts are kept as they are. Any other
    value is copied over unchanged, even when its type differs from the
    default's.

    Args:
        data: the record (or sub-record) to normalize.
        template: the reference structure providing keys, order and defaults.

    Returns:
        dict: a new, normalized dict; ``data`` and ``template`` are not
        modified.
    """
    from copy import deepcopy  # local import keeps the function self-contained

    result = {}

    for key, default_value in template.items():
        if key not in data:
            # copy: handing out the template's own dict/list would let a later
            # edit of one result leak into the template and every other result
            result[key] = deepcopy(default_value)
            continue

        value = data[key]

        # nested dictionary
        if isinstance(default_value, dict) and isinstance(value, dict):
            result[key] = normalize_dict(value, default_value)

        # list of dictionaries
        elif isinstance(default_value, list) and isinstance(value, list):
            if default_value and isinstance(default_value[0], dict):
                result[key] = [
                    # non-dict items cannot be normalized; keep them verbatim
                    normalize_dict(item, default_value[0])
                    if isinstance(item, dict) else item
                    for item in value
                ]
            else:
                result[key] = value

        else:
            result[key] = value

    return result

def process_architecture(file_path):
    """Load an architecture record, migrate its location and normalize it.

    The JSON file at ``file_path`` is read (it is not written here). A legacy
    flat ``location`` dict, recognized by the absence of a ``type`` key, is
    wrapped into ``{"type": "place", "place": {...}}``. The record is then
    passed through ``normalize_dict`` with ``ARCHITECTURE_TEMPLATE``, which
    fixes the key set and key order.

    Args:
        file_path: path of the JSON record to read.

    Returns:
        dict: the migrated and normalized record.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        record = json.load(handle)

    # location migration (old structure -> new structure)
    legacy = record.get("location")
    is_legacy_location = bool(legacy) and isinstance(legacy, dict) and "type" not in legacy
    if is_legacy_location:
        blank_coordinates = {"latitude": "", "longitude": ""}
        place = {
            "city": legacy.get("city", ""),
            "country": legacy.get("country", ""),
            "coordinates": legacy.get("coordinates", blank_coordinates),
        }
        record["location"] = {"type": "place", "place": place}

    # full normalization, including key order
    return normalize_dict(record, ARCHITECTURE_TEMPLATE)

def process_folder(folder, processor):
    """Apply ``processor`` to every ``*.json`` file of ``folder``, in place.

    The scanned folder and the number of files found are printed first. Each
    file is then passed to ``processor`` (which receives the file path and
    returns the new record) and overwritten with that record, serialized as
    indented UTF-8 JSON. Progress lines are printed before and after each
    file.

    Args:
        folder: ``pathlib.Path`` of the directory to scan (non-recursive).
        processor: callable taking a file path and returning a
            JSON-serializable object.

    Raises:
        TypeError / ValueError: if a processor result cannot be serialized;
            the file being processed is left untouched in that case.
    """
    print("Dossier scanné :", folder.resolve())

    # materialize the listing up front so the count can be reported
    files = list(folder.glob("*.json"))
    print("Fichiers trouvés :", len(files))

    for json_file in files:
        print("→ traitement :", json_file.name)
        new_data = processor(json_file)

        # Serialize before opening: opening in "w" mode truncates the file,
        # so a serialization error after that point would destroy the record.
        payload = json.dumps(new_data, ensure_ascii=False, indent=2)

        with open(json_file, "w", encoding="utf-8") as f:
            f.write(payload)

        print(f"✔ {json_file.name} mis à jour")



# Entry point: normalize every architecture record. process_folder()
# overwrites each JSON file in place with the processor's result.
if __name__ == "__main__":
    process_folder(FOLDERS["architecture"], process_architecture)

Dossier scanné : C:\Users\phusson\Desktop\Git_Arterm\index_oeuvres\data\entry_architecture
Fichiers trouvés : 34
→ traitement : AgnoloGaddiArno.json
✔ AgnoloGaddiArno.json mis à jour
→ traitement : AgnoloGaddiTour.json
✔ AgnoloGaddiTour.json mis à jour
→ traitement : BasiliqueSaintAugustinRome.json
✔ BasiliqueSaintAugustinRome.json mis à jour
→ traitement : BasiliqueSantaMariaDelPopolo.json
✔ BasiliqueSantaMariaDelPopolo.json mis à jour
→ traitement : BibliothequeAmbrosienneMilan.json
✔ BibliothequeAmbrosienneMilan.json mis à jour
→ traitement : ChapelleSixtine.json
✔ ChapelleSixtine.json mis à jour
→ traitement : ColonneAntonine.json
✔ ColonneAntonine.json mis à jour
→ traitement : ColonneTrajane.json
✔ ColonneTrajane.json mis à jour
→ traitement : DedaleLabyrinthe.json
✔ DedaleLabyrinthe.json mis à jour
→ traitement : LibreriaPiccolomini.json
✔ LibreriaPiccolomini.json mis à jour
→ traitement : LouvreParis.json
✔ LouvreParis.json mis à jour
→ traitement : PalaisCapriniRome.json
✔ Pal

In [22]:
# Canonical shape of a painting record, used by process_peinture() through
# normalize_dict(). The template's keys are walked in order, so the key order
# below is the key order of the normalized output, and the values are the
# defaults inserted for missing (or null) keys.
# For list fields, only the first dict serves as the template of every item;
# the whole list is what gets inserted when the field is absent.
PEINTURE_TEMPLATE = {
    "id": "",
    "QID_wikidata": "",
    "entry_type": "peinture",
    "title": "",
    "creator": [
        {
            "xml_id": "",
            "role": ""
        }
    ],
    "dateCreated": {
        "startYear": "",
        "endYear": "",
        "text": ""
    },
    "materialsAndTechniques": "",
    # new-style location: a typed wrapper around the holding institution
    "location": {
        "type": "holding_institution",
        "institution": {
            "name": "",
            "place": "",
            "inventory_number": "",
            "url": ""
        }
    },
    "related_works": [
        {
            "link_type": "",
            "xml_id_work": ""
        }
    ],
    "bibliography": [
        {
            "zotero_key": "",
            "location": ""
        }
    ],
    "illustrations": [
        {
            "id": 0,
            "url": "",
            "copyright": "",
            "caption": "",
            "storage": ""
        }
    ],
    "description": "",
    "commentary": "",
    # default history: one "created" and one "modified" placeholder entry
    "history": [
        {
            "date": "",
            "type": "created",
            "author": ""
        },
        {
            "date": "",
            "type": "modified",
            "author": ""
        }
    ]
}

def normalize_dict(data: dict, template: dict) -> dict:
    """Rebuild ``data`` with the keys, key order and defaults of ``template``.

    Only the keys of ``template`` are kept, in the template's order; keys of
    ``data`` that the template does not list are dropped. A key that is
    missing from ``data`` or whose value is ``None`` receives a deep copy of
    the template's default, so the result never shares mutable objects with
    the template.

    Nested dicts are normalized recursively. For a list whose template is a
    list starting with a dict, every dict item is normalized against that
    first dict; items that are not dicts are kept as they are. Any other
    value is copied over unchanged, even when its type differs from the
    default's.

    Args:
        data: the record (or sub-record) to normalize.
        template: the reference structure providing keys, order and defaults.

    Returns:
        dict: a new, normalized dict; ``data`` and ``template`` are not
        modified.
    """
    from copy import deepcopy  # local import keeps the function self-contained

    result = {}

    for key, default_value in template.items():
        value = data.get(key, None)

        if value is None:
            # copy: handing out the template's own dict/list would let a later
            # edit of one result leak into the template and every other result
            result[key] = deepcopy(default_value)
            continue

        # nested dict
        if isinstance(default_value, dict) and isinstance(value, dict):
            result[key] = normalize_dict(value, default_value)

        # list of dicts
        elif isinstance(default_value, list) and isinstance(value, list):
            if default_value and isinstance(default_value[0], dict):
                result[key] = [
                    # non-dict items have no .get(); keep them verbatim
                    normalize_dict(item, default_value[0])
                    if isinstance(item, dict) else item
                    for item in value
                ]
            else:
                result[key] = value

        else:
            result[key] = value

    return result

def process_peinture(file_path):
    """Load a painting record and normalize it against ``PEINTURE_TEMPLATE``.

    The JSON file at ``file_path`` is read (it is not written here) and
    passed through ``normalize_dict``, which fixes the key set and key order.

    NOTE(review): the template has no top-level ``holding_institution`` key,
    so a record still using that legacy key would lose it here — confirm the
    location migration was applied to the files beforehand.

    Args:
        file_path: path of the JSON record to read.

    Returns:
        dict: the normalized record.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        record = json.load(handle)

    # full normalization, including key order
    return normalize_dict(record, PEINTURE_TEMPLATE)

# Entry point: normalize every painting record in place.
# process_folder and FOLDERS are not defined in this cell: they must come
# from a cell that was run beforehand.
if __name__ == "__main__":
    process_folder(FOLDERS["peinture"], process_peinture)

Dossier scanné : C:\Users\phusson\Desktop\Git_Arterm\index_oeuvres\data\entry_peinture
Fichiers trouvés : 138
→ traitement : AndreaAndreaniTriompheGrave.json
✔ AndreaAndreaniTriompheGrave.json mis à jour
→ traitement : AndreaDelCastagnoExecutionConjures.json
✔ AndreaDelCastagnoExecutionConjures.json mis à jour
→ traitement : AnonymeHelene.json
✔ AnonymeHelene.json mis à jour
→ traitement : ApelleCalomnie.json
✔ ApelleCalomnie.json mis à jour
→ traitement : ApelleCampaspe.json
✔ ApelleCampaspe.json mis à jour
→ traitement : ArcheAlliance.json
✔ ArcheAlliance.json mis à jour
→ traitement : BotticelliDivineComedie.json
✔ BotticelliDivineComedie.json mis à jour
→ traitement : CarracheGalerieFarnese.json
✔ CarracheGalerieFarnese.json mis à jour
→ traitement : CavalliniCrucifixSaintPaul.json
✔ CavalliniCrucifixSaintPaul.json mis à jour
→ traitement : CopieLeonardCeneParis.json
✔ CopieLeonardCeneParis.json mis à jour
→ traitement : CorregeCoupoleAssomption.json
✔ CorregeCoupoleAssomption.json

In [1]:
import json
import os

# Input folder (painting records) and output file (list of institution names).
DOSSIER_JSON = "C:\\Users\\phusson\\Desktop\\Git_Arterm\\index_oeuvres\\data\\entry_peinture"
SORTIE_JSON = "C:\\Users\\phusson\\Desktop\\Git_Arterm\\index_oeuvres\\data\\list_form\\institutions.json"

# A set removes duplicate institution names across records.
institutions = set()

for fichier in os.listdir(DOSSIER_JSON):
    if not fichier.endswith(".json"):
        continue

    chemin = os.path.join(DOSSIER_JSON, fichier)
    try:
        with open(chemin, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (json.JSONDecodeError, OSError) as e:
        # best effort: report the unreadable file and carry on with the others
        print(f"Erreur avec {fichier} : {e}")
        continue

    # Walk location -> institution -> name one level at a time. Any level may
    # be missing, null or of an unexpected type; chaining .get() calls would
    # raise AttributeError in those cases, which the except above does not
    # catch.
    location = data.get("location") if isinstance(data, dict) else None
    institution = location.get("institution") if isinstance(location, dict) else None
    name = institution.get("name") if isinstance(institution, dict) else None

    # Strip before testing so whitespace-only names are not stored as "".
    if isinstance(name, str) and name.strip():
        institutions.add(name.strip())

# Sorted list of the unique names
liste_finale = sorted(institutions)

# Write the final JSON file
with open(SORTIE_JSON, "w", encoding="utf-8") as f:
    json.dump(liste_finale, f, ensure_ascii=False, indent=2)

print(f"{len(liste_finale)} institutions enregistrées dans {SORTIE_JSON}")


29 institutions enregistrées dans C:\Users\phusson\Desktop\Git_Arterm\index_oeuvres\data\list_form\institutions.json
