In [2]:
import json
from pathlib import Path
import shutil

# Input file, the location of its safety copy, and the prefix that every
# non-empty "direccion_pdf" value must start with.
json_file = Path("Datos Poder Judicial.json")
backup_file = json_file.with_suffix(".backup.json")
base_path = "AIMARACENIA/PODER_JUDICIAL/corte_suprema_de_justicia/"

data = json.loads(json_file.read_text(encoding="utf-8"))

# Prefix every non-empty string path that does not already carry base_path.
# The startswith check keeps the transformation idempotent across re-runs.
modified = 0
for rec in data:
    val = rec.get("direccion_pdf")
    if isinstance(val, str) and val.strip() and not val.startswith(base_path):
        rec["direccion_pdf"] = base_path + val
        modified += 1

# Back up and rewrite only when something actually changed. Copying
# unconditionally would, on a second run of this cell, overwrite the backup
# with the already-modified file and lose the pre-modification data.
if modified:
    shutil.copy2(json_file, backup_file)
    json_file.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
print(f"Registros procesados: {len(data)}. Modificados: {modified}. Backup en: {backup_file}")


Registros procesados: 52055. Modificados: 52055. Backup en: Datos Poder Judicial.backup.json


In [3]:
import json
import csv
import re
from pathlib import Path

# Source JSON and the CSV written next to it (same base name, ".csv" extension).
json_file = Path("Datos Poder Judicial.json")
csv_file = json_file.with_name(json_file.stem + ".csv")

# Matches either a complete JSON string literal (with backslash escapes) or a
# bare NaN / None token. Matching strings first lets the substitution skip
# over them so that text inside string values is never rewritten.
_STRING_OR_BAD_LITERAL = re.compile(r'"(?:\\.|[^"\\])*"|\b(?:NaN|None)\b')


def safe_load_json_text(text):
    """Parse JSON text, tolerating bare ``NaN`` / ``None`` literals.

    The text is first parsed as-is. If that raises ``json.JSONDecodeError``,
    every bare ``NaN`` or ``None`` token found outside string literals is
    replaced with ``null`` and parsing is retried once.

    Args:
        text: The JSON document as a string.

    Returns:
        The decoded Python object.

    Raises:
        json.JSONDecodeError: If the text is still invalid after the repair.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        def _repair(match):
            token = match.group(0)
            # String literals are returned untouched; only bare tokens change.
            return token if token.startswith('"') else "null"

        fixed = _STRING_OR_BAD_LITERAL.sub(_repair, text)
        return json.loads(fixed)

def to_cell(v):
    """Render a decoded JSON value as the text of one CSV cell.

    Dicts and lists are serialised back to JSON (non-ASCII characters kept
    as-is), ``None`` becomes an empty string, and anything else goes through
    ``str()``.
    """
    is_container = isinstance(v, list) or isinstance(v, dict)
    if is_container:
        return json.dumps(v, ensure_ascii=False)
    return "" if v is None else str(v)

text = json_file.read_text(encoding="utf-8")
data = safe_load_json_text(text)

if not isinstance(data, list):
    raise SystemExit("El JSON debe contener una lista de registros en el nivel superior.")

# Collect the column names: keys found at the top level of any record, plus
# keys found inside any dict-valued "metadata" entry. A "metadata" value that
# is not a dict is treated as an ordinary top-level column.
top_keys = set()
meta_keys = set()
for rec in data:
    for k, v in rec.items():
        if k == "metadata" and isinstance(v, dict):
            meta_keys.update(v.keys())
        else:
            top_keys.add(k)

# Sort for a stable column order; the flattened metadata_* columns go last.
top_keys = sorted(top_keys)
meta_keys = sorted(meta_keys)
fieldnames = top_keys + [f"metadata_{k}" for k in meta_keys]

with csv_file.open("w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    for rec in data:
        # Only a dict can feed the metadata_* columns. Any other value
        # (missing, null, string, list, ...) is treated as "no metadata"
        # instead of failing on .get(), matching the key detection above.
        meta = rec.get("metadata")
        if not isinstance(meta, dict):
            meta = {}
        row = {k: to_cell(rec.get(k)) for k in top_keys}
        row.update((f"metadata_{mk}", to_cell(meta.get(mk))) for mk in meta_keys)
        writer.writerow(row)

print(f"CSV creado: {csv_file} ({len(data)} registros)")

CSV creado: Datos Poder Judicial.csv (52055 registros)
