In [1]:
import json
import re
from typing import List, Dict, Any

# Path to the dataset
DATA_PATH = "/workspace/uploads/creatures_2025.json"

# Try to read via Editor tool if available; otherwise fallback to standard file IO
raw_text = None
json_valid = False
read_errors = []

try:
    # Attempt to import the Editor tool
    from metagpt.tools.libs.editor import Editor  # type: ignore
    try:
        # Use top-level await in Jupyter to call async function
        fb = await Editor.read(DATA_PATH)  # type: ignore
        raw_text = fb.content if hasattr(fb, "content") else str(fb)
    except Exception as e:
        read_errors.append(f"Editor.read failed: {e}")
except Exception as e:
    # Editor tool not available; fallback
    read_errors.append(f"Editor tool not available or import failed: {e}")

if raw_text is None:
    # Fallback to standard IO if Editor was not usable
    try:
        with open(DATA_PATH, "r", encoding="utf-8") as f:
            raw_text = f.read()
    except Exception as e:
        print(f"ERROR: Failed to read file: {e}")
        # Print summary with failure and exit
        print(f"JSON validity: False")
        print(f"Item count: 0")
        print(f"Unique IDs: False")
        exit()

# Parse JSON and check the top-level shape.
# Outputs of this section: data, json_valid, errors, warnings, item_count.
errors: List[str] = []
warnings: List[str] = []

data = None
try:
    data = json.loads(raw_text)
except (TypeError, ValueError, RecursionError) as exc:
    # ValueError covers json.JSONDecodeError and bad byte encodings; TypeError
    # covers a non-text raw_text; RecursionError covers pathological nesting.
    json_valid = False
    # Keep the parser's message so the report says where parsing broke.
    errors.append(f"File is not valid JSON (parsing failed): {exc}")
else:
    json_valid = True

# Basic structure and count
if isinstance(data, list):
    item_count = len(data)
    if item_count != 10:
        errors.append(f"Array must contain exactly 10 items; found {item_count}.")
else:
    item_count = 0
    # Only complain about the top-level type when parsing itself succeeded;
    # a parse failure has already been reported above.
    if json_valid:
        errors.append(f"Top-level JSON must be an array; found type {type(data).__name__}.")

# Validation helpers

# Keys every creature object must contain.
required_keys = [
    "id",
    "name",
    "species",
    "short_pitch",
    "description",
    "personality_traits",
    "powers",
    "home_realm",
    "color_palette",
    "suggested_themes",
    "age_suitability",
    "languages",
    "tags",
    "illustration_prompt",
    "story_seed",
    "content_rating",
]

# Both patterns end in \Z rather than $: "$" also matches just before a
# trailing newline, which would let values such as "moon-fox\n" slip through.
# Lowercase alphanumeric groups separated by single hyphens.
slug_re = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*\Z")
# Two digit runs separated by a hyphen, e.g. "3-7".
age_re = re.compile(r"^\d+-\d+\Z")

# Accumulators filled in by the validation pass.
ids: List[str] = []
bad_slug_ids: List[str] = []
duplicate_ids: List[str] = []

# Fields that must be plain strings.
_STRING_FIELDS = (
    "id", "name", "species", "short_pitch", "description", "home_realm",
    "age_suitability", "illustration_prompt", "story_seed", "content_rating",
)
# Fields that must be arrays whose elements are all strings.
_STRING_LIST_FIELDS = (
    "personality_traits", "powers", "color_palette", "suggested_themes",
    "languages", "tags",
)
# Inclusive (min, max) element counts for the arrays that have a size limit.
_LENGTH_LIMITS = {
    "personality_traits": (3, 7),
    "powers": (3, 7),
    "color_palette": (3, 6),
    "suggested_themes": (3, 7),
}


def _item_errors(idx: int, item: Dict[str, Any]) -> List[str]:
    """Return the schema violations found in one creature object.

    Args:
        idx: Position of the object in the top-level array (used in messages).
        item: The object to check.

    Returns:
        Error messages in report order: missing keys, string-typed fields,
        array-typed fields, array lengths, age pattern, languages, rating.
        A missing key only produces the "missing required key" message; the
        type and value checks are skipped for it.
    """
    label = f"Item {idx} ({item.get('id')})"
    found: List[str] = [
        f"{label} missing required key '{key}'."
        for key in required_keys
        if key not in item
    ]

    # Strings
    for field in _STRING_FIELDS:
        if field in item and not isinstance(item[field], str):
            found.append(f"{label} field '{field}' must be a string; found {type(item[field]).__name__}.")

    # Arrays of strings
    for field in _STRING_LIST_FIELDS:
        if field not in item:
            continue
        value = item[field]
        if not isinstance(value, list):
            found.append(f"{label} field '{field}' must be an array; found {type(value).__name__}.")
        elif not all(isinstance(x, str) for x in value):
            found.append(f"{label} field '{field}' must be an array of strings.")

    # Length constraints (only meaningful when the field really is a list)
    for field, (lo, hi) in _LENGTH_LIMITS.items():
        value = item.get(field)
        if isinstance(value, list) and not lo <= len(value) <= hi:
            found.append(f"{label} {field} length must be {lo}-{hi}; found {len(value)}.")

    # Age suitability pattern
    age = item.get("age_suitability")
    if isinstance(age, str) and not age_re.match(age):
        found.append(f"{label} age_suitability must match '\\d+-\\d+'; found '{age}'.")

    # Languages must include both 'da' and 'en' (compared case-insensitively)
    languages = item.get("languages")
    if isinstance(languages, list):
        present = {x.lower() for x in languages if isinstance(x, str)}
        if not {"da", "en"} <= present:
            found.append(f"{label} languages must include 'da' and 'en'; found {languages}.")

    # Content rating must be "G"
    rating = item.get("content_rating")
    if isinstance(rating, str) and rating != "G":
        found.append(f"{label} content_rating must be 'G'; found '{rating}'.")

    return found


if json_valid and isinstance(data, list):
    # Pass 1: report non-object entries and collect the string IDs.
    # A missing or non-string 'id' is NOT reported here: the schema pass below
    # already reports it, and doing both would list the same problem twice.
    for idx, item in enumerate(data):
        if not isinstance(item, dict):
            errors.append(f"Item {idx} is not an object; found type {type(item).__name__}.")
            continue
        item_id = item.get("id")
        if isinstance(item_id, str):
            ids.append(item_id)
            if not slug_re.match(item_id):
                bad_slug_ids.append(item_id)

    # Check uniqueness: every repeat occurrence of an ID is recorded.
    seen = set()
    for _id in ids:
        if _id in seen:
            duplicate_ids.append(_id)
        else:
            seen.add(_id)

    # Pass 2: per-item schema checks (non-objects were already reported).
    for idx, item in enumerate(data):
        if isinstance(item, dict):
            errors.extend(_item_errors(idx, item))

# Summaries
print(f"JSON validity: {json_valid}")
print(f"Item count: {item_count}")
# "Unique IDs" is True only when no ID repeats AND the array has the expected
# 10 items, so a wrong item count also turns it False.
unique_ok = not duplicate_ids and item_count == 10
print(f"Unique IDs: {unique_ok}")
if bad_slug_ids:
    print(f"Non-slug-safe IDs: {bad_slug_ids}")
if duplicate_ids:
    print(f"Duplicate IDs: {duplicate_ids}")

# Print read warnings (if any)
for note in read_errors:
    print(f"Note: {note}")

# Slug and uniqueness problems are not stored in `errors`, so they need their
# own verdict lines when the schema checks themselves are clean.
if errors:
    print("DETAILED ERRORS:")
    for message in errors:
        print(f"- {message}")
elif bad_slug_ids and not unique_ok:
    # Both checks failed: do not claim that everything else passed.
    print("Slug-safe ID and uniqueness checks failed, but other validations passed.")
elif bad_slug_ids:
    print("Slug-safe ID check failed, but other validations passed.")
elif not unique_ok:
    print("Uniqueness check failed, but other validations passed.")
else:
    print("VALIDATION PASSED")

JSON validity: True
Item count: 10
Unique IDs: True
Note: Editor tool not available or import failed: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject
VALIDATION PASSED


2025-10-06 02:12:20.108 | INFO     | metagpt.const:get_metagpt_root:33 - PROJECT_ROOT set from environment variable to /
