In [1]:
import json
import os

In [7]:
# munging changes cut and pasted from schema_changelog.xlsx
#
# The following cell overwrites this same file with a JSON list, so on a
# re-run the file no longer holds one path per line. Both forms are accepted
# here so that re-running this cell does not wrap the JSON text in a new
# one-element list.
with open("../src/utils/changes_for_semantic_schema.json", encoding="utf-8") as f:
    raw_changes = f.read()

try:
    changes = json.loads(raw_changes)
except json.JSONDecodeError:
    changes = None

if not isinstance(changes, list):
    # Raw paste: one path per line. splitlines() also handles "\r\n" line
    # endings, and blank lines are dropped so they do not become empty paths.
    changes = [line.strip() for line in raw_changes.splitlines() if line.strip()]

In [10]:
# Serialize before opening the file for writing: opening with "w" truncates
# the file immediately, and this is the same file `changes` was read from,
# so a failed dump would otherwise leave it empty or half-written.
changes_json = json.dumps(changes)
with open("../src/utils/changes_for_semantic_schema.json", "w") as f:
    f.write(changes_json)

In [11]:
def get_value_by_path(data, path_parts):
    """
    Look up a nested value by following ``path_parts`` one key at a time.

    Starts at ``data`` and indexes into it with each key in order. With an
    empty ``path_parts`` the result is ``data`` itself.

    Returns the value reached, or None when a key is missing (KeyError) or
    when a value along the way cannot be indexed with the given key
    (TypeError, e.g. a string key applied to a list or a string).
    """
    node = data
    try:
        for key in path_parts:
            node = node[key]
    except (KeyError, TypeError):
        # Missing key, or a non-dict value reached before the path ended.
        return None
    return node

def set_nested_value(dictionary, path_parts, value):
    """
    Store ``value`` in ``dictionary`` at the nested location ``path_parts``.

    Intermediate dictionaries that do not exist yet are created on the way
    down. ``dictionary`` is modified in place.

    Parameters
    ----------
    dictionary : dict
        The dictionary to write into.
    path_parts : sequence
        Keys leading to the target; the last one is the leaf key.
    value : object
        The value to store at the leaf (stored as-is, not copied).

    Returns
    -------
    bool
        True if the value was stored. False if nothing was written, either
        because ``path_parts`` is empty or because an intermediate key
        already holds something that is not a dictionary. In both cases an
        error message is printed.
    """
    # An empty path has no leaf key; without this guard path_parts[-1]
    # below would raise IndexError.
    if not path_parts:
        print("Error: Cannot set a value at an empty path.")
        return False

    current = dictionary
    # Walk every key except the last, creating missing levels as we go.
    for part in path_parts[:-1]:
        child = current.setdefault(part, {})

        # An existing non-dict value (e.g. a string) cannot take children,
        # so refuse rather than overwrite it.
        if not isinstance(child, dict):
            print(f"Error: Conflict at '{part}'. It exists but is not a dictionary.")
            return False

        current = child

    # Set the value at the leaf
    current[path_parts[-1]] = value
    return True

In [20]:
# Relative paths; they resolve against the current working directory.
input_filename = "../src/utils/schema.json"  # full schema passed to convert()
output_filename = "../src/utils/semanticSchema.json"  # sparse schema written by convert()
# NOTE(review): the munging cells above read and write
# "changes_for_semantic_schema.json", but this points at
# "changes_for_semanticSchema.json" - confirm which file name is intended.
changes_path = "../src/utils/changes_for_semanticSchema.json"  # JSON list of paths to extract

In [21]:
# ---------------------------------------------------------
# 3. MAIN LOGIC
# ---------------------------------------------------------

def convert(input_filename, output_filename, changes_path):
    """
    Build a sparse "semantic" schema containing only selected paths.

    Loads the full JSON schema from ``input_filename`` and a JSON list of
    slash-separated paths from ``changes_path``. Every path that resolves in
    the full schema is written, at the same nested location, into a new
    dictionary, which is then saved to ``output_filename`` as JSON (UTF-8,
    indent of 4). Progress and problems are reported with ``print``; the
    function always returns None.

    Parameters
    ----------
    input_filename : str
        Path to the full schema JSON file.
    output_filename : str
        Path the sparse schema is written to.
    changes_path : str
        Path to a JSON file holding a list of paths such as
        "$defs/Summary/properties/title/description".

    Notes
    -----
    A path whose value in the schema is JSON ``null`` is reported as not
    found, because the lookup returns None both for a missing path and for
    a stored None.
    """
    # Check if input file exists
    if not os.path.exists(input_filename):
        print(f"Error: {input_filename} not found.")
        return

    # Load the original schema
    with open(input_filename, 'r', encoding='utf-8') as f:
        try:
            full_schema = json.load(f)
        except json.JSONDecodeError:
            print("Error: Failed to decode JSON. Check your schema.json file.")
            return

    # Load the list of paths with the same guards as the schema, so a
    # missing or malformed changes file gives a message, not a traceback.
    if not os.path.exists(changes_path):
        print(f"Error: {changes_path} not found.")
        return

    with open(changes_path, 'r', encoding='utf-8') as f:
        try:
            paths_to_extract = json.load(f)
        except json.JSONDecodeError:
            print(f"Error: Failed to decode JSON in {changes_path}.")
            return

    # Anything but a list would be iterated in a misleading way below
    # (a string character by character, a dict by its keys).
    if not isinstance(paths_to_extract, list):
        print(f"Error: {changes_path} must contain a JSON list of paths.")
        return

    # Initialize the new sparse schema
    semantic_schema = {}
    extracted = 0
    missing = 0
    skipped = 0

    print(f"Processing {len(paths_to_extract)} paths...")

    for path in paths_to_extract:
        # Entries pasted from a spreadsheet may carry surrounding whitespace
        # or a trailing "\r", so strip that before the slashes.
        # This handles "$defs/..." or "/$defs/..."
        clean_path = path.strip().strip('/') if isinstance(path, str) else ''
        if not clean_path:
            print(f"  [WARNING] Skipping blank or non-string entry: {path!r}")
            skipped += 1
            continue
        path_parts = clean_path.split('/')

        # 1. Get the current value from the full schema
        original_value = get_value_by_path(full_schema, path_parts)

        if original_value is not None:
            # 2. Write that value into our new sparse dictionary
            set_nested_value(semantic_schema, path_parts, original_value)
            print(f"  [OK] Extracted: {path}")
            extracted += 1
        else:
            print(f"  [WARNING] Path not found in schema: {path}")
            missing += 1

    # Save the result
    with open(output_filename, 'w', encoding='utf-8') as f:
        json.dump(semantic_schema, f, indent=4)

    print("-" * 30)
    # Tally first, so warnings buried in a long log are not missed.
    print(f"Extracted {extracted} paths ({missing} not found, {skipped} skipped).")
    print(f"Success! Semantic schema saved to '{output_filename}'")


In [23]:
# Run the extraction using the three paths configured above.
convert(
    input_filename=input_filename,
    output_filename=output_filename,
    changes_path=changes_path,
)

Processing 18 paths...
  [OK] Extracted: $defs/Summary/properties/title/description
  [OK] Extracted: $defs/Summary/properties/title/examples
  [OK] Extracted: $defs/Summary/properties/title/guidance
  [OK] Extracted: $defs/Summary/properties/abstract/description
  [OK] Extracted: $defs/Summary/properties/abstract/examples
  [OK] Extracted: $defs/Summary/properties/abstract/guidance
  [OK] Extracted: $defs/Summary/properties/dataCustodian/description
  [OK] Extracted: $defs/Summary/properties/dataCustodian/examples
  [OK] Extracted: $defs/Summary/properties/keywords/description
  [OK] Extracted: $defs/Summary/properties/keywords/examples
  [OK] Extracted: $defs/Summary/properties/keywords/guidance
  [OK] Extracted: $defs/Summary/properties/contactPoint/description
  [OK] Extracted: $defs/Summary/properties/contactPoint/examples
  [OK] Extracted: $defs/Summary/properties/contactPoint/guidance
  [OK] Extracted: properties/version/description
  [OK] Extracted: properties/version/guidance
