In [1]:
import json

def flatten_json(y, path_prefix=""):
    """
    Recursively flatten a JSON-like object into a single dict of paths.

    e.g. {"a": {"b": 1}} becomes {"a/b": 1}

    Args:
        y: The decoded JSON value to flatten (normally a dict).
        path_prefix: Optional path prepended to every key. It may be given
            with or without a trailing "/"; both "root" and "root/" produce
            keys such as "root/a".

    Returns:
        dict mapping "/"-joined paths to leaf values.

    Notes:
        * Lists are kept as atomic leaf values: in schemas they usually hold
          enums or type lists, which can be compared directly.
        * An empty nested object is recorded as a leaf (``{"a": {}}`` gives
          ``{"a": {}}``) so that it is not silently lost. An empty root
          object with no prefix still flattens to ``{}``.
        * A non-dict root with no prefix is stored under the key ``""``.
        * Keys that themselves contain "/" are not escaped, so distinct
          structures can map to the same path.
    """
    # Internally every non-empty base path ends with "/", so a child path is
    # always base + key + "/" and the final key is the base minus that slash.
    base_prefix = path_prefix
    if base_prefix and not base_prefix.endswith("/"):
        base_prefix += "/"

    out = {}

    def flatten(node, base):
        if isinstance(node, dict) and node:
            for key, child in node.items():
                flatten(child, f"{base}{key}/")
        elif isinstance(node, dict) and not base:
            # Empty root object without a prefix: nothing to record.
            return
        else:
            # Scalars, lists and empty nested objects are all leaves.
            out[base[:-1]] = node

    flatten(y, base_prefix)
    return out

def compare_schemas(user_schema, standard_schema):
    """
    Diff two schemas path by path.

    Both schemas are flattened with ``flatten_json`` (giving paths such as
    "$defs/Summary/properties/title/description") and compared.

    Returns:
        A list of ``[label, path]`` pairs. Entries for paths in the user
        schema come first, in that schema's order: "added" when the path is
        absent from the standard schema, "changed" when the values differ.
        They are followed by a "deleted" entry for each standard-schema path
        that the user schema lacks.
    """
    flat_user = flatten_json(user_schema)
    flat_standard = flatten_json(standard_schema)

    # Sentinel so that a stored None is not mistaken for a missing path.
    absent = object()

    report = []
    for key, user_value in flat_user.items():
        reference = flat_standard.get(key, absent)
        if reference is absent:
            report.append(["added", key])
        elif reference != user_value:
            report.append(["changed", key])

    report.extend(
        ["deleted", key] for key in flat_standard if key not in flat_user
    )
    return report


In [2]:
import os
# Folder holding the schema JSON files, given relative to the notebook's
# working directory.
directory = "../src/utils"

In [9]:
# Read both schema files. JSON text is UTF-8, so decode explicitly instead
# of relying on the platform's locale-dependent default encoding.
with open(os.path.join(directory, "hdruk_schema.json"), encoding="utf-8") as f:
    hdruk = json.load(f)
with open(os.path.join(directory, "schema.json"), encoding="utf-8") as f:
    cruk = json.load(f)

In [10]:
# `cruk` is passed as the user schema and `hdruk` as the standard, so
# "added" = path only in cruk, "deleted" = path only in hdruk,
# "changed" = path in both with different values.
results = compare_schemas(cruk, hdruk)

In [13]:
! pip install pandas
import pandas as pd

Collecting pandas
  Downloading pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl.metadata (91 kB)
Collecting numpy>=1.26.0 (from pandas)
  Downloading numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl.metadata (6.6 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl (10.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0mm
[?25hDownloading numpy-2.4.1-cp314-cp314-macosx_14_0_arm64.whl (5.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hUsing cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Installing collected packages: pytz, numpy, pandas
Successfully installed numpy-2.4.1 pandas-2.3.3 pytz-2025.2


In [16]:
# Tabulate the [label, path] pairs and write them to comparison.csv in the
# current working directory. No `index` argument is passed, so pandas'
# default applies and the row index is written as the first column.
(
    pd.DataFrame(results, columns=["change", "address"])
    .to_csv("comparison.csv")
)

In [6]:
# Inspection step: flatten `cruk` on its own to look at the path -> value mapping.
c = flatten_json(cruk)

In [7]:
# Show the flattened mapping.
# NOTE(review): the saved output has a single '' key holding a file path,
# which is what flatten_json returns for a plain string. This cell appears
# to have run (In [7]) before `cruk` was loaded as JSON (In [9]); re-run to
# refresh the output.
c

{'': '../src/utils/schema.json'}