In [4]:
import json
# Provided CMIP data
cmip_data = json.load(open('../../MIP_grid_label.json'))['grid_label']

print(cmip_data)

# Helper function to generate the short name
def generate_short_name(acronym):
    parts = []
    if 'gn' in acronym:
        parts.append('Grid Native')
    elif 'gr' in acronym:
        parts.append('Regridded')

    if 'a' in acronym:
        parts.append('Antarctica')
    if 'g' in acronym:
        parts.append('Greenland')
    if 'z' in acronym:
        parts.append('Zonal Mean')

    if '1' in acronym or '2' in acronym or '3' in acronym or '4' in acronym or '5' in acronym or '6' in acronym or '7' in acronym or '8' in acronym or '9' in acronym:
        parts.append(f"Alternate {acronym[-1]}")

    return ' '.join(parts) if parts else 'Global Mean'

# Helper function to determine the region
def determine_region(acronym):
    if 'a' in acronym:
        return 'Antarctica'
    elif 'g' in acronym:
        return 'Greenland'
    else:
        return 'Global'

# Constructing the full data structure
label_data = {}
for acronym, description in cmip_data.items():
    label_data[acronym] = {
        'short_name': generate_short_name(acronym),
        'region': determine_region(acronym),
        'full_description': description
    }


{'gm': 'global mean data', 'gn': "data reported on a model's native grid", 'gna': 'data reported on a native grid in the region of Antarctica', 'gng': 'data reported on a native grid in the region of Greenland', 'gnz': "zonal mean data reported on a model's native latitude grid", 'gr': "regridded data reported on the data provider's preferred target grid", 'gr1': 'regridded data reported on a grid other than the native grid and other than the preferred target grid', 'gr1a': 'regridded data reported in the region of Antarctica on a grid other than the native grid and other than the preferred target grid', 'gr1g': 'regridded data reported in the region of Greenland on a grid other than the native grid and other than the preferred target grid', 'gr1z': 'regridded zonal mean data reported on a grid other than the native latitude grid and other than the preferred latitude target grid', 'gr2': 'regridded data reported on a grid other than the native grid and other than the preferred target g

In [5]:
import json

# Creating a more detailed schema for the CMIP grid labels
schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "CMIP Grid Labels",
    "description": "Schema for defining CMIP grid labels and their associated metadata",
    "type": "object",
    "properties": {
        "@id": {
            "type": "string",
            "description": "Unique identifier for the grid label, following the pattern mip-cmor-tables:grid/label/[label]",
            "pattern": "^mip-cmor-tables:grid/label/[a-zA-Z0-9]+$"
        },
        "@type": {
            "type": "string",
            "description": "Type of the object, should be mip:grid-label",
            "enum": ["mip:grid-label"]
        },
        "acronym": {
            "type": "string",
            "description": "Acronym representing the grid label"
        },
        "short name": {
            "type": "string",
            "description": "Short name or alias for the grid label"
        },
        "description": {
            "type": "string",
            "description": "Full description of the grid label"
        },
        "region": {
            "type": "string",
            "description": "Region associated with the grid label, can be global, antarctica, or greenland",
            "enum": ["global", "antarctica", "greenland"]
        }
    },
    "required": ["@id", "@type", "acronym", "short name", "description", "region"]
}

with open(f"label/schema.jsonld", "w") as f:
    f.write(json.dumps(schema, indent=4))


# # Minified schema for output
# minified_schema = json.dumps(schema, separators=(',', ':'))
# minified_schema

In [7]:
from pprint import pprint
import json
for i,d in label_data.items():
    print(i,label_data[i]["short_name"],label_data[i]["full_description"],label_data[i]["region"])
    
    
    jout = {
        "@id": f"mip-cmor-tables:grid/label/{i.lower().strip()}",
        "@type": "grid-label",
        "acronym": i,
        "short name": label_data[i]["short_name"],  
        "description": label_data[i]["full_description"],
        "region": label_data[i]["region"].lower()
        
    }
    
    with open(f"label/{i.lower().strip()}.json", "w") as f:
        f.write(json.dumps(jout, indent=4))
    
    pprint(jout)

gm Greenland global mean data Greenland
{'@id': 'mip-cmor-tables:grid/label/gm',
 '@type': 'grid-label',
 'acronym': 'gm',
 'description': 'global mean data',
 'region': 'greenland',
 'short name': 'Greenland'}
gn Grid Native Greenland data reported on a model's native grid Greenland
{'@id': 'mip-cmor-tables:grid/label/gn',
 '@type': 'grid-label',
 'acronym': 'gn',
 'description': "data reported on a model's native grid",
 'region': 'greenland',
 'short name': 'Grid Native Greenland'}
gna Grid Native Antarctica Greenland data reported on a native grid in the region of Antarctica Antarctica
{'@id': 'mip-cmor-tables:grid/label/gna',
 '@type': 'grid-label',
 'acronym': 'gna',
 'description': 'data reported on a native grid in the region of Antarctica',
 'region': 'antarctica',
 'short name': 'Grid Native Antarctica Greenland'}
gng Grid Native Greenland data reported on a native grid in the region of Greenland Greenland
{'@id': 'mip-cmor-tables:grid/label/gng',
 '@type': 'grid-label',
 'ac