In [5]:
import json, os, glob
from jsonschema import validate,exceptions
import pprint

def validate_json(data, schema, name = 'none', success=True):
    """Validate ``data`` against a JSON Schema and report the outcome on stdout.

    Parameters
    ----------
    data
        The parsed JSON instance to check.
    schema
        The JSON Schema, passed unchanged to ``jsonschema.validate``.
    name
        Label printed alongside the result so the offending input can be
        identified (defaults to ``'none'``).
    success
        When true (the default) a "Validation succeeded" line is printed for
        valid input; when false only failures are reported.

    Returns
    -------
    bool
        ``True`` if ``data`` validated, ``False`` if a ``ValidationError`` was
        raised. Returning the outcome lets callers count or skip failures
        instead of having to parse the printed text.

    Notes
    -----
    Only ``exceptions.ValidationError`` is handled here; any other exception
    raised by ``validate`` propagates to the caller.
    """
    try:
        validate(instance=data, schema=schema)
    except exceptions.ValidationError as err:
        print("Validation error:",name, err.message)
        return False
    # Kept outside the ``try`` so that only the validation call is guarded.
    if success:
        print(f"Validation succeeded: {name}")
    return True
  

{'@id': 'mip-cmor-tables/institutions/bcc',
 '@type': 'cmip:institution',
 'institution:acronyms': [],
 'institution:aliases': [],
 'institution:cmip_acronym': 'BCC',
 'institution:established': 2008,
 'institution:labels': [],
 'institution:location': {'@id': 'mip-cmor-tables/institutions/location/01spyyb53',
                          '@nest': {'location:city': 'Beijing',
                                    'location:country': ['China', 'CN'],
                                    'location:lat': 39.950128,
                                    'location:lon': 116.383679},
                          '@type': 'institution:location'},
 'institution:name': 'Beijing Biocytogen (China)',
 'institution:ror': '01spyyb53',
 'institution:type': 'Company',
 'institution:url': ['http://www.bbctg.com.cn/']}


In [47]:
      
# Collect the institution records to validate. 'institutions/schema.json' is
# written into the same directory further down, so it is filtered out here;
# otherwise re-running the cell would validate the schema file against itself
# and report a spurious error. Sorting makes the processing order reproducible,
# since glob returns matches in arbitrary order.
inst = sorted(
    path for path in glob.glob('institutions/*.json')
    if os.path.basename(path) != 'schema.json'
)

# Sub-schema for the nested JSON-LD location node of an institution record.
location_schema = {
    "type": "object",
    "properties": {
        "@id": {
            "type": "string",
            "pattern": "^mip-cmor-tables:organisations/institutions/location/.*$",
        },
        # '@nest' carries the location payload; all four fields are mandatory.
        "@nest": {
            "type": "object",
            "properties": {
                "location:city": {"type": "string"},
                "location:country": {"type": "array", "items": {"type": "string"}},
                "location:lat": {"type": "number"},
                "location:lon": {"type": "number"},
            },
            "required": ["location:city", "location:country", "location:lat", "location:lon"],
        },
        "@type": {"type": "string", "enum": ["institution:location"]},
    },
    "required": ["@id", "@nest", "@type"],
}

# Draft-07 JSON Schema describing one institution record. Every declared
# property is also listed under "required".
schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "Institution Data Schema",
    "type": "object",
    "properties": {
        # JSON-LD identity of the record.
        "@id": {
            "type": "string",
            "pattern": "^mip-cmor-tables:organisations/institutions/.*$",
        },
        "@type": {"type": "string", "enum": ["cmip:institution"]},
        # Descriptive fields.
        "institution:acronyms": {"type": "array", "items": {"type": "string"}},
        "institution:aliases": {"type": "array", "items": {"type": "string"}},
        "institution:cmip_acronym": {"type": "string"},
        "institution:established": {"type": "integer", "minimum": 0},
        "institution:labels": {"type": "array", "items": {"type": "string"}},
        "institution:location": location_schema,
        "institution:name": {"type": "string"},
        "institution:ror": {"type": "string"},
        "institution:type": {"type": "string"},
        "institution:url": {
            "type": "array",
            "items": {"type": "string", "format": "uri"},
        },
    },
    "required": [
        "@id",
        "@type",
        "institution:acronyms",
        "institution:aliases",
        "institution:cmip_acronym",
        "institution:established",
        "institution:labels",
        "institution:location",
        "institution:name",
        "institution:ror",
        "institution:type",
        "institution:url",
    ],
}

# Persist the schema alongside the institution records. The context manager
# guarantees the file is flushed and closed even if serialisation fails,
# rather than leaving that to garbage collection of an anonymous handle.
with open('institutions/schema.json', 'w') as schema_file:
    json.dump(schema, schema_file, indent=4)


for iloc in inst:
    # Read through a context manager so the handle is closed deterministically,
    # and decode as UTF-8 (the JSON interchange encoding) instead of relying
    # on the platform's default encoding.
    with open(iloc, 'r', encoding='utf-8') as src:
        text = src.read()
    # Rewrite legacy identifiers in memory so they carry the
    # 'mip-cmor-tables:organisations/institutions' prefix.
    textnew = text.replace('mip-cmor-tables/institutions','mip-cmor-tables:organisations/institutions')
    print(iloc)
    js = json.loads(textnew)
    # pprint.pprint(js)

    validate_json(js,schema,iloc)

    # Writing the migrated record back to disk is currently disabled:
    # print(f'./inst/{iloc.lower()}')
    # with open(f'./inst/{iloc.lower()}','w') as f:
    #   jst = json.dumps(js,indent=4)
    #   f.write(jst)
    #   # os.system(f'rm {iloc}')
 

institutions/BCC.json
Validation succeeded: institutions/BCC.json
./inst/institutions/bcc.json
institutions/UA.json
Validation succeeded: institutions/UA.json
./inst/institutions/ua.json
institutions/NTU.json
Validation succeeded: institutions/NTU.json
./inst/institutions/ntu.json
institutions/NASA-GSFC.json
Validation succeeded: institutions/NASA-GSFC.json
./inst/institutions/nasa-gsfc.json
institutions/CCCma.json
Validation succeeded: institutions/CCCma.json
./inst/institutions/cccma.json
institutions/CNES.json
Validation succeeded: institutions/CNES.json
./inst/institutions/cnes.json
institutions/MPI-M.json
Validation succeeded: institutions/MPI-M.json
./inst/institutions/mpi-m.json
institutions/NUIST.json
Validation succeeded: institutions/NUIST.json
./inst/institutions/nuist.json
institutions/AER.json
Validation succeeded: institutions/AER.json
./inst/institutions/aer.json
institutions/MOHC.json
Validation succeeded: institutions/MOHC.json
./inst/institutions/mohc.json
institution

In [50]:
      
# Collect the consortium records to process. 'consortia/schema.json' is
# written into the same directory further down, so it is filtered out here;
# otherwise re-running the cell would validate (and rewrite) the schema file
# as if it were a consortium record. Sorting makes the processing order
# reproducible, since glob returns matches in arbitrary order.
inst = sorted(
    path for path in glob.glob('consortia/*.json')
    if os.path.basename(path) != 'schema.json'
)

# Sub-schema for the member list: an object whose '@set' is an array of
# JSON-LD references, each of which must carry an '@id'.
members_schema = {
    "type": "object",
    "properties": {
        "@set": {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {"@id": {"type": "string"}},
                "required": ["@id"],
            },
        },
    },
    "required": ["@set"],
}

# Draft-07 JSON Schema describing one consortium record. Every declared
# property is also listed under "required".
schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "Consortium Data Schema",
    "type": "object",
    "properties": {
        # JSON-LD identity of the record.
        "@id": {
            "type": "string",
            "pattern": "^mip-cmor-tables:organisations/consortia/.*$",
        },
        "@type": {"type": "string", "enum": ["cmip:consortium"]},
        # Free-text descriptive fields.
        "consortium:cmip_acronym": {"type": "string"},
        "consortium:description": {"type": "string"},
        "consortium:url": {"type": "string"},
        "consortium:changes": {"type": "string"},
        "consortium:members": members_schema,
    },
    "required": [
        "@id",
        "@type",
        "consortium:cmip_acronym",
        "consortium:description",
        "consortium:url",
        "consortium:changes",
        "consortium:members",
    ],
}


# Persist the schema alongside the consortium records. The context manager
# guarantees the file is flushed and closed even if serialisation fails,
# rather than leaving that to garbage collection of an anonymous handle.
with open('consortia/schema.json', 'w') as schema_file:
    json.dump(schema, schema_file, indent=4)


for iloc in inst:
    # Read through a context manager so the handle is closed before the file
    # is (possibly) reopened for writing below, and decode as UTF-8 (the JSON
    # interchange encoding) instead of relying on the platform default.
    with open(iloc, 'r', encoding='utf-8') as src:
        text = src.read()
    # Rewrite legacy identifiers so they carry the
    # 'mip-cmor-tables:organisations/consortia' prefix.
    textnew = text.replace('mip-cmor-tables/consortia','mip-cmor-tables:organisations/consortia')
    print(iloc)
    js = json.loads(textnew)
    # pprint.pprint(js)

    validate_json(js,schema,iloc)

    # Write the migrated record under its lower-cased path. The file is
    # written regardless of the validation outcome reported above.
    with open(f'./{iloc.lower()}','w') as f:
        json.dump(js, f, indent=4)
        # os.system(f'rm {iloc}')
 

consortia/ncc.json
Validation succeeded: consortia/ncc.json
consortia/ec-earth-consortium.json
Validation succeeded: consortia/ec-earth-consortium.json
consortia/espri-ipsl.json
Validation succeeded: consortia/espri-ipsl.json
consortia/nims-kma.json
Validation succeeded: consortia/nims-kma.json
consortia/messy-consortium.json
Validation succeeded: consortia/messy-consortium.json
consortia/incois-nio-ipsl.json
Validation succeeded: consortia/incois-nio-ipsl.json
consortia/noaa-gfdl.json
Validation succeeded: consortia/noaa-gfdl.json
consortia/fio-qlnm.json
Validation succeeded: consortia/fio-qlnm.json
consortia/e3sm-project.json
Validation succeeded: consortia/e3sm-project.json
consortia/dlr-bira.json
Validation succeeded: consortia/dlr-bira.json
consortia/pcmdi.json
Validation succeeded: consortia/pcmdi.json
consortia/csiro-arccss.json
Validation succeeded: consortia/csiro-arccss.json
consortia/iamc.json
Validation succeeded: consortia/iamc.json
consortia/miroc.json
Validation succeede