In [24]:
import json, os, glob
from jsonschema import validate,exceptions
import pprint

from pyld import jsonld

def validate_json(data, schema, name = 'none', success=True):
    try:
        validate(instance=data, schema=schema)
        if success:
            print(f"Validation succeeded: {name}")
    except exceptions.ValidationError as err:
        print("Validation error:",name, err.message,)
  

In [31]:
      
inst = glob.glob('institutions/*.json')

schema = {
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Institution Data Schema",
  "type": "object",
  "properties": {
    "@id": {
      "type": "string",
      "pattern": "^mip-cmor-tables:organisations/institutions/.*$"
    },
    "@type": {
      "type": "string",
      "enum": ["cmip:institution"]
    },
    "acronyms": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "aliases": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "cmip_acronym": {
      "type": "string"
    },
    "established": {
      "type": "integer",
      "minimum": 0
    },
    "labels": {
      "type": "array",
      "items": {
        "type": "string"
      }
    },
    "location": {
      "type": "object",
      "properties": {
        "@id": {
          "type": "string",
          "pattern": "^mip-cmor-tables:organisations/institutions/location/.*$"
        },
        "@nest": {
          "type": "object",
          "properties": {
            "city": {
              "type": "string"
            },
            "country": {
              "type": "array",
              "items": {
                "type": "string"
              }
            },
            "lat": {
              "type": "number"
            },
            "lon": {
              "type": "number"
            }
          },
          "required": ["city", "country", "lat", "lon"]
        },
        "@type": {
          "type": "string",
          "enum": ["location"]
        }
      },
      "required": ["@id", "@nest", "@type"]
    },
    "name": {
      "type": "string"
    },
    "ror": {
      "type": "string"
    },
    "type": {
      "type": "string"
    },
    "url": {
      "type": "array",
      "items": {
        "type": "string",
        "format": "uri"
      }
    }
  },
  "required": [
    "@id",
    "@type",
    "acronyms",
    "aliases",
    "cmip_acronym",
    "established",
    "labels",
    "location",
    "name",
    "ror",
    "type",
    "url"
  ]
}

json.dump(schema,open('institutions/schema.json','w'),indent=4)


for iloc in inst:
    text = open(iloc,'r').read()
    textnew = text.replace('mip-cmor-tables/institutions','mip-cmor-tables:organisations/institutions').replace('institution:','').replace('location:','')
    print(iloc)
    js = json.loads(textnew)
    # pprint.pprint(js)
    
    validate_json(js,schema,iloc)
    
    # print(f'./inst/{iloc.lower()}')
    with open(f'{iloc.lower()}','w') as f:
      jst = json.dumps(js,indent=4)
      f.write(jst)
    #   # os.system(f'rm {iloc}')
 

institutions/bcc.json
Validation succeeded: institutions/bcc.json
institutions/ua.json
Validation succeeded: institutions/ua.json
institutions/ntu.json
Validation succeeded: institutions/ntu.json
institutions/uootago.json
Validation succeeded: institutions/uootago.json
institutions/nasa-gsfc.json
Validation succeeded: institutions/nasa-gsfc.json
institutions/cccma.json
Validation succeeded: institutions/cccma.json
institutions/cnes.json
Validation succeeded: institutions/cnes.json
institutions/mpi-m.json
Validation succeeded: institutions/mpi-m.json
institutions/pmod.json
Validation succeeded: institutions/pmod.json
institutions/version.json
Validation error: institutions/version.json [{'@id': '../../https://github.com/PCMDI/mip-cmor-tables/blob/main/JSONLD/organisations/institutions/uootago.json', '@type': 'version', 'version:file': '../../JSONLD/organisations/institutions/uootago.json', 'version:release': {'mip-cmor-tables': 'v6.5.0.2', 'cmip6plus': 'v6.5.1.4'}, 'version:previous_upd

In [32]:
      
inst = glob.glob('consortia/*.json')

schema = {
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Consortium Data Schema",
  "type": "object",
  "properties": {
    "@id": {
      "type": "string",
      "pattern": "^mip-cmor-tables:organisations/consortia/.*$"
    },
    "@type": {
      "type": "string",
      "enum": ["cmip:consortium"]
    },
    "cmip_acronym": {
      "type": "string"
    },
    "description": {
      "type": "string"
    },
    "url": {
      "type": "string"
    },
    "changes": {
      "type": "string"
    },
    "members": {
      "type": "object",
      "properties": {
        "@set": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "@id": {
                "type": "string"
              }
            },
            "required": ["@id"]
          }
        }
      },
      "required": ["@set"]
    }
  },
  "required": [
    "@id",
    "@type",
    "cmip_acronym",
    "description",
    "url",
    "changes",
    "members"
  ]
}


json.dump(schema,open('consortia/schema.json','w'),indent=4)


for iloc in inst:
    text = open(iloc,'r').read()
    textnew = text.replace('mip-cmor-tables/consortia','mip-cmor-tables:organisations/consortia')
    print(iloc)
    js = json.loads(textnew)
    # pprint.pprint(js)
    
    validate_json(js,schema,iloc)
    
    with open(f'./{iloc.lower()}','w') as f:
      jst = json.dumps(js,indent=4)
      f.write(jst)
      # os.system(f'rm {iloc}')
 

consortia/version.json
Validation error: consortia/version.json [{'@id': '../../https://github.com/PCMDI/mip-cmor-tables/blob/main/JSONLD/organisations/consortia/solaris-heppa.json', '@type': 'version', 'version:file': '../../JSONLD/organisations/consortia/solaris-heppa.json', 'version:release': {'mip-cmor-tables': 'v6.5.0.2', 'cmip6plus': 'v6.5.1.4'}, 'version:previous_updates': 2, 'version:date': '2024-06-11T20:24:13+01:00', 'version:commit': {'hash': 'b619cb7885b6786c8f159b28616ddc51e8ad2744', 'message': 'update to solaris-heppa to link with institutions', 'author': {'name': 'Daniel Ellis', 'email': 'daniel.ellis@ext.esa.int'}}, 'version:data': {'@id': '../../mip-cmor-tables:organisations/consortia/solaris-heppa'}}] is not of type 'object'
consortia/graph.json
Validation error: consortia/graph.json 'cmip_acronym' is a required property
consortia/frame.json
Validation error: consortia/frame.json '@id' is a required property
consortia/schema.json
Validation error: consortia/schema.jso

In [33]:
consortia =  glob.glob('consortia/*.json')
      
inst = [i.replace('institutions/','').replace('.json','').lower() for i in glob.glob('institutions/*.json')]


In [34]:
for c in consortia:
    text = open(c,'r').read()
    if '@id' not in text or 'schema' in text: continue
    textnew = text.replace('mip-cmor-tables/consortia','mip-cmor-tables:organisations/consortia')
    js = json.loads(textnew)
    
    if 'members' not in js: continue
    
    members = js['members']
    
    
    newmembers = []
    for m in members:
        item = m['institution']['@id']
        
        if 'mip-cmor-tables:' in item:
            item = item.split('/')[-1]    
        
    
        
        if '(' in item:
            item = item.split('(')[0].strip()
        
        item = item.strip().split(' ')[0]
        item = f"mip-cmor-tables:organisations/institutions/{item.lower()}"
         
         
            
        if item.split('/')[-1] not in inst: 
            status = 'prior'
        else:
            status = 'active'
        
        
        print(item)
        
        newmembers.append({
            "@type":"consortia:member",
            'institution':{'@id': item},
            'membership_type':status,
            'dates':[{"from":2016, "to":2022, "phase":"CMIP6" }],
        })
            
       

    
    js.update({'members':{"@set":newmembers}, 
            'status':'active',})

    jsonld.expand(js)
    with open(c,'w') as f:
      jst = json.dumps(js,indent=4)
      f.write(jst)

[{'@type': 'consortia:member',
  'consortia:institution': {'@id': 'mip-cmor-tables:organisations/institutions/NorESM'},
  'consortia:membership_type': 'prior',
  'consortia:dates': [{'from': 2016, 'to': 2022, 'phase': 'CMIP6'}]},
 {'@type': 'consortia:member',
  'consortia:institution': {'@id': 'mip-cmor-tables:organisations/institutions/MET-Norway'},
  'consortia:membership_type': 'prior',
  'consortia:dates': [{'from': 2016, 'to': 2022, 'phase': 'CMIP6'}]},
 {'@type': 'consortia:member',
  'consortia:institution': {'@id': 'mip-cmor-tables:organisations/institutions/NERSC'},
  'consortia:membership_type': 'prior',
  'consortia:dates': [{'from': 2016, 'to': 2022, 'phase': 'CMIP6'}]},
 {'@type': 'consortia:member',
  'consortia:institution': {'@id': 'mip-cmor-tables:organisations/institutions/NILU'},
  'consortia:membership_type': 'prior',
  'consortia:dates': [{'from': 2016, 'to': 2022, 'phase': 'CMIP6'}]},
 {'@type': 'consortia:member',
  'consortia:institution': {'@id': 'mip-cmor-tab