# Register new input4MIPs source_id entry

## imports

In [103]:
%%time
import json
import os
import requests
from IPython.display import clear_output

CPU times: user 11 μs, sys: 1e+03 ns, total: 12 μs
Wall time: 15 μs


## function defs

In [104]:
def make_srcId(keyId, stdDict, dataProviderDict, dataProviderFileDict):
    '''
    Generate source_id from user-provided inputs
    '''
    srcId = {}
    srcId[keyId] = {}
    # standard keys
    stdKeys = ["_status","contact", "dataset_category", "datetime_start",
               "datetime_stop", "frequency", "further_info_url",
               "grid_label", "institution_id", "license", "mip_era",
               "nominal_resolution", "realm", "source",
               "source_id", "source_version", "target_mip"]
    # fill standard entries
    for cnt, key in enumerate(stdKeys):
        srcId[keyId][key] = stdDict[key]
    # data provider extra keys
    dataProviderExtraKeys = dataProviderDict.keys()
    srcId[keyId]["|dataProviderExtra"] = {}
    # fill data provider extra entries
    for key in dataProviderExtraKeys:
        srcId[keyId]["|dataProviderExtra"][key] = dataProviderDict[key]
    # data provider file keys
    dataProviderFileKeys = ["Conventions", "creation_date", "tracking_id"]
    srcId[keyId]["|dataProviderFile"] = {}
    # fill data provider file entries
    for key in dataProviderFileKeys:
        srcId[keyId]["|dataProviderFile"][key] = dataProviderFileDict[key]
    # ESGF default entries (once data is published)
    esgfKeys = ["_timestamp", "data_node", "latest", "replica",
                "version", "xlink"]
    srcId[keyId]["|esgfIndex"] = {}
    # fill ESGF entries
    for key in esgfKeys:
        srcId[keyId]["|esgfIndex"][key] = ""

    return srcId



## Data provider to complete form entries below

In [107]:
stdDict = {}

# required keys
# PCMDI-AMIP-Had1p1-1-0
stdDict["_status"] = "Registered"
stdDict["contact"] = "zelinka1@llnl.gov; durack1@llnl.gov"
stdDict["dataset_category"] = "SSTsAndSeaIce"
stdDict["datetime_start"] = "1979-01-16"
stdDict["datetime_stop"] = "2024-06-16"
stdDict["frequency"] = "mon"
stdDict["further_info_url"] = "https://pcmdi.llnl.gov/mips/amip"
stdDict["grid_label"] = "gn"
stdDict["institution_id"] = "PCMDI"
stdDict["license"] = "CC BY 4.0"
stdDict["mip_era"] = "CMIP6Plus"
stdDict["nominal_resolution"] = "1x1 degree"
stdDict["realm"] = "ocean"
stdDict["region"] = ["global_ocean"]
stdDict["source"] = " ".join(["PCMDI-AMIP Had-1.1 1.0: SST based on UK MetOffice HadISST 1.1"])
stdDict["source_id"] = "PCMDI-AMIP-Had1p1-1-0"
print(stdDict["source_id"])
stdDict["source_version"] = "1.0"
stdDict["target_mip"] = "Prototype"
stdDict["title"] = "PCMDI-AMIP Had-1.1 1.0 prototype dataset prepared for input4MIPs"

# optional entries
# dataProviderExtra
dataProviderDict = {}
dataProviderDict["source_variables"] = ["tos", "tosbcs"]
dataProviderDict["comment"] = " ".join(["Prototype dataset for the evaluation of SST forcing",
                                        "uncertainty over the satellite era - not for production use"])
dataProviderDict["data_usage_tips"] = " ".join(["This dataset has been generated for evaluation",
                                                "purposes only - not for production use in CMIP7",
                                                "simulations"])
dataProviderDict["grid"] = "0.25x0.25 degree latitude x longitude"

# dataProviderFile
dataProviderFileDict = {}
dataProviderFileDict["Conventions"] = "CF-1.7"
dataProviderFileDict["creation_date"] = ""
dataProviderFileDict["tracking_id"] = ""

# call function to populate
newId = make_srcId(stdDict["source_id"], stdDict, dataProviderDict, dataProviderFileDict)

PCMDI-AMIP-Had1p1-1-0


In [108]:
newId

{'PCMDI-AMIP-Had1p1-1-0': {'_status': 'Registered',
  'contact': 'zelinka1@llnl.gov; durack1@llnl.gov',
  'dataset_category': 'SSTsAndSeaIce',
  'datetime_start': '1979-01-16',
  'datetime_stop': '2024-06-16',
  'frequency': 'mon',
  'further_info_url': 'https://pcmdi.llnl.gov/mips/amip',
  'grid_label': 'gn',
  'institution_id': 'PCMDI',
  'license': 'CC BY 4.0',
  'mip_era': 'CMIP6Plus',
  'nominal_resolution': '1x1 degree',
  'realm': 'ocean',
  'source': 'PCMDI-AMIP Had-1.1 1.0: SST based on UK MetOffice HadISST 1.1',
  'source_id': 'PCMDI-AMIP-Had1p1-1-0',
  'source_version': '1.0',
  'target_mip': 'Prototype',
  '|dataProviderExtra': {'source_variables': ['tos', 'tosbcs'],
   'comment': 'Prototype dataset for the evaluation of SST forcing uncertainty over the satellite era - not for production use',
   'data_usage_tips': 'This dataset has been generated for evaluation purposes only - not for production use in CMIP7 simulations',
   'grid': '0.25x0.25 degree latitude x longitude'}

## sync with repo - input4MIPs_source_id.json (and DRES input4MIPs_institution_id.json)

In [111]:
%%time
# read input4mips_cvs source_id
# remote
#srcIdGithub = "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_source_id.json"
#js = requests.get(srcIdGithub)
# srcIds = json.load(f)
# local
srcIdGithub = "../input4MIPs_source_id.json"
with open(srcIdGithub, 'r') as f:
    srcIds = json.load(f)

srcIds

# add new source_id
#srcIds["source_id"][stdDict["source_id"]] = {}
#srcIds["source_id"][stdDict["source_id"]] = newId[stdDict["source_id"]]

# fix PCMDI-AMIP-OI2p1-1-0
#srcIds["source_id"]["PCMDI-AMIP-OI2p1-1-0"]["source"] = " ".join(["PCMDI-AMIP OI-2.1 1.0: SST",
#                                                                  "based on NOAA NCEP OI2.1"])

# Write all out
#oF = "../input4MIPs_source_id.json"
#if os.path.exists(oF):
#    os.remove(oF)
#fH = open(oF, "w")
#json.dump(srcIds, fH, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":"),)
#fH.close()

CPU times: user 4.23 ms, sys: 2.26 ms, total: 6.49 ms
Wall time: 5.91 ms


{'source_id': {'ACCESS1-3-rcp85-1-0': {'_status': 'Published',
   'contact': 'ISMIP6 Steering Team (ismip6@gmail.com)',
   'dataset_category': 'surfaceFluxes',
   'datetime_start': '1950-07-01',
   'datetime_stop': None,
   'frequency': 'yrC',
   'further_info_url': 'http://www.climate-cryosphere.org/wiki/index.php?title=ISMIP6_wiki_page',
   'grid_label': 'grg',
   'institution_id': 'NASA-GSFC',
   'license': 'CC BY 4.0',
   'mip_era': 'CMIP6',
   'nominal_resolution': '10 km',
   'realm': 'landIce',
   'region': '',
   'source': 'ACCESS1-3-rcp85-1-0 derived dataset computed from CMIP5 ACCESS1-3 historical and rcp85 simulations for ISMIP6',
   'source_id': 'ACCESS1-3-rcp85-1-0',
   'source_version': '1.0',
   'target_mip': 'ISMIP6',
   'title': 'ISMIP6 (CMIP6) - ACCESS1-3-rcp85-1-0 derived data prepared for input4MIPs',
   '|dataProviderExtra': {'source_variables': ['acabf',
     'evspsbl',
     'mrros',
     'pr',
     'sftflf',
     'so',
     'ts']},
   '|dataProviderFile': {'Conve