# Register new input4MIPs source_id entry

## imports

In [1]:
%%time
import json
import os
import requests
from IPython.display import clear_output

CPU times: user 32.4 ms, sys: 16 ms, total: 48.5 ms
Wall time: 69.9 ms


## function defs

In [42]:
def make_srcId(keyId, stdDict, dataProviderDict, dataProviderFileDict):
    '''
    Generate source_id from user-provided inputs

    Parameters
    ----------
    keyId : hashable
        Top-level key of the returned dictionary.
    stdDict : mapping
        Must provide every key listed in stdKeys below. Keys it holds that
        are not in stdKeys are ignored.
    dataProviderDict : mapping
        Must provide "source_variables"; copied under "|dataProviderExtra".
        Other keys are ignored.
    dataProviderFileDict : mapping
        Must provide "Conventions", "creation_date" and "tracking_id";
        copied under "|dataProviderFile". Other keys are ignored.

    Returns
    -------
    dict
        {keyId: entry}, where entry holds the standard keys plus the nested
        "|dataProviderExtra", "|dataProviderFile" and "|esgfIndex" dicts.
        Every "|esgfIndex" value is an empty-string placeholder.

    Raises
    ------
    KeyError
        If any required key is absent from its input mapping. The message
        names every missing entry, not just the first one encountered.
    '''
    # standard keys
    stdKeys = ["_status", "contact", "dataset_category", "datetime_start",
               "datetime_stop", "frequency", "further_info_url",
               "grid_label", "institution_id", "mip_era",
               "nominal_resolution", "realm", "source",
               "source_id", "source_version", "target_mip"]
    # data provider extra keys
    dataProviderExtraKeys = ["source_variables"]
    # data provider file keys
    dataProviderFileKeys = ["Conventions", "creation_date", "tracking_id"]
    # ESGF default entries (once data is published)
    esgfKeys = ["_timestamp", "data_node", "latest", "replica",
                "version", "xlink"]

    # collected across all three inputs so one error reports every omission
    missing = []

    def _copy_required(source, keys, label):
        # copy keys from source, recording absent ones instead of stopping
        picked = {}
        for key in keys:
            try:
                picked[key] = source[key]
            except KeyError:
                missing.append("{}[{!r}]".format(label, key))
        return picked

    # fill standard entries
    entry = _copy_required(stdDict, stdKeys, "stdDict")
    # fill data provider extra entries
    entry["|dataProviderExtra"] = _copy_required(
        dataProviderDict, dataProviderExtraKeys, "dataProviderDict")
    # fill data provider file entries
    entry["|dataProviderFile"] = _copy_required(
        dataProviderFileDict, dataProviderFileKeys, "dataProviderFileDict")
    # fill ESGF entries with blank placeholders
    entry["|esgfIndex"] = {key: "" for key in esgfKeys}

    if missing:
        raise KeyError("missing required entries: " + ", ".join(missing))

    return {keyId: entry}



## Data provider to complete form entries below

In [43]:
# required keys
stdDict = {}
# Previous registration (BB4CMIP7-1-0), kept for reference only. It is held
# in "#" comments rather than a bare string literal so that it is never
# evaluated as an expression.
# # BB4CMIP7-1-0
# stdDict["_status"] = "Registered"
# stdDict["contact"] = "Margreet.vanMarle@deltares.nl; guido.vanderwerf@wur.nl"
# stdDict["dataset_category"] = "emissions"
# stdDict["datetime_start"] = "1990-01-16"
# stdDict["datetime_stop"] = "2022-12-16"
# stdDict["frequency"] = "mon"
# stdDict["further_info_url"] = "http://www.globalfiredata.org"
# stdDict["grid_label"] = "gn"
# stdDict["institution_id"] = "DRES"
# stdDict["license"] = "CC BY 4.0"
# stdDict["mip_era"] = "CMIP6Plus"
# stdDict["nominal_resolution"] = "25 km"
# stdDict["realm"] = "atmos"
# stdDict["region"] = ["global_land"]
# stdDict["source"] = " ".join(["Global fire emissions 1750 through 2022 based on GFED4s,",
#                               "FireMIP, visibility-observations, and GCD data. Data",
#                               "sources differ per region and year"])
# stdDict["source_id"] = "DRES-CMIP-BB4CMIP7-1-0"
# print(stdDict["source_id"])
# stdDict["source_version"] = "1.0"
# stdDict["target_mip"] = "CMIP"
# stdDict["title"] = "WUR/Deltares BB4CMIP7 1.0 Global Biomass Burning Emissions prepared for input4MIPs"

# CR-CMIP-0-2-0
# NOTE: "license", "region" and "title" are recorded below, but make_srcId
# only copies the keys named in its stdKeys list, so these three entries do
# not reach newId.
stdDict["_status"] = "Registered"
stdDict["contact"] = "zebedee.nicholls@climate-resource.com; malte.meinshausen@climate-resource.com"
stdDict["dataset_category"] = "GHGConcentrations"
stdDict["datetime_start"] = "0001-01-15"
stdDict["datetime_stop"] = "2022-12-15"
stdDict["frequency"] = "mon"
stdDict["further_info_url"] = "https://www.climate-resource.com/"
stdDict["grid_label"] = "gm"
stdDict["institution_id"] = "CR"
stdDict["license"] = "CC BY 4.0"
stdDict["mip_era"] = "CMIP6Plus"
stdDict["nominal_resolution"] = "10000 km"
stdDict["realm"] = "atmos"
stdDict["region"] = ["global"]
stdDict["source"] = "Global greenhouse gas concentrations 0001 through 2022 based on NOAA/AGAGE/GAGE data"
stdDict["source_id"] = "CR-CMIP-0-2-0"
# echo the id so the cell output shows which entry is being registered
print(stdDict["source_id"])
stdDict["source_version"] = "0.2.0"
stdDict["target_mip"] = "CMIP"
stdDict["title"] = "Climate Resource CMIP 0.2.0 Global Greenhouse Gas (GHG) Concentrations prepared for input4MIPs"
# optional entries
# dataProviderExtra
# NOTE: make_srcId copies only "source_variables" from this dict, so
# "comment" does not reach newId.
dataProviderDict = {}
dataProviderDict["source_variables"] = ""
dataProviderDict["comment"] = " ".join(["[TBC which grant] Data produced by Climate Resource",
                                        "supported by funding from the CMIP IPO (Coupled Model",
                                        "Intercomparison Project International Project Office).",
                                        "This is an interim dataset, not for production use"])
# dataProviderFile
dataProviderFileDict = {}
dataProviderFileDict["Conventions"] = ""  # "CF-1.7"
dataProviderFileDict["creation_date"] = ""
dataProviderFileDict["tracking_id"] = ""

# call function to populate; the entry is keyed by its own source_id
newId = make_srcId(stdDict["source_id"], stdDict, dataProviderDict, dataProviderFileDict)

CR-CMIP-0-2-0


In [45]:
# display the assembled entry for a visual check
newId

{'CR-CMIP-0-2-0': {'_status': 'Registered',
  'contact': 'zebedee.nicholls@climate-resource.com; malte.meinshausen@climate-resource.com',
  'dataset_category': 'GHGConcentrations',
  'datetime_start': '0001-01-15',
  'datetime_stop': '2022-12-15',
  'frequency': 'mon',
  'further_info_url': 'https://www.climate-resource.com/',
  'grid_label': 'gm',
  'institution_id': 'CR',
  'mip_era': 'CMIP6Plus',
  'nominal_resolution': '10000 km',
  'realm': 'atmos',
  'source': 'Global greenhouse gas concentrations 0001 through 2022 based on NOAA/AGAGE/GAGE data',
  'source_id': 'CR-CMIP-0-2-0',
  'source_version': '0.2.0',
  'target_mip': 'CMIP',
  '|dataProviderExtra': {'source_variables': ''},
  '|dataProviderFile': {'Conventions': '',
   'creation_date': '',
   'tracking_id': ''},
  '|esgfIndex': {'_timestamp': '',
   'data_node': '',
   'latest': '',
   'replica': '',
   'version': '',
   'xlink': ''}}}

## sync with repo - input4MIPs_CVs.json

In [62]:
%%time
# read input4mips_cvs source_id
srcIdGithub = "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_source_id.json"
# requests applies no timeout by default; bound the wait so the cell cannot hang
js = requests.get(srcIdGithub, timeout=30)
# stop on an HTTP error here instead of handing an error page to json.loads
js.raise_for_status()
srcIds = json.loads(js.text)

# add new source_id (replaces any existing entry stored under the same key)
srcIds["source_id"][stdDict["source_id"]] = newId[stdDict["source_id"]]

# Write all out
oF = "../input4MIPs_source_id.json"
if os.path.exists(oF):
    os.remove(oF)
# the context manager closes the file even if json.dump raises
with open(oF, "w") as fH:
    json.dump(srcIds, fH, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":"),)

CPU times: user 18.4 ms, sys: 5.4 ms, total: 23.7 ms
Wall time: 42.7 ms
