# Register new input4MIPs source_id entry

## imports

In [134]:
%%time
import copy
import json
import os

import requests
from IPython.display import clear_output

CPU times: user 10 μs, sys: 1e+03 ns, total: 11 μs
Wall time: 14.1 μs


## function defs

In [135]:
def make_srcId(keyId, stdDict, dataProviderDict, dataProviderFileDict):
    '''
    Generate source_id from user-provided inputs

    Parameters
    ----------
    keyId : str
        Key under which the new entry is stored in the returned dictionary.
    stdDict : dict
        Standard entries. Every key listed in ``stdKeys`` below must be
        present; keys that are not in ``stdKeys`` are ignored.
    dataProviderDict : dict
        Free-form extra entries; all of them are copied into the
        "|dataProviderExtra" sub-dictionary.
    dataProviderFileDict : dict
        Must contain "Conventions", "creation_date" and "tracking_id";
        only these three keys are copied into "|dataProviderFile".

    Returns
    -------
    dict
        ``{keyId: entry}`` where ``entry`` holds the standard keys followed
        by the "|dataProviderExtra", "|dataProviderFile" and "|esgfIndex"
        sub-dictionaries. All values are deep copies, so editing the result
        never changes the input dictionaries (and vice versa).

    Raises
    ------
    KeyError
        If a required key is missing from ``stdDict`` or
        ``dataProviderFileDict``.
    '''
    # standard keys
    stdKeys = ["_status", "contact", "dataset_category", "datetime_start",
               "datetime_stop", "frequency", "further_info_url",
               "grid_label", "institution_id", "license", "mip_era",
               "nominal_resolution", "realm", "region", "source",
               "source_id", "source_version", "target_mip"]
    # data provider file keys
    dataProviderFileKeys = ["Conventions", "creation_date", "tracking_id"]
    # ESGF default entries (once data is published)
    esgfKeys = ["_timestamp", "data_node", "latest", "replica",
                "version", "xlink"]

    # fill standard entries; deep copies keep list values such as
    # "frequency" or "region" from being shared with the caller's dict
    entry = {key: copy.deepcopy(stdDict[key]) for key in stdKeys}
    # fill data provider extra entries (every key the provider supplied)
    entry["|dataProviderExtra"] = {
        key: copy.deepcopy(dataProviderDict[key])
        for key in dataProviderDict.keys()
    }
    # fill data provider file entries
    entry["|dataProviderFile"] = {
        key: copy.deepcopy(dataProviderFileDict[key])
        for key in dataProviderFileKeys
    }
    # fill ESGF entries with empty placeholders
    entry["|esgfIndex"] = {key: "" for key in esgfKeys}

    return {keyId: entry}



## Data provider: complete the form entries below

In [136]:
# Form for source_id SOLARIS-HEPPA-CMIP-4-1.
# Three dictionaries are filled in here and then handed to make_srcId.

# required keys
# NOTE(review): "title" is set here but is not in the list of standard keys
# that make_srcId copies, so it does not reach newId - confirm this is intended.
stdDict = {
    "_status": "Registered",
    "contact": "bernd@iaa.es",
    "dataset_category": "solar",
    "datetime_start": "1850-01-01",
    "datetime_stop": "2023-12-31",
    "frequency": ["day", "mon", "yr"],
    "further_info_url": "http://solarisheppa.geomar.de/cmip7",
    "grid_label": "gn",
    "institution_id": "SOLARIS-HEPPA",
    "license": "CC BY 4.0",
    "mip_era": "CMIP6Plus",
    "nominal_resolution": "250 km",
    "realm": "atmos",
    "region": ["global"],
    # long description assembled from fragments joined by single spaces
    "source": " ".join([
        "SOLARIS-HEPPA CMIP 4.1 solar forcing based on SSI,",
        "TSI, and F10.7 from ssi_v03r00_preliminary",
        "(Odele Coddington et al., pers. comm.); Ap and Kp",
        "from ftp.ngdc.noaa.gov until 2014, afterwards from",
        "GFZ Potsdam (https://kp.gfz-potsdam.de), P-IPR from",
        "SEP-II (Ilya Usoskin et al., pers. comm.), MEE-IPR",
        "from FMI APEEP v2024b_cmip7 (Max van de Kamp et al.,",
        "pers. comm.), GCR-IPR from CRII v2024-02 (Ilya",
        "Usoskin et al., pers. comm.)",
    ]),
    "source_id": "SOLARIS-HEPPA-CMIP-4-1",
    "source_version": "4.1",
    "target_mip": "CMIP",
    "title": "SOLARIS-HEPPA 4.1 CMIP7 solar forcing",
}
# echo the identifier being registered
print(stdDict["source_id"])

# optional entries
# dataProviderExtra
dataProviderDict = {
    "source_variables": ["multiple"],
    # URL fragments are joined without a separator
    "metadata_url": "".join([
        "see http://solarisheppa.geomar.de/solarisheppa/",
        "sites/default/files/data/cmip7/CMIP7_metadata_",
        "description_4.1.pdf",
    ]),
    "contributor_names": " ".join([
        "Bernd Funke, Timo Asikainen, Stefan Bender,",
        "Odele Coddington, Thierry Dudok de Wit,",
        "Illaria Ermolli, Margit Haberreiter,",
        "Doug Kinnison, Judith Lean,",
        "Sergey Koldoboskiy, Daniel R. Marsh,",
        "Hilde Nesse, Annika Seppaelae,",
        "Miriam Sinnhuber, Ilya Usoskin,",
        "Max van de Kamp, Pekka T. Verronen",
    ]),
    "reference": " ".join([
        "Funke et al., 2024: Towards the definition of a solar forcing dataset for CMIP7,",
        "Geosci. Model Dev., 17 (3), pp 1217--1227. doi: https://doi.org/10.5194/gmd-17-1217-2024",
    ]),
}

# dataProviderFile
dataProviderFileDict = {
    "Conventions": "CF-1.6",
    "creation_date": "",
    "tracking_id": "",
}

# call function to populate; the entry is keyed by its own source_id
newId = make_srcId(
    stdDict["source_id"],
    stdDict,
    dataProviderDict,
    dataProviderFileDict,
)

SOLARIS-HEPPA-CMIP-4-1


In [137]:
newId

{'SOLARIS-HEPPA-CMIP-4-1': {'_status': 'Registered',
  'contact': 'bernd@iaa.es',
  'dataset_category': 'solar',
  'datetime_start': '1850-01-01',
  'datetime_stop': '2023-12-31',
  'frequency': ['day', 'mon', 'yr'],
  'further_info_url': 'http://solarisheppa.geomar.de/cmip7',
  'grid_label': 'gn',
  'institution_id': 'SOLARIS-HEPPA',
  'license': 'CC BY 4.0',
  'mip_era': 'CMIP6Plus',
  'nominal_resolution': '250 km',
  'realm': 'atmos',
  'region': ['global'],
  'source': 'SOLARIS-HEPPA CMIP 4.1 solar forcing based on SSI, TSI, and F10.7 from ssi_v03r00_preliminary (Odele Coddington et al., pers. comm.); Ap and Kp from ftp.ngdc.noaa.gov until 2014, afterwards from GFZ Potsdam (https://kp.gfz-potsdam.de), P-IPR from SEP-II (Ilya Usoskin et al., pers. comm.), MEE-IPR from FMI APEEP v2024b_cmip7 (Max van de Kamp et al., pers. comm.), GCR-IPR from CRII v2024-02 (Ilya Usoskin et al., pers. comm.)',
  'source_id': 'SOLARIS-HEPPA-CMIP-4-1',
  'source_version': '4.1',
  'target_mip': 'CMIP',

## sync with repo - input4MIPs_source_id.json (and DRES input4MIPs_institution_id.json)

In [143]:
%%time
# read input4mips_cvs source_id
# remote
srcIdGithub = "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_source_id.json"
# timeout: a stalled connection must not hang the kernel indefinitely
js = requests.get(srcIdGithub, timeout=30)
# stop on an HTTP error instead of parsing (and later writing out) an error page
js.raise_for_status()
srcIds = json.loads(js.text)
# local
#srcIdGithub = "../input4MIPs_source_id.json"
#with open(srcIdGithub, 'r') as f:
#    srcIds = json.load(f)

# add new source_id
#srcIds["source_id"][stdDict["source_id"]] = {}
#srcIds["source_id"][stdDict["source_id"]] = newId[stdDict["source_id"]]

# fix inconsistent fields
# the same contact/region/source/source_variables values are applied to all
# three VUA entries; a KeyError here means an entry (or its
# "|dataProviderExtra" sub-dictionary) is absent from the loaded registry
for srcId in ["VUA-CMIP-BB4CMIP6-1-0", "VUA-CMIP-BB4CMIP6-1-1", "VUA-CMIP-BB4CMIP6-1-2"]:
    srcIds["source_id"][srcId]["contact"] = "guido.vander.werf@vu.nl; m.j.e.van.marle@vu.nl"
    srcIds["source_id"][srcId]["region"] = ["global"]
    srcIds["source_id"][srcId]["source"] = " ".join(["Global fire emissions from 1750 through 2015",
                                                     "based on GFED4s, FireMIP, visibility-observations",
                                                     "and GCD data. Data sources differ per region and year"])
    srcIds["source_id"][srcId]["|dataProviderExtra"]["source_variables"] = ["multiple"]

# Write all out
oF = "../input4MIPs_source_id.json"
# delete any previous copy so the dump below starts from a fresh file
if os.path.exists(oF):
    os.remove(oF)
# context manager closes the handle even if json.dump raises part-way through
with open(oF, "w") as fH:
    json.dump(srcIds, fH, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":"),)

CPU times: user 31.3 ms, sys: 7.89 ms, total: 39.2 ms
Wall time: 249 ms
