# Register new input4MIPs source_id entry

## imports

In [1]:
%%time
import json
import os
import requests
from IPython.display import clear_output

CPU times: user 33.3 ms, sys: 17.4 ms, total: 50.7 ms
Wall time: 76.1 ms


## function defs

In [2]:
def make_srcId(keyId, stdDict, dataProviderDict, dataProviderFileDict):
    '''
    Generate source_id from user-provided inputs

    Parameters
    ----------
    keyId : the key under which the new entry is stored in the result
    stdDict : mapping holding every key listed in ``stdKeys`` below;
        any additional keys are ignored
    dataProviderDict : mapping of free-form extra entries; all of its
        keys are copied into the "|dataProviderExtra" sub-dictionary
    dataProviderFileDict : mapping holding "Conventions", "creation_date"
        and "tracking_id"; any additional keys are ignored

    Returns
    -------
    dict of the form ``{keyId: entry}`` where ``entry`` contains the
    standard keys followed by the "|dataProviderExtra",
    "|dataProviderFile" and "|esgfIndex" sub-dictionaries

    Raises
    ------
    KeyError if a required key is absent from ``stdDict`` or
    ``dataProviderFileDict``; the message lists every missing key
    '''
    # standard keys
    stdKeys = ["_status", "contact", "dataset_category", "datetime_start",
               "datetime_stop", "frequency", "further_info_url",
               "grid_label", "institution_id", "license", "mip_era",
               "nominal_resolution", "realm", "region", "source",
               "source_id", "source_version", "target_mip", "title"]
    # data provider file keys
    dataProviderFileKeys = ["Conventions", "creation_date", "tracking_id"]
    # ESGF default entries (once data is published)
    esgfKeys = ["_timestamp", "data_node", "latest", "replica",
                "version", "xlink"]

    # report all missing required keys in one go, so the form can be
    # completed in a single pass instead of one KeyError at a time
    missing = [key for key in stdKeys if key not in stdDict]
    missing += [key for key in dataProviderFileKeys
                if key not in dataProviderFileDict]
    if missing:
        raise KeyError("missing required keys: " + ", ".join(missing))

    # fill standard entries
    entry = {key: stdDict[key] for key in stdKeys}
    # fill data provider extra entries (every key supplied is kept)
    entry["|dataProviderExtra"] = {key: dataProviderDict[key]
                                   for key in dataProviderDict.keys()}
    # fill data provider file entries
    entry["|dataProviderFile"] = {key: dataProviderFileDict[key]
                                  for key in dataProviderFileKeys}
    # fill ESGF entries with empty-string placeholders
    entry["|esgfIndex"] = {key: "" for key in esgfKeys}

    return {keyId: entry}



## Data provider to complete form entries below

In [136]:
# required keys - SOLARIS-HEPPA-CMIP-4-1
# NOTE: in the multi-line strings below, adjacent literals are concatenated
# by the parser; each fragment therefore carries its own trailing space
# where a word break is wanted.
stdDict = {
    "_status": "Registered",
    "contact": "bernd@iaa.es",
    "dataset_category": "solar",
    "datetime_start": "1850-01-01",
    "datetime_stop": "2023-12-31",
    "frequency": ["day", "mon", "yr"],
    "further_info_url": "http://solarisheppa.geomar.de/cmip7",
    "grid_label": "gn",
    "institution_id": "SOLARIS-HEPPA",
    "license": "CC BY 4.0",
    "mip_era": "CMIP6Plus",
    "nominal_resolution": "250 km",
    "realm": "atmos",
    "region": ["global"],
    "source": (
        "SOLARIS-HEPPA CMIP 4.1 solar forcing based on SSI, "
        "TSI, and F10.7 from ssi_v03r00_preliminary "
        "(Odele Coddington et al., pers. comm.); Ap and Kp "
        "from ftp.ngdc.noaa.gov until 2014, afterwards from "
        "GFZ Potsdam (https://kp.gfz-potsdam.de), P-IPR from "
        "SEP-II (Ilya Usoskin et al., pers. comm.), MEE-IPR "
        "from FMI APEEP v2024b_cmip7 (Max van de Kamp et al., "
        "pers. comm.), GCR-IPR from CRII v2024-02 (Ilya "
        "Usoskin et al., pers. comm.)"
    ),
    "source_id": "SOLARIS-HEPPA-CMIP-4-1",
    "source_version": "4.1",
    "target_mip": "CMIP",
    "title": "SOLARIS-HEPPA 4.1 CMIP7 solar forcing",
}
# echo the identifier that the new entry will be stored under
print(stdDict["source_id"])

# optional entries
# dataProviderExtra - every key placed here is copied into the entry
dataProviderDict = {
    "source_variables": ["multiple"],
    "metadata_url": (
        "see http://solarisheppa.geomar.de/solarisheppa/"
        "sites/default/files/data/cmip7/CMIP7_metadata_"
        "description_4.1.pdf"
    ),
    "contributor_names": (
        "Bernd Funke, Timo Asikainen, Stefan Bender, "
        "Odele Coddington, Thierry Dudok de Wit, "
        "Illaria Ermolli, Margit Haberreiter, "
        "Doug Kinnison, Judith Lean, "
        "Sergey Koldoboskiy, Daniel R. Marsh, "
        "Hilde Nesse, Annika Seppaelae, "
        "Miriam Sinnhuber, Ilya Usoskin, "
        "Max van de Kamp, Pekka T. Verronen"
    ),
    "reference": (
        "Funke et al., 2024: Towards the definition of a solar forcing dataset for CMIP7, "
        "Geosci. Model Dev., 17 (3), pp 1217--1227. doi: https://doi.org/10.5194/gmd-17-1217-2024"
    ),
}

# dataProviderFile - creation_date and tracking_id are left blank here
dataProviderFileDict = {
    "Conventions": "CF-1.6",
    "creation_date": "",
    "tracking_id": "",
}

# call function to populate
newId = make_srcId(
    stdDict["source_id"],
    stdDict,
    dataProviderDict,
    dataProviderFileDict,
)

SOLARIS-HEPPA-CMIP-4-1


In [137]:
# display the assembled entry for a visual check
newId

{'SOLARIS-HEPPA-CMIP-4-1': {'_status': 'Registered',
  'contact': 'bernd@iaa.es',
  'dataset_category': 'solar',
  'datetime_start': '1850-01-01',
  'datetime_stop': '2023-12-31',
  'frequency': ['day', 'mon', 'yr'],
  'further_info_url': 'http://solarisheppa.geomar.de/cmip7',
  'grid_label': 'gn',
  'institution_id': 'SOLARIS-HEPPA',
  'license': 'CC BY 4.0',
  'mip_era': 'CMIP6Plus',
  'nominal_resolution': '250 km',
  'realm': 'atmos',
  'region': ['global'],
  'source': 'SOLARIS-HEPPA CMIP 4.1 solar forcing based on SSI, TSI, and F10.7 from ssi_v03r00_preliminary (Odele Coddington et al., pers. comm.); Ap and Kp from ftp.ngdc.noaa.gov until 2014, afterwards from GFZ Potsdam (https://kp.gfz-potsdam.de), P-IPR from SEP-II (Ilya Usoskin et al., pers. comm.), MEE-IPR from FMI APEEP v2024b_cmip7 (Max van de Kamp et al., pers. comm.), GCR-IPR from CRII v2024-02 (Ilya Usoskin et al., pers. comm.)',
  'source_id': 'SOLARIS-HEPPA-CMIP-4-1',
  'source_version': '4.1',
  'target_mip': 'CMIP',

## sync repo - input4MIPs_source_id.json

In [17]:
%%time
# read input4mips_cvs source_id
# remote
srcIdGithub = "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_source_id.json"
# bound the wait and fail loudly on an HTTP error, rather than handing an
# error page to the JSON parser
js = requests.get(srcIdGithub, timeout=30)
js.raise_for_status()
srcIds = json.loads(js.text)
# local
#srcIdGithub = "../input4MIPs_source_id.json"
#with open(srcIdGithub, 'r') as f:
#    srcIds = json.load(f)

# add new source_id
# NOTE(review): the two lines below are commented out, so the entry held in
# newId is NOT merged into srcIds by this cell - re-enable them when a new
# source_id actually has to be registered.
#srcIds["source_id"][stdDict["source_id"]] = {}
#srcIds["source_id"][stdDict["source_id"]] = newId[stdDict["source_id"]]

# Catch issues
# Each statement below overwrites selected fields of an entry that is
# expected to exist already in the registry; top-level fields are patched
# with dict.update, the "|dataProviderExtra" sub-dictionary separately.
registry = srcIds["source_id"]

# --- MRI-JRA55-do releases
registry["MRI-JRA55-do-1-3"].update({
    "datetime_start": "1958-01-01",
    "datetime_stop": "2018-02-02",
    "realm": ["atmos", "landIce", "ocean", "seaIce"],
})
registry["MRI-JRA55-do-1-3-2"].update({
    "datetime_start": "1958-01-01",
    "datetime_stop": "2019-01-05",
    "realm": ["land", "landIce", "ocean"],
})
registry["MRI-JRA55-do-1-4-0"].update({
    "datetime_start": "1958-01-01",
    "datetime_stop": "2019-01-05",
    "realm": ["atmos", "land", "landIce", "ocean", "seaIce"],
})
registry["MRI-JRA55-do-1-5-0"].update({
    "datetime_stop": "2020-07-15",
    "realm": ["atmos", "land", "landIce", "ocean", "seaIce"],
})
registry["MRI-JRA55-do-1-6-0"].update({
    "datetime_stop": "2024-02-01",
    "realm": ["atmos", "land", "landIce", "ocean", "seaIce"],
})

# --- ImperialCollege isotope datasets
registry["ImperialCollege-1-1"]["|dataProviderExtra"]["source_variables"] = ["delta13co2_in_air"]
registry["ImperialCollege-2-0"]["|dataProviderExtra"]["source_variables"] = ["Delta14co2_in_air"] # missing
# the six scenario entries receive identical corrections; the lists are
# built inside the loop so that every entry gets its own list object
for scenarioId in ["ImperialCollege-AIM-ssp370-1-0",
                   "ImperialCollege-GLOBIOM-ssp245-1-0",
                   "ImperialCollege-IMAGE-ssp119-1-0",
                   "ImperialCollege-IMAGE-ssp126-1-0",
                   "ImperialCollege-REMIND-MAGPIE-ssp534os-1-0",
                   "ImperialCollege-REMIND-MAGPIE-ssp585-1-0"]:
    registry[scenarioId].update({
        "datetime_start": "2015-01-01",
        "datetime_stop": "2100-01-01",
        "region": ["global"],
    })
    registry[scenarioId]["|dataProviderExtra"]["source_variables"] = ["delta13co2_in_air", "Delta14co2_in_air"]

# --- IACETH-SAGE3lambda
registry["IACETH-SAGE3lambda-2-1-0"].update({"region": ["global"]})
registry["IACETH-SAGE3lambda-2-1-0"]["|dataProviderExtra"]["source_variables"] = ["multiple"]
registry["IACETH-SAGE3lambda-3-0-0"].update({
    "region": ["global"],
    "datetime_start": "1850-01-16",
    "datetime_stop": "2014-12-15",
})
registry["IACETH-SAGE3lambda-3-0-0"]["|dataProviderExtra"]["source_variables"] = ["multiple"]

# --- ImperialCollege-ssp245-covid
registry["ImperialCollege-ssp245-covid-4-8-1"].update({
    "datetime_start": "2015-01-16",
    "datetime_stop": "2050-12-16",
    "region": ["global"],
})
registry["ImperialCollege-ssp245-covid-4-8-1"]["|dataProviderExtra"]["source_variables"] = ["multiple"]

# --- DCPP-C-ipv
registry["DCPP-C-ipv-1-1"].update({
    "datetime_start": "1900-01-01",
    "datetime_stop": "2013-01-01",
    "region": ["global"],
})
registry["DCPP-C-ipv-1-1"]["|dataProviderExtra"]["source_variables"] = ["ipv_index"]

# --- UoM-ssp126
registry["UoM-ssp126-1-1-0"].update({
    "contact": "malte.meinshausen@unimelb.edu.au",
    "dataset_category": "GHGConcentrations",
    "datetime_start": "2015-01-16",
    "datetime_stop": "2500-12-16",
    "frequency": ["mon", "yr"],
    "further_info_url": "http://climatecollege.unimelb.edu.au/cmip6",
    "nominal_resolution": "10000 km",
    "region": ["global"],
    "source_version": "1.1.0",
})
registry["UoM-ssp126-1-1-0"]["|dataProviderExtra"]["source_variables"] = ["multiple"]

# --- MPI-M-MACv2-SP
registry["MPI-M-MACv2-SP-1-0"].update({
    "datetime_start": "1850-01-01",
    "datetime_stop": "2100-12-31",
    "region": ["global"],
    "target_mip": "RFMIP",
})
registry["MPI-M-MACv2-SP-1-0"]["|dataProviderExtra"]["source_variables"] = ["multiple"]

# --- UCI-present
registry["UCI-present-1-0"].update({
    "datetime_start": "1979-01-16",
    "datetime_stop": "2014-12-16",
    "region": ["global"],
})
registry["UCI-present-1-0"]["|dataProviderExtra"]["source_variables"] = ["siconc"]

# --- UColorado-RFMIP: all listed versions share the same date range
for srcId in ["UColorado-RFMIP-0-4", "UColorado-RFMIP-1-0", "UColorado-RFMIP-1-1", "UColorado-RFMIP-1-2"]:
    registry[srcId].update({
        "datetime_start": "2014-01-01",
        "datetime_stop": "2014-12-31",
    })

# Write all out
oF = "../input4MIPs_source_id.json"
# Serialise before touching the file: if encoding fails, the existing file
# is left intact instead of being deleted or truncated first.
payload = json.dumps(srcIds, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":"),)
# mode "w" truncates an existing file, so no explicit os.remove is needed;
# the with-block closes the handle even if the write raises
with open(oF, "w") as fH:
    fH.write(payload)

CPU times: user 12.6 ms, sys: 3.16 ms, total: 15.7 ms
Wall time: 29.4 ms


## sync repo changes - create composite CV - CVs/input4MIPs_CV.json

In [4]:
%%time
# Names of the individual CV files ("../input4MIPs_<name>.json") that are
# merged into the composite CV; each file is expected to hold its content
# under a top-level key equal to <name>.
CVExportList = ["activity_id", "dataset_category", "DRS", "institution_id",
                "license", "mip_era", "product", "required_global_attributes",
                "source_id", "target_mip", "tracking_id"]
CVExportList.sort()

# Collate all CVs in master
tmp = {}
tmp["CV"] = {}
for key in CVExportList:
    fileName = "".join(["input4MIPs_", key, ".json"])
    filePath = os.path.join("..", fileName)
    # explicit encoding so the read does not depend on the platform default
    with open(filePath, "r", encoding="utf-8") as fH:
        keyDict = json.load(fH)
    tmp["CV"][key] = keyDict[key]

# Write collated CVs to file
# exist_ok avoids the check-then-create race of exists() + makedirs()
os.makedirs("../CVs", exist_ok=True)
oF = "../CVs/input4MIPs_CV.json"
# Serialise before touching the file: if encoding fails, the existing file
# is left intact instead of being deleted or truncated first.
payload = json.dumps(tmp, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":"),)
# mode "w" truncates an existing file, so no explicit os.remove is needed;
# the with-block closes the handle even if the write raises
with open(oF, "w", encoding="utf-8") as fH:
    fH.write(payload)

CPU times: user 18.9 ms, sys: 5.76 ms, total: 24.7 ms
Wall time: 24.6 ms
