# Pull MIP table files; reformulate for CMOR 3.10
<div style="text-align: right">
<p>
    <img src="https://pcmdi.github.io/assets/PCMDI/199x65px-PCMDI-Logo-Text-rectangle.png"
         width="91"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="Program for Climate Model Diagnosis and Intercomparison"
         alt="Program for Climate Model Diagnosis and Intercomparison"
    >&nbsp;
    <img src="https://pcmdi.github.io/assets/LLNL/212px-LLNLiconPMS286-WHITEBACKGROUND.png"
         width="30"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="Lawrence Livermore National Laboratory"
         alt="Lawrence Livermore National Laboratory"
    >&nbsp;
    <img src="https://pcmdi.github.io/assets/DOE/459x127px-DOE-Logo_Color_TextOnly.png"
         width="108"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="United States Department of Energy"
         alt="United States Department of Energy"
    >
</p>
</div>

**Summary**

This notebook pulls CMIP6Plus/CMOR 3.9.0-era MIP table files, strips out extraneous variables, and saves the files for local use.

**Authors**

Paul J. Durack ([durack1](https://github.com/durack1); [PCMDI](https://pcmdi.llnl.gov/), [Lawrence Livermore National Laboratory](https://www.llnl.gov/))

**Notes**

PJD 25 Feb 2025 - initiated<br>
PJD 25 Feb 2025 - first pass at initial CMOR 3.10 test tables and CMIP7_CV.json<br>
TODO:

**Links**

### imports

In [1]:
%%time
import datetime
import hashlib
import json
import os
import requests

CPU times: user 33.4 ms, sys: 18.7 ms, total: 52.2 ms
Wall time: 66.8 ms


### set upstream table URLs and pull files

In [6]:
%%time
# Upstream MIP table files to download. Each key is also the notebook-level
# variable name the parsed table is bound to (APday, OPmon, OPmonLev).
urls = {
    "APday": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/Tables/MIP_APday.json",
    "OPmon": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/Tables/MIP_OPmon.json",
    "OPmonLev": "https://raw.githubusercontent.com/PCMDI/mip-cmor-tables/refs/heads/main/Tables/MIP_OPmonLev.json",
}

# loop through urls; a failure is reported and the loop moves on to the next
# table, so a table that failed to download is simply left undefined
for count, (key, url) in enumerate(urls.items()):
    print(count, key)
    try:
        # requests applies no timeout unless one is given; without it a stalled
        # connection would hang this cell. A timeout raises a RequestException
        # subclass, which the first handler below reports.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise HTTPError for bad responses (4/5xx)
        # bind the parsed table to a notebook-level variable named after the key
        globals()[key] = json.loads(response.text)
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON decode failed: {e}")
    except Exception as e:
        print(f"Unexpected error occurred: {e}")

0 APday
1 OPmon
2 OPmonLev
CPU times: user 19.6 ms, sys: 5.48 ms, total: 25.1 ms
Wall time: 76.2 ms


### APday - trim out redundant variables

In [3]:
# Reduce the APday table to the "pr" and "tas" entries only.
wanted = ("pr", "tas")
variables = APday["variable_entry"]
# Collect the names to drop first so the dict is not resized while iterating.
for name in [entry for entry in variables if entry not in wanted]:
    del variables[name]

### OPmon - trim out redundant variables

In [4]:
# Reduce the OPmon table to the "tos" entry only.
wanted = ("tos",)
variables = OPmon["variable_entry"]
# Collect the names to drop first so the dict is not resized while iterating.
for name in [entry for entry in variables if entry not in wanted]:
    del variables[name]
# cleanup comments: remove every "." from the comment text of what remains
for entry in variables.values():
    entry["comment"] = entry["comment"].replace(".", "")

### OPmonLev - trim out redundant variables

In [5]:
# Reduce the OPmonLev table to the "thetao" entry only.
wanted = ("thetao",)
variables = OPmonLev["variable_entry"]
# Snapshot the unwanted names, then delete them from the live dict.
unwanted = [entry for entry in variables if entry not in wanted]
for name in unwanted:
    del variables[name]
# cleanup comments: remove every "." from the comment text of what remains
for name in variables:
    cleaned = variables[name]["comment"].replace(".", "")
    variables[name]["comment"] = cleaned

### Header update notes

In [6]:
# Karl notes https://github.com/PCMDI/cmor/issues/762#issuecomment-2673038397
# Reference only: the bare string below is never assigned or parsed, and the
# notebook simply echoes it back as this cell's output. It records the proposed
# Header layout: entries marked "MOVE TO CV.json FILE" leave the table Header,
# "checksum" is a new entry, and the "-" prefixed entries are table-wide
# defaults that individual variables may override.
"""
"Header": {
    **** MOVE TO CV.json FILE: "data_specs_version": "CMIP_specs7.0.0.0-alpha",
    **** MOVE TO CV.json FILE: "mip_era": "CMIP6",
    **** MOVE TO CV.json FILE: "approx_interval": "30.00000", 
    "checksum":"", **** New Header entry will contain md5 checksum value
    "cmor_version": "3.10", 
    "table_id": "atmos", 
    "realm": "atmos", **** Sets realm default value, overridden by modeling_realm attribute for individual variables
    "table_date":"2025-02-14", 
    "missing_value": "1e20", 
    "int_missing_value": "-999", 
    "product": "model-output", 
    "generic_levels": "alevel alevhalf", 
    "Conventions": "CF-1.11 CMIP-7alpha???"
    -"type":"real", **** This and following attributes are default values, overridden for individual variables
    -"positive":"",
    -"valid_min":"",
    -"valid_max":"",
    -"ok_min_mean_abs":"",
    -"ok_max_mean_abs":"",
},
"""

'\n"Header": {\n    **** MOVE TO CV.json FILE: "data_specs_version": "CMIP_specs7.0.0.0-alpha",\n    **** MOVE TO CV.json FILE: "mip_era": "CMIP6",\n    **** MOVE TO CV.json FILE: "approx_interval": "30.00000", \n    "checksum":"", **** New Header entry will contain md5 checksum value\n    "cmor_version": "3.10", \n    "table_id": "atmos", \n    "realm": "atmos", **** Sets realm default value, overridden by modeling_realm attribute for individual variables\n    "table_date":"2025-02-14", \n    "missing_value": "1e20", \n    "int_missing_value": "-999", \n    "product": "model-output", \n    "generic_levels": "alevel alevhalf", \n    "Conventions": "CF-1.11 CMIP-7alpha???"\n    -"type":"real", **** This and following attributes are default values, overridden for individual variables\n    -"positive":"",\n    -"valid_min":"",\n    -"valid_max":"",\n    -"ok_min_mean_abs":"",\n    -"ok_max_mean_abs":"",\n},\n'

### update Headers

In [7]:
# Explicit name -> table lookup (replaces eval() on the table names).
header_tables = {"APday": APday, "OPmon": OPmon, "OPmonLev": OPmonLev}
# Realm-specific Header entries. "realm" sets the realm default value, which is
# overridden by the modeling_realm attribute for individual variables;
# "generic_levels" is only set for the tables that define it here.
realm_entries = {
    "APday": {"generic_levels": "alevel alevhalf", "realm": "atmos"},
    "OPmon": {"realm": "ocean"},
    "OPmonLev": {"generic_levels": "olevel olevhalf", "realm": "ocean"},
}
for table, tmp in header_tables.items():
    header = tmp["Header"]
    # drop keys - move to Project_CV.json. A default is passed so that re-running
    # the cell (or a table that lacks the key) does not raise KeyError.
    header.pop("approx_interval", None)
    header.pop("data_specs_version", None)
    # blank the checksum so the digest computed below never includes an old digest
    header["checksum"] = ""
    # update entries
    header["Conventions"] = "CF-1.11 CMIP-7.0.0.0"
    header["cmor_version"] = "3.10"
    # NOTE(review): every table, including the two ocean tables, is given
    # table_id "atmos" - confirm this is intended.
    header["table_id"] = "atmos"
    # realm specific
    header.update(realm_entries[table])
    header["table_date"] = datetime.datetime.now().strftime("%Y-%m-%d")
    header["missing_value"] = "1e20"
    header["int_missing_value"] = "-999"
    header["product"] = "model-output"
    # This and following attributes are default values, overridden for individual variables
    header["type"] = "real"
    for attr in (
        "positive",
        "valid_min",
        "valid_max",
        "ok_min_mean_abs",
        "ok_max_mean_abs",
    ):
        header[attr] = ""
    # once entries are updated, generate the md5 checksum of the key-sorted JSON
    # text of the whole table (checksum entry still blank) and add it back in
    dictStr = json.dumps(tmp, sort_keys=True)
    header["checksum"] = hashlib.md5(dictStr.encode("utf8")).hexdigest()

In [8]:
APday

{'Header': {'Conventions': 'CF-1.11 CMIP-7.0.0.0',
  'checksum': '2256e1691dea97aedc111ebba90b81dc',
  'cmor_version': '3.10',
  'generic_levels': 'alevel alevhalf',
  'int_missing_value': '-999',
  'missing_value': '1e20',
  'product': 'model-output',
  'table_date': '2025-02-25',
  'table_id': 'atmos',
  'realm': 'atmos',
  'type': 'real',
  'positive': '',
  'valid_min': '',
  'valid_max': '',
  'ok_min_mean_abs': '',
  'ok_max_mean_abs': ''},
 'variable_entry': {'pr': {'cell_measures': 'area: areacella',
   'cell_methods': 'area: time: mean',
   'comment': 'includes both liquid and solid phases',
   'dimensions': ['longitude', 'latitude', 'time'],
   'frequency': 'day',
   'long_name': 'Precipitation',
   'modeling_realm': ['atmos'],
   'ok_max_mean_abs': '',
   'ok_min_mean_abs': '',
   'out_name': 'pr',
   'positive': '',
   'standard_name': 'precipitation_flux',
   'type': 'real',
   'units': 'kg m-2 s-1',
   'valid_max': '',
   'valid_min': ''},
  'tas': {'cell_measures': 'area

In [9]:
OPmon

{'Header': {'Conventions': 'CF-1.11 CMIP-7.0.0.0',
  'checksum': 'db4b1e36de301ace2874e9851f0b0a2c',
  'cmor_version': '3.10',
  'generic_levels': '',
  'int_missing_value': '-999',
  'missing_value': '1e20',
  'product': 'model-output',
  'table_date': '2025-02-25',
  'table_id': 'atmos',
  'realm': 'ocean',
  'type': 'real',
  'positive': '',
  'valid_min': '',
  'valid_max': '',
  'ok_min_mean_abs': '',
  'ok_max_mean_abs': ''},
 'variable_entry': {'tos': {'cell_measures': 'area: areacello',
   'cell_methods': 'area: mean where sea time: mean',
   'comment': 'Temperature of upper boundary of the liquid ocean, including temperatures below sea-ice and floating ice shelves',
   'dimensions': ['longitude', 'latitude', 'time'],
   'frequency': 'mon',
   'long_name': 'Sea Surface Temperature',
   'modeling_realm': ['ocean'],
   'ok_max_mean_abs': '',
   'ok_min_mean_abs': '',
   'out_name': 'tos',
   'positive': '',
   'standard_name': 'sea_surface_temperature',
   'type': 'real',
   'uni

In [10]:
OPmonLev

{'Header': {'Conventions': 'CF-1.11 CMIP-7.0.0.0',
  'checksum': '61b34e5e0d6dd30f5107587442eb36be',
  'cmor_version': '3.10',
  'generic_levels': 'olevel olevhalf',
  'int_missing_value': '-999',
  'missing_value': '1e20',
  'product': 'model-output',
  'table_date': '2025-02-25',
  'table_id': 'atmos',
  'realm': 'ocean',
  'type': 'real',
  'positive': '',
  'valid_min': '',
  'valid_max': '',
  'ok_min_mean_abs': '',
  'ok_max_mean_abs': ''},
 'variable_entry': {'thetao': {'cell_measures': 'area: areacello volume: volcello',
   'cell_methods': 'area: mean where sea time: mean',
   'comment': 'Diagnostic should be contributed even for models using conservative temperature as prognostic field',
   'dimensions': ['longitude', 'latitude', 'olevel', 'time'],
   'frequency': 'mon',
   'long_name': 'Sea Water Potential Temperature',
   'modeling_realm': ['ocean'],
   'ok_max_mean_abs': '',
   'ok_min_mean_abs': '',
   'out_name': 'thetao',
   'positive': '',
   'standard_name': 'sea_water

### create CMIP7_CV.json

In [11]:
%%time
# get CMIP6_CVs; the key is also the notebook-level variable name the parsed
# file is bound to (CV)
urls = {
    "CV": "https://raw.githubusercontent.com/PCMDI/cmip6-cmor-tables/refs/heads/main/Tables/CMIP6_CV.json",
}

# loop through urls; a failure is reported and the loop moves on, so a file
# that failed to download is simply left undefined
for count, (key, url) in enumerate(urls.items()):
    print(count, key)
    try:
        # requests applies no timeout unless one is given; without it a stalled
        # connection would hang this cell. A timeout raises a RequestException
        # subclass, which the first handler below reports.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise HTTPError for bad responses (4/5xx)
        # bind the parsed file to a notebook-level variable named after the key
        globals()[key] = json.loads(response.text)
    except requests.exceptions.RequestException as e:
        print(f"Request failed: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON decode failed: {e}")
    except Exception as e:
        print(f"Unexpected error occurred: {e}")

0 CV
CPU times: user 4.28 ms, sys: 1.4 ms, total: 5.68 ms
Wall time: 20.6 ms


### CVs - trim out redundant entries

In [12]:
# Top-level CV sections to retain; every other section is removed.
wanted = (
    "frequency",
    "grid_label",
    "license",
    "product",
    "realm",
    "source_type",
    "tracking_id",
)
sections = CV["CV"]
# Collect the names to drop first so the dict is not resized while iterating.
for name in [section for section in sections if section not in wanted]:
    del sections[name]

### CVs - cleanup

In [13]:
# Entries to retain within the frequency and grid_label sections (handled in
# that order); anything else in those sections is removed.
retained = {
    "frequency": (
        "1hr", "1hrCM", "3hr", "6hr", "day", "dec", "fx", "mon", "monC", "yr",
    ),
    "grid_label": (
        "gm", "gn", "gna", "gng", "gnz", "gr", "gra", "grg", "grz",
    ),
}
for section, wanted in retained.items():
    entries = CV["CV"][section]
    # snapshot the unwanted names before deleting from the live dict
    for name in [entry for entry in entries if entry not in wanted]:
        del entries[name]
# license: rebrand the first license entry from CMIP6 to CMIP7
first_license = CV["CV"]["license"][0]
CV["CV"]["license"][0] = first_license.replace("CMIP6", "CMIP7")

In [14]:
# Approximate sampling interval per frequency, in days ("day" is 1.0).
# "subhr" is 15 minutes here; the original note records 25 minutes for CMIP6,
# see also "approx_interval_error", "approx_interval_warning"
# https://github.com/PCMDI/cmip6-cmor-tables/blob/e3644d3b814d632c4343b9ae5a59faf1ed20191c/Tables/CMIP6_CFsubhr.json#L11-L13
CV["CV"]["approx_interval"] = {
    "subhr": 1 / 24 / 4,
    "1hr": 1.0 / 24,
    "3hr": 1.0 / 8,
    "6hr": 1.0 / 4,
    "day": 1.0,
    "mon": 30.0,
    "yr": 365.0,
    "dec": 365.0 * 10,
}
# Entries that were dropped from the table Headers and now live in the CV.
CV["CV"].update(
    {
        "data_specs_version": "CMIP-7.0.0.0",
        "mip_era": "CMIP7",
    }
)

In [15]:
# Rebuild the CV mapping with its top-level keys in sorted order.
ordered = {}
for name in sorted(CV["CV"]):
    ordered[name] = CV["CV"][name]
CV["CV"] = ordered

### write all files out

In [16]:
# Objects to write, keyed by the name used in the output filename
# (explicit lookup replaces eval() on the names).
outputs = {"APday": APday, "OPmon": OPmon, "OPmonLev": OPmonLev, "CV": CV}
# set outpath once, outside the loop. Defaults to the path hard-coded in the
# original notebook; set the CMOR_TEST_TABLES_DIR environment variable to write
# somewhere else without editing this cell.
outPath = os.environ.get(
    "CMOR_TEST_TABLES_DIR", "/Users/durack1/sync/git/cmor/TestTables"
)
for count, (name, dic) in enumerate(outputs.items()):
    print(count, name)
    # write file as CMIP7_<name>.json
    outFile = f"CMIP7_{name}.json"
    outPathAndFileName = os.path.join(outPath, outFile)
    print("outPathAndFileName:", outPathAndFileName)
    with open(outPathAndFileName, "w") as f:
        # key-sorted, 4-space indented, ASCII-only JSON
        json.dump(
            dic, f, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":")
        )

0 APday
outPathAndFileName: /Users/durack1/sync/git/cmor/TestTables/CMIP7_APday.json
1 OPmon
outPathAndFileName: /Users/durack1/sync/git/cmor/TestTables/CMIP7_OPmon.json
2 OPmonLev
outPathAndFileName: /Users/durack1/sync/git/cmor/TestTables/CMIP7_OPmonLev.json
3 CV
outPathAndFileName: /Users/durack1/sync/git/cmor/TestTables/CMIP7_CV.json
