# Pull MIP table files
<div style="text-align: right">
<p>
    <img src="https://pcmdi.github.io/assets/PCMDI/199x65px-PCMDI-Logo-Text-rectangle.png"
         width="91"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="Program for Climate Model Diagnosis and Intercomparison"
         alt="Program for Climate Model Diagnosis and Intercomparison"
    >&nbsp;
    <img src="https://pcmdi.github.io/assets/LLNL/212px-LLNLiconPMS286-WHITEBACKGROUND.png"
         width="30"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="Lawrence Livermore National Laboratory"
         alt="Lawrence Livermore National Laboratory"
    >&nbsp;
    <img src="https://pcmdi.github.io/assets/DOE/459x127px-DOE-Logo_Color_TextOnly.png"
         width="108"
         height="30"
         class="fixed-height-image"
         style="margin-right: 20px"
         title="United States Department of Energy"
         alt="United States Department of Energy"
    >
</p>
</div>

# **Summary**

This notebook pulls the input4MIPs CMIP6Plus/CMOR3.7.x-era controlled vocabulary (CV) files, strips out extraneous (non-PCMDI) entries, and saves the files for local use.

**Authors:**

Paul J. Durack ([durack1](https://github.com/durack1); [PCMDI](https://pcmdi.llnl.gov/), [Lawrence Livermore National Laboratory](https://www.llnl.gov/))

**Notes:**

PJD 21 Jul 2025 - initiated<br>
PJD 25 Jul 2025 - updated to fix issue with missing upper-level "CV" dictionary in input4MIPs_CV.json<br>
PJD 25 Jul 2025 - added "CMIP7" to mip_era

**TODO:**

**Links:**

### imports

In [15]:
%%time
import copy
import datetime
import json
import os

import requests

CPU times: user 5 μs, sys: 6 μs, total: 11 μs
Wall time: 11.9 μs


## set CV files and pull

In [16]:
%%time
# Names of the input4MIPs CV files to pull; each is substituted for "TARGET"
# in urlTmp below and also becomes the name of a notebook-level variable.
targets = [
    "activity_id",
    "dataset_category",
    "frequency",
    "grid_label",
    "institution_id",
    "license",
    "mip_era",
    "nominal_resolution",
    "product",
    "realm",
    "region",
    "required_global_attributes",
    "source_id",
    "target_mip",
]
urlTmp = "https://raw.githubusercontent.com/PCMDI/input4mips-cmor-tables/refs/heads/master/input4MIPs_TARGET.json"

# loop through urls
failedTargets = []  # targets that could not be fetched or parsed
for count, key in enumerate(targets):
    print(count, key)
    url = urlTmp.replace("TARGET", key)
    try:
        # requests has no default timeout; without one a stalled connection
        # would hang this cell indefinitely
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise HTTPError for bad responses (4/5xx)
        # bind the parsed JSON to a notebook-level variable named after the
        # target (e.g. `source_id`); later cells reference these names directly
        globals()[key] = json.loads(response.text)
    except requests.exceptions.RequestException as e:
        failedTargets.append(key)
        print(f"Request failed: {e}")
    except json.JSONDecodeError as e:
        failedTargets.append(key)
        print(f"JSON decode failed: {e}")
    except Exception as e:
        failedTargets.append(key)
        print(f"Unexpected error occurred: {e}")

# Surface failures explicitly: a target that failed here is either undefined
# or still holds data from an earlier run of this cell.
if failedTargets:
    print(f"WARNING: targets not refreshed: {failedTargets}")

0 activity_id
1 dataset_category
2 frequency
3 grid_label
4 institution_id
5 license
6 mip_era
7 nominal_resolution
8 product
9 realm
10 region
11 required_global_attributes
12 source_id
13 target_mip
CPU times: user 57.1 ms, sys: 19.4 ms, total: 76.5 ms
Wall time: 2.13 s


### cleanup institution_id

In [17]:
# List the institution keys currently held in the institution_id CV
institution_id["institution_id"].keys()

dict_keys(['CCCma', 'CNRM-Cerfacs', 'IACETH', 'IAMC', 'ImperialCollege', 'MOHC', 'MPI-B', 'MPI-M', 'MRI', 'NASA-GSFC', 'NCAR', 'NCAS', 'PCMDI', 'PNNL-JGCRI', 'SOLARIS-HEPPA', 'UCI', 'UColorado', 'UReading', 'UoM', 'UofMD', 'VUA'])

In [18]:
# Keep only the PCMDI institution entry. The keys are snapshotted into a list
# first so the dictionary can be modified while looping.
keyList = list(institution_id["institution_id"])
for instName in keyList:
    if instName == "PCMDI":
        continue
    del institution_id["institution_id"][instName]
institution_id["institution_id"].keys()

dict_keys(['PCMDI'])

### update mip_era to include CMIP7

In [19]:
# Add "CMIP7" to the mip_era list; the membership guard keeps this cell
# idempotent so re-running it does not append duplicate entries.
if "CMIP7" not in mip_era["mip_era"]:
    mip_era["mip_era"].append("CMIP7")

### clean up source_id

In [20]:
# List the source_id keys currently held in the source_id CV
source_id["source_id"].keys()

dict_keys(['ACCESS1-3-rcp85-1-0', 'CCSM4-rcp26-1-0', 'CCSM4-rcp85-1-0', 'CESM2-ssp585-1-0', 'CNRM-CM6-1-ssp126-1-0', 'CNRM-CM6-1-ssp585-1-0', 'CNRM-ESM2-1-ssp585-1-0', 'CSIRO-MK3-6-0-rcp85-1-0', 'HadGEM2-ES-rcp85-1-0', 'IPSL-CM5A-MR-rcp26-1-0', 'IPSL-CM5A-MR-rcp85-1-0', 'MIROC-ESM-CHEM-rcp26-1-0', 'MIROC-ESM-CHEM-rcp85-1-0', 'MIROC5-rcp26-1-0', 'MIROC5-rcp85-1-0', 'MRI-JRA55-do-1-3', 'MRI-JRA55-do-1-3-2', 'MRI-JRA55-do-1-4-0', 'MRI-JRA55-do-1-5-0', 'MRI-JRA55-do-1-6-0', 'NorESM1-M-rcp26-1-0', 'NorESM1-M-rcp85-1-0', 'PCMDI-AMIP-1-1-3', 'PCMDI-AMIP-1-1-4', 'PCMDI-AMIP-1-1-5', 'PCMDI-AMIP-1-1-6', 'PCMDI-AMIP-1-1-7', 'PCMDI-AMIP-1-1-8', 'PCMDI-AMIP-1-1-9', 'UKESM1-0-LL-ssp585-1-0'])

### cleanup non-PCMDI keys

In [21]:
# Remove every source_id entry whose key does not contain "PCMDI". The keys
# are snapshotted first so entries can be deleted while looping.
keyList = list(source_id["source_id"])
nonPcmdiKeys = [srcName for srcName in keyList if "PCMDI" not in srcName]
for srcName in nonPcmdiKeys:
    del source_id["source_id"][srcName]
source_id["source_id"].keys()

dict_keys(['PCMDI-AMIP-1-1-3', 'PCMDI-AMIP-1-1-4', 'PCMDI-AMIP-1-1-5', 'PCMDI-AMIP-1-1-6', 'PCMDI-AMIP-1-1-7', 'PCMDI-AMIP-1-1-8', 'PCMDI-AMIP-1-1-9'])

### update source_id to include PCMDI-AMIP-1-1-10

In [22]:
# Build the PCMDI-AMIP-1-1-10 entry from a *copy* of PCMDI-AMIP-1-1-9.
# Assigning the 1-1-9 dict directly would make both keys reference the same
# object, so the overrides below would also rewrite the 1-1-9 entry.
newSource = copy.deepcopy(source_id["source_id"]["PCMDI-AMIP-1-1-9"])
# Fields that differ from 1-1-9; everything else is inherited unchanged.
newSource["mip_era"] = "CMIP7"
newSource["release_year"] = "2025"
newSource["source"] = (
    "PCMDI-AMIP 1.1.10: Merged SST based on UK MetOffice HadISST and NCEP OI2"
)
newSource["source_description"] = (
    "Sea surface temperature and sea-ice datasets produced by PCMDI (LLNL) for the AMIP (DECK) experiment of CMIP7"
)
newSource["source_id"] = "PCMDI-AMIP-1-1-10"
newSource["source_version"] = "1.1.10"
newSource["title"] = "PCMDI-AMIP 1.1.10 dataset prepared for input4MIPs"
source_id["source_id"]["PCMDI-AMIP-1-1-10"] = newSource

In [23]:
# Display the PCMDI-AMIP-1-1-10 entry for inspection
source_id["source_id"]["PCMDI-AMIP-1-1-10"]

{'calendar': 'gregorian',
 'comment': 'Based on Hurrell SST/sea ice consistency criteria applied to merged HadISST (1870-01 to 1981-10) & NCEP-0I2 (1981-11 to 2022-12)',
 'contact': 'PCMDI (pcmdi-cmip@llnl.gov)',
 'dataset_category': 'SSTsAndSeaIce',
 'further_info_url': 'https://pcmdi.llnl.gov/mips/amip',
 'grid': '1x1 degree longitude x latitude',
 'grid_label': 'gn',
 'institution': 'Program for Climate Model Diagnosis and Intercomparison, Lawrence Livermore National Laboratory, Livermore, CA 94550, USA',
 'institution_id': 'PCMDI',
 'license': 'AMIP boundary condition data produced by PCMDI is licensed under a Creative Commons Attribution 4.0 International License (CC BY 4.0; https://creativecommons.org/licenses/by/4.0). Consult https://pcmdi.llnl.gov/CMIP6/TermsOfUse for terms of use governing input4MIPs output, including citation requirements and proper acknowledgment. Further information about this data, including some limitations, can be found via the further_info_url (recorded

### correct license format

### create input4MIPs_CV.json composite

In [24]:
# Assemble the composite CV: every individual CV is stored under a single
# upper-level "CV" key, keyed by its own name.
input4MIPs_CV = {}
input4MIPs_CV["CV"] = {}
for count, name in enumerate(targets):
    # `targets` gains "input4MIPs_CV" once the write cell has run; skip it so
    # re-running this cell does not fail looking the composite up in itself
    if name == "input4MIPs_CV":
        continue
    print(count, name)
    # look the CV dictionary up by variable name in the notebook namespace
    dic = globals()[name]
    input4MIPs_CV["CV"][name] = dic[name]

0 activity_id
1 dataset_category
2 frequency
3 grid_label
4 institution_id
5 license
6 mip_era
7 nominal_resolution
8 product
9 realm
10 region
11 required_global_attributes
12 source_id
13 target_mip


### write individual CV files out to CVs subdir and the composite input4MIPs_CV.json to Tables subdir

In [25]:
%%time
# Include the composite in the list of files to write; the membership guard
# keeps re-runs of this cell from appending (and writing) it repeatedly.
if "input4MIPs_CV" not in targets:
    targets.append("input4MIPs_CV")
for count, name in enumerate(targets):
    print(count, name)
    # look the dictionary up by variable name in the notebook namespace
    dic = globals()[name]
    # choose output path: individual CVs go to ../CVs/ with an "input4MIPs_"
    # prefix, the composite goes to ../Tables/
    if name not in ["input4MIPs_CV"]:
        outFile = "".join(["../CVs/input4MIPs_", name, ".json"])
    else:
        outFile = "".join(["../Tables/", name, ".json"])
    # cleanup: remove any existing file so it is recreated from scratch
    if os.path.exists(outFile):
        os.remove(outFile)
    with open(outFile, "w") as f:
        # sorted keys and fixed indent/separators give stable, diff-friendly JSON
        json.dump(
            dic, f, ensure_ascii=True, sort_keys=True, indent=4, separators=(",", ":")
        )

0 activity_id
1 dataset_category
2 frequency
3 grid_label
4 institution_id
5 license
6 mip_era
7 nominal_resolution
8 product
9 realm
10 region
11 required_global_attributes
12 source_id
13 target_mip
14 input4MIPs_CV
CPU times: user 2.11 ms, sys: 3.96 ms, total: 6.06 ms
Wall time: 4.69 ms
