In [None]:
import json
import os
import sys
import requests
import xml.etree.ElementTree as ET

In [None]:
def fetch_cf_standard_name_table(version, location=None, timeout=30):
    """
    Downloads the specified CF standard name table version and saves it to file

    :param str version: CF standard name table version number (i.e 34), or
        "latest" to fetch the table published under ``current``
    :param str location: Path/filename to write downloaded xml file to.
        Defaults to "cf_table/cf_table.xml" when None.
    :param timeout: Seconds to wait on the server before giving up; handed
        straight to ``requests.get`` (a float or a (connect, read) tuple).
    :return: The path the table was written to
    :rtype: str
    :raises requests.HTTPError: if the server answers with an error status
    :raises OSError: if the target directory or file cannot be written
    """

    if location is None:
        # This case occurs when updating the packaged version from command line
        location = "cf_table/cf_table.xml"

    if version == "latest":
        url = "http://cfconventions.org/Data/cf-standard-names/current/src/cf-standard-name-table.xml"
    else:
        url = f"http://cfconventions.org/Data/cf-standard-names/{version}/src/cf-standard-name-table.xml"

    # Announce the download before it starts so a slow or failing request is
    # still attributable to a URL in the log.
    print(
        f"Downloading cf-standard-names table version {version} from: {url}",
        file=sys.stderr,
    )

    # Without a timeout requests waits forever on an unresponsive server.
    r = requests.get(url, allow_redirects=True, timeout=timeout)
    r.raise_for_status()

    try:
        # A bare filename has no directory part; os.makedirs("") would raise,
        # so only create directories when there is something to create.
        parent_dir = os.path.dirname(location)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(location, "wb") as f:
            f.write(r.content)
    except OSError as e:
        print(f"Error creating file or directory: {e}", file=sys.stderr)
        # Re-raise: returning a path that was never written would only move
        # the failure into whatever tries to read the file next.
        raise

    return location

In [None]:
def _child_text(element, tag):
    """Return the text of ``element``'s direct child ``tag``, or None if the child is absent."""
    child = element.find(tag)
    return None if child is None else child.text


def xml_to_json(input_xml):
    """
    Parses a standard name table XML document into JSON-serializable records.

    Every ``<entry>`` element found anywhere under the root yields one record.
    The entry's ``id`` attribute becomes ``standard_name``, its
    ``<canonical_units>`` text becomes ``canonical_unit_id`` and its
    ``<description>`` text becomes ``display_description["en"]``. A missing or
    empty child element yields None for that field. All remaining fields are
    emitted as empty placeholders (None / []).

    Args:
        input_xml: Path to the XML file (or an open file object, as accepted
            by ``xml.etree.ElementTree.parse``).

    Returns:
        list[dict]: One dictionary per entry, in document order.

    Raises:
        KeyError: If an ``<entry>`` element has no ``id`` attribute.
        xml.etree.ElementTree.ParseError: If the document is not well-formed.
    """
    root = ET.parse(input_xml).getroot()

    data = []
    for entry in root.findall(".//entry"):
        var_data = {
            "name_vocabulary": "CF Standard Name",
            "standard_name": entry.attrib["id"],
            "alerts": {
                "threshold_min": None,
                "threshold_max": None,
            },
            "display_name": {
                "en": None,
            },
            "display_description": {
                "en": _child_text(entry, "description"),
            },
            "canonical_unit_id": _child_text(entry, "canonical_units"),
            "preferred_metric_unit_id": None,
            "preferred_imperial_unit_id": None,
            # A fresh list per record so later edits to one entry's units
            # cannot leak into another.
            "units": [],
        }
        data.append(var_data)

    return data

In [None]:
# Download the current CF standard name table; with no location given the
# function saves it to its default path and returns that path.
xml_out = fetch_cf_standard_name_table("latest")

In [None]:
# Convert the downloaded table into a list of vocabulary records.
parsed_data = xml_to_json(xml_out)

In [None]:
json_out = "vocabulary_json/cf_standard.json"

# Make sure the output directory exists so the cell also works on a fresh
# checkout where it has not been created yet.
os.makedirs(os.path.dirname(json_out), exist_ok=True)

with open(json_out, "w", encoding="utf-8") as outfile:
    json.dump(parsed_data, outfile, indent=4)