# Compare crop yields to observations

- Uses raw annual CTSM outputs (NOT timeseries files).

Notebook created by Sam Rabin (samrabin@ucar.edu).

In [None]:
import importlib
import os
import sys
import warnings

from crops import bokeh_html_utils
from crops import clm_and_earthstat_maps as caem
from crops import crop_timeseries_figs
from crops import earthstat
from crops import incl_years_ranges_dict_class
from crops import results_maps
from crops import clm_crop_season_utils
from crops import clm_crops_with_obs_area
from crops import parallelizable_plot_loop

# Plotting utils
import matplotlib.pyplot as plt
import numpy as np
from crops import plotting_utils
import xarray as xr
from dask.distributed import Client, as_completed

# Start a local Dask cluster using all available cores. The resulting client
# is reused by the parallel plotting cells later in this notebook.
client = Client()


# Configure warnings in all workers
def setup_worker_warnings():
    """Install "ignore" filters for two known warning messages.

    The filters are added to the process in which this function runs; it is
    written to be sent to the Dask workers. ``warnings`` is imported locally,
    presumably so the function is self-contained when run remotely.
    """
    import warnings

    ignored_message_patterns = (
        "invalid value encountered in divide",
        "invalid value encountered in cast",
    )
    for pattern in ignored_message_patterns:
        warnings.filterwarnings("ignore", message=pattern)


# Run the filter setup on the workers known to the client at this moment
client.run(setup_worker_warnings)
# Bare expression as the last line of the cell, so the notebook shows the
# client summary
client

## 1. Settings

### 1.1 Parameters modifiable in config.yml

#### 1.1.1 CTSM cases to include

In [None]:
# Directory in which the land output of the cases is stored
CESM_output_dir = os.sep + os.path.join(
    "glade", "work", "samrabin", "clm6_crop_reparam_outputs"
)

# Full case names, as present in CESM_output_dir and in individual filenames
case_name_list = [
    "agu25_gswp3_cals-50_params-old_1958-2014",
    "agu25_gswp3_cals-53_params-old_1958-2014",
    "agu25_crujra_cals-53_params-old_1958-2014",
    "agu25_crujra_cals-53_params-new3_1958-2014",  # omni07
]

# Label shown in figure legends for each case
case_legend_list = [
    "Original",
    "New cals",
    "New cals, CRU-JRA",
    "New cals, CRU-JRA, new params",
]

#### 1.1.2 Plot options related to CTSM cases

In [None]:
# key_case: the case against which other cases will be compared. Must be a
# member of case_legend_list or None. If None, these comparisons are skipped.
# line_colors: None for automatic colors; otherwise a list with one color for
# each case.
# The specific label and colors below are set for the purposes of the example
# setup.
_example_key_case = "New cals, CRU-JRA"
_have_example_key_case = _example_key_case in case_legend_list

key_case = _example_key_case if _have_example_key_case else case_legend_list[0]

if _have_example_key_case and len(case_legend_list) == 4:
    line_colors = ["darkcyan", "magenta", "darkred", "blue"]
else:
    line_colors = None

#### 1.1.3 Observational data for yields and areas

In [None]:
# Top-level directory of the CUPiD observational data store
obs_data_dir = os.sep + os.path.join(
    "glade",
    "campaign",
    "cesm",
    "development",
    "cross-wg",
    "diagnostic_framework",
    "CUPiD_obs_data",
)

# Observational datasets to include in crop_timeseries_figs: the keys (not
# case-sensitive) name the datasets and the values set their line colors in
# plots.
obs_timeseries_linecolors = dict(
    EarthStat="black",
    FAOSTAT="0.5",  # gray
)

#### 1.1.4 Other plot options

In [None]:
# First and last calendar years we care about. Note that, because all the
# variables processed in this notebook must be saved in instantaneous files at
# the end of each year, the netCDF timestamp for start_year Y will actually
# have year Y+1.
start_year = end_year = None

# Year periods for which figures are calculated
incl_yrs_ranges = ["all", "1995-2005"]

# Maturity levels to consider in yield and production figures. They are
# explained in the "Yield, production, and area" section.
maturity_levels_to_plot = ["marketable", "any", "mature"]

#### 1.1.5 Other options

In [None]:
# Path to CUPiD externals: two levels above the current working directory.
# This method is supposedly unreliable, so it's best for this to be overridden
# by a value given in config.yml. See examples/crops/config.yml.
externals_path = os.path.join(os.getcwd(), *([os.pardir] * 2), "externals")

# Whether to print verbose messaging
verbose = True

# Upper limit on simultaneously submitted parallel jobs.
# Parallel loops in and called by this notebook will happily keep submitting
# jobs until the available memory is exceeded. Checking how close we are to
# the memory limit would be the best safeguard, but that limit is not visible
# when assigned by JupyterHub, for instance. So instead, set this low enough
# that memory is not exceeded, accepting that fewer parallel jobs means a
# slower notebook.
# Default was tested with four f09 (~1-degree) cases covering 1961-2024 and
# had plenty of headroom given 120 GB of memory.
max_parallel_jobs = 48

# Handling of the special cft_ds.nc file that this notebook usually reads (if
# it already exists) or tries to create (if it doesn't) for each case.
# force_new_cft_ds_file: If True, will always try to create a new cft_ds.nc.
# force_no_cft_ds_file: If True, will never try to read or create cft_ds.nc;
# instead, will read the necessary data into memory.
force_new_cft_ds_file = False
force_no_cft_ds_file = False

# If False, will warn and skip figure subplots if errors are encountered.
debug = False

### 1.2 Other settings

In [None]:
# Settings related to included crops (e.g., corn)

crops_to_plot = ["corn", "cotton", "rice", "soybean", "sugarcane", "wheat"]

# FAOSTAT crop names (keys) translated to CLM crop names (values)
fao_to_clm_dict = {
    "Maize": "corn",
    "Rice": "rice",
    "Seed cotton, unginned": "cotton",
    "Soya beans": "soybean",
    "Sugar cane": "sugarcane",
    "Wheat": "wheat",
}

# When FAOSTAT is one of the requested observational datasets, the CLM crops
# named in fao_to_clm_dict and the crops in crops_to_plot must be the same set
_uses_faostat = any(name.lower() == "faostat" for name in obs_timeseries_linecolors)
if _uses_faostat:
    _clm_crops_with_fao = set(fao_to_clm_dict.values())
    if not _clm_crops_with_fao.issubset(crops_to_plot):
        raise ValueError(
            "Some values in fao_to_clm_dict not in crops_to_plot.\n"
            f"crops_to_plot: {crops_to_plot}\n"
            f"fao_to_clm_dict: {fao_to_clm_dict}"
        )
    if not set(crops_to_plot).issubset(_clm_crops_with_fao):
        raise ValueError(
            "Some crops in crops_to_plot have no corresponding FAO crop.\n"
            f"crops_to_plot: {crops_to_plot}\n"
            f"fao_to_clm_dict: {fao_to_clm_dict}"
        )

In [None]:
# # JUST FOR TESTING; DELETE BEFORE MERGING
# # start_year = 1961
# # end_year = 2024
# # key_case = None
# crops_to_plot = ["corn"]
# debug = True
# maturity_levels_to_plot = ["marketable"]
# incl_yrs_ranges = ["all"]
# NOTE: this override is disabled like the others in this cell. Left active, it
# would replace whatever value force_new_cft_ds_file was given in the settings.
# force_new_cft_ds_file = True

In [None]:
# Misc. setup and checks

# Scratch output goes in $SCRATCH/CUPiD_scratch when SCRATCH is set (created
# if needed); otherwise in the current directory
_scratch_root = os.environ.get("SCRATCH")
if _scratch_root is None:
    cupid_temp = "."
else:
    cupid_temp = os.path.join(_scratch_root, "CUPiD_scratch")
    os.makedirs(cupid_temp, exist_ok=True)

N_PFTS = 78

# Text after the last "." in each case name (the whole name if it has no ".")
short_names = [name.rsplit(".", 1)[-1] for name in case_name_list]

# The year bounds, when both are given, must be in order
if start_year is not None and end_year is not None:
    if start_year > end_year:
        raise RuntimeError(f"start_year ({start_year}) > end_year ({end_year})")

# Legend labels default to the case names; if given, there must be one per case
if not case_legend_list:
    case_legend_list = case_name_list
elif len(case_name_list) != len(case_legend_list):
    raise RuntimeError("case_legend_list must be same length as case_name_list")

# With more than one case, a key case (if any) must be one of the legend labels
if key_case and len(case_legend_list) > 1:
    if key_case not in case_legend_list:
        raise KeyError(
            f"key_case '{key_case}' not in case_legend_list {case_legend_list}"
        )

imm_unm_list = ["unmarketable", "immature"]

In [None]:
# Fill default case line colors, if needed, from the first n_cases entries of
# matplotlib's default color cycle
n_cases = len(case_name_list)
if line_colors is None:
    line_colors = plt.rcParams["axes.prop_cycle"].by_key()["color"][:n_cases]

# Check that correct number of colors were provided. This raises explicitly
# instead of using assert so that the check is not dropped when Python runs
# with -O.
msg = f"Expected {n_cases} colors in line_colors but got: {line_colors}"
if len(line_colors) != n_cases:
    raise RuntimeError(msg)

In [None]:
# Check formatting of some options

# Ensure list. A single string must be wrapped rather than passed to list(),
# which would split it into individual characters.
if isinstance(maturity_levels_to_plot, str):
    maturity_levels_to_plot = [maturity_levels_to_plot]

# Ensure all lowercase
maturity_levels_to_plot = [level.lower() for level in maturity_levels_to_plot]

In [None]:
# Gather the options into one dict for easier passing among functions, then
# remove the standalone variables so that only the dict copies remain
opts = {
    "CESM_output_dir": CESM_output_dir,
    "case_name_list": case_name_list,
    "case_legend_list": case_legend_list,
    "start_year": start_year,
    "end_year": end_year,
    "crops_to_plot": crops_to_plot,
    "fao_to_clm_dict": fao_to_clm_dict,
    "verbose": verbose,
    "obs_data_dir": obs_data_dir,
    "force_new_cft_ds_file": force_new_cft_ds_file,
    "force_no_cft_ds_file": force_no_cft_ds_file,
    "key_case": key_case,
    "line_colors": line_colors,
    "imm_unm_list": imm_unm_list,
    "max_parallel_jobs": max_parallel_jobs,
    "debug": debug,
    "maturity_levels_to_plot": maturity_levels_to_plot,
    # Dataset names are converted to lowercase before saving
    "obs_timeseries_linecolors": {
        name.lower(): color for name, color in obs_timeseries_linecolors.items()
    },
}
del (
    CESM_output_dir,
    case_name_list,
    case_legend_list,
    start_year,
    end_year,
    crops_to_plot,
    fao_to_clm_dict,
    verbose,
    obs_data_dir,
    force_new_cft_ds_file,
    force_no_cft_ds_file,
    key_case,
    line_colors,
    imm_unm_list,
    max_parallel_jobs,
    debug,
    maturity_levels_to_plot,
    obs_timeseries_linecolors,
)

In [None]:
# Map capitalized crop names (used to populate the "Crop" dropdown) to the
# crop names used for the clm_and_earthstat_maps_1crop() crop arg.
crop_dict = {
    crop_name.capitalize(): crop_name for crop_name in opts["crops_to_plot"]
}

In [None]:
# Build the object whose keys will be used to populate the "Years" dropdown
importlib.reload(incl_years_ranges_dict_class)

# Range labels are capitalized before being added (e.g., "all" -> "All")
incl_yrs_ranges = [label.capitalize() for label in incl_yrs_ranges]

incl_yrs_ranges_dict = incl_years_ranges_dict_class.InclYrsRangesDict(
    opts["start_year"],
    opts["end_year"],
)
for incl_yrs_range in incl_yrs_ranges:
    incl_yrs_ranges_dict.add(incl_yrs_range)

In [None]:
# Keys populate the "Statistic" dropdown; values are the strings passed to
# crop_timeseries_figs() and clm_and_earthstat_maps_1crop(). Yield and
# production get one entry per maturity level ("<short name>@<level>"),
# followed by a single entry for area.
stat_dict = {
    f"{long_name} ({level})": f"{short_name}@{level}"
    for long_name, short_name in (("Yield", "yield"), ("Production", "prod"))
    for level in opts["maturity_levels_to_plot"]
}
stat_dict["Area"] = "area"

In [None]:
# Keys are the "key case" radio button options; values are what gets passed
# as the "key_case" arg of the map-plotting code (plus maybe elsewhere). The
# difference option is offered only when a key case was specified.
key_case_dict = {"Values": None}
if opts["key_case"] is not None:
    key_case_dict.update({"Diff. from key case": opts["key_case"]})

### 1.3 Import stuff from externals

In [None]:
sys.path.append(externals_path)
import ctsm_postprocessing.crops.faostat as faostat
import ctsm_postprocessing.utils as utils
from ctsm_postprocessing.crops import crop_secondary_variables as c2o
from ctsm_postprocessing.crops import cropcase
from ctsm_postprocessing.timing import Timing
from ctsm_postprocessing.crops import crop_case_list
from ctsm_postprocessing import extending_xarray_ops
from ctsm_postprocessing.crops import combine_cft_to_crop
from ctsm_postprocessing.crops import extra_area_prod_yield_etc
from ctsm_postprocessing.crops import crop_biomass

## 2. Import data

### 2.1 Import cases

In [None]:
importlib.reload(crop_case_list)
importlib.reload(clm_crop_season_utils)

# Import all cases described by the options
case_list = crop_case_list.CropCaseList(opts=opts)

# Fill "All" years values, if needed: the earliest first time value and the
# latest last time value found across the cases
if incl_yrs_ranges_dict["All"][0] is None:
    incl_yrs_ranges_dict["All"][0] = min(
        c.cft_ds["time"].values[0] for c in case_list
    )
if incl_yrs_ranges_dict["All"][1] is None:
    incl_yrs_ranges_dict["All"][1] = max(
        c.cft_ds["time"].values[-1] for c in case_list
    )

# Calculate overwintering for each year's harvests
for case in case_list:
    case.cft_ds = clm_crop_season_utils.cft_ds_overwintering(case.cft_ds)

### 2.2 Import FAOSTAT

The [FAOSTAT website](https://www.fao.org/faostat/en/#home) "provides free access to food and agriculture data for over 245 countries and territories and covers all FAO regional groupings from 1961 to the most recent year available." Here, we import the ["Crops and livestock production"](https://www.fao.org/faostat/en/#data/QCL) data downloaded Feb. 25, 2025. These data are provided at country level only and are thus not gridded.

Cropland area producing multiple harvests in a single year "can be" counted multiple times, just as it is here for CLM outputs. So if there is 100 km$^2$ of rice in a CLM gridcell that all produces two harvests in 1987, the area harvested for rice in 1987 will be 200 km$^2$.

Production is given in "units of basic product weight."

Note that assigning crop production to a single year is somewhat complicated. FAOSTAT describes it like so: "When the production data available refers to a production period falling into two successive calendar years and it is not possible to allocate the relative production to each of them, it is usual to refer production data to that year into which the bulk of the production falls." However, FAOSTAT metadata often mentions reassignment of production from one calendar year to another, so this process is not perfect.

In any case, note that this does not match the "assign to year of harvest" method used here for both CLM production _and area_. This is expected to introduce a higher level of interannual variability in the CLM results than we see in FAOSTAT. Rearranging the CLM data such that each gridcell has only one harvest per year, e.g., by assigning to the year of _planting_, would reduce this discrepancy (as done in Global Gridded Crop Model Intercomparison postprocessing; Müller et al., 2017).

1. Food and Agriculture Organization of the United Nations. (2024-12-16). Production / Crops and livestock products. FAOSTAT. https://www.fao.org/faostat/en/#data/QCL. Accessed 2025-02-25.
2. Müller, C. et al. (2017). Global gridded crop model evaluation: benchmarking, skills, deficiencies and implications. _Geosci. Model Dev. 10_, 1403–1422.
  

In [None]:
# Location of the FAOSTAT CSV file within the observational data store
_fao_relpath_parts = (
    "lnd",
    "analysis_datasets",
    "ungridded",
    "timeseries",
    "FAOSTAT",
    "Production_Crops_Livestock_2025-02-25",
    "norm",
    "Production_Crops_Livestock_E_All_Data_(Normalized).csv",
)
fao_file = os.path.join(opts["obs_data_dir"], *_fao_relpath_parts)

# Read FAOSTAT for the requested years and convert it to CLM crop names. Any
# failure is passed to handle_exception(), after which fao_dict is None.
try:
    fao_dict = faostat.FaostatProductionCropsLivestock(
        fao_file, y1=opts["start_year"], yN=opts["end_year"]
    ).get_clm_yield_prod_area_dict(opts["fao_to_clm_dict"])
except Exception as e:
    skip_msg = "Couldn't import FAOSTAT data due to"
    plotting_utils.handle_exception(opts["debug"], e, skip_msg)
    fao_dict = None

### 2.3 Import EarthStat (gridded FAOSTAT)

"EarthStat" is how this particular dataset will be referred to here, but note that it is actually a gridded version of FAOSTAT data. The gridding was based on the ["Harvested Area and Yield for 175 Crops year 2000"](http://www.earthstat.org/harvested-area-yield-175-crops/) dataset (Monfreda et al., 2008) produced by the [EarthStat collaboration](http://www.earthstat.org/). This dataset was first described by Lombardozzi et al. (2020):

> The FAOSTAT data set included area harvested, production, and yield globally for numerous crops by country from 1961 to 2016. To better understand the spatial distribution within each country, the FAOSTAT data were downscaled using observed crop distribution and yield data from EarthStat (http://www.earthstat.org/harvested-area-yield-175-crops/), a global data product that combines national, state, and county‐level data to provide yield, production, and harvested area data at 10 km spatial resolution for 175 different crop types for the Year 2000 (Monfreda et al., 2008). Both the CLM5 and the downscaled FAOSTAT data were compared to the USDA‐NASS data for the United States from 1990 to 2010 to evaluate the accuracy of downscaling using EarthStat data.

Note that the FAOSTAT data used by Lombardozzi et al. (2020) were a different version from what we import above, so we do not expect them to align perfectly.

-  Lombardozzi, D. L., et al. (2020). Simulating agriculture in the Community Land Model version 5. *JGR: Biogeosci.* *125*, e2019JG005529. doi: 10.1029/2019jg005529

- Monfreda, C., N. Ramankutty, and J. A. Foley (2008). Farming the planet: 2. Geographic distribution of crop areas, yields, physiological types, and net primary production in the year 2000, _Global Biogeochem. Cycles 22_, GB1022. doi: 10.1029/2007GB002947.

In [None]:
importlib.reload(earthstat)
importlib.reload(clm_crops_with_obs_area)

# Location of the EarthStat files within the observational data store
_earthstat_relpath_parts = (
    "lnd",
    "analysis_datasets",
    "multi_grid",
    "annual",
    "FAO-EarthStatYields",
)
earthstat_dir = os.path.join(opts["obs_data_dir"], *_earthstat_relpath_parts)

# Any failure is passed to handle_exception(), after which earthstat_data is
# None
try:
    earthstat_data = earthstat.EarthStat(earthstat_dir, case_list.resolutions, opts)
except Exception as e:
    skip_msg = "Couldn't import EarthStat data due to"
    plotting_utils.handle_exception(opts["debug"], e, skip_msg)
    earthstat_data = None

# Only process the cases with EarthStat when it is available
if earthstat_data is not None:
    case_list = clm_crops_with_obs_area.process_caselist(
        case_list,
        earthstat_data,
        opts,
    )

## 3. Yield, production, and area


The results for CLM yield and production depend on how a successful harvest is defined. These figures give three options:
- **Marketable**: Include only harvests where the crop reached a certain level of maturity, as defined in ISIMIP-Agriculture phase 3 (also known as Global Gridded Crop Model Intercomparison phase 3). Corn must reach 80% of its maturity requirement (growing degree-days) to be included; other crops must reach 90%. This is the definition against which CLM6.0 crops were calibrated.
- **Mature**: Only include harvests where the crop reached full maturity. Other harvests receive a yield and production of 0.
- **Any**: Include all harvests, regardless of maturity level.

The "Area source" menus allow choosing between statistics calculated using different crop areas:
* **CLM**: Crop areas used in the CLM simulation after all CFT merging has taken place, such as rye being merged to spring wheat.
* **EarthStat**: Crop yields from the CLM simulation but areas from EarthStat. Note that this will not give you the same results as you would get if you actually ran CLM with the EarthStat areas, because here we are just multiplying CLM's yields by EarthStat areas. If EarthStat has some crop in a gridcell but CLM doesn't, we will get a zero there for our CLM x EarthStat yields and production. Note also that the CLM simulation lines in the area figure might not align perfectly with one another or EarthStat due to differing land masks.

### 3.1 Yield, production, and area time series

In [None]:
importlib.reload(crop_timeseries_figs)

# Directory in which the figure files will be saved
img_dir = os.path.join("Global_crop_yield_compare_obs", "timeseries_yieldprodarea")

# Time series figures for every entry of stat_dict
crop_timeseries_figs.main(
    stat_dict,
    img_dir,
    earthstat_data,
    case_list,
    fao_dict,
    opts,
)

### 3.2 Yield interannual variability

1. Normalized via division by mean.
2. Detrended using 5-year running mean, after Müller et al. (2017; doi:10.5194/gmd-10-1403-2017).

So although it says "unitless," you can think of the units as "deviation from running mean as a fraction of timeseries mean."

In [None]:
importlib.reload(crop_timeseries_figs)

# Yield entries of stat_dict, with "yield" replaced by "yield_normdetrend" in
# the values. Keys will be used as radio button options and values as inputs
# to crop_timeseries_figs().
iav_stat_dict = {
    label: stat_code.replace("yield", "yield_normdetrend")
    for label, stat_code in stat_dict.items()
    if "Yield" in label
}

# Directory in which the figure files will be saved
img_dir = os.path.join("Global_crop_yield_compare_obs", "timeseries_yield_normdetrend")

crop_timeseries_figs.main(
    iav_stat_dict,
    img_dir,
    earthstat_data,
    case_list,
    fao_dict,
    opts,
)

### 3.3 Yield, production, and area maps

In [None]:
importlib.reload(caem)
importlib.reload(parallelizable_plot_loop)

# Options for the "CLM minus..." dropdown built at the end of this cell.
# "None" corresponds to the CLM maps; "EarthStat" (included only if EarthStat
# was imported successfully) corresponds to the "CLM minus EarthStat" maps. At
# the moment this could work as radio buttons, but I'd like to eventually add
# a few more observational data sources.
clm_or_obsdiff_list = ["None"]
if earthstat_data is not None:
    clm_or_obsdiff_list.append("EarthStat")

# Where figure files will be saved
img_dir = os.path.join("Global_crop_yield_compare_obs", "maps_yieldprodarea")
os.makedirs(img_dir, exist_ok=True)

timer_overall = Timing()

# For Dask parallel execution: jobs go to the Dask client only if more than
# one parallel job is allowed; otherwise they run in this process.
parallel = opts["max_parallel_jobs"] > 1
if parallel:
    # Futures of the jobs submitted in the current batch
    futures = []
# Pre-distribute large objects to workers once to avoid "Error during
# deserialization of the task graph." That might only be happening because
# of the warnings context manager around the client.submit() call? This
# also might help alleviate at least some "Sending large graph" warnings,
# but not all of them.
if parallel:
    earthstat_data_tmp = client.scatter(earthstat_data, broadcast=True)
else:
    earthstat_data_tmp = earthstat_data

# Function called once per (statistic, crop, year range) combination
this_fn = caem.clm_and_earthstat_maps_1crop

for stat, stat_input in stat_dict.items():

    for crop, crop_input in crop_dict.items():

        for incl_yrs_plot_items in incl_yrs_ranges_dict.plot_items():
            if opts["verbose"]:
                print(f"Submitting {crop} {stat} {incl_yrs_plot_items[1]}...")

            # Keyword arguments for this_fn; the case list is restricted to
            # the current crop
            kwargs = {
                "stat_strings": (stat, stat_input),
                "case_list": case_list.sel(crop=crop_input),
                "case_legend_list": opts["case_legend_list"],
                "earthstat_data": earthstat_data_tmp,
                "verbose": opts["verbose"],
                "crop": crop_input,
                "key_case_dict": key_case_dict,
                "clm_or_obsdiff_list": clm_or_obsdiff_list,
                "img_dir": img_dir,
                "incl_yrs_plot_items": incl_yrs_plot_items,
                "debug": opts["debug"],
            }

            # Submit job, optionally to a dask worker/queue
            with warnings.catch_warnings():
                # I'd like to actually resolve this warning rather than just suppressing
                # it, but it is *complicated*.
                warnings.filterwarnings("ignore", message=".*sending large graph.*")
                if parallel:
                    future = client.submit(this_fn, **kwargs)
                else:
                    this_fn(**kwargs)
            if parallel:
                futures.append(future)
                # Once a full batch of max_parallel_jobs has been submitted,
                # hand it to wait_for_jobs_to_finish() (presumably blocking
                # until the batch is done) and start a new, empty batch
                if len(futures) >= opts["max_parallel_jobs"]:
                    parallelizable_plot_loop.wait_for_jobs_to_finish(
                        client, opts, futures
                    )
                    del futures
                    futures = []

# Wait for jobs to complete (the last, possibly partial, batch)
if parallel and futures:
    parallelizable_plot_loop.wait_for_jobs_to_finish(client, opts, futures)

timer_overall.end_all("All maps", opts["verbose"])

importlib.reload(bokeh_html_utils)

# Dropdown menus: one (title, options) pair each
dropdown_specs = [
    {"title": title, "options": options}
    for title, options in (
        ("Statistic", list(stat_dict)),
        ("Crop", list(crop_dict)),
        ("CLM minus...", clm_or_obsdiff_list),
    )
]

# Radio buttons: the period, plus the key-case choice if a key case was given
radio_specs = [
    {"title": "Period", "options": incl_yrs_ranges_dict.get_yr_range_str_list()},
]
if opts["key_case"] is not None:
    radio_specs.append({"title": "Diff?", "options": list(key_case_dict)})

# Display in notebook
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
    image_max_height=1200,
)

## 4. Immature and unmarketable harvests

### 4.1 Immature and unmarketable harvests: Timeseries

In [None]:
importlib.reload(crop_timeseries_figs)
importlib.reload(bokeh_html_utils)

# Dictionary whose keys will be used as dropdown menu options and whose values
# will be used as the basis for variable names (e.g., f"{value}_unmarketable"). At
# the moment this could work as radio buttons, but I'd like to eventually add a
# few more observational data sources.
area_source_dict = {"CLM": "crop_harv_area"}
if earthstat_data is not None:
    area_source_dict["EarthStat"] = "crop_area_es"

img_dir = os.path.join("Global_crop_yield_compare_obs", "immature_and_unmarketable")
os.makedirs(img_dir, exist_ok=True)

# Make figures: one per combination of immature/unmarketable and area source,
# each with one subplot per crop and one line per case
for imm_or_unm in opts["imm_unm_list"]:
    for area_source, area_var in area_source_dict.items():
        # Get figure layout info
        fig_opts, fig, axes = crop_timeseries_figs.setup_fig(opts)

        # These depend only on the two outer loop variables, so they are
        # computed once per figure rather than once per plotted line
        var = f"{area_var}_{imm_or_unm}"
        fig_opts["title"] = f"Fraction {imm_or_unm} crop area"
        if area_source != "CLM":
            fig_opts["title"] += f" (if CLM used {area_source} areas)"

        for i, crop in enumerate(opts["crops_to_plot"]):
            ax = axes.ravel()[i]
            plt.sca(ax)

            # Plot case data
            for c, case in enumerate(case_list.sel(crop=crop)):
                try:
                    # Get global area of this crop in this crop-area dataset
                    area_da = case.cft_ds[area_var]
                    if "pft" in area_da.dims:
                        sum_dim = "pft"
                    elif "gridcell" in area_da.dims:
                        sum_dim = "gridcell"
                    else:
                        raise RuntimeError(
                            f"Unsure what dimension to sum over: {area_da.dims}"
                        )
                    global_crop_area = area_da.sum(dim=sum_dim)

                    # Get global fraction
                    # NOTE(review): the numerator always sums over "pft",
                    # whereas the denominator's dimension is detected above;
                    # confirm this is intended.
                    crop_data_ts = case.cft_ds[var].sum(dim=["pft"]) / global_crop_area

                except Exception as e:  # pylint: disable=broad-exception-caught
                    # handle_exception() gets the debug option, the exception,
                    # and this message; a dummy time series is plotted instead
                    skip_msg = f"Skipping {imm_or_unm} {area_source} {crop} for case {case.name} due to"
                    plotting_utils.handle_exception(opts["debug"], e, skip_msg)
                    crop_data_ts = plotting_utils.get_dummy_timeseries(case.cft_ds)

                # Plot
                plot_kwargs = crop_timeseries_figs.get_line_plot_kwargs(opts, c)
                with warnings.catch_warnings():
                    # I'd like to actually resolve this warning rather than just suppressing
                    # it, but it is *complicated*.
                    warnings.filterwarnings(
                        "ignore", message=".*[Ss]ending large graph.*"
                    )
                    crop_data_ts.plot(**plot_kwargs)

            # Finish plot
            ax.set_title(crop)
            plt.xlabel("")

        crop_timeseries_figs.finish_fig(
            opts, fig_opts, fig, incl_faostat=False, incl_earthstat=False
        )

        # Save as PNG and PDF. The PDF path is built by swapping the file
        # extension only; a plain str.replace("png", "pdf") would also alter
        # any other "png" occurring in the directory or file name.
        fig_basename = (
            bokeh_html_utils.sanitize_filename("_".join([area_source, imm_or_unm]))
            + ".png"
        )
        fig_path = os.path.join(img_dir, fig_basename)
        plt.savefig(fig_path, dpi=150)
        plt.savefig(os.path.splitext(fig_path)[0] + ".pdf", dpi=150)
        plt.close()

importlib.reload(bokeh_html_utils)

# Single dropdown: which area source the figure was calculated with
dropdown_specs = [
    {"title": "Area source", "options": list(area_source_dict)},
]

# Single radio group: capitalized names from imm_unm_list
radio_specs = [
    {
        "title": "Immature or unmarketable?",
        "options": [name.capitalize() for name in opts["imm_unm_list"]],
    },
]

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
)

### 4.2 Immature and unmarketable harvests: Maps

In [None]:
# Reload the helper modules used by this cell, in this order
for _module in (
    plotting_utils,
    results_maps,
    bokeh_html_utils,
    parallelizable_plot_loop,
):
    importlib.reload(_module)

# List whose members will be used as radio button options
area_frac_list = ["area", "fraction"]

# Directory in which the figure files will be saved (created if needed)
img_dir = os.path.join("Global_crop_yield_compare_obs", "immature_and_unmarketable")
os.makedirs(img_dir, exist_ok=True)


def get_map(case, imm_or_unm, area_or_frac):
    """Grid a time-aggregated immature/unmarketable harvested-area variable.

    Parameters
    ----------
    case
        Object whose ``cft_ds`` dataset is read. As a side effect, a scratch
        variable named ``"tmp"`` is written into that dataset.
    imm_or_unm : str
        Suffix selecting the source variable ``crop_harv_area_{imm_or_unm}``.
    area_or_frac : str
        ``"area"`` for the time mean of the source variable, or ``"fraction"``
        for its time sum divided by the time sum of ``crop_harv_area``.

    Returns
    -------
    The result of ``utils.grid_one_variable()`` for the scratch variable, with
    its ``units`` attribute set.

    Raises
    ------
    RuntimeError
        If ``area_or_frac`` is neither ``"area"`` nor ``"fraction"``.
    """
    ds = case.cft_ds
    source_var = f"crop_harv_area_{imm_or_unm}"
    scratch_var = "tmp"

    if area_or_frac not in ("area", "fraction"):
        raise RuntimeError(f"Unrecognized area_or_frac value: {area_or_frac}")

    if area_or_frac == "area":
        ds[scratch_var] = ds[source_var].mean(dim="time")
        # Units come from the source variable, with m2 assumed (silently) if
        # it has no units attribute
        units = ds[source_var].attrs.get("units", "m2")
    else:
        numerator = ds[source_var].sum(dim="time")
        denominator = ds["crop_harv_area"].sum(dim="time")
        ds[scratch_var] = numerator / denominator
        units = "unitless"

    map_clm = utils.grid_one_variable(ds, scratch_var)
    map_clm.attrs["units"] = units
    return map_clm


key_diff_abs_error = False
vrange = (None, None)

# For Dask parallel execution: plot_loop() gets the client only if more than
# one parallel job is allowed
parallel = opts["max_parallel_jobs"] > 1
dask_client = client if parallel else None

# No custom dropdown items for unmarketable/immature figs
custom_dropdown_items = []

# One plot_loop() call per combination of immature/unmarketable and
# area/fraction
timing = Timing()
for imm_or_unm in opts["imm_unm_list"]:
    for area_or_frac in area_frac_list:
        if opts["verbose"]:
            print(f"{imm_or_unm} seasons ({area_or_frac}):")

        results_da_name = f"{imm_or_unm} harvest {area_or_frac}"
        # Extra arguments given to plot_loop() for use with get_map
        get_mean_fn_args = [imm_or_unm, area_or_frac]
        custom_radio_items = [item.capitalize() for item in get_mean_fn_args]

        parallelizable_plot_loop.plot_loop(
            get_mean_fn=get_map,
            get_mean_fn_args=get_mean_fn_args,
            key_diff_abs_error=key_diff_abs_error,
            results_da_name=results_da_name,
            img_dir=img_dir,
            dask_client=dask_client,
            case_list=case_list,
            opts=opts,
            crop_dict=crop_dict,
            incl_yrs_ranges_dict=incl_yrs_ranges_dict,
            key_case_dict=key_case_dict,
            custom_dropdown_items=custom_dropdown_items,
            custom_radio_items=custom_radio_items,
            vrange=vrange,
        )
        if opts["verbose"]:
            print("=" * 50)

timing.end_all("Maps", opts["verbose"])
if opts["verbose"]:
    print("Done.")

# Dropdown menus: crop and period
dropdown_specs = [
    {"title": "Crop", "options": list(crop_dict)},
    {"title": "Period", "options": incl_yrs_ranges_dict.get_yr_range_str_list()},
]

# Radio buttons: capitalized option names, plus the key-case choice if a key
# case was given
radio_specs = [
    {"title": title, "options": [name.capitalize() for name in names]}
    for title, names in (
        ("Immature or unmarketable?", opts["imm_unm_list"]),
        ("Area or fraction?", area_frac_list),
    )
]
if opts["key_case"] is not None:
    radio_specs.append({"title": "Diff?", "options": list(key_case_dict)})

importlib.reload(bokeh_html_utils)

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
    image_max_height=1200,
)

## 5. Growing seasons

### 5.1 Overwintering in CLM

In [None]:
# Reload the helper modules used by this cell, in this order
for _module in (
    plotting_utils,
    results_maps,
    utils,
    combine_cft_to_crop,
    parallelizable_plot_loop,
):
    importlib.reload(_module)

# List whose members will be used as radio button options
area_frac_list = ["area", "fraction"]


def get_overwinter_fraction_crop_timemean(case):
    """
    Get a gridded map of the fraction of harvested crop area that overwintered.

    The fraction is the time-summed "overwinter_area_crop" divided by the
    time-summed "crop_harv_area", both read from case.cft_ds. The result is
    asserted to lie within [0, 1] wherever it is defined.

    Side effect: the ungridded result is stored in case.cft_ds as "dummy" so
    that it can be passed to utils.grid_one_variable().
    """
    cft_ds = case.cft_ds

    # Zero harvested area gives 0/0; silence the resulting NumPy warning
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="invalid value encountered in divide")
        overwinter_total = cft_ds["overwinter_area_crop"].sum(dim="time")
        harvested_total = cft_ds["crop_harv_area"].sum(dim="time")
        fraction = overwinter_total / harvested_total

    # Sanity checks: a fraction must not fall outside [0, 1]
    assert not np.any(fraction < 0)
    assert not np.any(fraction > 1)

    # This should be changed to happen automatically elsewhere!
    fraction.attrs["units"] = "unitless"

    # Grid (via a temporary variable in the dataset)
    tmp_name = "dummy"
    cft_ds[tmp_name] = fraction
    return utils.grid_one_variable(cft_ds, tmp_name)


def get_overwinter_area_crop_timemean(case):
    """
    Get a gridded map of the time-mean overwintering crop area.

    Reads "overwinter_area_crop" from case.cft_ds, averages it over time
    (keeping attributes), and masks out elements whose time-summed
    "crop_harv_area" is not positive.

    Side effect: the ungridded result is stored in case.cft_ds as "dummy" so
    that it can be passed to utils.grid_one_variable().
    """
    cft_ds = case.cft_ds

    # Time mean, then mask to where any area was harvested during the period
    mean_area = (
        cft_ds["overwinter_area_crop"]
        .mean(dim="time", keep_attrs=True)
        .where(cft_ds["crop_harv_area"].sum(dim="time") > 0)
    )

    # Grid (via a temporary variable in the dataset)
    tmp_name = "dummy"
    cft_ds[tmp_name] = mean_area
    return utils.grid_one_variable(cft_ds, tmp_name)


# Directory for the overwintering figures
img_dir = os.path.join("Global_crop_yield_compare_obs", "overwinter_clm")
os.makedirs(img_dir, exist_ok=True)

key_diff_abs_error = False

# Only hand the Dask client to the plot loop if more than one job is allowed
parallel = opts["max_parallel_jobs"] > 1
dask_client = client if parallel else None

# No custom dropdown items for overwintering figs
custom_dropdown_items = []

timer = Timing()
for area_or_frac in area_frac_list:
    if opts["verbose"]:
        print(f"{area_or_frac}:")
    var = f"overwinter_{area_or_frac}_crop_timemean"
    results_da_name = f"Overwintering {area_or_frac}"
    custom_radio_items = [area_or_frac]

    # Choose the map-making function and color range for this option
    if area_or_frac not in ("area", "fraction"):
        raise NotImplementedError(
            f"area_or_frac '{area_or_frac}' not recognized; options {area_frac_list}"
        )
    if area_or_frac == "area":
        get_mean_fn = get_overwinter_area_crop_timemean
        vrange = (None, None)
    else:
        # Fractions are bounded, so use a fixed range
        get_mean_fn = get_overwinter_fraction_crop_timemean
        vrange = (0, 1)

    parallelizable_plot_loop.plot_loop(
        get_mean_fn=get_mean_fn,
        key_diff_abs_error=key_diff_abs_error,
        results_da_name=results_da_name,
        img_dir=img_dir,
        dask_client=dask_client,
        case_list=case_list,
        opts=opts,
        crop_dict=crop_dict,
        incl_yrs_ranges_dict=incl_yrs_ranges_dict,
        key_case_dict=key_case_dict,
        custom_dropdown_items=custom_dropdown_items,
        custom_radio_items=custom_radio_items,
        vrange=vrange,
    )
    if opts["verbose"]:
        print("=" * 50)

timer.end_all("Plotting", opts["verbose"])
if opts["verbose"]:
    print("Done")

# Dropdown menus: one to choose the crop, one to choose the time period
dropdown_specs = [
    {"title": "Crop", "options": list(crop_dict.keys())},
    {"title": "Period", "options": incl_yrs_ranges_dict.get_yr_range_str_list()},
]

# Radio buttons; option labels are the capitalized members of area_frac_list
radio_specs = [
    {
        "title": "Area or fraction?",
        "options": [item.capitalize() for item in area_frac_list],
    },
]
# The "Diff?" selector is only added when a key case has been specified
if opts["key_case"] is not None:
    radio_specs.append({"title": "Diff?", "options": list(key_case_dict.keys())})

importlib.reload(bokeh_html_utils)

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
    image_max_height=1200,
)

### 5.2 Mean growing season length

In [None]:
# Reload the project modules used in this cell (presumably so that edits made
# to them since the kernel started are picked up without a restart)
importlib.reload(plotting_utils)
importlib.reload(results_maps)
importlib.reload(utils)
importlib.reload(parallelizable_plot_loop)
importlib.reload(clm_crop_season_utils)

# List whose members will be used as dropdown menu options
var_list = ["Growing season length"]


def get_mean_gslength(case):
    """
    Get a gridded map of mean growing season length.

    Averages "gslen_perharv_crop" from case.cft_ds over the "time" and
    "mxharvests" dimensions, weighting by "crop_harv_area" and keeping the
    variable's attributes.

    Side effect: the ungridded result is stored in case.cft_ds as "dummy" so
    that it can be passed to utils.grid_one_variable().
    """
    cft_ds = case.cft_ds

    # Harvested-area-weighted mean across years and harvests
    gslen = cft_ds["gslen_perharv_crop"]
    weights = cft_ds["crop_harv_area"]
    mean_gslen = gslen.weighted(weights).mean(
        dim=["time", "mxharvests"],
        keep_attrs=True,
    )

    # Grid (via a temporary variable in the dataset)
    tmp_name = "dummy"
    cft_ds[tmp_name] = mean_gslen
    return utils.grid_one_variable(cft_ds, tmp_name)


# Run each case's dataset through cft_ds_gslen() before plotting.
# NOTE(review): presumably this is what adds "gslen_perharv_crop", which
# get_mean_gslength() reads — confirm against clm_crop_season_utils.
for case in case_list:
    case.cft_ds = clm_crop_season_utils.cft_ds_gslen(case.cft_ds)

# Directory for the growing season figures
img_dir = os.path.join("Global_crop_yield_compare_obs", "calendars")
os.makedirs(img_dir, exist_ok=True)

key_diff_abs_error = False

# For Dask parallel execution
parallel = opts["max_parallel_jobs"] > 1
if parallel:
    dask_client = client
else:
    dask_client = None

# No custom radio items for growing season length figs
custom_radio_items = []

timer = Timing()
for results_da_name in var_list:
    if opts["verbose"]:
        print(f"{results_da_name}:")

    # Choose the map-making function and color range for this variable
    if results_da_name == "Growing season length":
        get_mean_fn = get_mean_gslength
        vrange = (None, None)
    else:
        # Report the loop variable itself; the global `var` is a leftover from
        # another cell (or undefined) and would give a misleading message.
        raise NotImplementedError(
            f"var '{results_da_name}' not recognized; options {var_list}"
        )
    custom_dropdown_items = [results_da_name]

    parallelizable_plot_loop.plot_loop(
        get_mean_fn=get_mean_fn,
        key_diff_abs_error=key_diff_abs_error,
        results_da_name=results_da_name,
        img_dir=img_dir,
        dask_client=dask_client,
        case_list=case_list,
        opts=opts,
        crop_dict=crop_dict,
        incl_yrs_ranges_dict=incl_yrs_ranges_dict,
        key_case_dict=key_case_dict,
        custom_dropdown_items=custom_dropdown_items,
        custom_radio_items=custom_radio_items,
        vrange=vrange,
    )
    if opts["verbose"]:
        print("=" * 50)

timer.end_all("Plotting", opts["verbose"])
if opts["verbose"]:
    print("Done")

# Dropdown menus: variable, crop, and time period
dropdown_specs = [
    {"title": "Variable", "options": [item.capitalize() for item in var_list]},
    {"title": "Crop", "options": list(crop_dict.keys())},
    {"title": "Period", "options": incl_yrs_ranges_dict.get_yr_range_str_list()},
]

# The only radio button group is "Diff?", and only when a key case has been
# specified
radio_specs = []
if opts["key_case"] is not None:
    radio_specs.append({"title": "Diff?", "options": list(key_case_dict.keys())})

importlib.reload(bokeh_html_utils)

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
    image_max_height=1200,
)

### 5.3 GGCMI growing seasons ("observations")

In [None]:
importlib.reload(plotting_utils)
importlib.reload(results_maps)
importlib.reload(bokeh_html_utils)

# Titles of the subplots in each figure, in the order given to the plot call
subplot_title_list = ["Sowing date", "Harvest date", "Growing season length", "Overwinter?"]

# Directory containing the GGCMI crop calendar files
crop_cal_dir = os.path.join(
    opts["obs_data_dir"], "lnd", "analysis_datasets", "ggcmi_grid", "annual_avg", "crop_calendar"
)

# Directory for the GGCMI growing season figures
img_dir = os.path.join("Global_crop_yield_compare_obs", "ggcmi_calendars")
os.makedirs(img_dir, exist_ok=True)

# Dictionary with keys the crop display name and values the GGCMI name
# (dropdown menu)
ggcmi_crop_dict = {
    "Corn": "mai",
    "Cotton": "cot",
    "Rice": "ri1",
    "Soy": "soy",
    "Sugarcane": "sgc",
    "Spring wheat": "swh",
}

# Dictionary with keys the rainfed/irrigated display name and values the GGCMI
# rf/ir code (radio buttons)
ggcmi_rfir_dict = {"Rainfed": "rf", "Irrigated": "ir"}

# Make one figure per crop and rainfed/irrigated combination
for crop, crop_ggcmi in ggcmi_crop_dict.items():
    for rfir, rfir_ggcmi in ggcmi_rfir_dict.items():
        results = results_maps.ResultsMaps()

        cropi = f"{crop_ggcmi}_{rfir_ggcmi}"
        suptitle = f"GGCMI growing seasons: {cropi}"
        file = os.path.join(
            crop_cal_dir, f"{cropi}_ggcmi_crop_calendar_phase3_v1.01.nc4"
        )

        # Figure file is named after the display names, not the GGCMI codes
        fig_basename = (
            bokeh_html_utils.sanitize_filename("_".join([crop, rfir])) + ".png"
        )
        fig_path = os.path.join(img_dir, fig_basename)

        # Open the file in a context manager so that it is closed once the
        # figure has been made, instead of leaving one open handle per
        # crop/irrigation combination. Plotting happens inside the block
        # because the variables may not be read from disk until then.
        with xr.open_dataset(file, decode_times=False) as ds:
            sdates = ds["planting_day"]
            hdates = ds["maturity_day"]

            results["Sowing date"] = sdates
            results.plot_vranges["Sowing date"] = [0, 365]

            results["Harvest date"] = hdates
            results.plot_vranges["Harvest date"] = [0, 365]

            results["Growing season length"] = ds["growing_season_length"]
            results.plot_vranges["Growing season length"] = [0, 365]

            # Northern Hemisphere: a harvest day earlier in the year than the
            # sowing day means the season spans the turn of the year.
            # Southern Hemisphere: sown before and harvested after mid-year
            # (day 182.5).
            is_nh = ds["lat"] >= 0
            nh_overwinter = is_nh & (hdates < sdates)
            sh_overwinter = ~is_nh & (sdates < 182.5) & (hdates > 182.5)
            overwinter = nh_overwinter | sh_overwinter
            # Mask out cells that have no harvest date
            overwinter = overwinter.where(~np.isnan(hdates))
            results["Overwinter?"] = overwinter
            results.plot_vranges["Overwinter?"] = [0, 1]

            results.plot(
                subplot_title_list=subplot_title_list,
                suptitle=suptitle,
                fig_path=fig_path,
            )

# Dropdown menu: crop display names
dropdown_specs = [{"title": "Crop", "options": list(ggcmi_crop_dict.keys())}]

# Radio buttons: rainfed/irrigated display names
radio_specs = [{"title": "Irrigated?", "options": list(ggcmi_rfir_dict.keys())}]

importlib.reload(bokeh_html_utils)

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
)

## 6. Allocation and biomass

In [None]:
# Reload the project modules used in this cell (presumably so that edits made
# to them since the kernel started are picked up without a restart)
importlib.reload(plotting_utils)
importlib.reload(results_maps)
importlib.reload(utils)
importlib.reload(combine_cft_to_crop)
importlib.reload(parallelizable_plot_loop)
importlib.reload(crop_biomass)

# List whose values will be used in titles and to populate the "Variable"
# dropdown menu
biomass_var_list = ["Max LAI"]

# Replace case_list with the version returned by crop_biomass.
# NOTE(review): presumably this adds the biomass variables (e.g.
# "max_tlai_crop") to each case where available — confirm in crop_biomass.
case_list = crop_biomass.get_caselist_crop_biomass_vars(case_list)


def get_max_lai(case):
    """
    Get a map of time-mean "max_tlai_crop", weighted by harvested area.

    If case.cft_ds has no "max_tlai_crop" variable, an all-NaN array shaped
    like case.cft_ds["area"] is returned instead (without gridding).

    Side effect: the ungridded mean is stored in case.cft_ds as
    "max_tlai_crop_timemean" so that it can be passed to
    utils.grid_one_variable().
    """
    cft_ds = case.cft_ds
    timemean_name = "max_tlai_crop_timemean"

    # Nothing to average for cases lacking the variable
    if "max_tlai_crop" not in cft_ds:
        return xr.full_like(cft_ds["area"], fill_value=np.nan)

    # Get area-weighted mean over time
    max_lai = cft_ds["max_tlai_crop"]
    weights = cft_ds["crop_harv_area"]
    cft_ds[timemean_name] = max_lai.weighted(weights).mean(
        dim="time", keep_attrs=True
    )

    # Grid
    return utils.grid_one_variable(cft_ds, timemean_name)


# Only hand the Dask client to the plot loop if more than one job is allowed
parallel = opts["max_parallel_jobs"] > 1
dask_client = client if parallel else None

key_diff_abs_error = False
vrange = (None, None)

# No custom radio items for biomass figs
custom_radio_items = []

# Directory for the biomass figures
img_dir = os.path.join("Global_crop_yield_compare_obs", "biomass")
os.makedirs(img_dir, exist_ok=True)

timer = Timing()
for biomass_var in biomass_var_list:
    # Choose the map-making function for this variable
    if biomass_var != "Max LAI":
        raise NotImplementedError(f"Unrecognized biomass_var: {biomass_var}")
    get_mean_fn = get_max_lai

    custom_dropdown_items = [biomass_var]

    parallelizable_plot_loop.plot_loop(
        get_mean_fn=get_mean_fn,
        key_diff_abs_error=key_diff_abs_error,
        results_da_name=biomass_var,
        img_dir=img_dir,
        dask_client=dask_client,
        case_list=case_list,
        opts=opts,
        crop_dict=crop_dict,
        incl_yrs_ranges_dict=incl_yrs_ranges_dict,
        key_case_dict=key_case_dict,
        custom_dropdown_items=custom_dropdown_items,
        custom_radio_items=custom_radio_items,
        vrange=vrange,
    )
    if opts["verbose"]:
        print("=" * 50)

timer.end_all("Plotting", opts["verbose"])
if opts["verbose"]:
    print("Done")

# Dropdown menus: variable, crop, and time period
dropdown_specs = [
    {"title": "Variable", "options": biomass_var_list},
    {"title": "Crop", "options": list(crop_dict.keys())},
    {"title": "Period", "options": incl_yrs_ranges_dict.get_yr_range_str_list()},
]

# The only radio button group is "Diff?", and only when a key case has been
# specified
radio_specs = []
if opts["key_case"] is not None:
    radio_specs.append({"title": "Diff?", "options": list(key_case_dict.keys())})

importlib.reload(bokeh_html_utils)

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
    image_max_height=1200,
)