# Compare crop yields to observations

- Uses raw annual CTSM outputs (NOT timeseries files).

Notebook created by Sam Rabin (samrabin@ucar.edu).

In [None]:
import glob
import importlib
import os
import sys
import warnings
from time import time

import bokeh_html_utils
import caselist
import clm_and_earthstat_maps as caem
import convert_pft1d_to_sparse
import crop_timeseries_figs
import earthstat
import incl_years_ranges_dict_class
import results_maps

# Plotting utils
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotting_utils
import xarray as xr
from dask.distributed import Client, wait

# Start a local Dask cluster using all available cores
client = Client()
# Bare expression as the last statement of the cell, so the notebook displays
# the client
client

## 1. Settings

### 1.1 Parameters modifiable in config.yml

In [None]:
# Path to CUPiD externals. This method is supposedly unreliable, so it's best for this to be
# overridden by a value given in config.yml. See examples/crops/config.yml.
# The default below resolves to "externals" two directory levels above the
# current working directory.
externals_path = os.path.join(os.getcwd(), os.pardir, os.pardir, "externals")

# Where land output is stored
CESM_output_dir = os.path.join(
    os.path.sep,
    "glade",
    "work",
    "samrabin",
    "clm6_crop_reparam_outputs",
)

# Full casenames that are present in CESM_output_dir and in individual filenames.
# Entries pair by position with case_legend_list below, so when (un)commenting
# a case here, do the same for its legend entry.
case_name_list = [
    # "ctsm53019_f09_BNF_hist",
    # "clm6_crop_032",
    # "clm6_crop_032_nomaxlaitrig",
    # "clm6_crop_032_nmlt_phaseparams",
    "alpha-ctsm5.4.CMIP7.09.ctsm5.3.068",
    # "alpha-ctsm5.4.CMIP7.09.ctsm5.3.068_nogddadapt",
    "crujra_matreqs",
    # "crujra_matreqs_nogddadapt",
    # "clm6_crop_032_nmlt_arooti",
    # "crujra_matreqs_and_gdd20blv2",
    # "crujra_matreqs_gdd20blv2_nomxmatv2",
    "clm6_crops_omni01",
    "clm6_crops_omni02",
    "clm6_crops_omni02_unsimgeneric",
]

# Names of cases to show in figure legends. Must be the same length as
# case_name_list (checked in "1.2 Other settings"). If empty, the case names
# themselves are used as legend entries.
case_legend_list = [
    # "A: ctsm5.3.019 (GSWP3)",
    # "B: ctsm5.3.032 (CRU-JRA)",
    # "C: As B + w/o max LAI triggering grainfill",
    # "D: As C + pre-CLM5 crop phase params",
    "E: 5.4 branch",
    # "F: 5.4 branch (no GDD adapt)",
    "G: CRU-JRA mat reqs",
    # "H: CRU-JRA mat reqs (no GDD adapt)",
    # "I: As C + pre-CLM5 arooti",
    # "J: CRU-JRA mat reqs + gdd20",
    # "K: CRU-JRA mat reqs + gdd20, mxmat360",
    "Omnibus 1",
    "Omnibus 2",
    "Omnibus 2 no CFT merge",
]

# The case against which other cases will be compared. Must be a member of
# case_legend_list or None. If None, will compare to first case in
# case_legend_list. Ignored if case_legend_list has only one member.
key_case = "G: CRU-JRA mat reqs"

# # Where land output is stored
# CESM_output_dir = os.path.join(
#     os.path.sep,
#     "glade",
#     "work",
#     "samrabin",
#     "wwieder_run_outputs",
# )
# # Full casenames that are present in CESM_output_dir and in individual filenames
# case_name_list = [
#     # "ctsm53n04ctsm52028_f09_hist",  # Doesn't have GRAINC_TO_FOOD_PERHARV
#     # "ctsm53041_54surfdata_snowTherm_100_HIST",
#     "ctsm5.4_5.3.068_PPEcal115f09_118_HIST",
#     "ctsm5.4.CMIP7_ciso_ctsm5.3.075_f09_124_HIST",
# ]
# # Names of cases to show in figure legends
# case_legend_list = [
#     # "ctsm53n04ctsm52028_f09_hist",  # Doesn't have GRAINC_TO_FOOD_PERHARV
#     # "Run 100",
#     "Run 118",
#     "Run 124",
# ]


# The actual netCDF timesteps, not the names of the files.
# start_year must not be later than end_year (checked in "1.2 Other settings").
start_year = 1961
end_year = 2024

# Year periods to calculate figures for. Each entry is capitalized and added to
# the dictionary that populates the year-period dropdown.
# incl_yrs_ranges = ["all", "1980-2009", "1995-2005", "2001-2024"]
incl_yrs_ranges = ["all", "1995-2005"]

# Crop functional types to include (rainfed first, then the irrigated versions)
cfts_to_include = [
    "temperate_corn",
    "tropical_corn",
    "cotton",
    "rice",
    "temperate_soybean",
    "tropical_soybean",
    "sugarcane",
    "spring_wheat",
    "irrigated_temperate_corn",
    "irrigated_tropical_corn",
    "irrigated_cotton",
    "irrigated_rice",
    "irrigated_temperate_soybean",
    "irrigated_tropical_soybean",
    "irrigated_sugarcane",
    "irrigated_spring_wheat",
]

# Crops to include; the figure and map loops below iterate over this list
crops_to_include = [
    "corn",
    "cotton",
    "rice",
    "soybean",
    "sugarcane",
    "wheat",
]
# Passed as fao_to_clm_dict when extracting FAOSTAT elements. Keys look like
# FAOSTAT item names and values match entries of crops_to_include.
fao_to_clm_dict = {
    "Maize": "corn",
    "Rice": "rice",
    "Seed cotton, unginned": "cotton",
    "Soya beans": "soybean",
    "Sugar cane": "sugarcane",
    "Wheat": "wheat",
}

# Whether to print progress messages
verbose = True

# Root of the observational datasets; the FAOSTAT and EarthStat paths below are
# built relative to this directory
obs_data_dir = os.path.join(
    os.sep + "glade",
    "campaign",
    "cesm",
    "development",
    "cross-wg",
    "diagnostic_framework",
    "CUPiD_obs_data",
)

# NOTE(review): these two flags are only stored in opts in this notebook;
# presumably they control whether a saved cft_ds file is regenerated or
# skipped when importing cases. Confirm against CaseList/CropCase.
force_new_cft_ds_file = False
force_no_cft_ds_file = False

### 1.2 Other settings

In [None]:
# Directory for any scratch output: $SCRATCH/CUPiD_scratch when the SCRATCH
# environment variable is set, otherwise the current working directory
scratch_root = os.environ.get("SCRATCH")
if scratch_root is None:
    cupid_temp = "."
else:
    cupid_temp = os.path.join(scratch_root, "CUPiD_scratch")
    os.makedirs(cupid_temp, exist_ok=True)

N_PFTS = 78

# Whatever follows the last "." in each case name (the whole name if no ".")
short_names = [case.rsplit(".", 1)[-1] for case in case_name_list]

# Sanity-check the requested year range
if end_year < start_year:
    raise RuntimeError(f"start_year ({start_year}) > end_year ({end_year})")

# Fall back on the case names if no legend entries were given; otherwise there
# must be exactly one legend entry per case
if not case_legend_list:
    case_legend_list = case_name_list
elif len(case_legend_list) != len(case_name_list):
    raise RuntimeError("case_legend_list must be same length as case_name_list")

# Resolve the key case, i.e., the case against which the others are compared.
# With only one case, key_case is left as given. Otherwise, None means "use the
# first case in case_legend_list" and any other value must be a member of
# case_legend_list. (Previously the outer condition also required key_case to
# be truthy, which made the None default unreachable.)
if len(case_legend_list) > 1:
    if key_case is None:
        key_case = case_legend_list[0]
    elif key_case not in case_legend_list:
        raise KeyError(
            f"key_case '{key_case}' not in case_legend_list {case_legend_list}"
        )

In [None]:
# Gather the settings into one dict for easier passing among functions...
opts = {
    "CESM_output_dir": CESM_output_dir,
    "case_name_list": case_name_list,
    "case_legend_list": case_legend_list,
    "start_year": start_year,
    "end_year": end_year,
    "cfts_to_include": cfts_to_include,
    "crops_to_include": crops_to_include,
    "fao_to_clm_dict": fao_to_clm_dict,
    "verbose": verbose,
    "obs_data_dir": obs_data_dir,
    "force_new_cft_ds_file": force_new_cft_ds_file,
    "force_no_cft_ds_file": force_no_cft_ds_file,
    "key_case": key_case,
}

# ...and remove the standalone variables so that opts holds the only copy
del (
    CESM_output_dir,
    case_name_list,
    case_legend_list,
    start_year,
    end_year,
    cfts_to_include,
    crops_to_include,
    fao_to_clm_dict,
    verbose,
    obs_data_dir,
    force_new_cft_ds_file,
    force_no_cft_ds_file,
    key_case,
)

In [None]:
importlib.reload(incl_years_ranges_dict_class)

# Capitalize the period labels (e.g., "all" becomes "All")
incl_yrs_ranges = list(map(str.capitalize, incl_yrs_ranges))

# Dictionary whose keys will be used to populate the "Years" dropdown,
# with one entry added per requested period
incl_yrs_ranges_dict = incl_years_ranges_dict_class.InclYrsRangesDict(
    opts["start_year"],
    opts["end_year"],
)
for incl_yrs_range in incl_yrs_ranges:
    incl_yrs_ranges_dict.add(incl_yrs_range)

### 1.3 Import stuff from externals

In [None]:
# Make the CUPiD externals directory importable; the ctsm_postprocessing
# imports below rely on it being on sys.path
sys.path.append(externals_path)
import ctsm_postprocessing.crops.faostat as faostat
import ctsm_postprocessing.utils as utils
from ctsm_postprocessing.crops import crop_secondary_variables as c2o
from ctsm_postprocessing.crops import cropcase
from ctsm_postprocessing.resolutions import identify_resolution
from ctsm_postprocessing.timing import Timing

## 2. Import case data

### 2.1 Import cases

In [None]:
# Reload so that edits to these modules are picked up without restarting the kernel
importlib.reload(cropcase)
importlib.reload(caselist)

# Collection of the cases to analyze. Later cells iterate over it (one item per
# case, each with a cft_ds Dataset) and read its resolutions attribute.
# The CropCase class and identify_resolution function are handed in from the
# externals rather than imported by caselist itself.
case_list = caselist.CaseList(
    CropCase=cropcase.CropCase,
    identify_resolution=identify_resolution,
    opts=opts,
)

### 2.2 Import FAOSTAT

In [None]:
# Location of the normalized FAOSTAT CSV within the observations directory
fao_subpath = (
    "lnd",
    "analysis_datasets",
    "ungridded",
    "timeseries",
    "FAOSTAT",
    "Production_Crops_Livestock_2025-02-25",
    "norm",
    "Production_Crops_Livestock_E_All_Data_(Normalized).csv",
)
fao_file = os.path.join(opts["obs_data_dir"], *fao_subpath)

# Read FAOSTAT for the years being analyzed
fao = faostat.FaostatProductionCropsLivestock(
    fao_file, y1=opts["start_year"], yN=opts["end_year"]
)

# TODO: Move all the following to FaostatProductionCropsLivestock class

# Extract production first, then harvested area
fao_prod, fao_area = [
    fao.get_element(element, fao_to_clm_dict=opts["fao_to_clm_dict"])
    for element in ("Production", "Area harvested")
]


# Only include where both production and area data are present
def drop_a_where_not_in_b(a, b):
    """Return a without the rows whose index labels are missing from b's index."""
    only_in_a = a.index.difference(b.index)
    return a.drop(list(only_in_a))


# Keep only the rows present in both the production and the area tables
fao_prod = drop_a_where_not_in_b(fao_prod, fao_area)
fao_area = drop_a_where_not_in_b(fao_area, fao_prod)
if not fao_prod.index.equals(fao_area.index):
    raise RuntimeError("Mismatch of prod and area indices after trying to align them")

# Don't allow production where no area. The rejected rows are kept in
# bad_prod and bad_area for inspection.
is_bad = (fao_prod["Value"] > 0) & (fao_area["Value"] == 0)
where_bad = np.where(is_bad)[0]
bad_prod, bad_area = fao_prod.iloc[where_bad], fao_area.iloc[where_bad]
is_ok = ~is_bad
fao_prod, fao_area = fao_prod[is_ok], fao_area[is_ok]
if not fao_prod.index.equals(fao_area.index):
    raise RuntimeError(
        "Mismatch of prod and area indices after disallowing production where no area"
    )

# Get yield: production divided by area, with the unit string built from the
# first row's production and area units
fao_yield = fao_prod.assign(
    Element="Yield",
    Unit="/".join([fao_prod["Unit"].iloc[0], fao_area["Unit"].iloc[0]]),
    Value=fao_prod["Value"] / fao_area["Value"],
)

# Get dict, keyed by the short statistic names used for the figures
fao_dict = {
    "yield": fao_yield,
    "prod": fao_prod,
    "area": fao_area,
}

### 2.3 Import EarthStat (basically gridded FAOSTAT)

In [None]:
importlib.reload(earthstat)

# Location of the EarthStat files within the observations directory
earthstat_subpath = (
    "lnd",
    "analysis_datasets",
    "multi_grid",
    "annual",
    "FAO-EarthStatYields",
)
earthstat_dir = os.path.join(opts["obs_data_dir"], *earthstat_subpath)

# Read EarthStat, passing the resolutions of the imported cases
earthstat_data = earthstat.EarthStat(
    earthstat_dir,
    case_list.resolutions,
    opts,
)

In [None]:
importlib.reload(crop_timeseries_figs)
importlib.reload(earthstat)

# Get versions of CLM stats as if planted with EarthStat area.
# For each case, this adds three variables to case.cft_ds: crop_area_es,
# crop_prod_es, and earthstat_time.
# TODO: Don't hard-code EARTHSTAT_RES_TO_PLOT here
EARTHSTAT_RES_TO_PLOT = "f09"
for case in case_list:
    case_ds = case.cft_ds

    for i, crop in enumerate(opts["crops_to_include"]):
        # Get EarthStat area
        crop_area_es = utils.ungrid(
            gridded_data=earthstat_data[EARTHSTAT_RES_TO_PLOT].get_data("area", crop),
            ungridded_ds=case_ds,
        )

        # Set up the crop_area_es_expanded variable (first crop) or append to it
        if i == 0:
            crop_area_es_expanded = crop_area_es.expand_dims(dim="crop", axis=0)
        else:
            # Append this crop's DataArray to existing one
            crop_area_es_expanded = xr.concat(
                [crop_area_es_expanded, crop_area_es],
                dim="crop",
            )

    # Convert area units (1 Mha = 1e6 ha and 1 ha = 1e4 m2). Note that the
    # EarthStat units are read from the last crop processed in the loop above.
    clm_units = case_ds["crop_area"].attrs["units"]
    es_units = crop_area_es.attrs["units"]
    if clm_units == "m2" and es_units == "Mha":
        crop_area_es_expanded *= 1e4 * 1e6
        crop_area_es_expanded.attrs["units"] = "m2"
    else:
        raise NotImplementedError(
            f"Conversion assumes CLM area in m2 (got {clm_units}) and EarthStat area in Mha (got {es_units})"
        )

    # Before saving, check alignment of all dims
    crop_area_es_expanded = earthstat.check_dim_alignment(
        crop_area_es_expanded, case_ds
    )

    # Save to case_ds, filling with NaN as necessary (e.g., if there are CLM years not in EarthStat).
    case_ds["crop_area_es"] = crop_area_es_expanded

    # Calculate production as if planted with EarthStat area
    area_units = case_ds["crop_area_es"].attrs["units"]
    area_units_exp = "m2"
    yield_units = case_ds["crop_yield"].attrs["units"]
    yield_units_exp = "g/m2"
    if area_units != area_units_exp or yield_units != yield_units_exp:
        raise NotImplementedError(
            f"Yield calculation assumes area in {area_units_exp} (got {area_units}) and yield in {yield_units_exp} (got {yield_units})"
        )
    # NOTE(review): crop_yield's "pft" dim is renamed to "gridcell" before
    # multiplying, presumably to match the dim of the ungridded EarthStat
    # area. Confirm.
    case_ds["crop_prod_es"] = case_ds["crop_area_es"] * case_ds["crop_yield"].rename(
        {"pft": "gridcell"}
    )
    case_ds["crop_prod_es"].attrs["units"] = "g"

    # Save EarthStat time axis to avoid plotting years with no EarthStat data
    earthstat_time = crop_area_es_expanded["time"]
    earthstat_time = earthstat_time.rename({"time": "earthstat_time_coord"})
    case_ds["earthstat_time"] = earthstat_time

## 3. Time series figures

The "Area source" menu allows choosing between statistics calculated using different crop areas:
* CLM: Crop areas used in the CLM simulation after all CFT merging has taken place, such as rye being merged to spring wheat.
* EarthStat: Crop yields from the CLM simulation but areas from EarthStat. Note that this will not give you the same results as you would get if you actually ran CLM with the EarthStat areas, because here we are just multiplying CLM's yields by EarthStat areas. If EarthStat has some crop in a gridcell but CLM doesn't, we will get a zero there for our CLM x EarthStat yields and production. Note also that the CLM simulation lines in the area figure might not align perfectly with one another or EarthStat due to differing land masks.

In [None]:
importlib.reload(crop_timeseries_figs)
importlib.reload(bokeh_html_utils)

# "Area source" dropdown options mapped to the value of the
# use_earthstat_area arg in crop_timeseries_figs(). At the moment this could
# work as radio buttons, but I'd like to eventually add a few more
# observational data sources.
area_source_dict = dict(CLM=False, EarthStat=True)

# "Statistic" radio button options mapped to the inputs to
# crop_timeseries_figs()
stat_dict = dict(Yield="yield", Production="prod", Area="area")

# Where figure files will be saved
img_dir = os.path.join("Global_crop_yield_compare_obs", "timeseries_yieldprodarea")
os.makedirs(img_dir, exist_ok=True)

for stat, stat_input in stat_dict.items():
    for area_source, use_earthstat_area in area_source_dict.items():
        # Filename to which the figure will be saved. Its parts must first be
        # any dropdown menu members and then any radio button group members,
        # in the orders given in dropdown_specs and radio_specs, respectively.
        fig_basename = (
            bokeh_html_utils.sanitize_filename(f"{area_source}_{stat}") + ".png"
        )
        fig_path = os.path.join(img_dir, fig_basename)

        with warnings.catch_warnings():
            # This suppresses some very annoying warnings when
            # use_earthstat_area=True. I'd like to eventually resolve this
            # properly, which will probably require compute()ing some of the
            # metadata variables in the cft_ds Datasets.
            warnings.filterwarnings(
                "ignore", message="Sending large graph.*", category=UserWarning
            )
            crop_timeseries_figs.main(
                stat_input,
                earthstat_data,
                case_list,
                fao_dict[stat_input],
                opts,
                use_earthstat_area=use_earthstat_area,
                fig_file=fig_path,
            )

In [None]:
# One dropdown menu: the area source
dropdown_specs = [{"title": "Area source", "options": list(area_source_dict)}]

# One radio button group: the statistic
radio_specs = [{"title": "Statistic", "options": list(stat_dict)}]

# Display in notebook
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
)

## 4. Yield, production, and area maps

In [None]:
importlib.reload(caem)

# The two members correspond to the CLM maps and the "CLM minus EarthStat"
# maps in the "CLM minus..." dropdown. At the moment this could work as radio
# buttons, but I'd like to eventually add a few more observational data
# sources.
clm_or_obsdiff_list = ["None", "EarthStat"]

# "Crop" dropdown options mapped to the values used for the
# clm_and_earthstat_maps_1crop() crop arg.
# NOTE(review): only cotton is mapped here; the version covering all of
# opts["crops_to_include"] is commented out. Confirm this is intentional.
# crop_dict = {crop.capitalize(): crop for crop in opts["crops_to_include"]}
crop_dict = {crop.capitalize(): crop for crop in ["cotton"]}

# "Statistic" radio button options mapped to the values used as input to the
# clm_and_earthstat_maps_1crop() "which" arg.
# NOTE(review): production and area are disabled. Confirm this is intentional.
stat_dict = {
    "Yield": "yield",
    # "Production": "prod",
    # "Area": "area",
}

# "Diff?" radio button options mapped to key cases: None for plain values,
# plus the configured key case (if any) for differences from it
key_case_dict = {"Values": None}
if opts["key_case"] is not None:
    key_case_dict.update({"Diff. from key case": opts["key_case"]})

# Where figure files will be saved
img_dir = os.path.join("Global_crop_yield_compare_obs", "maps_yieldprodarea")
os.makedirs(img_dir, exist_ok=True)


def get_fig_path(crop, clm_or_obsdiff, stat, opts):
    """
    Return the path, inside img_dir, of the PNG for one combination of menu
    choices.

    The filename parts must first be any dropdown menu members and then any
    radio button group members, in the orders given in dropdown_specs and
    radio_specs, respectively. The opts argument is accepted but not used.
    """
    stem = "_".join((crop, clm_or_obsdiff, stat))
    return os.path.join(img_dir, bokeh_html_utils.sanitize_filename(stem) + ".png")


for stat, stat_input in stat_dict.items():
    if opts["verbose"]:
        print(stat)

    # One timer per statistic, shared by all of its crops
    timer = Timing()
    for crop, crop_input in crop_dict.items():
        # Output paths: first the plain CLM maps, then CLM minus EarthStat
        fig_path_clm = get_fig_path(crop, clm_or_obsdiff_list[0], stat, opts)
        fig_path_diff_earthstat = get_fig_path(crop, clm_or_obsdiff_list[1], stat, opts)

        map_kwargs = dict(
            which=stat_input,
            case_list=case_list,
            case_legend_list=opts["case_legend_list"],
            earthstat_data=earthstat_data,
            utils=utils,
            verbose=opts["verbose"],
            timer=timer,
            crop=crop_input,
            fig_path_clm=fig_path_clm,
            fig_path_diff_earthstat=fig_path_diff_earthstat,
            key_case_dict=key_case_dict,
        )
        caem.clm_and_earthstat_maps_1crop(**map_kwargs)

    timer.end_all(f"{stat} maps", opts["verbose"])

In [None]:
importlib.reload(bokeh_html_utils)

# Two dropdown menus: the crop, and what (if anything) is subtracted from CLM
dropdown_specs = [
    {"title": "Crop", "options": list(crop_dict)},
    {"title": "CLM minus...", "options": clm_or_obsdiff_list},
]

# Radio button groups: the statistic and, when a key case is configured,
# whether to show values or differences from the key case
radio_specs = [{"title": "Statistic", "options": list(stat_dict)}]
if opts["key_case"] is not None:
    radio_specs += [{"title": "Diff?", "options": list(key_case_dict)}]

# Display in notebook
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
    image_max_height=1200,
)

## 5. Immature and failed harvests

### 5.1 Immature and failed harvests: Timeseries

In [None]:
importlib.reload(crop_timeseries_figs)
importlib.reload(bokeh_html_utils)

imm_fail_list = ["failed", "immature"]

img_dir = os.path.join("Global_crop_yield_compare_obs", "immature_and_failed")
os.makedirs(img_dir, exist_ok=True)

# One figure per harvest type, with one panel per crop and one line per case
for imm_or_fail in imm_fail_list:
    # Get figure layout info
    fig_opts, fig, axes = crop_timeseries_figs.setup_fig(opts)
    flat_axes = axes.ravel()
    var = f"crop_harv_area_{imm_or_fail}"

    for i, crop in enumerate(opts["crops_to_include"]):
        ax = flat_axes[i]
        plt.sca(ax)

        # Plot case data
        for c, case in enumerate(case_list):
            # Fraction of this crop's harvested area that was immature/failed
            crop_ds = case.cft_ds.sel(crop=crop)
            area_affected = crop_ds[var].sum(dim=["pft"])
            area_harvested = crop_ds["crop_harv_area"].sum(dim=["pft"])
            crop_data_ts = area_affected / area_harvested

            # Change line style for one line that overlaps another for some crops
            # TODO: Optionally define linestyle for each case in config.yml
            overlaps_other_line = "clm6_crop_032_nomaxlaitrig" in opts[
                "case_name_list"
            ] and opts["case_name_list"][c].endswith("clm6_crop_032_nmlt_phaseparams")
            linestyle = "--" if overlaps_other_line else "-"

            # Plot
            fig_opts["title"] = f"Fraction {imm_or_fail} crop area"
            crop_data_ts.plot(linestyle=linestyle)

        # Finish plot
        ax.set_title(crop)
        plt.xlabel("")

    crop_timeseries_figs.finish_fig(opts, fig_opts, fig, incl_obs=False)

    # Save the figure, then close it
    fig_path = os.path.join(
        img_dir, bokeh_html_utils.sanitize_filename(imm_or_fail) + ".png"
    )
    plt.savefig(fig_path, dpi=150)
    plt.close()

In [None]:
importlib.reload(bokeh_html_utils)

# No dropdown items
dropdown_specs = []

# One radio button group: immature vs. failed
imm_fail_options = [imm_or_fail.capitalize() for imm_or_fail in imm_fail_list]
radio_specs = [{"title": "Immature or failed?", "options": imm_fail_options}]

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
)

### 5.2 Immature and failed harvests: Maps

In [None]:
importlib.reload(plotting_utils)
importlib.reload(results_maps)
importlib.reload(bokeh_html_utils)

# "Crop" dropdown options mapped to the values used in .sel(crop=...).
# NOTE(review): only cotton is mapped here; the version covering all of
# opts["crops_to_include"] is commented out. Confirm this is intentional.
# crop_dict = {crop.capitalize(): crop for crop in opts["crops_to_include"]}
crop_dict = {crop.capitalize(): crop for crop in ["cotton"]}

# List whose members will be used as radio button options
imm_fail_list = ["failed", "immature"]

# "Diff?" radio button options mapped to key cases: None for plain values,
# plus the configured key case (if any) for differences from it
key_case_dict = {"Values": None}
if opts["key_case"] is not None:
    key_case_dict.update({"Diff. from key case": opts["key_case"]})

# Where figure files will be saved
img_dir = os.path.join("Global_crop_yield_compare_obs", "immature_and_failed")
os.makedirs(img_dir, exist_ok=True)


def get_map(cft_ds, crop_input, imm_or_fail):
    """
    Return a gridded map, for one crop, of the immature or failed harvested
    area summed over time divided by the total harvested area summed over time.

    imm_or_fail selects the numerator variable, crop_harv_area_{imm_or_fail}.
    """
    frac_var = f"frac_{imm_or_fail}_harv_timemean"

    # Selecting one crop gives a new Dataset, so the input is not modified
    crop_ds = cft_ds.sel(crop=crop_input)
    area_affected = crop_ds[f"crop_harv_area_{imm_or_fail}"].sum(dim="time")
    area_harvested = crop_ds["crop_harv_area"].sum(dim="time")
    crop_ds[frac_var] = area_affected / area_harvested

    return utils.grid_one_variable(crop_ds, frac_var)


# Passed through to plotting_utils.get_mean_map() for every case
key_diff_abs_error = False

# Make one figure per combination of harvest type (immature/failed), crop,
# year period, and values-vs-difference-from-key-case, with one map per case.
timing = Timing()
for imm_or_fail in imm_fail_list:
    if opts["verbose"]:
        print(f"{imm_or_fail} seasons:")

    for crop, crop_input in crop_dict.items():
        if opts["verbose"]:
            print(f"    {crop}:")

        for (
            incl_yrs_range_input,
            yr_range_str,
            time_slice,
        ) in incl_yrs_ranges_dict.plot_items():
            if opts["verbose"]:
                print(f"        {yr_range_str}")

            # One ResultsMaps object per harvest type, crop, and year period. It
            # is reused for both the "Values" and the key-case-difference plots.
            results = results_maps.ResultsMaps(
                vrange=[0, 1], incl_yrs_range=incl_yrs_range_input
            )
            suptitle = None
            for key_case_key, key_case_value in key_case_dict.items():

                if opts["verbose"]:
                    print(f"            {key_case_key}")

                # Get key case, if needed
                key_case = plotting_utils.get_key_case(opts, key_case_value, case_list)

                # Maps each case's legend entry to [first year, last year]
                # included, or to None if the case had no timesteps included
                case_incl_yr_dict = {}
                for c, case in enumerate(case_list):

                    (
                        n_timesteps,
                        map_clm,
                        case_first_yr,
                        case_last_yr,
                    ) = plotting_utils.get_mean_map(
                        case,
                        key_case,
                        key_diff_abs_error,
                        time_slice,
                        # Special inputs just for immature/failed harvests
                        crop_input,  # arg passed to get_map()
                        imm_or_fail,  # arg passed to get_map()
                        special_mean=get_map,  # Function to calculate special mean
                    )

                    map_clm.attrs["units"] = "unitless"
                    map_clm.name = f"Fraction {imm_or_fail} harvests"
                    case_legend = opts["case_legend_list"][c]
                    results[case_legend] = map_clm
                    # The figure title is set once, from the first map, and
                    # then reused for the remaining plots of this year period
                    if suptitle is None:
                        suptitle = (
                            f"{results[case_legend].name}: {crop} [{yr_range_str}]"
                        )
                    if n_timesteps == 0:
                        case_incl_yr_dict[case_legend] = None
                    else:
                        case_incl_yr_dict[case_legend] = [case_first_yr, case_last_yr]

                # Filename parts follow the order of the dropdown menus (crop,
                # period) and then the radio button groups (immature/failed
                # and, if a key case is set, values vs. difference)
                join_list = [crop, yr_range_str, imm_or_fail]
                if opts["key_case"] is not None:
                    join_list.append(key_case_key)
                fig_basename = (
                    bokeh_html_utils.sanitize_filename("_".join(join_list)) + ".png"
                )
                fig_path = os.path.join(img_dir, fig_basename)
                if key_case_value is None:
                    key_plot = None
                else:
                    # NOTE(review): the "DONE" suffix means key_plot will not
                    # equal any entry of case_legend_list. It may be a leftover
                    # from debugging; confirm what ResultsMaps.plot() expects.
                    key_plot = key_case_value + "DONE"
                results.plot(
                    subplot_title_list=opts["case_legend_list"],
                    suptitle=suptitle,
                    one_colorbar=(key_case_value is None),
                    fig_path=fig_path,
                    key_plot=key_plot,
                    case_incl_yr_dict=case_incl_yr_dict,
                )
timing.end_all("Maps", opts["verbose"])
if opts["verbose"]:
    print("Done.")

In [None]:
importlib.reload(bokeh_html_utils)

# Two dropdown menus: the crop and the year period
dropdown_specs = [
    {"title": "Crop", "options": list(crop_dict)},
    {"title": "Period", "options": incl_yrs_ranges_dict.get_yr_range_str_list()},
]

# Radio button groups: immature vs. failed and, when a key case is
# configured, whether to show values or differences from the key case
imm_fail_options = [imm_or_fail.capitalize() for imm_or_fail in imm_fail_list]
radio_specs = [{"title": "Immature or failed?", "options": imm_fail_options}]
if opts["key_case"] is not None:
    radio_specs += [{"title": "Diff?", "options": list(key_case_dict)}]

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    output_dir=img_dir,
    show_in_notebook=True,
    image_max_height=1200,
)

## 6. Growing seasons

### 6.1 Overwintering in CLM

In [None]:
importlib.reload(plotting_utils)
importlib.reload(results_maps)
importlib.reload(utils)
from ctsm_postprocessing.crops import combine_cft_to_crop

importlib.reload(combine_cft_to_crop)

# Dictionary whose keys will be used to populate the "Crop" dropdown and whose
# values will be used in .sel(crop=...)
# NOTE(review): only cotton is processed; the loop over all of
# opts["crops_to_include"] is commented out. Confirm this is intentional.
crop_dict = {}
# for crop in opts["crops_to_include"]:
for crop in ["cotton"]:
    crop_dict[crop.capitalize()] = crop

# List whose members will be used as radio button options
area_frac_list = ["area", "fraction"]

# Dictionary whose keys will be used as radio button options and whose values
# will be passed to plotting_utils.get_key_case() when making the maps.
key_case_dict = {"Values": None}
if opts["key_case"] is not None:
    key_case_dict["Diff. from key case"] = opts["key_case"]

# Calculate overwintering for each year's harvests
for case in case_list:
    ds = case.cft_ds

    # Northern Hemisphere: harvest date is less than sowing date, presumably
    # days of year, so the season crossed the turn of the year (TODO confirm).
    # Southern Hemisphere: sown before, and harvested after, 182.5 (presumably
    # the day-of-year midpoint).
    # HARVEST_REASON_PERHARV > 0 presumably restricts this to harvests that
    # actually happened (TODO confirm).
    is_nh = ds["pfts1d_lat"] >= 0
    nh_overwinter = is_nh & (ds["HDATES"] < ds["SDATES_PERHARV"])
    sh_overwinter = ~is_nh & (ds["SDATES_PERHARV"] < 182.5) & (ds["HDATES"] > 182.5)
    overwinter = (nh_overwinter | sh_overwinter) & (ds["HARVEST_REASON_PERHARV"] > 0)

    # Overwintering harvested area, summed over the harvests
    ds["overwinter_area"] = (overwinter * ds["cft_harv_area"]).sum(dim="mxharvests")
    # NOTE(review): ds is rebound to the return value here and is never
    # assigned back to case.cft_ds. The functions below read
    # cft_ds["overwinter_area_crop"], so this relies on combine_cft_to_crop()
    # adding the variable to (and returning) the same Dataset object. Confirm.
    ds = combine_cft_to_crop.combine_cft_to_crop(
        ds, "overwinter_area", "overwinter_area_crop", method="sum"
    )

    # This should be changed to happen automatically elsewhere!
    ds["overwinter_area_crop"].attrs["units"] = "m2"


def get_overwinter_fraction_crop_timemean(cft_ds, crop):
    """
    Return a gridded map, for one crop, of the overwintering area summed over
    time divided by the harvested area summed over time.

    Asserts that no value of the fraction is below 0 or above 1.
    """
    # Selecting one crop gives a new Dataset, so the input is not modified
    crop_ds = cft_ds.sel(crop=crop)

    overwinter_total = crop_ds["overwinter_area_crop"].sum(dim="time")
    harvested_total = crop_ds["crop_harv_area"].sum(dim="time")
    frac = overwinter_total / harvested_total
    assert not np.any(frac < 0)
    assert not np.any(frac > 1)

    # This should be changed to happen automatically elsewhere!
    frac.attrs["units"] = "unitless"

    # Store under a placeholder name so that it can be gridded
    gridding_var = "dummy"
    crop_ds[gridding_var] = frac
    return utils.grid_one_variable(crop_ds, gridding_var)


def get_overwinter_area_crop_timemean(cft_ds, crop):
    """
    Get a gridded map of the time-mean overwintering area for one crop.

    Parameters
    ----------
    cft_ds
        Dataset containing "overwinter_area_crop" and "crop_harv_area", with a
        "crop" coordinate and a "time" dimension.
    crop
        Value passed to .sel(crop=...).

    Returns
    -------
    The result of utils.grid_one_variable() for the time-mean overwintering
    area, masked wherever the time-summed "crop_harv_area" is not positive.
    """
    # .sel() gives a new Dataset, so the temporary variable added below does not
    # end up in the Dataset that was passed in
    crop_ds = cft_ds.sel(crop=crop)

    mean_overwinter_area = crop_ds["overwinter_area_crop"].mean(
        dim="time", keep_attrs=True
    )

    # Keep values only where the crop had some harvested area over the period
    has_harvested_area = crop_ds["crop_harv_area"].sum(dim="time") > 0
    masked_area = mean_overwinter_area.where(has_harvested_area)

    # grid_one_variable() takes a Dataset and a variable name, so store the
    # result in the Dataset under a temporary name
    gridding_name = "dummy"
    crop_ds[gridding_name] = masked_area
    return utils.grid_one_variable(crop_ds, gridding_name)


# Directory where the overwintering map images are saved
img_dir = os.path.join("Global_crop_yield_compare_obs", "overwinter_clm")
os.makedirs(img_dir, exist_ok=True)

key_diff_abs_error = False

# TODO: Delete this once these maps use the year periods dict
first_time = f"{opts['start_year']}-01-01"
last_time = f"{opts['end_year']}-12-31"
time_slice = slice(first_time, last_time)

# For each area/fraction option: the function that computes the time-mean map
# (given to get_mean_map() as special_mean) and the value range given to
# ResultsMaps().
overwinter_map_settings = {
    "area": (get_overwinter_area_crop_timemean, None),
    "fraction": (get_overwinter_fraction_crop_timemean, [0, 1]),
}

# Make one figure per (area or fraction) x crop x key-case option, with one map
# per case in each figure.
timer = Timing()
for area_or_frac in area_frac_list:
    if opts["verbose"]:
        print(f"{area_or_frac}:")
    if area_or_frac not in overwinter_map_settings:
        raise NotImplementedError(
            f"area_or_frac '{area_or_frac}' not recognized; options {area_frac_list}"
        )
    # Looked up once here because it does not depend on crop, key case, or case
    get_mean_fn, vrange = overwinter_map_settings[area_or_frac]

    for crop in crop_dict.values():
        if opts["verbose"]:
            print(f"    {crop}")
            timer.start()
        results = results_maps.ResultsMaps(vrange=vrange)

        suptitle = None
        for key_case_key, key_case_value in key_case_dict.items():

            if opts["verbose"]:
                print(f"        {key_case_key}")

            # Get key case, if needed
            key_case = plotting_utils.get_key_case(opts, key_case_value, case_list)

            for c, case in enumerate(case_list):
                case_legend = opts["case_legend_list"][c]

                # Only map_clm is used below; the other returned values are
                # not needed for these figures.
                (
                    n_timesteps,
                    map_clm,
                    case_first_yr,
                    case_last_yr,
                ) = plotting_utils.get_mean_map(
                    case,
                    key_case,
                    key_diff_abs_error,
                    time_slice,
                    # Special inputs just for overwintering figs
                    crop,  # arg passed to get_mean_fn()
                    special_mean=get_mean_fn,  # Function to calculate special mean
                )

                # Save to ResultsMap
                results[case_legend] = map_clm
                results[case_legend].name = f"Overwintering {area_or_frac}"

                # Get the overall title for the figure. Only need to do this once,
                # which is why it's in this if-statement.
                if suptitle is None:
                    suptitle = f"{results[case_legend].name}: {crop}"

            # File name is built from the crop, the area/fraction option, and
            # (only if a key case is configured) the key-case option label
            join_list = [crop, area_or_frac]
            if opts["key_case"] is not None:
                join_list.append(key_case_key)
            fig_basename = (
                bokeh_html_utils.sanitize_filename("_".join(join_list)) + ".png"
            )
            fig_path = os.path.join(img_dir, fig_basename)

            # NOTE(review): the purpose of the "DONE" suffix is not clear from
            # this cell; confirm what results.plot() expects for key_plot.
            key_plot = None if key_case_value is None else key_case_value + "DONE"
            results.plot(
                subplot_title_list=opts["case_legend_list"],
                suptitle=suptitle,
                one_colorbar=(key_case_value is None),
                fig_path=fig_path,
                key_plot=key_plot,
            )
        timer.end(f"    {crop} {area_or_frac}", opts["verbose"])
timer.end_all("Plotting", opts["verbose"])
if opts["verbose"]:
    print("Done")

In [None]:
importlib.reload(bokeh_html_utils)

# "Crop" dropdown: one option per crop label
dropdown_specs = [{"title": "Crop", "options": list(crop_dict)}]

# Radio buttons: area vs. fraction, plus a values-vs-difference choice that is
# offered only when a key case has been configured
radio_specs = [
    {
        "title": "Area or fraction?",
        "options": [option.capitalize() for option in area_frac_list],
    }
]
if opts["key_case"] is not None:
    radio_specs += [{"title": "Diff?", "options": list(key_case_dict)}]

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    output_dir=img_dir,
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    image_max_height=1200,
    show_in_notebook=True,
)

### 6.2 GGCMI growing seasons ("observations")

In [None]:
# Re-execute the helper modules so that recent edits to them take effect
for module in (plotting_utils, results_maps, bokeh_html_utils):
    importlib.reload(module)

# Directory that the crop calendar files are read from
crop_cal_dir = os.path.join(
    opts["obs_data_dir"],
    "lnd",
    "analysis_datasets",
    "ggcmi_grid",
    "annual_avg",
    "crop_calendar",
)

# Directory that the figures are saved to
img_dir = os.path.join("Global_crop_yield_compare_obs", "ggcmi_calendars")
os.makedirs(img_dir, exist_ok=True)

# Crop display name (dropdown menu option) -> GGCMI crop name
ggcmi_crop_dict = {
    "Corn": "mai",
    "Cotton": "cot",
    "Rice": "ri1",
    "Soy": "soy",
    "Sugarcane": "sgc",
    "Spring wheat": "swh",
}

# Rainfed/irrigated display name (radio button option) -> GGCMI rf/ir code
ggcmi_rfir_dict = {"Rainfed": "rf", "Irrigated": "ir"}

# Titles of the subplots in each figure
subplot_title_list = [
    "Sowing date",
    "Harvest date",
    "Growing season length",
    "Overwinter?",
]

# Make one figure per crop x rainfed/irrigated combination, each with maps of
# sowing date, harvest date, growing season length, and the overwintering flag.
for crop, crop_ggcmi in ggcmi_crop_dict.items():
    for rfir, rfir_ggcmi in ggcmi_rfir_dict.items():
        results = results_maps.ResultsMaps()

        # GGCMI crop name and rf/ir code, as used in the calendar file names
        cropi = f"{crop_ggcmi}_{rfir_ggcmi}"
        suptitle = f"GGCMI growing seasons: {cropi}"
        file = os.path.join(
            crop_cal_dir, f"{cropi}_ggcmi_crop_calendar_phase3_v1.01.nc4"
        )
        ds = xr.open_dataset(file, decode_times=False)
        sdates = ds["planting_day"]
        hdates = ds["maturity_day"]

        results["Sowing date"] = sdates
        results.plot_vranges["Sowing date"] = [0, 365]

        results["Harvest date"] = hdates
        results.plot_vranges["Harvest date"] = [0, 365]

        results["Growing season length"] = ds["growing_season_length"]
        results.plot_vranges["Growing season length"] = [0, 365]

        # Flag growing seasons as overwintering:
        # - Latitude >= 0: maturity day is earlier than planting day
        # - Latitude < 0: planted before day 182.5 and mature after it (182.5
        #   is half of 365)
        is_nh = ds["lat"] >= 0
        nh_overwinter = is_nh & (hdates < sdates)
        sh_overwinter = ~is_nh & (sdates < 182.5) & (hdates > 182.5)
        overwinter = nh_overwinter | sh_overwinter
        # Mask out cells that have no maturity day
        overwinter = overwinter.where(~np.isnan(hdates))
        results["Overwinter?"] = overwinter
        # The range key must match the results key ("Overwinter?"), as it does
        # for the other three maps
        results.plot_vranges["Overwinter?"] = [0, 1]

        # File name is built from the crop and rainfed/irrigated display names
        fig_basename = (
            bokeh_html_utils.sanitize_filename("_".join([crop, rfir])) + ".png"
        )
        fig_path = os.path.join(img_dir, fig_basename)
        results.plot(
            subplot_title_list=subplot_title_list, suptitle=suptitle, fig_path=fig_path
        )

In [None]:
importlib.reload(bokeh_html_utils)

# "Crop" dropdown: one option per GGCMI crop display name
dropdown_specs = [{"title": "Crop", "options": list(ggcmi_crop_dict)}]

# "Irrigated?" radio buttons: one option per rainfed/irrigated display name
radio_specs = [{"title": "Irrigated?", "options": list(ggcmi_rfir_dict)}]

# Display in notebook (no HTML file created)
bokeh_html_utils.create_static_html(
    output_dir=img_dir,
    dropdown_specs=dropdown_specs,
    radio_specs=radio_specs,
    show_in_notebook=True,
)

# SCRATCH

In [None]:
# Scratch work: build one map per case from each case's SDATES variable.
from ctsm_postprocessing import extending_xarray_ops
from ctsm_postprocessing.timing import Timing

# Re-execute these modules so that recent edits to them take effect
importlib.reload(extending_xarray_ops)
importlib.reload(plotting_utils)

results_clm = results_maps.ResultsMaps()
t = Timing()
for case in case_list:
    # Take the first entry along mxsowings, then reduce over "time" with
    # da_circmean_doy() (presumably a circular mean of day-of-year values;
    # confirm against extending_xarray_ops)
    da = extending_xarray_ops.da_circmean_doy(
        case.cft_ds["SDATES"].isel(mxsowings=0), dim="time"
    )
    # Stored under the case name
    results_clm[case.name] = da
# NOTE(review): the other end_all() call in this notebook also passes
# opts["verbose"]; confirm that omitting it here is intended.
t.end_all("Loop")