# Land ice SMB model comparison
This notebook compares the downscaled surface mass balance (SMB) over the Greenland ice sheet (GrIS) with output from the regional model MAR. In what follows, we refer to the MAR data as the "observations".
\
Note 1: the MAR data have been processed into a climatology spanning 1960-1999.\
Note 2: the MAR data are available at a uniform resolution of 1 km and use the same projection as the CISM grid. The MAR data must be interpolated onto the CISM grid; this is done inside this notebook (for now) so that the comparison keeps working if either the CISM grid or the MAR grid changes in the future. \
creation: 05-26-24 \
contact: Gunter Leguy (gunterl@ucar.edu)

In [None]:
# Import packages
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as mcm
from netCDF4 import Dataset
import os
from scipy.interpolate import RegularGridInterpolator

# to display figures in notebook after executing the code.
%matplotlib inline

In [None]:
# Parameter Defaults
# NOTE(review): these look like per-run defaults that a driver may override — confirm how they are injected.

CESM_output_dir = "/glade/campaign/cesm/development/cross-wg/diagnostic_framework/CESM_output_for_testing"  # root directory containing one sub-directory per case
case_name = "b.e23_alpha17f.BLT1850.ne30_t232.092"  # case name
climo_nyears = 40  # number of years to compute the climatology
last_year = 101  # year stamp of the most recent coupler history file to read

# Optional base case: the base-case cells below only run when base_case_name is set.
base_case_output_dir = CESM_output_dir
base_case_name = None
base_last_year = last_year

obs_path = "/glade/u/home/gunterl/obs_diagnostic_cesm"  # path to observed dataset
obs_name = "GrIS_MARv3.12_climo_1960_1999.nc"  # file name of the observed dataset

In [None]:
case_init_file = f"{CESM_output_dir}/{case_name}/glc/hist/{case_name}.cism.gris.initial_hist.0001-01-01-00000.nc"  # CISM initial history file (read below for the grid and ice thickness)

case_path = f"{CESM_output_dir}/{case_name}/cpl/hist"  # path to cpl output
case_file = f"{case_path}/{case_name}.cpl.hx.1yr2glc.{last_year:04d}-01-01-00000.nc"  # name of last cpl simulation output
obs_file = f"{obs_path}/{obs_name}"  # name of observed dataset file

# The base-case paths are only defined when a base case has been requested.
if base_case_name:
    base_case_path = (
        f"{base_case_output_dir}/{base_case_name}/cpl/hist"  # path to cpl output
    )
    base_file = f"{base_case_path}/{base_case_name}.cpl.hx.1yr2glc.{base_last_year:04d}-01-01-00000.nc"  # name of last cpl simulation output

In [None]:
## CISM grid information and loading a field used for filtering

# Reading the information we need from the glc file.
# The context manager closes the file even if one of the variables is missing.
with Dataset(case_init_file, "r") as nid:
    x_cism = nid.variables["x1"][:]
    y_cism = nid.variables["y1"][:]
    # First time slice of the ice thickness; used later to build the ice mask
    thk_cism = np.squeeze(nid.variables["thk"][0, :, :])

# Defining the grid dimensions
## For the CISM grid
nx_cism = len(x_cism)
ny_cism = len(y_cism)

In [None]:
## The observed dataset
# The context manager closes the file even if one of the variables is missing.
with Dataset(obs_file, "r") as nid:
    x_obs = nid.variables["x"][:]
    y_obs = nid.variables["y"][:]
    # First (index 0) slice of the SMB variable on the observed grid
    smb_obs_src = np.squeeze(nid.variables["SMB"][0, :, :])

## For the observed grid
nx_obs = len(x_obs)
ny_obs = len(y_obs)

In [None]:
# Constants
# Grid spacing taken from the first two x1 coordinates.
# assumes the grid is uniform and the coordinates are in metres — TODO confirm
res = np.abs(x_cism[1] - x_cism[0])  # CISM output resolution

rhoi = 917  # ice density kg/m3
rhow = 1000  # water density kg/m3
sec_in_yr = 60 * 60 * 24 * 365  # seconds in a year (365-day year)

# NOTE(review): dividing by rhoi and multiplying by 1000 turns kg m-2 s-1 into
# mm of *ice* per year, whereas mm_to_Gt below uses rhow (water equivalent).
# Confirm which density is intended; the two differ by a factor rhow / rhoi.
smb_convert = sec_in_yr / rhoi * 1000  # converting kg m-2 s-1 ice to mm y-1 w.e.
kg_to_Gt = 1e-12  # Converting kg to Gt
# mm -> m (1e-3), times the cell area (res**2) and water density gives kg per cell
mm_to_Gt = rhow * 1e-3 * res**2 * kg_to_Gt  # converting mm/yr to Gt/yr

In [None]:
# Functions used in this notebook
def set_plot_prop_clean(ax):
    """
    Strip an axes of its default decorations so that only the plotted field remains.

    The y-axis is inverted, then the axis labels, tick labels and tick marks
    are removed.

    Input:
        ax: axes object to clean up (modified in place)
    """
    ax.invert_yaxis()

    # Each setter is applied in the listed order with its "empty" value.
    blanking_calls = (
        ("set_xlabel", ""),
        ("set_ylabel", ""),
        ("set_xticklabels", ""),
        ("set_yticklabels", ""),
        ("set_xticks", []),
        ("set_yticks", []),
    )
    for setter_name, empty_value in blanking_calls:
        getattr(ax, setter_name)(empty_value)


def rmse(prediction, target):
    """
    Root mean square error between two fields.

    Input:
        prediction = field to evaluate
        target = reference field the prediction is compared with
    Output:
        square root of the mean, over all elements, of (prediction - target)**2
    """
    residual = prediction - target
    mean_square = (residual**2).mean()
    return np.sqrt(mean_square)


def net_avrg(data):
    """
    Sum a data field over its first two axes.

    Despite the name, no division by the number of points is performed:
    the result is a total, not a mean.
    """
    summed_over_first_axis = np.sum(data, axis=0)
    return np.sum(summed_over_first_axis, axis=0)


def read_smb(file):
    """
    This function reads the CISM SMB dataset from a CESM simulation output
    in the cpl directory. The field is multiplied by smb_convert, which is
    meant to express it in mm/yr w.e.

    Input:
        file: name of the file to extract the SMB
    Output:
        first time slice of the "glc1Exp_Flgl_qice" variable, squeezed and
        scaled by smb_convert
    """
    # The context manager closes the file even when reading the variable fails.
    with Dataset(file, "r") as nid:
        smb_cism = np.squeeze(nid.variables["glc1Exp_Flgl_qice"][0, :, :]) * smb_convert
    return smb_cism

In [None]:
# More functions used in this notebook
# Grid dimensions and climatology length, bundled for the helper functions below
params = dict(
    ny_cism=ny_cism,
    nx_cism=nx_cism,
    climo_nyears=climo_nyears,
)


def create_climo(path, case_name, last_year, params):
    """
    Build an SMB climatology by averaging yearly coupler history files.

    Files are read backwards in time, starting with the one stamped
    `last_year`. Reading stops at the first missing file, so fewer years than
    requested may end up in the average.

    Input:
        path: directory containing the coupler history files
        case_name: case name, used as prefix of the file names
        last_year: year stamp of the most recent file to read
        params: dictionary providing "ny_cism" and "nx_cism" (grid dimensions)
                and "climo_nyears" (number of years requested)
    Output:
        array of shape (ny_cism, nx_cism): mean of the fields returned by
        read_smb over the years that were found
    Raises:
        ValueError: if fewer than one year is requested
        FileNotFoundError: if not even the file for `last_year` exists
    """
    if params["climo_nyears"] < 1:
        raise ValueError("climo_nyears must be at least 1 to create a climatology.")

    # Initializing a field for the climatology
    climo_out = np.zeros((params["ny_cism"], params["nx_cism"]))

    # Counter for available year (only needed if the number of years available is smaller
    # than the number of years requested to create the climatology).
    count_yr = 0

    for k in range(params["climo_nyears"]):
        year_to_read = last_year - k
        filename = (
            f"{path}/{case_name}.cpl.hx.1yr2glc.{year_to_read:04d}-01-01-00000.nc"
        )

        if not os.path.isfile(filename):
            print(f"The couple file for time {year_to_read} does not exist.")
            print(
                "We will only use the files that existed until now to create the SMB climatology."
            )
            break

        climo_out = climo_out + read_smb(filename)
        count_yr = count_yr + 1

    # Without this guard the division below would silently return an all-NaN field.
    if count_yr == 0:
        raise FileNotFoundError(
            f"No coupler history file found for case {case_name} in {path} "
            f"(first file looked for: year {last_year}); cannot create the SMB climatology."
        )

    print("number of years used in climatology = ", count_yr)
    # Averaging the climo data
    return climo_out / count_yr

In [None]:
# Loading the data
if base_case_name:
    # Reuse the shared reader so the base-case field is read and scaled
    # exactly like the fields that enter the climatologies.
    smb_cism = read_smb(base_file)

In [None]:
# creating the SMB climatology for new case
smb_case_climo = create_climo(
    path=case_path, case_name=case_name, last_year=last_year, params=params
)

# creating the SMB climatology for base_case (only when one was requested)
if base_case_name:
    smb_base_climo = create_climo(
        path=base_case_path,
        case_name=base_case_name,
        last_year=base_last_year,
        params=params,
    )

In [None]:
# Interpolating the observed data onto the CISM grid

# Defining the interpolation function.
# The observed field is transposed so that its axes follow the (x, y) order of
# the grid coordinates (presumably it is stored as (y, x) — verify against the file).
# fill_value=None lets the interpolator extrapolate outside the observed grid.
myInterpFunction_smb_obs = RegularGridInterpolator(
    (x_obs, y_obs),
    smb_obs_src.transpose(),
    method="linear",
    bounds_error=False,
    fill_value=None,
)

# Performing the interpolation for every CISM grid point in a single call.
# meshgrid returns (ny_cism, nx_cism) arrays; stacking them on the last axis
# gives the (..., 2) array of (x, y) points the interpolator expects.
cism_points = np.stack(
    np.meshgrid(np.asarray(x_cism), np.asarray(y_cism)), axis=-1
)
smb_obs_climo = np.asarray(myInterpFunction_smb_obs(cism_points), dtype=float)

In [None]:
# Filtering out fill values
# Interpolated observation values above 1e20 are treated as fill values and set to zero.
smb_obs_climo = np.where(smb_obs_climo > 1e20, 0, smb_obs_climo)
# Cells where the CISM thickness field is exactly zero are zeroed in every
# dataset, so that all fields are compared over the same cells.
mask = thk_cism[:, :] == 0
smb_obs_climo = np.where(mask, 0, smb_obs_climo)
smb_case_climo = np.where(mask, 0, smb_case_climo)
# The base-case fields only exist when a base case was requested.
if base_case_name:
    smb_cism = np.where(mask, 0, smb_cism)
    smb_base_climo = np.where(mask, 0, smb_base_climo)

In [None]:
def plot_contour(data, ax, title, vmin, vmax, cmap):
    """
    Draw a 2D field as an image on `ax`, annotated with its spatial total.

    Input:
        data: 2D field to display
        ax: axes to draw on
        title: panel title
        vmin, vmax: bounds of the color scale
        cmap: colormap used for the image
    """
    avg_data = np.round(net_avrg(data) * mm_to_Gt, 2)
    # Plot the field that was passed in (not a notebook-level variable).
    image = ax.imshow(data, vmin=vmin, vmax=vmax, cmap=cmap)
    ax.set_title(title, fontsize=16)
    set_plot_prop_clean(ax)
    ax.annotate("net avg =" + str(avg_data) + " Gt/yr", xy=(5, 5), fontsize=16)

    # The colorbar is added to the figure owning `ax`, so no global figure is needed.
    fig = ax.figure
    pos = ax.get_position()
    # Colorbar axes: fixed left edge (figure fraction), same vertical extent as the panel
    cax = fig.add_axes([0.35, pos.y0, 0.02, pos.y1 - pos.y0])

    cbar = fig.colorbar(image, cax=cax)
    cbar.ax.tick_params(labelsize=16)


def plot_contour_diff(data_new, data_old, ax, title, vmin, vmax, cmap):
    """
    Draw the difference `data_new - data_old` as an image on `ax`, annotated
    with the spatial total of that difference.

    Input:
        data_new, data_old: 2D fields to subtract (new minus old)
        ax: axes to draw on
        title: panel title
        vmin, vmax: bounds of the color scale
        cmap: colormap used for the image
    """
    # Compute the difference once and reuse it for the total and the image.
    diff = data_new - data_old
    avg_data = np.round(net_avrg(diff) * mm_to_Gt, 2)
    image = ax.imshow(diff, vmin=vmin, vmax=vmax, cmap=cmap)

    ax.set_title(title, fontsize=16)
    set_plot_prop_clean(ax)

    ax.annotate("net avg =" + str(avg_data) + " Gt/yr", xy=(5, 5), fontsize=16)

    # The colorbar is added to the figure owning `ax`, so no global figure is needed.
    fig = ax.figure
    pos = ax.get_position()
    # Colorbar axes: fixed left edge (figure fraction), same vertical extent as the panel
    cax = fig.add_axes([0.89, pos.y0, 0.02, pos.y1 - pos.y0])

    cbar = fig.colorbar(image, cax=cax)
    cbar.ax.tick_params(labelsize=16)

In [None]:
# Comparing SMB new run vs obs

# Colormap choice
# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
my_cmap = plt.get_cmap("Spectral")
my_cmap_diff = plt.get_cmap("bwr_r")


# Colorbar bounds
vmin = -2000
vmax = 2000

# Figure: new case | observations | new case minus observations
fig, ax = plt.subplots(1, 3, sharey=True, figsize=[22, 9])

## Left panel
plot_contour(
    smb_case_climo, ax[0], f"{case_name}\nSMB (mm/y w.e.)", vmin, vmax, my_cmap
)

## Center panel
plot_contour(smb_obs_climo, ax[1], "SMB Obs\n(mm/y w.e.)", vmin, vmax, my_cmap)

## Right panel
plot_contour_diff(
    smb_case_climo,
    smb_obs_climo,
    ax[2],
    "SMB bias (mm/yr w.e.)",
    vmin,
    vmax,
    my_cmap_diff,
)

In [None]:
# Comparing SMB new run vs base case

if base_case_name:
    # Colormap choice
    # plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
    my_cmap = plt.get_cmap("Spectral")
    my_cmap_diff = plt.get_cmap("bwr_r")

    # Colorbar bounds
    vmin = -2000
    vmax = 2000

    # Figure: new case | base case | new case minus base case
    fig, ax = plt.subplots(1, 3, sharey=True, figsize=[22, 9])

    ## Left panel
    plot_contour(
        smb_case_climo, ax[0], f"{case_name}\nSMB (mm/y w.e.)", vmin, vmax, my_cmap
    )

    ## Center panel
    plot_contour(
        smb_base_climo, ax[1], f"{base_case_name}\nSMB (mm/y w.e.)", vmin, vmax, my_cmap
    )

    ## Right panel
    plot_contour_diff(
        smb_case_climo,
        smb_base_climo,
        ax[2],
        "SMB difference (mm/yr w.e.)",
        vmin,
        vmax,
        my_cmap_diff,
    )

In [None]:
# Integrated SMB time series
def compute_annual_climo(path, case_name, last_year, params):
    """
    Compute a time series of the spatially integrated SMB, one value per
    yearly coupler history file.

    Files are read backwards in time, starting with the one stamped
    `last_year`. Reading stops at the first missing file or once
    params["climo_nyears"] files have been read, whichever comes first.

    Input:
        path: directory containing the coupler history files
        case_name: case name, used as prefix of the file names
        last_year: year stamp of the most recent file to read
        params: dictionary providing "mask" (cells set to zero before
                integrating) and "climo_nyears" (maximum number of files read)
    Output:
        first_year: index of the earliest entry filled in the time series,
                    i.e. last_year minus the number of files read, so that
                    avg_smb_timeseries[first_year:] holds exactly the values
                    that were read, whichever way the loop ended
        avg_smb_timeseries: array of length last_year; the entry at index
                    `year - 1` holds the value from the file stamped `year`
                    (rounded to 2 decimals) and is zero for files not read
    """
    # Initializing the time series (one slot per possible year)
    avg_smb_timeseries = np.zeros(last_year)

    # Counter for available year (only needed if the number of years available is smaller
    # than the number of years requested).
    count_yr = 0

    for k in range(last_year):
        year_to_read = last_year - k
        file_name = (
            f"{path}/{case_name}.cpl.hx.1yr2glc.{year_to_read:04d}-01-01-00000.nc"
        )

        if not os.path.isfile(file_name):
            print("The couple file for time", year_to_read, "does not exist.")
            print(
                "We will only use the files that existed until now to create the time series."
            )
            break

        # Zero the masked cells before integrating over the grid
        smb_temp = np.where(params["mask"], 0, read_smb(file_name))

        avg_smb_timeseries[year_to_read - 1] = np.round(
            net_avrg(smb_temp) * mm_to_Gt, 2
        )
        count_yr = count_yr + 1

        if count_yr == params["climo_nyears"]:
            break

    # Derived from the number of files actually read, so that it means the same
    # thing on every exit path and is defined even when the loop never runs.
    first_year = last_year - count_yr

    print("number of years used in climatology = ", count_yr)
    return first_year, avg_smb_timeseries

In [None]:
# Integrated SMB time series
# The time-series helper reads the ice mask from the shared parameter dictionary.
params.update(mask=mask)

first_year, avg_smb_case_climo = compute_annual_climo(
    path=case_path, case_name=case_name, last_year=last_year, params=params
)

# Same computation for the base case, when one was requested
if base_case_name:
    base_first_year, avg_smb_base_case_climo = compute_annual_climo(
        path=base_case_path,
        case_name=base_case_name,
        last_year=base_last_year,
        params=params,
    )

In [None]:
def plot_line(data, time, line, color, label, linewidth):
    """
    Draw `data` against `time` on the current pyplot axes.

    Input:
        data: values plotted on the y-axis
        time: values plotted on the x-axis
        line: format string passed to plt.plot (e.g. "-" or ":")
        color: used for both the line and the marker face
        label: legend entry
        linewidth: width of the line
    """
    line_style = {
        "ms": 3,
        "mfc": color,
        "color": color,
        "label": label,
        "linewidth": linewidth,
    }
    plt.plot(time, data, line, **line_style)

In [None]:
# Plotting the SMB spatially averaged time series

# TODO: include base case, base case climo (horizontal line), new case, new case climo, and obs climo
# Note: base case is 10 years of historical, new case is PI.
#       what comparisons make sense when base case is HIST and new case is 1850?


# Time axis of the new case: one value per entry of avg_smb_case_climo[first_year:]
time = np.arange(first_year, last_year)
if base_case_name:
    # Base-case time axis, shifted so that it ends at the same value as the new-case axis
    base_time = np.arange(base_first_year, base_last_year) + last_year - base_last_year
    base_nt = len(base_time)
nt = len(time)

# Constant series used to draw the climatological means as horizontal lines
avg_smb_obs_timeseries = np.zeros(nt)
avg_smb_case_timeseries = np.zeros(nt)
if base_case_name:
    avg_smb_base_timeseries = np.zeros(base_nt)

# Every entry receives the spatially integrated climatology (Gt/yr, rounded to 2 decimals)
avg_smb_obs_timeseries[:] = np.round(net_avrg(smb_obs_climo) * mm_to_Gt, 2)
avg_smb_case_timeseries[:] = np.round(net_avrg(smb_case_climo) * mm_to_Gt, 2)
if base_case_name:
    avg_smb_base_timeseries[:] = np.round(net_avrg(smb_base_climo) * mm_to_Gt, 2)


# One x tick every 5 years; the tick labels are the tick values themselves
x_ticks = np.arange(first_year, last_year + 2, 5)
tickx = x_ticks

# Fixed y-axis range and tick spacing
ymin = 100
ymax = 600
y_step = 50
y_ticks = np.arange(ymin, ymax + y_step, y_step)


plt.figure(figsize=(16, 7))

# Plotting annual / spatial means
plt.subplot(111)
# New case: yearly values (solid) and climatological mean (dotted)
plot_line(
    avg_smb_case_climo[first_year::],
    time,
    line="-",
    color="blue",
    label=f"{case_name}",
    linewidth=2,
)
plot_line(
    avg_smb_case_timeseries[:],
    time,
    line=":",
    color="blue",
    label=f"{case_name} (mean)",
    linewidth=2,
)
# Base case, when requested: yearly values (solid) and climatological mean (dotted)
if base_case_name:
    plot_line(
        avg_smb_base_case_climo[base_first_year::],
        base_time,
        line="-",
        color="red",
        label=f"{base_case_name}",
        linewidth=2,
    )
    plot_line(
        avg_smb_base_timeseries[:],
        base_time,
        line=":",
        color="red",
        label=f"{base_case_name} (mean)",
        linewidth=2,
    )
# Observed climatology, drawn as a constant line over the new-case time axis
plot_line(
    avg_smb_obs_timeseries[:],
    time,
    line="-",
    color="black",
    label="Observations (mean)",
    linewidth=2,
)

# Axes formatting
sizefont = 16
plt.xlim([first_year, last_year])
plt.xticks(x_ticks, tickx, fontsize=sizefont)
plt.xlabel(r"$Time$ (y)", fontsize=sizefont)
plt.ylabel("SMB average evolution (Gt/yr)", multialignment="center", fontsize=sizefont)
plt.ylim([ymin, ymax])
plt.yticks(fontsize=sizefont)
plt.legend(loc="upper left", ncol=1, frameon=True, borderaxespad=0)

# The trailing semicolon suppresses the text output of the last call in the notebook
plt.title("SMB average evolution", fontsize=sizefont);