# Retrieval evaluation

This notebook assesses the accuracy of the CCIC retrieval on the test data.

In [None]:
%load_ext autoreload
%autoreload 2
import numpy as np
import xarray as xr
import cmocean
import matplotlib.pyplot as plt
from matplotlib import style
from ccic.plotting import set_style
from pathlib import Path
from tqdm import tqdm
from scipy.stats import binned_statistic_2d
import cartopy.crs as ccrs

import os

# Apply the project-wide matplotlib style before any figure is created.
set_style()

# Root directory of the CCIC data. The location is machine specific, so it can
# be overridden with the CCIC_DATA_PATH environment variable; the default is
# the path that was previously hard-coded here.
# (Previously used alternative: /home/simonpf/copper/ccic)
DATA_PATH = Path(os.environ.get("CCIC_DATA_PATH", "/home/amell/mnt/sun/data/ccic"))

## Retrieval accuracy

In [None]:
from pathlib import Path
from ccic.processing import determine_cloud_class, determine_column_cloud_class

# Load the CPCIR retrieval results for the test data and concatenate them into
# a single dataset along the "samples" dimension.
result_files_cpcir = sorted((DATA_PATH / "results/test_data/").glob("results_cpcir_*.nc"))
if not result_files_cpcir:
    # Fail early with a readable message instead of an opaque xr.concat error.
    raise FileNotFoundError(
        f"No CPCIR result files found in {DATA_PATH / 'results/test_data/'}"
    )

# The files are still read in shuffled order, but with a fixed seed so that the
# order of the samples is identical on every run.
file_order = np.random.default_rng(0).permutation(len(result_files_cpcir))

results_cpcir = []
for file_index in tqdm(file_order):
    result_file = result_files_cpcir[file_index]

    with xr.open_dataset(result_file) as data:
        # Drop the "encodings" variable before reading the file into memory.
        data = data.drop_vars(["encodings"]).load()

        # Per-level cloud class derived from the class probabilities
        # (classes are on the last axis).
        cloud_class = determine_cloud_class(data.cloud_class_prob.data, axis=-1)
        data["cloud_class"] = (("samples", "levels"), cloud_class)

        # One cloud class per sample, derived from the per-level classes.
        cc = determine_column_cloud_class(cloud_class)
        data["column_class"] = (("samples",), cc)

        results_cpcir.append(data)

results_cpcir = xr.concat(results_cpcir, "samples")

In [None]:
from pathlib import Path
from ccic.processing import determine_cloud_class, determine_column_cloud_class

# Load the GridSat retrieval results for the test data and concatenate them
# into a single dataset along the "samples" dimension.
result_files_gridsat = sorted((DATA_PATH / "results/test_data/").glob("results_gridsat*.nc"))
if not result_files_gridsat:
    # Fail early with a readable message instead of an opaque xr.concat error.
    raise FileNotFoundError(
        f"No GridSat result files found in {DATA_PATH / 'results/test_data/'}"
    )

# The files are still read in shuffled order, but with a fixed seed so that the
# order of the samples is identical on every run.
file_order = np.random.default_rng(0).permutation(len(result_files_gridsat))

results_gridsat = []
for file_index in tqdm(file_order):
    result_file = result_files_gridsat[file_index]

    with xr.open_dataset(result_file) as data:
        # Drop the "encodings" variable before reading the file into memory.
        data = data.drop_vars(["encodings"]).load()

        # Per-level cloud class derived from the class probabilities
        # (classes are on the last axis).
        cloud_class = determine_cloud_class(data.cloud_class_prob.data, axis=-1)
        data["cloud_class"] = (("samples", "levels"), cloud_class)

        # One cloud class per sample, derived from the per-level classes.
        cc = determine_column_cloud_class(cloud_class)
        data["column_class"] = (("samples",), cc)

        # Append only once the dataset is complete.
        results_gridsat.append(data)

results_gridsat = xr.concat(results_gridsat, "samples")

### TIWP Retrieval accuracy

#### Scatter plots

In [None]:
# The footprint-averaged variables ("tiwp_fpavg_true" / "tiwp_fpavg_mean") give
# marginally better results. They are not used by default; pass them as
# true_var / ret_var to _tiwp_scatter_stats to switch.

from scipy.stats import binned_statistic

bins = np.logspace(-3, 2, 201)


def _tiwp_scatter_stats(results, true_var="tiwp_true", ret_var="tiwp_mean", min_samples=1e2):
    """Compute the scatter-plot statistics of retrieved vs. reference TIWP.

    Args:
        results: Dataset holding the reference and retrieved variables.
        true_var: Name of the reference variable.
        ret_var: Name of the retrieved variable.
        min_samples: A reference bin needs more than this many samples for its
            conditional mean to be reported; otherwise it is NaN.

    Returns:
        Tuple ``(density, cond_mean, corr, bias)``: the conditional density
        p(retrieved | reference) on the ``bins`` x ``bins`` grid (reference
        along the first axis), the mean retrieved value per reference bin, the
        linear correlation coefficient, and the mean error divided by the mean
        retrieved value.
    """
    reference = results[true_var].data
    retrieved = results[ret_var].data

    density = np.histogram2d(reference, retrieved, bins=bins, density=True)[0]
    # Normalise every reference bin (row) so that it integrates to one over the
    # retrieved axis. The integral of a histogram is the plain sum of
    # density * bin width; a trapezoidal rule on these terms would wrongly
    # halve the contribution of the first and last bin.
    row_mass = np.sum(np.diff(bins) * density, axis=-1, keepdims=True)
    with np.errstate(invalid="ignore", divide="ignore"):
        # Reference bins without any sample become NaN.
        density = density / row_mass

    cond_mean = binned_statistic(
        reference,
        retrieved,
        lambda x: x.mean() if x.size > min_samples else np.nan,
        bins=bins
    )[0]
    corr = np.corrcoef(reference, retrieved)[0, 1]
    # NOTE(review): the bias is normalised by the mean *retrieved* value, not
    # by the mean reference value -- confirm this is intended.
    bias = np.mean(retrieved - reference) / np.mean(retrieved)
    return density, cond_mean, corr, bias


y_cpc, cond_mean_cpc, corr_cpc, bias_cpc = _tiwp_scatter_stats(results_cpcir)
y_gs, cond_mean_gs, corr_gs, bias_gs = _tiwp_scatter_stats(results_gridsat)

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm

# Two-panel figure (CPCIR, GridSat) of the conditional density
# p(TIWP_ret | TIWP_ref) with the conditional mean, plus a shared colorbar.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

norm = LogNorm(1e-3, 1e2)
levels = np.logspace(-3, 2, 11)

txtcol = "C0"

# Bin centres, used for both axes.
x = 0.5 * (bins[1:] + bins[:-1])
props = dict(facecolor='white', alpha=1.0, edgecolor="k")

panels = [
    ("(a) CPCIR", y_cpc, cond_mean_cpc, corr_cpc, bias_cpc),
    ("(b) GridSat", y_gs, cond_mean_gs, corr_gs, bias_gs),
]
for col, (title, density, cond_mean, corr, bias) in enumerate(panels):
    ax = fig.add_subplot(gs[0, col])
    ax.set_title(title, loc="left")
    # The histogram has the reference along its first axis; transpose so that
    # the reference ends up on the x-axis.
    m = ax.contourf(x, x, density.T, norm=norm, levels=levels, extend="both")
    # Rasterise the filled contours to keep the PDF small. Since Matplotlib 3.8
    # the contour set is itself an artist and ``.collections`` is deprecated.
    if hasattr(m, "set_rasterized"):
        m.set_rasterized(True)
    else:
        for c in m.collections:
            c.set_rasterized(True)

    ax.plot(x, x, ls="--", c="grey")
    ax.plot(x, cond_mean, c="C0", label="Conditional mean")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$\text{TIWP}_\text{ref}$ $[\si{\kilo \gram \per \meter \squared}$]")
    # "\\%" yields the LaTeX-escaped percent sign without relying on an
    # invalid Python escape sequence.
    ax.text(
        1, 2e-3, f"Corr.: {corr:0.2f} \n Bias: {100 * bias:0.2f}\\%",
        fontsize=10, color=txtcol, ha="left", va="bottom", bbox=props
    )
    if col == 0:
        # Only the left panel carries the y-axis label and the legend.
        ax.set_ylabel(r"$\text{TIWP}_\text{ret}$ $[\si{\kilo \gram \per \meter \squared}$]")
        ax.legend()
    else:
        for l in ax.yaxis.get_ticklabels():
            l.set_visible(False)

ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m, label=r"$\text{p}(\text{TIWP}_\text{ret} | \text{TIWP}_\text{ref}$) $[(\si{\kilo \gram \per \meter \squared})^{-1}$]", cax=ax)
# File name used for the footprint-averaged variant: "../figures/scatter_tiwp_fpavg.pdf"
fig.savefig("../figures/scatter_tiwp.pdf", dpi=200, bbox_inches="tight")

#### Zonal distributions

In [None]:
from scipy.stats import binned_statistic

iwp_max = 1e2
iwp_bins = np.logspace(-3, np.log10(iwp_max), 41)
lat_bins = np.linspace(-60, 60, 61)


def _zonal_tiwp_pdf(results, variable):
    """Conditional density p(TIWP | latitude) on the (lat_bins, iwp_bins) grid.

    Args:
        results: Dataset providing ``latitude`` and the TIWP variable.
        variable: Name of the TIWP variable to histogram.

    Returns:
        Array of shape (latitude bins, TIWP bins). Every latitude row
        integrates to one over the TIWP range covered by ``iwp_bins``;
        latitude bins without samples are NaN.
    """
    joint = np.histogram2d(
        results.latitude.data,
        results[variable].data,
        bins=(lat_bins, iwp_bins),
        density=True
    )[0]
    # Conditioning on latitude means normalising each latitude row over the
    # TIWP axis (axis 1). Integrating over axis 0 instead would normalise each
    # TIWP bin over latitude, which is not the quantity shown in the figure.
    lat_mass = np.sum(np.diff(iwp_bins)[None] * joint, axis=1, keepdims=True)
    with np.errstate(invalid="ignore", divide="ignore"):
        return joint / lat_mass


# CPCIR: distribution of the reference and of the retrieved samples, and the
# zonal means of the reference and of the retrieved mean.
y_cpc_true = _zonal_tiwp_pdf(results_cpcir, "tiwp_true")
y_cpc_ret = _zonal_tiwp_pdf(results_cpcir, "tiwp_sample")
mean_true_cpc = binned_statistic(results_cpcir.latitude.data, results_cpcir.tiwp_true.data, bins=lat_bins)[0]
mean_ret_cpc = binned_statistic(results_cpcir.latitude.data, results_cpcir.tiwp_mean.data, bins=lat_bins)[0]

# GridSat: same quantities.
y_gs_true = _zonal_tiwp_pdf(results_gridsat, "tiwp_true")
y_gs_ret = _zonal_tiwp_pdf(results_gridsat, "tiwp_sample")
mean_true_gs = binned_statistic(results_gridsat.latitude.data, results_gridsat.tiwp_true.data, bins=lat_bins)[0]
mean_ret_gs = binned_statistic(results_gridsat.latitude.data, results_gridsat.tiwp_mean.data, bins=lat_bins)[0]

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm, Normalize
# matplotlib.cm.get_cmap is deprecated and removed in recent Matplotlib
# releases; plt.get_cmap is used below instead.
from matplotlib.cm import ScalarMappable

# Two-panel figure (CPCIR, GridSat): reference distribution as filled grey
# contours, retrieved distribution as dashed contour lines, zonal means on top.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

# Bin centres.
lats = 0.5 * (lat_bins[1:] + lat_bins[:-1])
iwps = 0.5 * (iwp_bins[1:] + iwp_bins[:-1])
norm = LogNorm(1e-2, 1e2)
levels = np.logspace(-2, 2, 11)

# NOTE(review): this customised colormap is not passed to any plotting call
# below (contourf uses the stock "Greys" map) -- confirm whether it is needed.
m = ScalarMappable(norm=norm, cmap="Greys")
cmap = plt.get_cmap("Greys").copy()
cmap.set_over(m.to_rgba(1e-2))

panels = [
    ("(a) CPCIR", y_cpc_true, y_cpc_ret, mean_true_cpc, mean_ret_cpc),
    ("(b) GridSat", y_gs_true, y_gs_ret, mean_true_gs, mean_ret_gs),
]
for col, (title, pdf_true, pdf_ret, mean_true, mean_ret) in enumerate(panels):
    ax = fig.add_subplot(gs[0, col])
    m_filled = ax.contourf(iwps, lats, pdf_true, norm=norm, levels=levels, extend="max", cmap="Greys")
    # Rasterise the filled contours to keep the PDF small. Since Matplotlib 3.8
    # the contour set is itself an artist and ``.collections`` is deprecated.
    if hasattr(m_filled, "set_rasterized"):
        m_filled.set_rasterized(True)
    else:
        for c in m_filled.collections:
            c.set_rasterized(True)
    m_cts = ax.contour(iwps, lats, pdf_ret, norm=norm, levels=levels, linestyles="--", cmap="cmo.amp_r", linewidths=1)
    ax.plot(mean_true, lats, c="C1", label="Zonal mean (reference)")
    ax.plot(mean_ret, lats, c="C2", ls="--", label="Zonal mean (retrieved)")
    ax.set_xlabel(r"TIWP [$\si{\kilo \gram \per \meter \squared}$]")
    ax.set_xscale("log")
    ax.set_ylim([-58, 58])
    ax.set_title(title, loc="left")
    if col == 0:
        ax.set_ylabel(r"Latitude [$\si{\degree}$]")
    else:
        # Only the upper limit is set: a lower limit of 0 is invalid on a
        # logarithmic axis and was ignored by Matplotlib with a warning.
        # NOTE(review): panel (a) keeps automatic x-limits -- confirm intended.
        ax.set_xlim(right=iwp_max)
        for l in ax.yaxis.get_ticklabels():
            l.set_visible(False)
        ax.legend()

# Both colorbars are drawn into the same axes: the filled one first, then the
# contour-line one (which carries the label) on top of it.
ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m_filled, cax=ax)
plt.colorbar(m_cts, label=r"$\text{p}(\text{TIWP} | \text{Latitude})$ [$(\si{\kilo \gram \per \meter \squared})^{-1}$]", cax=ax)
fig.savefig("../figures/zonal_dist_tiwp.pdf", bbox_inches="tight")

#### Global IWP maps

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm, Normalize
from matplotlib.cm import get_cmap, ScalarMappable

In [None]:
# Global maps of the mean retrieved and reference TIWP on a 5-degree grid and
# of their relative difference, for CPCIR (left column) and GridSat (right).
lat_bins = np.arange(-90, 91, 5)
lon_bins = np.arange(-180, 181, 5)
lon_centers = lon_bins[:-1] + np.diff(lon_bins) / 2
lat_centers = lat_bins[:-1] + np.diff(lat_bins) / 2

# Floor applied to the reference TIWP in the denominator of the relative
# difference, together with its siunitx spelling for the colorbar label. Both
# are defined in one place so that the label cannot drift from the computation
# (the label previously stated e-3 while 1e-2 was used).
ref_floor, ref_floor_si = 1e-2, "e-2"

fig = plt.figure(figsize=(7.5, 5))
gs = GridSpec(3, 3, width_ratios=[1.0, 1, 0.05])


def _gridded_tiwp(results, variable):
    """Return the NaN-ignoring mean of ``variable`` per grid box, shape (lat, lon)."""
    return binned_statistic_2d(
        results.longitude.data,
        results.latitude.data,
        results[variable].data,
        statistic=np.nanmean,
        bins=[lon_bins, lat_bins]
    )[0].T


def _draw_tiwp_map(ax, field, title, **mesh_kwargs):
    """Draw ``field`` on the map axes ``ax`` and return the resulting mesh."""
    mesh = ax.pcolormesh(
        lon_centers,
        lat_centers,
        field,
        transform=ccrs.PlateCarree(),
        rasterized=True,
        **mesh_kwargs
    )
    ax.coastlines()
    ax.set_ylim(-70, 70)
    ax.set_title(title, loc='left')
    return mesh


# Remember that for GridSat we get those stripes due to the 3-h resolution.
products = [
    ("CPCIR", results_cpcir, "ace"),
    ("GridSat", results_gridsat, "bdf"),
]
for col, (product, product_results, tags) in enumerate(products):
    iwp_ret = _gridded_tiwp(product_results, "tiwp_mean")
    iwp_ref = _gridded_tiwp(product_results, "tiwp_true")
    diff = iwp_ret - iwp_ref
    truncated_diff = diff / np.maximum(iwp_ref, ref_floor)

    m = _draw_tiwp_map(
        fig.add_subplot(gs[0, col], projection=ccrs.PlateCarree()),
        iwp_ret, f"({tags[0]}) {product} retrieved", vmin=0, vmax=0.3
    )
    m = _draw_tiwp_map(
        fig.add_subplot(gs[1, col], projection=ccrs.PlateCarree()),
        iwp_ref, f"({tags[1]}) {product} reference", vmin=0, vmax=0.3
    )
    m_diff = _draw_tiwp_map(
        fig.add_subplot(gs[2, col], projection=ccrs.PlateCarree()),
        truncated_diff * 100, f"({tags[2]}) {product} retrieved $-$ reference",
        vmin=-100, vmax=100, cmap='coolwarm'
    )

    # 90th percentile of the absolute relative difference over all grid boxes.
    print(np.nanquantile(abs(truncated_diff).flatten(), 0.9))

# Shared colorbar for the retrieved and reference maps (top two rows).
cbar = fig.colorbar(
    m, cax=fig.add_subplot(gs[:2, -1]),
    label=r'TIWP [\si{\kilo \gram \per \meter \squared}]', extend='both'
)

# Colorbar for the relative-difference maps (bottom row).
cbar = fig.colorbar(m_diff, cax=fig.add_subplot(gs[2, 2]), extend='both')
cbar.set_label(
    r'$\frac{\Delta \text{TIWP}}{\max(\text{TIWP}_{\text{ref}}, \SI{'
    + ref_floor_si
    + r'}{\kilo\gram\per\square\metre})}$',
    loc='center'
)
cbar.set_ticks([-100, 0, 100])
cbar.set_ticklabels([r'-100\%', '0', r'100\%'])

fig.savefig("../figures/global_dist_tiwp.pdf", bbox_inches="tight")

## Retrieval accuracy of TIWC

### Scatter plots

In [None]:
from scipy.stats import binned_statistic

bins = np.logspace(-3, 1, 201)


def _tiwc_scatter_stats(results, min_samples=1e3):
    """Compute the scatter-plot statistics of retrieved vs. reference TIWC.

    Args:
        results: Dataset holding ``tiwc_true`` and ``tiwc_mean``.
        min_samples: A reference bin needs more than this many samples for its
            conditional mean to be reported; otherwise it is NaN.

    Returns:
        Tuple ``(density, cond_mean, corr, bias)``: the conditional density
        p(retrieved | reference) on the ``bins`` x ``bins`` grid (reference
        along the first axis), the mean retrieved value per reference bin, the
        linear correlation coefficient, and the mean error divided by the mean
        retrieved value.
    """
    # All values are pooled into flat arrays.
    reference = results.tiwc_true.data.ravel()
    retrieved = results.tiwc_mean.data.ravel()

    density = np.histogram2d(reference, retrieved, bins=bins, density=True)[0]
    # Normalise every reference bin (row) so that it integrates to one over the
    # retrieved axis. The integral of a histogram is the plain sum of
    # density * bin width; a trapezoidal rule on these terms would wrongly
    # halve the contribution of the first and last bin.
    row_mass = np.sum(np.diff(bins) * density, axis=-1, keepdims=True)
    with np.errstate(invalid="ignore", divide="ignore"):
        # Reference bins without any sample become NaN.
        density = density / row_mass

    cond_mean = binned_statistic(
        reference,
        retrieved,
        lambda x: x.mean() if x.size > min_samples else np.nan,
        bins=bins
    )[0]
    corr = np.corrcoef(reference, retrieved)[0, 1]
    # NOTE(review): the bias is normalised by the mean *retrieved* value, not
    # by the mean reference value -- confirm this is intended.
    bias = np.mean(retrieved - reference) / np.mean(retrieved)
    return density, cond_mean, corr, bias


y_cpc, cond_mean_cpc, corr_cpc, bias_cpc = _tiwc_scatter_stats(results_cpcir)
y_gs, cond_mean_gs, corr_gs, bias_gs = _tiwc_scatter_stats(results_gridsat)

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm

# Two-panel figure (CPCIR, GridSat) of the conditional density
# p(TIWC_ret | TIWC_ref) with the conditional mean, plus a shared colorbar.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

norm = LogNorm(1e-4, 1e2)
levels = np.logspace(-3, 2, 11)

txtcol = "C0"
diagcol = "C0"

# Bin centres, used for both axes.
x = 0.5 * (bins[1:] + bins[:-1])
props = dict(facecolor='white', alpha=1.0, edgecolor="k")

panels = [
    ("(a) CPCIR", y_cpc, cond_mean_cpc, corr_cpc, bias_cpc),
    ("(b) GridSat", y_gs, cond_mean_gs, corr_gs, bias_gs),
]
for col, (title, density, cond_mean, corr, bias) in enumerate(panels):
    ax = fig.add_subplot(gs[0, col])
    ax.set_title(title, loc="left")
    # The histogram has the reference along its first axis; transpose so that
    # the reference ends up on the x-axis.
    m = ax.contourf(x, x, density.T, norm=norm, levels=levels, extend="both")
    # Rasterise the filled contours to keep the PDF small. Since Matplotlib 3.8
    # the contour set is itself an artist and ``.collections`` is deprecated.
    if hasattr(m, "set_rasterized"):
        m.set_rasterized(True)
    else:
        for c in m.collections:
            c.set_rasterized(True)
    ax.plot(x, x, ls="--", c="grey")
    ax.plot(x, cond_mean, c=diagcol, label="Conditional mean")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$\text{TIWC}_\text{ref}$ $[\si{\gram \per \meter \cubed}$]")
    # "\\%" yields the LaTeX-escaped percent sign without relying on an
    # invalid Python escape sequence.
    ax.text(
        4e-1, 2e-3, f"Corr.: {corr:0.2f} \n Bias: {100 * bias:0.2f}\\%",
        fontsize=10, color=txtcol, ha="left", va="bottom", bbox=props
    )
    if col == 0:
        # The left panel carries the y-axis label ...
        ax.set_ylabel(r"$\text{TIWC}_\text{ret}$ $[\si{\gram \per \meter \cubed}$]")
    else:
        # ... the right panel carries the legend and hides its tick labels.
        ax.legend()
        for l in ax.yaxis.get_ticklabels():
            l.set_visible(False)

ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m, label=r"$\text{p}(\text{TIWC}_\text{ret} | \text{TIWC}_\text{ref}$) $[(\si{\gram \per \meter \cubed})^{-1}$]", cax=ax)
fig.savefig("../figures/scatter_plot_tiwc.pdf", dpi=200, bbox_inches="tight")

#### Latitude-altitude plots

In [None]:
# Latitude-altitude statistics of reference and retrieved TIWC for a set of
# longitude ranges. The statistics are computed here and plotted in the next
# cell.
#
# This used to take about 18 minutes because a Python callback (np.nanmean) was
# evaluated for every bin. NaNs are now filtered out up front so that the
# vectorised built-in 'mean' statistic can be used; bins without valid samples
# are NaN in both variants.

lat_bins = np.arange(-90, 91, 5)
altitude_bins = np.arange(0, 20e3 + 1000, 1000)


def _lat_alt_mean(results_slice, variable):
    """Mean of a profile variable per (altitude, latitude) bin, ignoring NaNs.

    Args:
        results_slice: Dataset providing ``latitude``, ``levels`` and the
            profile variable (assumed to be laid out as samples x levels, as
            required by the flattening below).
        variable: Name of the profile variable.

    Returns:
        Array of shape (altitude bins, latitude bins).
    """
    # Pair every profile value with the latitude of its sample and the
    # altitude of its level.
    latitude_data, level_data = np.meshgrid(
        results_slice.latitude.data,
        results_slice.levels.data,
        indexing='ij'
    )
    values = results_slice[variable].data.flatten()
    valid = ~np.isnan(values)
    return binned_statistic_2d(
        latitude_data.flatten()[valid],
        level_data.flatten()[valid],
        values[valid],
        statistic='mean',
        bins=[lat_bins, altitude_bins]
    )[0].T


products = {'cpcir': results_cpcir, 'gridsat': results_gridsat}

# Keys are (lon_min, lon_max); the last entry covers all longitudes.
lon_ranges = {key: {} for key in [(-40, -20), (20, 40), (-180, -160), (-180, 180)]}
for lon_key in tqdm(lon_ranges.keys()):
    lon_min, lon_max = lon_key
    for product, product_results in products.items():
        # Samples inside the longitude range (both bounds inclusive).
        idxs = (lon_min <= product_results.longitude) & (product_results.longitude <= lon_max)
        results_slice = product_results.sel(samples=idxs)

        lon_ranges[lon_key][product] = {
            'iwc_ref': _lat_alt_mean(results_slice, 'tiwc_true'),
            f'iwc_{product}': _lat_alt_mean(results_slice, 'tiwc_mean'),
        }

In [None]:
# One figure per longitude range: retrieved TIWC, reference TIWC and their
# relative difference (rows) for CPCIR and GridSat (columns).
norm = LogNorm(1e-4, 1e0)
levels = np.logspace(-3, 0, 7)

lat_centers = lat_bins[:-1] + np.diff(lat_bins) / 2
# Altitude bin centres converted from metres to kilometres.
alt_centers_km = (altitude_bins[:-1] + np.diff(altitude_bins) / 2) / 1e3

products = [("CPCIR", "cpcir", "ace"), ("GridSat", "gridsat", "bdf")]

for lon_key, results in lon_ranges.items():
    fig = plt.figure(figsize=(8, 6))
    gs = GridSpec(3, 3, width_ratios=[1.0, 1.0, 0.075])

    # Collected per figure, so that already-saved figures are not touched
    # again on later iterations.
    axes_data = []

    for col, (product, key, tags) in enumerate(products):
        retrieved = results[key][f'iwc_{key}']
        reference = results[key]['iwc_ref']

        for row, (field, kind) in enumerate([(retrieved, 'retrieved'), (reference, 'reference')]):
            ax = fig.add_subplot(gs[row, col])
            axes_data.append(ax)
            m = ax.contourf(
                lat_centers,
                alt_centers_km,
                field,
                norm=norm,
                levels=levels,
                extend='both'
            )
            # Rasterise the filled contours. Since Matplotlib 3.8 the contour
            # set is itself an artist and ``.collections`` is deprecated.
            if hasattr(m, "set_rasterized"):
                m.set_rasterized(True)
            else:
                for c in m.collections:
                    c.set_rasterized(True)
            ax.set_title(f'({tags[row]}) {product} {kind}')

        ax = fig.add_subplot(gs[2, col])
        axes_data.append(ax)

        # Difference in percent of the reference; the reference is floored at
        # 1e-3 in the denominator.
        diff = retrieved - reference
        m_diff = ax.pcolormesh(
            lat_centers,
            alt_centers_km,
            (diff / np.maximum(reference, 1e-3)) * 100,
            vmin=-100, vmax=100,
            cmap='coolwarm',
            rasterized=True
        )
        ax.set_title(f'({tags[2]}) {product} retrieved - reference')

    # The first two rows share norm and levels, so the last contour set is
    # used for both of their colorbars.
    ax = fig.add_subplot(gs[0,2])
    fig.colorbar(m, cax=ax, label=r"$\text{TIWC}$ [\si{\gram \per \meter \cubed}]")

    ax = fig.add_subplot(gs[1,2])
    fig.colorbar(m, cax=ax, label=r"$\text{TIWC}$ [\si{\gram \per \meter \cubed}]")

    ax = fig.add_subplot(gs[2,2])
    cbar = fig.colorbar(m_diff, cax=ax, label=r"$\frac{\Delta \text{TIWC}}{\max(\text{TIWC}_{\text{ref}}, \SI{e-3}{\gram\per\cubic\metre})}$", extend='both')
    cbar.set_ticks([-100, 0, 100])
    cbar.set_ticklabels([r'-100\%', '0', r'100\%'])

    fig.suptitle(f'Longitudes: $[\\SI{{{lon_key[0]}}}{{\\degree}}, \\SI{{{lon_key[1]}}}{{\\degree}}]$')

    for ax in axes_data:
        ax.set_xlabel(r'Latitude [\si{\degree}]')
        ax.set_ylabel(r'Altitude [\si{\kilo\metre}]')
        ax.set_xlim(-70, 70)
    fig.subplots_adjust(hspace=0.5, wspace=0.25)
    fig.savefig(
        f'../figures/tiwc_lat_vs_alt_{lon_key[0]}_{lon_key[1]}_5.pdf',
        bbox_inches='tight'
    )

In [None]:
# Global maps of retrieved and reference TIWC and of their relative difference
# at three altitude levels, one figure per product.
#
# Remember that for GridSat we get those stripes due to the 3-h resolution.
#
# This cell used to take about 18 minutes because a Python callback
# (np.nanmean) was evaluated for every grid box. NaNs are now filtered out up
# front so that the vectorised built-in 'mean' statistic can be used; grid
# boxes without valid samples are NaN in both variants.

lat_bins = np.arange(-90, 91, 5)
lon_bins = np.arange(-180, 181, 5)
lon_centers = lon_bins[:-1] + np.diff(lon_bins) / 2
lat_centers = lat_bins[:-1] + np.diff(lat_bins) / 2

norm = LogNorm(5e-3, 5e-1)

# Indices into the ``levels`` coordinate, one map row each, highest index
# first. (Alternative selection considered earlier: 16, 12, 8, 4.)
plotted_levels = (15, 10, 5)


def _level_map_mean(results, variable, level):
    """Return the NaN-ignoring mean of ``variable`` at ``level`` per grid box, shape (lat, lon)."""
    values = results[variable].sel(levels=level).data
    valid = ~np.isnan(values)
    return binned_statistic_2d(
        results.longitude.data[valid],
        results.latitude.data[valid],
        values[valid],
        statistic='mean',
        bins=[lon_bins, lat_bins]
    )[0].T


for product, results_ds in {'GridSat': results_gridsat, 'CPCIR': results_cpcir}.items():
    # A fresh figure per product; with a single shared figure the second
    # product would be drawn on top of the first one.
    fig = plt.figure(figsize=(8, 3.5))
    gs = GridSpec(4, 3, height_ratios=[1, 1, 1, 0.075])

    for row_i, level_i in enumerate(plotted_levels):
        level = results_ds.levels[level_i].item()

        iwc_estimate = _level_map_mean(results_ds, 'tiwc_mean', level)
        iwc_ref = _level_map_mean(results_ds, 'tiwc_true', level)
        diff = iwc_estimate - iwc_ref

        ax_estimate = fig.add_subplot(gs[row_i, 0], projection=ccrs.PlateCarree())
        ax_estimate.coastlines()

        ax_reference = fig.add_subplot(gs[row_i, 1], projection=ccrs.PlateCarree())
        ax_reference.coastlines()

        ax_diff = fig.add_subplot(gs[row_i, 2], projection=ccrs.PlateCarree())
        ax_diff.coastlines()
        if row_i == 0:
            # Column titles only on the top row.
            ax_estimate.set_title(f'(a) TIWC retrieved')
            ax_reference.set_title(f'(b) TIWC reference')
            ax_diff.set_title(f'(c) Retrieved $-$ reference')

        m = ax_estimate.pcolormesh(
            lon_centers,
            lat_centers,
            iwc_estimate,
            norm=norm,
            transform=ccrs.PlateCarree(),
            rasterized=True
        )

        m = ax_reference.pcolormesh(
            lon_centers,
            lat_centers,
            iwc_ref,
            norm=norm,
            transform=ccrs.PlateCarree(),
            rasterized=True
        )

        # Difference in percent of the reference; the reference is floored at
        # 5e-3 in the denominator.
        m_diff = ax_diff.pcolormesh(
            lon_centers,
            lat_centers,
            diff / np.maximum(iwc_ref, 5e-3) * 100,
            vmin=-100, vmax=100,
            cmap='coolwarm',
            transform=ccrs.PlateCarree(),
            rasterized=True
        )

        # A hack: the y-label is used to annotate the row with its altitude.
        ax_estimate.set_ylabel(rf'\SI{{{level/1e3:.1f}}}{{\kilo\metre}}')
        ax_estimate.set_yticks([])

        ax_estimate.set_ylim(-70, 70)
        ax_reference.set_ylim(-70, 70)
        ax_diff.set_ylim(-70, 70)

    # All rows share the same norms, so the colorbars are created once per
    # figure instead of stacking a new colorbar axes for every level.
    cbar = fig.colorbar(m, cax=fig.add_subplot(gs[-1, :2]), extend='both', orientation='horizontal')
    cbar.set_label(label=r'TIWC [\si{\gram \per \meter \cubed}]')

    cbar = fig.colorbar(
        m_diff,
        cax=fig.add_subplot(gs[-1, -1]),
        extend='both',
        orientation='horizontal'
    )
    cbar.set_label(
        label=r'$\frac{\Delta \text{TIWC}}{\max(\text{TIWC}_{\text{ref}}, \SI{5e-3}{\gram\per\cubic\metre})}$',
    )
    cbar.set_ticks([-100, 0, 100])
    cbar.set_ticklabels([r'-100\%', '0', r'100\%'])

    fig.subplots_adjust(wspace=0.05, hspace=0.01)

    # Title goes very far up...
    # fig.suptitle(product)

    fig.savefig(
        f'../figures/global_dist_tiwc_{product}_5.pdf',
        bbox_inches='tight'
    )

## Cloud detection

### Validation data

We begin by calculating precision and recall curves for the validation data, in order to determine an optimal decision threshold for the cloud classification.

Note: The optimal values derived in this section were implemented in the code used to determine the confusion matrix in the next section.

In [None]:
from sklearn.metrics import precision_recall_curve

In [None]:
# Retrieval results on the validation data, read fully into memory.
validation_dir = DATA_PATH / "results" / "validation_data"
results_cpcir_val = xr.load_dataset(validation_dir / "validation_results_cpcir.nc")
results_gridsat_val = xr.load_dataset(validation_dir / "validation_results_gridsat.nc")

In [None]:
# Precision-recall curves of the 2D cloud probability on the validation data.
# The operating point is the curve point with the smallest Euclidean distance
# to the ideal classifier at (precision, recall) = (1, 1).

# --- CPCIR ---
cp_true = results_cpcir_val.cloud_prob_true.data.ravel()
cp = results_cpcir_val.cloud_prob.data.ravel()
prec_cp_cpcir, rec_cp_cpcir, thresh = precision_recall_curve(cp_true.astype("bool"), cp)

d = np.sqrt((1.0 - prec_cp_cpcir) ** 2 + (1.0 - rec_cp_cpcir) ** 2)
best = np.argmin(d)
t_cp_cpcir_val, prec_cp_cpcir_opt_val, rec_cp_cpcir_opt_val = (
    thresh[best], prec_cp_cpcir[best], rec_cp_cpcir[best]
)

# --- GridSat ---
cp_true = results_gridsat_val.cloud_prob_true.data.ravel()
cp = results_gridsat_val.cloud_prob.data.ravel()
prec_cp_gridsat, rec_cp_gridsat, thresh = precision_recall_curve(
    cp_true.astype("bool"),
    cp
)

d = np.sqrt((1.0 - prec_cp_gridsat) ** 2 + (1.0 - rec_cp_gridsat) ** 2)
best = np.argmin(d)
t_cp_gridsat_val, prec_cp_gridsat_opt_val, rec_cp_gridsat_opt_val = (
    thresh[best], prec_cp_gridsat[best], rec_cp_gridsat[best]
)


In [None]:
import pandas as pd

# Optimal threshold and the corresponding precision / recall on the validation
# data (2D cloud probability), rounded to three decimals.
pd.DataFrame({
    "Dataset": ["CPCIR", "GridSat"],
    "Threshold": [round(value, 3) for value in (t_cp_cpcir_val, t_cp_gridsat_val)],
    "Precision": [round(value, 3) for value in (prec_cp_cpcir_opt_val, prec_cp_gridsat_opt_val)],
    "Recall": [round(value, 3) for value in (rec_cp_cpcir_opt_val, rec_cp_gridsat_opt_val)],
})

In [None]:
# Precision-recall curves of the 3D cloud mask on the validation data. The
# positive class is "not class 0", scored with one minus the probability of
# class 0. The operating point is the curve point closest (Euclidean distance)
# to (precision, recall) = (1, 1).

# --- CPCIR ---
cm_true = results_cpcir_val.cloud_class_true.data.ravel() == 0
cm = results_cpcir_val.cloud_class_prob.data[..., 0].ravel()
prec_cm_cpcir, rec_cm_cpcir, thresh = precision_recall_curve(~cm_true, 1.0 - cm)

d = np.sqrt((1.0 - prec_cm_cpcir) ** 2 + (1.0 - rec_cm_cpcir) ** 2)
best = np.argmin(d)
t_cm_cpcir_val, prec_cm_cpcir_opt_val, rec_cm_cpcir_opt_val = (
    thresh[best], prec_cm_cpcir[best], rec_cm_cpcir[best]
)

# --- GridSat ---
cm_true = results_gridsat_val.cloud_class_true.data.ravel() == 0
cm = results_gridsat_val.cloud_class_prob.data[..., 0].ravel()
prec_cm_gridsat, rec_cm_gridsat, thresh = precision_recall_curve(~cm_true, 1.0 - cm)

d = np.sqrt((1.0 - prec_cm_gridsat) ** 2 + (1.0 - rec_cm_gridsat) ** 2)
best = np.argmin(d)
t_cm_gridsat_val, prec_cm_gridsat_opt_val, rec_cm_gridsat_opt_val = (
    thresh[best], prec_cm_gridsat[best], rec_cm_gridsat[best]
)

In [None]:
import pandas as pd

# Optimal threshold and the corresponding precision / recall on the validation
# data (3D cloud mask), rounded to three decimals.
pd.DataFrame({
    "Dataset": ["CPCIR", "GridSat"],
    "Threshold": [round(value, 3) for value in (t_cm_cpcir_val, t_cm_gridsat_val)],
    "Precision": [round(value, 3) for value in (prec_cm_cpcir_opt_val, prec_cm_gridsat_opt_val)],
    "Recall": [round(value, 3) for value in (rec_cm_cpcir_opt_val, rec_cm_gridsat_opt_val)],
})

### Test data

In [None]:
from sklearn.metrics import precision_recall_curve

# Precision-recall curves of the 2D cloud probability on the test data. The
# test-optimal threshold (curve point closest to (1, 1)) is recorded, while the
# reported precision and recall are interpolated at the threshold that was
# determined on the validation data.

# --- CPCIR ---
cp_true = results_cpcir.cloud_prob_true.data.ravel()
cp = results_cpcir.cloud_prob.data.ravel()
prec_cp_cpcir, rec_cp_cpcir, thresh = precision_recall_curve(cp_true.astype("bool"), cp)

d = np.sqrt((1.0 - prec_cp_cpcir) ** 2 + (1.0 - rec_cp_cpcir) ** 2)
t_cp_cpcir = thresh[np.argmin(d)]
# The last curve point is left out of the interpolation so that the curve
# arrays have the same length as ``thresh``.
prec_cp_cpcir_opt, rec_cp_cpcir_opt = (
    np.interp(t_cp_cpcir_val, thresh, curve[:-1])
    for curve in (prec_cp_cpcir, rec_cp_cpcir)
)

# --- GridSat ---
cp_true = results_gridsat.cloud_prob_true.data.ravel()
cp = results_gridsat.cloud_prob.data.ravel()
prec_cp_gridsat, rec_cp_gridsat, thresh = precision_recall_curve(
    cp_true.astype("bool"),
    cp
)

d = np.sqrt((1.0 - prec_cp_gridsat) ** 2 + (1.0 - rec_cp_gridsat) ** 2)
t_cp_gridsat = thresh[np.argmin(d)]
prec_cp_gridsat_opt, rec_cp_gridsat_opt = (
    np.interp(t_cp_gridsat_val, thresh, curve[:-1])
    for curve in (prec_cp_gridsat, rec_cp_gridsat)
)

In [None]:
import pandas as pd

# Test-data summary for the 2D cloud probability: test-optimal and validation
# thresholds, and precision / recall at the validation threshold.
pd.DataFrame({
    "Dataset": ["CPCIR", "GridSat"],
    "Threshold test": [round(value, 3) for value in (t_cp_cpcir, t_cp_gridsat)],
    "Threshold val": [round(value, 3) for value in (t_cp_cpcir_val, t_cp_gridsat_val)],
    "Precision (with validation threshold)": [
        round(value, 3) for value in (prec_cp_cpcir_opt, prec_cp_gridsat_opt)
    ],
    "Recall (with validation threshold)": [
        round(value, 3) for value in (rec_cp_cpcir_opt, rec_cp_gridsat_opt)
    ],
})

In [None]:
# Precision-recall curves of the 3D cloud mask on the test data. The positive
# class is "not class 0", scored with one minus the probability of class 0.
# The test-optimal threshold (curve point closest to (1, 1)) is recorded, while
# the reported precision and recall are interpolated at the validation
# threshold.

# --- CPCIR ---
cm_true = results_cpcir.cloud_class_true.data.ravel() == 0
cm = results_cpcir.cloud_class_prob.data[..., 0].ravel()
prec_cm_cpcir, rec_cm_cpcir, thresh = precision_recall_curve(~cm_true, 1.0 - cm)

d = np.sqrt((1.0 - prec_cm_cpcir) ** 2 + (1.0 - rec_cm_cpcir) ** 2)
t_cm_cpcir = thresh[np.argmin(d)]
# The last curve point is left out of the interpolation so that the curve
# arrays have the same length as ``thresh``.
prec_cm_cpcir_opt, rec_cm_cpcir_opt = (
    np.interp(t_cm_cpcir_val, thresh, curve[:-1])
    for curve in (prec_cm_cpcir, rec_cm_cpcir)
)

# --- GridSat ---
cm_true = results_gridsat.cloud_class_true.data.ravel() == 0
cm = results_gridsat.cloud_class_prob.data[..., 0].ravel()
prec_cm_gridsat, rec_cm_gridsat, thresh = precision_recall_curve(~cm_true, 1.0 - cm)

d = np.sqrt((1.0 - prec_cm_gridsat) ** 2 + (1.0 - rec_cm_gridsat) ** 2)
t_cm_gridsat = thresh[np.argmin(d)]
prec_cm_gridsat_opt, rec_cm_gridsat_opt = (
    np.interp(t_cm_gridsat_val, thresh, curve[:-1])
    for curve in (prec_cm_gridsat, rec_cm_gridsat)
)

In [None]:
import pandas as pd

# Summary table for the 3D cloud mask: thresholds found on the test data, the
# validation thresholds, and the test precision/recall obtained at the latter.
# All numeric entries are rounded to three decimals.
pd.DataFrame({
    "Dataset": ["CPCIR", "GridSat"],
    **{
        column: [round(value, 3) for value in per_dataset]
        for column, per_dataset in (
            ("Threshold test", (t_cm_cpcir, t_cm_gridsat)),
            ("Threshold val", (t_cm_cpcir_val, t_cm_gridsat_val)),
            ("Precision (with validation threshold)", (prec_cm_cpcir_opt, prec_cm_gridsat_opt)),
            ("Recall (with validation threshold)", (rec_cm_cpcir_opt, rec_cm_gridsat_opt)),
        )
    },
})

In [None]:
from matplotlib.gridspec import GridSpec

# Two-panel precision-recall figure: (a) 2D cloud mask, (b) 3D cloud mask.
# Lines show the full curves; dots mark precision/recall at the validation thresholds.
f = plt.figure(figsize=(7, 3))
gs = GridSpec(1, 2, width_ratios=[1.0, 1.0])

# Panel (a): 2D cloud mask
ax = f.add_subplot(gs[0, 0])
ax.plot(rec_cp_cpcir, prec_cp_cpcir, c="C0", label="CPCIR")
ax.scatter([rec_cp_cpcir_opt], [prec_cp_cpcir_opt], c="C0")
ax.plot(rec_cp_gridsat, prec_cp_gridsat, c="C1", label="GridSat")
ax.scatter([rec_cp_gridsat_opt], [prec_cp_gridsat_opt], c="C1")
ax.set(xlim=[0, 1], ylim=[0, 1], xlabel="Recall", ylabel="Precision", aspect=1.0)
ax.set_title("(a) 2D cloud mask", loc="left")
ax.legend(loc="lower left")

# Panel (b): 3D cloud mask (shares the y-axis meaning with panel (a), so its
# y tick labels are blanked)
ax = f.add_subplot(gs[0, 1])
handles = []
handles.extend(ax.plot(rec_cm_cpcir, prec_cm_cpcir, c="C0", label="CPCIR"))
ax.scatter([rec_cm_cpcir_opt], [prec_cm_cpcir_opt], c="C0")
handles.extend(ax.plot(rec_cm_gridsat, prec_cm_gridsat, c="C1", label="GridSat"))
ax.scatter([rec_cm_gridsat_opt], [prec_cm_gridsat_opt], c="C1")
ax.set(xlim=[0, 1], ylim=[0, 1], xlabel="Recall", aspect=1.0)
ax.set_title("(b) 3D cloud mask", loc="left")
ax.set_yticklabels([])

f.savefig("../figures/cloud_detection_prec_rec.pdf")

### Confusion matrix

Note: the cloud classes were determined using the threshold derived from the validation set in the previous section.

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Row-normalised confusion matrices of the retrieved vs. reference cloud class.
# Rows are the true class and columns the retrieved class; with normalize="true"
# each row sums to one, i.e. entries are P(retrieved | true).
#
# `labels=np.arange(9)` pins the matrix to 9 x 9 (class indices 0-8) regardless
# of which classes happen to occur in the data, so it always lines up with the
# nine tick labels used by the plotting cell.

# CPCIR
cc = results_cpcir.cloud_class.data.ravel()
cc_t = results_cpcir.cloud_class_true.data.ravel()
# Reference values above 8 are not cloud classes (presumably a missing-data
# flag -- confirm) and are excluded.
valid = cc_t <= 8
cc = cc[valid]
cc_t = cc_t[valid]
cm_cpcir = confusion_matrix(cc_t, cc, labels=np.arange(9), normalize="true")

# GridSat
cc = results_gridsat.cloud_class.data.ravel()
cc_t = results_gridsat.cloud_class_true.data.ravel()
valid = cc_t <= 8
cc = cc[valid]
cc_t = cc_t[valid]
cm_gridsat = confusion_matrix(cc_t, cc, labels=np.arange(9), normalize="true")

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import Normalize
from ccic.data.cloudsat import CLOUD_CLASSES

# Confusion matrices for CPCIR (a) and GridSat (b) with a shared colorbar.
# The matrices are transposed so that the true class runs along x and the
# retrieved class along y.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])
norm = Normalize(0, 1)

for panel_col, (panel_title, panel_matrix) in enumerate((
    ("(a) CPCIR", cm_cpcir),
    ("(b) GridSat", cm_gridsat),
)):
    ax = fig.add_subplot(gs[0, panel_col])
    m = ax.pcolormesh(panel_matrix.T, norm=norm, rasterized=True)

    # Ticks at the cell centres, labelled with the class names.
    ax.set_xticks(np.arange(9) + 0.5)
    ax.set_xticklabels(CLOUD_CLASSES)
    ax.set_yticks(np.arange(9) + 0.5)
    ax.set_yticklabels(CLOUD_CLASSES)
    plt.setp(ax.xaxis.get_ticklabels(), rotation=45)
    if panel_col == 0:
        plt.setp(ax.yaxis.get_ticklabels(), rotation=45)
        ax.set_ylabel("Retrieved class")
    else:
        # The y-axis is the same as in panel (a); hide the duplicate labels.
        plt.setp(ax.yaxis.get_ticklabels(), visible=False)

    ax.set_title(panel_title, loc="left")
    ax.set_xlabel("True class")
    ax.set_aspect(1.0)
    # Diagonal marks perfect classification.
    ax.plot(np.arange(10), np.arange(10), c="grey", ls="--")

# Colorbar uses the mesh of the last (GridSat) panel; both panels share `norm`.
ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m, cax=ax, label="P(retrieved $|$ true)")

fig.savefig("../figures/confusion_matrix_cloud_mask.pdf", bbox_inches="tight")

## Column classification

In [None]:
from sklearn.metrics import precision_recall_curve, precision_recall_fscore_support

# Column-wise detection skill of two scores, retrieved TIWP and (negated) Tb:
#   * "cloud":      column_class > 0
#   * "convection": column_class > 1
# For each task and dataset this cell computes the precision-recall curves of
# both scores, the precision/recall of a fixed Tb threshold (plotted later as
# "Feng et al. (2021)"), and the index of the TIWP curve point closest to (1, 1).
# NOTE(review): the fixed Tb thresholds are 225 K (strict `<`) for cloud and
# 240 K (`<=`) for convection -- confirm that this assignment and the
# comparison operators match the cited reference.

# CPCIR
# Keep samples with finite Tb and TIWP and a non-negative column class.
valid = np.isfinite(results_cpcir.tbs) & np.isfinite(results_cpcir.tiwp_mean) & (results_cpcir.column_class >= 0)
tbs = results_cpcir.tbs.data[valid]
tiwp = results_cpcir.tiwp_mean.data[valid]
cc = results_cpcir.column_class.data[valid]

# Note: negating tbs reverses the ordering of the scores
# so that higher Tb's correspond to the no-cloud class (False)
# and lower Tb's correspond to the cloud class (True)
prec_cloud_tbs_cpcir, rec_cloud_tbs_cpcir, _ = precision_recall_curve(cc > 0, -tbs)
prec_cloud_tiwp_cpcir, rec_cloud_tiwp_cpcir, t_cloud_cpcir = precision_recall_curve(cc > 0, tiwp)
# Per-class precision/recall arrays of the fixed-threshold classifier.
prec_feng_cloud_cpcir, recall_feng_cloud_cpcir, _, _ = precision_recall_fscore_support(cc > 0, tbs < 225)
# Index of the TIWP curve point with minimal Euclidean distance to (1, 1).
opt_cloud_cpcir = np.argmin(np.sqrt((1 - prec_cloud_tiwp_cpcir) ** 2 + (1 - rec_cloud_tiwp_cpcir) ** 2))

prec_conv_tbs_cpcir, rec_conv_tbs_cpcir, _ = precision_recall_curve(cc > 1, -tbs)
prec_conv_tiwp_cpcir, rec_conv_tiwp_cpcir, t_conv_cpcir = precision_recall_curve(cc > 1, tiwp)
prec_feng_conv_cpcir, recall_feng_conv_cpcir, _, _ = precision_recall_fscore_support(cc > 1, tbs <= 240)
opt_conv_cpcir = np.argmin(np.sqrt((1 - prec_conv_tiwp_cpcir) ** 2 + (1 - rec_conv_tiwp_cpcir) ** 2))

# GridSat
# Same analysis as above; `valid`, `tbs`, `tiwp` and `cc` are overwritten.
valid = np.isfinite(results_gridsat.tbs) & np.isfinite(results_gridsat.tiwp_mean) & (results_gridsat.column_class >= 0)
tbs = results_gridsat.tbs.data[valid]
tiwp = results_gridsat.tiwp_mean.data[valid]
cc = results_gridsat.column_class.data[valid]

prec_cloud_tbs_gridsat, rec_cloud_tbs_gridsat, _ = precision_recall_curve(cc > 0, -tbs)
prec_cloud_tiwp_gridsat, rec_cloud_tiwp_gridsat, t_cloud_gridsat = precision_recall_curve(cc > 0, tiwp)
prec_feng_cloud_gridsat, recall_feng_cloud_gridsat, _, _ = precision_recall_fscore_support(cc > 0, tbs < 225)
opt_cloud_gridsat = np.argmin(np.sqrt((1 - prec_cloud_tiwp_gridsat) ** 2 + (1 - rec_cloud_tiwp_gridsat) ** 2))

prec_conv_tbs_gridsat, rec_conv_tbs_gridsat, _ = precision_recall_curve(cc > 1, -tbs)
prec_conv_tiwp_gridsat, rec_conv_tiwp_gridsat, t_conv_gridsat = precision_recall_curve(cc > 1, tiwp)
prec_feng_conv_gridsat, recall_feng_conv_gridsat, _, _ = precision_recall_fscore_support(cc > 1, tbs <= 240)
opt_conv_gridsat = np.argmin(np.sqrt((1 - prec_conv_tiwp_gridsat) ** 2 + (1 - rec_conv_tiwp_gridsat) ** 2))

In [None]:
# Distance-optimal TIWP thresholds as ((cloud, convection) for CPCIR,
# (cloud, convection) for GridSat).
(
    (t_cloud_cpcir[opt_cloud_cpcir], t_conv_cpcir[opt_conv_cpcir]),
    (t_cloud_gridsat[opt_cloud_gridsat], t_conv_gridsat[opt_conv_gridsat]),
)

In [None]:
from matplotlib.gridspec import GridSpec
# Three-column figure: (a) cloud detection, (b) detection of convection, and a
# narrow third column that only hosts the shared legend.
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.4])
f = plt.figure(figsize=(10, 4))

ax = f.add_subplot(gs[0, 0])
# Artists collected here define the entries (and their order) of the shared legend.
handles = []
handles += ax.plot(rec_cloud_tiwp_cpcir, prec_cloud_tiwp_cpcir, label="TIWP (CPCIR)", c="C0")
handles += ax.plot(rec_cloud_tbs_cpcir, prec_cloud_tbs_cpcir, label="$T_B$ (CPCIR)", c="C1")
handles += ax.plot(rec_cloud_tiwp_gridsat, prec_cloud_tiwp_gridsat, label="TIWP (GridSat)", c="C0", ls="--")
handles += ax.plot(rec_cloud_tbs_gridsat, prec_cloud_tbs_gridsat, label="$T_B$ (GridSat)", c="C1", ls="--")
# Index 1 selects the positive (True) class of the per-class precision/recall arrays.
handles.append(ax.scatter(recall_feng_cloud_cpcir[1], prec_feng_cloud_cpcir[1], marker="*", c="C2", label ="Feng et al. (2021)", zorder=20, s=100))
ax.scatter(recall_feng_cloud_gridsat[1], prec_feng_cloud_gridsat[1], marker="*", c="C2", zorder=20, s=100)
# Stars on the TIWP curves mark the point closest to (1, 1).
handles.append(ax.scatter([rec_cloud_tiwp_cpcir[opt_cloud_cpcir]], [prec_cloud_tiwp_cpcir[opt_cloud_cpcir]], marker="*", c="C0", zorder=20, s=100, label="Optimal threshold"))
ax.scatter([rec_cloud_tiwp_gridsat[opt_cloud_gridsat]], [prec_cloud_tiwp_gridsat[opt_cloud_gridsat]], marker="*", c="C0", zorder=20, s=100)
# Proxy artist placed outside the axis limits; it exists only to create the
# legend entry for the "x" markers drawn in the loops below.
handles.append(ax.scatter([-1], [-1], marker="x", c="C0", zorder=20, s=50, label="Alternative threshold\n[kg m$^{-2}$]"))
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("(a) Cloud detection", loc="left")
ax.set_aspect(1.0)
# Mark and annotate selected TIWP thresholds on the CPCIR curve; the shifts
# offset each text label from its marker. Note that `thresh`, `rec` and `prec`
# are overwritten as notebook-level variables by this loop.
for thresh, x_shift, y_shift in zip([0.005, 0.05, 0.1], [0.125, 0.05, 0], [-0.01, -0.05, -0.1]):
    rec = np.interp(thresh, t_cloud_cpcir, rec_cloud_tiwp_cpcir[:-1])
    prec = np.interp(thresh, t_cloud_cpcir, prec_cloud_tiwp_cpcir[:-1])
    ax.scatter([rec], [prec], marker="x", c="C0")
    ax.text(rec + x_shift, prec + y_shift, f"{thresh}", color="C0", va='center', ha='center')


ax = f.add_subplot(gs[0, 1])
# Same layout as panel (a) for the convection task; the labels given here are
# not shown because the legend is built from `handles` only.
ax.plot(rec_conv_tiwp_cpcir, prec_conv_tiwp_cpcir, label="TIWP (CPCIR)", c="C0")
ax.plot(rec_conv_tbs_cpcir, prec_conv_tbs_cpcir, label="$T_B$ (CPCIR)", c="C1")
ax.plot(rec_conv_tiwp_gridsat, prec_conv_tiwp_gridsat, label="TIWP (GridSat)", c="C0", ls="--")
ax.plot(rec_conv_tbs_gridsat, prec_conv_tbs_gridsat, label="$T_B$ (GridSat)", c="C1", ls="--")
ax.scatter(recall_feng_conv_cpcir[1], prec_feng_conv_cpcir[1], marker="*", c="C2", zorder=20, s=100)
ax.scatter(recall_feng_conv_gridsat[1], prec_feng_conv_gridsat[1], marker="*", c="C2", zorder=20, s=100)
ax.scatter([rec_conv_tiwp_cpcir[opt_conv_cpcir]], [prec_conv_tiwp_cpcir[opt_conv_cpcir]], marker="*", c="C0", zorder=20, s=100)
ax.scatter([rec_conv_tiwp_gridsat[opt_conv_gridsat]], [prec_conv_tiwp_gridsat[opt_conv_gridsat]], marker="*", c="C0", zorder=20, s=100)
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
ax.set_xlabel("Recall")
ax.set_title("(b) Detection of convection", loc="left")
ax.set_aspect(1.0)

# Alternative TIWP thresholds for the convection task (CPCIR curve).
for thresh, x_shift, y_shift in zip([0.1, 0.5, 1.0], [-0.05, 0.025, 0], [0, 0.03, -0.05]):
    rec = np.interp(thresh, t_conv_cpcir, rec_conv_tiwp_cpcir[:-1])
    prec = np.interp(thresh, t_conv_cpcir, prec_conv_tiwp_cpcir[:-1])
    ax.scatter([rec], [prec], marker="x", c="C0")
    ax.text(rec + x_shift, prec + y_shift, f"{thresh}", color="C0", ha='center', va='center')

# Legend-only axis.
ax = f.add_subplot(gs[0, -1])
ax.set_axis_off()
ax.legend(handles=handles, loc="center", edgecolor="none", facecolor="none")

f.savefig("../figures/cloud_detection.pdf", bbox_inches="tight")

## TIWP vs $\int \text{TIWC} \, d\text{altitude}$

Note: the code in the cells below must be executed sequentially.

In [None]:
from ccic.data.cloudsat import ALTITUDE_LEVELS

# For both datasets, pair each TIWP field with the trapezoidal integral of the
# matching TIWC profile over ALTITUDE_LEVELS (axis 1 is presumably the vertical
# "levels" dimension -- confirm). The 1e-3 factor converts grams to kilograms.

# Reference ("true") quantities
tiwp_true_cpcir = results_cpcir.tiwp_true.values
tiwp_trapz_true_cpcir = 1e-3 * np.trapz(y=results_cpcir.tiwc_true, x=ALTITUDE_LEVELS, axis=1)
tiwp_true_gridsat = results_gridsat.tiwp_true.values
tiwp_trapz_true_gridsat = 1e-3 * np.trapz(y=results_gridsat.tiwc_true, x=ALTITUDE_LEVELS, axis=1)

# Retrieved quantities (posterior means)
tiwp_retrieved_cpcir = results_cpcir.tiwp_mean.values
tiwp_trapz_retrieved_cpcir = 1e-3 * np.trapz(y=results_cpcir.tiwc_mean, x=ALTITUDE_LEVELS, axis=1)
tiwp_retrieved_gridsat = results_gridsat.tiwp_mean.values
tiwp_trapz_retrieved_gridsat = 1e-3 * np.trapz(y=results_gridsat.tiwc_mean, x=ALTITUDE_LEVELS, axis=1)

In [None]:
from scipy.stats import binned_statistic

# Statistics of the integrated retrieved TIWC (y) against the retrieved TIWP (x).
# For each dataset:
#   y_*         joint histogram density, rescaled row-wise to approximate p(y | x)
#   cond_mean_* mean of y per x bin (NaN for bins with 100 samples or fewer)
#   corr_*      linear correlation coefficient
#   bias_*      mean(y - x) relative to mean(y)
# NOTE(review): the row normalisation applies np.trapz with unit spacing to the
# width-weighted densities, so the first and last y bins get half weight.
bins = np.logspace(-3, 2, 201)

# CPCIR
y_cpc = np.histogram2d(
    tiwp_retrieved_cpcir, tiwp_trapz_retrieved_cpcir, bins=bins, density=True
)[0]
y_cpc /= np.trapz(y_cpc * np.diff(bins), axis=-1)[:, np.newaxis]
cond_mean_cpc = binned_statistic(
    tiwp_retrieved_cpcir,
    tiwp_trapz_retrieved_cpcir,
    statistic=lambda members: members.mean() if members.size > 1e2 else np.nan,
    bins=bins,
).statistic
corr_cpc = np.corrcoef(tiwp_retrieved_cpcir, tiwp_trapz_retrieved_cpcir)[0, 1]
bias_cpc = (
    np.mean(tiwp_trapz_retrieved_cpcir - tiwp_retrieved_cpcir)
    / np.mean(tiwp_trapz_retrieved_cpcir)
)

# GridSat
y_gs = np.histogram2d(
    tiwp_retrieved_gridsat, tiwp_trapz_retrieved_gridsat, bins=bins, density=True
)[0]
y_gs /= np.trapz(y_gs * np.diff(bins), axis=-1)[:, np.newaxis]
cond_mean_gs = binned_statistic(
    tiwp_retrieved_gridsat,
    tiwp_trapz_retrieved_gridsat,
    statistic=lambda members: members.mean() if members.size > 1e2 else np.nan,
    bins=bins,
).statistic
corr_gs = np.corrcoef(tiwp_retrieved_gridsat, tiwp_trapz_retrieved_gridsat)[0, 1]
bias_gs = (
    np.mean(tiwp_trapz_retrieved_gridsat - tiwp_retrieved_gridsat)
    / np.mean(tiwp_trapz_retrieved_gridsat)
)

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm

# Conditional PDF of the integrated retrieved TIWC given the retrieved TIWP for
# CPCIR (a) and GridSat (b), with the conditional mean and a correlation/bias box.
# Uses bins, y_*, cond_mean_*, corr_* and bias_* from the preceding cell.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

norm = LogNorm(1e-3, 1e2)
levels = np.logspace(-3, 2, 11)

txtcol = "C0"
diagcol = "orangered"

# Bin centres, used for both axes.
x = 0.5 * (bins[1:] + bins[:-1])

for panel_col, (panel_title, panel_pdf, panel_cond_mean, panel_corr, panel_bias) in enumerate((
    ("(a) CPCIR", y_cpc, cond_mean_cpc, corr_cpc, bias_cpc),
    ("(b) GridSat", y_gs, cond_mean_gs, corr_gs, bias_gs),
)):
    ax = fig.add_subplot(gs[0, panel_col])
    ax.set_title(panel_title, loc="left")
    # Transposed so that the first histogram dimension runs along x.
    m = ax.contourf(x, x, panel_pdf.T, norm=norm, levels=levels, extend="both")
    for c in m.collections:
        c.set_rasterized(True)

    ax.plot(x, x, ls="--", c="grey")
    ax.plot(x, panel_cond_mean, c="C0", label="Conditional mean")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$\text{TIWP}_\text{ret}$ $[\si{\kilo \gram \per \meter \squared}$]")
    if panel_col == 0:
        ax.set_ylabel(r"$\int \text{TIWC}_\text{ret} dz$ $[\si{\kilo \gram \per \meter \squared}$]")
    else:
        # Same y-axis as panel (a); hide the duplicate tick labels.
        for l in ax.yaxis.get_ticklabels():
            l.set_visible(False)

    props = dict(facecolor='white', alpha=1.0, edgecolor="k")
    ax.text(
        1, 2e-3, f"Corr.: {panel_corr:0.2f} \n Bias: {100 * panel_bias:0.2f}\%",
        fontsize=10, color=txtcol, ha="left", va="bottom", bbox=props
    )
    if panel_col == 0:
        ax.legend()

# Colorbar is built from the contour set of the last (GridSat) panel.
ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m, label=r"$\text{p}\left(\int \text{TIWC}_\text{ret} dz | \text{TIWP}_\text{ret} \right)$ $[(\si{\kilo \gram \per \meter \squared})^{-1}$]", cax=ax)
fig.savefig("../figures/tiwp_ret_vs_tiwc_trapz_ret.pdf", dpi=200, bbox_inches="tight")

In [None]:
from scipy.stats import binned_statistic

# Statistics of the integrated reference TIWC (y) against the reference TIWP (x).
# For each dataset:
#   y_*         joint histogram density, rescaled row-wise to approximate p(y | x)
#   cond_mean_* mean of y per x bin (NaN for bins with 100 samples or fewer)
#   corr_*      linear correlation coefficient
#   bias_*      mean(y - x) relative to mean(x)
# NOTE(review): the row normalisation applies np.trapz with unit spacing to the
# width-weighted densities, so the first and last y bins get half weight.
bins = np.logspace(-3, 2, 201)

# CPCIR
y_cpc = np.histogram2d(
    tiwp_true_cpcir, tiwp_trapz_true_cpcir, bins=bins, density=True
)[0]
y_cpc /= np.trapz(y_cpc * np.diff(bins), axis=-1)[:, np.newaxis]
cond_mean_cpc = binned_statistic(
    tiwp_true_cpcir,
    tiwp_trapz_true_cpcir,
    statistic=lambda members: members.mean() if members.size > 1e2 else np.nan,
    bins=bins,
).statistic
corr_cpc = np.corrcoef(tiwp_true_cpcir, tiwp_trapz_true_cpcir)[0, 1]
bias_cpc = (
    np.mean(tiwp_trapz_true_cpcir - tiwp_true_cpcir)
    / np.mean(tiwp_true_cpcir)
)

# GridSat
y_gs = np.histogram2d(
    tiwp_true_gridsat, tiwp_trapz_true_gridsat, bins=bins, density=True
)[0]
y_gs /= np.trapz(y_gs * np.diff(bins), axis=-1)[:, np.newaxis]
cond_mean_gs = binned_statistic(
    tiwp_true_gridsat,
    tiwp_trapz_true_gridsat,
    statistic=lambda members: members.mean() if members.size > 1e2 else np.nan,
    bins=bins,
).statistic
corr_gs = np.corrcoef(tiwp_true_gridsat, tiwp_trapz_true_gridsat)[0, 1]
bias_gs = (
    np.mean(tiwp_trapz_true_gridsat - tiwp_true_gridsat)
    / np.mean(tiwp_true_gridsat)
)

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm

# Conditional PDF of the integrated reference TIWC given the reference TIWP for
# CPCIR (a) and GridSat (b), with the conditional mean and a correlation/bias box.
# Uses bins, y_*, cond_mean_*, corr_* and bias_* from the preceding cell.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

norm = LogNorm(1e-3, 1e2)
levels = np.logspace(-3, 2, 11)

txtcol = "C0"
diagcol = "orangered"

# Bin centres, used for both axes.
x = 0.5 * (bins[1:] + bins[:-1])

for panel_col, (panel_title, panel_pdf, panel_cond_mean, panel_corr, panel_bias) in enumerate((
    ("(a) CPCIR", y_cpc, cond_mean_cpc, corr_cpc, bias_cpc),
    ("(b) GridSat", y_gs, cond_mean_gs, corr_gs, bias_gs),
)):
    ax = fig.add_subplot(gs[0, panel_col])
    ax.set_title(panel_title, loc="left")
    # Transposed so that the first histogram dimension runs along x.
    m = ax.contourf(x, x, panel_pdf.T, norm=norm, levels=levels, extend="both")
    for c in m.collections:
        c.set_rasterized(True)

    ax.plot(x, x, ls="--", c="grey")
    ax.plot(x, panel_cond_mean, c="C0", label="Conditional mean")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$\text{TIWP}_\text{ref}$ $[\si{\kilo \gram \per \meter \squared}$]")
    if panel_col == 0:
        ax.set_ylabel(r"$\int \text{TIWC}_\text{ref} dz$ $[\si{\kilo \gram \per \meter \squared}$]")
    else:
        # Same y-axis as panel (a); hide the duplicate tick labels.
        for l in ax.yaxis.get_ticklabels():
            l.set_visible(False)

    props = dict(facecolor='white', alpha=1.0, edgecolor="k")
    ax.text(
        1, 2e-3, f"Corr.: {panel_corr:0.2f} \n Bias: {100 * panel_bias:0.2f}\%",
        fontsize=10, color=txtcol, ha="left", va="bottom", bbox=props
    )
    if panel_col == 0:
        ax.legend()

# Colorbar is built from the contour set of the last (GridSat) panel.
ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m, label=r"$\text{p}\left(\int \text{TIWC}_\text{ref} dz | \text{TIWP}_\text{ref} \right)$ $[(\si{\kilo \gram \per \meter \squared})^{-1}$]", cax=ax)
fig.savefig("../figures/tiwp_true_vs_tiwc_trapz_true.pdf", dpi=200, bbox_inches="tight")

In [None]:
from scipy.stats import binned_statistic

# Reverse conditioning: statistics of the reference TIWP (y) against the
# integrated reference TIWC (x). For each dataset:
#   y_*         joint histogram density, rescaled row-wise to approximate p(y | x)
#   cond_mean_* mean of y per x bin (NaN for bins with 100 samples or fewer)
#   corr_*      linear correlation coefficient
#   bias_*      mean(y - x) relative to mean(x)
# NOTE(review): the row normalisation applies np.trapz with unit spacing to the
# width-weighted densities, so the first and last y bins get half weight.
bins = np.logspace(-3, 2, 201)

# CPCIR
y_cpc = np.histogram2d(
    tiwp_trapz_true_cpcir, tiwp_true_cpcir, bins=bins, density=True
)[0]
y_cpc /= np.trapz(y_cpc * np.diff(bins), axis=-1)[:, np.newaxis]
cond_mean_cpc = binned_statistic(
    tiwp_trapz_true_cpcir,
    tiwp_true_cpcir,
    statistic=lambda members: members.mean() if members.size > 1e2 else np.nan,
    bins=bins,
).statistic
corr_cpc = np.corrcoef(tiwp_trapz_true_cpcir, tiwp_true_cpcir)[0, 1]
bias_cpc = (
    np.mean(tiwp_true_cpcir - tiwp_trapz_true_cpcir)
    / np.mean(tiwp_trapz_true_cpcir)
)

# GridSat
y_gs = np.histogram2d(
    tiwp_trapz_true_gridsat, tiwp_true_gridsat, bins=bins, density=True
)[0]
y_gs /= np.trapz(y_gs * np.diff(bins), axis=-1)[:, np.newaxis]
cond_mean_gs = binned_statistic(
    tiwp_trapz_true_gridsat,
    tiwp_true_gridsat,
    statistic=lambda members: members.mean() if members.size > 1e2 else np.nan,
    bins=bins,
).statistic
corr_gs = np.corrcoef(tiwp_trapz_true_gridsat, tiwp_true_gridsat)[0, 1]
bias_gs = (
    np.mean(tiwp_true_gridsat - tiwp_trapz_true_gridsat)
    / np.mean(tiwp_trapz_true_gridsat)
)

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm

# Conditional PDF of the reference TIWP given the integrated reference TIWC,
# p(TIWP_ref | int TIWC_ref dz), for CPCIR (a) and GridSat (b).
#
# Axis convention: the preceding cell histograms (integrated TIWC, TIWP) and
# bins the conditional mean by the integrated TIWC, so the integrated TIWC is
# the x-axis and TIWP the y-axis. Both panels therefore plot the transposed
# histogram (first histogram dimension along x) and are labelled accordingly.
# Previously the CPCIR panel used `y_cpc.T.T` (i.e. the untransposed array),
# which disagreed with the GridSat panel and the conditional-mean line, and
# the x/y axis labels were swapped relative to the plotted data.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

norm = LogNorm(1e-3, 1e2)
levels = np.logspace(-3, 2, 11)

txtcol = "C0"
diagcol = "orangered"

ax = fig.add_subplot(gs[0, 0])
ax.set_title("(a) CPCIR", loc="left")
# Bin centres, used for both axes.
x = 0.5 * (bins[1:] + bins[:-1])
m = ax.contourf(x, x, y_cpc.T, norm=norm, levels=levels, extend="both")
for c in m.collections:
    c.set_rasterized(True)

ax.plot(x, x, ls="--", c="grey")
ax.plot(x, cond_mean_cpc, c="C0", label="Conditional mean")
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_ylabel(r"$\text{TIWP}_\text{ref}$ $[\si{\kilo \gram \per \meter \squared}$]")
ax.set_xlabel(r"$\int \text{TIWC}_\text{ref} dz$ $[\si{\kilo \gram \per \meter \squared}$]")
props = dict(facecolor='white', alpha=1.0, edgecolor="k")
ax.text(
    1, 2e-3, f"Corr.: {corr_cpc:0.2f} \n Bias: {100 * bias_cpc:0.2f}\%",
    fontsize=10, color=txtcol, ha="left", va="bottom", bbox=props
)
ax.legend()


ax = fig.add_subplot(gs[0, 1])
ax.set_title("(b) GridSat", loc="left")
x = 0.5 * (bins[1:] + bins[:-1])
m = ax.contourf(x, x, y_gs.T, norm=norm, levels=levels, extend="both")
for c in m.collections:
    c.set_rasterized(True)

ax.plot(x, x, ls="--", c="grey")
ax.plot(x, cond_mean_gs, c="C0", label="Conditional mean")
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel(r"$\int \text{TIWC}_\text{ref} dz$ $[\si{\kilo \gram \per \meter \squared}$]")
# Same y-axis as panel (a); hide the duplicate tick labels.
for l in ax.yaxis.get_ticklabels():
    l.set_visible(False)

props = dict(facecolor='white', alpha=1.0, edgecolor="k")
ax.text(1, 2e-3, f"Corr.: {corr_gs:0.2f} \n Bias: {100 * bias_gs:0.2f}\%",
        fontsize=10, color=txtcol, ha="left", va="bottom", bbox=props)

# Colorbar is built from the contour set of the GridSat panel.
ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m, label=r"$\text{p}\left(\text{TIWP}_\text{ref} \mid \int \text{TIWC}_\text{ref} dz \right)$ $[(\si{\kilo \gram \per \meter \squared})^{-1}$]", cax=ax)
fig.savefig("../figures/tiwc_trapz_true_vs_tiwp_true.pdf", dpi=200, bbox_inches="tight")

In [None]:
from scipy.stats import binned_statistic

# Joint (not conditional) density of the reference TIWP (first dimension) and
# the integrated reference TIWC (second dimension). No row normalisation,
# conditional mean, correlation or bias is computed in this cell.
bins = np.logspace(-3, 2, 201)

y_cpc = np.histogram2d(
    tiwp_true_cpcir, tiwp_trapz_true_cpcir, bins=bins, density=True
)[0]

y_gs = np.histogram2d(
    tiwp_true_gridsat, tiwp_trapz_true_gridsat, bins=bins, density=True
)[0]

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm

# Joint PDF of the reference TIWP (x) and the integrated reference TIWC (y) for
# CPCIR (a) and GridSat (b). Uses bins, y_cpc and y_gs from the preceding cell,
# which holds the un-normalised `density=True` 2-D histograms.
#
# A 2-D joint density is a probability per unit area of the (x, y) plane, so its
# unit is (kg m^-2)^-2; the colorbar label previously stated ^-1, which only
# applies to the conditional PDFs shown in the other figures.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

norm = LogNorm(1e-3, 1e2)
levels = np.logspace(-3, 2, 11)

txtcol = "C0"
diagcol = "orangered"

ax = fig.add_subplot(gs[0, 0])
ax.set_title("(a) CPCIR", loc="left")
# Bin centres, used for both axes.
x = 0.5 * (bins[1:] + bins[:-1])
# Transposed so that the first histogram dimension (TIWP) runs along x.
m = ax.contourf(x, x, y_cpc.T, norm=norm, levels=levels, extend="both")
for c in m.collections:
    c.set_rasterized(True)

ax.plot(x, x, ls="--", c="grey")
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_ylabel(r"$\int \text{TIWC}_\text{ref} dz$ $[\si{\kilo \gram \per \meter \squared}$]")
ax.set_xlabel(r"$\text{TIWP}_\text{ref}$ $[\si{\kilo \gram \per \meter \squared}$]")


ax = fig.add_subplot(gs[0, 1])
ax.set_title("(b) GridSat", loc="left")
x = 0.5 * (bins[1:] + bins[:-1])
m = ax.contourf(x, x, y_gs.T, norm=norm, levels=levels, extend="both")
for c in m.collections:
    c.set_rasterized(True)

ax.plot(x, x, ls="--", c="grey")
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel(r"$\text{TIWP}_\text{ref}$ $[\si{\kilo \gram \per \meter \squared}$]")
# Same y-axis as panel (a); hide the duplicate tick labels.
for l in ax.yaxis.get_ticklabels():
    l.set_visible(False)

# Colorbar is built from the contour set of the GridSat panel.
ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m, label=r"$\text{p}\left(\text{TIWP}_\text{ref} , \int \text{TIWC}_\text{ref} dz \right)$ $[(\si{\kilo \gram \per \meter \squared})^{-2}$]", cax=ax)
fig.savefig("../figures/tiwp_true_joint_tiwc_trapz_true.pdf", dpi=200, bbox_inches="tight")

In [None]:
# Statistics of the integrated retrieved TIWC (y) against the reference TIWP (x).
# For each dataset:
#   y_*         joint histogram density, rescaled row-wise to approximate p(y | x)
#   cond_mean_* mean of y per x bin (NaN for bins with 100 samples or fewer)
#   corr_*      linear correlation coefficient
#   bias_*      mean(y - x) relative to mean(y)
# NOTE(review): the bias is normalised by the mean of the integrated retrieval,
# not by the mean reference TIWP -- confirm this is the intended definition.
# NOTE(review): the row normalisation applies np.trapz with unit spacing to the
# width-weighted densities, so the first and last y bins get half weight.
bins = np.logspace(-3, 2, 201)

# CPCIR
y_cpc = np.histogram2d(
    tiwp_true_cpcir, tiwp_trapz_retrieved_cpcir, bins=bins, density=True
)[0]
y_cpc /= np.trapz(y_cpc * np.diff(bins), axis=-1)[:, np.newaxis]
cond_mean_cpc = binned_statistic(
    tiwp_true_cpcir,
    tiwp_trapz_retrieved_cpcir,
    statistic=lambda members: members.mean() if members.size > 1e2 else np.nan,
    bins=bins,
).statistic
corr_cpc = np.corrcoef(tiwp_true_cpcir, tiwp_trapz_retrieved_cpcir)[0, 1]
bias_cpc = (
    np.mean(tiwp_trapz_retrieved_cpcir - tiwp_true_cpcir)
    / np.mean(tiwp_trapz_retrieved_cpcir)
)

# GridSat
y_gs = np.histogram2d(
    tiwp_true_gridsat, tiwp_trapz_retrieved_gridsat, bins=bins, density=True
)[0]
y_gs /= np.trapz(y_gs * np.diff(bins), axis=-1)[:, np.newaxis]
cond_mean_gs = binned_statistic(
    tiwp_true_gridsat,
    tiwp_trapz_retrieved_gridsat,
    statistic=lambda members: members.mean() if members.size > 1e2 else np.nan,
    bins=bins,
).statistic
corr_gs = np.corrcoef(tiwp_true_gridsat, tiwp_trapz_retrieved_gridsat)[0, 1]
bias_gs = (
    np.mean(tiwp_trapz_retrieved_gridsat - tiwp_true_gridsat)
    / np.mean(tiwp_trapz_retrieved_gridsat)
)

In [None]:
# Conditional PDF of the integrated retrieved TIWC given the reference TIWP for
# CPCIR (a) and GridSat (b), with the conditional mean and a correlation/bias box.
# Uses bins, y_*, cond_mean_*, corr_* and bias_* from the preceding cell, and
# GridSpec/LogNorm imported by earlier cells.
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

norm = LogNorm(1e-3, 1e2)
levels = np.logspace(-3, 2, 11)

txtcol = "C0"
diagcol = "orangered"

# Bin centres, used for both axes.
x = 0.5 * (bins[1:] + bins[:-1])

for panel_col, (panel_title, panel_pdf, panel_cond_mean, panel_corr, panel_bias) in enumerate((
    ("(a) CPCIR", y_cpc, cond_mean_cpc, corr_cpc, bias_cpc),
    ("(b) GridSat", y_gs, cond_mean_gs, corr_gs, bias_gs),
)):
    ax = fig.add_subplot(gs[0, panel_col])
    ax.set_title(panel_title, loc="left")
    # Transposed so that the first histogram dimension runs along x.
    m = ax.contourf(x, x, panel_pdf.T, norm=norm, levels=levels, extend="both")
    for c in m.collections:
        c.set_rasterized(True)

    ax.plot(x, x, ls="--", c="grey")
    ax.plot(x, panel_cond_mean, c="C0", label="Conditional mean")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$\text{TIWP}_\text{ref}$ $[\si{\kilo \gram \per \meter \squared}$]")
    if panel_col == 0:
        ax.set_ylabel(r"$\int \text{TIWC}_\text{ret} dz$ $[\si{\kilo \gram \per \meter \squared}$]")
    else:
        # Same y-axis as panel (a); hide the duplicate tick labels.
        for l in ax.yaxis.get_ticklabels():
            l.set_visible(False)

    props = dict(facecolor='white', alpha=1.0, edgecolor="k")
    ax.text(
        1, 2e-3, f"Corr.: {panel_corr:0.2f} \n Bias: {100 * panel_bias:0.2f}\%",
        fontsize=10, color=txtcol, ha="left", va="bottom", bbox=props
    )
    if panel_col == 0:
        ax.legend()

# Colorbar is built from the contour set of the last (GridSat) panel.
ax = fig.add_subplot(gs[0, 2])
plt.colorbar(m, label=r"$\text{p}\left(\int \text{TIWC}_\text{ret} dz | \text{TIWP}_\text{ref} \right)$ $[(\si{\kilo \gram \per \meter \squared})^{-1}$]", cax=ax)
fig.savefig("../figures/tiwp_true_vs_tiwc_trapz_retrieved.pdf", dpi=200, bbox_inches="tight")

In [None]:
# Keep references to the current conditional PDFs under new names so that they
# remain available after `y_cpc` / `y_gs` are rebound by the next cell.
# These are aliases, not copies.
y_cpc_ret_trapz, y_gs_ret_trapz = y_cpc, y_gs

In [None]:
# Joint histogram density of (reference TIWP, retrieved TIWP), rescaled row-wise
# to approximate p(TIWP_ret | TIWP_ref) for each dataset.
# NOTE(review): the row normalisation applies np.trapz with unit spacing to the
# width-weighted densities, so the first and last bins get half weight.
bins = np.logspace(-3, 2, 201)

y_cpc = np.histogram2d(
    results_cpcir.tiwp_true.data,
    results_cpcir.tiwp_mean.data,
    bins=bins,
    density=True,
)[0]
y_cpc /= np.trapz(y_cpc * np.diff(bins), axis=-1)[:, np.newaxis]

y_gs = np.histogram2d(
    results_gridsat.tiwp_true.data,
    results_gridsat.tiwp_mean.data,
    bins=bins,
    density=True,
)[0]
y_gs /= np.trapz(y_gs * np.diff(bins), axis=-1)[:, np.newaxis]

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm

# Percent difference between the conditional PDF of the integrated retrieved
# TIWC (y_*_ret_trapz) and that of the retrieved TIWP (y_*), both conditioned on
# the reference TIWP, for CPCIR (a) and GridSat (b).
fig = plt.figure(figsize=(8, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.075])

norm = LogNorm(1e-3, 1e2)
levels = np.logspace(-3, 2, 11)

txtcol = "C0"
diagcol = "orangered"

# Bin edges as 2-D grids for pcolormesh.
bins_x, bins_y = np.meshgrid(bins, bins)

for panel_col, (panel_title, pdf_trapz, pdf_tiwp) in enumerate((
    ("(a) CPCIR", y_cpc_ret_trapz, y_cpc),
    ("(b) GridSat", y_gs_ret_trapz, y_gs),
)):
    ax = fig.add_subplot(gs[0, panel_col])
    ax.set_title(panel_title, loc="left")
    # Bins where the TIWP-based density is not positive are set to NaN in the
    # denominator, so they yield NaN instead of a division by zero.
    m = ax.pcolormesh(
        bins_x,
        bins_y,
        ((pdf_trapz - pdf_tiwp) / np.where(pdf_tiwp > 0, pdf_tiwp, np.nan)).T * 100,
        vmin=-100,
        vmax=100,
        cmap='coolwarm',
    )

    ax.plot(bins, bins, ls="--", c="grey")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_xlabel(r"$\text{TIWP}_\text{ref}$ $[\si{\kilo \gram \per \meter \squared}$]")
    if panel_col == 0:
        ax.set_ylabel(r"$\int \text{TIWC}_\text{ret} dz$ or $\text{TIWP}_\text{ret}$ $[\si{\kilo \gram \per \meter \squared}$]")
    else:
        # Same y-axis as panel (a); hide the duplicate tick labels.
        for l in ax.yaxis.get_ticklabels():
            l.set_visible(False)

# Colorbar for the last (GridSat) mesh; both panels use the same colour limits.
ax = fig.add_subplot(gs[0, 2])
cbar = fig.colorbar(m, cax=ax, extend='both')
cbar.set_label(r"$\frac{\text{p}(\int \text{TIWC}_\text{ret} dz | \text{TIWP}_\text{ref}) - \text{p}(\text{TIWP}_\text{ret} | \text{TIWP}_\text{ref})}{\text{p}(\text{TIWP}_\text{ret} | \text{TIWP}_\text{ref})}$")
# Re-label the automatically chosen ticks as integer percentages.
ticks = cbar.get_ticks()
tick_values = [int(n) for n in ticks]
cbar.set_ticks(tick_values)
cbar.set_ticklabels([f'{value}\%' for value in tick_values])
fig.savefig("../figures/tiwp_true_vs_tiwc_trapz_retrieved_percentdiff_with_twp_retrieved.pdf", dpi=200, bbox_inches="tight")