# Olympex validation

This notebook evaluates the CCIC retrieval against radar retrievals and in-situ measurements from the Olympex flight campaign.


In [None]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import pandas as pd

from ccic.plotting import set_style

# Apply the plotting style provided by ccic.plotting to the figures below.
set_style()
# Root directory of the CCIC data tree used by most cells in this notebook.
# NOTE(review): hard-coded, user-specific absolute path — adjust before running elsewhere.
DATA_PATH = Path("/home/amell/mnt/sun/data/ccic")

In [None]:
from datetime import datetime, timedelta
from pansat.time import to_datetime64

# Surface elevation data; interpolated to the flight track inside
# read_citation_file to turn the aircraft altitude into height above the surface.
elev = xr.load_dataset(DATA_PATH / "validation/olympex/elevation_olympex.nc")

def read_citation_file(filename):
    """
    Read a Citation in-situ data file and average it to 30 s resolution.

    The column names are read from line 69 of the file (index 68) and the
    data records are parsed from line 71 onwards. The date of the flight is
    taken from the file name, which must start with ``YY_MM_DD_``.

    Args:
        filename: Path of the Citation file to read.

    Return:
        An ``xarray.Dataset`` with dimension ``time`` holding the 30 s means
        of all columns. ``POS_Lat`` and ``POS_Lon`` are renamed to
        ``latitude`` and ``longitude`` and the variable ``altitude``
        (``POS_Alt`` minus the interpolated surface elevation) is added.
    """
    with open(filename, "r") as handle:
        column_names = handle.readlines()[68].split()
    data = pd.read_csv(
        filename,
        skiprows=70,
        names=column_names,
        # 'delim_whitespace=True' is deprecated since pandas 2.2;
        # a whitespace regex separator is the documented equivalent.
        sep=r"\s+",
        na_values="999999.9999"
    )

    # Convert time
    # NOTE(review): 'Time' is presumably seconds since midnight of the
    # flight day — confirm against the data format description.
    year, month, day, *_ = Path(filename).name.split("_")
    start_time = to_datetime64(datetime(int(year) + 2000, int(month), int(day)))
    time = start_time + np.array(data.Time).astype("timedelta64[s]")
    data.index = time

    data = xr.Dataset(data).rename({"dim_0": "time"}).resample(time="30s").mean()
    data = data.rename({
        "POS_Lat": "latitude",
        "POS_Lon": "longitude",
    })
    # Height above the surface: aircraft altitude minus surface elevation
    # interpolated to the aircraft position.
    sfc_elev = elev.interp(latitude=data.latitude, longitude=data.longitude)
    alt = data.POS_Alt.data - sfc_elev.elevation.data
    data["altitude"] = (("time",), alt)
    return data

## Overview plot

In [None]:
# Citation in-situ files and radar retrieval results, sorted by file name.
citation_files = sorted((DATA_PATH / "validation/olympex/citation/").glob("*.olympex"))
olympex_files = sorted((DATA_PATH / "validation/olympex/results").glob("*.nc"))

In [None]:
import cartopy.crs as ccrs

# Flight tracks of both aircraft; one coordinate array per input file.
lons_citation, lats_citation = [], []
lons_er2, lats_er2 = [], []

# Tracks of the Citation aircraft (in-situ measurements).
for citation_file in citation_files:
    print(citation_file)
    citation_data = read_citation_file(citation_file)
    lons, lats = citation_data.longitude.data, citation_data.latitude.data
    lons_citation += [lons]
    lats_citation += [lats]

# Tracks stored with the radar retrieval results (plotted as "NASA ER-2" below).
for olympex_file in olympex_files:
    data = xr.load_dataset(olympex_file, group="LargePlateAggregate")
    lons, lats = data.longitude.data, data.latitude.data
    lons_er2 += [lons]
    lats_er2 += [lats]

In [None]:
import cartopy.crs as ccrs
from matplotlib.ticker import FixedLocator

def add_ticks(ax, lons, lats, left=True, bottom=True):
    """
    Label a cartopy axes at fixed longitudes and latitudes.

    Invisible grid lines are added to the axes and only their labels are
    drawn. Labels at the top and on the right are always disabled.

    Args:
        ax: The cartopy axes to annotate.
        lons: Longitudes at which to place tick labels.
        lats: Latitudes at which to place tick labels.
        left: Whether to draw labels on the left side.
        bottom: Whether to draw labels at the bottom.
    """
    gridliner = ax.gridlines(
        crs=ccrs.PlateCarree(),
        draw_labels=True,
        linewidth=0,
        color='none',
    )
    gridliner.top_labels, gridliner.right_labels = False, False
    gridliner.left_labels, gridliner.bottom_labels = left, bottom
    gridliner.xlocator = FixedLocator(lons)
    gridliner.ylocator = FixedLocator(lats)


In [None]:
import os
from PIL import Image
# Disable PIL's image-size limit so that very large background images can be opened.
Image.MAX_IMAGE_PIXELS = None
# Directory in which cartopy looks for user background images (used by
# ax.background_img in the overview plot).
os.environ["CARTOPY_USER_BACKGROUNDS"] = str(DATA_PATH / "misc")

In [None]:
fig = plt.figure(figsize=(6, 4))
crs = ccrs.PlateCarree()
ax = fig.add_subplot(1, 1, 1, projection=crs)

# Draw all tracks of one aircraft in the same colour; only the first track
# of each aircraft gets a legend entry.
for colour, aircraft, tracks in (
    ("C0", "UMD Citation", zip(lons_citation, lats_citation)),
    ("C1", "NASA ER-2", zip(lons_er2, lats_er2)),
):
    for ind, (lons, lats) in enumerate(tracks):
        ax.plot(lons, lats, c=colour, label=aircraft if ind == 0 else None)

lon_ticks = np.linspace(-135, -120, 6)
lat_ticks = [45, 47.5, 50]
add_ticks(ax, lon_ticks, lat_ticks)

ax.set_xlim(-130, -120)
ax.set_ylim(44, 51)
ax.legend()

ax.set_title("Flight paths of the OLYMPEX campaign")

# The background image is looked up in the CARTOPY_USER_BACKGROUNDS directory.
ax.background_img(name='blue_marble_dec', resolution='low')
fig.savefig("olympex_flight_paths.pdf")

## Resample Citation data

The code below resamples the in-situ measurements from the Citation aircraft to the CCIC retrieval grids.

In [None]:
# All Citation in-situ files of the campaign, sorted by file name.
citation_files = sorted((DATA_PATH / "validation/olympex/citation/").glob("*.olympex"))

In [None]:
from datetime import datetime, timedelta
from pansat.time import to_datetime64

# Surface elevation data used to convert aircraft altitude to height above the surface.
# NOTE(review): the overview section loads this file from
# "validation/olympex/elevation_olympex.nc" — confirm which location is correct.
elev = xr.load_dataset(DATA_PATH / "validation/data/elevation_olympex.nc")

def read_citation_file(filename):
    """
    Read a Citation in-situ data file and average it to 30 s resolution.

    This redefines the reader from the overview section and additionally
    renames ``Air_Temp`` to ``temperature``.

    The column names are read from line 69 of the file (index 68) and the
    data records are parsed from line 71 onwards. The date of the flight is
    taken from the file name, which must start with ``YY_MM_DD_``.

    Args:
        filename: Path of the Citation file to read.

    Return:
        An ``xarray.Dataset`` with dimension ``time`` holding the 30 s means
        of all columns. ``POS_Lat``, ``POS_Lon`` and ``Air_Temp`` are renamed
        to ``latitude``, ``longitude`` and ``temperature`` and the variable
        ``altitude`` (``POS_Alt`` minus the interpolated surface elevation)
        is added.
    """
    with open(filename, "r") as handle:
        column_names = handle.readlines()[68].split()
    data = pd.read_csv(
        filename,
        skiprows=70,
        names=column_names,
        # 'delim_whitespace=True' is deprecated since pandas 2.2;
        # a whitespace regex separator is the documented equivalent.
        sep=r"\s+",
        na_values="999999.9999"
    )

    # Convert time
    # NOTE(review): 'Time' is presumably seconds since midnight of the
    # flight day — confirm against the data format description.
    year, month, day, *_ = Path(filename).name.split("_")
    start_time = to_datetime64(datetime(int(year) + 2000, int(month), int(day)))
    time = start_time + np.array(data.Time).astype("timedelta64[s]")
    data.index = time

    data = xr.Dataset(data).rename({"dim_0": "time"}).resample(time="30s").mean()
    data = data.rename({
        "POS_Lat": "latitude",
        "POS_Lon": "longitude",
        "Air_Temp": "temperature"
    })
    # Height above the surface: aircraft altitude minus surface elevation
    # interpolated to the aircraft position.
    sfc_elev = elev.interp(latitude=data.latitude, longitude=data.longitude)
    alt = data.POS_Alt.data - sfc_elev.elevation.data
    data["altitude"] = (("time",), alt)
    return data

In [None]:
# Display the list of Citation files that will be resampled.
citation_files

In [None]:
from ccic.validation import get_latlon_bins
# Derive the latitude/longitude bins of both retrieval grids from one
# reference CCIC result file per grid.
cpcir_file = DATA_PATH / "results" / "olympex" / "ccic_cpcir_201511010000.nc"
gridsat_file = DATA_PATH / "results" / "olympex" / "ccic_gridsat_201511010000.nc"
bins_cpcir = get_latlon_bins(cpcir_file)
bins_gridsat = get_latlon_bins(gridsat_file)

In [None]:
from ccic.validation import resample_data
# Resample every Citation flight to the CPCIR and the GridSat grid.
for citation_file in citation_files:
    # Read and average the flight only once and reuse it for both grids
    # (the radar resampling below reuses its input dataset in the same way).
    flight_data = read_citation_file(citation_file)
    for name, bins in zip(["cpcir", "gridsat"], [bins_cpcir, bins_gridsat]):
        resample_data(
            flight_data,
            ["Nev_IWC", "Nev_TWC", "temperature"],
            bins[1],
            bins[0],
            DATA_PATH / "validation/olympex/citation_resampled/",
            # The doubled braces leave the date fields as placeholders in the
            # resulting template string.
            f"citation_{name}_{{year:04}}{{month:02}}{{day:02}}{{hour:02}}.nc"
        )

## Resample CCIC results

In [None]:
# Display the list of radar retrieval files found so far.
olympex_files

In [None]:
from tqdm import tqdm
from ccic.validation import load_radar_results
from ccic.validation import resample_data
# Radar retrieval results to resample. The "results_ml" directory is the one
# actually used: the original cell first listed "validation/olympex/results"
# and immediately overwrote that list.
olympex_files = sorted(list(Path(DATA_PATH / "validation/olympex/results_ml").glob("*.nc")))

# Each file holds one group per assumed particle model.
groups = ["LargePlateAggregate", "LargeColumnAggregate", "8-ColumnAggregate"]
failed = []
for group in groups:
    for olympex_file in tqdm(olympex_files):
        try:
            rs = xr.load_dataset(olympex_file, group=group)
            for name, bins in zip(["cpcir", "gridsat"], [bins_cpcir, bins_gridsat]):
                resample_data(
                    rs,
                    ["iwc", "rwc", "temperature"],
                    bins[1],
                    bins[0],
                    DATA_PATH / "validation/olympex/radar_resampled_ml/",
                    f"radar_{name}_{group}_{{year:04}}{{month:02}}{{day:02}}{{hour:02}}.nc"
                )
        except Exception as exc:
            # Best effort: keep processing the remaining files, but remember
            # the failure so that it is reported instead of silently dropped.
            failed.append((olympex_file, group, exc))

for failed_file, failed_group, error in failed:
    print(f"Resampling failed for {failed_file} ({failed_group}): {error!r}")


## Load and combine resampled data

The code below loads the data resampled to the CPCIR and GridSat grids and combines them, for each grid, into a dictionary containing the radar retrieval results for the three evaluated particles.

In [None]:
# Directories holding the resampled Citation data, the resampled radar
# retrievals and the CCIC retrieval results.
citation_path = Path(DATA_PATH / "validation/olympex/citation_resampled")
radar_path = Path(DATA_PATH / "validation/olympex/radar_resampled_ml")
ccic_path = Path(DATA_PATH / "results/olympex/")

particles = ["LargePlateAggregate", "LargeColumnAggregate", "8-ColumnAggregate"]
# NOTE(review): this list is replaced inside the loop before it is used.
citation_files = sorted(list(citation_path.glob("*cpcir*.nc")))
ccic_files = sorted(list(ccic_path.glob("*cpcir*.nc")))

# NOTE(review): placeholder only; rebound to a dataset inside the loop.
citation_data = []

# Maps particle name -> list of per-file datasets; the lists are concatenated
# along time at the end of the cell.
results_cpcir = {}

for particle in particles:
    for ccic_file in ccic_files:
        
        # First 10 characters of the last "_"-separated part of the file
        # name; used to find the resampled files of the same date.
        date = ccic_file.name.split("_")[-1][:10]
        citation_files = sorted(list(citation_path.glob(f"*cpcir*{date}.nc")))
        radar_files = sorted(list(radar_path.glob(f"radar_cpcir_{particle}_{date}.nc")))
        
        # Skip CCIC files for which neither reference data set is available.
        if len(radar_files) == 0 and len(citation_files) == 0:
            continue
        
        ccic_data = xr.load_dataset(ccic_file)
        
        dims = ("time", "latitude", "longitude", "altitude")
        
        # Attach the radar retrieval. Any exception (including the IndexError
        # raised when no radar file was found) results in NaN-filled variables.
        # NOTE(review): the GridSat cell selects the first time step with
        # [{"time": [0]}] here, this cell does not — confirm this is intended.
        try:
            radar_data = xr.load_dataset(radar_files[0])
            ccic_data["tiwc_radar"] = (dims, radar_data.iwc.data.astype("float32"))
            ccic_data["temperature"] = (dims, radar_data.temperature.data.astype("float32"))
        except Exception as e:
            print("No radar data:", ccic_file)
            ccic_data["tiwc_radar"] = (
                dims, np.nan * np.ones_like(ccic_data.tiwc.data, dtype="float32")
            )
            ccic_data["temperature"] = (
                dims, np.nan * np.ones_like(ccic_data.tiwc.data, dtype="float32")
            )
            
        # Attach the in-situ data with the same fall-back to NaN.
        try:
            citation_data = xr.load_dataset(citation_files[0])
            ccic_data["tiwc_citation"] = (dims, citation_data.Nev_IWC.data)
            ccic_data["temperature_citation"] = (dims, citation_data.temperature.data)
        except Exception:
            print("No in-situ data:", ccic_file)
            ccic_data["tiwc_citation"] = (
                dims, np.nan * np.ones_like(ccic_data.tiwc.data)
            )
            ccic_data["temperature_citation"] = (
                dims, np.nan * np.ones_like(ccic_data.tiwc.data)
            )
        results_cpcir.setdefault(particle, []).append(ccic_data)

# NOTE(review): raises KeyError for a particle without any collocated file.
for particle in particles:
    results_cpcir[particle] = xr.concat(results_cpcir[particle], "time")

In [None]:
# Same procedure as for the CPCIR grid, applied to the GridSat files.
particles = ["LargePlateAggregate", "LargeColumnAggregate", "8-ColumnAggregate"]
# NOTE(review): this list is replaced inside the loop before it is used.
citation_files = sorted(list(citation_path.glob("*gridsat*.nc")))
ccic_files = sorted(list(ccic_path.glob("*gridsat*.nc")))

# NOTE(review): placeholder only; rebound to a dataset inside the loop.
citation_data = []

# Maps particle name -> list of per-file datasets; the lists are concatenated
# along time at the end of the cell.
results_gridsat = {}

for particle in particles:
    for ccic_file in ccic_files:
        
        # First 10 characters of the last "_"-separated part of the file
        # name; used to find the resampled files of the same date.
        date = ccic_file.name.split("_")[-1][:10]
        citation_files = sorted(list(citation_path.glob(f"*gridsat*{date}.nc")))
        radar_files = sorted(list(radar_path.glob(f"radar_gridsat_{particle}_{date}.nc")))
        
        # Skip CCIC files for which neither reference data set is available.
        if len(radar_files) == 0 and len(citation_files) == 0:
            continue
        
        ccic_data = xr.load_dataset(ccic_file)
        
        dims = ("time", "latitude", "longitude", "altitude")
        
        # Attach the first time step of the radar retrieval. Any exception
        # (including the IndexError raised when no radar file was found)
        # results in NaN-filled variables; unlike the CPCIR cell nothing is printed.
        try:
            radar_data = xr.load_dataset(radar_files[0])[{"time": [0]}]
            ccic_data["tiwc_radar"] = (dims, radar_data.iwc.data)
            ccic_data["temperature"] = (dims, radar_data.temperature.data.astype("float32"))
        except Exception as exc:
            ccic_data["tiwc_radar"] = (
                dims, np.nan * np.ones_like(ccic_data.tiwc.data)
            )
            ccic_data["temperature"] = (
                dims, np.nan * np.ones_like(ccic_data.tiwc.data)
            )
            
        # Attach the first time step of the in-situ data with the same fall-back.
        try:
            citation_data = xr.load_dataset(citation_files[0])[{"time": [0]}]
            ccic_data["tiwc_citation"] = (dims, citation_data.Nev_IWC.data)
            ccic_data["temperature_citation"] = (dims, citation_data.temperature.data)
        except Exception:
            ccic_data["tiwc_citation"] = (
                dims, np.nan * np.ones_like(ccic_data.tiwc.data)
            )
            ccic_data["temperature_citation"] = (
                dims, np.nan * np.ones_like(ccic_data.tiwc.data)
            )
        results_gridsat.setdefault(particle, []).append(ccic_data)

# NOTE(review): raises KeyError for a particle without any collocated file.
for particle in particles:
    results_gridsat[particle] = xr.concat(results_gridsat[particle], "time")

In [None]:
# Sanity check: maximum in-situ temperature among the GridSat collocations.
results_gridsat["LargePlateAggregate"].temperature_citation.max()

## Results

In [None]:
# Radar-retrieved and in-situ TIWC on the CPCIR grid (large-plate aggregate).
plate_results_cpcir = results_cpcir["LargePlateAggregate"]
tiwc_radar = plate_results_cpcir.tiwc_radar.data
tiwc_citation = plate_results_cpcir.tiwc_citation.data

In [None]:
# Grid boxes in which both the in-situ and the radar TIWC are finite.
valid = np.isfinite(tiwc_citation) & np.isfinite(tiwc_radar)

In [None]:
def get_iwc_citation_radar(data):
    """
    Extract collocations between Citation in-situ measurements and radar retrievals.

    Negative in-situ values are clipped to zero. A sample is kept when both
    the in-situ and the radar value are non-negative (which excludes NaNs)
    and the in-situ temperature is below zero.

    Args:
        data: Dataset providing 'tiwc_citation', 'tiwc_radar' and
            'temperature_citation'.

    Return:
        Tuple ``(iwc_ref, iwc)`` of 1D arrays with the in-situ values and the
        radar values scaled by 1e3 (presumably kg m^-3 to g m^-3 — confirm).
    """
    reference = np.maximum(data.tiwc_citation.data, 0.0)
    retrieved = data.tiwc_radar.data * 1e3
    # NOTE(review): the threshold suggests temperatures in degrees Celsius.
    below_freezing = data.temperature_citation.data < 0
    mask = (reference >= 0.0) & (retrieved >= 0.0) & below_freezing
    return reference[mask], retrieved[mask]

def get_iwc_citation_ccic(data):
    """
    Extract collocations between Citation in-situ measurements and CCIC retrievals.

    Negative in-situ values are clipped to zero. A sample is kept when both
    the in-situ and the CCIC value are non-negative (which excludes NaNs)
    and the in-situ temperature is below zero.

    Args:
        data: Dataset providing 'tiwc_citation', 'tiwc' and
            'temperature_citation'.

    Return:
        Tuple ``(iwc_ref, iwc)`` of 1D arrays with the in-situ and the
        corresponding CCIC values.
    """
    reference = np.maximum(data.tiwc_citation.data, 0.0)
    retrieved = data.tiwc.data
    # NOTE(review): the threshold suggests temperatures in degrees Celsius.
    below_freezing = data.temperature_citation.data < 0
    mask = (reference >= 0.0) & (retrieved >= 0.0) & below_freezing
    return reference[mask], retrieved[mask]

def get_iwc_radar_ccic(data):
    """
    Extract collocations between radar retrievals and CCIC retrievals.

    Only profiles in which the radar retrieval is non-negative (and thus
    not NaN) at every level along the last axis are used. Within these
    profiles, all levels with a non-negative CCIC value are returned.

    The number of valid radar profiles is printed.

    Args:
        data: Dataset providing 'tiwc_radar' and 'tiwc' with the vertical
            dimension as last axis.

    Return:
        Tuple ``(iwc_ref, iwc)`` of 1D arrays with the radar values scaled
        by 1e3 (presumably kg m^-3 to g m^-3 — confirm) and the
        corresponding CCIC values.
    """
    iwc_ref = data.tiwc_radar.data * 1e3
    iwc = data.tiwc.data
    # Profile-wise validity of the radar retrieval, broadcast along the
    # vertical axis.
    valid_profiles = np.all(iwc_ref >= 0.0, -1)[..., None]
    print(valid_profiles.sum())
    valid = valid_profiles * (iwc >= 0.0)
    return iwc_ref[valid], iwc[valid]

In [None]:
f, ax = plt.subplots(1, 1, figsize=(5, 5))

# One-to-one reference line.
x = np.logspace(-3, 1, 101)
ax.plot(x, x, c="k", ls="--")

# Scatter the radar retrieval against the in-situ measurements for each
# particle model and print the relative bias in percent.
for habit in ("LargePlateAggregate", "LargeColumnAggregate", "8-ColumnAggregate"):
    iwc_ref, iwc = get_iwc_citation_radar(results_cpcir[habit])
    ax.scatter(iwc_ref, iwc, label=habit)
    bias = 100 * (iwc - iwc_ref).mean() / iwc_ref.mean()
    print(bias)

ax.set_xlim(1e-3, 1e1)
ax.set_ylim(1e-3, 1e1)
ax.set_aspect(1)
ax.set_xscale("log")
ax.set_yscale("log")

ax.set_xlabel("IWC (In-situ) [g m$^{-3}$]")
ax.set_ylabel("IWC (airborne radar) [g m$^{-3}$]")
ax.legend()

f.savefig("../figures/radar_only_olympex.pdf", bbox_inches="tight")

### TIWC 

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm, Normalize
from scipy.stats import binned_statistic_2d
from matplotlib.colors import rgb2hex

gs = GridSpec(2, 5, width_ratios=(0.4, 1.0, 1.0, 0.1, 0.075), wspace=0.075, hspace=0.05)
fig = plt.figure(figsize=(10, 7))

# Colour scale and contour levels of the conditional PDFs.
norm = LogNorm(1e-2, 1e2)
levels = np.logspace(-2, 2, 11)
txtcol = "C0"
names = ["CPCIR", "GridSat"]

# Colour scale and contour levels of the mean-temperature overlay.
temp_norm = Normalize(215, 275)
temp_levels = np.linspace(220, 270, 6)
particle = "LargePlateAggregate"
# Box drawn behind the statistics text.
props = dict(facecolor='white', alpha=1.0, edgecolor="grey")

# One row per retrieval grid: row label, in-situ panel, radar panel.
for i, (name, results) in enumerate(zip(names, [results_cpcir, results_gridsat])):

    # Row label
    ax = fig.add_subplot(gs[i, 0])
    ax.text(0, 0, name, rotation=90, va="center", ha="center", fontsize=14)
    ax.set_axis_off()
    ax.set_ylim(-2, 2)
    ax.set_xlim(-1, 1)

    # In-situ panel: PDF of the CCIC TIWC conditioned on the in-situ TIWC.
    bins = np.logspace(-4, 1, 21)
    iwc_ref, iwc = get_iwc_citation_ccic(results[particle])
    y = np.histogram2d(iwc_ref, iwc, bins=bins, density=True)[0]
    # Normalize each reference bin so that it integrates to one over the
    # retrieved values.
    y /= np.sum(y * np.diff(bins)[None], axis=1, keepdims=True)
    ax = fig.add_subplot(gs[i, 1])
    x = 0.5 * (bins[1:] + bins[:-1])
    ax.contourf(x, x, np.maximum(y.T, 1e-3), norm=norm, rasterized=True, extend="both", levels=levels)
    ax.plot(bins, bins, c="grey", ls="--")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_aspect(1.0)

    corr = np.corrcoef(iwc_ref, iwc)[0, 1]
    bias = (iwc - iwc_ref).mean() / iwc_ref.mean()
    # The backslash is escaped explicitly: "\%" is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    ax.text(0.08, 4e-4, f"Corr.: {corr:0.2f} \n Bias: {100 * bias:0.2f}\\%",
            fontsize=12, color=txtcol, ha="left", va="bottom", bbox=props)

    # Overlay: mean in-situ temperature per (reference, retrieved) bin,
    # shifted by 273.15 (presumably degrees Celsius to Kelvin — confirm).
    bins = np.logspace(-4, 1, 11)
    iwc_ref = results[particle].tiwc_citation.data
    iwc = results[particle].tiwc.data
    temp = results[particle].temperature_citation.data + 273.15
    valid = np.isfinite(iwc_ref) * np.isfinite(iwc) * np.isfinite(temp)
    y_temp = binned_statistic_2d(iwc_ref[valid], iwc[valid], temp[valid], bins=bins)[0]
    x = 0.5 * (bins[1:] + bins[:-1])
    ax.contour(x, x, y_temp.T, cmap="Oranges_r", levels=temp_levels, norm=temp_norm, linewidths=2)

    ax.set_ylabel("CCIC TIWC [g m$^{-3}$]")
    if i == 0:
        ax.set_title("In-situ measurements", loc="center", fontsize=14)
        for l in ax.xaxis.get_ticklabels():
            l.set_visible(False)
        for l in ax.xaxis.get_ticklines():
            l.set_visible(False)
    else:
        ax.set_xlabel("Nevzorov-probe TIWC [g m$^{-3}$]")

    # Radar panel: PDF of the CCIC TIWC conditioned on the radar TIWC.
    bins = np.logspace(-4, 1, 21)

    iwc_ref, iwc = get_iwc_radar_ccic(results[particle])
    y = np.histogram2d(iwc_ref, iwc, bins=bins, density=True)[0]
    y /= np.sum(y * np.diff(bins)[None], axis=1, keepdims=True)
    ax = fig.add_subplot(gs[i, 2])
    x = 0.5 * (bins[1:] + bins[:-1])
    m = ax.contourf(x, x, np.maximum(y.T, 1e-3), norm=norm, rasterized=True, extend="both", levels=levels)
    ax.plot(bins, bins, c="grey", ls="--")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_aspect(1.0)

    if i == 0:
        ax.set_title("Airborne radar", loc="center", fontsize=14)
        for l in ax.xaxis.get_ticklabels():
            l.set_visible(False)
        for l in ax.xaxis.get_ticklines():
            l.set_visible(False)
    else:
        ax.set_xlabel("NASA CRS TIWC [g m$^{-3}$]")
    for l in ax.yaxis.get_ticklines():
        l.set_visible(False)
    for l in ax.yaxis.get_ticklabels():
        l.set_visible(False)

    corr = np.corrcoef(iwc_ref, iwc)[0, 1]
    bias = (iwc - iwc_ref).mean() / iwc_ref.mean()
    ax.text(0.08, 4e-4, f"Corr.: {corr:0.2f} \n Bias: {100 * bias:0.2f}\\%",
            fontsize=12, color=txtcol, ha="left", va="bottom", bbox=props)

    # Overlay: mean radar-retrieval temperature per bin.
    # NOTE(review): unlike the in-situ overlay, non-finite temperatures are
    # not masked here — confirm this is intended.
    iwc_ref = results[particle].tiwc_radar.data * 1e3
    iwc = results[particle].tiwc.data
    temp = results[particle].temperature.data
    valid = np.isfinite(iwc_ref) * np.isfinite(iwc)
    bins = np.logspace(-4, 1, 11)
    y = binned_statistic_2d(iwc_ref[valid], iwc[valid], temp[valid], bins=bins)[0]
    x = 0.5 * (bins[1:] + bins[:-1])
    m_temp = ax.contour(x, x, y.T, cmap="Oranges_r", levels=temp_levels, norm=temp_norm, linewidths=2)


# Shared colour bar of the conditional PDFs (labelled on the left) ...
ax = fig.add_subplot(gs[:, -1])
cb = plt.colorbar(m, cax=ax)
cb.set_label(label=r"p($\text{TIWC}_\text{Ret} $|$ \text{TIWC}_\text{Ref}$) [(g m$^{-3}$)$^{-1}$]")
ax.yaxis.set_label_position('left')

# ... and the temperature contours of the last panel on a twin axis.
ax = ax.twinx()
cb = plt.colorbar(m_temp, cax=ax)
# NOTE(review): ContourSet.tcolors is deprecated in recent Matplotlib
# releases — verify against the installed version.
for ind, label in enumerate(ax.get_yticklabels()):
    label.set_color(rgb2hex(m_temp.tcolors[ind][0]))
cb.set_label("Mean temperature [K]", color="orangered")

fig.savefig("../figures/olympex_scatter.pdf", bbox_inches="tight")

In [None]:
def get_tiwp_radar_ccic(data, n_levels=16):
    """
    Extract collocations between radar-derived and CCIC ice water path.

    The radar reference is the sum of the scaled radar TIWC over the first
    ``n_levels`` levels of the last axis; levels above are set to zero
    before summing. A sample is kept when both the reference and the CCIC
    TIWP are non-negative, which also excludes profiles with a NaN in the
    summed levels.

    The number of valid samples is printed.

    Args:
        data: Dataset providing 'tiwc_radar' (vertical dimension last)
            and 'tiwp'.
        n_levels: Number of levels, counted from the first, that
            contribute to the reference TIWP.

    Return:
        Tuple ``(tiwp_ref, tiwp)`` of 1D arrays with the radar-derived and
        the CCIC values.
    """
    # The multiplication creates a new array, so zeroing the upper levels
    # does not modify the input data.
    # NOTE(review): summing TIWC without layer thickness presumably relies
    # on 1 km layers (g m^-3 x 1000 m = kg m^-2) — confirm.
    tiwc_ref = data["tiwc_radar"].data * 1e3
    tiwc_ref[..., n_levels:] = 0.0
    tiwp_ref = tiwc_ref.sum(-1)
    tiwp = data.tiwp.data

    valid = (tiwp_ref >= 0.0) * (tiwp >= 0.0)
    print(valid.sum())
    return tiwp_ref[valid], tiwp[valid]

In [None]:
from matplotlib.gridspec import GridSpec
from matplotlib.colors import LogNorm

gs = GridSpec(1, 3, width_ratios=(1.0, 1.0, 0.075), wspace=0.2, hspace=0.20)
fig = plt.figure(figsize=(8, 3.5))

norm = LogNorm(1e-2, 1e1)
txtcol = "C0"
names = ["CPCIR", "GridSat"]
particle = "LargePlateAggregate"
# Box drawn behind the statistics text; defined here so that the cell does
# not depend on a variable left over from another cell.
props = dict(facecolor='white', alpha=1.0, edgecolor="grey")

bins = np.logspace(-3, 2, 11)

# One panel per retrieval grid.
for i, (name, results) in enumerate(zip(names, [results_cpcir, results_gridsat])):

    ax = fig.add_subplot(gs[0, i])

    tiwp_ref, tiwp = get_tiwp_radar_ccic(results[particle])
    y = np.histogram2d(tiwp_ref, tiwp, bins=bins, density=True)[0]
    # Normalize each reference bin so that it integrates to one over the
    # retrieved values.
    y /= np.sum(y * np.diff(bins)[None], axis=1, keepdims=True)
    m_tiwp = ax.pcolormesh(bins, bins, np.maximum(y.T, 1e-3), norm=norm, rasterized=True)
    ax.plot(bins, bins, c="grey", ls="--")
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_aspect(1.0)
    ax.set_title(f"({chr(ord('a') + i)}) {name}")

    corr = np.corrcoef(tiwp_ref, tiwp)[0, 1]
    bias = (tiwp - tiwp_ref).mean() / tiwp_ref.mean()
    # The backslash is escaped explicitly: "\%" is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    ax.text(0.5, 3e-3, f"Corr.: {corr:0.2f} \n Bias: {100 * bias:0.2f}\\%",
            fontsize=10, color=txtcol, ha="left", va="bottom", bbox=props)

    ax.set_xlabel("NASA CRS TIWP [kg m$^{-2}$]")

    if i == 0:
        ax.set_ylabel("CCIC TIWP [kg m$^{-2}$]")

# Shared colour bar, taken from the last panel.
ax = fig.add_subplot(gs[0, -1])
plt.colorbar(
    m_tiwp,
    cax=ax,
    label=r"p(TIWP$_\text{RET}$ $|$ TIWP$_\text{REF}$) [(kg m$^{-2}$)$^{-1}$]",
    extend="both"
)
fig.savefig("../figures/olympex_scatter_tiwp.pdf", bbox_inches="tight")

### Average profiles

In [None]:
def get_profiles_radar_ccic(data):
    """
    Extract collocated TIWC profiles from the radar and the CCIC retrievals.

    Only the first 20 levels along the 'altitude' dimension are considered.
    A profile is kept when both the radar and the CCIC TIWC are non-negative
    (and thus not NaN) at all of these levels.

    Args:
        data: Dataset providing 'tiwc_radar' and 'tiwc' with 'altitude' as
            last dimension.

    Return:
        Tuple ``(tiwc_ref, tiwc)`` of 2D arrays (profiles x levels) with the
        radar values scaled by 1e3 and the corresponding CCIC values.
    """
    lowest_levels = data[{"altitude": slice(0, 20)}]
    radar_profiles = lowest_levels.tiwc_radar.data * 1e3
    ccic_profiles = lowest_levels.tiwc.data
    radar_ok = np.all(radar_profiles >= 0.0, axis=-1)
    ccic_ok = np.all(ccic_profiles >= 0.0, axis=-1)
    keep = radar_ok & ccic_ok
    return radar_profiles[keep], ccic_profiles[keep]

In [None]:
fig = plt.figure(figsize=(10, 5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.6])
# Altitude of the retrieval levels in km.
# NOTE(review): the profiles are restricted to the first 20 levels, so this
# assumes that the altitude coordinate has exactly 20 entries — confirm.
levels = results_gridsat["LargeColumnAggregate"].altitude.data / 1e3


# (a) Mean profiles of CCIC (black) and of the radar retrieval for each
# particle model (coloured); solid lines for CPCIR, dashed for GridSat.
ax = fig.add_subplot(gs[0, 0])
handles = []
particles = ["8-ColumnAggregate", "LargePlateAggregate", "LargeColumnAggregate"]
for name, style, results in zip(["CPCIR", "GridSat"], ["-", "--"], [results_cpcir, results_gridsat]):
    for i, part in enumerate(particles):
        iwc_ref, iwc = get_profiles_radar_ccic(results[part])
        profs_ref = np.nanmean(iwc_ref, 0)
        profs = np.nanmean(iwc, 0)
        # The CCIC profile is drawn only once per grid.
        if i == 0:
            handles += ax.plot( profs, levels, label=f"CCIC ({name})", c="k", ls=style)
        handles += ax.plot(profs_ref, levels, label=f"{part} ({name})", ls=style, c=f"C{i}")

ax.set_ylim(0, 20)
ax.set_xscale("log")
ax.set_xlim(1e-3, 1e0)
ax.set_ylabel("Altitude [km]")
ax.set_xlabel("Mean IWC [g m$^{-3}$]")
ax.set_title("(a) Mean profiles", loc="left")

# (b) The same profiles normalized by their sum over all levels.
ax = fig.add_subplot(gs[0, 1])
# The legend is built from the handles of this panel only, to avoid
# duplicate entries.
handles = []
for name, style, results in zip(["CPCIR", "GridSat"], ["-", "--"], [results_cpcir, results_gridsat]):
    for i, part in enumerate(particles):
        # Use the results of the grid of the current iteration; the original
        # code always read the GridSat results here.
        iwc_ref, iwc = get_profiles_radar_ccic(results[part])
        profs_ref = np.nanmean(iwc_ref, 0)
        profs = np.nanmean(iwc, 0)
        if i == 0:
            handles += ax.plot(100 * profs / profs.sum(), levels, label=f"CCIC ({name})", c="k", ls=style)
        handles += ax.plot(100 * profs_ref / profs_ref.sum(), levels, label=f"{part} ({name})", ls=style, c=f"C{i}")

ax.set_ylim(0, 20)
ax.set_xlim(0, 35)
ax.set_title("(b) Normalized mean profiles", loc="left")
# The backslash is escaped explicitly: "\%" is an invalid escape sequence.
ax.set_xlabel("Contribution to total IWP [\\%]")

# Legend in a separate, otherwise empty panel.
ax = fig.add_subplot(gs[0, 2])
ax.set_axis_off()
ax.legend(handles=handles, loc="center", facecolor="none", edgecolor="none")
fig.savefig("../figures/olympex_profiles.png")

## Case study

In [None]:
from datetime import datetime
from pathlib import Path
from ccic.data.cpcir import gpm_mergeir, PROVIDER

# The case study uses the CPCIR file of 2015-12-03 16 UTC; it is downloaded
# only when it is not yet available locally.
# NOTE(review): presumably the download stores the file at the relative path
# checked below — confirm against the pansat product implementation.
start_time = end_time = datetime(2015, 12, 3, 16)
cpcir_file = Path("GPM") / "gpm_mergeir" / "merg_2015120316_4km-pixel.nc4"
if not cpcir_file.exists():
    file = gpm_mergeir.download(start_time, end_time)
cpcir_data = xr.load_dataset(cpcir_file)

In [None]:
from ccic.validation import great_circle_distance
# Radar retrieval of the case-study flight leg.
radar_data = xr.load_dataset(
    DATA_PATH / "validation/olympex/results_new/crs_olympex_201512031559_201512031649.nc",
    group="8-ColumnAggregate"
)
lons = radar_data.longitude
lats = radar_data.latitude
time = radar_data.time
# Cumulative along-track distance, starting at zero for the first sample.
dist = np.array([great_circle_distance(lats[i], lons[i], lats[i + 1], lons[i + 1]) for i in range(lats.size - 1)])
dist = np.concatenate([[0], np.cumsum(dist)])
# The CCIC results are read relative to DATA_PATH, like everywhere else in
# this notebook, instead of from another user's home directory.
ccic_cpcir_data = xr.load_dataset(DATA_PATH / "results/olympex/ccic_cpcir_201512031600.nc")
tiwp_cpcir = ccic_cpcir_data.tiwp.copy()
# First time step of the GridSat result, without the scalar time coordinate.
ccic_gridsat_data = xr.load_dataset(DATA_PATH / "results/olympex/ccic_gridsat_201512031500.nc")[{"time": 0}].drop_vars("time")
tiwp_gridsat = ccic_gridsat_data.tiwp.copy()

In [None]:
# In-situ measurements of the Citation flight used in the case study.
citation_data = read_citation_file(
    DATA_PATH / "validation" / "olympex" / "citation" / "15_12_03_13_35_43.olympex"
)
lons_citation, lats_citation = citation_data.longitude.data, citation_data.latitude.data
time_citation = citation_data.time.data

In [None]:
# Display the data root directory in use.
DATA_PATH

In [None]:
from ccic.data.cpcir import CPCIR_GRID
# CCIC CPCIR results around the case-study flight (15 to 18 UTC).
cpcir_files = [
    DATA_PATH / f"results/olympex/ccic_cpcir_20151203{hour:02d}00.nc"
    for hour in (15, 16, 17, 18)
]
ccic_cpcir_data = xr.concat(list(map(xr.load_dataset, cpcir_files)), dim="time")
# TIWP field shown on the map of the case-study figure.
tiwp = ccic_cpcir_data.tiwp[{"time": 2}]

# Locate the lower-left corner of the CCIC domain in the CPCIR input data
# and cut the matching 256 x 256 window out of the CPCIR grid definition.
# NOTE(review): the negative row slice presumably accounts for a reversed
# latitude order of CPCIR_GRID, and is empty if lat_start is 0 — confirm.
lon_min = ccic_cpcir_data.longitude.data.min()
lat_min = ccic_cpcir_data.latitude.data.min()
lon_start = np.where(cpcir_data.lon.data >= lon_min)[0][0]
lat_start = np.where(cpcir_data.lat.data >= lat_min)[0][0]
area = CPCIR_GRID[-(lat_start + 256): -lat_start, lon_start: lon_start + 256]

# Input brightness temperatures on the CCIC grid, and CCIC results along the
# radar track (time, lats and lons come from the radar data).
cpcir_tbs = cpcir_data.interp(lat=ccic_cpcir_data.latitude, lon=ccic_cpcir_data.longitude)
ccic_cpcir_data = ccic_cpcir_data.interp(time=time, latitude=lats, longitude=lons)

In [None]:
# CCIC GridSat results enclosing the case-study flight (15 and 18 UTC),
# interpolated to the radar track.
gridsat_files = [
    DATA_PATH / f"results/olympex/ccic_gridsat_20151203{hour:02d}00.nc"
    for hour in (15, 18)
]
ccic_gridsat_data = xr.concat(list(map(xr.load_dataset, gridsat_files)), dim="time")
ccic_gridsat_data = ccic_gridsat_data.interp(time=time, latitude=lats, longitude=lons)

In [None]:
import cartopy.crs as ccrs
from matplotlib.ticker import FixedLocator

def add_ticks(ax, lons, lats, left=True, bottom=True):
    """
    Label a cartopy axes at fixed longitudes and latitudes.

    Same definition as in the overview section: invisible grid lines are
    added and only their labels are drawn, never at the top or on the right.

    Args:
        ax: The cartopy axes to annotate.
        lons: Longitudes at which to place tick labels.
        lats: Latitudes at which to place tick labels.
        left: Whether to draw labels on the left side.
        bottom: Whether to draw labels at the bottom.
    """
    label_grid = ax.gridlines(
        crs=ccrs.PlateCarree(),
        draw_labels=True,
        linewidth=0,
        color='none',
    )
    label_grid.top_labels = label_grid.right_labels = False
    label_grid.left_labels = left
    label_grid.bottom_labels = bottom
    label_grid.xlocator = FixedLocator(lons)
    label_grid.ylocator = FixedLocator(lats)


### Map Citation measurements to ER-2 ground track

The in-situ measurements from the Citation aircraft are mapped to the flight path of the ER-2 aircraft by determining the ER-2 times of the positions closest to the Citation measurements. 

In [None]:
from pyresample.geometry import SwathDefinition
from pyresample.kd_tree import resample_nearest
lon_cit = citation_data.longitude.data
lat_cit = citation_data.latitude.data
lon_rad = radar_data.longitude.data
lat_rad = radar_data.latitude.data
time_rad = radar_data.time.data
t_t = time_rad.dtype

swath_cit = SwathDefinition(lon_cit, lat_cit)
swath_radar = SwathDefinition(lon_rad, lat_rad)

# For every Citation position, look up the time of the nearest radar sample
# within 5 km. The times are resampled as integers; positions without a
# match receive the fill value -1.
time_r = resample_nearest(
    swath_radar,
    time_rad.astype(np.int64),
    swath_cit,
    radius_of_influence=5e3,
    fill_value=-1
)
no_match = time_r < 0
time_r = time_r.astype(t_t)
delta_t = (time_r - citation_data.time.data).astype("timedelta64[s]")

# Discard positions without a radar sample nearby as well as matches that
# are more than 15 minutes apart in time. The original code relied on the
# fill value producing a large time difference; the mask is now applied
# explicitly.
invalid = no_match | (np.abs(delta_t) > np.timedelta64(15 * 60, "s"))
time_r[invalid] = np.datetime64("nat")

In [None]:
from ccic.plotting import set_style
# Re-apply the plotting style before drawing the case-study figure.
set_style()

In [None]:
# CRS observation file of 2015-12-04.
# NOTE(review): absolute path in another user's home directory, and 'data' is
# not used by the case-study figure below — confirm that this cell is still needed.
data = xr.load_dataset("/home/simonpf/data_3/ccic/validation/olympex/observations/olympex_CRS_20151204_155917-20151204_165854_2_v01a.nc")

In [None]:
from matplotlib.colors import LogNorm
from matplotlib.gridspec import GridSpec

# Layout: colour bars (column 0), maps (column 1), curtains (column 2) and
# the shared TIWC colour bar (column 3).
fig = plt.figure(figsize=(14, 7.4))
gs = GridSpec(6, 4, width_ratios=[0.075, 1.1, 1.5, 0.075], wspace=0.2, hspace=0.3)

# Time window of the flight leg shown in the curtain plots.
start = np.datetime64("2015-12-03T16:21:00", "s")
end = np.datetime64("2015-12-03T16:32:00", "s")

# Indices of the first radar samples after the start and the end time.
ind_start = np.where(radar_data.time > start)[0][0]
ind_end = np.where(radar_data.time > end)[0][0]
crs = area.to_cartopy_crs()

# CPCIR input
                 
ax = fig.add_subplot(gs[:3, 1], projection=crs)
# Reorder the area extent into the (left, right, bottom, top) order expected by imshow.
ext = area.area_extent
ext = (ext[0], ext[2], ext[1], ext[3])
# Second time step of the CPCIR file (labelled 16:30 in the title).
m = ax.imshow(cpcir_tbs.Tb.data[1], extent=ext, rasterized=True, cmap="cmo.amp")

ax.set_xlim(-125, -122)
ax.set_ylim(46.5, 49)

#ax.plot(radar_data.longitude.data, radar_data.latitude.data, c="grey")
#ax.plot(citation_data.longitude.data, citation_data.latitude.data, c="black")
#ax.text(radar_data.longitude.data[ind_start], radar_data.latitude.data[ind_start], "Start", color="red", ha="left", va="bottom")
#ax.scatter([radar_data.longitude.data[ind_start]], [radar_data.latitude.data[ind_start]], marker="x", color="red", zorder=10)
#ax.text(radar_data.longitude.data[ind_end], radar_data.latitude.data[ind_end], "End")
#ax.scatter([radar_data.longitude.data[ind_end]], [radar_data.latitude.data[ind_end]], marker="x", color="red", zorder=10)
ax.set_title("(a) CPCIR brightness temperatures (16:30)", loc="left")


add_ticks(ax, np.arange(-125, -121), np.arange(46.0, 50), bottom=False)
ax.coastlines(color="grey")

# Colour bar of the brightness temperatures.
ax = fig.add_subplot(gs[:3, 0])
plt.colorbar(m, label="CPCIR T$_b$ [K]", cax=ax, location="left", extend="both")

# CPCIR TIWP

ax = fig.add_subplot(gs[3:6, 1], projection=crs)
m = ax.imshow(tiwp, extent=ext, norm=LogNorm(1e-2, 1e1), rasterized=True, cmap="cmo.dense")

ax.plot(radar_data.longitude.data, radar_data.latitude.data, c="C0", label="NASA ER-2 flight path")
ax.plot(citation_data.longitude.data, citation_data.latitude.data, c="C1", label="Citation flight path")
ax.legend()
ax.set_xlim(-125, -122)
ax.set_ylim(46.5, 49)

# Mark start and end of the flight leg shown in the curtain plots. The label
# times are derived from 'start' and 'end' so that they cannot disagree with
# the marked positions (the end label used to read 16:35:00 although the leg
# ends at 16:32:00).
props = dict(facecolor='white', alpha=1.0, edgecolor="grey")
start_label = f"Start ({start.item().strftime('%H:%M:%S')})"
end_label = f"End ({end.item().strftime('%H:%M:%S')})"
ax.text(radar_data.longitude.data[ind_start] + 0.05, radar_data.latitude.data[ind_start], start_label, color="black", ha="left", va="bottom", bbox=props)
ax.scatter([radar_data.longitude.data[ind_start]], [radar_data.latitude.data[ind_start]], marker="x", color="red", zorder=10)
ax.text(radar_data.longitude.data[ind_end] + 0.05, radar_data.latitude.data[ind_end], end_label, color="black", bbox=props)
ax.scatter([radar_data.longitude.data[ind_end]], [radar_data.latitude.data[ind_end]], marker="x", color="orangered", zorder=10)
ax.set_title("(b) CCIC TIWP (CPCIR)", loc="left")

add_ticks(ax, np.arange(-125, -121), np.arange(46.0, 50), bottom=True)
ax.coastlines(color="grey")

# Colour bar of the TIWP map.
ax = fig.add_subplot(gs[3:6, 0])
plt.colorbar(m, label="CCIC TIWP [kg m$^{-2}$]", cax=ax, location="left", extend="both")

# Radar results

ax = fig.add_subplot(gs[:2, 2])
# Colour scale shared by all three curtain plots.
norm = LogNorm(1e-2, 1e1)
x = time_rad
y = radar_data.altitude.data / 1e3
# Radar IWC scaled by 1e3 (presumably kg m^-3 to g m^-3 — confirm).
ax.pcolormesh(x, y, radar_data.iwc.T * 1e3, norm=norm, rasterized=True)
# In-situ measurements at the radar times matched to the Citation positions.
ax.scatter(time_r, citation_data.altitude / 1e3, c=citation_data.Nev_TWC, norm=norm, edgecolor="k", s=70, linewidth=1.2, label="Nevzorov probe TWC")
ax.legend()
ax.set_xticklabels([])
ax.set_ylabel("Altitude [km]")
ax.set_xlim(start, end)
ax.set_ylim(0, 10)
ax.set_title("(c) Radar retrievals", loc="left")
ax.yaxis.set_ticks_position("right")
ax.yaxis.set_label_position("right")
# Only the bottom curtain shows the time axis.
for t in ax.xaxis.get_major_ticks():
    t.set_visible(False)

# CPCIR results

ax = fig.add_subplot(gs[2:4, 2])
x = ccic_cpcir_data.time.data
# Altitude in km (a stray trailing "/" made this line a syntax error).
y = ccic_cpcir_data.altitude.data / 1e3
ax.pcolormesh(x, y, ccic_cpcir_data.tiwc.T, norm=norm, rasterized=True)
# In-situ measurements at the radar times matched to the Citation positions.
ax.scatter(time_r, citation_data.altitude / 1e3, c=citation_data.Nev_TWC, norm=norm, edgecolor="k", s=70, linewidth=1.2)

ax.set_xlim(start, end)
ax.set_xticklabels([])
ax.set_ylabel("Altitude [km]")
ax.set_ylim(0, 10)
ax.set_title("(d) CCIC TIWC (CPCIR)", loc="left")
ax.yaxis.set_ticks_position("right")
ax.yaxis.set_label_position("right")
# Only the bottom curtain shows the time axis.
for t in ax.xaxis.get_major_ticks():
    t.set_visible(False)

# Gridsat results

ax = fig.add_subplot(gs[4:6, 2])
x = ccic_gridsat_data.time.data
y = ccic_gridsat_data.altitude.data / 1e3
m = ax.pcolormesh(x, y, ccic_gridsat_data.tiwc.T, norm=norm, rasterized=True)
# In-situ measurements at the radar times matched to the Citation positions.
ax.scatter(time_r, citation_data.altitude / 1e3, c=citation_data.Nev_TWC, norm=norm, edgecolor="k", s=70, linewidth=1.2)
ax.set_ylabel("Altitude [km]")
ax.set_title("(e) CCIC TIWC (GridSat)", loc="left")
ax.set_xlabel("Time (UTC)")

ax.set_xlim(start, end)
ax.set_ylim(0, 10)
ax.yaxis.set_ticks_position("right")
ax.yaxis.set_label_position("right")

# Colour bar for the curtain plots, which all use the same 'norm'.
ax = fig.add_subplot(gs[:, -1])
plt.colorbar(m, label="TIWC [g m$^{-3}$]", cax=ax, extend="both")
fig.savefig("../figures/olympex_case_study.pdf", bbox_inches="tight")

## Plot particular flights

In [None]:
# Radar retrieval results, sorted by file name.
data_path = DATA_PATH / "validation" / "olympex" / "results_new"
radar_files = sorted(data_path.glob("*.nc"))

In [None]:
# Report for every radar file whether its "LargePlateAggregate" group
# contains a 'latitude' variable.
for path in radar_files:
    with xr.open_dataset(path, group="LargePlateAggregate") as data:
        has_latitude = "latitude" in data
    print(path, has_latitude)

In [None]:
# CCIC CPCIR result files and the time encoded in each file name.
cpcir_files = sorted(list(Path(DATA_PATH / "results/olympex/").glob("*cpcir*.nc")))
# The file names carry the time as YYYYMMDDHHMM (e.g.
# "ccic_cpcir_201512031500.nc"). The previous format additionally contained
# "%S", which only parsed by splitting the two minute digits between minutes
# and seconds and therefore gave wrong times for non-zero minutes.
cpcir_times = np.array([
    to_datetime64(datetime.strptime(filename.name, "ccic_cpcir_%Y%m%d%H%M.nc"))
    for filename in cpcir_files]
)
def load_cpcir_results(radar_data):
    """
    Load the CCIC CPCIR results that cover a radar dataset and interpolate
    them to the radar sampling.

    The files are selected from the module-level ``cpcir_files`` list using
    the matching ``cpcir_times`` array: from the last file before the first
    radar time up to the first file after the last radar time.

    Args:
        radar_data: Dataset providing ``time``, ``latitude`` and ``longitude``.
            The first and last elements of ``time`` are taken as the start and
            end of the period (assumes ``time`` is sorted -- TODO confirm).

    Return:
        The CPCIR results interpolated to the time, latitude and longitude
        of ``radar_data``.
    """
    start_time = radar_data.time.data[0]
    end_time = radar_data.time.data[-1]

    # Last file before the start of the radar data; fall back to the first
    # file if no file is earlier.
    before = np.where(cpcir_times < start_time)[0]
    start_index = before[-1] if len(before) > 0 else 0

    # First file after the end of the radar data; fall back to the LAST file
    # if no file is later. (Falling back to 0 would produce an empty or
    # truncated selection.)
    after = np.where(cpcir_times > end_time)[0]
    end_index = after[0] if len(after) > 0 else len(cpcir_files) - 1

    # Load into memory inside the context manager so the file handles are
    # released again.
    with xr.open_mfdataset(cpcir_files[start_index:end_index + 1]) as dataset:
        cpcir_data = dataset.compute()

    lons = radar_data.longitude
    lats = radar_data.latitude
    time = radar_data.time
    cpcir_data = cpcir_data.interp(time=time, latitude=lats, longitude=lons)

    return cpcir_data
    
    


In [None]:
# Load the "LargePlateAggregate" group of the radar file at index 24 of the
# sorted file list.
radar_data = xr.load_dataset(radar_files[24], group="LargePlateAggregate")

In [None]:
# CPCIR results interpolated to the time, latitude and longitude of the
# loaded radar data.
cpcir_data = load_cpcir_results(radar_data)

In [None]:
from matplotlib.colors import LogNorm

def plot_overview(radar_data, cpcir_data):
    """
    Draw a three-panel overview of a radar retrieval and the CPCIR results.

    The panels show, from top to bottom: the vertically integrated radar IWC
    together with the CPCIR TIWP, the CPCIR TIWC curtain, and the radar IWC
    curtain (scaled by 1e3).

    Args:
        radar_data: Dataset with ``time``, ``altitude`` and ``iwc``.
        cpcir_data: Dataset with ``time``, ``altitude``, ``tiwp`` and ``tiwc``.
    """
    figure, (ax_tiwp, ax_cpcir, ax_radar) = plt.subplots(3, 1, figsize=(12, 8))
    color_norm = LogNorm(1e-2, 1e1)

    cpcir_time = cpcir_data.time.data
    radar_time = radar_data.time.data

    # Top panel: column-integrated radar IWC and CPCIR TIWP, both drawn
    # against the CPCIR time axis.
    radar_tiwp = np.trapz(radar_data.iwc.data, x=radar_data.altitude.data)
    ax_tiwp.plot(cpcir_time, radar_tiwp)
    ax_tiwp.plot(cpcir_time, cpcir_data.tiwp.data)
    ax_tiwp.set_xlim(cpcir_time[0], cpcir_time[-1])
    ax_tiwp.set_xticklabels([])
    ax_tiwp.set_ylabel("TIWP [kg m$^{-2}$]")

    # Middle panel: CPCIR TIWC curtain.
    cpcir_altitude_km = cpcir_data.altitude.data / 1e3
    ax_cpcir.pcolormesh(
        cpcir_time,
        cpcir_altitude_km,
        cpcir_data.tiwc.T,
        norm=color_norm,
        rasterized=True,
    )
    ax_cpcir.set_xlim(cpcir_time[0], cpcir_time[-1])
    ax_cpcir.set_ylabel("Altitude [km]")
    ax_cpcir.set_xticklabels([])

    # Bottom panel: radar IWC curtain, scaled by 1e3 before plotting.
    radar_altitude_km = radar_data.altitude.data / 1e3
    ax_radar.pcolormesh(
        radar_time,
        radar_altitude_km,
        radar_data.iwc.T * 1e3,
        norm=color_norm,
        rasterized=True,
    )
    ax_radar.set_xlim(radar_time[0], radar_time[-1])
    ax_radar.set_ylabel("Altitude [km]")
    ax_radar.set_xlabel("Time")

## Evaluate radar-only retrieval

In [None]:
from scipy.stats import binned_statistic
# Edges of 10 equal-width temperature bins between 250 and 280; the bin centres
# are later plotted on the "Temperature [K]" axis.
t_bins = np.linspace(250, 280, 11)
# Edges of 40 equal-width bins between -40 and 40 for the logarithmic retrieval
# error (10 * log10 of retrieved over reference, i.e. dB).
err_bins = np.linspace(-40, 40, 41)

def calculate_log_error_dist(
    temperature,
    tiwc_ref,
    tiwc,
    temperature_bins=None,
    error_bins=None,
):
    """
    Calculate distribution of logarithmic retrieval errors w.r.t. temperature.

    Only samples for which temperature and both TIWC values are finite and
    both TIWC values are non-negative are used.

    Args:
        temperature: Array containing the atmospheric temperature.
        tiwc_ref: Array containing the reference TIWC measurements.
        tiwc: Array containing the retrieved TIWC.
        temperature_bins: Sequence of temperature bin edges. Defaults to the
            module-level ``t_bins``.
        error_bins: Sequence of bin edges for the logarithmic error in dB.
            Defaults to the module-level ``err_bins``.

    Return:
        A tuple ``(mean_err, dist)`` containing ``mean_err``, the logarithmic
        mean error for each temperature bin, and ``dist``, the distribution of
        the logarithmic retrieval error normalised within each temperature
        bin, with shape ``(n_error_bins, n_temperature_bins)``. Temperature
        bins without samples contain NaN. If no valid sample remains, both
        arrays are filled with NaN.
    """
    if temperature_bins is None:
        temperature_bins = t_bins
    if error_bins is None:
        error_bins = err_bins

    # Comparisons and divisions involving NaN or zero are expected here and
    # handled explicitly, so the corresponding warnings are silenced.
    with np.errstate(divide="ignore", invalid="ignore"):
        valid = (
            np.isfinite(temperature)
            & np.isfinite(tiwc_ref)
            & np.isfinite(tiwc)
            & (tiwc_ref >= 0)
            & (tiwc >= 0)
        )
        temperature = temperature[valid]
        tiwc_ref = tiwc_ref[valid]
        tiwc = tiwc[valid]

        n_temperature_bins = len(temperature_bins) - 1
        n_error_bins = len(error_bins) - 1
        if temperature.size == 0:
            # Nothing to bin: return NaN-filled arrays rather than failing
            # inside the binning routines.
            return (
                np.full(n_temperature_bins, np.nan),
                np.full((n_error_bins, n_temperature_bins), np.nan),
            )

        # Ratio of the bin-mean TIWCs, expressed in dB.
        tiwc_mean = binned_statistic(temperature, tiwc, bins=temperature_bins)[0]
        tiwc_ref_mean = binned_statistic(temperature, tiwc_ref, bins=temperature_bins)[0]
        mean_err = 10 * np.log10(tiwc_mean / tiwc_ref_mean)

        # Per-sample error in dB. Infinite or NaN errors (from zero TIWC) fall
        # outside all error bins and do not contribute to the histogram.
        err = 10 * np.log10(tiwc / tiwc_ref)
        dist = np.histogram2d(
            temperature,
            err,
            bins=(temperature_bins, error_bins),
            density=True,
        )[0].T
        # Normalise each temperature column so that it sums to one.
        dist /= dist.sum(0, keepdims=True)
    return mean_err, dist
    
    

In [None]:
def _valid_radar_samples(particle_model):
    """
    Extract the radar and in-situ TIWC samples for one particle model.

    Args:
        particle_model: Key into ``results_cpcir`` naming the particle model
            used in the radar retrieval, e.g. "LargePlateAggregate".

    Return:
        A tuple ``(temperature, tiwc_ref, tiwc)`` restricted to the samples
        for which both the in-situ TIWC (``tiwc_citation``) and the radar
        TIWC are non-negative. No temperature cut-off is applied.
    """
    results = results_cpcir[particle_model]
    tiwc_ref = results.tiwc_citation.data
    # The radar TIWC is scaled by 1e3, presumably to convert kg m^-3 to the
    # g m^-3 of the in-situ data -- TODO confirm.
    tiwc = results.tiwc_radar.data * 1e3
    temperature = results.temperature.data
    valid = (tiwc_ref >= 0) * (tiwc >= 0)
    return temperature[valid], tiwc_ref[valid], tiwc[valid]


# Error statistics for each of the three particle models.
temp, tiwc_ref_8ca, tiwc_8ca = _valid_radar_samples("8-ColumnAggregate")
mean_err_8ca, err_8ca = calculate_log_error_dist(temp, tiwc_ref_8ca, tiwc_8ca)

temp, tiwc_ref_lpa, tiwc_lpa = _valid_radar_samples("LargePlateAggregate")
mean_err_lpa, err_lpa = calculate_log_error_dist(temp, tiwc_ref_lpa, tiwc_lpa)

temp, tiwc_ref_lca, tiwc_lca = _valid_radar_samples("LargeColumnAggregate")
mean_err_lca, err_lca = calculate_log_error_dist(temp, tiwc_ref_lca, tiwc_lca)

In [None]:
from matplotlib.gridspec import GridSpec

# Figure layout: two data panels plus a narrow column that only holds the legend.
fig = plt.figure(figsize=(9, 3.5))
gs = GridSpec(1, 3, width_ratios=[1.0, 1.0, 0.5], wspace=0.25)

# Panel (a): retrieved against in-situ TIWC on log-log axes with a 1:1 line.
ax = fig.add_subplot(gs[0, 0])
x = np.logspace(-3, 1, 101)
ax.plot(x, x, ls="--", c="k")
for reference, retrieved in (
    (tiwc_ref_8ca, tiwc_8ca),
    (tiwc_ref_lpa, tiwc_lpa),
    (tiwc_ref_lca, tiwc_lca),
):
    ax.scatter(reference, retrieved, s=20)
ax.set_xlabel("In-situ measured TIWC [g m$^{-3}$]")
ax.set_ylabel("Retrieved TIWC [g m$^{-3}$]")

ax.set_xscale("log")
ax.set_yscale("log")
ax.set_ylim(1e-3, 1e1)
ax.set_xlim(1e-3, 1e1)
ax.set_title("(a) Reference vs. retrieved", loc="left")

# Panel (b): mean logarithmic error at the centre of each temperature bin.
ax = fig.add_subplot(gs[0, 1])
temps = 0.5 * (t_bins[1:] + t_bins[:-1])

handles = []
for mean_err, label in (
    (mean_err_8ca, "8-Column Aggregate"),
    (mean_err_lpa, "Large Plate Aggregate"),
    (mean_err_lca, "Large Column Aggregate"),
):
    handles.extend(ax.plot(mean_err, temps, label=label))

ax.set_xlabel("Retrieved TIWC [dB]")
ax.set_ylabel("Temperature [K]")
# The upper limit is passed first, so temperature decreases upwards.
ax.set_ylim(275, 250)
ax.axvline(0, ls="--", c="k")
ax.set_title("(b) Retrieval bias by temperature", loc="left")
ax.set_xlim(-10, 10)

# Legend in its own, otherwise empty, axes.
ax = fig.add_subplot(gs[0, -1])
ax.set_axis_off()
ax.legend(handles=handles, edgecolor="none", facecolor="none", loc="center")
fig.savefig("../figures/olympex_radar_retrieval.pdf", bbox_inches="tight")

In [None]:
# Relative bias (in percent) and linear correlation of the 8-Column Aggregate
# retrieval, computed over samples where both values are finite.
valid = np.isfinite(tiwc_ref_8ca) & np.isfinite(tiwc_8ca)
retrieved_8ca = tiwc_8ca[valid]
reference_8ca = tiwc_ref_8ca[valid]
rel_bias = 100 * ((retrieved_8ca.mean() / reference_8ca.mean()) - 1.0)
corr = np.corrcoef(retrieved_8ca, reference_8ca)[0, 1]
rel_bias, corr

In [None]:
# Relative bias (in percent) and linear correlation of the Large Plate Aggregate
# retrieval, computed over samples where both values are non-negative.
valid = (tiwc_ref_lpa >= 0) & (tiwc_lpa >= 0)
retrieved_lpa = tiwc_lpa[valid]
reference_lpa = tiwc_ref_lpa[valid]
rel_bias = 100 * ((retrieved_lpa.mean() / reference_lpa.mean()) - 1.0)
corr = np.corrcoef(retrieved_lpa, reference_lpa)[0, 1]
rel_bias, corr

In [None]:
# Relative bias (in percent) and linear correlation of the Large Column Aggregate
# retrieval, computed over samples where both values are non-negative.
valid = (tiwc_ref_lca >= 0) & (tiwc_lca >= 0)
retrieved_lca = tiwc_lca[valid]
reference_lca = tiwc_ref_lca[valid]
rel_bias = 100 * ((retrieved_lca.mean() / reference_lca.mean()) - 1.0)
corr = np.corrcoef(retrieved_lca, reference_lca)[0, 1]
rel_bias, corr