# Preprocess pyCIAM Inputs for Diaz 2016

This notebook creates all of the inputs needed for `pyCIAM`, using the data provided in the [GitHub repo](https://github.com/delavane/CIAM) for [Diaz 2016](https://link.springer.com/article/10.1007/s10584-016-1675-4). The raw GAMS data files (.gdx) were previously converted to a Zarr store for easy loading in pyCIAM. In this notebook, we reformat that Zarr store and precompute some intermediate data products that were calculated on the fly in Diaz 2016 but are treated as input values in pyCIAM.

## Setup

In [None]:
import sys

# Add the parent directory to the module search path so that modules located there
# can be imported (presumably where the `shared` module used below lives — confirm).
sys.path.append("../")

In [2]:
import numpy as np
import pandas as pd
import xarray as xr
from shared import PATH_DIAZ_INPUTS_INT, PATH_DIAZ_INPUTS_RAW, open_zarr, save

  from distributed.utils import LoopRunner, format_bytes


## Load raw data

In [3]:
# Read the raw Diaz inputs fully into memory. Any missing values (e.g. in land
# area) are assumed to correspond to 0.
ds_in = open_zarr(PATH_DIAZ_INPUTS_RAW).load()
vars_with_gaps = [name for name in ds_in.data_vars if ds_in[name].isnull().any()]
for name in vars_with_gaps:
    ds_in[name] = ds_in[name].fillna(0)

# Discard the inputs that are not part of the segment-level analysis, identified
# by their name prefix.
non_segment_prefixes = ("country_", "fund_", "rep_")
non_segment_vars = [
    name for name in ds_in.data_vars if name.startswith(non_segment_prefixes)
]
ds_in = ds_in.drop_vars(non_segment_vars)

## Adjust to "remove kink in land area"

In [4]:
# Re-bin the two lowest elevation slices (elev=1 and elev=2) into three slices
# labelled 0.5, 1.5 and 2: each original slice is split in half, and the middle
# slice is made of one half from each.
raw_area = ds_in.landarea
lower_half = (raw_area.sel(elev=1) / 2).assign_coords(elev=0.5)
upper_half = raw_area.sel(elev=2) / 2
middle = (lower_half + upper_half).assign_coords(elev=1.5)
remaining = raw_area.sel(elev=slice(3, None))

landarea = xr.concat([lower_half, middle, upper_half, remaining], dim="elev")

# Swap the re-binned land area in for everything that used the old `elev` dimension
ds_in = ds_in.drop_dims("elev").assign(landarea=landarea)

## Calculate input vals

In [5]:
# Build `out`, the dataset of pyCIAM inputs, from the raw Diaz 2016 inputs in `ds_in`.
# NOTE: statement order matters in this cell. Everything assigned to `out` before the
# `reindex(...).bfill("t")` step near the end gets an extra back-filled year 2000;
# variables assigned after it are added as-is.

# start with coastline lengths
out = xr.Dataset({"length": ds_in.length})

# constants from original ciam dataset
out = xr.merge((out, ds_in.mobcapfrac))

# extreme sea levels: keep four surge heights and relabel them with numeric return
# periods (the "smax" entry is labelled 10000)
esls = ds_in.surge_height.sel(return_period=["s10", "s100", "s1000", "smax"])
esls["return_period"] = [10, 100, 1000, 10000]
out["surge_height"] = esls

# growth values used later
# step-over-step growth rates of income per capita and population; the 1e-9 keeps the
# denominator away from exactly zero. The first time step has no predecessor, so
# `shift` leaves its growth rate missing.
gr_ypcc = ds_in["ypc"] / (ds_in["ypc"].shift(t=1) + 1e-9) - 1
gr_pop = ds_in["pop"] / (ds_in["pop"].shift(t=1) + 1e-9) - 1
# integer position (0, 1, 2, ...) of each time step; used as the exponent of the
# Greenland income series below
gr_val = xr.DataArray(np.arange(len(ds_in.t)), coords={"t": ds_in.t.values})
# cumulative appreciation factor, applied to interior land value and wetland service
# value below
appr = np.exp((0.565 * gr_ypcc + 0.313 * gr_pop).cumsum("t"))

# population
# base population density scaled by compounded population growth, looked up with
# `xsc` (presumably each segment's country — confirm against the raw inputs)
popdens = ds_in.popdens * np.exp(np.log(1 + gr_pop).cumsum("t")).sel(country=ds_in.xsc)
# population in the first time step (density x land area), and population density in
# each time step relative to the first
out["pop_2000"] = popdens.isel(t=0, drop=True) * ds_in.landarea.fillna(0)
out["pop_scale"] = (popdens / popdens.isel(t=0, drop=True)).fillna(0)

# income
# country income per capita scaled by a function of first-step population density,
# with the scaling factor floored at `min_ypc_scale`
out["ypc"] = ds_in.ypc.sel(country=ds_in.xsc) * np.maximum(
    ds_in.min_ypc_scale,
    (popdens.isel(t=0, drop=True) / ds_in.ypc_scale_denom) ** ds_in.ypc_scale_elast,
)
# correct for greenland: where the "greenland" subset flag is set, replace the value
# with 22642 growing by a factor of 1.01 per time step
out["ypc"] = out.ypc.where(
    ~ds_in.subsets.sel(subset="greenland", drop=True), 22642 * 1.01**gr_val
)

# capital
# first-step capital = kgdp x population x income per capita; K_scale is the product
# of the population and income scalings relative to the first time step
out["K_2000"] = ds_in.kgdp * out["pop_2000"] * out.ypc.isel(t=0, drop=True)
out["K_scale"] = out.pop_scale * out.ypc / out.ypc.isel(t=0, drop=True)

# land value
# dvbm scaled by (first-step ypc x refpopdens) relative to the same product for the
# USA, bounded below by `min_fundland` and above by `dvbm`, then multiplied by 1e6
fundland = (
    np.minimum(
        ds_in.dvbm,
        np.maximum(
            ds_in.min_fundland,
            ds_in.dvbm
            * ds_in.ypc.isel(t=0, drop=True)
            * ds_in.refpopdens
            / (
                ds_in.ypc.isel(t=0, drop=True).sel(country="USA", drop=True)
                * ds_in.refpopdens.sel(country="USA", drop=True)
            ),
        ),
    )
    * 1e6
)
interior = appr * fundland
# where the "greenland" subset flag is set, use the value for country "CAN" instead
# NOTE(review): the mask is presumably indexed by segment while `interior` is indexed
# by country, so this `where` likely broadcasts over both before the `.sel` reduces
# it; selecting by `xsc` first may be cheaper — confirm before changing.
out["interior"] = interior.where(
    ~ds_in.subsets.sel(subset="greenland", drop=True),
    interior.sel(country="CAN", drop=True),
).sel(country=ds_in.xsc)

# wetland - distribute this over total land area starting from elev=0 to emulate how it
# is implicitly treated in calculating wetland costs in Diaz 2016
cum_area = ds_in.landarea.cumsum("elev")
# where total wetland covers the cumulative land area up to and including a bin, the
# bin's wetland equals its land area; otherwise it is whatever wetland is left after
# the lower bins (never negative)
wetland_area = ds_in.landarea.where(
    ds_in.wetland.fillna(0) >= cum_area,
    np.maximum(ds_in.wetland.fillna(0) - cum_area.shift(elev=1, fill_value=0), 0),
)
# wvbm scaled by the appreciation factor, by first-step income relative to the USA
# (exponent 1.16) and by refpopdens relative to 27.59 (exponent 0.47), times 1e6
wetlandservice = (
    1e6
    * appr
    * ds_in.wvbm
    * (
        ds_in.ypc.isel(t=0, drop=True)
        / ds_in.ypc.isel(t=0, drop=True).sel(country="USA", drop=True)
    )
    ** 1.16
    * (ds_in.refpopdens / 27.59) ** 0.47
)
out["wetland"] = wetland_area
out["wetlandservice"] = wetlandservice.sel(country=ds_in.xsc)
# handle segs where they have more wetland area than land area: the total value is
# based on the full `ds_in.wetland` amount rather than the per-bin `wetland_area`
out["total_wetland_val"] = ds_in.wetland.fillna(0) * wetlandservice.sel(
    country=ds_in.xsc
)

# vsl
# vsl_ypc_mult x USA income x (income relative to USA) ** vsl_inc_elast
out["vsl"] = (
    ds_in.vsl_ypc_mult
    * ds_in.ypc.sel(country="USA", drop=True)
    * (ds_in.ypc / ds_in.ypc.sel(country="USA", drop=True)) ** ds_in.vsl_inc_elast
).sel(country=ds_in.xsc)
# correct greenland: recompute with the same formula from the corrected `out.ypc` of
# the flagged segments. `reindex` pads the result back to all segments; the padded
# entries are not used because `where` keeps the original value for unflagged segments.
out["vsl"] = out.vsl.where(
    ~ds_in.subsets.sel(subset="greenland", drop=True),
    (
        ds_in.vsl_ypc_mult
        * ds_in.ypc.sel(country="USA", drop=True)
        * (
            out.ypc.isel(seg=ds_in.subsets.sel(subset="greenland", drop=True))
            / ds_in.ypc.sel(country="USA", drop=True)
        )
        ** ds_in.vsl_inc_elast
    ).reindex(seg=out.seg),
)

# rho: ypc / (ypc + first-step USA ypc)
out["rho"] = (
    ds_in.ypc
    / (ds_in.ypc + ds_in.ypc.isel(t=0, drop=True).sel(country="USA", drop=True))
).sel(country=ds_in.xsc)

# protection construction cost
# the construction cost index is doubled where the "island" subset flag is set
cci = ds_in.cci.sel(country=ds_in.xsc)
cci = cci.where(~ds_in.subsets.sel(subset="island", drop=True), cci * 2)
out["pc"] = ds_in.pc0 * 1e6 * cci

# discount factor. emulating diaz, we don't discount 2010 relative to 2000 and start
# discounting in 2010
# (the exponent is the time elapsed since the first time step currently in `out`)
out["dr"] = ds_in.dr
out["dfact"] = 1 / (1 + ds_in.dr) ** (out.t - out.t.isel(t=0, drop=True))

# add in year 2000 values, equal to 2010
# (prepend t=2000 and back-fill it from the next time step; this applies only to the
# variables already in `out` at this point)
out = out.reindex(t=np.concatenate(([2000], out.t.values))).bfill("t")

# add in surge coefficients to allow for Diaz-like surge calculation
out["surge_coefs"] = ds_in.coefs

# add in LSL, using 0 for any time step of `out` that is absent from the raw series
out["lsl"] = ds_in.lsl.reindex(t=out.t.values, fill_value=0)

# land area
out["landarea"] = ds_in.landarea.fillna(0)

# add in elevation bounds
# lower bounds are 0, 0.5 and 1.5 for the first three bins and (elev - 1) for the
# rest; the current `elev` values serve as the upper bounds
elev_lb = xr.DataArray(
    np.concatenate(([0, 0.5, 1.5], out.elev.isel(elev=slice(3, None)).values - 1)),
    coords={"elev": out.elev.values},
)
out["elev_bounds"] = xr.concat(
    (elev_lb, out.elev), dim=pd.Index(["lower", "upper"], name="bound")
)
# relabel each bin by the midpoint of its bounds
out["elev"] = out.elev_bounds.mean("bound")

# rename t to year and drop the leftover `country` variable
out = out.rename(t="year").drop_vars("country")

## Save

In [6]:
# Attach descriptive metadata (long names, descriptions, units) to the coordinates and
# variables of `out`, then prepare the dataset for writing to zarr.
# Dimensionless quantities (fractions and ratios) intentionally carry no "units" entry.
dollar_units = "2010 USD"

# coords
out.seg.attrs.update({"long_name": "DIVA Segment"})
out.return_period.attrs.update({"long_name": "Return periods", "units": "y"})
out.elev.attrs.update(
    {
        "long_name": "Elevation",
        "description": (
            "Midpoint elevation for each coastal elevation bin employed in pyCIAM"
        ),
        "units": "m",
    }
)

# data_vars
out.surge_coefs.attrs.update(
    {
        "long_name": "Surge Damage Coefficients",
        "description": (
            "Coefficients used in the original Diaz 2016 paper to estimate surge "
            "damage"
        ),
    }
)
out.elev_bounds.attrs.update(
    {
        "long_name": "Elevation bounds",
        "description": (
            "Lower and upper bounds for each coastal elevation bin employed in pyCIAM"
        ),
        "units": "m",
    }
)
out.length.attrs.update({"description": "Length of coastline", "units": "km"})
out.surge_height.attrs.update(
    {"description": "Estimated ESL/storm surge heights", "units": "m"}
)
out.wetland.attrs.update(
    {
        "description": "Estimated area for all wetland by elevation",
        "units": "km^2",
    }
)
out.wetlandservice.attrs.update(
    {
        "description": "Value of wetlands",
        "units": f"{dollar_units} per km^2",
    }
)
out.total_wetland_val.attrs.update(
    {
        "description": (
            "Estimated value for all wetland. Includes wetlands for segments that have "
            "more wetland area than land area"
        ),
        "units": dollar_units,
    }
)
out.vsl.attrs.update(
    {
        "long_name": "Value of a Statistical Life",
        "units": f"{dollar_units} per person",
    }
)
out.K_2000.attrs.update(
    {
        "description": "Total value of capital in year 2000",
        "units": dollar_units,
    }
)
# K_scale is a ratio, so it has no units (it was previously mislabelled as dollars)
out.K_scale.attrs.update(
    {
        "description": "Ratio of capital stock in present year to K_2000",
    }
)
out.pop_2000.attrs.update({"long_name": "Population in 2000", "units": "people"})
# pop_scale is a ratio, so it has no units (it was previously mislabelled as dollars)
out.pop_scale.attrs.update(
    {
        "description": "Ratio of population in present year to pop_2000",
    }
)
out.ypc.attrs.update(
    {
        "description": "Downscaled income per capita",
        "units": f"{dollar_units} per person",
    }
)
out.landarea.attrs.update(
    {
        "long_name": "Total Land Area",
        "units": "km^2",
    }
)
out.interior.attrs.update(
    {
        "long_name": "Value of non-coastal land",
        "units": f"{dollar_units} per km^2",
    }
)
out.pc.attrs.update(
    {
        "description": "Protection cost (quadratic with height)",
        "units": f"{dollar_units} per km per vert m^2",
    }
)
out.mobcapfrac.attrs.update(
    {
        "description": "Fraction of capital that is mobile",
    }
)
out.rho.attrs.update(
    {
        "description": (
            "Resilience factor scaling depth-damage and depth-mortality functions"
        )
    }
)
out.lsl.attrs.update(
    {
        "long_name": "Local Sea Level Rise",
        "description": (
            "Local Sea Level Rise under emissions scenario relative to 1991-2009 "
            "baseline."
        ),
        "units": "m",
    }
)

# clear previously-saved zarr store encoding
for d in out.data_vars:
    out[d].encoding.clear()

# prep str coords for zarr: cast object-dtype coordinates to a fixed-width unicode
# dtype. Iterate over a snapshot because coordinates are reassigned inside the loop,
# and use the builtin `str` as the dtype rather than the "unicode" string alias.
for k, v in list(out.coords.items()):
    if v.dtype == object:
        out[k] = v.astype(str)

In [8]:
# Write the prepared inputs to the intermediate-inputs location using the project's
# `save` helper (mode="w" presumably overwrites an existing store — confirm in `shared`).
save(out, PATH_DIAZ_INPUTS_INT, mode="w")

<xarray.backends.zarr.ZarrStore at 0x7efd24d035f0>