# Create SLIIDERS-ECON

This notebook creates the SLIIDERS-ECON dataset. It starts from a segment × ADM1 × elevation dataset of capital stock and population, and combines it with country-level scaling factors that follow the Shared Socioeconomic Pathways (SSPs).

In [1]:
%load_ext autoreload
%autoreload 2

## Setup

In [9]:
from collections import OrderedDict

import geopandas as gpd
import numpy as np
import pandas as pd
import xarray as xr
from dask_gateway import Gateway
from gcsfs import GCSFileSystem
from scipy.stats import gumbel_r

from sliiders.settings import (
    EXPOSURE_BIN_WIDTH_V,
    PATH_CIAM_2016,
    PATH_CIAM_ADM1_VORONOI_INTERSECTIONS,
    PATH_CIAM_COASTLINES,
    PATH_SEG_CENTROIDS,
    PATH_COUNTRY_LEVEL_EXPOSURE,
    PATH_COUNTRY_LEVEL_EXPOSURE_PROJ,
    PATH_EXPOSURE_AREA_BY_CIAM_AND_ELEVATION,
    PATH_EXPOSURE_BINNED_WITHELEV,
    PATH_EXPOSURE_LINCKE,
    PATH_EXPOSURE_WB_ICP,
    PATH_GADM,
    PATH_GTSM_SURGE,
    PATH_PWT_RAW,
    PATH_SLIIDERS_ECON,
    PATH_SLIIDERS_SLR,
    SVALS,
)
from sliiders.spatial import coastlen_poly, get_great_circle_nearest_index
from sliiders.utils import upload_pkg

### Start Dask Cluster

This section can be modified to suit whatever computing resources you have, but the result must be a Dask distributed `Client` object assigned to `client`.

In [5]:
# Launch a Dask Gateway cluster. `client` is the handle the rest of the notebook uses
# to submit work.
gateway = Gateway()
cluster_options = dict(profile="micro", idle_timeout=3600)
cluster = gateway.new_cluster(**cluster_options)
client = cluster.get_client()
cluster.scale(120)

# hand the local `sliiders` source tree to the cluster via the project helper
# (presumably so workers can import it — see `sliiders.utils.upload_pkg`)
upload_pkg(client, "../../sliiders")

# show the cluster widget
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

## Define Paths and parameters

In [10]:
def fuse_to_gspath(path):
    """Turn a path under the ``/gcs`` mount point into a bucket-relative string.

    Parameters
    ----------
    path : pathlib.PurePath
        Path located under ``/gcs``.

    Returns
    -------
    str
        ``path`` with the leading ``/gcs`` component removed.

    Raises
    ------
    ValueError
        If ``path`` does not start with ``/gcs`` (raised by ``relative_to``).
    """
    bucket_relative = path.relative_to("/gcs")
    return str(bucket_relative)


# Handle to Google Cloud Storage used for every remote read/write in this notebook.
FS = GCSFileSystem(token="/opt/gcsfuse_tokens/rhg-data.json")

# NOTE: this cell rebinds names imported from `sliiders.settings`. After it has run
# once those names no longer hold `Path` objects, so re-running it requires re-running
# the import cell first.

# Datasets read through `FS` need bucket-relative strings rather than paths under the
# local `/gcs` mount.
(
    PATH_CIAM_ADM1_VORONOI_INTERSECTIONS,
    PATH_COUNTRY_LEVEL_EXPOSURE,
    PATH_COUNTRY_LEVEL_EXPOSURE_PROJ,
    PATH_EXPOSURE_AREA_BY_CIAM_AND_ELEVATION,
    PATH_EXPOSURE_BINNED_WITHELEV,
) = map(
    fuse_to_gspath,
    (
        PATH_CIAM_ADM1_VORONOI_INTERSECTIONS,
        PATH_COUNTRY_LEVEL_EXPOSURE,
        PATH_COUNTRY_LEVEL_EXPOSURE_PROJ,
        PATH_EXPOSURE_AREA_BY_CIAM_AND_ELEVATION,
        PATH_EXPOSURE_BINNED_WITHELEV,
    ),
)

# Zarr stores are accessed through mappers rooted at the same kind of bucket-relative
# location. The root goes through `fuse_to_gspath` so that `get_mapper` receives a
# plain string, consistent with the parquet paths above, instead of a `Path` object.
PATH_SLIIDERS_SLR = FS.get_mapper(fuse_to_gspath(PATH_SLIIDERS_SLR))
PATH_SLIIDERS_ECON = FS.get_mapper(fuse_to_gspath(PATH_SLIIDERS_ECON))
PATH_CIAM_2016 = FS.get_mapper(fuse_to_gspath(PATH_CIAM_2016))

In [6]:
# Chunk sizes for the output data (i.e. CIAM inputs), keyed by dimension name. Only
# `seg_adm` is given an explicit chunk length (500); every other dimension uses -1.
CHUNKS = OrderedDict(
    [
        ("ssp", -1),
        ("iam", -1),
        ("year", -1),
        ("seg_adm", 500),
        ("elev", -1),
        ("params", -1),
        ("return_period", -1),
        ("bound", -1),
        ("country", -1),
    ]
)

## Prep coastline Lengths

In [8]:
# Load the seg-ADM1 units (rows flagged `lowelev` only), ordered by `seg_adm`
segadm_df = pd.read_parquet(
    PATH_CIAM_ADM1_VORONOI_INTERSECTIONS,
    columns=["station_id", "adm1", "ISO", "seg_adm"],
    filters=[("lowelev", "=", True)],
    filesystem=FS,
).sort_values("seg_adm")

# the segment id is the first two "_"-separated tokens of each seg_adm id
all_segs = segadm_df.seg_adm.str.split("_").map(lambda parts: "_".join(parts[:2])).values
segadm_df["station_id"] = all_segs

# unique segments and countries present in the data
valid_segs = np.unique(all_segs)
all_isos = segadm_df.ISO.unique()

In [9]:
# Get coastline length calculations started on the cluster (one task per seg-ADM1
# unit). The futures are gathered later, after the other inputs have been built.
#
# `Client.map` zips together *every* positional argument after the function, so the
# two paths, which are identical for every task, are repeated once per seg_adm.
# Passing them bare would iterate over them (character by character for a string;
# a `Path` is not iterable at all) instead of handing the full path to each call.
seg_adm_ids = segadm_df.seg_adm.values
n_units = len(seg_adm_ids)
coastlen_ftrs = client.map(
    coastlen_poly,
    seg_adm_ids,
    [PATH_CIAM_COASTLINES] * n_units,
    [PATH_CIAM_ADM1_VORONOI_INTERSECTIONS] * n_units,
    filesystem=FS,
)

## Initialize pyCIAM input dataset

In [10]:
# Start the pyCIAM input dataset from the seg-ADM1 table: one entry per `seg_adm`,
# with country, ADM1 and segment carried along as non-index coordinates.
column_renames = {
    "ISO": "seg_country",
    "station_id": "seg",
    "length_km": "length",
}
out = segadm_df.rename(columns=column_renames).set_index("seg_adm").to_xarray()
out = out.set_coords(["seg_country", "adm1", "seg"])

## Income

### Load country-level scaling data to forecast and backcast gdppc, pop, capital

In [11]:
def read_scaling_table(path, cols):
    """Read selected columns of a country-level scaling table as an xarray Dataset.

    Parameters
    ----------
    path : str
        Location of the parquet dataset, read through the notebook-level ``FS``.
    cols : list of str
        Columns to load. Must include ``"pop"``.

    Returns
    -------
    xarray.Dataset
        The table with the ``ccode`` index level renamed to ``country`` and the
        columns ``rgdpna_pc_19``, ``rnna_19`` and ``rnna_19_scale`` renamed to
        ``ypcc``, ``K`` and ``K_scale``. ``pop`` (and ``K``, when loaded) are
        multiplied by 1e6.
    """
    short_names = {"rgdpna_pc_19": "ypcc", "rnna_19": "K", "rnna_19_scale": "K_scale"}
    table = pd.read_parquet(path, filesystem=FS, columns=cols)
    table = table.rename_axis(index={"ccode": "country"}).rename(columns=short_names)

    # presumably the source stores these in millions — TODO confirm against the inputs
    table["pop"] = table["pop"] * 1e6
    if "K" in table.columns:
        table["K"] = table["K"] * 1e6

    return table.to_xarray()


# Historical country-level population, income per capita (ypcc) and capital (K)
hist_scaling = read_scaling_table(
    PATH_COUNTRY_LEVEL_EXPOSURE, cols=["pop", "rgdpna_pc_19", "rnna_19"]
)
# countries whose population is 0 in every historical year
nopop_countries = hist_scaling.country.isel(country=(hist_scaling.pop == 0).all("year"))

# the historical table must not contain any missing values
assert hist_scaling.notnull().all().to_array().all()

# Projected population, income per capita and the pop/capital scaling factors
proj_scaling = read_scaling_table(
    PATH_COUNTRY_LEVEL_EXPOSURE_PROJ,
    cols=["pop", "pop_scale", "rnna_19_scale", "rgdpna_pc_19"],
)

# make gdppc for unpopulated areas 0 just like for the IR-level dataset
proj_scaling["ypcc"] = proj_scaling.ypcc.where(
    ~proj_scaling.ypcc.country.isin(nopop_countries), 0
)

# Account for unpopulated Spratly Islands: append an "SP-" entry filled with zeros to
# both tables
hist_scaling = hist_scaling.reindex(
    country=np.concatenate([hist_scaling.country.values, ["SP-"]]), fill_value=0
)
proj_scaling = proj_scaling.reindex(
    country=np.concatenate([proj_scaling.country.values, ["SP-"]]), fill_value=0
)

# backcast SSP2/IIASA to 2000 using 2000-2010 historical growth and set initial values
# (2000) the same for all scenarios
# (the end year of that window is the first year present in `proj_scaling`)
min_scale_year = proj_scaling.year.min().item()
# ratio of each historical variable in 2000 to its value in the first projection year
hist_rat_2000 = hist_scaling.sel(year=2000) / hist_scaling.sel(year=min_scale_year)
# force the ratio to 0 wherever the 2000 value is not positive
hist_rat_2000 = hist_rat_2000.where(hist_scaling.sel(year=2000) > 0, 0)
# reuse the population ratio for `pop_scale` and the capital ratio for `K_scale` so the
# variable names line up with `proj_scaling`
hist_rat_2000["pop_scale"] = hist_rat_2000["pop"]
hist_rat_2000 = hist_rat_2000.rename(K="K_scale")

# put the projections on an annual 2000-2100 axis; years not in the table become NaN
proj_scaling = proj_scaling.reindex(year=np.arange(2000, 2101))
# year-2000 values = historical ratio x SSP2/IIASA value in the first projection year,
# broadcast so that every scenario receives the same starting point
proj_scaling.loc[{"year": 2000}] = hist_rat_2000.sel(
    country=proj_scaling.country
) * proj_scaling.sel(year=min_scale_year, ssp="SSP2", iam="IIASA").broadcast_like(
    proj_scaling.sel(year=2000)
)
# fill the missing years by interpolating along `year` in log space, then keep only the
# countries that appear in the seg-ADM1 data
proj_scaling = np.exp(np.log(proj_scaling.sel(country=all_isos)).interpolate_na("year"))
hist_scaling = hist_scaling.sel(country=all_isos)

## SLR site <> coastline segment mapping

In [12]:
# GTSM-based CIAM segment points, keyed by segment id and limited to segments in use
segpts = gpd.read_file(PATH_SEG_CENTROIDS)[["station_id", "lat", "lon"]]
segpts = segpts.rename(columns={"station_id": "seg"})
segpts["seg"] = segpts.seg.str.split("_").apply(lambda parts: "seg_" + parts[-1])
segpts = segpts.set_index("seg").loc[valid_segs].sort_index()

# Coordinates of the future-SLR sites
with xr.open_zarr(PATH_SLIIDERS_SLR, chunks=None) as slrf:
    slrfdf = slrf[["lat", "lon"]].to_dataframe()

# index of the nearest (great-circle) SLR site for every segment point
nn = get_great_circle_nearest_index(segpts, slrfdf)
nn = nn.rename("site_id")

# assign crosswalk table of CIAM segments and SLR sites
out["SLR_site_id"] = nn.to_xarray().sel(seg=out.seg, drop=True)

## ESL heights and Gumbel params

In [13]:
# GTSM-based CIAM segment points
segs = gpd.read_file(PATH_SEG_CENTROIDS)[["station_id", "lat", "lon"]].set_index(
    "station_id"
)

# Split seg df into GTSM stations and manually added new segments
added_seg_ids = segs.index.str.contains("_990")
old_segs = segs[~added_seg_ids]
added_segs = segs[added_seg_ids]

# Find nearest GTSM point to each manually added segment point
nn_added = get_great_circle_nearest_index(added_segs, old_segs)

# Add column for nearest GTSM station (most will be self-equivalent). The override is
# written through `.loc` on the frame itself: updating `segs.near_station_id` in place
# is a chained assignment, which stops modifying `segs` under pandas Copy-on-Write.
# Assumes `nn_added` is indexed by the added segments' station ids, which the previous
# index-aligned `Series.update` call relied on as well.
segs["near_station_id"] = segs.index.values
segs.loc[nn_added.index, "near_station_id"] = nn_added

# CoDEC GTSM Surge Height Data [Muis et al. 2020]
gtsm_msl = (
    xr.open_dataset(PATH_GTSM_SURGE)
    .rename(
        {
            "return_periods": "rps",
            "gumbel_parameters": "params",
            "station_y_coordinate": "lat",
            "station_x_coordinate": "lon",
        }
    )
    # `drop_vars` replaces the deprecated `drop` and matches the call further down
    .drop_vars("station_name")
    .swap_dims(stations="station_id")
    .rename(station_id="stations")
)
# relabel the two Gumbel parameters as "loc" / "scale"
gtsm_msl["params"] = [
    "loc" if i.startswith(b"location") else "scale" for i in gtsm_msl.params.values
]
gtsm_msl["stations"] = gtsm_msl.stations.astype(str)

# Filter by unique GTSM segment points
gtsm_msl = gtsm_msl.sel(stations=np.unique(segs.near_station_id.values)).GUM.rename("")

# create return heights (setting "1-year" return period to 99th percentile as per NOAA
# tides and currents ESL plots)
inv_freq = 1 / SVALS
inv_freq = np.where(inv_freq > 0.99, 0.99, inv_freq)
hts = gumbel_r.ppf(
    1 - inv_freq, loc=gtsm_msl.sel(params=["loc"]), scale=gtsm_msl.sel(params=["scale"])
)
# negative heights are floored at 0
hts = np.where(hts > 0, hts, 0)
hts = xr.DataArray(
    hts,
    dims=["stations", "return_period"],
    coords={"stations": gtsm_msl.stations.values, "return_period": SVALS},
)

# combine params and pre-calculated heights
esl_hts = xr.Dataset({"gumbel_params": gtsm_msl, "surge_height": hts})

# reindex to match segs
esl_hts = esl_hts.sel(stations=segs.near_station_id.values).rename(stations="seg")
esl_hts["seg"] = segs.index.values
esl_hts["seg"] = "seg_" + esl_hts.seg.str[-5:]

# reindex to match seg-adms
esl_hts = esl_hts.drop_vars(["lat", "lon"]).sel(seg=out.seg, drop=True)

In [14]:
# add the ESL parameters and surge heights to the inputs assembled so far
out = xr.merge([out, esl_hts])

## Wetland and land areas

In [15]:
# Land and wetland area by seg-ADM1 and elevation bin (rows with protection_zone == -1)
area_columns = ["seg_adm", "z_ix", "land_area_km", "wetland_area_km"]
areas = pd.read_parquet(
    PATH_EXPOSURE_AREA_BY_CIAM_AND_ELEVATION,
    columns=area_columns,
    filters=[("protection_zone", "==", -1)],
    filesystem=FS,
).set_index(["seg_adm", "z_ix"])

assert areas.index.is_unique

# add in all below-0 wetlands (~10%) into the lowest elevation bin: relabel the
# z_ix == -1 rows as z_ix == 0, align them to the full index, and add them on
subzero_wetland = areas.loc[(slice(None), -1), "wetland_area_km"].rename(
    index={-1: 0}, level="z_ix"
)
areas["wetland_area_km"] = areas["wetland_area_km"] + subzero_wetland.reindex(
    index=areas.index
).fillna(0)

# keep bins with z_ix >= 0 only; combinations missing from the table get zero area
areas = areas.sort_index()
areas = areas.loc[(slice(None), slice(0, None)), :].to_xarray().fillna(0)

# bin index to bin midpoint
areas["z_ix"] = (areas.z_ix + 0.5) * EXPOSURE_BIN_WIDTH_V

# cover all seg-adms
areas = areas.reindex(seg_adm=out.seg_adm.values, fill_value=0)
areas = areas.rename(z_ix="elev")

# get this to use later in calculating wetland services
wetland_area_by_iso = areas.wetland_area_km.groupby(out.seg_country).sum().sum("elev")

## Constants from Original CIAM

In [40]:
# Load the original CIAM inputs and keep only the scalar (0-dimensional) constants
const_ds = xr.open_zarr(PATH_CIAM_2016)
scalar_names = [name for name in const_ds.data_vars if const_ds[name].ndim == 0]
const_ds = const_ds[scalar_names]

# adjust to 2019 USD from 2010M USD, using the ratio of the USA `pl_gdpo` series
# between 2019 and 2010 and a factor of 1e6
pl_data = pd.read_excel(
    PATH_PWT_RAW, usecols=["year", "countrycode", "pl_gdpo"], index_col=[0, 1]
)["pl_gdpo"]["USA"]
pl_multiplier = pl_data[2019] / pl_data[2010]

dollar_vars = ["dvbm", "pc0", "wvbm", "min_fundland"]
const_ds[dollar_vars] = const_ds[dollar_vars] * pl_multiplier * 1e6

out = xr.merge([out, const_ds])

## Wetland Value

### Get total area by country

In [18]:
# Country geometries from GADM, restricted to the countries in the seg-ADM1 data
iso_geoms = (
    gpd.read_file(PATH_GADM)[["GID_0", "geometry"]]
    .rename(columns={"GID_0": "country"})
    .set_index("country")
    .geometry.loc[all_isos]
)

# label the coordinates as lon/lat, then reproject to the "cea" projection before
# measuring areas
iso_geoms.crs = "epsg:4326"
iso_geoms = iso_geoms.to_crs({"proj": "cea"})

# km2 areas
careas = iso_geoms.area.div(1e6)

### Get current-day country-level population and income densities

In [19]:
# last historical year for every country, in the same order as `careas`
curr_data = hist_scaling.isel(year=-1).to_dataframe().loc[careas.index]

# population density (people per km2); must be defined for every country
refpopdens = curr_data["pop"].div(careas)
assert refpopdens.notnull().all()

# income per km2 = population density x income per capita
income_spatial_dens = refpopdens * curr_data["ypcc"]

### Calculate land value appreciation via Yohe 1999 via Abraham and Hendershott 1993

In CIAM (Diaz, 2016), the constant and the lagged appreciation term are dropped from the original growth regression. Here we add them back in.

In [20]:
# calculate initial conditions to use as initial logP_{-1}
# (year-on-year log differences of historical population and income per capita)
logdiff_hist_pop = np.log(hist_scaling.pop).diff("year")
logdiff_hist_y = np.log(hist_scaling.ypcc).diff("year")

# iterate the growth regression over the historical years; the first year of
# `logdiff_hist_landval` keeps its initial value of 0 and seeds the recursion
logdiff_hist_landval = xr.zeros_like(logdiff_hist_pop.isel(year=slice(1, None)))
for yr in logdiff_hist_landval.year[1:]:
    logdiff_hist_landval.loc[{"year": yr}] = (
        -0.006
        + 0.313 * logdiff_hist_pop.sel(year=yr, drop=True)
        + 0.565 * logdiff_hist_y.sel(year=yr, drop=True)
        + 0.402 * logdiff_hist_landval.sel(year=yr - 1, drop=True)
    )

# seed the 1999-2000 log difference to be equal to that calculated above, by setting
# log(appr) in 1999 to the negative of the calculated d[log(appr)]_2000. This makes it
# such that we can have appr=1 (i.e. log(appr)=0) in 2000 but have the calculated
# lagged growth rate to calculate the A&H regression equations.
init_log_val = -logdiff_hist_landval.sel(year=2000)

# initialize output array (holds log(appr) until it is exponentiated below)
appr = xr.zeros_like(proj_scaling.ypcc)

# calculate growth using log-difference for income and pop
logdiff_y = np.log(proj_scaling.ypcc).diff("year")
logdiff_p = np.log(proj_scaling["pop"]).diff("year")

# add a 1999 year and seed w/ initial conditions such that 2000 - 1999 matches
# historical data
# (the prepended year is first_year - (second_year - first_year))
appr = appr.reindex(
    year=np.concatenate(([(appr.year[0] * 2 - appr.year[1]).item()], appr.year.values))
).fillna(init_log_val)

# same regression written in log levels:
#   log(appr)_t - log(appr)_{t-1} = -0.006 + 0.313 * dlog(pop)_t + 0.565 * dlog(y)_t
#                                   + 0.402 * (log(appr)_{t-1} - log(appr)_{t-2})
# the loop starts at the third year so the seeded pre-period and 2000 stay untouched
for yr in appr.year[2:]:
    appr.loc[{"year": yr}] = (
        -0.006
        + 0.313 * logdiff_p.sel(year=yr, drop=True)
        + 0.565 * logdiff_y.sel(year=yr, drop=True)
        + 1.402 * appr.sel(year=yr - 1, drop=True)
        - 0.402 * appr.sel(year=yr - 2, drop=True)
    )

# drop the pre-period initial condition we used
appr = appr.isel(year=slice(1, None))

# now exponentiate to get multiplier scaling factor
appr = np.exp(appr)

# account for areas with no population and no income in any historical year: set
# their appreciation multiplier to 0
appr = appr.where(((hist_scaling.pop != 0) | (hist_scaling.ypcc != 0)).any("year"), 0)

### Calculate growth in wetland value

The approach in the commented-out cell directly below would be most consistent with [Brander et al. 2006](https://www.cbd.int/financial/values/g-valuewetland.pdf) (Table 4.1) and with what is described in the Supplement to Diaz 2016 (p. 15). In other words, we would use the income and population density elasticities over time, in addition to cross-sectionally. We would also make it such that the mean wetland value across all wetlands (weighted by area) represents the mean observed in the datasets used in Brander et al. However, this is not how it is implemented in the code for Diaz 2016, and when used it can give *very* high values for later years. Instead, we adopt the approach from the Diaz 2016 code, which assumes that the US in 2000 has the mean wetland value from Brander 2006, uses the Brander elasticities to scale cross-sectionally, and then uses the [Yohe 2013](http://gyohe.faculty.wesleyan.edu/files/2018/05/37.pdf) elasticities to scale temporally (these are the same as those used to scale land value). *NOTE*: The commented-out code snippet below will require some updating before it can be used.

In [21]:
# income_pop_wetland_contrib = np.log(ypcc) * 1.16 + np.log(pops/careas) * 0.47
# wo_const_wetland_log = (income_pop_wetland_contrib).loc[:, 2000, "SSP2", "IIASA"]
# mean_wo_const = (wo_const_wetland_log * wetland_area_by_iso.to_series()).sum() / wetland_area_by_iso.sum().item()
# const = np.log(out.wvbm).item() - mean_wo_const
# wetlandservice = np.exp(income_pop_wetland_contrib + const).to_xarray().sel(ISO_TER1=out.country_TER1, drop=True)
# assert wetlandservice.notnull().all()

Here is the approach that is taken directly from the code accompanying Diaz 2016:

In [22]:
# cross-sectional scaling terms: income per capita relative to the USA, and
# population density relative to the reference value 27.59
rel_income = (curr_data.ypcc / curr_data.ypcc["USA"]).to_xarray()
rel_popdens = refpopdens.to_xarray() / 27.59

# wetland value = appreciation multiplier x base value (wvbm) x the two terms above,
# each raised to its elasticity
out["wetlandservice"] = (
    appr.sel(country=curr_data.index)
    * out.wvbm.item()
    * rel_income**1.16
    * rel_popdens**0.47
)
assert out.wetlandservice.notnull().all()

## Resilience factor (rho)

In [23]:
# rho = y / (y + y_USA,2000), with y the country-level income per capita
usa_ypcc_2000 = proj_scaling.ypcc.sel(country="USA", year=2000)
out["rho"] = proj_scaling.ypcc / (proj_scaling.ypcc + usa_ypcc_2000)

# rho must be defined everywhere and lie within [0, 1]
assert out.rho.notnull().all()
assert (out.rho.max() <= 1) & (out.rho.min() >= 0)

Adding this in to be able to population-weight rho when collapsing over ADM1

In [24]:
# country-level income per capita, stored alongside rho
out["ypcc"] = proj_scaling["ypcc"]

## Protection Costs

This is a reference protection cost multiplied by a country-level construction cost index

### First priority: 2017 WB ICP

In [25]:
# Import World Bank Int'l Comparison Project [2017] table
# to be used as 1st-preference CCI vals; along with minor clean-up
icp = pd.read_csv(PATH_EXPOSURE_WB_ICP).iloc[1:]
icp = icp.rename(
    columns={
        "Country Code": "country",
        "1501200:CONSTRUCTION [1501200]": "wbcci",
    }
)
icp["wbcci"] = icp["wbcci"].astype("float64")

# filter by year and price level index (world avg = 100) variable, keeping only
# countries in our dataset that have a value (and excluding the "WLD" aggregate)
keep_row = (
    icp.wbcci.notnull()
    & icp.country.isin(out.country.values)
    & icp.Time.eq(2017)
    & icp["Classification Name"].eq("Price level index (World = 100)")
    & icp.country.ne("WLD")
)
icp = icp[keep_row]

# save construction cost price index as df, rescaled so that 100 becomes 1
wb_cci = icp.set_index("country")[["wbcci"]] / 100

### Second priority: Lincke 2021

In [26]:
# Load sea dike construction costs from Lincke 2021 as 2nd-preference CCI vals
lincke = pd.read_csv(
    PATH_EXPOSURE_LINCKE,
    usecols=["locationid", "seadike_unit_cost_rural", "seadike_unit_cost_urban"],
)
lincke = lincke.rename(columns={"locationid": "country"}).set_index("country")

# filter to countries we need
needed_countries = np.unique(out.country)
lincke = lincke.loc[lincke.index.isin(needed_countries)]

# average across urban/rural
lincke_cci = lincke.mean(axis=1)

# scale such that 1 is the average across the retained countries
lincke_cci = lincke_cci / lincke_cci.mean()

### Combine and add to input data

In [27]:
# combine these two and add into main dataset: World Bank ICP values take priority,
# Lincke values fill in wherever the World Bank value is missing
cci = wb_cci.join(lincke_cci.rename("lincke_cci"), how="outer")
cci = cci["wbcci"].fillna(cci["lincke_cci"]).rename("cci")

# address iso3s for unpopulated regions that don't show up in CCI dataset: each new
# code (dict value) copies the index of an existing country (dict key)
cci_mapping = {"TWN": "SP-", "FRA": "CL-"}

# The new codes must not already be present, otherwise the concat below would create
# duplicate index entries. Check the *index* (country codes): testing against the
# Series itself compares the codes with its float values, which can never match.
assert not cci.index.isin(list(cci_mapping.values())).any()
cci = pd.concat((cci, cci.loc[list(cci_mapping)].rename(index=cci_mapping)))

# put in out array: protection cost = construction cost index x reference cost (pc0)
out["pc"] = (cci.to_xarray() * out.pc0).sel(country=all_isos)

## Total wetland value

In [28]:
# wetland area by seg-ADM1 and elevation bin
out["wetland"] = areas.wetland_area_km

## Mobile Capital Fraction

In [29]:
# country-level exposure table restricted to 2000 <= year < 2020
exposure_2000s = pd.read_parquet(
    PATH_COUNTRY_LEVEL_EXPOSURE,
    filters=[("year", ">=", 2000), ("year", "<", 2020)],
    filesystem=FS,
)

# mean movable-capital ratio per country over that window
mobcapfrac = (
    exposure_2000s.k_movable_ratio.groupby("ccode")
    .mean()
    .rename("mobcapfrac")
    .rename_axis("country")
)

# countries not in this dataset which have 0 capital
mobcapfrac["SP-"] = 0
out["mobcapfrac"] = mobcapfrac.to_xarray().sel(country=all_isos)

## Population and Capital

### Load exposure grid

In [30]:
# Import Surge-based Exposure Grid for 'Present Day' (2014)
cols = [
    "seg_adm",
    "z_ix",
    "ISO",
    "area_km",
    "asset_value",
    "pop_landscan",
]
filters = [("protection_zone", "==", -1)]  # Filter out protected pixels
eg = pd.read_parquet(
    PATH_EXPOSURE_BINNED_WITHELEV,
    columns=cols,
    filters=filters,
    filesystem=FS,
)

# grid ix to value (bin midpoint)
eg["elev"] = (eg.z_ix + 0.5) * EXPOSURE_BIN_WIDTH_V
eg = eg.drop(columns="z_ix")

# aggregate to seg-ADM1 x elevation. Only the capital and population columns are
# summed: a bare `.sum()` would also run over the non-numeric `ISO` column, which
# older pandas drops silently and newer pandas either rejects or concatenates. The
# area column is not needed here, so it is left out instead of being dropped later.
eg = (
    eg.groupby(["seg_adm", "elev"], observed=True)[["asset_value", "pop_landscan"]]
    .sum()
    .to_xarray()
    .rename({"pop_landscan": "pop", "asset_value": "K"})
)

# reindex (seg-ADM1 units without exposure get 0) and label as 2019 values
eg_xr = (
    eg.reindex(seg_adm=out.seg_adm).fillna(0).rename({"pop": "pop_2019", "K": "K_2019"})
)

# reindex scaling factors
cds_xr = proj_scaling[["pop_scale", "K_scale"]].sel(country=all_isos)
assert cds_xr.notnull().all().to_array().all()

# merge into dataset
out = xr.merge((out, eg_xr, cds_xr))

## Land Value and Area

In [31]:
# Reference land value: the USA value (dvbm) scaled by each country's income density
# relative to the USA, bounded below by min_fundland and above by dvbm
dvbm = out.dvbm.item()
min_fundland = out.min_fundland.item()
scaled_landval = dvbm * income_spatial_dens / income_spatial_dens["USA"]
fundland = np.minimum(dvbm, np.maximum(min_fundland, scaled_landval)).to_xarray()

# interior land value grows with the appreciation multiplier
out["interior"] = appr * fundland
out["landarea"] = areas.land_area_km

## Add in Coastline Length

In [32]:
# collect the coastline lengths started earlier; results come back in the order of
# `segadm_df.seg_adm`, which is therefore used as the index
coastlen_index = pd.Index(segadm_df.seg_adm.values, name="seg_adm")
coastlens = client.gather(coastlen_ftrs)
out["length"] = pd.Series(coastlens, index=coastlen_index).to_xarray()

## Drop any seg_adms with no exposed area

In [33]:
# a seg-ADM1 unit is kept if it has any capital, population, land or wetland at any
# elevation, or a non-zero coastline length
exposure_vars = ["K_2019", "pop_2019", "landarea", "wetland"]
total_exposure = out[exposure_vars].to_array().sum(["variable", "elev"])
any_exp = (total_exposure > 0) | (out.length > 0)

out = out.sel(seg_adm=any_exp)

## Save

### Cleanup

In [34]:
# lower/upper edge of each elevation bin, half a bin width either side of the midpoint
half_width = EXPOSURE_BIN_WIDTH_V / 2
bound_index = pd.Index(["lower", "upper"], name="bound")
out["elev_bounds"] = xr.concat(
    [out.elev - half_width, out.elev + half_width], dim=bound_index
)

In [35]:
# store double-precision variables in single precision
float64_vars = [name for name in out.data_vars if out[name].dtype == "float64"]
for name in float64_vars:
    out[name] = out[name].astype("float32")

In [36]:
# order the dimensions as listed in CHUNKS, then chunk accordingly
out = out.transpose(*CHUNKS).chunk(CHUNKS)

#### Add attrs

In [37]:
# unit label shared by every monetary variable
dollar_units = "2019 USD PPP"

# coords
out.seg_adm.attrs.update(
    description=(
        "Unique combinations of coastline segment and ADM1 unit. Each is treated "
        "as an independent unit in pyCIAM."
    )
)
out.params.attrs.update(
    description="Gumbel parameters for ESL/storm surge distribution"
)
out.return_period.attrs.update(long_name="Return periods", units="y")
out.elev.attrs.update(
    long_name="Elevation",
    description=(
        "Midpoint elevation for each coastal elevation bin employed in pyCIAM"
    ),
    units="m",
)
out.ssp.attrs.update(
    long_name="Shared Socioeconomic Pathway",
    description="Trajectories of income, capital, and population growth",
)
out.iam.attrs.update(
    {
        "long_name": "Growth Model",
        # The parentheses only group the two string pieces. There must be no trailing
        # comma inside them, or the description becomes a 1-tuple instead of a string.
        "description": (
            "Independent models used to simulate income and capital growth for each "
            "SSP"
        ),
    }
)
out.country.attrs.update(
    description=(
        "Dimension used for variables that exhibit only country-level variance"
    )
)

# alternate coords
out.seg.attrs.update(description="Segment associated with each seg-ADM1")
out.adm1.attrs.update(description="ADM1 unit associated with each seg-ADM1")
out.seg_country.attrs.update(
    description="Country associated with each analysis unit"
)

# data_vars: metadata for each variable, applied in one pass below
data_var_attrs = {
    "interior": {
        "long_name": "Value of non-coastal land",
        "units": f"{dollar_units} per km2",
    },
    "SLR_site_id": {
        "long_name": "SLR Site ID",
        "description": "SLR Site ID for closest 2-deg LSLR projection grid cell",
    },
    "elev_bounds": {
        "long_name": "Elevation bounds",
        "description": (
            "Lower and upper bounds for each coastal elevation bin employed in pyCIAM"
        ),
        "units": "m",
    },
    "length": {
        "description": "Length of coastline associated with each analysis unit",
        "units": "km",
    },
    "gumbel_params": {
        "description": "Parameters describing ESL distribution",
        "units": "m",
    },
    "surge_height": {
        "description": "Estimated ESL/storm surge heights",
        "units": "m",
    },
    "wetland": {
        "description": "Estimated area for all wetland by elevation",
        "units": "km^2",
    },
    "wetlandservice": {
        "description": "Value of wetlands",
        "units": f"{dollar_units} per km^2",
    },
    "K_2019": {
        "long_name": "2019 Capital stock",
        "description": (
            "Estimated value of physical capital in 2019, under actual conditions "
            "(i.e. observed, not SSP)"
        ),
        "units": dollar_units,
    },
    "pop_2019": {
        "long_name": "2019 population",
        "description": (
            "Estimated population in 2019, under actual conditions (i.e. observed, not "
            "SSP)"
        ),
        "units": "people",
    },
    "K_scale": {
        "long_name": "Capital scaling factor",
        "description": (
            "Country-level change factor in capital stock specific to SSP/IAM, "
            "relative to K_2019"
        ),
    },
    "pop_scale": {
        "long_name": "Population scaling factor",
        "description": (
            "Country-level change factor in population specific to SSP/IAM, relative "
            "to pop_2019"
        ),
    },
    "ypcc": {
        "long_name": "Country-level income per capita",
        "units": f"{dollar_units} per person",
    },
    "landarea": {
        "long_name": "Total Land Area",
        "units": "km^2",
    },
    "pc": {
        "description": "Protection cost (quadratic with height)",
        "units": f"{dollar_units} per km per vert m^2",
    },
    "mobcapfrac": {
        "description": "Fraction of capital that is mobile",
    },
    "rho": {
        "description": (
            "Resilience factor scaling depth-damage and depth-mortality functions"
        ),
    },
    "dr": {"description": "Discount rate"},
    "wmaxrate": {"units": "m per year"},
}
for var_name, var_attrs in data_var_attrs.items():
    out[var_name].attrs.update(var_attrs)

# names written to the output store; everything else in `out` is left behind
to_keep = [
    # bin edges and alternate labels for each analysis unit
    "elev_bounds",
    "seg",
    "adm1",
    "country",
    "seg_country",
    # physical inputs
    "SLR_site_id",
    "length",
    "pc",
    "mobcapfrac",
    "gumbel_params",
    "surge_height",
    # socioeconomic inputs
    "rho",
    "K_2019",
    "pop_2019",
    "K_scale",
    "pop_scale",
    "ypcc",
    # land and wetland
    "landarea",
    "interior",
    "wetland",
    "wetlandservice",
]

# print dataset to verify it looks as expected
out[to_keep]

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 91.48 kiB 3.91 kiB Shape (11709,) (500,) Count 24 Tasks 24 Chunks Type object numpy.ndarray",11709  1,

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 91.48 kiB 3.91 kiB Shape (11709,) (500,) Count 24 Tasks 24 Chunks Type object numpy.ndarray",11709  1,

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 91.48 kiB 3.91 kiB Shape (11709,) (500,) Count 24 Tasks 24 Chunks Type object numpy.ndarray",11709  1,

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.56 kiB,1.56 kiB
Shape,"(200, 2)","(200, 2)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.56 kiB 1.56 kiB Shape (200, 2) (200, 2) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",2  200,

Unnamed: 0,Array,Chunk
Bytes,1.56 kiB,1.56 kiB
Shape,"(200, 2)","(200, 2)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 91.48 kiB 3.91 kiB Shape (11709,) (500,) Count 24 Tasks 24 Chunks Type object numpy.ndarray",11709  1,

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,45.74 kiB,1.95 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 45.74 kiB 1.95 kiB Shape (11709,) (500,) Count 24 Tasks 24 Chunks Type float32 numpy.ndarray",11709  1,

Unnamed: 0,Array,Chunk
Bytes,45.74 kiB,1.95 kiB
Shape,"(11709,)","(500,)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,816 B,816 B
Shape,"(204,)","(204,)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 816 B 816 B Shape (204,) (204,) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",204  1,

Unnamed: 0,Array,Chunk
Bytes,816 B,816 B
Shape,"(204,)","(204,)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,816 B,816 B
Shape,"(204,)","(204,)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 816 B 816 B Shape (204,) (204,) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",204  1,

Unnamed: 0,Array,Chunk
Bytes,816 B,816 B
Shape,"(204,)","(204,)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709, 2)","(500, 2)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 91.48 kiB 3.91 kiB Shape (11709, 2) (500, 2) Count 24 Tasks 24 Chunks Type float32 numpy.ndarray",2  11709,

Unnamed: 0,Array,Chunk
Bytes,91.48 kiB,3.91 kiB
Shape,"(11709, 2)","(500, 2)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,182.95 kiB,7.81 kiB
Shape,"(11709, 4)","(500, 4)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 182.95 kiB 7.81 kiB Shape (11709, 4) (500, 4) Count 24 Tasks 24 Chunks Type float32 numpy.ndarray",4  11709,

Unnamed: 0,Array,Chunk
Bytes,182.95 kiB,7.81 kiB
Shape,"(11709, 4)","(500, 4)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 804.84 kiB 804.84 kiB Shape (5, 2, 101, 204) (5, 2, 101, 204) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",5  1  204  101  2,

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.93 MiB,390.62 kiB
Shape,"(11709, 200)","(500, 200)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 8.93 MiB 390.62 kiB Shape (11709, 200) (500, 200) Count 24 Tasks 24 Chunks Type float32 numpy.ndarray",200  11709,

Unnamed: 0,Array,Chunk
Bytes,8.93 MiB,390.62 kiB
Shape,"(11709, 200)","(500, 200)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.93 MiB,390.62 kiB
Shape,"(11709, 200)","(500, 200)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 8.93 MiB 390.62 kiB Shape (11709, 200) (500, 200) Count 24 Tasks 24 Chunks Type float32 numpy.ndarray",200  11709,

Unnamed: 0,Array,Chunk
Bytes,8.93 MiB,390.62 kiB
Shape,"(11709, 200)","(500, 200)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 804.84 kiB 804.84 kiB Shape (5, 2, 101, 204) (5, 2, 101, 204) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",5  1  204  101  2,

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 804.84 kiB 804.84 kiB Shape (5, 2, 101, 204) (5, 2, 101, 204) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",5  1  204  101  2,

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 804.84 kiB 804.84 kiB Shape (5, 2, 101, 204) (5, 2, 101, 204) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",5  1  204  101  2,

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.93 MiB,390.62 kiB
Shape,"(11709, 200)","(500, 200)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 8.93 MiB 390.62 kiB Shape (11709, 200) (500, 200) Count 24 Tasks 24 Chunks Type float32 numpy.ndarray",200  11709,

Unnamed: 0,Array,Chunk
Bytes,8.93 MiB,390.62 kiB
Shape,"(11709, 200)","(500, 200)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 804.84 kiB 804.84 kiB Shape (5, 2, 101, 204) (5, 2, 101, 204) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",5  1  204  101  2,

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8.93 MiB,390.62 kiB
Shape,"(11709, 200)","(500, 200)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 8.93 MiB 390.62 kiB Shape (11709, 200) (500, 200) Count 24 Tasks 24 Chunks Type float32 numpy.ndarray",200  11709,

Unnamed: 0,Array,Chunk
Bytes,8.93 MiB,390.62 kiB
Shape,"(11709, 200)","(500, 200)"
Count,24 Tasks,24 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 804.84 kiB 804.84 kiB Shape (5, 2, 101, 204) (5, 2, 101, 204) Count 1 Tasks 1 Chunks Type float32 numpy.ndarray",5  1  204  101  2,

Unnamed: 0,Array,Chunk
Bytes,804.84 kiB,804.84 kiB
Shape,"(5, 2, 101, 204)","(5, 2, 101, 204)"
Count,1 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [38]:
# write the selected variables to the SLIIDERS-ECON store, replacing existing contents
out[to_keep].to_zarr(store=PATH_SLIIDERS_ECON, mode="w")



<xarray.backends.zarr.ZarrStore at 0x7fbb9563e3c0>

In [39]:
# Close the Dask cluster handle and the client; written as a tuple so both calls run
# in a single expression (the cell therefore displays their two return values).
cluster.close(), client.close()

(None, None)