# Create SLIIDERS

This notebook creates the SLIIDERS dataset, starting from a segment X administrative region X elevation dataset of capital stock and population + country-level scaling factors.

In [1]:
%load_ext autoreload
%autoreload 2

## Setup

In [5]:
from collections import OrderedDict

import geopandas as gpd
import numpy as np
import pandas as pd
import sliiders.settings as sset
import statsmodels.formula.api as smf
import xarray as xr
from dask_gateway import Gateway
from scipy.stats import gumbel_r
from sliiders.dask import start_cluster
from sliiders.io import open_zarr, save
from sliiders.spatial import coastlen_poly, grid_ix_to_val, spherical_nearest_neighbor

In [3]:
# Lower and upper bounds on the number of dask workers used when the cluster is
# put into adaptive scaling mode
N_WORKERS_MIN, N_WORKERS_MAX = 7, 120

### Start Dask Cluster

This section can be modified for whatever computing resources you have, but the result must be a dask distributed `Client` object assigned to `client`.

In [6]:
# start the cluster and let it scale adaptively between the configured worker bounds
client, cluster = start_cluster()
cluster.adapt(minimum=N_WORKERS_MIN, maximum=N_WORKERS_MAX)
# last expression of the cell: display the cluster object
cluster

VBox(children=(HTML(value='<h2>GatewayCluster</h2>'), HBox(children=(HTML(value='\n<div>\n<style scoped>\n    …

## Define Paths and parameters

In [7]:
# Chunk size along each dimension of the output data (i.e. CIAM inputs).
# The key order matters: it is also the dimension order the dataset is transposed
# to before it is chunked.
CHUNKS = OrderedDict(
    [
        ("year", -1),
        ("seg_adm", 500),
        ("elev", -1),
        ("params", -1),
        ("return_period", -1),
        ("bound", -1),
        ("country", -1),
        ("ssp", 1),
        ("iam", 1),
    ]
)

## Constants from Original CIAM

These values are taken from the original implementation of CIAM (Diaz 2016) with dollar values updated to be in 2019 dollars.

In [8]:
# All dollar values below are in 2019 USD.

# Reference value of dry land, per sq km
DVBM = 6_279_000

# Floor on land value: any lower value is raised to this
MIN_FUNDLAND = 5_800

# Reference cost of protection, per km of coastline per vertical m squared
PC0 = 7_031_000

# Annual value of wetland services, per sq km
WVBM = 439_100

## Prep coastline Lengths

In [9]:
# Load the low-elevation seg-adm Voronoi intersections, ordered by seg_adm
segreg_df = pd.read_parquet(
    sset.PATH_SEG_REGION_VORONOI_INTERSECTIONS,
    columns=["station_id", "adm1", "ISO", "seg_adm"],
    filters=[("lowelev", "=", True)],
).sort_values("seg_adm")

# The segment ID is the first two "_"-separated tokens of each seg_adm label
seg_adm_tokens = segreg_df.reset_index()["seg_adm"].str.split("_")
all_segs = seg_adm_tokens.apply(lambda tokens: "_".join(tokens[:2])).values

# Overwrite station_id with the derived segment ID (row-order assignment)
segreg_df["station_id"] = all_segs

# Unique segments and countries present in the data
valid_segs = np.unique(all_segs)
all_isos = segreg_df.ISO.unique()

In [10]:
# get coastline length calculations started. For some reason, we need this wrapper b/c
# permissions get lost when the cloudpathlib GSPath object is passed as a kwarg
def this_coastlen_poly(seg_adm):
    """Call ``coastlen_poly`` for one ``seg_adm`` with the project paths filled in.

    The coastline shapefile path and the seg-adm Voronoi parquet path are looked up
    from ``sset`` inside the function body instead of being forwarded as keyword
    arguments through ``client.map``.

    Parameters
    ----------
    seg_adm
        A single value taken from ``segreg_df.index``.

    Returns
    -------
    Whatever ``coastlen_poly`` returns for this unit. The gathered results are
    later stored as the ``length`` variable of the output dataset.
    """
    return coastlen_poly(
        seg_adm,
        coastlines_shp_path=sset.PATH_GEOG_COASTLINES,
        seg_adm_voronoi_parquet_path=sset.PATH_SEG_REGION_VORONOI_INTERSECTIONS,
        seg_var="seg_adm",
    )


# submit one task per seg_adm now; the futures are only gathered later in the
# notebook, so the rest of the dataset is assembled while they run
coastlen_ftrs = client.map(this_coastlen_poly, segreg_df.index.values)

## Initialize pyCIAM input dataset

In [11]:
# Column names as they should appear in the output dataset
segreg_renames = {
    "ISO": "seg_country",
    "station_id": "seg",
    "length_km": "length",
}
out = segreg_df.rename(columns=segreg_renames).to_xarray()

# seg_country, adm1 and seg are labels of each seg_adm, so store them as coordinates
out = out.set_coords(["seg_country", "adm1", "seg"])

## Income

### Load country-level scaling data to forecast and backcast gdppc, pop, capital

In [12]:
# Country-level historical population and capital stock in the scaling reference year
hist_scaling_df = pd.read_parquet(
    sset.PATH_EXPOSURE_YPK_COUNTRY_HIST_INT,
    columns=["pop", "rnna_19"],
    filters=[("year", "=", sset.SOCIOECONOMIC_SCALE_YR)],
)
hist_scaling = hist_scaling_df.to_xarray().rename({"rnna_19": "K"}).squeeze(drop=True)

# Countries absent from the historical table must be known uninhabited areas;
# they are added with zeros
isos_without_hist = all_isos[~np.isin(all_isos, hist_scaling.ccode)]
assert np.isin(isos_without_hist, sset.UNINHABITED_ISOS).all()
hist_scaling = hist_scaling.reindex(ccode=all_isos, fill_value=0)

# Country-level projections, with variables renamed and unused ones dropped
scaling = open_zarr(sset.PATH_EXPOSURE_YPK_COUNTRY_PROJ_INT, chunks=None)
scaling = scaling.rename({"capital": "K", "gdppc": "ypcc"})
scaling = scaling.drop_vars(["IY", "KY", "mpk"])

# Same uninhabited-area handling for the projections
isos_without_proj = all_isos[~np.isin(all_isos, scaling.ccode)]
assert np.isin(isos_without_proj, sset.UNINHABITED_ISOS).all()
scaling = scaling.reindex(ccode=all_isos, fill_value=0)

# Scaling factor = projected value / historical reference-year value, set to 0
# wherever the historical value is 0
projected_over_hist = scaling[["K", "pop"]] / hist_scaling
scaling[["K_scale", "pop_scale"]] = projected_over_hist.where(hist_scaling != 0, 0)

# handle 0-population areas: nothing may be left null
assert scaling.notnull().all().to_array().all()

## ESL heights and Gumbel params

In [17]:
# Open the GTSM surge dataset, shorten the variable names, and re-key it by
# station ID (the dimension is named "stations" afterwards).
# NOTE(review): the following cell repeats this exact load before `gtsm_msl` is
# first used, so this cell looks redundant — confirm and consider removing it.
gtsm_msl = (
    xr.open_dataset(sset.PATH_GEOG_GTSM_SURGE)
    .rename(
        {
            "return_periods": "rps",
            "gumbel_parameters": "params",
            "station_y_coordinate": "lat",
            "station_x_coordinate": "lon",
        }
    )
    .drop_vars("station_name")
    .swap_dims(stations="station_id")
    .rename(station_id="stations")
)

In [18]:
# GTSM-based CIAM segment points
segpts_orig = pd.read_parquet(sset.PATH_SEG_CENTROIDS)
segpts = segpts_orig.copy().reset_index().rename(columns={"station_id": "seg"})
# relabel each point as "seg_<last token of the station ID>" to match `valid_segs`
segpts["seg"] = segpts.seg.str.split("_").apply(lambda x: "seg_" + x[-1])
segpts = segpts.set_index("seg").loc[valid_segs].sort_index()

# Split seg df into GTSM stations and manually added new segments
# (manually added segments are the ones whose ID contains "_990")
added_seg_ids = segpts_orig.index.str.contains("_990")
old_segs = segpts_orig[~added_seg_ids]
added_segs = segpts_orig[added_seg_ids]

# Find the nearest GTSM station for each manually added segment point
nn_added = spherical_nearest_neighbor(added_segs, old_segs)

# Add column for nearest GTSM station (most will be self-equivalent).
# The column is built as a standalone Series and then assigned, rather than
# updated in place through `segpts_orig.near_station_id.update(...)`: under pandas
# Copy-on-Write that chained in-place update would leave the DataFrame unchanged.
near_station_id = pd.Series(segpts_orig.index.values, index=segpts_orig.index)
near_station_id.update(nn_added)
segpts_orig["near_station_id"] = near_station_id

# CoDEC GTSM Surge Height Data [Muis et al. 2020]
gtsm_msl = (
    xr.open_dataset(sset.PATH_GEOG_GTSM_SURGE)
    .rename(
        {
            "return_periods": "rps",
            "gumbel_parameters": "params",
            "station_y_coordinate": "lat",
            "station_x_coordinate": "lon",
        }
    )
    .drop_vars("station_name")
    .swap_dims(stations="station_id")
    .rename(station_id="stations")
)
# parameter labels are stored as bytes; map them to "loc" / "scale"
gtsm_msl["params"] = [
    "loc" if i.startswith(b"location") else "scale" for i in gtsm_msl.params.values
]
gtsm_msl["stations"] = gtsm_msl.stations.astype(str)

# Filter by unique GTSM segment points
gtsm_msl = gtsm_msl.sel(
    stations=np.unique(segpts_orig.near_station_id.values)
).GUM.rename("")

# create return heights (setting "1-year" return period to 99th percentile as per NOAA
# tides and currents ESL plots). Annual frequencies above 0.99 are capped at 0.99, so
# the probability passed to the Gumbel quantile function is never below 0.01.
inv_freq = 1 / sset.SVALS
inv_freq = np.where(inv_freq > 0.99, 0.99, inv_freq)
hts = gumbel_r.ppf(
    1 - inv_freq, loc=gtsm_msl.sel(params=["loc"]), scale=gtsm_msl.sel(params=["scale"])
)
# negative heights are set to 0
hts = np.where(hts > 0, hts, 0)
hts = xr.DataArray(
    hts,
    dims=["stations", "return_period"],
    coords={"stations": gtsm_msl.stations.values, "return_period": sset.SVALS},
)

# combine params and pre-calculated heights
esl_hts = xr.Dataset({"gumbel_params": gtsm_msl, "surge_height": hts})

# reindex to match segs: one entry per segment point, taken from its nearest station
esl_hts = esl_hts.sel(stations=segpts_orig.near_station_id.values).rename(
    stations="seg"
)
esl_hts["seg"] = segpts_orig.index.values
# relabel as "seg_" + last five characters of the original ID
esl_hts["seg"] = "seg_" + esl_hts.seg.str[-5:]

# reindex to match seg-adms
esl_hts = esl_hts.drop_vars(["lon", "lat"]).sel(seg=out.seg, drop=True)

In [19]:
# join with previous input data (brings the variables of `esl_hts` into `out`)
out = xr.merge((out, esl_hts))

## Wetland and land areas

In [20]:
# Land and wetland area by seg_adm, protection zone and elevation bin index
areas = pd.read_parquet(
    sset.PATH_EXPOSURE_AREA_BY_CIAM_AND_ELEVATION,
    # uncomment if you only want to count unprotected area
    # filters=[("protection_zone", "==", -1)],
    columns=[
        "seg_adm",
        "protection_zone",
        "z_ix",
        "land_area_km",
        "wetland_area_km",
    ],
)

# each (seg_adm, elevation bin, protection zone) combination must appear only once
assert areas.set_index(["seg_adm", "z_ix", "protection_zone"]).index.is_unique

# assume low-elevation protected areas are actually at 0 elevation and add in all
# below-0 wetlands (~10%) into the lowest elevation bin
# (z_ix is kept as-is only where protection_zone == -1 AND land area is nonzero;
# every other row has a negative z_ix raised to 0)
areas["z_ix"] = areas.z_ix.where(
    (areas.protection_zone == -1) & (areas.land_area_km != 0), np.maximum(areas.z_ix, 0)
)
areas = areas.set_index(["seg_adm", "z_ix", "protection_zone"])

# ignore protection-zone
areas = areas.groupby(["seg_adm", "z_ix"], observed=True).sum()
# after the adjustment above no bin index may be negative
assert areas.index.get_level_values("z_ix").min() >= 0

# convert to dataset (missing seg_adm/bin combinations become 0 area)
areas = areas.sort_index().loc[(slice(None), slice(0, None)), :].to_xarray().fillna(0)

# bin index to bin midpoint
areas["z_ix"] = grid_ix_to_val(areas.z_ix.values, sset.EXPOSURE_BIN_WIDTH_V)

# cover all seg-adms (those without area data get 0) and call the dimension "elev"
areas = areas.reindex(seg_adm=out.seg_adm.values, fill_value=0).rename(z_ix="elev")

# get this to use later in calculating wetland services
wetland_area_by_iso = areas.wetland_area_km.groupby(out.seg_country).sum().sum("elev")

## Wetland Value

### Get total area by country

In [21]:
# load country geometries, indexed by "country", and declare them as lon/lat
# (EPSG:4326). `set_crs(..., allow_override=True)` is the supported way to (re)assign
# a CRS; assigning to the `.crs` attribute of an object that already has a CRS is
# deprecated in geopandas.
iso_geoms = (
    gpd.read_parquet(sset.PATH_GADM_ADM0_INT)
    .geometry.rename_axis("country")
    .set_crs("epsg:4326", allow_override=True)
)

# calculate area in a cylindrical equal-area projection
iso_geoms = iso_geoms.to_crs({"proj": "cea"})
careas = iso_geoms.area / 1e6  # km2 areas

### Get current-day country-level population and income densities

In [22]:
# Historical population, capital and income per capita from the scaling reference
# year onwards
hist_latest_df = pd.read_parquet(
    sset.PATH_EXPOSURE_YPK_COUNTRY_HIST_INT,
    columns=["pop", "rnna_19", "rgdpna_pc_19"],
    filters=[("year", ">=", sset.SOCIOECONOMIC_SCALE_YR)],
)
hist_latest = (
    hist_latest_df.to_xarray()
    .rename({"rnna_19": "K", "rgdpna_pc_19": "ypcc"})
    .squeeze(drop=True)
)

# "Current" data = the most recent year available
ref_year = hist_latest.year.max().item()
curr_data = hist_latest.sel(year=ref_year).to_dataframe()

# Population density (people per km2) needs an area for every country
country_area_km2 = careas.loc[curr_data.index]
refpopdens = curr_data["pop"] / country_area_km2
assert refpopdens.notnull().all()

# Income per unit area, ordered like the projection dataset's countries
income_spatial_dens = (refpopdens * curr_data.ypcc).loc[scaling.ccode]

### Calculate land value appreciation via Yohe 1999 via Abraham and Hendershott 1993

In CIAM (Diaz, 2016), the constant and the lagged appreciation variable are dropped from the original growth regression. Here we add them back in.

In [23]:
# initialize output array
appr = xr.zeros_like(scaling.ypcc)

# add a previous year and seed w/ initial conditions of no appreciation
# (the log-differences are year-over-year changes in population and income per capita;
# the prepended year is one step before the first year: 2 * year[0] - year[1])
logdiff_pop = np.log(scaling.pop).diff("year")
logdiff_y = np.log(scaling.ypcc).diff("year")
appr = appr.reindex(
    year=np.concatenate(([(appr.year[0] * 2 - appr.year[1]).item()], appr.year.values))
).fillna(0)

# recursion over years: each year depends on the two preceding years, so the loop
# starts at the third entry of the extended year axis and must run in order
for yr in appr.year[2:]:
    appr.loc[{"year": yr}] = (
        -0.006
        + 0.313 * logdiff_pop.sel(year=yr, drop=True)
        + 0.565 * logdiff_y.sel(year=yr, drop=True)
        + 1.402 * appr.sel(year=yr - 1, drop=True)
        - 0.402 * appr.sel(year=yr - 2, drop=True)
    )

# drop the pre-period initial condition we used
appr = appr.isel(year=slice(1, None))

# now exponentiate to get multiplier scaling factor
appr = np.exp(appr)

# set equal to one in dataset's reference year (any null result is set to 0)
appr = (appr / appr.sel(year=ref_year)).fillna(0)

  result_data = func(*input_data)


### Calculate growth in wetland value

This approach would be most consistent with [Brander et al. 2006](https://www.cbd.int/financial/values/g-valuewetland.pdf) (Table 4.1) and what was described in the Supplement to Diaz 2016 (p. 15). In other words, we would use the income and population density elasticities over time, in addition to cross-sectionally. We would also make it such that the mean wetland value across all wetlands (weighted by area) represents the mean observed in the datasets used in Brander et al. However, this is not how it is implemented in the code for Diaz 2016, and when used can give *very* high values for later years. Instead, we adopt the approach from the Diaz 2016 code, which states that the US in 2000 assumes the mean wetland value from Brander 2006, uses the Brander elasticities to scale cross-sectionally, then uses the [Yohe 2013](http://gyohe.faculty.wesleyan.edu/files/2018/05/37.pdf) elasticities to scale temporally (these are the same as those used to scale land value). *NOTE*: This code snippet will require some updating.

In [24]:
# income_pop_wetland_contrib = np.log(ypcc) * 1.16 + np.log(pops/careas) * 0.47
# wo_const_wetland_log = (income_pop_wetland_contrib).loc[:, 2000, "SSP2", "IIASA"]
# mean_wo_const = (wo_const_wetland_log * wetland_area_by_iso.to_series()).sum() / wetland_area_by_iso.sum().item()
# const = np.log(out.wvbm).item() - mean_wo_const
# wetlandservice = np.exp(income_pop_wetland_contrib + const).to_xarray().sel(ISO_TER1=out.country_TER1, drop=True)
# assert wetlandservice.notnull().all()

Here is the approach that is taken directly from the code accompanying Diaz 2016:

In [25]:
# Cross-sectional scaling factors: income per capita relative to the USA, and
# population density relative to a fixed reference value
# NOTE(review): 27.59 is presumably the reference population density in people
# per km2 — confirm against the Diaz 2016 code
income_ratio = (curr_data.ypcc / curr_data.ypcc["USA"]).to_xarray()
popdens_ratio = refpopdens.to_xarray() / 27.59

# Reference wetland value, scaled over time by `appr` and across countries by the
# two ratios raised to their elasticities
wetlandservice = appr * WVBM * income_ratio**1.16 * popdens_ratio**0.47
out["wetlandservice"] = wetlandservice.rename(ccode="country")
assert out.wetlandservice.notnull().all()

## Resilience factor (rho)

In [26]:
# rho = ypcc / (ypcc + USA ypcc in the first projection year)
usa_ref_ypcc = scaling.ypcc.sel(ccode="USA", year=sset.PROJ_YEARS[0])
rho = scaling.ypcc / (scaling.ypcc + usa_ref_ypcc)
out["rho"] = rho.rename(ccode="country")

# rho must be defined everywhere and lie within [0, 1]
assert out.rho.notnull().all() & (out.rho.max() <= 1) & (out.rho.min() >= 0)

Adding this in to be able to population-weight rho when collapsing over region

In [27]:
# country-level income per capita, relabeled from `ccode` to the `country` dimension
out["ypcc"] = scaling.ypcc.rename(ccode="country")

## Protection Costs

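This is a reference protection cost multiplied by a country-level construction cost index (CCI). CCIs are taken from the World Bank ICP where available and from Lincke 2021 otherwise; for countries in neither source they are estimated via a log-log linear regression on income per capita.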

In [28]:
# `ccode` column of the historical country-code mapping table; used in the next cell
# (via `Series.replace`) to remap the country codes of the ICP table
ADM0_MAPPINGS = pd.read_parquet(sset.PATH_HIST_CCODE_MAPPING).ccode

In [29]:
# World Bank Int'l Comparison Project [2017] table: first-preference source of
# CCI values. The first data row is skipped and the two columns we need are renamed.
icp = pd.read_csv(sset.PATH_EXPOSURE_WB_ICP).iloc[1:]
icp = icp.rename(
    columns={
        "Country Code": "country",
        "1501200:CONSTRUCTION [1501200]": "wbcci",
    }
)
icp["wbcci"] = icp["wbcci"].astype("float64")

# handle country mappings
icp["country"] = icp.country.replace(ADM0_MAPPINGS)

# keep 2017 price level index (world avg = 100) rows with a value, for countries in
# our dataset, excluding the world aggregate
is_price_level_index = icp["Classification Name"] == "Price level index (World = 100)"
rows_to_keep = (
    icp.wbcci.notnull()
    & icp.country.isin(out.country.values)
    & icp.Time.eq(2017)
    & is_price_level_index
    & (icp.country != "WLD")
)
icp = icp[rows_to_keep]

# construction cost price index by country, rescaled so the world average is 1
wb_cci = icp.set_index("country").wbcci / 100

In [30]:
# Sea dike construction costs from Lincke 2021: second-preference source of CCI vals
lincke = pd.read_csv(
    sset.PATH_EXPOSURE_LINCKE,
    usecols=["locationid", "seadike_unit_cost_rural", "seadike_unit_cost_urban"],
)
lincke = lincke.rename(columns={"locationid": "country"}).set_index("country")

# restrict to the countries we need
lincke = lincke.loc[lincke.index.isin(all_isos)]

# mean of the urban and rural unit costs, rescaled such that 1 is the average
# across the retained countries
urban_rural_mean = lincke.mean(axis=1)
lincke_cci = urban_rural_mean / urban_rural_mean.mean()

In [31]:
# Preference order: World Bank ICP first, then Lincke for countries the ICP lacks
cci = pd.concat((wb_cci, lincke_cci[~lincke_cci.index.isin(wb_cci.index)])).rename(
    "cci"
)

# estimate other CCIs using income elasticity: fit log(cci) on log(ypcc) over the
# countries that have both, then predict for the countries with no CCI yet
df = curr_data[["ypcc"]].join(cci, how="inner")
df = df[df.ypcc != 0]
reg = smf.ols("np.log(cci) ~ np.log(ypcc)", df).fit()
predicted = np.exp(
    reg.predict(curr_data.loc[~curr_data.index.isin(cci.index), ["ypcc"]])
)
cci = pd.concat([cci, predicted]).rename("cci")

# fill in other unpopulated areas not in lincke (e.g. with 0 ypcc) with global average.
# "valid" uses the same `> 0` test as the fill below, so zero AND null CCIs are both
# left out of the population-weighted average (a `!= 0` test would let nulls through
# and count their population in the denominator only).
valid = cci[cci > 0]
valid_pop = curr_data.loc[valid.index, "pop"]
global_avg = (valid * valid_pop).sum() / valid_pop.sum()
cci = cci.where(cci > 0, global_avg).rename_axis("country")

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [32]:
# put cci in out array: protection cost = country CCI (for the countries in
# `all_isos`) times the reference protection cost PC0
out["pc"] = cci.to_xarray().sel(country=all_isos) * PC0

## Total wetland area

In [33]:
# wetland area by seg_adm and elevation bin, taken from the `areas` dataset
out["wetland"] = areas["wetland_area_km"]

## Mobile Capital Fraction

In [34]:
# Mean movable-capital ratio per country over the 20 years before the final
# historical year (the final year itself is excluded)
last_hist_year = sset.HISTORICAL_YEARS[-1]
mobcap_hist = pd.read_parquet(
    sset.PATH_EXPOSURE_YPK_COUNTRY_HIST_INT,
    filters=[
        ("year", ">=", last_hist_year - 20),
        ("year", "<", last_hist_year),
    ],
)
mobcapfrac = (
    mobcap_hist.groupby("ccode")
    .k_movable_ratio.mean()
    .rename("mobcapfrac")
    .rename_axis("country")
)

# every ISO must either have data or be a known uninhabited area
isos_accounted_for = np.concatenate((mobcapfrac.index.values, sset.UNINHABITED_ISOS))
assert np.isin(all_isos, isos_accounted_for).all()

# countries without data are assigned a mobile fraction of 0.5
mobcapfrac = mobcapfrac.reindex(out.country.values, fill_value=0.5).to_xarray()

In [35]:
# store the country-level mobile capital fraction in the output dataset
out["mobcapfrac"] = mobcapfrac

## Population and Capital

### Load exposure grid

In [36]:
# Import Surge-based Exposure Grid for 'Present Day' (2014)
cols = ["seg_adm", "z_ix", "asset_value", "pop", "protection_zone"]
# uncomment if we want to assume infinite protection for currently protected areas
# filters = [("protection_zone", "==", -1)]
filters = None

eg = pd.read_parquet(
    sset.PATH_EXPOSURE_BINNED_WITHELEV,
    columns=cols,
    filters=filters,
).rename(columns={"asset_value": "K"})

# assume all protected below-SLR areas are in the first elevation bin
# (rows with protection_zone == -1 must already have a non-negative bin index; for
# all other rows a negative bin index is raised to 0)
assert eg.z_ix.where(eg.protection_zone == -1).min() >= 0
eg["z_ix"] = eg.z_ix.where(eg.protection_zone == -1, np.maximum(eg.z_ix, 0))
eg = eg.drop(columns="protection_zone")

# grid ix to value
eg["elev"] = grid_ix_to_val(eg.z_ix.values, sset.EXPOSURE_BIN_WIDTH_V)
eg = eg.drop(columns="z_ix")

# turn into dataset: sum within each (seg_adm, elev), align to the seg_adms in `out`
# (filling gaps with 0), and tag the variable names with the reference year
eg_xr = (
    eg.groupby(["seg_adm", "elev"], observed=True)
    .sum()
    .to_xarray()
    .reindex(seg_adm=out.seg_adm)
    .fillna(0)
    .rename(
        {
            "pop": f"pop_{sset.SOCIOECONOMIC_SCALE_YR}",
            "K": f"K_{sset.SOCIOECONOMIC_SCALE_YR}",
        }
    )
)

# reindex scaling factors
cds_xr = scaling[["pop_scale", "K_scale"]].sel(ccode=all_isos).rename(ccode="country")
assert cds_xr.notnull().all().to_array().all()

In [37]:
# merge into dataset: reference-year exposure (`eg_xr`) and the country-level
# scaling factors (`cds_xr`)
out = xr.merge((out, eg_xr, cds_xr))

## Land Value and Area

In [38]:
# Land value scales with income density relative to the USA and is then bounded
# below by MIN_FUNDLAND and above by DVBM
fundland_unbounded = DVBM * income_spatial_dens / income_spatial_dens.USA
fundland_floored = np.maximum(MIN_FUNDLAND, fundland_unbounded)
fundland = np.minimum(DVBM, fundland_floored).to_xarray().rename(ccode="country")

# interior land value over time = appreciation multiplier x reference-year land value
out["interior"] = appr.rename(ccode="country") * fundland
out["landarea"] = areas.land_area_km

## Add in Coastline Length

In [39]:
# collect the results of the coastline-length futures submitted earlier and label
# them with the same seg_adm index whose values were passed to `client.map`
out["length"] = pd.Series(
    client.gather(coastlen_ftrs),
    index=segreg_df.index,
).to_xarray()

## Drop any seg-regions with no exposed area or countries with no value (e.g. uninhabited atolls with their own ISO)

In [40]:
# A seg_adm is kept if it has any capital, population, land or wetland summed over
# all elevation bins, or if it has nonzero coastline length
exposure_vars = [
    f"K_{sset.SOCIOECONOMIC_SCALE_YR}",
    f"pop_{sset.SOCIOECONOMIC_SCALE_YR}",
    "landarea",
    "wetland",
]
has_exposure = out[exposure_vars].to_array().sum(["variable", "elev"]) > 0
any_exp = has_exposure | (out.length > 0)

out = out.sel(seg_adm=any_exp)

# show the entries that were dropped
any_exp[~any_exp].values

array([], dtype=bool)

In [41]:
# flag countries whose scaling factors, wetland service value and interior land value
# sum to more than 0 over all years in at least one SSP/IAM combination
any_value = (
    out[["K_scale", "pop_scale", "wetlandservice", "interior"]]
    .to_array()
    .sum(["variable", "year"])
    > 0
).any(["ssp", "iam"])
# countries without any value must be known uninhabited areas
# NOTE(review): this cell only asserts; nothing is removed from `out` here
assert out.country[~any_value].isin(sset.UNINHABITED_ISOS).all()

## Add segment centroid lat/lon

In [42]:
# Segment centroid coordinates, looked up for the segment of every seg_adm in `out`
seg_centroids = segpts.rename(columns={"lat": "seg_lat", "lon": "seg_lon"}).to_xarray()
out = xr.merge((out, seg_centroids.sel(seg=out.seg, drop=True)))

## Save

### Cleanup

In [43]:
# Each elevation bin extends half a bin width below and above its midpoint
half_bin_width = sset.EXPOSURE_BIN_WIDTH_V / 2
bound_index = pd.Index(["lower", "upper"], name="bound")
out["elev_bounds"] = xr.concat(
    (out.elev - half_bin_width, out.elev + half_bin_width),
    dim=bound_index,
)

In [44]:
# Downcast every float64 data variable to float32
float64_vars = [name for name, var in out.data_vars.items() if var.dtype == "float64"]
for name in float64_vars:
    out[name] = out[name].astype("float32")

In [45]:
# order the dimensions as listed in CHUNKS, then chunk with the sizes given there
out = out.transpose(*CHUNKS.keys()).chunk(CHUNKS)

#### Add attrs

In [46]:
# unit label shared by all monetary variables below
dollar_units = "2019 USD PPP"

# coords
out.seg_adm.attrs.update(
    {
        "description": (
            "Unique combinations of coastline segment and administrative unit. These "
            "are adm1 units as defined by Natural Earth outside of the U.S., and "
            "unique combos of zip3, county, CBSA, and state within the U.S. Each is "
            "treated as an independent unit in pyCIAM."
        )
    }
)
out.params.attrs.update(
    {"description": "Gumbel parameters for ESL/storm surge distribution"}
)
out.return_period.attrs.update({"long_name": "Return periods", "units": "y"})
out.elev.attrs.update(
    {
        "long_name": "Elevation",
        "description": (
            "Midpoint elevation for each coastal elevation bin employed in pyCIAM"
        ),
        "units": "m",
    }
)
out.country.attrs.update(
    {
        "description": (
            "Dimension used for variables that exhibit only country-level variance"
        )
    }
)

# alternate coords (non-dimension coordinates that label each seg_adm)
out.seg.attrs.update({"description": "Segment associated with each seg-reg"})
out.adm1.attrs.update(
    {"description": "Administrative unit associated with each seg-reg"}
)
out.seg_country.attrs.update(
    {"description": ("Country associated with each analysis unit")}
)

# data_vars
# (monetary units are built from `dollar_units`, defined at the top of this cell)
out.seg_lat.attrs.update({"long_name": "Segment centroid latitude", "units": "deg N"})
out.seg_lon.attrs.update({"long_name": "Segment centroid longitude", "units": "deg E"})
# NOTE(review): area is spelled "km2" here but "km^2" for the other area-based
# variables below — confirm which spelling is intended
out.interior.attrs.update(
    {
        "long_name": "Value of non-coastal land",
        "units": f"{dollar_units} per km2",
    }
)
out.elev_bounds.attrs.update(
    {
        "long_name": "Elevation bounds",
        "description": (
            "Lower and upper bounds for each coastal elevation bin employed in pyCIAM"
        ),
        "units": "m",
    }
)
out.length.attrs.update(
    {
        "description": "Length of coastline associated with each analysis unit",
        "units": "km",
    }
)
out.gumbel_params.attrs.update(
    {"description": "Parameters describing ESL distribution", "units": "m"}
)
out.surge_height.attrs.update(
    {"description": "Estimated ESL/storm surge heights", "units": "m"}
)
out.wetland.attrs.update(
    {
        "description": "Estimated area for all wetland by elevation",
        "units": "km^2",
    }
)
out.wetlandservice.attrs.update(
    {
        "description": "Value of wetlands",
        "units": f"{dollar_units} per km^2",
    }
)
# reference-year exposure variables (names carry the reference year)
out[f"K_{sset.SOCIOECONOMIC_SCALE_YR}"].attrs.update(
    {
        "long_name": f"{sset.SOCIOECONOMIC_SCALE_YR} Capital stock",
        "description": (
            f"Estimated value of physical capital in {sset.SOCIOECONOMIC_SCALE_YR}, "
            "under actual conditions (i.e. observed, not forecasted)"
        ),
        "units": dollar_units,
    }
)
out[f"pop_{sset.SOCIOECONOMIC_SCALE_YR}"].attrs.update(
    {
        "long_name": f"{sset.SOCIOECONOMIC_SCALE_YR} population",
        "description": (
            f"Estimated population in {sset.SOCIOECONOMIC_SCALE_YR}, under actual "
            "conditions (i.e. observed, not forecasted)"
        ),
        "units": "people",
    }
)
# country-level scaling factors (unitless ratios, so no "units" entry)
out.K_scale.attrs.update(
    {
        "long_name": "Capital scaling factor",
        "description": (
            "Country-level change factor in capital stock specific to SSP/IAM, "
            f"relative to K_{sset.SOCIOECONOMIC_SCALE_YR}"
        ),
    }
)
out.pop_scale.attrs.update(
    {
        "long_name": "Population scaling factor",
        "description": (
            "Country-level change factor in population specific to SSP/IAM, relative "
            f"to pop_{sset.SOCIOECONOMIC_SCALE_YR}"
        ),
    }
)
out.ypcc.attrs.update(
    {
        "long_name": "Country-level income per capita",
        "units": f"{dollar_units} per person",
    }
)
out.landarea.attrs.update(
    {
        "long_name": "Total Land Area",
        "units": "km^2",
    }
)
out.pc.attrs.update(
    {
        "description": "Protection cost (quadratic with height)",
        "units": f"{dollar_units} per km per vert m^2",
    }
)
out.mobcapfrac.attrs.update(
    {
        "description": "Fraction of capital that is mobile",
    }
)
out.rho.attrs.update(
    {
        "description": (
            "Resilience factor scaling depth-damage and depth-mortality functions"
        ),
    }
)

# values we need to save (anything in `out` that is not listed here is left out of
# the subset taken below)
to_keep = [
    "elev_bounds",
    "seg",
    "adm1",
    "country",
    "seg_country",
    "length",
    "pc",
    "mobcapfrac",
    "gumbel_params",
    "surge_height",
    "rho",
    f"K_{sset.SOCIOECONOMIC_SCALE_YR}",
    f"pop_{sset.SOCIOECONOMIC_SCALE_YR}",
    "K_scale",
    "pop_scale",
    "ypcc",
    "landarea",
    "interior",
    "wetland",
    "wetlandservice",
    "seg_lon",
    "seg_lat",
]

# print dataset to verify it looks as expected
out = out[to_keep]
# load the three label coordinates of seg_adm
out[["seg", "adm1", "seg_country"]].load()
out

Unnamed: 0,Array,Chunk
Bytes,93.59 kiB,3.91 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 93.59 kiB 3.91 kiB Shape (11980,) (500,) Dask graph 24 chunks in 1 graph layer Data type object numpy.ndarray",11980  1,

Unnamed: 0,Array,Chunk
Bytes,93.59 kiB,3.91 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,93.59 kiB,3.91 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 93.59 kiB 3.91 kiB Shape (11980,) (500,) Dask graph 24 chunks in 1 graph layer Data type object numpy.ndarray",11980  1,

Unnamed: 0,Array,Chunk
Bytes,93.59 kiB,3.91 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,93.59 kiB,3.91 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,object numpy.ndarray,object numpy.ndarray
"Array Chunk Bytes 93.59 kiB 3.91 kiB Shape (11980,) (500,) Dask graph 24 chunks in 1 graph layer Data type object numpy.ndarray",11980  1,

Unnamed: 0,Array,Chunk
Bytes,93.59 kiB,3.91 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,object numpy.ndarray,object numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.56 kiB,1.56 kiB
Shape,"(200, 2)","(200, 2)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.56 kiB 1.56 kiB Shape (200, 2) (200, 2) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",2  200,

Unnamed: 0,Array,Chunk
Bytes,1.56 kiB,1.56 kiB
Shape,"(200, 2)","(200, 2)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,46.80 kiB,1.95 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 46.80 kiB 1.95 kiB Shape (11980,) (500,) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",11980  1,

Unnamed: 0,Array,Chunk
Bytes,46.80 kiB,1.95 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,844 B,844 B
Shape,"(211,)","(211,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 844 B 844 B Shape (211,) (211,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",211  1,

Unnamed: 0,Array,Chunk
Bytes,844 B,844 B
Shape,"(211,)","(211,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,844 B,844 B
Shape,"(211,)","(211,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 844 B 844 B Shape (211,) (211,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",211  1,

Unnamed: 0,Array,Chunk
Bytes,844 B,844 B
Shape,"(211,)","(211,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,93.59 kiB,3.91 kiB
Shape,"(11980, 2)","(500, 2)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 93.59 kiB 3.91 kiB Shape (11980, 2) (500, 2) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",2  11980,

Unnamed: 0,Array,Chunk
Bytes,93.59 kiB,3.91 kiB
Shape,"(11980, 2)","(500, 2)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,187.19 kiB,7.81 kiB
Shape,"(11980, 4)","(500, 4)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 187.19 kiB 7.81 kiB Shape (11980, 4) (500, 4) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",4  11980,

Unnamed: 0,Array,Chunk
Bytes,187.19 kiB,7.81 kiB
Shape,"(11980, 4)","(500, 4)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 832.46 kiB 83.25 kiB Shape (101, 211, 5, 2) (101, 211, 1, 1) Dask graph 10 chunks in 1 graph layer Data type float32 numpy.ndarray",101  1  2  5  211,

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.14 MiB,390.62 kiB
Shape,"(11980, 200)","(500, 200)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 9.14 MiB 390.62 kiB Shape (11980, 200) (500, 200) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",200  11980,

Unnamed: 0,Array,Chunk
Bytes,9.14 MiB,390.62 kiB
Shape,"(11980, 200)","(500, 200)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.14 MiB,390.62 kiB
Shape,"(11980, 200)","(500, 200)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 9.14 MiB 390.62 kiB Shape (11980, 200) (500, 200) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",200  11980,

Unnamed: 0,Array,Chunk
Bytes,9.14 MiB,390.62 kiB
Shape,"(11980, 200)","(500, 200)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 832.46 kiB 83.25 kiB Shape (101, 211, 5, 2) (101, 211, 1, 1) Dask graph 10 chunks in 1 graph layer Data type float32 numpy.ndarray",101  1  2  5  211,

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,416.23 kiB,83.25 kiB
Shape,"(101, 211, 5)","(101, 211, 1)"
Dask graph,5 chunks in 1 graph layer,5 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 416.23 kiB 83.25 kiB Shape (101, 211, 5) (101, 211, 1) Dask graph 5 chunks in 1 graph layer Data type float32 numpy.ndarray",5  211  101,

Unnamed: 0,Array,Chunk
Bytes,416.23 kiB,83.25 kiB
Shape,"(101, 211, 5)","(101, 211, 1)"
Dask graph,5 chunks in 1 graph layer,5 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 832.46 kiB 83.25 kiB Shape (101, 211, 5, 2) (101, 211, 1, 1) Dask graph 10 chunks in 1 graph layer Data type float32 numpy.ndarray",101  1  2  5  211,

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.14 MiB,390.62 kiB
Shape,"(11980, 200)","(500, 200)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 9.14 MiB 390.62 kiB Shape (11980, 200) (500, 200) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",200  11980,

Unnamed: 0,Array,Chunk
Bytes,9.14 MiB,390.62 kiB
Shape,"(11980, 200)","(500, 200)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 832.46 kiB 83.25 kiB Shape (101, 211, 5, 2) (101, 211, 1, 1) Dask graph 10 chunks in 1 graph layer Data type float32 numpy.ndarray",101  1  2  5  211,

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,9.14 MiB,390.62 kiB
Shape,"(11980, 200)","(500, 200)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 9.14 MiB 390.62 kiB Shape (11980, 200) (500, 200) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",200  11980,

Unnamed: 0,Array,Chunk
Bytes,9.14 MiB,390.62 kiB
Shape,"(11980, 200)","(500, 200)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 832.46 kiB 83.25 kiB Shape (101, 211, 5, 2) (101, 211, 1, 1) Dask graph 10 chunks in 1 graph layer Data type float32 numpy.ndarray",101  1  2  5  211,

Unnamed: 0,Array,Chunk
Bytes,832.46 kiB,83.25 kiB
Shape,"(101, 211, 5, 2)","(101, 211, 1, 1)"
Dask graph,10 chunks in 1 graph layer,10 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,46.80 kiB,1.95 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 46.80 kiB 1.95 kiB Shape (11980,) (500,) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",11980  1,

Unnamed: 0,Array,Chunk
Bytes,46.80 kiB,1.95 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,46.80 kiB,1.95 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 46.80 kiB 1.95 kiB Shape (11980,) (500,) Dask graph 24 chunks in 1 graph layer Data type float32 numpy.ndarray",11980  1,

Unnamed: 0,Array,Chunk
Bytes,46.80 kiB,1.95 kiB
Shape,"(11980,)","(500,)"
Dask graph,24 chunks in 1 graph layer,24 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [77]:
# Persist the assembled `out` dataset to `sset.PATH_SLIIDERS`.
# `mode="w"` is passed through to `save` (presumably overwrite semantics --
# confirm against `sliiders.io.save`).
#
# The write runs under dask's "single-threaded" scheduler setting, which is
# applied only for the duration of the `with` block and restored afterwards.
#
# `dask` itself is not among the names imported in the notebook's import cell
# (only `dask_gateway` and `sliiders.dask` are), so import it here to keep this
# cell working after Restart Kernel -> Run All.
import dask

with dask.config.set(scheduler="single-threaded"):
    save(out, sset.PATH_SLIIDERS, mode="w")



In [78]:
# Release the compute resources now that the dataset has been written.
# Close the client first so it disconnects while the scheduler is still
# running, then close the cluster itself. Written as two statements (rather
# than a tuple expression) so the cell does not display a `(None, None)` result.
client.close()
cluster.close()