In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Put the parent directory on the import path.
# NOTE(review): presumably this is where the local `shared` module lives — confirm.
sys.path.append("../")

In [3]:
import json
import os
import re
from pathlib import Path
from shutil import make_archive
from tempfile import TemporaryDirectory

import dask.config
import requests
import shared
from cloudpathlib import AnyPath
from sliiders import settings as sset
from zarr import ZipStore

In [4]:
# netCDF siblings of the zarr stores. `PATT_OUTPUTS_NC` keeps a literal "{case}"
# placeholder that is filled in later with `str.format(case=...)`.
PATT_OUTPUTS_NC = shared.PATH_OUTPUTS.parent / f"{shared.PATH_OUTPUTS.stem}_{{case}}.nc"
PATH_SLIIDERS_NC = sset.PATH_SLIIDERS.parent / f"{sset.PATH_SLIIDERS.stem}.nc"

In [5]:
# Write one netCDF file per value of the `case` coordinate, next to the zarr
# outputs. Files that already exist are left untouched, but every path is still
# recorded in `fpaths` so it can be uploaded later.
ds = shared.open_zarr(shared.PATH_OUTPUTS)
fpaths = []
for case in ds.case.values:
    print(f"Processing {case}")
    fpath = AnyPath(str(PATT_OUTPUTS_NC).format(case=case))
    fpaths.append(fpath)
    if fpath.exists():
        continue
    shared.save_dataset(ds.sel(case=case).load(), fpath)

# Unlike the per-case files above, the SLIIDERS netCDF copy is written on every run.
shared.save_dataset(
    shared.open_zarr(sset.PATH_SLIIDERS).load(),
    PATH_SLIIDERS_NC,
)

Processing noAdaptation
Processing protect10
Processing protect100
Processing protect1000
Processing protect10000
Processing retreat1
Processing retreat10
Processing retreat100
Processing retreat1000
Processing retreat10000
Processing optimalfixed


In [6]:
# Select dask's threaded scheduler for the computations that follow.
dask.config.set(scheduler="threads")

<dask.config.set at 0x7d40ae7e1130>

## Parameters

In [7]:
# Zenodo personal access token. Credentials must never be stored in the notebook:
# export ZENODO_ACCESS_TOKEN in the environment before starting the kernel.
# NOTE: a token was previously committed in this cell; it should be revoked and
# regenerated in the Zenodo account settings.
ACCESS_TOKEN = os.environ.get("ZENODO_ACCESS_TOKEN", "")
if not ACCESS_TOKEN:
    print(
        "WARNING: ZENODO_ACCESS_TOKEN is not set; "
        "authenticated Zenodo API requests will be rejected."
    )

# Version string written into the metadata of each new deposition draft.
VERSION = "1.2.0"

# Maps a short deposit key to the exact title of the existing Zenodo deposition.
# Only the keys listed here are processed by the rest of the notebook.
TITLES = {
    # "SLIIDERS": (
    #     "SLIIDERS: Sea Level Impacts Input Dataset by Elevation, Region, and Scenario"
    # ),
    "pyCIAM": (
        "Estimates of Global Coastal Losses Under Multiple Sea Level Rise Scenarios"
    ),
}

# Zipped source-code archives to attach to each deposit; set to None to skip.
PYCIAM_CODE_PATH = Path("pyCIAM.zip")
SLIIDERS_CODE_PATH = Path("/tmp/sliiders.zip")

In [8]:
# Query parameters passed to every Zenodo API request in this notebook; they carry
# the access token used for authentication.
PARAMS = {"access_token": ACCESS_TOKEN}

In [9]:
# Zenodo REST endpoint for depositions
Z_URL = "https://zenodo.org/api/deposit/depositions"

# Find existing depositions. Fail early with the server's message (e.g. on a
# missing/invalid token) instead of trying to iterate over an error payload.
_deposits_response = requests.get(
    Z_URL,
    params=PARAMS,
)
if _deposits_response.status_code != 200:
    raise ValueError(f"{_deposits_response.status_code}: {_deposits_response.text}")
ALL_DEPOSITS = _deposits_response.json()
# NOTE(review): the listing endpoint is presumably paginated; if the account owns
# many depositions, a matching title could be missing from this first page — confirm
# against the Zenodo API docs and add `size`/`page` parameters if needed.

# Match each configured title to at most one existing deposition.
EXISTING_DEPOSITS = {}
MISSING_DEPOSITS = []
for k, v in TITLES.items():
    this = [i for i in ALL_DEPOSITS if i["title"] == v]
    assert len(this) <= 1, f"Multiple depositions found with title {v!r}"
    if len(this):
        EXISTING_DEPOSITS[k] = this[0]
    else:
        MISSING_DEPOSITS.append(k)
# Every configured deposit must already exist; this notebook only adds versions.
assert not len(MISSING_DEPOSITS), f"No existing deposition for: {MISSING_DEPOSITS}"

## Metadata

In [10]:
# Author records: each entry has an "affiliation", a "name" in "Last, First" form,
# and an "orcid" identifier.
# NOTE(review): AUTHORS is not referenced in the visible cells — presumably meant to
# be passed as deposition metadata (e.g. via `update_dict`); confirm.
AUTHORS = [
    {
        "affiliation": "United Nations Development Programme",
        "name": "Depsky, Nicholas",
        "orcid": "0000-0002-9441-9042",
    },
    {
        "affiliation": (
            "Reask; Global Policy Lab, Goldman School of Public Policy, University of "
            "California, Berkeley"
        ),
        "name": "Bolliger, Ian",
        "orcid": "0000-0001-8055-297X",
    },
    {
        "affiliation": "Recidiviz",
        "name": "Allen, Daniel",
        "orcid": "0000-0001-5366-5178",
    },
    {
        "affiliation": "Columbia University",
        "name": "Choi, Jun Ho",
        "orcid": "0000-0003-0749-9222",
    },
    {
        "affiliation": "The Rhodium Group",
        "name": "Delgado, Michael",
        "orcid": "0000-0002-2414-045X",
    },
    {
        "affiliation": (
            "National Bureau of Economic Research; Energy Policy Institute, University "
            "of Chicago"
        ),
        "name": "Greenstone, Michael",
        "orcid": "0000-0002-2364-2810",
    },
    {
        "affiliation": "BlackRock",
        "name": "Hamidi, Ali",
        "orcid": "0000-0001-6235-0303",
    },
    {
        "affiliation": "The Rhodium Group",
        "name": "Houser, Trevor",
        "orcid": "0000-0002-0514-7058",
    },
    {
        "affiliation": (
            "Global Policy Lab, Goldman School of Public Policy, University of "
            "California, Berkeley; National Bureau of Economic Research"
        ),
        "name": "Hsiang, Solomon",
        "orcid": "0000-0002-2074-0829",
    },
    {
        "affiliation": (
            "Department of Earth & Planetary Sciences and Rutgers Institute of Earth, "
            "Ocean and Atmospheric Sciences, Rutgers University"
        ),
        "name": "Kopp, Robert E.",
        "orcid": "0000-0003-4016-9428",
    },
]

## Files To Upload

In [11]:
# Paths to upload, keyed first by deposit name and then by file category
# ("products", "inputs", and optionally "source").
ORIGINAL_PATHS = {
    "SLIIDERS": {
        "products": [sset.PATH_SLIIDERS, PATH_SLIIDERS_NC],
        "inputs": [
            sset.PATH_GEOG_GTSM_SNAPPED,
            sset.PATH_GEOG_GTSM_STATIONS_TOTHIN,
            sset.PATH_SEG_PTS_MANUAL,
        ],
    },
    # uncomment Diaz inputs if a re-upload is necessary
    "pyCIAM": {
        "products": [
            shared.PATH_OUTPUTS,
            # shared.PATH_DIAZ_RES,
            shared.PATH_MOVEFACTOR_DATA,
            *fpaths,
        ],
        "inputs": [
            # shared.PATH_DIAZ_INPUTS_RAW,
            # shared.PATH_SLR_AR5_QUANTILES,
            shared.PATH_SLIIDERS_INCOME_INTERMEDIATE_FILE,
            shared.PATHS_SURGE_LOOKUP["seg"],
            shared.PATHS_SURGE_LOOKUP["seg_adm"],
        ],
    },
}

# Attach a "source" category only for deposits whose code archive path is set.
for _deposit_key, _code_path in (
    ("pyCIAM", PYCIAM_CODE_PATH),
    ("SLIIDERS", SLIIDERS_CODE_PATH),
):
    if _code_path is None:
        continue
    ORIGINAL_PATHS[_deposit_key]["source"] = [_code_path]

## Create and/or update depositions

In [12]:
def create_draft_deposit(name, update_dict=None, overwrite=False):
    """Open (or reuse) a draft new version of an existing deposition and set its metadata.

    Parameters
    ----------
    name : str
        Key into ``EXISTING_DEPOSITS`` identifying the deposition to version.
    update_dict : dict, optional
        Extra metadata fields merged on top of the draft's current metadata. These
        take precedence over the ``version`` field set from ``VERSION``.
    overwrite : bool, default False
        If True, delete every file currently attached to the draft.

    Returns
    -------
    dict
        The draft deposition record as fetched *before* the metadata update was
        sent, so its ``"metadata"`` entry does not reflect the update.

    Raises
    ------
    ValueError
        If any Zenodo request returns an unexpected status code.
    """
    # A fresh dict per call replaces the former shared mutable ``{}`` default.
    extra_metadata = {} if update_dict is None else update_dict
    dep = EXISTING_DEPOSITS[name]

    # create new deposit if needed
    r = requests.post(dep["links"]["newversion"], params=PARAMS)
    # case 1: this is already a new unpublished version
    if (
        r.status_code == 404
        and r.json()["message"] == "The persistent identifier is not registered."
    ):
        pass
    # case 2: this is a successful new version request and we need to grab the new
    # version deposition
    elif r.status_code in [200, 201]:
        # returned value would be original deposit version in case of new version
        # created
        dep = r.json()
    # case 3: some other error
    else:
        raise ValueError(f"{r.status_code}: {r.text}")

    # Validate before parsing so a failure reports the server's message rather
    # than a KeyError on the error payload further down.
    draft_request = requests.get(dep["links"]["latest_draft"], params=PARAMS)
    if draft_request.status_code != 200:
        raise ValueError(f"{draft_request.status_code}: {draft_request.text}")
    dep = draft_request.json()

    if overwrite:
        new_id = dep["links"]["latest_draft"].split("/")[-1]
        files_request = requests.get(dep["links"]["files"], params=PARAMS)
        if files_request.status_code == 404:
            # Same convention as upload_file_list: no file listing means no files.
            files = []
        elif files_request.status_code == 200:
            files = files_request.json()
        else:
            raise ValueError(f"{files_request.status_code}: {files_request.text}")
        for f in files:
            file_url = f"{Z_URL}/{new_id}/files/{f['id']}"
            r = requests.delete(file_url, params=PARAMS)
            # 404 is tolerated: the file is already gone.
            if r.status_code not in [204, 404]:
                raise ValueError(f"{r.status_code}: {r.text}")

    # The DOI field is dropped from the metadata that is sent back.
    metadata = {k: v for k, v in dep["metadata"].items() if k != "doi"}
    metadata.update({"version": VERSION, **extra_metadata})
    url = dep["links"]["latest_draft"]
    meta_put = requests.put(
        url,
        params=PARAMS,
        data=json.dumps({"metadata": metadata}),
        headers={"Content-Type": "application/json"},
    )
    if meta_put.status_code != 200:
        raise ValueError(f"{meta_put.status_code}: {meta_put.text}")
    return dep


def create_all_new_deposits(titles=TITLES, overwrite=False):
    """Create a draft deposit for every key of ``titles``.

    Returns a dict mapping each key to the result of ``create_draft_deposit`` for
    that key; ``overwrite`` is forwarded unchanged.
    """
    drafts = {}
    for deposit_key in titles.keys():
        drafts[deposit_key] = create_draft_deposit(deposit_key, overwrite=overwrite)
    return drafts


def _get_zenodo_name(fname):
    """Derive the upload file name for the path-like ``fname``.

    Every ``_`` followed by eight digits (a datestamp) is removed from
    ``fname.name``; then each "-"-separated piece that contains
    ``shared.RES_VERS`` is dropped and the remaining pieces are re-joined.
    """
    # drop a datestamp if it exists
    undated = re.sub(r"_\d{8}", "", fname.name)
    # drop version from name
    kept_parts = []
    for part in undated.split("-"):
        if shared.RES_VERS in part:
            continue
        kept_parts.append(part)
    return "-".join(kept_parts)


def upload_file(
    deposit_link_dict, fname, zenodo_name=None, overwrite=False, existing_files=None
):
    """Upload one file to a deposition bucket.

    Parameters
    ----------
    deposit_link_dict : dict
        The ``"links"`` mapping of a deposition; its ``"bucket"`` URL is used.
    fname : path-like
        File to upload; must provide ``open("rb")`` (and ``name`` when
        ``zenodo_name`` is not given).
    zenodo_name : str, optional
        Name to store the file under. Defaults to ``_get_zenodo_name(fname)``.
    overwrite : bool, default False
        If the name is already in ``existing_files``: when False, skip the upload
        and return the existing record; when True, delete it first.
    existing_files : dict, optional
        Maps uploaded file names to their file records. Defaults to no files.

    Returns
    -------
    dict
        The JSON response of the upload, or the existing file record if skipped.

    Raises
    ------
    ValueError
        If deleting the existing file or uploading returns an unexpected status.
    """
    # A fresh dict per call replaces the former shared mutable ``{}`` default.
    if existing_files is None:
        existing_files = {}
    if zenodo_name is None:
        zenodo_name = _get_zenodo_name(fname)

    if zenodo_name in existing_files:
        if not overwrite:
            print("...Skipping b/c already uploaded")
            return existing_files[zenodo_name]
        # Same accepted codes as the file deletion in create_draft_deposit.
        r = requests.delete(
            existing_files[zenodo_name]["links"]["self"], params=PARAMS
        )
        if r.status_code not in [204, 404]:
            raise ValueError(f"{r.status_code}: {r.text}")

    # The open handle is passed as the request body rather than its bytes.
    with fname.open("rb") as fp:
        r = requests.put(
            f"{deposit_link_dict['bucket']}/{zenodo_name}",
            params=PARAMS,
            data=fp,
        )

    if r.status_code not in [200, 201]:
        raise ValueError(f"{r.status_code}: {r.text}")
    return r.json()


def upload_file_list(deposit, flist, overwrite=False):
    """Upload every path in ``flist`` to the given deposition.

    Regular files are uploaded as-is. Directories are first packed into a zip in a
    temporary directory: ``.zarr`` directories are re-written into a zarr
    ``ZipStore``; any other directory is copied locally with ``download_to`` and
    archived with ``shutil.make_archive``.

    Parameters
    ----------
    deposit : dict
        Deposition record; ``deposit["links"]`` must contain ``"files"`` and
        ``"bucket"``.
    flist : iterable of path-like
        Paths to upload. Each must provide ``name``, ``suffix``, ``is_file()`` and
        ``is_dir()``; non-zarr directories must also provide ``download_to()``.
    overwrite : bool, default False
        If False, paths whose upload name (or that name plus ``".zip"``) is
        already attached to the deposition are skipped.

    Returns
    -------
    list
        One upload result per file actually uploaded (skipped paths excluded).

    Raises
    ------
    ValueError
        If the existing-file listing fails, or a path is neither a file nor a
        directory.
    """
    out = []
    existing_file_request = requests.get(deposit["links"]["files"], params=PARAMS)
    if existing_file_request.status_code == 404:
        existing_files = {}
    elif existing_file_request.status_code == 200:
        existing_files = {f["filename"]: f for f in existing_file_request.json()}
    else:
        # Surface auth/server errors instead of failing on an error payload below.
        raise ValueError(
            f"{existing_file_request.status_code}: {existing_file_request.text}"
        )
    for f in flist:
        print(f"Uploading: {str(f)}")
        zenodo_name = _get_zenodo_name(f)
        if (
            zenodo_name in existing_files or (zenodo_name + ".zip") in existing_files
        ) and not overwrite:
            print("...Skipping b/c already uploaded")
            continue
        if f.is_file():
            out.append(
                upload_file(
                    deposit["links"],
                    f,
                    overwrite=overwrite,
                    zenodo_name=zenodo_name,
                    existing_files=existing_files,
                )
            )
        elif f.is_dir():
            with TemporaryDirectory() as d:
                tmp_file = Path(d) / (f.name + ".zip")

                if f.suffix == ".zarr":
                    with ZipStore(tmp_file, mode="w") as tf:
                        ds = shared.open_zarr(f)
                        # Load coordinates and clear their stored encoding before
                        # re-writing the dataset.
                        for c in ds.coords:
                            ds[c].load()
                            ds[c].encoding = {}
                        # Cast object-dtype variables to unicode strings.
                        for v in ds.variables:
                            if ds[v].dtype == "object":
                                ds[v] = ds[v].astype("unicode")
                        ds.to_zarr(tf)
                else:
                    # NOTE(review): `download_to` looks like a cloud-path method;
                    # a plain local directory would presumably fail here — confirm.
                    name = Path(d) / f.name
                    f.download_to(name)
                    # make_archive appends ".zip" to `name`, which yields `tmp_file`.
                    make_archive(name, "zip", name)

                # NOTE(review): the archive is stored under `zenodo_name`, which has
                # no ".zip" suffix, although the skip check above also looks for
                # `zenodo_name + ".zip"` — confirm the intended upload name.
                out.append(
                    upload_file(
                        deposit["links"],
                        tmp_file,
                        zenodo_name=zenodo_name,
                        overwrite=overwrite,
                        # Forwarded as in the file branch, so an existing upload of
                        # the same name is deleted first when overwriting.
                        existing_files=existing_files,
                    )
                )
        else:
            raise ValueError(f)
    return out

In [13]:
# Open (or reuse) a draft for every deposit in TITLES. With overwrite=False, files
# already attached to a draft are not deleted.
draft_deps = create_all_new_deposits(overwrite=False)

In [14]:
# Upload each deposit's files category by category; `uploads` collects the upload
# results per deposit name.
uploads = {}
for name in TITLES:
    print(name)
    kind = ORIGINAL_PATHS[name]
    this_dep = draft_deps[name]

    # Register the result list up front so partial progress stays visible if a
    # later upload raises.
    uploads[name] = []
    for filetype in ("inputs", "products", "source"):
        if filetype not in kind:
            continue
        uploads[name].extend(
            upload_file_list(this_dep, kind[filetype], overwrite=False)
        )

pyCIAM
Uploading: gs://rhg-data/impactlab-rhg/coastal/sliiders/int/exposure/ypk/finalized/ypk_2000_2100_20240222.zarr
...Skipping b/c already uploaded
Uploading: gs://rhg-data/impactlab-rhg/coastal/ciam_paper/data/int/surge-lookup-v1.2-seg.zarr
...Skipping b/c already uploaded
Uploading: gs://rhg-data/impactlab-rhg/coastal/ciam_paper/data/int/surge-lookup-v1.2-seg_adm.zarr
...Skipping b/c already uploaded
Uploading: gs://rhg-data/impactlab-rhg/coastal/ciam_paper/results-v1.2/pyCIAM_outputs.zarr
...Skipping b/c already uploaded
Uploading: gs://rhg-data/impactlab-rhg/coastal/ciam_paper/results-v1.2/suboptimal_capital_by_movefactor.zarr
...Skipping b/c already uploaded
Uploading: gs://rhg-data/impactlab-rhg/coastal/ciam_paper/results-v1.2/pyCIAM_outputs_noAdaptation.nc
...Skipping b/c already uploaded
Uploading: gs://rhg-data/impactlab-rhg/coastal/ciam_paper/results-v1.2/pyCIAM_outputs_protect10.nc
...Skipping b/c already uploaded
Uploading: gs://rhg-data/impactlab-rhg/coastal/ciam_paper/