In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys

sys.path.append("../")

In [3]:
import json
from os import environ
from pathlib import Path
from shutil import make_archive
from tempfile import TemporaryDirectory

import dask.config
import requests
import shared
import xarray as xr
from sliiders import settings as sset
from sliiders.io import open_zarr
from zarr import ZipStore

In [4]:
PATH_SLIIDERS_NC = sset.PATH_SLIIDERS.parent / (sset.PATH_SLIIDERS.stem + ".nc")
PATH_OUTPUTS_NC = shared.PATH_OUTPUTS.parent / (shared.PATH_OUTPUTS.stem + ".nc")

shared.save_dataset(shared.open_zarr(sset.PATH_SLIIDERS).load(), PATH_SLIIDERS_NC)
shared.save_dataset(shared.open_zarr(sset.PATH_OUTPUTS).load(), PATH_OUTPUTS_NC)

In [4]:
dask.config.set(scheduler="threads")

<dask.config.set at 0x7cfe44438890>

## Parameters

In [21]:
ACCESS_TOKEN = "Q5z5IQ1m5Z9l1QS7ZYeV78IS5bqPmhzcFVo0KSNLoh2p39HRMPgFoJsCyQt5"
VERSION = "1.2.0"
TITLES = {
    "SLIIDERS": "SLIIDERS: Sea Level Impacts Input Dataset by Elevation, Region, and Scenario",
    # "pyCIAM": "Estimates of Global Coastal Losses Under Multiple Sea Level Rise Scenarios",
}
PYCIAM_CODE_PATH = Path("pyCIAM-1.1.2.zip")
SLIIDERS_CODE_PATH = Path("/tmp/sliiders-1.2.zip")

In [22]:
PARAMS = {"access_token": ACCESS_TOKEN}

In [23]:
# get host
Z_URL = "https://zenodo.org/api/deposit/depositions"

# Find existing depositions
ALL_DEPOSITS = requests.get(
    Z_URL,
    params=PARAMS,
).json()
EXISTING_DEPOSITS = {}
MISSING_DEPOSITS = []
for k, v in TITLES.items():
    this = [i for i in ALL_DEPOSITS if i["title"] == v]
    assert len(this) <= 1
    if len(this):
        EXISTING_DEPOSITS[k] = this[0]
    else:
        MISSING_DEPOSITS.append(k)
assert not len(MISSING_DEPOSITS)

## Metadata

In [24]:
# Metadata
AUTHORS = [
    {
        "affiliation": "Energy & Resources Group, University of California, Berkeley; Global Policy Lab, Goldman School of Public Policy, University of California, Berkeley",
        "name": "Depsky, Nicholas",
        "orcid": "0000-0002-9441-9042",
    },
    {
        "affiliation": "BlackRock; Global Policy Lab, Goldman School of Public Policy, University of California, Berkeley",
        "name": "Bolliger, Ian",
        "orcid": "0000-0001-8055-297X",
    },
    {
        "affiliation": "Global Policy Lab, Goldman School of Public Policy, University of California, Berkeley",
        "name": "Allen, Daniel",
        "orcid": "0000-0001-5366-5178",
    },
    {
        "affiliation": "Energy Policy Institute, University of Chicago",
        "name": "Choi, Jun Ho",
        "orcid": "0000-0003-0749-9222",
    },
    {
        "affiliation": "The Rhodium Group",
        "name": "Delgado, Michael",
        "orcid": "0000-0002-2414-045X",
    },
    {
        "affiliation": "National Bureau of Economic Research; Energy Policy Institute, University of Chicago",
        "name": "Greenstone, Michael",
        "orcid": "0000-0002-2364-2810",
    },
    {
        "affiliation": "The Rhodium Group",
        "name": "Hamidi, Ali",
        "orcid": "0000-0001-6235-0303",
    },
    {
        "affiliation": "The Rhodium Group",
        "name": "Houser, Trevor",
        "orcid": "0000-0002-0514-7058",
    },
    {
        "affiliation": "Global Policy Lab, Goldman School of Public Policy, University of California, Berkeley; National Bureau of Economic Research",
        "name": "Hsiang, Solomon",
        "orcid": "0000-0002-2074-0829",
    },
    {
        "affiliation": "Department of Earth & Planetary Sciences and Rutgers Institute of Earth, Ocean and Atmospheric Sciences, Rutgers University",
        "name": "Kopp, Robert E.",
        "orcid": "0000-0003-4016-9428",
    },
]

## Files To Upload

In [25]:
# Files and paths
ORIGINAL_PATHS = {
    "SLIIDERS": {
        "products": [sset.PATH_SLIIDERS, PATH_SLIIDERS_NC],
        "inputs": [
            sset.PATH_GEOG_GTSM_SNAPPED,
            sset.PATH_GEOG_GTSM_STATIONS_TOTHIN,
            sset.PATH_SEG_PTS_MANUAL,
        ],
    },
    "pyCIAM": {
        "products": [
            shared.PATH_OUTPUTS,
            PATH_OUTPUTS_NC,
            shared.PATH_DIAZ_RES,
            shared.PATH_MOVEFACTOR_DATA,
        ],
        "inputs": [
            shared.PATH_DIAZ_INPUTS_RAW,
            shared.PATH_SLR_AR5_QUANTILES,
            shared.PATH_SLIIDERS_INCOME_INTERMEDIATE_FILE,
            shared.PATHS_SURGE_LOOKUP["seg"],
            shared.PATHS_SURGE_LOOKUP["seg_adm"],
        ],
    },
}

if PYCIAM_CODE_PATH is not None:
    ORIGINAL_PATHS["pyCIAM"]["source"] = [PYCIAM_CODE_PATH]
if SLIIDERS_CODE_PATH is not None:
    ORIGINAL_PATHS["SLIIDERS"]["source"] = [SLIIDERS_CODE_PATH]

## Create and/or update depositions

In [40]:
def create_draft_deposit(name, update_dict={}, overwrite=False):
    dep = EXISTING_DEPOSITS[name]
    # create new deposit
    deposition_id = dep["id"]
    if "latest_draft" not in dep["links"]:
        url = f"{Z_URL}/{deposition_id}/actions/newversion"
        r = requests.post(url, params=PARAMS)
        if r.status_code not in [200, 201]:
            raise ValueError(f"{r.status_code}: {r.text}")
        dep = r.json()

    if overwrite:
        new_id = dep["links"]["latest_draft"].split("/")[-1]
        files = requests.get(dep["links"]["files"], params=PARAMS).json()
        print(files)
        if len(files):
            for f in files:
                file_url = f"{Z_URL}/{new_id}/files/{f['id']}"
                r = requests.delete(file_url, params=PARAMS)
                if r.status_code not in [204, 404]:
                    raise ValueError(f"{r.status_code}: {r.text}")

    metadata = {k: v for k, v in dep["metadata"].copy().items() if k != "doi"}
    metadata.update({"version": VERSION, **update_dict})
    url = dep["links"]["latest_draft"]
    meta_put = requests.put(
        url,
        params=PARAMS,
        data=json.dumps({"metadata": metadata}),
        headers={"Content-Type": "application/json"},
    )
    if meta_put.status_code != 200:
        raise ValueError(f"{meta_put.status_code}: {meta_put.text}")
    return dep


def create_all_new_deposits(titles=TITLES, overwrite=False):
    deps = {}
    ids = {}
    for t in titles.keys():
        dep = create_draft_deposit(t, overwrite=overwrite)
        ids[t] = int(dep["links"]["latest_draft"].split("/")[-1])
    all_deps = requests.get(
        Z_URL,
        params={"access_token": ACCESS_TOKEN},
    ).json()
    for t in titles.keys():
        dep = [d for d in all_deps if d["id"] == ids[t]]
        assert len(dep) == 1
        deps[t] = dep[0]
    return deps


def upload_file(deposit_link_dict, fname, root, zenodo_name=None, overwrite=False):
    if zenodo_name is None:
        zenodo_name = fname.name
    zenodo_name = root + zenodo_name

    existing_files = {
        f["filename"]: f
        for f in requests.get(deposit_link_dict["files"], params=PARAMS).json()
    }
    if zenodo_name in existing_files:
        if not overwrite:
            print("...Skipping b/c already uploaded")
            return existing_files[zenodo_name]
        requests.delete(existing_files[zenodo_name]["links"]["self"], params=PARAMS)

    with fname.open("rb") as fp:
        r = requests.put(
            f"{deposit_link_dict['bucket']}/{zenodo_name}",
            params=PARAMS,
            data=fp,
        )

    if r.status_code != 200:
        raise ValueError(f"{r.status_code}: {r.text}")
    return r.json()


def upload_file_list(deposit, flist, root, overwrite=False):
    out = []
    existing_files = {
        f["filename"]: f
        for f in requests.get(deposit["links"]["files"], params=PARAMS).json()
    }
    for f in flist:
        print(f"Uploading: {str(f)}")
        if (
            (root + f.name) in existing_files
            or (root + f.name + ".zip") in existing_files
        ) and not overwrite:
            print("...Skipping b/c already uploaded")
            continue
        if f.is_file():
            out.append(upload_file(deposit["links"], f, root, overwrite=overwrite))
        elif f.is_dir():
            with TemporaryDirectory() as d:
                tmp_file = Path(d) / (f.name + ".zip")

                if f.suffix == ".zarr":
                    with ZipStore(tmp_file, mode="w") as tf:
                        ds = shared.open_zarr(f)
                        for c in ds.coords:
                            ds[c].load()
                            ds[c].encoding = {}
                        for v in ds.variables:
                            if ds[v].dtype == "object":
                                ds[v] = ds[v].astype("unicode")
                        ds.to_zarr(tf)
                else:
                    name = Path(d) / f.name
                    f.download_to(name)
                    make_archive(name, "zip", name)

                out.append(
                    upload_file(
                        deposit["links"],
                        tmp_file,
                        root,
                        overwrite=overwrite,
                    )
                )
        else:
            raise ValueError(f)
    return out

In [41]:
draft_deps = create_all_new_deposits(overwrite=False)

Note that it seems to take some time for the "bucket" link to show up, which is needed to use Zenodo's "new" file API, which allows for uploads larger than 100MB. So if bucket is not appearing, you may need to wait a while (<1 day) to be able to run the file uploads below.

In [44]:
uploads = {}
for name, kind in ORIGINAL_PATHS.items():
    print(name)
    this_dep = draft_deps[name]

    uploads[name] = upload_file_list(
        this_dep, kind["inputs"], "inputs/", overwrite=False
    )
    uploads[name] += upload_file_list(
        this_dep, kind["products"], "products/", overwrite=False
    )
    if "source" in kind.keys():
        uploads[name] += upload_file_list(
            this_dep, kind["source"], "source/", overwrite=False
        )

SLIIDERS


TypeError: string indices must be integers, not 'str'