# Project income and pop from SSPs

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import xarray as xr
from sliiders import settings as sset
from sliiders.io import save

# All valid historical country codes: the first column of the stored table
_valid_ccodes = pd.read_parquet(sset.PATH_ALL_VALID_HIST_CCODES).iloc[:, 0]
ALL_ISOS = _valid_ccodes.values
# Only the codes that do not contain a "+"
FINAL_ISOS = [ccode for ccode in ALL_ISOS if "+" not in ccode]

# The SSPs are harmonized through 2010 and start to diverge in 2015, so 2010 is the
# year at which historical data are joined to the projections
TRANSITION_YEAR = 2010

## Transform functions

In [3]:
def append_hist(proj_da, hist_da):
    """Interpolate projections to yearly values and prepend rescaled history.

    Parameters
    ----------
    proj_da : xarray.DataArray
        Projected values with ``year`` and ``ccode`` dimensions. Must contain
        ``TRANSITION_YEAR`` and have no gaps once interpolated.
    hist_da : xarray.DataArray
        Historical values with ``year`` and ``ccode`` dimensions. Must contain
        ``TRANSITION_YEAR``.

    Returns
    -------
    xarray.DataArray
        Historical values for years before ``TRANSITION_YEAR`` (rescaled so that they
        meet the projections at ``TRANSITION_YEAR``) followed by the yearly
        projections, on the ``ccode`` coordinate of ``hist_da``. Countries that are in
        ``hist_da`` but not in ``proj_da`` come out as NaN.

    Raises
    ------
    AssertionError
        If the interpolated projections contain any missing value.
    """
    # log-linear interpolation to yearly values
    proj_da = np.exp(
        np.log(proj_da)
        .reindex(year=np.arange(TRANSITION_YEAR, sset.PROJ_YEARS[-1] + 1))
        .interpolate_na("year")
    )

    # check
    assert proj_da.notnull().all(), "projections have gaps after interpolation"

    # align hist with proj: rescale each country's history so that its value at
    # TRANSITION_YEAR equals the projected value for that year
    hist_adj = (
        hist_da
        / hist_da.sel(year=TRANSITION_YEAR)
        * proj_da.sel(year=TRANSITION_YEAR).reindex(ccode=hist_da.ccode)
    )

    # concat and return. History is cut by label (not by position) so that the
    # transition year is always taken from the projections, even when hist_da
    # extends beyond it
    return xr.concat(
        (
            hist_adj.sel(year=slice(None, TRANSITION_YEAR - 1)),
            proj_da.reindex(ccode=hist_da.ccode),
        ),
        dim="year",
    )


def project_missing_isos(proj_da, hist_da):
    """Fill in countries that have no projected values.

    The same procedure is applied to whichever variable is passed in:

    1. If these are territories of other sovereigns, we use the sovereign growth rates
       applied to the ``TRANSITION_YEAR`` values from historical data. These estimates
       are also returned so the caller can subtract them from the sovereign
       projections to avoid double counting.
    2. If they are not territories but have some other relationship (e.g. ESH to MAR) we
       do the same thing, but they are excluded from the returned subtraction set.
    3. Otherwise, we keep the historical values before ``TRANSITION_YEAR`` and, from
       ``TRANSITION_YEAR`` on, scale the ``TRANSITION_YEAR`` historical value by the
       growth of the sum over all countries in each scenario.

    Parameters
    ----------
    proj_da : xarray.DataArray
        Values with ``ccode`` and ``year`` dimensions (optionally ``ssp`` and
        ``iam``); missing countries are NaN.
    hist_da : xarray.DataArray
        Historical values with ``ccode`` and ``year`` dimensions, containing
        ``TRANSITION_YEAR``.

    Returns
    -------
    out : xarray.DataArray
        ``proj_da`` with every missing value filled.
    to_subtract : xarray.DataArray
        Estimated values for the countries flagged ``included_in_parent`` in
        ``sset.GADM_TO_SSP_ISO_MAPPING``.
    grps : xarray.DataArray
        Parent code of each country in ``to_subtract``, indexed by ``ccode``.

    Raises
    ------
    AssertionError
        If any value is still missing after all three steps.
    """

    agg_dims = [i for i in ["year", "ssp", "iam"] if i in proj_da.dims]

    # get sovereign growth rates: for every mapped country, the parent's series
    # divided by the parent's value in the transition year
    gr = proj_da.rename(ccode="tmp").sel(
        tmp=sset.GADM_TO_SSP_ISO_MAPPING.parent.to_xarray(), drop=True
    )
    gr /= gr.sel(year=TRANSITION_YEAR)

    # estimate missing levels
    missing = (
        hist_da.sel(ccode=sset.GADM_TO_SSP_ISO_MAPPING.index, year=TRANSITION_YEAR) * gr
    )

    # return dataarray of parent countries that we can subtract from
    grps = sset.GADM_TO_SSP_ISO_MAPPING.loc[
        sset.GADM_TO_SSP_ISO_MAPPING.included_in_parent, "parent"
    ].to_xarray()
    to_subtract = missing.sel(ccode=grps.ccode)

    out = proj_da.fillna(missing)

    # apply global growth rate for remaining (countries that are still entirely NaN)
    remaining = out.ccode[out.isnull().all(agg_dims)]
    global_gr = out.sel(year=slice(TRANSITION_YEAR, None)).sum("ccode")
    global_gr /= global_gr.sel(year=TRANSITION_YEAR)
    imputed = xr.concat(
        (
            hist_da.sel(ccode=remaining, year=slice(None, TRANSITION_YEAR - 1)),
            hist_da.sel(ccode=remaining, year=TRANSITION_YEAR) * global_gr,
        ),
        dim="year",
    )
    out = out.fillna(imputed)

    # confirm complete
    assert out.notnull().all()

    return out, to_subtract, grps

## Raw data re-formatting

In [4]:
# Load the raw IIASA projections; the "REGION" column holds the country code
iiasa_raw_df = pd.read_csv(
    sset.PATH_IIASA_PROJECTIONS_RAW,
).rename(columns={"REGION": "ccode"})

# IAM name = first token of MODEL (split on space or hyphen); SSP = first four
# characters of SCENARIO
iiasa_raw_df["iam"] = iiasa_raw_df.MODEL.str.split(" |-").str[0]
iiasa_raw_df["ssp"] = iiasa_raw_df.SCENARIO.str[:4]

# Keep only the population and GDP rows and index them by variable/ssp/iam/country
iiasa_raw_df = (
    iiasa_raw_df.set_index(["VARIABLE"])
    .loc[["Population", "GDP|PPP"], :]
    .drop(columns=["MODEL", "SCENARIO"])
    .reset_index()
    .drop_duplicates()
    .set_index(["VARIABLE", "ssp", "iam", "ccode"])
    .sort_index()
)

assert iiasa_raw_df.index.is_unique

# Convert everything to base units. `na=False` matters: without it a missing UNIT
# yields NaN, which `np.where` treats as truthy and would silently scale as
# "billion". With it, such rows get a NaN multiplier and fail the assertion below.
multipliers = np.where(
    iiasa_raw_df.UNIT.str.startswith("billion", na=False),
    1e9,
    np.where(iiasa_raw_df.UNIT.str.startswith("million", na=False), 1e6, np.nan),
)
assert not np.isnan(multipliers).any()
iiasa_raw_df = (
    iiasa_raw_df.drop(columns="UNIT").dropna(how="all", axis=1).mul(multipliers, axis=0)
)
# the remaining columns are years
iiasa_raw_df.columns = iiasa_raw_df.columns.astype(int)

# 2010 is last year of harmonization across all SSPs. Starts to diverge in 2015
iiasa_raw_df = iiasa_raw_df.loc[:, TRANSITION_YEAR : sset.PROJ_YEARS[-1]]

iiasa_pop = iiasa_raw_df.loc["Population"]
iiasa_gdp = iiasa_raw_df.loc["GDP|PPP"]

# Historical data: read the `rgdpna_pc_19` and `pop` columns for the years from
# the first projection year through the transition year, then expose the former
# under the name `gdppc`
hist_da = pd.read_parquet(
    sset.PATH_EXPOSURE_YPK_COUNTRY_HIST_INT,
    columns=["rgdpna_pc_19", "pop"],
    filters=[("year", "in", np.arange(sset.PROJ_YEARS[0], TRANSITION_YEAR + 1))],
).to_xarray()
hist_da = hist_da.rename({"rgdpna_pc_19": "gdppc"})

## Population

We will take only the IIASA projections, except for countries that are missing from IIASA but present in another model's projections (OECD, NCAR, or PIK, in that order of preference).

In [5]:
# Put the IAMs in order of preference. Back-filling along `iam` then lets the IIASA
# slice inherit the first available value from OECD, NCAR or PIK where it is missing
iam_priority = ["IIASA", "OECD", "NCAR", "PIK"]
ii_pop = iiasa_pop.to_xarray().to_array("year").sel(iam=iam_priority)
ii_pop = ii_pop.bfill("iam").sel(iam="IIASA")

# Prepend history, then fill in the countries that have no projections
pop_hist = hist_da["pop"]
ii_pop, to_subtract, grps = project_missing_isos(
    append_hist(ii_pop, pop_hist), pop_hist
)

# Territories counted inside their parent: total them per parent and remove that
# total from the parent's population (countries without such territories lose 0)
territory_pop = to_subtract.groupby(grps).sum().rename(parent="ccode")
ii_pop -= territory_pop.reindex(ccode=ii_pop.ccode, fill_value=0)

## GDPpc and GDP

We will use the IAMs `IIASA` and `OECD`, filling gaps in either one from the other (or from `PIK` when both are missing).

In [6]:
# use iiasa/oecd to fill when other is missing (PIK is the fallback when both are);
# dividing total GDP by population gives GDP per capita
ii_gdppc = (
    iiasa_gdp.to_xarray()
    .to_array("year", name="gdppc")
    .sel(
        # start at the shared transition year rather than a hard-coded 2010
        year=slice(TRANSITION_YEAR, sset.PROJ_YEARS[-1]),
        iam=["IIASA", "OECD", "PIK"],
    )
    .ffill("iam")
    .bfill("iam")
    .sel(iam=["IIASA", "OECD"])
) / ii_pop

# convert from 2005 to 2019 USD, using the ratio of the USA `pl_gdpo` values in the
# PWT table for those two years
pwt = pd.read_excel(sset.PATH_PWT_RAW).set_index(["countrycode", "year"])
infla = pwt.loc[("USA", 2019), "pl_gdpo"] / pwt.loc[("USA", 2005), "pl_gdpo"]
ii_gdppc *= infla

In [7]:
# Prepend history, then fill in the countries that have no projections
gdppc_hist = hist_da["gdppc"]
ii_gdppc, to_subtract, grps = project_missing_isos(
    append_hist(ii_gdppc, gdppc_hist), gdppc_hist
)

# Per-capita values cannot be subtracted directly: weight each territory's gdppc by
# its population to get total GDP, sum per parent, then divide by population to get
# the per-capita amount to remove (0 for countries with no such territories)
territory_gdp = (to_subtract * ii_pop).groupby(grps).sum().rename(parent="ccode")
ii_gdppc -= (territory_gdp / ii_pop).reindex(ccode=ii_gdppc.ccode, fill_value=0)

### Save

In [9]:
# Bundle both projected variables into one dataset and write it to the
# intermediate projections path
merged = xr.Dataset(data_vars=dict(gdppc=ii_gdppc, pop=ii_pop))
save(merged, sset.PATH_EXPOSURE_YPK_COUNTRY_PROJ_INT, mode="w")