# In [1]:
'''
Manual rebasing script

Purpose: this script takes unrebased projection output and manually calculates the 'combined, rebased impact'
(correctly!) by enacting this equation:

combined_rebased_impact = minlost_hi_rebased * riskshare_hi + minlost_lo_rebased * (1 - riskshare_hi)

This is done for histclim and fulladapt scenarios. As per usual, the histclim value will then be subtracted
from the fulladapt value and we will get our final result.

Parameters:

@model     : the name of the model (and thus the name of folder that output is stored in)
@scenario  : the name of the scenario from which projection output is sourced

Outputs: a DataFrame (`dt`) built from the original projection file that now includes a new column,
'rebased_new', which has this correctly-calculated combined, rebased impact. (As written, the result is
kept in memory; no step shown here saves it back to disk.)

'''
__author__ = 'Kit Schwarz'
__contact__ = 'csschwarz@uchicago.edu'
__version__ = '1.0'

############
# LIBRARIES
############

import xarray as xr
import pandas as pd
import getpass
import sys

############
# PARAMETERS
############

# Model whose projection output is rebased.
# Options: uninteracted_main_model_w_chn, uninteracted_main_model
model = 'uninteracted_main_model_w_chn'

# Scenario suffix appended to the model name to form the file name.
# Options: '-histclim', '' (fulladapt), '-incadapt', '-noadapt'
scenario = '-histclim'

############
# PATHWAYS
############

username = getpass.getuser()

# Directory holding the projection output for each model this script supports.
proj_roots = {
    'uninteracted_main_model_w_chn': (
        '/shares/gcp/outputs/labor/impacts-woodwork/uninteracted_main_model_w_chn/'
        'uninteracted_splines_w_chn_21_37_41_by_risk_empshare_noFE_YearlyAverageDay/rcp85/CCSM4/high/SSP3'
    ),
    'uninteracted_main_model': (
        '/shares/gcp/outputs/labor/impacts-woodwork/test_rcc/'
        'uninteracted_splines_27_37_39_by_risk_empshare_noFE_YearlyAverageDay/rcp85/CCSM4/high/SSP3'
    ),
}

# Stop before any file is opened if the chosen model has no known output directory.
if model not in proj_roots:
    sys.exit("Your model is unrecognized.")

proj_root = proj_roots[model]
    
############
# CALCULATIONS
############

# Load the projection output for the chosen model/scenario into a flat table
# (one row per combination of the dataset's dimensions, with the dimensions
# turned into ordinary columns by reset_index()).
#
# The dataset is opened in a `with` block so the underlying netCDF file handle
# is released as soon as the data has been copied out, rather than staying open
# for the rest of the session.
with xr.open_dataset(f'{proj_root}/{model}{scenario}.nc4') as ds:
    # to_dataframe() materialises the values in memory, so `dt` remains usable
    # after the file is closed.
    dt = ds.to_dataframe().reset_index()

# Keep a single copy of each row: the table repeats rows once per value of the
# orderofoperations column, so only the 'clip' entries are retained.
dt = dt[dt['orderofoperations'] == 'clip']

# Rows belonging to the rebasing window (2001-2010, both ends included).
base = dt[dt['year'].between(2001, 2010)]

# Per-region average of each risk group's impact over the rebasing window.
mean_base = (
    base.groupby('regions')[['highriskimpacts', 'lowriskimpacts']]
    .mean()
    .rename(columns={'highriskimpacts': 'high_base',
                     'lowriskimpacts': 'low_base'})
)

# Attach the base-period averages to every row of the matching region.
dt = pd.merge(dt, mean_base, left_on='regions', right_index=True)

# Rebased sector-specific impacts: each year's impact minus the base-period mean.
dt['rebased_high'] = dt['highriskimpacts'] - dt['high_base']
dt['rebased_low'] = dt['lowriskimpacts'] - dt['low_base']

# Combined, rebased impact: weighted average of the two rebased series.
# NOTE(review): the 'clip' column is used as the high-risk share weight
# (riskshare_hi in the header equation) - confirm against the projection output.
dt['rebased_new'] = (dt['clip'] * dt['rebased_high']) + ((1 - dt['clip']) * dt['rebased_low'])

# Optional manual check of 'rebased' against 'rebased_new' for one region
# (uncomment to print every row without truncation):
# with pd.option_context('display.max_rows', None, 'display.max_columns', None):
#     print(dt.loc[dt.regions == "CAN.1.2.28"][['year', 'rebased', 'rebased_new']])

# For comparison: Rae's rebasing version of the histclim output.
rae_version_path = (
    '/shares/gcp/outputs/labor/impacts-woodwork/uninteracted_main_model_w_chn_copy/'
    'uninteracted_splines_w_chn_21_37_41_by_risk_empshare_noFE_YearlyAverageDay/rcp85/'
    'CCSM4/high/SSP3/uninteracted_main_model_w_chn-histclim.nc4'
)

# Left as a bare trailing expression so that a notebook cell displays the table;
# the result is not assigned to anything.
xr.open_dataset(rae_version_path).to_dataframe()