In [1]:
# Run the shared setup notebook; presumably it defines `d` (dict of project paths) and imports `os`/`pd`, which are used below without being imported here.
%run StdPackages.ipynb
d['processedData'] = os.path.join(d['data'],'processedData') # path to the processed-data folder (sub-folder of d['data'])
os.chdir(d['py']) # switch working directory to d['py'] -- presumably so the local modules imported next can be found
from loadIO import *
import RAS
from scipy.cluster.vq import kmeans2

No clean-up of work-folder


# Aggregate IO data for NationalClimatePolicy project

We aggregate in two steps: First, we single out the specific sectors that the model draws on. This includes energy (sector code 35000). Second, for all other sectors, we define input shares and use KMeans to aggregate them into 3 residual sectors.

## 1. Aggregate IO data

*Load data:*

In [2]:
# Run settings: base year and number of residual clusters used by KMeans below.
t0 = 2019
sectorsKMeans = 3
# Name given to the database when it is exported.
name = f'IO{t0}_NCP'
# Location of the full IO database for year t0 inside the processed-data folder.
fullIO = os.path.join(d['processedData'], f'IO{t0}_s146_dur7')
db = GpyDB(fullIO, name = name)

*Mappings:*

In [3]:
# Durable mapping (n -> nn): every element of db('s_i') is paired with the single label 'K'.
durPairs = [(sector, 'K') for sector in db('s_i')]
mDur = pd.MultiIndex.from_tuples(durPairs, names = ['n','nn'])
# Sector mapping (s -> ss): the four listed sector codes are all mapped to 'Energy'.
energyCodes = ['350011','350012','350020','350030']
m = pd.MultiIndex.from_arrays([energyCodes, ['Energy']*len(energyCodes)], names = ['s','ss'])

*Create input intensities to aggregate sectors using KMeans*

In [4]:
# Total demand for each good in n_p (summed over sectors s), re-indexed on 's' so that it
# lines up with the sector index when dividing below.
vS = adj.rc_pd(pdSum(db('vD'), 's'), db('n_p')).rename_axis(index = {'n':'s'})
# Demand restricted to the sectors in s_p.
vD = adj.rc_pd(db('vD'), db('s_p'))
# Input intensity = demand relative to vS, then normalised so the intensities sum to one over n.
inpIntensity = vD / vS
inpIntensity = inpIntensity / pdSum(inpIntensity, 'n')
# One row per sector not already covered by the mapping m, one column per good; missing combinations count as 0.
μ = adj.rc_pd(inpIntensity, ('not', m)).unstack('n').fillna(0)
# kmeans2 initialises its centroids randomly. Without a fixed seed the cluster labels
# (and therefore the 'agg0', 'agg1', ... sectors and everything exported below) change
# between runs. The `seed` keyword requires SciPy >= 1.7.
kmeansSeed = 0
km = kmeans2(μ, sectorsKMeans, seed = kmeansSeed)
# km[1] holds the cluster label of each row of μ; map each sector to 'agg<label>'.
mKM = pd.MultiIndex.from_arrays([μ.index, 'agg'+pd.Index(km[1], name = 'ss').astype(str)])
# NOTE(review): this extends m in place, so re-running the cell without re-running the
# mappings cell leaves no sectors in μ to cluster.
m = m.union(mKM)

  μ = adj.rc_pd(inpIntensity, ('not', m)).unstack('n').fillna(0)


*Create I_K (flow investment variable corresponding to durable stock $K$)*:

In [5]:
# Same pairs as mDur, but the target labels get an 'I_' prefix and the levels are renamed to ('s','ss').
invLabels = ['I_'+label for label in mDur.levels[-1]]
mDur_s = mDur.set_levels(invLabels, level = 1).rename(['s','ss'])

*Full sector mapping:*

In [7]:
# Specific mappings: energy/KMeans clusters plus the investment sectors.
ms = m.union(mDur_s)
# Neutral mapping (x,x) for every sector in the database.
allSectors = db('s')
ms_neutral = pd.MultiIndex.from_arrays([allSectors, allSectors.rename('ss')])
# Full mapping: keep the specific mapping where one exists, otherwise fall back to the neutral one.
unmappedSectors = adj.rc_pd(ms_neutral, ('not', ms.levels[0]))
m_sector = ms.union(unmappedSectors)

*Apply mapping:*

In [8]:
# Apply the sector mapping to the database. The return value is discarded (trailing ';' hides it),
# so this presumably modifies db in place -- TODO confirm against AggDB.aggDB.
AggDB.aggDB(db, m_sector);

*Adjust goods index:*

In [9]:
# Foreign version of the sector mapping: append '_F' to the labels of both levels.
mForeign = m.set_levels([level+'_F' for level in m.levels])
# Goods mapping: domestic + foreign mappings renamed to (n,nn), plus the durables mapping.
mn = m.union(mForeign).rename(['n','nn']).union(mDur)
# Neutral mapping (x,x) for every good in the database.
allGoods = db('n')
mn_neutral = pd.MultiIndex.from_arrays([allGoods, allGoods.rename('nn')])
# Full mapping: specific mapping where one exists, neutral mapping otherwise.
unmappedGoods = adj.rc_pd(mn_neutral, ('not', mn.levels[0]))
m_goods = mn.union(unmappedGoods)

*Apply mapping:*

In [10]:
# Apply the goods mapping to the database. The return value is discarded,
# so this presumably modifies db in place -- TODO confirm against AggDB.aggDB.
AggDB.aggDB(db, m_goods);

## 2. Model data

*Clean up some data:*

In [13]:
# Drop symbols that are not used further on.
for unusedSym in ('gc','vC','vC_tax'):
    db.series.database.pop(unusedSym)
# Strip zero entries from every symbol of type 'var'.
for varSym in db.getTypes(['var']):
    db[varSym] = db(varSym)[db(varSym)!=0]

IO data is measured in mio DKK. Here, we rescale to billion DKK instead (*Note: This rescales all variables in the database except `qCO2` and `M1990`, so it is assumed that every rescaled variable is measured in absolute values (and not e.g. ratios)*)

In [14]:
# Divide every 'var' symbol by `factor`, except the ones listed in notRescaled.
factor = 1000
notRescaled = ['qCO2', 'M1990']
for varSym in db.getTypes(['var']):
    if varSym in notRescaled:
        continue
    db[varSym] = db(varSym)/factor
# Keep track of the value of one unit in the database.
db._scale = db._scale * factor

*Note: The ```._scale``` attribute indicates the value of 1 unit in the database.*

Translate depreciation of durables to rates, distinguish between investments and durables (flow, stock) with investment good syntax ```I_x``` for durable ```x```. Define mapping ```dur2inv``` and subsets ```dur_p, inv_p```. Add investments and value of durables to the vector ```vD```:

In [15]:
# Depreciation rate = depreciation value relative to the value of the durable.
db['rDepr'] = db('vD_depr')/db('vD_dur')
# Durables: the values of the 'n' level of vD_dur's index (position looked up via the symbol's domains).
db['dur_p'] = db('vD_dur').index.levels[db['vD_dur'].domains.index('n')]
# Investment goods: one 'I_x' label per durable x.
db['inv_p'] = db('dur_p').map(lambda x: f'I_{x}')
# Mapping from durable x (level 'n') to its investment good I_x (level 'nn').
db['dur2inv'] = pd.MultiIndex.from_arrays([db('dur_p'), db('inv_p').rename('nn')])
# Relabel the 'n' level of vD_inv from x to I_x by assigning a new index onto the object returned by db('vD_inv').
# NOTE(review): this only sticks if db(...) returns the stored object rather than a copy -- confirm.
db('vD_inv').index = db('vD_inv').index.set_levels(db('vD_inv').index.levels[db['vD_inv'].domains.index('n')].map(lambda x: f'I_{x}'), level = 'n')
# Merge into vD. On overlapping index entries vD_inv takes priority, then the existing vD, then vD_dur.
db['vD'] = db('vD_inv').combine_first(db('vD')).combine_first(db('vD_dur'))

### RAS

Simple RAS algorithm:

In [17]:
# Values below this threshold (in bio DKK) are removed.
threshold = 0
# Part of vD to balance: goods in n_p or n_F, demanded by sectors in s_p or s_i.
goodsCond = ('or', [db('n_p'), db('n_F')])
sectorCond = ('or', [db('s_p'), db('s_i')])
v0 = adj.rc_pd(db('vD'), ('and', [goodsCond, sectorCond]))
# Goods that do not need to be balanced.
leaveCols = db('n_F')
# Sectors that do not need to be balanced (none).
leaveRows = None
# Entries of v0 below the threshold, with their values set to zero.
belowThreshold = v0[v0<threshold]
vBar = belowThreshold * 0

*Get RAS adjustments:*

In [18]:
# Run the RAS routine on v0 and vBar, passing leaveCols/leaveRows, a tolerance of 1e-8 and at most 1000 iterations.
# NOTE(review): this rebinds `vD`, which earlier in the notebook held db('vD') restricted to s_p.
vD = RAS.simpleRAS(v0, vBar, leaveCols = leaveCols, leaveRows = leaveRows, tol = 1e-8, iterMax = 1000)

Largest deviation summing over n: 0.0
Largest deviation summing over s: 0.0


In [19]:
# Display the values returned by RAS.simpleRAS for inspection.
vD

s       n       
Energy  Energy        3.276949
        Energy_F       0.28537
        agg0          5.710723
        agg0_F        3.687417
        agg1          2.728461
        agg1_F        2.120625
        agg2          3.503238
        agg2_F        0.267127
I_K     Energy        0.884227
        agg0        257.019096
        agg0_F        69.50237
        agg1          44.28125
        agg1_F       13.507946
        agg2         53.958902
        agg2_F        0.426417
agg0    Energy        9.191595
        Energy_F      0.906473
        agg0        520.298704
        agg0_F      493.782328
        agg1        111.779827
        agg1_F       57.628301
        agg2        133.154295
        agg2_F       24.647824
agg1    Energy        2.290931
        Energy_F      0.201176
        agg0         94.767016
        agg0_F       28.063118
        agg1         39.055617
        agg1_F       21.290389
        agg2          41.63241
        agg2_F        4.506569
agg2    Energy        

*Merge things back up again:*

In [20]:
# RAS-adjusted values take priority; entries not covered by the RAS step come from db('vD').
vD_merged = vD.combine_first(db('vD'))
# Remove zero values again.
vD_full = vD_merged[vD_merged!=0]

*Remove residual income category (we don't currently use this in the model, this will enter the return on durables instead):*

In [21]:
# Store vD without the 'resIncome' entry on the goods index n.
resIncomeIdx = pd.Index(['resIncome'], name = 'n')
db['vD'] = adj.rc_pd(vD_full, ('not', resIncomeIdx))

### Create other variables

In [22]:
# Scalar parameters added to the database: R_LR, infl_LR and g_LR.
for parName, parValue in (('R_LR', 1.03), ('infl_LR', 0), ('g_LR', 0)):
    db[parName] = gpy(parValue, name = parName, type = 'par')
# Helper routines (presumably from loadIO) that add the remaining model variables to db.
model_vS(db)
model_p(db)
model_durables(db, db('R_LR'), db('infl_LR'))
model_quantNonDurables(db)

### Create other subsets and mappings

Subsets of goods/sectors:

In [23]:
# Goods that appear in vS (sector level dropped, duplicates removed).
db['nEqui'] = db('vS').index.droplevel('s').unique() # what goods require an equilibrium condition
# Domain of qS: the full index of vS.
db['d_qS'] = db['vS'].index 
# Domain of qD: index of vD restricted to the goods in nEqui.
db['d_qD'] = adj.rc_pd(db('vD'), db('nEqui')).index 
# d_qS without the sectors in s_HH.
db['d_qSEqui'] = adj.rc_pd(db['d_qS'].vals, ('not', db('s_HH'))) # Subset of qS values to be endogenized in general equilibrium
# Only the good 'L' is included here.
db['d_pEqui'] = pd.Index(['L'], name ='n') # Subset of prices to be endogenized in general equilibrium 

####  Trade mappings

Define the mappings:
* ```dom2for[n,nn]```: Mapping from domestic to the equivalent foreign goods (with syntax ```x,x_F```).
* ```dExport[t,s,n]```: Foreign sectors' demand for domestic goods.
* ```dImport[t,s,n,nn]```: sector, domestic good, foreign good combinations in data - i.e. where a sector demands both domestic and foreign type of product.
* ```dImport_dom[t,s,n]```: sector, domestic good combination (s,n) where the sector only demands the domestic and not the corresponding foreign good.
* ```dImport_for[t,s,n]```: sector, foreign good combinations (s,n) where the sector only demands the foreign and not the corresponding domestic good.

In [24]:
# dom2for: pair each domestic good x with its foreign counterpart x_F *by name*.
# Zipping two independently sorted indices is fragile: the '_F' suffix can change the
# sort order (e.g. 'agg1' < 'agg10' but 'agg10_F' < 'agg1_F'), which would silently
# mis-pair goods, and a domestic good without a foreign counterpart would raise a
# length-mismatch error. Only pairs whose foreign good exists in n_F are kept.
n_dom = db('n_p').sort_values()
n_for = (n_dom + '_F').rename('nn')
hasForeign = n_for.isin(db('n_F'))
db['dom2for'] = pd.MultiIndex.from_arrays([n_dom[hasForeign], n_for[hasForeign]])
# dExport: index of the demand entries that belong to the sectors in s_f.
db['dExport'] = adj.rc_pd(db('vD'), db('s_f')).index
# Demand for domestic goods, extended with the matching foreign good as extra level 'nn'.
vD_dom = stdSort(adjMultiIndex.applyMult(adj.rc_pd(db('vD'), db('n_p')), db('dom2for')))
# Demand for foreign goods, with the goods level renamed to 'nn' so it lines up with vD_dom.
vD_for = adj.rc_pd(db('vD'), db('n_F')).rename_axis(index= {'n':'nn'})
# dImport: combinations where both the domestic and the corresponding foreign good are demanded.
db['dImport'] = stdSort(adj.rc_pd(vD_dom, vD_for)).index
# dImport_dom: only the domestic good is demanded.
db['dImport_dom'] = adj.rc_pd(vD_dom, ('not', vD_for)).droplevel('nn').index
# dImport_for: only the foreign good is demanded (level renamed back to 'n').
db['dImport_for'] = adj.rc_pd(vD_for, ('not', db('dImport'))).rename_axis(index = {'nn':'n'}).index

### Export

In [25]:
# Refresh the sets in db from the symbols of type 'var', 'par' and 'map' defined throughout the notebook.
AggDB.updSetsFromSyms(db, types = ['var','par','map']) # define sets from variables/parameters defined throughout
# Export the database; presumably under the name given when db was created -- TODO confirm target location.
db.export()