In [1]:
# Load all the GDX files
from collections import OrderedDict
from os import makedirs as mkdir
from os.path import join

import gdx
import pandas as pd
import xray

GDX_DIR = 'gdx'
OUT_DIR = '../../../cecp-cop21-data'
FILES = [
    ('bau', 'result_urban_exo.gdx'),
    ('3', 'result_cint_n_3.gdx'),
    ('4', 'result_cint_n_4.gdx'),
    ('5', 'result_cint_n_5.gdx'),    
    ]

raw = OrderedDict()
extra = dict()
for case, fn in FILES:
    raw[case] = gdx.File('gdx/' + fn)
    extra[case] = gdx.File('gdx/' + fn.replace('.gdx', '_extra.gdx'))

CREM = raw['bau']
cases = pd.Index(raw.keys(), name='case')
time = pd.Index(filter(lambda t: int(t) <= 2030, CREM.set('t')))

In [2]:
# List of all the parameters available in each file
#CREM.parameters()

In [2]:
arrays = {}

# GDP
temp = [raw[case].extract('gdp_ref') for case in cases]
arrays['GDP'] = xray.concat(temp, dim=cases).sel(rs=CREM.set('r')) \
                    .rename({'rs': 'r'})

In [3]:
# CO2 emissions
temp = []
for case in cases:
    temp.append(raw[case].extract('sectem').sum('g') +
        raw[case].extract('houem'))
arrays['CO2_emi'] = xray.concat(temp, dim=cases)

In [4]:
# Air pollutant emissions
temp = []
for case in cases:
    temp.append(raw[case].extract('urban').sum('*'))
temp = xray.concat(temp, dim=cases).sel(rs=CREM.set('r')).rename({'rs': 'r'})
for u in temp['urb']:
    if u in ['PM10', 'PM25']:
        continue
    arrays['{}_emi'.format(u.values)] = temp.sel(urb=u).drop('urb')

In [8]:
# CO₂ price
temp = []
for case in cases:
    temp.append(extra[case].extract('ptcarb_t'))
arrays['CO2_price'] = xray.concat(temp, dim=cases)

In [10]:
# Consumption
temp = []
for case in cases:
    temp.append(extra[case].extract('cons_t'))
arrays['Consumption'] = xray.concat(temp, dim=cases)

In [11]:
# Primary energy
temp = []
for case in cases:
    temp.append(extra[case].extract('pe_t'))
temp = xray.concat(temp, dim=cases).sel(t=time)
for ener in temp['e']:
    arrays['{}_energy'.format(ener.values)] = temp.sel(e=ener).drop('e')

## TODO: further variables

From C-REM:
- Population
- Share of coal in production inputs

From GEOS-Chem:
- Population-weighted PM2.5 exposure

In [12]:
# Combine all variables into a single xray.Dataset and truncate time
data = xray.Dataset(arrays).sel(t=time)
# National totals
national = data.sum('r')
data

<xray.Dataset>
Dimensions:      (case: 4, r: 30, t: 6)
Coordinates:
  * t            (t) object '2007' '2010' '2015' '2020' '2025' '2030'
  * r            (r) <U2 'BJ' 'TJ' 'HE' 'LN' 'SH' 'JS' 'ZJ' 'FJ' 'SD' 'GD' ...
  * case         (case) object 'bau' '3' '4' '5'
Data variables:
    SOL_energy   (case, r, t) float64 5e+300 5e+300 5e+300 5e+300 5e+300 ...
    COL_energy   (case, r, t) float64 19.59 22.06 28.29 34.43 37.86 39.73 ...
    NUC_energy   (case, r, t) float64 5e+300 5e+300 5e+300 5e+300 5e+300 ...
    NH3_emi      (case, r, t) float64 0.0755 0.06891 0.06917 0.08399 0.09497 ...
    HYD_energy   (case, r, t) float64 0.1134 0.04984 0.04976 0.04989 0.04941 ...
    BC_emi       (case, r, t) float64 0.01766 0.01688 0.01721 0.01999 ...
    GAS_energy   (case, r, t) float64 5.466 6.842 8.318 10.29 13.44 18.02 ...
    WND_energy   (case, r, t) float64 5e+300 5e+300 5e+300 5e+300 5e+300 ...
    CO2_emi      (case, r, t) float64 111.1 128.3 164.1 200.6 225.2 244.5 ...
    SO2_emi      

In [14]:
# TODO: output a README file along with the data files; units.

# Create directories
for r in CREM.set('r'):
    mkdir(join(OUT_DIR, r), exist_ok=True)
mkdir(join(OUT_DIR, 'national'), exist_ok=True)

# Serialize to CSV
for c in cases:
    # Provincial data
    for r in CREM.set('r'):
        data.sel(case=c, r=r).drop(['case', 'r']).to_dataframe().to_csv(
            join(OUT_DIR, r, '{}.csv'.format(c)))
    # National data
    national.sel(case=c).drop('case').to_dataframe() \
            .to_csv(join(OUT_DIR, 'national', '{}.csv'.format(c)))