# Emissions outputs

We want to make a file with:
1. flux maps
   - fluxapriori (i.e. prior from EDGAR)
   - pfluxapriori (percentiles sampled from prior)
   - fluxapost (e.g. mean or mode)
   - pfluxapost (percentiles sampled from posterior)
2. country totals
   - countryapriori (i.e. (country map) x (area grid) x (flux apriori))
   - pcountryapriori (percentiles sampled from prior)
   - countryapost (e.g. mean or mode)
   - pcountryapost (percentiles sampled from posterior country trace)
   - covcountryapost (sample? covariances between country totals)

## Set up

In [1]:
import json
from pathlib import Path
import re

import arviz as az
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as nla
import pandas as pd
import pymc as pm
import scipy
import scipy.sparse as ssp
import scipy.linalg as sla
import sparse
from sparse import COO
import xarray as xr

In [2]:
ds0 = xr.open_dataset("CH4_EUROPE_PARIS_dec2023_weighted_2021-02-01.nc")

In [3]:
# Expand "~" explicitly: pathlib never expands it on its own, so without this the
# path only works with consumers that happen to call expanduser themselves.
country_path = Path("~/Documents/inversions/openghg_inversions/countries").expanduser()
cds = xr.open_dataset(country_path/"country_EUROPE.nc")
cds_ukmo = xr.open_dataset(country_path/"country-ukmo_EUROPE.nc")

In [4]:
%run country_totals.py

## Preparing flux variables

- fluxapriori (i.e. prior from EDGAR)
- pfluxapriori (percentiles sampled from prior)
- fluxapost (e.g. mean or mode)
- pfluxapost (percentiles sampled from posterior)

In [5]:
basis_inv = ds0.fluxapriori * get_xr_dummies(ds0.basisfunctions, cat_dim="nparam")

In [6]:
flux_trace = sparse_xr_dot(basis_inv, ds0.xtrace.isel(steps=slice(0, 1000)))

_func received array of type <class 'sparse._coo.core.COO'>, shape (293, 391, 1, 499)
_func received array of type <class 'numpy.ndarray'>, shape (1000, 499)
_func result shape: (293, 391, 1000)


In [7]:
flux_trace = flux_trace.expand_dims(dim={"time": [ds0.Ytime.min().values]})

In [8]:
from typing import Sequence


def make_quantiles(
    da: xr.DataArray,
    probs: Sequence[float] = (0.025, 0.159, 0.841, 0.975),
    sample_dim: str = "steps",
) -> xr.DataArray:
    """Compute quantiles of `da` along its sample dimension.

    Args:
        da: array holding samples along `sample_dim`.
        probs: probabilities (between 0 and 1) at which quantiles are evaluated.
        sample_dim: name of the dimension of `da` that is reduced.

    Returns:
        DataArray in which `sample_dim` is replaced by a "probs" dimension whose
        coordinate holds the values of `probs`; "probs" is the first dimension.
    """
    # Convert up front: the default is an immutable tuple, and xarray does not
    # interpret a tuple as 1-D data/coordinate values the way it does a list or
    # an array, so any sequence type is normalised here.
    probs_values = np.asarray(probs, dtype=float)
    probs_da = xr.DataArray(probs_values, coords=[probs_values], dims=["probs"])

    # make function to apply
    # we will pass `q=probs_da` so that the coordinates of probs will be propagated
    def func(a, q):
        # apply along input_core_dim = sample_dim; np.quantile puts the quantile
        # axis first in its result
        qs = np.quantile(a, q, axis=-1)
        # a trailing axis of length 1 remains after the reduction (presumably the
        # placeholder apply_ufunc inserts for the "probs" broadcast dim -- TODO
        # confirm); drop it
        qs = qs[..., 0]
        # move the quantile axis to the end, which is where the "probs"
        # dimension of the output is taken from
        qs = np.moveaxis(qs, 0, -1)
        return qs

    result = xr.apply_ufunc(func, da, probs_da, input_core_dims=[[sample_dim], []])
    return result.transpose("probs", ...)  # we want "probs" first

In [9]:
pflux_trace = make_quantiles(flux_trace)

To get the prior x trace, we need to sample...

In [10]:
# Prior model for the scaling vector `x`: one TruncatedNormal(mu=1, sigma=1,
# lower=0) variable per entry of ds0.paramnum (the "nparam" coordinate).
# NOTE(review): assumes this matches the prior used by the inversion that
# produced ds0 -- confirm against the inversion configuration.
with pm.Model(coords={"nparam": ds0.paramnum}) as model:
    x = pm.TruncatedNormal("x", mu=1.0, sigma=1.0, lower=0.0, dims="nparam")
    # No observed variables are attached, so this only draws samples of `x`;
    # they are read back below as idata.prior.x.
    idata = pm.sample_prior_predictive()

Sampling: [x]


In [11]:
fluxapriori_trace = sparse_xr_dot(basis_inv, idata.prior.x.isel(chain=0).drop_vars("chain"))

_func received array of type <class 'sparse._coo.core.COO'>, shape (293, 391, 1, 499)
_func received array of type <class 'numpy.ndarray'>, shape (500, 499)
_func result shape: (293, 391, 500)


In [12]:
fluxapriori_trace = fluxapriori_trace.expand_dims(dim={"time": [ds0.Ytime.min().values]})

In [13]:
pfluxapriori_trace = make_quantiles(fluxapriori_trace, sample_dim="draw")

Now we need to check units and add attributes.

We want our outputs in mol m^-2 s^-1

In [14]:
ds0.fluxapriori.attrs

{'units': 'mol/m2/s', 'longname': 'mean a priori flux over period'}

In [15]:
%run attribute_parsers.py

In [16]:
attr_dict = get_data_var_attrs(emissions_template)

In [17]:
write_data_var_attrs(attr_dict, "emissions_data_vars_attrs.json")

We need to convert time to UNIX epoch.

It's probably worth assembling the flux variables together before doing this...

## Preparing country variables

- countryapriori (i.e. (country map) x (area grid) x (flux apriori))
- pcountryapriori (percentiles sampled from prior)
- countryapost (e.g. mean or mode)
- pcountryapost (percentiles sampled from posterior country trace)
- covcountryapost (sample? covariances between country totals)

In [18]:
area_grid = get_area_grid_data_array(ds0.lat, ds0.lon)

In [19]:
country_mat = get_xr_dummies(cds.country, cat_dim="country", categories=cds.name)

In [20]:
x_to_country = sparse_xr_dot(country_mat * area_grid, basis_inv, debug=True)

_func received array of type <class 'sparse._coo.core.COO'>, shape (104, 1, 293, 391)
_func received array of type <class 'sparse._coo.core.COO'>, shape (499, 293, 391)
_func result shape: (104, 499)


In [21]:
countryapriori = xr.apply_ufunc(lambda x: x.todense(), x_to_country.sum("nparam"))

In [22]:
countryapriori_trace = sparse_xr_dot(country_mat * area_grid, fluxapriori_trace, debug=True)

_func received array of type <class 'sparse._coo.core.COO'>, shape (104, 1, 1, 293, 391)
_func received array of type <class 'numpy.ndarray'>, shape (1, 500, 293, 391)
_func result shape: (104, 1, 500)


In [23]:
pcountryapriori = make_quantiles(countryapriori_trace, sample_dim="draw")

In [24]:
countryapost_trace = sparse_xr_dot(country_mat * area_grid, flux_trace)

_func received array of type <class 'sparse._coo.core.COO'>, shape (104, 1, 1, 293, 391)
_func received array of type <class 'numpy.ndarray'>, shape (1, 1000, 293, 391)
_func result shape: (104, 1, 1000)


In [25]:
countryapost = countryapost_trace.mean("steps")

In [26]:
# Quantiles of the posterior country totals over the default "steps" sample dim.
# NOTE(review): the list of planned outputs calls this `pcountryapost`, and the
# prior counterpart is named `pcountryapriori`; the `q` prefix here looks
# inconsistent -- confirm which name downstream code expects before renaming.
qcountryapost = make_quantiles(countryapost_trace)

## Country names to codes

In [27]:
ds0.countrynames

In [28]:
cds.name.values

array(['OCEAN', 'VENEZUELA', 'VIRGIN ISLANDS', 'PUERTO RICO',
       'UNITED STATES', 'ANGUILLA', 'ISLE OF MAN',
       'UNITED KINGDOM OF GREAT BRITAIN AND NORTHERN IRELAND', 'UKRAINE',
       'TURKEY', 'TUNISIA', 'TRINIDAD AND TOBAGO', 'TOGO',
       'SYRIAN ARAB REPUBLIC', 'SWITZERLAND', 'SWEDEN', 'SOUTH SUDAN',
       'SUDAN', 'SPAIN', 'SLOVAKIA', 'SLOVENIA', 'SERBIA', 'SENEGAL',
       'SAUDI ARABIA', 'SAINT LUCIA', 'RUSSIAN FEDERATION', 'ROMANIA',
       'PORTUGAL', 'POLAND', 'NORWAY', 'NIGERIA', 'NIGER', 'NICARAGUA',
       'NETHERLANDS', 'MOROCCO', 'SAMOA', 'MONTENEGRO', 'MOLDOVA',
       'MEXICO', 'MAURITANIA', 'MALI', 'MACEDONIA', 'LUXEMBOURG',
       'LITHUANIA', 'LIBYA', 'LEBANON', 'LATVIA', 'KOSOVO', 'JORDAN',
       'JAMAICA', 'ITALY', 'ISRAEL', 'PALESTINE', 'IRELAND', 'IRAQ',
       'ICELAND', 'HUNGARY', 'HONDURAS', 'HAITI', 'GUINEA-BISSAU',
       'GUINEA', 'GUATEMALA', 'GRENADA', 'GREECE', 'GHANA', 'GERMANY',
       'GAMBIA', 'FRANCE', 'FINLAND', 'ETHIOPIA', 'ESTONIA',

In [38]:
import iso3166
from iso3166 import countries

In [44]:
country_names = list(iso3166.countries_by_name.keys())
apol_country_names = list(iso3166.countries_by_apolitical_name.keys())

In [54]:
for k, v in iso3166.countries_by_alpha2.items():
    print(k, v)

AF Country(name='Afghanistan', alpha2='AF', alpha3='AFG', numeric='004', apolitical_name='Afghanistan')
AX Country(name='Åland Islands', alpha2='AX', alpha3='ALA', numeric='248', apolitical_name='Åland Islands')
AL Country(name='Albania', alpha2='AL', alpha3='ALB', numeric='008', apolitical_name='Albania')
DZ Country(name='Algeria', alpha2='DZ', alpha3='DZA', numeric='012', apolitical_name='Algeria')
AS Country(name='American Samoa', alpha2='AS', alpha3='ASM', numeric='016', apolitical_name='American Samoa')
AD Country(name='Andorra', alpha2='AD', alpha3='AND', numeric='020', apolitical_name='Andorra')
AO Country(name='Angola', alpha2='AO', alpha3='AGO', numeric='024', apolitical_name='Angola')
AI Country(name='Anguilla', alpha2='AI', alpha3='AIA', numeric='660', apolitical_name='Anguilla')
AQ Country(name='Antarctica', alpha2='AQ', alpha3='ATA', numeric='010', apolitical_name='Antarctica')
AG Country(name='Antigua and Barbuda', alpha2='AG', alpha3='ATG', numeric='028', apolitical_name

In [43]:
# Try to resolve every name in the country file to an ISO 3166 alpha-2 code,
# printing each outcome and remembering the names iso3166 does not know.
missing = []
for name in cds.name.values:
    try:
        alpha2 = countries.get(name).alpha2
    except KeyError:
        print(f"!!{name} not found")
        missing.append(name)
        continue
    print(alpha2, name)

print("\nNumber missing = ", len(missing))
print(missing)

!!OCEAN not found
!!VENEZUELA not found
!!VIRGIN ISLANDS not found
PR PUERTO RICO
!!UNITED STATES not found
AI ANGUILLA
IM ISLE OF MAN
GB UNITED KINGDOM OF GREAT BRITAIN AND NORTHERN IRELAND
UA UKRAINE
!!TURKEY not found
TN TUNISIA
TT TRINIDAD AND TOBAGO
TG TOGO
SY SYRIAN ARAB REPUBLIC
CH SWITZERLAND
SE SWEDEN
SS SOUTH SUDAN
SD SUDAN
ES SPAIN
SK SLOVAKIA
SI SLOVENIA
RS SERBIA
SN SENEGAL
SA SAUDI ARABIA
LC SAINT LUCIA
RU RUSSIAN FEDERATION
RO ROMANIA
PT PORTUGAL
PL POLAND
NO NORWAY
NG NIGERIA
NE NIGER
NI NICARAGUA
NL NETHERLANDS
MA MOROCCO
WS SAMOA
ME MONTENEGRO
!!MOLDOVA not found
MX MEXICO
MR MAURITANIA
ML MALI
!!MACEDONIA not found
LU LUXEMBOURG
LT LITHUANIA
LY LIBYA
LB LEBANON
LV LATVIA
XK KOSOVO
JO JORDAN
JM JAMAICA
IT ITALY
IL ISRAEL
PS PALESTINE
IE IRELAND
IQ IRAQ
IS ICELAND
HU HUNGARY
HN HONDURAS
HT HAITI
GW GUINEA-BISSAU
GN GUINEA
GT GUATEMALA
GD GRENADA
GR GREECE
GH GHANA
DE GERMANY
GM GAMBIA
FR FRANCE
FI FINLAND
ET ETHIOPIA
EE ESTONIA
ER ERITREA
SV EL SALVADOR
EG EGYPT
DO DOM

In [52]:
import re
from collections import defaultdict

# For every name iso3166 could not resolve, collect the apolitical ISO names
# that contain it (case-insensitively) as candidate translations; names with no
# candidate at all get the single placeholder "NOT FOUND".
translations = defaultdict(list)
for x in missing:
    # Escape the name so punctuation (e.g. the "." in "N. CYPRUS") is matched
    # literally instead of acting as a regex wildcard.
    pat = re.compile(re.escape(x), flags=re.IGNORECASE)
    candidates = [c for c in apol_country_names if pat.search(c)]
    translations[x].extend(candidates or ["NOT FOUND"])

In [53]:
translations

defaultdict(list,
            {'OCEAN': ['BRITISH INDIAN OCEAN TERRITORY'],
             'VENEZUELA': ['VENEZUELA, BOLIVARIAN REPUBLIC OF'],
             'VIRGIN ISLANDS': ['VIRGIN ISLANDS, BRITISH',
              'VIRGIN ISLANDS, U.S.'],
             'UNITED STATES': ['UNITED STATES OF AMERICA',
              'UNITED STATES MINOR OUTLYING ISLANDS'],
             'TURKEY': ['NOT FOUND'],
             'MOLDOVA': ['MOLDOVA, REPUBLIC OF'],
             'MACEDONIA': ['NORTH MACEDONIA'],
             'N. CYPRUS': ['NOT FOUND']})

In [58]:
#import requests
import json

In [59]:
r = requests.get("https://github.com/countries/countries-data-json/tree/master/data/countries/")

In [60]:
rj = json.loads(r.text)

In [85]:
from collections import deque
dq = deque()

In [86]:
# Matches file names such as "AD.json".  Raw string with the dot escaped so that
# "." only matches a literal dot rather than any character.
pat = re.compile(r"[A-Z]{2}\.json")

In [90]:
# Breadth-first walk over the parsed JSON `rj`, looking for strings that match
# `pat`.  Queue entries are [node, path-from-root].  The walk ends at the first
# matching leaf string; a match found directly inside a list only ends the scan
# of that list.
dq.append([rj, []])
while dq:
    cur = dq.popleft()
    try:
        keys = cur[0].keys()
    except AttributeError:
        # Not a dict: either a leaf value or a list.
        if isinstance(cur[0], str):
            if pat.search(cur[0]):
                print(cur[1], cur[0], "\n")
                break
        if isinstance(cur[0], list):
            for x in cur[0]:
                # Test the element itself: passing the enclosing list to
                # `pat.search` raises TypeError.
                if isinstance(x, str) and pat.search(x):
                    print(cur)
                    break
                else:
                    # The element itself (not its index) is appended to the path.
                    dq.append([x, cur[1] + [x]])
    else:
        for k in keys:
            dq.append([cur[0][k], cur[1] + [k]])

['payload', 'tree', 'items', {'name': 'AD.json', 'path': 'data/countries/AD.json', 'contentType': 'file'}, 'name'] AD.json 



In [91]:
r2 = requests.get("https://github.com/countries/countries-data-json/tree/master/data/countries/AD.json")

In [96]:
r2j = json.loads(r2.content)

dict_keys(['payload', 'title'])

In [121]:
def json_bfs(js, pat):
    """Breadth-first search of parsed JSON for strings matching `pat`.

    Args:
        js: parsed JSON value (nested dicts, lists and scalars).
        pat: compiled regular expression; strings are tested with `pat.search`.

    Returns:
        List of `(path, string)` tuples in breadth-first order.  `path` holds
        the dict keys followed from the root; stepping into a list element
        appends the element itself (not its index), and a string found directly
        inside a list is reported with the path of that list.
    """
    result = []
    dq = deque()
    dq.append([js, []])
    while dq:
        node, path = dq.popleft()
        try:
            keys = node.keys()
        except AttributeError:
            # Not a dict: either a leaf value or a list.
            if isinstance(node, str):
                if pat.search(node):
                    result.append((path, node))
            elif isinstance(node, list):
                for x in node:
                    if isinstance(x, str) and pat.search(x):
                        # Record the match and keep scanning: stopping here
                        # would silently drop every later element of the list.
                        result.append((path, x))
                    else:
                        dq.append([x, path + [x]])
        else:
            for k in keys:
                dq.append([node[k], path + [k]])
    return result

In [122]:
result = json_bfs(r2j, re.compile("blob"))

In [123]:
result

[(['payload', 'blob', 'displayUrl'],
  'https://github.com/countries/countries-data-json/blob/master/data/countries/AD.json?raw=true'),
 (['payload', 'blob', 'headerInfo', 'siteNavLoginPath'],
  '/login?return_to=https%3A%2F%2Fgithub.com%2Fcountries%2Fcountries-data-json%2Fblob%2Fmaster%2Fdata%2Fcountries%2FAD.json'),
 (['payload', 'blob', 'planSupportInfo', 'requestFullPath'],
  '/countries/countries-data-json/blob/master/data/countries/AD.json')]

In [125]:
result = json_bfs(rj, re.compile(r'[A-Z]{2}\.json'))
file_names = [x[1] for x in result[::2]]
file_names

['AD.json',
 'AE.json',
 'AF.json',
 'AG.json',
 'AI.json',
 'AL.json',
 'AM.json',
 'AO.json',
 'AQ.json',
 'AR.json',
 'AS.json',
 'AT.json',
 'AU.json',
 'AW.json',
 'AX.json',
 'AZ.json',
 'BA.json',
 'BB.json',
 'BD.json',
 'BE.json',
 'BF.json',
 'BG.json',
 'BH.json',
 'BI.json',
 'BJ.json',
 'BL.json',
 'BM.json',
 'BN.json',
 'BO.json',
 'BQ.json',
 'BR.json',
 'BS.json',
 'BT.json',
 'BV.json',
 'BW.json',
 'BY.json',
 'BZ.json',
 'CA.json',
 'CC.json',
 'CD.json',
 'CF.json',
 'CG.json',
 'CH.json',
 'CI.json',
 'CK.json',
 'CL.json',
 'CM.json',
 'CN.json',
 'CO.json',
 'CR.json',
 'CU.json',
 'CV.json',
 'CW.json',
 'CX.json',
 'CY.json',
 'CZ.json',
 'DE.json',
 'DJ.json',
 'DK.json',
 'DM.json',
 'DO.json',
 'DZ.json',
 'EC.json',
 'EE.json',
 'EG.json',
 'EH.json',
 'ER.json',
 'ES.json',
 'ET.json',
 'FI.json',
 'FJ.json',
 'FK.json',
 'FM.json',
 'FO.json',
 'FR.json',
 'GA.json',
 'GB.json',
 'GD.json',
 'GE.json',
 'GF.json',
 'GG.json',
 'GH.json',
 'GI.json',
 'GL

In [112]:
json.loads("".join(x.strip() for x in r2j['payload']['blob']['rawLines']))

{'AD': {'alpha2': 'AD',
  'alpha3': 'AND',
  'continent': 'Europe',
  'country_code': '376',
  'currency_code': 'EUR',
  'distance_unit': 'KM',
  'gec': 'AN',
  'geo': {'latitude': 42.506285,
   'longitude': 1.521801,
   'max_latitude': 42.655791,
   'max_longitude': 1.786639,
   'min_latitude': 42.4287488,
   'min_longitude': 1.4087052,
   'bounds': {'northeast': {'lat': 42.655791, 'lng': 1.786639},
    'southwest': {'lat': 42.4287488, 'lng': 1.4087052}}},
  'international_prefix': '00',
  'ioc': 'AND',
  'iso_long_name': 'The Principality of Andorra',
  'iso_short_name': 'Andorra',
  'languages_official': ['ca'],
  'languages_spoken': ['ca'],
  'national_destination_code_lengths': [2],
  'national_number_lengths': [6, 7, 8, 9],
  'national_prefix': 'None',
  'nationality': 'Andorran',
  'number': '020',
  'postal_code': True,
  'postal_code_format': 'AD[1-7]0\\d',
  'region': 'Europe',
  'start_of_week': 'monday',
  'subregion': 'Southern Europe',
  'un_locode': 'AD',
  'unofficial_n

In [126]:
results = []
#for fn in file_names:
 #   r = requests.get(f"https://github.com/countries/countries-data-json/tree/master/data/countries/{fn}").json()
  #  try:
 #       result = json.loads("".join(x.strip() for x in r['payload']['blob']['rawLines']))
 #   except:
 #       results.append({fn[:2]: "NO RESULTS"})
 #   else:
 #       results.append(result)

In [133]:
results_dict = {k: v for r in results for k, v in r.items()}

In [136]:
# Only refresh the cache when there is data to write: while the download loop is
# disabled `results_dict` is empty, and dumping it would replace the previously
# saved iso3166.json with "{}".
if results_dict:
    with open("iso3166.json", "w") as f:
        json.dump(results_dict, f)

In [137]:
with open("iso3166.json", "r") as f:
    results_reloaded = json.load(f)

In [138]:
results_reloaded

{'AD': {'alpha2': 'AD',
  'alpha3': 'AND',
  'continent': 'Europe',
  'country_code': '376',
  'currency_code': 'EUR',
  'distance_unit': 'KM',
  'gec': 'AN',
  'geo': {'latitude': 42.506285,
   'longitude': 1.521801,
   'max_latitude': 42.655791,
   'max_longitude': 1.786639,
   'min_latitude': 42.4287488,
   'min_longitude': 1.4087052,
   'bounds': {'northeast': {'lat': 42.655791, 'lng': 1.786639},
    'southwest': {'lat': 42.4287488, 'lng': 1.4087052}}},
  'international_prefix': '00',
  'ioc': 'AND',
  'iso_long_name': 'The Principality of Andorra',
  'iso_short_name': 'Andorra',
  'languages_official': ['ca'],
  'languages_spoken': ['ca'],
  'national_destination_code_lengths': [2],
  'national_number_lengths': [6, 7, 8, 9],
  'national_prefix': 'None',
  'nationality': 'Andorran',
  'number': '020',
  'postal_code': True,
  'postal_code_format': 'AD[1-7]0\\d',
  'region': 'Europe',
  'start_of_week': 'monday',
  'subregion': 'Southern Europe',
  'un_locode': 'AD',
  'unofficial_n

In [139]:
# Map every unofficial country name to its alpha-2 code.  Non-dict entries are
# skipped: the download loop stores the string "NO RESULTS" for codes whose
# fetch failed, and indexing such an entry with 'unofficial_names' would raise.
unofficial_to_alpha2 = {
    w: k
    for k, v in results_reloaded.items()
    if isinstance(v, dict)
    for w in v['unofficial_names']
}

In [146]:
# For each unresolved name, collect the alpha-2 codes of all unofficial names
# that contain it (case-insensitively); "XX" marks names with no match at all.
results = defaultdict(list)
for x in missing:
    found = False
    # re.escape: match the name literally (the "." in "N. CYPRUS" would
    # otherwise act as a wildcard).  No surrounding ".*" is needed because
    # `search` already scans the whole string.
    pat = re.compile(re.escape(x), flags=re.I)
    for k, v in unofficial_to_alpha2.items():
        if pat.search(k):
            results[x].append(v)
            found = True
    if not found:
        results[x].append("XX")

In [147]:
results

defaultdict(list,
            {'OCEAN': ['IO'],
             'VENEZUELA': ['VE'],
             'VIRGIN ISLANDS': ['VG', 'VG', 'VI', 'VI', 'VI', 'VI'],
             'UNITED STATES': ['UM', 'US', 'VI', 'VI'],
             'TURKEY': ['TR'],
             'MOLDOVA': ['MD', 'MD'],
             'MACEDONIA': ['MK', 'MK', 'MK', 'MK', 'MK'],
             'N. CYPRUS': ['XX']})

In [145]:
countries.get("io")

Country(name='British Indian Ocean Territory', alpha2='IO', alpha3='IOT', numeric='086', apolitical_name='British Indian Ocean Territory')