# Calculate a 2D emissions grid incorporating COVID-19 effects

In [1]:
import itertools
import netCDF4 as nc
import pandas as pd
import numpy as np
import reverse_geocoder as rg
import matplotlib.pyplot as plt

In [2]:
input_co2_mole = "../input/mole-fraction-of-carbon-dioxide-in-air_input4MIPs_GHGConcentrations_ScenarioMIP_UoM-MESSAGE-GLOBIOM-ssp245-1-2-0_gr1-GMNHSH_2015-2500.nc"
input_co2_air = "../input/CO2-em-AIR-anthro_input4MIPs_emissions_ScenarioMIP_IAMC-MESSAGE-GLOBIOM-ssp245-1-1_gn_201501-210012.nc"
input_nox = "../input/NOx-em-anthro_input4MIPs_emissions_ScenarioMIP_IAMC-MESSAGE-GLOBIOM-ssp245-1-1_gn_201501-210012.nc"
input_blip = "../input/Robin_sectors_V3.csv"
convert_country_code_file = "../input/convertCountryCodes.csv"
files_to_blip = [input_nox]
key_variables = ["NOx_em_anthro"]

## Collect and clean the data

In [3]:
nox_0 = nc.Dataset(input_nox, "r", format="NETCDF4")
blip_factors = pd.read_csv(input_blip)
convert_countries = pd.read_csv(convert_country_code_file, keep_default_na=False, na_values=['_'])

In [4]:
data_to_modify = [nox_0]

In [5]:
for dimobj in nox_0.dimensions.values():
...     print(dimobj)

<class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 720
<class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 360
<class 'netCDF4._netCDF4.Dimension'>: name = 'sector', size = 8
<class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 120
<class 'netCDF4._netCDF4.Dimension'>: name = 'bound', size = 2


In [6]:
sectors = nox_0.variables["sector"][:]
nox_0.variables["sector"]

<class 'netCDF4._netCDF4.Variable'>
int32 sector(sector)
    long_name: sector
    bounds: sector_bnds
    ids: 0: Agriculture; 1: Energy; 2: Industrial; 3: Transportation; 4: Residential, Commercial, Other; 5: Solvents production and application; 6: Waste; 7: International Shipping
unlimited dimensions: 
current shape = (8,)
filling on, default _FillValue of -2147483647 used

In [7]:
blip_factors = blip_factors[~blip_factors["1"].isna()]
blip_sectors = blip_factors["Sector"].unique()

## Perform the sector weighting


In [8]:
blip_sectors

array(['surface-transport', 'residential', 'public/commercial',
       'industry', 'international-shipping', 'international-aviation',
       'domestic-aviation', 'power'], dtype=object)

The set of sectors in our blip need to be converted into our sectors in the netCDF case. This uses:
0: Agriculture; 1: Energy; 2: Industrial; 3: Transportation; 4: Residential, Commercial, Other; 5: Solvents production and application; 6: Waste; 7: International Shipping

In [9]:
sector_dict = {"surface-transport": 3, "residential": 4, "public/commercial": -4, "industry": 2, 
 "international-shipping":7, "international-aviation": -1, "domestic-aviation": -2, "power": 1}

In [10]:
blip_factors_multi = blip_factors.copy()
blip_factors_multi.drop(["Country", "Base(MtCO2/day)", "Unnamed: 0"], axis=1, inplace=True)
blip_factors_multi["Sector"] = [sector_dict[sect] for sect in blip_factors_multi["Sector"]]
blip_factors_multi.head()

Unnamed: 0,ISO_A3,Sector,Base%,1,2,3,4,5,6,7,...,357,358,359,360,361,362,363,364,365,366
0,ALB,3,42.007797,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,DZA,3,31.360946,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407
2,AGO,3,29.972754,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429
3,ARG,3,23.181375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951
4,ARM,3,30.523255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
blip_factors_multi.set_index(blip_factors_multi.columns[:2].to_list(), drop=True, inplace=True)
blip_factors_multi.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Base%,1,2,3,4,5,6,7,8,9,...,357,358,359,360,361,362,363,364,365,366
ISO_A3,Sector,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ALB,3,42.007797,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
DZA,3,31.360946,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407,-0.407
AGO,3,29.972754,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429
ARG,3,23.181375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951
ARM,3,30.523255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# We want to average the two sets of sector 4 together in the right ratio
all_countries = blip_factors_multi.index.get_level_values("ISO_A3").unique()
for country in all_countries:
    if (country, 4) in blip_factors_multi.index and (country, -4) in blip_factors_multi.index:
        blip_factors_multi.loc[country, 4] = (
            blip_factors_multi.loc[country, 4].values * 
            blip_factors_multi["Base%"][country, 4] + 
            blip_factors_multi.loc[country, -4].values * blip_factors_multi["Base%"][country, -4]
            ) / (
                blip_factors_multi["Base%"][country, 4] + blip_factors_multi["Base%"][country, -4]
            )
        blip_factors_multi["Base%"][country, 4] = blip_factors_multi["Base%"][country, 4] + \
            blip_factors_multi["Base%"][country, -4]
        blip_factors_multi.drop((country, -4), inplace=True)
    elif (country, -4) in blip_factors_multi.index:
        blip_factors_multi.loc[country, 4] = blip_factors_multi.loc[country, -4]
    elif (country, 4) in blip_factors_multi.index:
        continue
    else:
        print("no data for {}".format(country))

In [13]:
# Test that this produces the right answers
example_factor = blip_factors[
    (blip_factors["ISO_A3"] == "GBR") & (blip_factors["Sector"].isin(["residential", "public/commercial"]))
][["Base%", "100"]]
assert np.isclose(blip_factors_multi.loc["GBR", 4][100], sum(
    example_factor["Base%"] * example_factor["100"]) / sum(example_factor["Base%"])
)

In [14]:
blip_factors_multi

Unnamed: 0_level_0,Unnamed: 1_level_0,Base%,1,2,3,4,5,6,7,8,9,...,357,358,359,360,361,362,363,364,365,366
ISO_A3,Sector,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ALB,3,42.007797,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
DZA,3,31.360946,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.407000,-0.407000,-0.407000,-0.407000,-0.407000,-0.407000,-0.407000,-0.407000,-0.407000,-0.407000
AGO,3,29.972754,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429,-0.394429
ARG,3,23.181375,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951,-0.447951
ARM,3,30.523255,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
VEN,1,43.093385,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.154102,-0.154102,-0.154102,-0.154102,-0.154102,-0.154102,-0.154102,-0.154102,-0.154102,-0.154102
VNM,1,31.822661,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.077964,-0.077964,-0.077964,-0.077964,-0.077964,-0.077964,-0.077964,-0.077964,-0.077964,-0.077964
YEM,1,34.652509,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.008565,-0.008565,-0.008565,-0.008565,-0.008565,-0.008565,-0.008565,-0.008565,-0.008565,-0.008565
ZMB,1,26.424361,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,-0.062563,-0.062563,-0.062563,-0.062563,-0.062563,-0.062563,-0.062563,-0.062563,-0.062563,-0.062563


We additionally assume that solvent production tracks industry, and no longer need the -4 sector.

In [15]:
for country in all_countries:
    blip_factors_multi.loc[(country, 5)] = blip_factors_multi.loc[(country, 4)]

## Derive country and date relation
We need to assign each lat/long a country. This is slightly complicated by the country index being 2 letters in the inverse geocoder but 3 letters in our data.

In [16]:
lat, lon = nox_0.variables["lat"][:], nox_0.variables["lon"][:]

In [17]:
convert_countries_dict = {convert_countries["A2 (ISO)"][i]: convert_countries["A3 (UN)"][i] for i in convert_countries.index}
coords = []
lon_length = len(lon)
for latperm in lat:
    coords = coords + list(zip([latperm] * lon_length, lon))

In [18]:
results = rg.search(coords)

Loading formatted geocoded file...


In [19]:
lat_countries_dict = {coords[i]: convert_countries_dict[results[i]["cc"]] for i in range(len(coords)) 
                      if results[i]["cc"] in convert_countries_dict.keys()}

The process will be faster if we map the other way and use the index rather than the coordinates:

In [20]:
country_coord_dict = {}
for k, v in lat_countries_dict.items():
    country_coord_dict[v] = country_coord_dict.get(v, [])
    country_coord_dict[v].append((np.where(lat.data == k[0])[0][0], np.where(lon.data == k[1])[0][0]))
country_coord_dict

{'NZL': [(0, 0),
  (0, 1),
  (0, 2),
  (0, 3),
  (0, 4),
  (0, 5),
  (0, 6),
  (0, 7),
  (0, 8),
  (0, 9),
  (0, 10),
  (0, 11),
  (0, 12),
  (0, 13),
  (0, 14),
  (0, 15),
  (0, 16),
  (0, 17),
  (0, 18),
  (0, 19),
  (0, 20),
  (0, 21),
  (0, 22),
  (0, 23),
  (0, 24),
  (0, 25),
  (0, 26),
  (0, 27),
  (0, 28),
  (0, 29),
  (0, 30),
  (0, 31),
  (0, 32),
  (0, 33),
  (0, 34),
  (0, 35),
  (0, 36),
  (0, 37),
  (0, 38),
  (0, 39),
  (0, 40),
  (0, 41),
  (0, 42),
  (0, 43),
  (0, 44),
  (0, 45),
  (0, 46),
  (0, 47),
  (0, 48),
  (0, 49),
  (0, 50),
  (0, 51),
  (0, 52),
  (0, 53),
  (0, 54),
  (0, 55),
  (0, 56),
  (0, 57),
  (0, 58),
  (0, 59),
  (0, 60),
  (0, 61),
  (0, 62),
  (0, 63),
  (0, 64),
  (0, 65),
  (0, 66),
  (0, 67),
  (0, 68),
  (0, 69),
  (0, 70),
  (0, 71),
  (0, 72),
  (0, 73),
  (0, 74),
  (0, 75),
  (0, 76),
  (0, 77),
  (0, 78),
  (0, 79),
  (0, 80),
  (0, 81),
  (0, 82),
  (0, 83),
  (0, 84),
  (0, 85),
  (0, 86),
  (0, 87),
  (0, 88),
  (0, 89),
  (0, 90),
  

Now we must relate the dates. blip_factors uses days from 2020-01-01, and has values for every day. The netCDFs use days since 2015-01-01, which is 5 * 365 + 1 days later and monthly. 

In [21]:
date_dif = 5 * 365 + 1
date_dif

1826

In [22]:
netCDF_times = nox_0.variables["time"][:]
netCDF_tseries = pd.Series(netCDF_times)
bliptimes = blip_factors_multi.columns[blip_factors_multi.columns != "Base%"]
bliptimes = pd.Series(pd.to_numeric(bliptimes))

In [23]:
time_dict = {}
remaining_times = bliptimes.copy()
mappable_times = netCDF_tseries[(netCDF_tseries > date_dif) & (netCDF_tseries < date_dif + max(bliptimes))]
for t in mappable_times.index[:-1]:
    closeTimes = [bliptime for bliptime in remaining_times if (
        0.5 * (mappable_times[t + 1] + mappable_times[t]) - date_dif > bliptime
    )]
    time_dict[mappable_times[t]] = closeTimes
    remaining_times = remaining_times[~remaining_times.isin(closeTimes)]
time_dict[mappable_times.iloc[-1]] = list(remaining_times)

In [24]:
blip_factors_av = pd.DataFrame(index=blip_factors_multi.index, columns=time_dict.keys())
for key, val in time_dict.items():
    blip_factors_av[key] = blip_factors_multi[list(str(v) for v in val)].mean(axis=1)

## Perform the emissions blip
We now have a mapping between times and locations and the emissions we want. 

In [25]:
nox_0.close()

In [26]:
def copy_netcdf_file(filename): 
    src = nc.Dataset(filename)
    trg = nc.Dataset(filename.replace("input", "output"), mode='w')

    # Create the dimensions of the file
    for name, dim in src.dimensions.items():
        trg.createDimension(name, len(dim) if not dim.isunlimited() else None)

    # Copy the global attributes
    trg.setncatts({a:src.getncattr(a) for a in src.ncattrs()})

    # Create the variables in the file
    for name, var in src.variables.items():
        trg.createVariable(name, var.dtype, var.dimensions)

        # Copy the variable attributes
        trg.variables[name].setncatts({a:var.getncattr(a) for a in var.ncattrs()})

        # Copy the variables values (as 'f4' eventually)
        trg.variables[name][:] = src.variables[name][:]

    # Return the data
    src.close()
    return trg

In [27]:
file = files_to_blip[0]
data = copy_netcdf_file(file)

In [28]:
all_valid_countries = [c for c in all_countries if c in country_coord_dict.keys()]

In [29]:
from mpl_toolkits.basemap import Basemap
lon_0 = lon.mean()
lat_0 = lat.mean()

ModuleNotFoundError: No module named 'mpl_toolkits.basemap'

In [37]:
#for file in files_to_blip

output = data.variables["NOx_em_anthro"]
for country in all_valid_countries:
    print(country)
    for time in blip_factors_av.columns[0:1]: # TODO: more 
        timeind = list(i == time for i in data.variables["time"])
        for sector in range(1, 2): # We will manage sectors 6 and 7 elsewhere, no change to sector 0 (agri).  #TODO: more
            try:
                mult_fact = blip_factors_av[time].loc[country, sector] + 1
                if mult_fact != 1.0: #This saves operations
                    for lati, longi in list(zip(*country_coord_dict[country])):
                        print(len(lati))  # output[timeind, sector, lati, lo])
                    #output[timeind, sector, lati, longi] *= mult_fact
            except Exception e:
                print(e)
                continue

ALB
DZA
AGO
ARG
ARM
AUS
AUT
AZE
BHR
BGD
BLR
BEL
BEN
BOL
BIH
BWA
BRA
BRN
BGR
KHM
CMR
CAN
CHL
CHN
COL
COG
CRI
CIV
HRV
CUB
CUW
CYP
CZE
PRK
COD
DNK
DOM
ECU
EGY
SLV
ERI
EST
ETH
FIN
FRA
GAB
GEO
DEU
GHA
GRC
GTM
HTI
HND
HKG
HUN
ISL
IND
IDN
IRN
IRQ
IRL
ISR
ITA
JAM
JPN
JOR
KAZ
KEN
KOR
RKS
KWT
KGZ
LVA
LBN
LBY
LTU
LUX
MYS
MLT
MUS
MEX
MDA
MNG
MNE
MAR
MOZ
MMR
NAM
NPL
NLD
NZL
NIC
NER
NGA
MKD
NOR
OMN
PAK
PAN
PRY
PER
PHL
POL
PRT
QAT
ROU
RUS
SAU
SEN
SRB
SGP
SVK
SVN
ZAF
SSD
ESP
LKA
SDN
SUR
SWE
CHE
SYR
TWN
TJK
TZA
THA
TGO
TTO
TUN
TUR
TKM
UKR
ARE
GBR
USA
URY
UZB
VEN
VNM
YEM
ZMB
ZWE


In [None]:
plt.figure(figsize=(20, 20))

plt.imshow(output[5, 1, :250, :300].squeeze())

In [38]:
for lati, longi in list(zip(*country_coord_dict[country])):
    print(len(lati))  # output[timeind, sector, lati, lo])

ValueError: too many values to unpack (expected 2)

In [None]:
sector=1

## SANDBOX

In [None]:
(1 + blip_factors_av[time].loc[country, sector])
output[timeind, sector, lati, longi] *= (1 + blip_factors_av[time].loc[country, sector])

In [None]:
time = blip_factors_av.columns[0]

In [None]:
blip_factors_av[time].loc[country, sector]

In [None]:
nox_0 = nc.Dataset(input_nox, "r", format="NETCDF4")

In [None]:
ans = [state for state, _ in country_coord_dict.items() if state not in all_countries]
ans.sort()
print(ans)

In [None]:
output = data.variables["NOx_em_anthro"]
output[8, 1, :, :] = 1 * output[8, 1, :, :] > 0

In [None]:
plt.figure(figsize=(20, 20))
plt.imshow((1 + output[8, 1, :, :]) / (1 + nox_0.variables["NOx_em_anthro"][8, 1, :, :]).squeeze())

In [None]:
output[timeind, sector, lati, longi] *= (1 + blip_factors_av[time].loc[country, sector])

In [None]:
data.close()
nox_0.close()