In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import math

<h2> Read the file </h2>

In [9]:
# Load the GCP territorial-emissions table, ordered and indexed by (Year, Country).
# Chaining returns a fresh frame at each step instead of mutating one in place.
df = (
    pd.read_csv("../../../data/ghg-emissions/gcp/GCB2020v18_MtCO2_flat.csv")
    .sort_values(["Year","Country"])
    .set_index(["Year","Country"])
)

# Numeric emission columns (every column except the ISO code)
emission_columns = ["Total","Coal","Oil","Gas","Cement","Flaring","Other","Per Capita"]

# Rescale MtCO2 to MtC so the values are consistent with the other GCP datasets.
# Note that "Per Capita" is rescaled by the same factor as the totals.
df[emission_columns] = df[emission_columns] * (1.0/3.664)

# Quick look at the last rows to check the structure
df.tail(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,ISO 3166-1 alpha-3,Total,Coal,Oil,Gas,Cement,Flaring,Other,Per Capita
Year,Country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2019,Yemen,YEM,2.798867,0.092,2.137605,0.34078,0.228481,0.0,,0.095977
2019,Zambia,ZMB,1.834195,0.512601,1.036221,0.0,0.285373,0.0,,0.102693
2019,Zimbabwe,ZWE,2.83141,1.720523,0.95226,0.0,0.158627,0.0,,0.19333


<h2>Mapping</h2>

In [14]:
def build_country_entry(df, country, years, subsectors, iso_year):
    """Build the JSON-ready record of one country.

    Parameters
    ----------
    df : pandas.DataFrame
        Emission table indexed by (Year, Country), with one column per
        sub-sector and an "ISO 3166-1 alpha-3" column.
    country : str
        Country name as it appears in the second index level.
    years : iterable
        Years to export.
    subsectors : iterable of str
        Names of the sub-sector columns to export.
    iso_year
        Year whose row is used to read the country's ISO alpha-3 code.

    Returns
    -------
    dict
        {"geoComponent": {...}, "emissions": [...]}; the emission list has
        one entry per (year, sub-sector) pair whose value is not NaN.

    Raises
    ------
    KeyError
        If a requested (year, country) pair is absent from the index.
    """
    iso_alpha3 = df.loc[(iso_year, country), "ISO 3166-1 alpha-3"]

    # Description of the geo-localised unit (here: a country)
    dict_geo = {
        "scale_name": "country",
        "geoComponent_name": country,
        # A missing code becomes null, like alpha2, instead of the
        # non-standard JSON token NaN.
        "iso_code": {"alpha2": None, "alpha3": None if pd.isna(iso_alpha3) else iso_alpha3},
        "properties": {"area": None},
    }

    # One entry per (year, sub-sector) with an actual value
    em_list = []
    for yr in years:
        for subsec in subsectors:
            value = df.loc[(yr, country), subsec]

            # Only create entry if value is not NaN
            if math.isnan(value):
                continue

            em_list.append({
                # Built-in int/float so json.dump never meets a numpy scalar
                "date": int(yr),
                "sector_name": "Territorial Emissions",
                "subsector_name": subsec,
                "gas_name": "CO2",
                "value": float(value),
                "unit": "MtC",
                "data_source_name": "GCP",
            })

    return {"geoComponent": dict_geo, "emissions": em_list}


# Option 1: full mapped dataset (default)
list_years = df.index.get_level_values(0).unique()
list_countries = df.index.get_level_values(1).unique()

# Option 2: only a subset of countries/years (to check structure).
# Uncomment the two lines below; they override Option 1.
#list_countries=["Afghanistan","Albania"]
#list_years=[2018,2019]

# Definition of the sub-sector list
list_subsectors=["Coal","Oil","Gas","Cement","Flaring","Other"]

# The ISO code is read from the most recent year available in the data
iso_reference_year = df.index.get_level_values(0).max()

# dictionary that will be mapped to .json
dict_final={}

# key for geo-component/emission (this is a running index without actual "meaning")
index_geo=0

# Loop over geo-component (in this case, countries)
for country in list_countries:
    dict_final[index_geo]=build_country_entry(
        df, country, list_years, list_subsectors, iso_reference_year
    )
    index_geo+=1


# Optional printing to check structure (comment when running full database!!)
#print(dict_final)

# Export as a .json
with open("sample.json", "w") as outfile:
    json.dump(dict_final, outfile)