<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#-Read-the-file-" data-toc-modified-id="-Read-the-file--1"><span class="toc-item-num">1&nbsp;&nbsp;</span> Read the file </a></span></li><li><span><a href="#Mapping" data-toc-modified-id="Mapping-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Mapping</a></span></li></ul></div>

In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import math
import os

<h2> Read the file </h2>

In [24]:
# Read the datafile corresponding to territorial emissions (OWID CO2 dataset, ';'-separated).
# The remote location is kept below for provenance only. NOTE(review): the URL looks like
# the GitHub "tree" (HTML) view rather than a raw-file link -- confirm before re-enabling.
#df = pd.read_csv("https://github.com/OpenGeoScales/ogs-data-exploration/tree/main/data/ghg-emissions/owid/owid-co2-data.csv", sep = ";")
path = os.getcwd()
# The data folder sits next to the notebook's working directory: <cwd>/../data
subfolder = os.path.join(os.pardir, "data")
# Assemble the location component by component so the correct separator is used on every OS.
data_file = os.path.join(path, subfolder, "owid-co2-data.csv")
df = pd.read_csv(data_file, sep=";")

In [13]:
# Preview the first five rows of the freshly loaded table
df.head(n=5)

Unnamed: 0,iso_code,country,year,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
0,AFG,Afghanistan,1949,0.015,,,,,,0.002,...,,,,,,,,,7663783.0,
1,AFG,Afghanistan,1950,0.084,475.0,0.07,,,,0.011,...,,,,,,,,,7752000.0,19494800000.0
2,AFG,Afghanistan,1951,0.092,8.696,0.007,,,,0.012,...,,,,,,,,,7840000.0,20063850000.0
3,AFG,Afghanistan,1952,0.092,,,,,,0.012,...,,,,,,,,,7936000.0,20742350000.0
4,AFG,Afghanistan,1953,0.106,16.0,0.015,,,,0.013,...,,,,,,,,,8040000.0,22015460000.0


In [25]:
# Set the dataframe structure: rows sorted by and indexed on (year, country).
# The result is reassigned rather than modified with inplace=True so the two steps chain.
# NOTE: this cell must be run exactly once after loading `df`; on a second run "year" and
# "country" are no longer columns, and the conversion below would be applied again.
df = df.sort_values(["year", "country"]).set_index(["year", "country"])

# Emission columns that are rescaled below (listed once to keep both sides in sync)
co2_columns = ["co2", "trade_co2", "cement_co2",
               "coal_co2", "flaring_co2",
               "gas_co2", "oil_co2", "other_industry_co2"]

# Convert MtCO2 into MtC (to be consistent with the values from the other GCP dataset).
# 3.664 is the CO2-to-carbon mass ratio used for this conversion.
MTCO2_TO_MTC = 1.0 / 3.664
df[co2_columns] = MTCO2_TO_MTC * df[co2_columns]

df.tail(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,iso_code,co2,co2_growth_prct,co2_growth_abs,consumption_co2,trade_co2,trade_co2_share,co2_per_capita,consumption_co2_per_capita,share_global_co2,...,ghg_per_capita,methane,methane_per_capita,nitrous_oxide,nitrous_oxide_per_capita,primary_energy_consumption,energy_per_capita,energy_per_gdp,population,gdp
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2019,Yemen,YEM,2.798854,3.115,0.31,,,,0.352,,0.028,...,,,,,,,,,29162000.0,
2019,Zambia,ZMB,1.834061,-3.025,-0.21,,,,0.376,,0.018,...,,,,,,,,,17861000.0,
2019,Zimbabwe,ZWE,2.831332,-8.521,-0.966,,,,0.708,,0.028,...,,,,,,,,,14645000.0,


In [26]:
# Keep only the ISO code and the emission series that go into the JSON file
json_columns = [
    "iso_code",
    "co2",
    "trade_co2",
    "cement_co2",
    "coal_co2",
    "flaring_co2",
    "gas_co2",
    "oil_co2",
    "other_industry_co2",
]
df = df.loc[:, json_columns]
df

Unnamed: 0_level_0,Unnamed: 1_level_0,iso_code,co2,trade_co2,cement_co2,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1750,EU-28,,2.552129,,,2.552129,,,,
1750,Europe,,2.552129,,,2.552129,,,,
1750,Europe (excl. EU-27),,2.552129,,,2.552129,,,,
1750,United Kingdom,GBR,2.552129,,,2.552129,,,,
1750,World,OWID_WRL,2.552129,,,2.552129,,,,
...,...,...,...,...,...,...,...,...,...,...
2019,Wallis and Futuna Islands,WLF,0.007915,,,,,,0.007915,
2019,World,OWID_WRL,9945.793668,,426.790666,3919.805404,117.220524,2078.524563,3372.033024,31.419487
2019,Yemen,YEM,2.798854,,0.228439,0.091976,,0.340884,2.137555,
2019,Zambia,ZMB,1.834061,,0.285480,0.512555,,,1.036299,


In [27]:
# List the distinct ISO codes present in the table
df.loc[:, "iso_code"].unique()

array([nan, 'GBR', 'OWID_WRL', 'CAN', 'DEU', 'POL', 'USA', 'BEL', 'FRA',
       'AUT', 'NOR', 'ARM', 'AZE', 'BLR', 'EST', 'GEO', 'HUN', 'KAZ',
       'KGZ', 'LVA', 'LTU', 'MDA', 'RUS', 'ESP', 'TJK', 'TKM', 'UKR',
       'UZB', 'SWE', 'DNK', 'NLD', 'IRL', 'IND', 'ROU', 'CHE', 'AUS',
       'CZE', 'FIN', 'ITA', 'SVK', 'TUR', 'GRC', 'JPN', 'PRT', 'NZL',
       'BGR', 'PER', 'ZAF', 'BIH', 'HRV', 'MNE', 'MKD', 'SRB', 'SVN',
       'ARG', 'IDN', 'MYS', 'MEX', 'VNM', 'CHL', 'TWN', 'CHN', 'BRA',
       'ZWE', 'VEN', 'PRK', 'KOR', 'IRN', 'PHL', 'TTO', 'EGY', 'NGA',
       'DZA', 'TUN', 'ECU', 'COD', 'PRI', 'COL', 'ABW', 'BES', 'SXM',
       'IRQ', 'MOZ', 'BRB', 'BOL', 'MAR', 'MMR', 'ISR', 'LBN', 'SYR',
       'THA', 'URY', 'ALB', 'BHR', 'BRN', 'MDG', 'ISL', 'SAU', 'HKG',
       'ERI', 'CUB', 'ETH', 'GTM', 'NIC', 'LUX', 'BGD', 'KWT', 'PAK',
       'DOM', 'PAN', 'AFG', 'QAT', 'AGO', 'BHS', 'BLZ', 'BMU', 'BDI',
       'CMR', 'CPV', 'CRI', 'CYP', 'DJI', 'SLV', 'GNQ', 'SWZ', 'FRO',
       'FJI', 'GM

In [28]:
# Keep country rows only: drop entries without an ISO code and the world aggregate.
# NB: Kosovo has no official iso_code, so its 'OWID_KOS' code is deliberately kept.
has_iso_code = df["iso_code"].notna()
is_not_world = df["iso_code"].ne("OWID_WRL")
df = df.loc[has_iso_code & is_not_world]

df

Unnamed: 0_level_0,Unnamed: 1_level_0,iso_code,co2,trade_co2,cement_co2,coal_co2,flaring_co2,gas_co2,oil_co2,other_industry_co2
year,country,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1750,United Kingdom,GBR,2.552129,,,2.552129,,,,
1751,United Kingdom,GBR,2.552129,,,2.552129,,,,
1752,United Kingdom,GBR,2.552948,,,2.552948,,,,
1753,United Kingdom,GBR,2.552948,,,2.552948,,,,
1754,United Kingdom,GBR,2.554039,,,2.554039,,,,
...,...,...,...,...,...,...,...,...,...,...
2019,Vietnam,VNM,67.606168,,8.775109,36.907751,,5.049127,16.874181,
2019,Wallis and Futuna Islands,WLF,0.007915,,,,,,0.007915,
2019,Yemen,YEM,2.798854,,0.228439,0.091976,,0.340884,2.137555,
2019,Zambia,ZMB,1.834061,,0.285480,0.512555,,,1.036299,


<h2>Mapping</h2>

In [38]:
# Option to select only a subset of countries (to check structure):
# replace the line below with e.g. list_country = ["Afghanistan", "Puerto Rico"]

# Option to generate the full mapped dataset
list_country = df.index.get_level_values(1).unique()

# Definition of the sub-sector list
list_subsectors = ["co2", "trade_co2", "cement_co2", "coal_co2", "flaring_co2",
                   "gas_co2", "oil_co2", "other_industry_co2"]


def build_emission_records(country_df, subsectors):
    """Build the list of emission entries for a single country.

    Parameters
    ----------
    country_df : pandas.DataFrame
        Rows belonging to one country, indexed by (year, country).
    subsectors : list of str
        Names of the emission columns to export.

    Returns
    -------
    list of dict
        One entry per (year, sub-sector) pair whose value is not NaN, following
        the row order of ``country_df`` and then the order of ``subsectors``.
    """
    # Iterating a pandas Index yields native Python scalars, which json can serialise.
    years = country_df.index.get_level_values(0)
    # Pull the values out once instead of doing one .loc lookup per cell.
    values = country_df[subsectors].to_numpy(dtype=float)

    records = []
    for yr, row in zip(years, values):
        for subsec, value in zip(subsectors, row):
            # Only create an entry if the value is not NaN
            if math.isnan(value):
                continue
            records.append({
                "date": yr,
                "sector_name": "Territorial Emissions",
                "subsector_name": subsec,
                "gas_name": "CO2",
                "value": float(value),
                "unit": "MtC",
                "data_source_name": "OWID",
            })
    return records


# Split the table by country once; sort=False keeps each group's rows in table order.
rows_by_country = df.groupby(level=1, sort=False)

# Dictionary that will be mapped to .json
dict_final = {}

# Loop over geo-components (in this case, countries).
# index_geo is a running key for the geo-component without actual "meaning".
for index_geo, country in enumerate(list_country):
    # Temporal coverage varies from one country to another, so work on the country's own rows
    country_df = rows_by_country.get_group(country)

    # Geo-component description; the ISO code is read from the country's first row
    dict_geo = {
        "scale_name": "country",
        "geoComponent_name": country,
        "iso_code": {"alpha2": None, "alpha3": country_df["iso_code"].iloc[0]},
        "properties": {"area": None},
    }

    # Add the geo-component and its emission list to the main dictionary
    dict_final[index_geo] = {
        "geoComponent": dict_geo,
        "emissions": build_emission_records(country_df, list_subsectors),
    }


# Optional structure check (keep commented when running the full database!!)
#print(dict_final)  

# Write the mapped dictionary to a .json file
with open("sample.json", mode="w") as json_file:
    json.dump(dict_final, fp=json_file)