In [1]:
import json
import math

import numpy as np
import pandas as pd

In [2]:
# Fetch the raw OECD greenhouse-gas emissions export (CSV) straight from the
# GitHub repository and parse it into a DataFrame.

url = "https://raw.githubusercontent.com/OpenGeoScales/ogs-data-exploration/oecd/data/ghg-emissions/oecd/raw/AIR_GHG_07042021221914030.csv"
df = pd.read_csv(filepath_or_buffer=url)

In [3]:
# Set the dataframe structure: sort the rows and index them by
# (Year, Country, POL, Variable) so single entries can be looked up by that tuple.
#
# The guard keeps this cell re-runnable: once the four columns have been moved
# into the index, indexing on them a second time would raise a KeyError.
index_columns = ["Year", "Country", "POL", "Variable"]
if list(df.index.names) != index_columns:
    df = df.sort_values(index_columns).set_index(index_columns)

In [4]:
# Ranges of years, countries, pollutants and sectors that will be mapped.
# Years, countries and pollutants are taken from the data itself (index levels
# "Year", "Country" and "POL"); the sectors are an explicit selection.
list_years = df.index.get_level_values("Year").unique()
list_country = df.index.get_level_values("Country").unique()
list_sectors = [
    # Totals
    "Total  emissions excluding LULUCF",
    "Total  emissions including LULUCF",
    # Energy and its sub-sectors
    "1 - Energy",
    "1A4 - Residential and other sectors",
    "1A1 - Energy Industries",
    "1B - Fugitive Emissions from Fuels",
    "1A2 - Manufacturing industries and construction",
    "1A5 - Energy - Other",
    "1A3 - Transport",
    "1C - CO2 from Transport and Storage",
    # Remaining top-level sectors
    "2- Industrial processes and product use",
    "3 - Agriculture",
    "Land use, land-use change and forestry (LULUCF)",
    "5 - Waste",
    "6 - Other",
]
list_gas = df.index.get_level_values("POL").unique()

In [5]:
# Mapping sector: OECD sector label (exactly as spelled in the "Variable" column,
# including the double space in the "Total  emissions ..." labels) -> mapped sector name.
mapped_sectors = {
    "Total  emissions excluding LULUCF": "total_excluding_LUCF",
    "Total  emissions including LULUCF": "total_including_LUCF",
    "1 - Energy": "total_energy",
    "1A4 - Residential and other sectors": "building",
    "1A1 - Energy Industries": "electricity_heat",
    "1B - Fugitive Emissions from Fuels": "fugitive_emissions",
    "1A2 - Manufacturing industries and construction": "manufacturing_construction",
    "1A5 - Energy - Other": "other_fuel_combustion",
    # Transport was previously mapped to "total_excluding_LUCF", which mislabelled
    # transport emissions as a second copy of the national total.
    "1A3 - Transport": "transportation",
    # NOTE(review): the label suggests CO2 transport/storage rather than vehicle
    # transport; "transportation" is kept from the original mapping - confirm the
    # intended target category.
    "1C - CO2 from Transport and Storage": "transportation",
    "2- Industrial processes and product use": "industrial_processes",
    "3 - Agriculture": "agriculture",
    "Land use, land-use change and forestry (LULUCF)": "lucf",
    "5 - Waste": "waste",
    "6 - Other": "other",
}

In [6]:
# Mapping gas name: OECD pollutant code -> mapped gas name.
# Only the aggregate "GHG" code is renamed; every other code maps to itself.
mapped_gas = {
    "GHG": "kyotogases",
    "CO2": "CO2",
    "CH4": "CH4",
    "N2O": "N2O",
    "HFC": "HFC",
    "HFC_PFC": "HFC_PFC",
    "NF3": "NF3",
    "PFC": "PFC",
    "SF6": "SF6",
}

In [7]:
# Location and name of the mapped output file:
# <path>mapped_data_<data_name>.json
path = "../../../data/ghg-emissions/oecd/mapped/"
data_name = "oecd"
namefile = "".join([path, "mapped_data_", data_name, ".json"])

In [None]:
# Create the output file: one JSON object per line, one object for every
# (country, year, gas, sector) combination that has a non-missing value.


def _first_value(selection):
    """Return the first element of a ``df.loc[key, column]`` selection.

    Depending on whether the index holds duplicate keys, pandas may hand back
    either a scalar or a Series for a fully specified key; both are accepted.
    """
    return np.atleast_1d(selection)[0]


def _build_mapped_entry(country, country_code, year, gas, sector, emission_value):
    """Assemble the JSON-serialisable record for a single emission value.

    Parameters
    ----------
    country : country name as found in the "Country" index level.
    country_code : value of the "COU" column for the entry.
    year : year of the entry; written as the date "<year>-01-01".
    gas : pollutant code, translated through ``mapped_gas``.
    sector : sector label, translated through ``mapped_sectors``.
    emission_value : the value read from the "Value" column.

    Raises
    ------
    KeyError : if ``gas`` or ``sector`` has no entry in its mapping.
    """
    return {
        "data_source": {
            "name": data_name,
            "link": "https://stats.oecd.org/Index.aspx?DataSetCode=AIR_GHG",
        },
        "geo_component": {
            "scale": "country",
            "name": country,
            "identifier": {"id": country_code, "type": "alpha3"},
        },
        "date": str(year) + "-01-01",
        "emission": {
            "gas": mapped_gas[gas],
            "value": emission_value,
            # TODO: CHECK UNIT - "MtC" has not been verified against the source data.
            "unit": {"unit_used": "MtC"},
            "sector": {
                "sector_origin_name": sector,
                "sector_mapped_name": mapped_sectors[sector],
            },
        },
    }


# The context manager closes the file even if a lookup or mapping fails midway.
with open(namefile, "w") as file:

    # Loop over geo_component (in this case, countries)
    for i, country in enumerate(list_country, start=1):

        # Loop over years
        for year in list_years:

            # Loop over gases
            for gas in list_gas:

                # Loop over sectors
                for sector in list_sectors:

                    # For specific gases, the only measure is total emissions
                    if gas != "GHG" and sector != "Total  emissions excluding LULUCF":
                        continue

                    key = (year, country, gas, sector)

                    # Check if there is an entry for the given index
                    # (index lookup instead of scanning the whole index each time)
                    if key not in df.index:
                        continue

                    # Get the emission value from dataset
                    emission_value = _first_value(df.loc[key, "Value"])

                    # Check that the value is not missing
                    if math.isnan(emission_value):
                        continue

                    country_code = _first_value(df.loc[key, "COU"])

                    # Write the json object to the file and add a line break
                    # (every line is a json object)
                    json.dump(
                        _build_mapped_entry(
                            country, country_code, year, gas, sector, emission_value
                        ),
                        file,
                    )
                    file.write("\n")

        # Print progress
        print("{}/{} - {} done!".format(i, len(list_country), country))