# Library imports and dataset loading

In [1]:
import os

import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

import json
import math

In [2]:
# Location of the raw PRIMAP-hist CSV export, relative to this notebook.
folder = "../../../data/ghg-emissions/primap-hist/"
dataset_name = "PRIMAP-hist_v2.2_19-Jan-2021.csv"

# `folder` already ends with "/", so the two parts can be joined directly.
primap_hist_data = pd.read_csv(f"{folder}{dataset_name}")

In [3]:
# Preview the first five rows to check that the CSV was parsed as expected.
primap_hist_data.head(n=5)

Unnamed: 0,scenario,country,category,entity,unit,1850,1851,1852,1853,1854,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,HISTCR,ABW,IPC1A,CH4,Gg,0.000153,0.000158,0.000164,0.000169,0.000174,...,0.0355,0.0385,0.0317,0.0517,0.0684,0.0594,0.0595,0.0539,0.0552,0.0565
1,HISTCR,AFG,IPC1A,CH4,Gg,0.0237,0.0238,0.0239,0.0241,0.0242,...,4.07,4.49,5.22,7.82,11.7,5.42,5.38,7.59,7.96,8.33
2,HISTCR,AGO,IPC1A,CH4,Gg,1.75,1.77,1.8,1.82,1.84,...,57.7,59.1,60.5,61.7,63.0,64.3,65.6,66.9,68.3,69.6
3,HISTCR,AIA,IPC1A,CH4,Gg,1e-05,1e-05,1e-05,1e-05,1e-05,...,0.00205,0.0024,0.00256,0.00256,0.00275,0.00275,0.00276,0.00286,0.00298,0.0031
4,HISTCR,ALB,IPC1A,CH4,Gg,0.0602,0.0606,0.0615,0.0631,0.0652,...,5.14,5.04,4.98,4.87,4.79,4.66,4.89,5.0,5.02,5.04


# Mapping

## Pre-processing

In [4]:
# Lookup table: category code found in the "category" column -> sector name
# written to the mapped output ("sector_mapped_name").
# Several codes (IPC2A, IPC2B, IPC2C, IPC2E) share one mapped sector name.
mapped_sectors = {
    "IPCM0EL": "total_excluding_LUCF",
    "IPC1": "total_energy",
    "IPC1B": "fugitive_emissions",
    "IPC1B3": "other_fuel_combustion",
    "IPC1C": "transport_storage",
    "IPC2": "total_ippu",
    "IPC2A": "industrial_processes",
    "IPC2B": "industrial_processes",
    "IPC2C": "industrial_processes",
    "IPC2E": "industrial_processes",
    "IPCMAG": "agriculture",
    "IPC4": "waste",
    "IPC5": "other",
    "IPC1A": "fuel_combustion_activities",
    "IPC1B1": "solid_fuels",
    "IPC1B2": "oil_and_natural_gas",
    "IPC2D": "non_energy_products",
    "IPC2F": "substitutes_ozone_depleting_substances",
    "IPC2G": "other_product_manufacture_use",
    "IPC2H": "other_ippu",
    "IPC3A": "livestock",
    "IPCMAGELV": "agriculture_excluding_livestock",
}

In [5]:
# Lookup table: code found in the "entity" column -> gas name written to the
# mapped output. A code and its "...AR4" variant share the same mapped name.
mapped_gases = {
    "KYOTOGHGAR4": "kyotogases",
    "KYOTOGHG": "kyotogases",
    "CO2": "CO2",
    "CH4": "CH4",
    "N2O": "N2O",
    "FGASESAR4": "F-gas",
    "FGASES": "F-gas",
    "HFCS": "HFC",
    "HFCSAR4": "HFC",
    "NF3": "NF3",
    "PFCS": "PFC",
    "PFCSAR4": "PFC",
    "SF6": "SF6",
}

In [6]:
# Everything after the first five columns is treated as a year column.
# Assumes those five are scenario, country, category, entity, unit (as in the
# preview above) -- TODO confirm if the input file layout ever changes.
list_years = primap_hist_data.columns[5:].values

In [7]:
# Build the path of the output file that will hold the mapped records.
path = "../../../data/ghg-emissions/primap-hist/mapped/"
datasource_name = "primap-hist"
namefile = f"{path}mapped_data_{datasource_name}.json"

In [8]:
# Keep only the first 1000 rows as a small working subset.
test = primap_hist_data.iloc[:1000]

In [9]:
# Display the 1000-row subset (pandas truncates the rendered table).
test

Unnamed: 0,scenario,country,category,entity,unit,1850,1851,1852,1853,1854,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,HISTCR,ABW,IPC1A,CH4,Gg,0.000153,0.000158,0.000164,0.000169,0.000174,...,0.03550,0.0385,0.03170,0.05170,0.06840,0.05940,0.05950,0.05390,0.05520,0.0565
1,HISTCR,AFG,IPC1A,CH4,Gg,0.023700,0.023800,0.023900,0.024100,0.024200,...,4.07000,4.4900,5.22000,7.82000,11.70000,5.42000,5.38000,7.59000,7.96000,8.3300
2,HISTCR,AGO,IPC1A,CH4,Gg,1.750000,1.770000,1.800000,1.820000,1.840000,...,57.70000,59.1000,60.50000,61.70000,63.00000,64.30000,65.60000,66.90000,68.30000,69.6000
3,HISTCR,AIA,IPC1A,CH4,Gg,0.000010,0.000010,0.000010,0.000010,0.000010,...,0.00205,0.0024,0.00256,0.00256,0.00275,0.00275,0.00276,0.00286,0.00298,0.0031
4,HISTCR,ALB,IPC1A,CH4,Gg,0.060200,0.060600,0.061500,0.063100,0.065200,...,5.14000,5.0400,4.98000,4.87000,4.79000,4.66000,4.89000,5.00000,5.02000,5.0400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,HISTCR,TON,IPC1,CH4,Gg,0.001200,0.001210,0.001220,0.001220,0.001230,...,0.25900,0.2520,0.28100,0.37800,0.24400,0.15600,0.17000,0.24800,0.24700,0.2460
996,HISTCR,TTO,IPC1,CH4,Gg,0.747000,0.772000,0.797000,0.822000,0.848000,...,554.00000,565.0000,544.00000,543.00000,545.00000,540.00000,511.00000,633.00000,652.00000,672.0000
997,HISTCR,TUN,IPC1,CH4,Gg,0.282000,0.285000,0.289000,0.292000,0.296000,...,175.00000,174.0000,179.00000,179.00000,171.00000,169.00000,154.00000,185.00000,188.00000,192.0000
998,HISTCR,TUR,IPC1,CH4,Gg,0.144000,0.144000,0.145000,0.147000,0.149000,...,492.00000,491.0000,504.00000,525.00000,467.00000,534.00000,296.00000,420.00000,356.00000,383.0000


Estimated processing duration: 30 minutes.

In [10]:
import time

#start_time = time.time()

# Export every non-missing (row, year) emission value of `df` as one JSON
# object per line in `namefile`.
#
# Each record has four top-level keys: "data_source", "geo_component", "date"
# and "emission". Raises KeyError if a row with at least one emission value
# has a category or entity code that is missing from `mapped_sectors` /
# `mapped_gases`.

# Codes of the "country" column that are written as country groups instead of
# alpha-3 countries.
country_group_codes = {"EARTH", "ANNEXI", "NONANNEXI", "AOSIS", "BASIC", "EU28", "LDC", "UMBRELLA"}

# GWP report tag attached to the unit of each aggregated gas basket. A basket
# and its "...AR4" variant map to the same gas name in `mapped_gases`, so this
# tag is the only way to tell their records apart in the output.
# NOTE(review): the HFCS/PFCS entries are new; previously only the FGASES and
# KYOTOGHG pairs were tagged. They follow the same naming pattern -- confirm
# against the PRIMAP-hist data description.
gwp_reference_by_gas = {
    "KYOTOGHGAR4": "AR4",
    "FGASESAR4": "AR4",
    "HFCSAR4": "AR4",
    "PFCSAR4": "AR4",
    "KYOTOGHG": "SAR",
    "FGASES": "SAR",
    "HFCS": "SAR",
    "PFCS": "SAR",
}

df = test  # switch to primap_hist_data to process the whole dataset
nb_rows = len(df)  # kept for inspection; the loop below no longer needs it
year_columns = list(list_years)

# Create the output folder if needed so that open() cannot fail on a missing path.
output_dir = os.path.dirname(namefile)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# The context manager closes the file even if a row raises midway.
with open(namefile, "w") as file:

    # Positional iteration: works whatever the index labels of `df` are.
    for _, record in df.iterrows():

        # Collect the years that actually hold a value for this row.
        emissions_by_year = []
        for year, raw_value in zip(year_columns, record[year_columns]):
            emission_value = float(raw_value)
            if not math.isnan(emission_value):
                emissions_by_year.append((year, emission_value))

        # Nothing to write for this row: skip it before any mapping lookup.
        if not emissions_by_year:
            continue

        ############## Entries associated with data_source
        dict_data_source = {
            "name": datasource_name,
            "link": "https://zenodo.org/record/4479172",
            "properties": {"scenario": record["scenario"]},
        }

        ############## Entries associated with geo_component
        country_code = record["country"]
        is_country_group = country_code in country_group_codes
        dict_geo_component = {
            "scale": "country group" if is_country_group else "country",
            "identifier": {
                "id": country_code,
                "type": "custom" if is_country_group else "alpha3",
            },
        }

        ############# Entries associated with emission
        gas = record["entity"]

        # Sub-dictionary on unit
        dict_emission_unit = {"unit_used": record["unit"]}
        if gas in gwp_reference_by_gas:
            dict_emission_unit["gwp_report_reference"] = gwp_reference_by_gas[gas]

        # Sub-dictionary on sector
        sector = record["category"]
        dict_emission_sector = {
            "sector_origin_name": sector,
            "sector_mapped_name": mapped_sectors[sector],
        }

        mapped_gas = mapped_gases[gas]

        # Only the date and the value change from one year to the next; the
        # row-level dictionaries built above are reused for every record.
        for year, emission_value in emissions_by_year:
            dict_mapped_entry = {
                "data_source": dict_data_source,
                "geo_component": dict_geo_component,
                "date": f"{year}-01-01",
                "emission": {
                    "gas": mapped_gas,
                    "value": emission_value,
                    "unit": dict_emission_unit,
                    "sector": dict_emission_sector,
                },
            }

            # Write the json object to the file and add a line break (every line is a json object)
            json.dump(dict_mapped_entry, file)
            file.write("\n")

#end_time = time.time()
#print("Execution time: {:.3} seconds".format(end_time-start_time))