# Mapping CAIT

The objective of this notebook is to map the CAIT dataset to the Opengeoscale model. 

CAIT data can be retrieved in several ways from the online ClimateWatch tool. 
The most robust method seems to be to click on "Download Bulk Data" and select "GHG Emissions". The dataset is named CW_CAIT_GHG_Emissions.xlsx


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
import math

# Use the fully qualified option name: the bare "max_rows" pattern matches
# several options in recent pandas versions and is rejected as ambiguous.
pd.set_option("display.max_rows", 200)

<h2> Read the file </h2>

In [2]:
# Load the CAIT workbook (first row = column names), discard the "Source"
# column and index the rows by country, gas and sector.
df_ghg = (
    pd.read_excel("../../../data/ghg-emissions/wri/CW_CAIT_GHG_Emissions.xlsx", header=0)
    .drop("Source", axis=1)
    .set_index(["Country", "Gas", "Sector"])
)


In [3]:
# Preview the first 100 rows (positional slice)
df_ghg.iloc[:100]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1990,1991,1992,1993,1994,1995,1996,1997,1998,1999,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
Country,Gas,Sector,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
AFG,All GHG,Total excluding LUCF,15.18285,15.10201,13.63469,13.46399,13.27173,13.47605,14.43757,15.34291,16.09523,16.91458,...,36.95546,44.90616,58.65186,66.74928,74.79611,84.61923,93.72862,95.37284,97.30011,98.92076
AFG,All GHG,Total including LUCF,12.79404,12.71321,11.24588,11.07519,10.88293,11.08725,12.04877,12.95411,13.70642,14.52578,...,37.07736,45.02807,58.40564,66.50306,74.54989,84.37301,93.4824,95.5275,97.45477,99.07541
AFG,All GHG,Energy,5.829497,5.334624,3.760858,3.42276,3.102594,2.783429,2.651769,2.509109,2.389449,2.096789,...,20.14082,26.05662,39.52942,47.59223,55.52703,64.67584,74.74164,75.93291,77.71818,79.58044
AFG,All GHG,Industrial Processes,0.051879,0.0545,0.060111,0.062722,0.065343,0.067964,0.081694,0.095434,0.109174,0.122915,...,0.222971,0.248895,0.313896,0.378967,0.449909,0.53463,0.592081,0.758807,0.911544,1.06428
AFG,All GHG,Agriculture,8.072853,8.396465,8.409491,8.48648,8.523959,8.957016,9.977472,10.95273,11.75197,12.79124,...,13.85702,15.78838,15.90226,15.77779,15.72486,16.22045,15.11257,15.31574,15.22195,14.74454
AFG,All GHG,Waste,1.228617,1.316422,1.404227,1.492032,1.579837,1.667642,1.72664,1.785639,1.844637,1.903635,...,2.734649,2.81227,2.906281,3.000293,3.094305,3.188316,3.282328,3.365385,3.448441,3.531498
AFG,All GHG,Land-Use Change and Forestry,-2.3888,-2.3888,-2.3888,-2.3888,-2.3888,-2.3888,-2.3888,-2.3888,-2.3888,-2.3888,...,0.121902,0.121902,-0.24622,-0.24622,-0.24622,-0.24622,-0.24622,0.154657,0.154657,0.154657
AFG,All GHG,Bunker Fuels,0.019,0.019,0.019,0.019,0.016,0.016,0.016,0.016,0.016,0.016,...,0.032,0.032,0.032,0.032,0.032,0.033,0.032,0.038,0.04,0.042
AFG,All GHG,Electricity/Heat,0.268,0.265,0.164,0.164,0.164,0.164,0.163,0.164,0.164,0.162,...,0.123,0.139,0.187,0.151,0.216,0.172,0.17,0.2,0.207,0.223
AFG,All GHG,Manufacturing/Construction,0.571,0.532,0.392,0.377,0.359,0.343,0.323,0.295,0.275,0.252,...,1.666,2.389,4.158,3.225,3.343,3.672,2.893,2.948,3.441,3.294


# Mapping GHG Emissions 

### Definition of the dataframe sectors

In [4]:
# Sector labels used as keys, each paired with its mapped counterpart.
# NOTE(review): most keys end with a trailing space while the two totals and
# 'Building' do not -- presumably this mirrors the raw labels; confirm against
# the dataset before normalising anything here.
mapped_sectors = {
    "Total excluding LUCF": "total_excluding_LUCF",
    "Total including LUCF": "total_including_LUCF",
    "Energy ": "total_energy",
    "Industrial Processes ": "industrial_processes",
    "Agriculture ": "agriculture",
    "Waste ": "waste",
    "Land-Use Change and Forestry ": "lucf",
    "Bunker Fuels ": "bunker_fuels",
    "Electricity/Heat ": "electricity_heat",
    "Manufacturing/Construction ": "manufacturing_construction",
    "Transportation ": "transportation",
    "Building": "building",
    "Other Fuel Combustion ": "other_fuel_combustion",
    "Fugitive Emissions ": "fugitive_emissions",
}

# Gas labels used as keys, each paired with its mapped counterpart.
mapped_gases = {
    "All GHG": "kyoto_gases",
    "CO2": "co2",
    "CH4": "ch4",
    "N2O": "n2o",
    "F-Gas": "f-gas",
}



In [5]:
# Build the name of the output mapped datafile from its folder and the dataset name.
path = "../../../data/ghg-emissions/wri/mapped/"
data_name = "wri-cait"
namefile = f"{path}mapped_data_{data_name}.json"

In [6]:
def _build_mapped_entry(country, gas, sector, year, emission_value,
                        source_name, sector_map, gas_map):
    '''
    Build the json-serialisable dictionary describing one emission value.
    '''
    # Resolve the mapped names first so that an unmapped sector or gas raises
    # a KeyError before anything is assembled.
    sector_mapped_name = sector_map[sector]
    gas_mapped_name = gas_map[gas]

    return {
        # Entries associated with data_source
        "data_source": {
            "name": source_name,
            "link": "https://www.climatewatchdata.org/",
            "properties": {"description": None, "provider": "CAIT"},
        },
        # Entries associated with geo_component
        "geo_component": {
            "scale": "country",
            "name": None,
            "identifier": {"id": country, "type": "alpha3"},
            "properties": {"data_source_code": None},
        },
        "date": str(year) + "-01-01",
        # Entries associated with emissions
        "emission": {
            "gas": gas_mapped_name,
            "value": emission_value,
            "unit": {"unit_used": "Mt co2eq"},
            "sector": {
                "sector_origin_name": sector,
                "sector_mapped_name": sector_mapped_name,
            },
        },
    }


def MapDataSet(df, namefile, max_countries=2, source_name=None,
               sector_map=None, gas_map=None):
    '''
    Function meant to map a dataframe provided with country-gas-sector, and years in columns.

    Every emission value that is not NaN is written to `namefile` as one json
    object per line.

    Parameters:
        df: dataframe indexed by (country, gas, sector) with one column per year.
        namefile: path of the output file; it is overwritten.
        max_countries: number of leading countries that are mapped. The default
            of 2 keeps the historical behaviour of this function; pass None to
            map every country of the dataframe.
        source_name: name stored in the "data_source" block. Defaults to the
            notebook-level `data_name`.
        sector_map: dictionary sector label -> mapped sector name. Defaults to
            the notebook-level `mapped_sectors`.
        gas_map: dictionary gas label -> mapped gas name. Defaults to the
            notebook-level `mapped_gases`.

    Raises:
        KeyError: when a sector or a gas has no mapped counterpart.
    '''

    # Fall back on the notebook-level definitions when nothing is passed explicitly
    if source_name is None:
        source_name = data_name
    if sector_map is None:
        sector_map = mapped_sectors
    if gas_map is None:
        gas_map = mapped_gases

    # list of countries, optionally restricted to the first ones
    countries = df.index.get_level_values(0).unique()
    if max_countries is not None:
        countries = countries[:max_countries]

    # The context manager closes the output file even if a mapping is missing
    with open(namefile, "w") as output_file:

        # Loop over countries
        for country in countries:

            print(country)
            # list of gases
            gases = df.loc[country, :].index.get_level_values(0).unique()

            # Loop over gases
            for gas in gases:

                # list of sectors
                sectors = df.loc[(country, gas), :].index.get_level_values(0).unique()

                # Loop over sectors
                for sector in sectors:

                    # One lookup per row instead of one per (row, year) cell
                    yearly_values = df.loc[(country, gas, sector), :]

                    for year, raw_value in yearly_values.items():

                        # Plain float: numpy integers are not json serialisable
                        emission_value = float(raw_value)

                        # We have to create a json block for each emission value.
                        # The method is borrowed from UNFCCC
                        if math.isnan(emission_value):
                            continue

                        dict_mapped_entry = _build_mapped_entry(
                            country, gas, sector, year, emission_value,
                            source_name, sector_map, gas_map,
                        )

                        # Write the json object to a file and add a line break
                        # (every line is a json object)
                        json.dump(dict_mapped_entry, output_file)
                        output_file.write("\n")
                


In [7]:
#map the data
MapDataSet(df_ghg, namefile)

AFG


  return self._getitem_tuple(key)


AGO
