In [None]:
!pip install pandas
!pip install matplotlib
!pip install seaborn

In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import json
import math

In [26]:
filepath = "https://raw.githubusercontent.com/OpenGeoScales/ogs-data-exploration/cdiac/data/ghg-emissions/cdiac/raw/Fossil-Fuel_CO2_Emissions_by_Nation.csv"
data = pd.read_csv(filepath)
data.head()

Unnamed: 0,Nation,Year,Total CO2 emissions from fossil-fuels and cement production (thousand metric tons of C),Emissions from solid fuel consumption,Emissions from liquid fuel consumption,Emissions from gas fuel consumption,Emissions from cement production,Emissions from gas flaring,Per capita CO2 emissions (metric tons of carbon),Emissions from bunker fuels (not included in the totals)
0,"(Note: missing values denoted by ""."")",,,,,,,,,
1,Source: Tom Boden and Bob Andres (Oak Ridge Na...,,,,,,,,,
2,DOI: 10.3334/CDIAC/00001_V2017,,,,,,,,,
3,AFGHANISTAN,1949.0,4.0,4.0,0.0,0.0,0.0,.,.,0.0
4,AFGHANISTAN,1950.0,23.0,6.0,18.0,0.0,0.0,0,0,0.0


In [28]:
columns = ["country", "year", "total_gas", "solid_fuel", "liquid_fuel"
           , "gas_fuel", "cement", "gas_flaring", "per_capita_co2", "bunker_fuels"]
# rename the columns
data.columns = columns
# drop first 3 lines & reset index
data = data.iloc[3:, :].reset_index(drop=True)
data = data.dropna()
# convert year into datetime and drop out the time part, keep the date part only
data.year = data.year.astype('int64')
#data = data.set_index(['year','country'])
#data = data.applymap(np.integer)
data.head()

Unnamed: 0,country,year,total_gas,solid_fuel,liquid_fuel,gas_fuel,cement,gas_flaring,per_capita_co2,bunker_fuels
0,AFGHANISTAN,1952,25.0,9,17,0,0.0,0,0.0,0.0
1,AFGHANISTAN,1953,29.0,10,18,0,0.0,0,0.0,0.0
2,AFGHANISTAN,1954,29.0,12,18,0,0.0,0,0.0,0.0
3,AFGHANISTAN,1955,42.0,17,25,0,0.0,0,0.0,0.0
4,AFGHANISTAN,1956,50.0,17,33,0,0.0,0,0.01,0.0


In [31]:
source_name = "cdiac"
source_url = "https://cdiac.ess-dive.lbl.gov/trends/emis/tre_regional.html"
geo_scale = "country"
geo_id_type = "name"
emission_unit = "Mtc"
gas_name = "CO2"
# Create the output file, appending to the json exisiting file
path = "../../data/json/mapped_data_" + source_name+".json"
file = open(path, "a")
# Copy from GCP's Mapping_gcp.ipynb notebook
mapped_sectors={}
mapped_sectors["total_gas"]="fossil_emissions_total"
mapped_sectors["gas_fuel"]="fossil_emissions_gas"
mapped_sectors["cement"]="fossil_emissions_cement"
mapped_sectors["gas_flaring"]="fossil_emissions_flaring"
mapped_sectors["bunker_fuels"]="bunker_fuels"
mapped_sectors["solid_fuel"]="fossil_emissions_other"
mapped_sectors["liquid_fuel"]="fossil_emissions_other"
mapped_sectors["per_capita_co2"]="fossil_emissions_other"

list_sector = mapped_sectors.keys()
for sect_origin in list_sector:
    subdata = data[['country', 'year', sect_origin]]
    for i in range(len(subdata)):
        country = subdata.iloc[i].country
        year = subdata.iloc[i].year
        emission = subdata.iloc[i][sect_origin]
        dict_mapped_entry={}  
        ############## Entries associated with geo_component
        dict_geo_component={}
        dict_geo_component["scale"] = geo_scale
        dict_geo_component["name"] = country      
        dict_geo_component["identifier"] = {"id": country
                                            , "type": geo_id_type }
        ############# Entries associated with emission
        dict_emission={}
        dict_emission["gas"] = gas_name
        dict_emission["value"] = str(emission_value)
        dict_emission["unit"] = {"unit_used" : emission_unit} 
        dict_emission["sector"] = {"sector_origin_name": sect_origin
                                    , "sector_mapped_name": mapped_sectors.get(sect_origin)} 
        ############# Combine everything together
        ############## Entries associated with data_source
        dict_mapped_entry["data_source"] = {"name": source_name
                                            , "link":source_url}
        dict_mapped_entry["geo_component"] = dict_geo_component 
        dict_mapped_entry["date"] = str(year)+"-01-01"
        dict_mapped_entry["emission"] = dict_emission
        # Write the json object to a file and add a line break (every line is a json object)
        json.dump(dict_mapped_entry, file)
        file.write("\n")
        

file.close()
