In [1]:
import os
import pandas as pd
from zipfile import ZipFile
import plotly.express as px

from src.config.general import PATH_DATA_RAW

In [2]:
path_zip = os.path.join(PATH_DATA_RAW, 'International Greenhouse Gas Emissions.zip')

zip_file = ZipFile(path_zip)
dfs = {text_file.filename: pd.read_csv(zip_file.open(text_file.filename))
       for text_file in zip_file.infolist()
       if text_file.filename.endswith('.csv')}

In [3]:
dfs.keys()

dict_keys(['greenhouse_gas_inventory_data_data.csv'])

In [24]:
emissions = dfs['greenhouse_gas_inventory_data_data.csv'].copy()

In [25]:
emissions.shape

(8406, 4)

In [26]:
emissions.head(2)

Unnamed: 0,country_or_area,year,value,category
0,Australia,2014,393126.946994,carbon_dioxide_co2_emissions_without_land_use_...
1,Australia,2013,396913.93653,carbon_dioxide_co2_emissions_without_land_use_...


In [29]:
emissions['category'].value_counts().to_frame('N').reset_index()

Unnamed: 0,index,N
0,carbon_dioxide_co2_emissions_without_land_use_...,1074
1,greenhouse_gas_ghgs_emissions_without_land_use...,1074
2,methane_ch4_emissions_without_land_use_land_us...,1074
3,nitrous_oxide_n2o_emissions_without_land_use_l...,1074
4,sulphur_hexafluoride_sf6_emissions_in_kilotonn...,1032
5,hydrofluorocarbons_hfcs_emissions_in_kilotonne...,975
6,greenhouse_gas_ghgs_emissions_including_indire...,949
7,perfluorocarbons_pfcs_emissions_in_kilotonne_c...,831
8,nitrogen_trifluoride_nf3_emissions_in_kilotonn...,248
9,unspecified_mix_of_hydrofluorocarbons_hfcs_and...,75


In [30]:
emissions['category'].unique()

array(['carbon_dioxide_co2_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent',
       'greenhouse_gas_ghgs_emissions_including_indirect_co2_without_lulucf_in_kilotonne_co2_equivalent',
       'greenhouse_gas_ghgs_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent',
       'hydrofluorocarbons_hfcs_emissions_in_kilotonne_co2_equivalent',
       'methane_ch4_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent',
       'nitrogen_trifluoride_nf3_emissions_in_kilotonne_co2_equivalent',
       'nitrous_oxide_n2o_emissions_without_land_use_land_use_change_and_forestry_lulucf_in_kilotonne_co2_equivalent',
       'perfluorocarbons_pfcs_emissions_in_kilotonne_co2_equivalent',
       'sulphur_hexafluoride_sf6_emissions_in_kilotonne_co2_equivalent',
       'unspecified_mix_of_hydrofluorocarbons_hfcs_and_perfluorocarbons_pfcs_emissions_in_kilotonne_co2_equivalent'],
      dtype=obj

In [35]:
emissions['category_short'] = emissions['category'].str.split('_emissions_').str[0]

In [36]:
emissions['category_short'].value_counts().to_frame('N').reset_index()

Unnamed: 0,index,N
0,greenhouse_gas_ghgs,2023
1,carbon_dioxide_co2,1074
2,methane_ch4,1074
3,nitrous_oxide_n2o,1074
4,sulphur_hexafluoride_sf6,1032
5,hydrofluorocarbons_hfcs,975
6,perfluorocarbons_pfcs,831
7,nitrogen_trifluoride_nf3,248
8,unspecified_mix_of_hydrofluorocarbons_hfcs_and...,75


In [57]:
df_grouped_by_year = emissions. \
    groupby(['year'])["value"].agg('sum').reset_index()

In [59]:
fig = px.line(df_grouped_by_year, 
              x='year', 
              y=df_grouped_by_year.columns[1:],
              title="CO2 Emission Evolution")
fig.show()

In [60]:
df_grouped_by_category = emissions. \
    groupby(['year', 'category_short'])["value"].agg('sum').reset_index(). \
    pivot(index='year', columns='category_short', values='value').reset_index()

In [62]:
fig = px.line(df_grouped_by_category, 
              x='year', 
              y=df_grouped_by_category.columns[1:],
              title="CO2 Emission by Category")
fig.show()

In [63]:
df_grouped_by_country_or_area = emissions. \
    groupby(['year', 'country_or_area'])["value"].agg('sum').reset_index(). \
    pivot(index='year', columns='country_or_area', values='value').reset_index()

In [64]:
fig = px.line(df_grouped_by_country_or_area, 
              x='year', 
              y=df_grouped_by_country_or_area.columns[1:],
              title="CO2 Emission by Country")
fig.show()