In [1]:
%matplotlib inline
%matplotlib notebook

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Dependencies
import os
import numpy as np
import pandas as pd
import plotly.io as pio
import plotly.express as px
import matplotlib.pyplot as plt

In [3]:
# Setting dataset filepath and export dir
filepath = os.path.join('resources', 'Methane_final.csv')

if not os.path.exists('images'):
    os.mkdir('images')

In [4]:
# Read csv
emissions = pd.read_csv(filepath, index_col=[0])
emissions.head()

Unnamed: 0,region,country,emissions,type,segment,reason,baseYear,notes
0,Africa,Algeria,257.611206,Agriculture,Total,All,2019-2021,Average based on United Nations Framework Conv...
1,Africa,Algeria,0.052,Energy,Bioenergy,All,2022,Estimates from end-uses are for 2020 or 2021 (...
2,Africa,Algeria,130.798996,Energy,Gas pipelines and LNG facilities,Fugitive,2022,Not available
3,Africa,Algeria,69.741898,Energy,Gas pipelines and LNG facilities,Vented,2022,Not available
4,Africa,Algeria,213.987,Energy,Onshore gas,Fugitive,2022,Not available


In [5]:
# region
# country - Country of Emission
# emissions - Methane Emissions (kt)
# type - Sector from which emissions occur
# Segment- Sub-sector from which emissions occur
# reason - The reason for the emission
# baseYear - Base year for the tracking of emissions
# notes - The source of data
emissions.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1548 entries, 0 to 1547
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   region     1548 non-null   object 
 1   country    1548 non-null   object 
 2   emissions  1548 non-null   float64
 3   type       1548 non-null   object 
 4   segment    1548 non-null   object 
 5   reason     1548 non-null   object 
 6   baseYear   1548 non-null   object 
 7   notes      1548 non-null   object 
dtypes: float64(1), object(7)
memory usage: 108.8+ KB


In [6]:
# Evaluating notes column
na = (emissions['notes'] == 'Not available').sum()
na

877

In [7]:
# Dropping notes column and evaluating type column
emissions.drop('notes', axis=1, inplace=True)
emissions['type'].value_counts()

Energy         1233
Agriculture     105
Other           105
Waste           105
Name: type, dtype: int64

In [8]:
# Evaluating segment column
emissions['segment'].value_counts()

Total                                             414
Onshore oil                                       217
Gas pipelines and LNG facilities                  164
Offshore oil                                      159
Onshore gas                                       144
Offshore gas                                      118
Other from oil and gas                             98
Bioenergy                                          93
Other from coal                                    66
Steam coal                                         30
Coking coal                                        24
Satellite-detected large oil and gas emissions     21
Name: segment, dtype: int64

In [9]:
# Removing 'World' values
emissions = emissions[emissions['region'] != 'World']

In [10]:
# Pulling segment spec
seg_totals = emissions[emissions['segment'] != 'Total']
seg_totals.head()

Unnamed: 0,region,country,emissions,type,segment,reason,baseYear
1,Africa,Algeria,0.052,Energy,Bioenergy,All,2022
2,Africa,Algeria,130.798996,Energy,Gas pipelines and LNG facilities,Fugitive,2022
3,Africa,Algeria,69.741898,Energy,Gas pipelines and LNG facilities,Vented,2022
4,Africa,Algeria,213.987,Energy,Onshore gas,Fugitive,2022
5,Africa,Algeria,464.308014,Energy,Onshore gas,Vented,2022


In [36]:
# Creating region-based sunburst chart
region_sb = px.sunburst(
    seg_totals, 
    path=['region', 'country'], 
    title='Methane Emissions by Region',
    values='emissions', 
    color='region'
)



region_sb.update_traces(hoverinfo = 'value')

region_sb.show()

In [12]:
region_sb.write_image('images/region_sb.jpeg')
pio.write_html(region_sb, file='region_sb.html')

In [13]:
# Grouping by segment data
seg_grouped=seg_totals.groupby('segment').sum().reset_index()
seg_grouped.sort_values('emissions',ascending=False,inplace=True)

In [14]:
# Creating segment bar chart
seg_bar = px.bar(seg_grouped, x = 'segment', 
                 y = 'emissions', 
                 title = 'Methane Emissions by Segment', 
                 color = 'segment',
                 labels = {'emissions':'Methane emission'})
seg_bar.update_traces(showlegend = False)

In [15]:
# Pulling country specific data
countries = emissions[emissions['country'] != 'World']
countries.head()

Unnamed: 0,region,country,emissions,type,segment,reason,baseYear
0,Africa,Algeria,257.611206,Agriculture,Total,All,2019-2021
1,Africa,Algeria,0.052,Energy,Bioenergy,All,2022
2,Africa,Algeria,130.798996,Energy,Gas pipelines and LNG facilities,Fugitive,2022
3,Africa,Algeria,69.741898,Energy,Gas pipelines and LNG facilities,Vented,2022
4,Africa,Algeria,213.987,Energy,Onshore gas,Fugitive,2022


In [16]:
# Getting the total by emissions type
country_totals = emissions.loc[np.where((countries['segment'] == 'Total'))]
country_totals.groupby('type').sum().reset_index()

Unnamed: 0,type,emissions
0,Agriculture,150805.103144
1,Energy,135347.173195
2,Other,10028.772862
3,Waste,75079.707008


In [17]:
# Creating type pie chart
px.pie(country_totals, values = 'emissions', title = 'Methane Emissions by Type', names = 'type', hole = 0.5)

In [18]:
# Pulling data for energy data
energy_totals = countries[~((countries['type'] == 'Energy') & (countries['segment'] == 'Total'))]
energy_totals.head()

Unnamed: 0,region,country,emissions,type,segment,reason,baseYear
0,Africa,Algeria,257.611206,Agriculture,Total,All,2019-2021
1,Africa,Algeria,0.052,Energy,Bioenergy,All,2022
2,Africa,Algeria,130.798996,Energy,Gas pipelines and LNG facilities,Fugitive,2022
3,Africa,Algeria,69.741898,Energy,Gas pipelines and LNG facilities,Vented,2022
4,Africa,Algeria,213.987,Energy,Onshore gas,Fugitive,2022


In [19]:
# Grouping segment data by reasons
energy_reasons = energy_totals.groupby(['segment', 'reason']).sum()
reasons_sorted = energy_reasons.sort_values('emissions', ascending = False).reset_index()
reasons_sorted

Unnamed: 0,segment,reason,emissions
0,Total,All,235913.583015
1,Steam coal,All,30047.273344
2,Onshore oil,Vented,26029.764491
3,Onshore gas,Vented,12388.200895
4,Coking coal,All,10313.157684
5,Bioenergy,All,9926.408038
6,Gas pipelines and LNG facilities,Fugitive,8243.553747
7,Offshore oil,Vented,6570.998807
8,Onshore oil,Flared,6381.733689
9,Onshore gas,Fugitive,5709.396678


In [20]:
# Displaying segment reasons in a stacked bar chart
reasons_sorted.drop(index = [0], inplace = True)
px.bar(reasons_sorted, x = 'emissions', 
                       y = 'segment', 
                       title = 'Methane Emissions by Segment and Reason', 
                       color = 'reason')

In [21]:
# Creating a region and segment sunburst chart
energy_regions = countries[(countries['type'] == 'Energy') & (countries['segment'] != 'Total')]
regions_sb = px.sunburst(energy_regions, path = ['region', 'segment'], 
                                         values = 'emissions',
                                         title = 'Methane Emissions by Region and Segment',
                                         color = 'emissions', 
                                         color_continuous_scale = 'YlOrRd')
regions_sb.show()

In [22]:
# Finding the top 10 countries by emissions
top_countries = countries.groupby('country')['emissions'].sum().sort_values(ascending=False).head(10)
print(top_countries)

country
China             81048.371586
United States     48604.877296
Russia            42432.929804
India             34852.007386
Brazil            21720.838126
Indonesia         19404.469000
European Union    18985.173461
Other             15997.357175
Iran              13030.685366
Nigeria            9903.895667
Name: emissions, dtype: float64


In [23]:
# Pulling all data from China
china = emissions.loc[emissions['country'] == 'China']
china = china[['type', 'segment', 'reason', 'emissions', 'baseYear']]
china = china.reset_index(drop=True)
china.head()

Unnamed: 0,type,segment,reason,emissions,baseYear
0,Agriculture,Total,All,18501.914062,2019-2021
1,Energy,Bioenergy,All,984.200012,2022
2,Energy,Coking coal,All,5418.77002,2022
3,Energy,Gas pipelines and LNG facilities,Fugitive,552.119995,2022
4,Energy,Gas pipelines and LNG facilities,Vented,294.390991,2022


In [24]:
# Grouping China data by emission type
china_types = china.groupby('type').sum().sort_values('emissions', ascending=False)
china_types

Unnamed: 0_level_0,emissions
type,Unnamed: 1_level_1
Energy,50744.44495
Agriculture,18501.914062
Waste,10424.141602
Other,1377.870972
