In [22]:
import warnings
warnings.filterwarnings('ignore')

# Dependencies
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

filepath = "resources/Methane_final.csv"

In [23]:
# Read csv
emissions = pd.read_csv(filepath, index_col=[0])
emissions.head()

Unnamed: 0,region,country,emissions,type,segment,reason,baseYear,notes
0,Africa,Algeria,257.611206,Agriculture,Total,All,2019-2021,Average based on United Nations Framework Conv...
1,Africa,Algeria,0.052,Energy,Bioenergy,All,2022,Estimates from end-uses are for 2020 or 2021 (...
2,Africa,Algeria,130.798996,Energy,Gas pipelines and LNG facilities,Fugitive,2022,Not available
3,Africa,Algeria,69.741898,Energy,Gas pipelines and LNG facilities,Vented,2022,Not available
4,Africa,Algeria,213.987,Energy,Onshore gas,Fugitive,2022,Not available


In [24]:
# region -
# country - Country of Emission
# emissions - Methane Emissions (kt)
# type - Sector from which emissions occur
# Segment- Sub-sector from which emissions occur
# reason - The reason for the emission
# baseYear - Base year for the tracking of emissions
# notes - The source of data
emissions.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1548 entries, 0 to 1547
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   region     1548 non-null   object 
 1   country    1548 non-null   object 
 2   emissions  1548 non-null   float64
 3   type       1548 non-null   object 
 4   segment    1548 non-null   object 
 5   reason     1548 non-null   object 
 6   baseYear   1548 non-null   object 
 7   notes      1548 non-null   object 
dtypes: float64(1), object(7)
memory usage: 108.8+ KB


In [25]:
# Evaluating notes column
na = (emissions['notes'] == 'Not available').sum()
na

877

In [26]:
# Dropping notes column and evaluating type column
emissions.drop('notes', axis=1, inplace=True)
emissions['type'].value_counts()

Energy         1233
Agriculture     105
Other           105
Waste           105
Name: type, dtype: int64

In [27]:
# Evaluating segment column
emissions['segment'].value_counts()

Total                                             414
Onshore oil                                       217
Gas pipelines and LNG facilities                  164
Offshore oil                                      159
Onshore gas                                       144
Offshore gas                                      118
Other from oil and gas                             98
Bioenergy                                          93
Other from coal                                    66
Steam coal                                         30
Coking coal                                        24
Satellite-detected large oil and gas emissions     21
Name: segment, dtype: int64

In [28]:
emissions = emissions[emissions['region'] != 'World']

In [29]:
segment = emissions[emissions['segment'] != 'Total']
segment.head()

Unnamed: 0,region,country,emissions,type,segment,reason,baseYear
1,Africa,Algeria,0.052,Energy,Bioenergy,All,2022
2,Africa,Algeria,130.798996,Energy,Gas pipelines and LNG facilities,Fugitive,2022
3,Africa,Algeria,69.741898,Energy,Gas pipelines and LNG facilities,Vented,2022
4,Africa,Algeria,213.987,Energy,Onshore gas,Fugitive,2022
5,Africa,Algeria,464.308014,Energy,Onshore gas,Vented,2022


In [34]:
segment_sb = px.sunburst(segment, path=['region', 'country'], values='emissions', color='region')
segment_sb.show()

In [31]:
segment_grouped = segment.groupby('segment').sum()
segment_grouped.sort_values('emissions', ascending=False, inplace=True)
segment_grouped.reset_index(drop=True)
segment_grouped

Unnamed: 0_level_0,emissions
segment,Unnamed: 1_level_1
Onshore oil,34332.78324
Steam coal,30047.273344
Onshore gas,18097.597573
Gas pipelines and LNG facilities,12639.023727
Coking coal,10313.157684
Bioenergy,9926.408038
Offshore oil,8349.61186
Offshore gas,4342.396777
Satellite-detected large oil and gas emissions,3106.400007
Other from oil and gas,2493.177699


In [32]:
px.bar(segment_grouped, x='segment',y='emissions',color='segment',labels={'emissions':'Methane emission'})

ValueError: Value of 'x' is not the name of a column in 'data_frame'. Expected one of ['emissions'] but received: segment

In [None]:
top_countries = emissions.groupby('country')['emissions'].sum().sort_values(ascending=False).head(10)
print(top_countries)

In [None]:
china = emissions.loc[emissions['country'] == 'China']
china = china[['type', 'segment', 'reason', 'emissions', 'baseYear']]
china = china.reset_index(drop=True)
china.head()

In [None]:
china_types = china.groupby('type').sum()
china_types