In [16]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides pandas warnings (e.g. SettingWithCopyWarning,
# deprecations) raised by later cells.
warnings.filterwarnings('ignore')

# Dependencies
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt

# Path of the methane emissions CSV, relative to the notebook's working directory
filepath = "resources/Methane_final.csv"

In [2]:
# Load the raw dataset; the file's first column becomes the row index
emissions_df = pd.read_csv(filepath_or_buffer=filepath, index_col=[0])
emissions_df.head(n=5)

Unnamed: 0,region,country,emissions,type,segment,reason,baseYear,notes
0,Africa,Algeria,257.611206,Agriculture,Total,All,2019-2021,Average based on United Nations Framework Conv...
1,Africa,Algeria,0.052,Energy,Bioenergy,All,2022,Estimates from end-uses are for 2020 or 2021 (...
2,Africa,Algeria,130.798996,Energy,Gas pipelines and LNG facilities,Fugitive,2022,Not available
3,Africa,Algeria,69.741898,Energy,Gas pipelines and LNG facilities,Vented,2022,Not available
4,Africa,Algeria,213.987,Energy,Onshore gas,Fugitive,2022,Not available


In [3]:
# Column reference
# region    - Region the country is grouped under (e.g. Africa)
# country   - Country of emission
# emissions - Methane emissions (kt)
# type      - Sector from which emissions occur
# segment   - Sub-sector from which emissions occur
# reason    - The reason for the emission
# baseYear  - Base year for the tracking of emissions
# notes     - The source of data
# Print the index range, per-column non-null counts and dtypes
emissions_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1548 entries, 0 to 1547
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   region     1548 non-null   object 
 1   country    1548 non-null   object 
 2   emissions  1548 non-null   float64
 3   type       1548 non-null   object 
 4   segment    1548 non-null   object 
 5   reason     1548 non-null   object 
 6   baseYear   1548 non-null   object 
 7   notes      1548 non-null   object 
dtypes: float64(1), object(7)
memory usage: 108.8+ KB


In [4]:
# Count missing values per column
emissions_df.isna().sum()

region       0
country      0
emissions    0
type         0
segment      0
reason       0
baseYear     0
notes        0
dtype: int64

In [5]:
# Count how many rows carry the 'Not available' placeholder in the notes column
na = emissions_df['notes'].eq('Not available').sum()
na

877

In [6]:
# Dropping notes column and evaluating type column.
# drop() returns a new frame rather than mutating in place, and errors='ignore'
# keeps this cell re-runnable: a second execution no longer raises KeyError
# because 'notes' has already been removed.
emissions_df = emissions_df.drop(columns='notes', errors='ignore')
emissions_df['type'].value_counts()

Energy         1233
Agriculture     105
Other           105
Waste           105
Name: type, dtype: int64

In [7]:
# Frequency of each segment value, most common first
emissions_df.loc[:, 'segment'].value_counts()

Total                                             414
Onshore oil                                       217
Gas pipelines and LNG facilities                  164
Offshore oil                                      159
Onshore gas                                       144
Offshore gas                                      118
Other from oil and gas                             98
Bioenergy                                          93
Other from coal                                    66
Steam coal                                         30
Coking coal                                        24
Satellite-detected large oil and gas emissions     21
Name: segment, dtype: int64

In [8]:
# Creating DataFrame with all 'Energy' type emissions.
# All three row filters are combined into one boolean mask, and the 'type'
# column (constant after filtering) is removed with a non-mutating drop().
# The previous in-place drop ran on a filtered slice of emissions_df, which
# triggers pandas' SettingWithCopyWarning.
energy_df = (
    emissions_df[
        (emissions_df['type'] == 'Energy')
        & (emissions_df['segment'] != 'Total')   # keep sub-sector rows only
        & (emissions_df['region'] != 'World')    # exclude rows whose region is 'World'
    ]
    .drop(columns='type')
)
energy_df.head()

Unnamed: 0,region,country,emissions,segment,reason,baseYear
1,Africa,Algeria,0.052,Bioenergy,All,2022
2,Africa,Algeria,130.798996,Gas pipelines and LNG facilities,Fugitive,2022
3,Africa,Algeria,69.741898,Gas pipelines and LNG facilities,Vented,2022
4,Africa,Algeria,213.987,Onshore gas,Fugitive,2022
5,Africa,Algeria,464.308014,Onshore gas,Vented,2022


In [9]:
# Country-level view: keep rows where neither region nor country is 'World'
country_df = emissions_df[
    emissions_df['region'].ne('World') & emissions_df['country'].ne('World')
]
country_df.head(n=5)

Unnamed: 0,region,country,emissions,type,segment,reason,baseYear
0,Africa,Algeria,257.611206,Agriculture,Total,All,2019-2021
1,Africa,Algeria,0.052,Energy,Bioenergy,All,2022
2,Africa,Algeria,130.798996,Energy,Gas pipelines and LNG facilities,Fugitive,2022
3,Africa,Algeria,69.741898,Energy,Gas pipelines and LNG facilities,Vented,2022
4,Africa,Algeria,213.987,Energy,Onshore gas,Fugitive,2022


In [14]:
# Creating a DataFrame isolating type values.
# Select the 'Total' rows directly from country_df with a boolean mask.
# The previous version took *positions* from country_df (np.where) and used
# them as *labels* on emissions_df, which only lines up when no filtered-out
# row precedes a kept one.
type_df = country_df[country_df['segment'] == 'Total']
# Sum only the numeric emissions column: on pandas >= 2.0 an unselected
# groupby().sum() also tries to aggregate the text columns.
type_df.groupby('type')['emissions'].sum().reset_index()

Unnamed: 0,type,emissions
0,Agriculture,150805.103144
1,Energy,135347.173195
2,Other,10028.772862
3,Waste,75079.707008


In [18]:
# Pie chart of emissions by type; hole=0.5 renders it as a donut
px.pie(type_df, names='type', values='emissions', hole=0.5)

In [11]:
# Creating a DataFrame isolating segment values.
# Only the numeric 'emissions' column is aggregated: on pandas >= 2.0 an
# unselected groupby().sum() would also aggregate the text columns (region,
# country, ...) and those would end up in the exported CSV.
# sort_values() is chained instead of run in place so the cell is idempotent.
segment_df = (
    energy_df
    .groupby('segment')[['emissions']]
    .sum()
    .reset_index()
    .sort_values('emissions', ascending=False)
)
segment_df.head()

Unnamed: 0,segment,emissions
6,Onshore oil,34332.78324
10,Steam coal,30047.273344
5,Onshore gas,18097.597573
2,Gas pipelines and LNG facilities,12639.023727
1,Coking coal,10313.157684


In [12]:
# Outputting DataFrames to CSV files
# Destination path for each exported frame
emissions_fp, energy_fp, country_fp, segment_fp = (
    'output/emissions.csv',
    'output/energy.csv',
    'output/country.csv',
    'output/segment.csv',
)

# The two lists are matched by position: dfs[i] is written to filepaths[i]
dfs = [emissions_df, energy_df, country_df, segment_df]
filepaths = [emissions_fp, energy_fp, country_fp, segment_fp]

def dfs_to_csv(dfs, filepaths):
    """Write each frame in ``dfs`` to the CSV target at the same position in ``filepaths``.

    Parameters
    ----------
    dfs : iterable
        Objects exposing ``to_csv`` (pandas DataFrames in this notebook).
    filepaths : sequence
        Destination for each frame, matched by position. Surplus entries
        beyond ``len(dfs)`` are ignored.

    Returns
    -------
    None
        Prints ``'CSVs created!'`` after the last file has been written.

    Raises
    ------
    IndexError
        If fewer destinations than frames are supplied. Raised before any
        file is written, so a mismatch never leaves a partial export behind.
    """
    # Function-scope import: the notebook's import cell does not bring in `os`.
    import os

    frames = list(dfs)
    if len(filepaths) < len(frames):
        raise IndexError(
            f'{len(frames)} DataFrames but only {len(filepaths)} file paths'
        )

    for frame, target in zip(frames, filepaths):
        # Create the destination folder on demand so a fresh checkout without
        # an existing output directory does not fail. Non-path targets
        # (e.g. open buffers) are passed straight through to to_csv.
        if isinstance(target, (str, os.PathLike)):
            parent = os.path.dirname(os.fspath(target))
            if parent:
                os.makedirs(parent, exist_ok=True)
        frame.to_csv(target)

    print('CSVs created!')

In [13]:
# Export every prepared frame to its matching CSV path
dfs_to_csv(dfs=dfs, filepaths=filepaths)

CSVs created!
