In [None]:
# required packages
import pandas as pd
import numpy as np

In [2]:
# Read the emission-indicators CSV (the "(Normalized)" export) into a DataFrame
raw_csv_path = 'Datasets_MS_Project/Emission_indicators/Climate_change_Emissions_indicators_E_All_Data_(Normalized)/Climate_change_Emissions_indicators_E_All_Data_(Normalized).csv'
raw_data = pd.read_csv(raw_csv_path)

# Preview the first rows to check the columns that were loaded
raw_data.head()

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1990,1990,%,82.04,E
1,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1991,1991,%,82.71,E
2,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1992,1992,%,85.57,E
3,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1993,1993,%,84.58,E
4,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1994,1994,%,83.84,E


In [3]:
# Distinct emission categories present in the "Item" column
raw_data.loc[:, 'Item'].unique()

array(['Farm gate', 'Land Use change', 'Pre- and Post- Production',
       'Agrifood systems', 'Emissions on agricultural land',
       'Emissions from crops', 'Emissions from livestock', 'AFOLU',
       'IPCC Agriculture', 'LULUCF', 'Energy', 'IPPU', 'Waste', 'Other',
       'All sectors with LULUCF', 'All sectors without LULUCF',
       'International bunkers'], dtype=object)

The values in the "Item" column represent different sources and scopes of emissions.

**Farm gate:** Emissions that occur on-farm (enteric fermentation, manure management, 
soil fertilization, etc.) — core agricultural production emissions.

**Land Use Change:** GHGs from conversion of forest to agriculture, deforestation, 
and land degradation — large CO₂ source.

**Pre- and Post-Production:** Emissions before and after the farm — includes fertilizer 
manufacturing, food processing, packaging, transport, and retail.

**Agrifood systems:** Full supply chain emissions: pre-production + farm gate + 
post-production. This is end-to-end.

**Emissions on agricultural land:** Emissions specifically on agricultural land, 
including soil, crops, and animals — a more spatial view.

**Emissions from crops:** Emissions from crop-related sources: synthetic fertilizers, 
rice cultivation (CH₄), burning of residues.

**Emissions from livestock:** Emissions from livestock: enteric fermentation (CH₄), 
manure handling (CH₄ and N₂O).

**AFOLU:** Agriculture, Forestry, and Other Land Use – IPCC sector that combines 
emissions from land and agriculture.

**IPCC Agriculture:** Emissions from the agriculture sector only, as defined in IPCC's 
inventory (no land-use change).

**LULUCF:** Land Use, Land-Use Change, and Forestry — includes emissions/removals 
from forests, cropland, grassland, etc.

**Energy:** Emissions from fuel combustion and energy use across the economy (the IPCC Energy sector), which includes but is not limited to machinery on farms.

**IPPU:** Industrial Processes and Product Use – emissions from chemical processes 
(like fertilizer manufacturing).

**Waste:** Emissions from agricultural waste management — landfills, burning, etc.

**International bunkers:** Emissions from international aviation and shipping, 
typically excluded from national totals.


The items we will consider for our project are:

'Farm gate', 'Land Use change', 'Pre- and Post- Production',
'Agrifood systems', 'Emissions from crops', 'Emissions from livestock',
'IPCC Agriculture', 'Energy', 'Waste', 'International bunkers'

In [4]:
# Distinct metrics present in the "Element" column
raw_data.loc[:, 'Element'].unique()

array(['Emissions Share (CO2eq) (AR5)', 'Emissions Share (CO2)',
       'Emissions Share (CH4)', 'Emissions Share (N2O)',
       'Emissions per capita',
       'Emissions per value of agricultural production',
       'Emissions per area of agricultural land',
       'Emissions Share (CO2eq) (AR5) (F-gases)'], dtype=object)

The elements in the "Element" column indicate the metric used to describe emissions, 
often expressed as CO₂-equivalents (CO₂eq) using IPCC’s AR5 global warming potential 
(GWP) values.

The one we would use for our project is "Emissions Share (CO2eq) (AR5)". It 
represents the share (%) of total emissions expressed in CO₂-equivalents, 
using AR5 GWP for CH₄ and N₂O. 

In [10]:
# Keep only the CO2eq (AR5) emissions-share rows for the items of interest
items_to_keep = [
    'Farm gate',
    'Land Use change',
    'Pre- and Post- Production',
    'Agrifood systems',
    'Emissions from crops',
    'Emissions from livestock',
    'IPCC Agriculture',
    'Energy',
    'Waste',
    'International bunkers',
]

# Build each row condition separately so the final selection reads as plain logic
is_co2eq_share = raw_data['Element'] == 'Emissions Share (CO2eq) (AR5)'
is_kept_item = raw_data['Item'].isin(items_to_keep)

filtered_data = raw_data[is_co2eq_share & is_kept_item]

filtered_data.head()

Unnamed: 0,Area Code,Area Code (M49),Area,Item Code,Item,Element Code,Element,Year Code,Year,Unit,Value,Flag
0,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1990,1990,%,82.04,E
1,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1991,1991,%,82.71,E
2,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1992,1992,%,85.57,E
3,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1993,1993,%,84.58,E
4,2,'004,Afghanistan,6996,Farm gate,726313,Emissions Share (CO2eq) (AR5),1994,1994,%,83.84,E


In [11]:
# Confirm that only the selected items remain after filtering
filtered_data.loc[:, 'Item'].unique()

array(['Farm gate', 'Land Use change', 'Pre- and Post- Production',
       'Agrifood systems', 'Emissions from crops',
       'Emissions from livestock', 'IPCC Agriculture', 'Energy', 'Waste',
       'International bunkers'], dtype=object)

In [12]:
# Number of rows available per item, to spot sparsely populated categories
filtered_data.loc[:, 'Item'].value_counts()

Item
Farm gate                    7814
Land Use change              7814
Pre- and Post- Production    7814
Agrifood systems             7814
IPCC Agriculture             7814
Waste                        7781
Energy                       7748
Emissions from livestock     7563
Emissions from crops         7448
International bunkers          33
Name: count, dtype: int64

Item "International bunkers" has very few rows of data. This category could be removed 
from our dataset in later steps. 

In [13]:
# Reshape from long format (one row per area/year/item) to wide format
# (one row per area/year, one column per item holding its 'Value').
# The index levels are turned back into ordinary columns as part of the chain.
pivoted_data = (
    filtered_data
    .pivot_table(
        values='Value',
        index=['Area Code', 'Area', 'Year Code', 'Year'],
        columns='Item',
    )
    .reset_index()
)

# Clear the leftover "Item" label on the column axis
pivoted_data.columns.name = None

pivoted_data.head()

Unnamed: 0,Area Code,Area,Year Code,Year,Agrifood systems,Emissions from crops,Emissions from livestock,Energy,Farm gate,IPCC Agriculture,International bunkers,Land Use change,Pre- and Post- Production,Waste
0,1,Armenia,1992,1992,36.76,1.36,15.35,71.41,25.84,16.77,,0.67,10.25,7.66
1,1,Armenia,1993,1993,40.1,2.49,23.41,55.57,28.01,25.99,,1.18,10.91,12.76
2,1,Armenia,1994,1994,39.48,1.38,22.64,58.86,26.19,24.11,,1.16,12.13,12.44
3,1,Armenia,1995,1995,36.82,1.21,19.64,63.4,23.09,20.93,,1.02,12.72,10.92
4,1,Armenia,1996,1996,41.05,1.69,23.36,56.07,27.94,25.08,,1.21,11.9,13.02


In [15]:
# Drop the sparsely populated 'International bunkers' column.
# errors='ignore' keeps this cell idempotent: it reassigns `pivoted_data` to itself,
# so without it a second run would raise KeyError on the already-removed column.
pivoted_data = pivoted_data.drop(columns=['International bunkers'], errors='ignore')
pivoted_data.head()

Unnamed: 0,Area Code,Area,Year Code,Year,Agrifood systems,Emissions from crops,Emissions from livestock,Energy,Farm gate,IPCC Agriculture,Land Use change,Pre- and Post- Production,Waste
0,1,Armenia,1992,1992,36.76,1.36,15.35,71.41,25.84,16.77,0.67,10.25,7.66
1,1,Armenia,1993,1993,40.1,2.49,23.41,55.57,28.01,25.99,1.18,10.91,12.76
2,1,Armenia,1994,1994,39.48,1.38,22.64,58.86,26.19,24.11,1.16,12.13,12.44
3,1,Armenia,1995,1995,36.82,1.21,19.64,63.4,23.09,20.93,1.02,12.72,10.92
4,1,Armenia,1996,1996,41.05,1.69,23.36,56.07,27.94,25.08,1.21,11.9,13.02


In [16]:
# Mapping from the source column names to the project's column names
column_renames = {
    # identifier columns
    'Area Code': 'area_code',
    'Area': 'area',
    'Year Code': 'year_code',
    'Year': 'year',
    # emission-share columns (one per item)
    'Farm gate': 'emission_share_farmgate',
    'Land Use change': 'emission_share_land_use_change',
    'Pre- and Post- Production': 'emission_share_pre_and_post_production',
    'Agrifood systems': 'emission_share_end_to_end_agrifood',
    'Emissions from crops': 'emission_share_crops',
    'Emissions from livestock': 'emission_share_livestock',
    'IPCC Agriculture': 'emission_share_IPCC_agriculture',
    'Energy': 'emission_share_energy_use',
    'Waste': 'emission_share_agri_waste_mgt',
}

# rename returns a new frame, so pivoted_data itself is left untouched
cleaned_data = pivoted_data.rename(columns=column_renames)
cleaned_data.head()

Unnamed: 0,area_code,area,year_code,year,emission_share_end_to_end_agrifood,emission_share_crops,emission_share_livestock,emission_share_energy_use,emission_share_farmgate,emission_share_IPCC_agriculture,emission_share_land_use_change,emission_share_pre_and_post_production,emission_share_agri_waste_mgt
0,1,Armenia,1992,1992,36.76,1.36,15.35,71.41,25.84,16.77,0.67,10.25,7.66
1,1,Armenia,1993,1993,40.1,2.49,23.41,55.57,28.01,25.99,1.18,10.91,12.76
2,1,Armenia,1994,1994,39.48,1.38,22.64,58.86,26.19,24.11,1.16,12.13,12.44
3,1,Armenia,1995,1995,36.82,1.21,19.64,63.4,23.09,20.93,1.02,12.72,10.92
4,1,Armenia,1996,1996,41.05,1.69,23.36,56.07,27.94,25.08,1.21,11.9,13.02


In [17]:
# Export the cleaned data as a CSV file.
# `index` must be the boolean False: the string 'False' is truthy, so pandas would
# still write the row index as an extra unnamed first column in the output file.
cleaned_data.to_csv('cleaned_datasets/emission_indicators_cleaned.csv', index=False)