In [20]:
#Import relevant libraries for data analysis

import os

import numpy as np
import pandas as pd

# Folder that holds both CSV inputs. Set the FOOD_DATA_DIR environment variable
# to run the notebook on another machine; when it is unset the original Desktop
# location is used, so the files read are the same as before.
DATA_DIR = os.environ.get('FOOD_DATA_DIR', '/Users/alexandreribeiro/Desktop')

emissions_df = pd.read_csv(os.path.join(DATA_DIR, 'Emissions_dataset.csv'))
foodwaste_df = pd.read_csv(os.path.join(DATA_DIR, 'Food-Waste_dataset.csv'))

#Check the first 5 rows of the emissions dataset

emissions_df.head()




Unnamed: 0,country,year,element,item,unit,value
0,Angola,2010,Carbon Dioxide (CO2),Agrifood Systems Waste Disposal,kt,1134.35
1,Angola,2010,Carbon Dioxide (CO2),Crop Residues,kt,138.57
2,Angola,2010,Carbon Dioxide (CO2),Food Household Consumption,kt,281.49
3,Angola,2010,Carbon Dioxide (CO2),Food Packaging,kt,8.13
4,Angola,2010,Carbon Dioxide (CO2),Food Processing,kt,0.0


In [21]:
#Check the first 5 rows of the food waste dataset
foodwaste_df.head(n=5)

Unnamed: 0,country,year,food_supply_stage,commodity,loss_percentage
0,Angola,2010,Farm,Maize (corn),3.6
1,Angola,2010,Farm,Millet,1.7
2,Angola,2010,Farm,Rice,1.5
3,Angola,2010,Farm,Sorghum,3.0
4,Angola,2010,Farm,Wheat,4.2


In [22]:
#In order to merge the two datasets, we can create a mapping based on the logical correspondence between the food_supply_stage and items.

# Distinct supply stages (food waste data) and distinct emission items
# (emissions data), each in order of first appearance

food_supply_stages = pd.unique(foodwaste_df['food_supply_stage'])
items = pd.unique(emissions_df['item'])

# Display both arrays together as a tuple
(food_supply_stages, items)

(array(['Farm', 'Harvest', 'Shelling and processing', 'Storage',
        'Transport', 'Households', 'Retail, wholesale and market',
        'Whole supply chain', 'Distribution and export', 'Packing'],
       dtype=object),
 array(['Agrifood Systems Waste Disposal', 'Crop Residues',
        'Food Household Consumption', 'Food Packaging', 'Food Processing',
        'Food Retail', 'Food Transport', 'Manure Management',
        'Manure applied to Soils', 'Manure left on Pasture',
        'Rice Cultivation', 'Synthetic Fertilizers'], dtype=object))

In [23]:
# Lookup table: food_supply_stage -> emission items associated with that stage.
#
# NOTE: 'Crop Residues', 'Food Processing' and 'Food Transport' are each listed
# under more than one stage. map_stages() below returns the FIRST stage (in
# insertion order) whose list contains the item, so 'Harvest', 'Storage',
# 'Whole supply chain' and 'Distribution and export' are never returned by it.

mapping = {
    'Farm': ['Crop Residues', 'Rice Cultivation', 'Synthetic Fertilizers'],
    'Harvest': ['Crop Residues'],  # item already claimed by 'Farm'
    'Shelling and processing': ['Food Processing'],
    'Storage': ['Food Processing'],  # item already claimed by 'Shelling and processing'
    'Transport': ['Food Transport'],
    'Households': ['Food Household Consumption'],
    'Retail, wholesale and market': ['Food Retail'],
    'Whole supply chain': ['Food Transport'],  # item already claimed by 'Transport'
    'Distribution and export': ['Food Transport'],  # item already claimed by 'Transport'
    'Packing': ['Food Packaging']
}

# Row-level helper that resolves an emissions record to a supply stage

def map_stages(row):
    """Return the supply stage assigned to one emissions record.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by 'item' (for example a DataFrame row).

    Returns
    -------
    str or float
        The first key of ``mapping`` whose item list contains ``row['item']``,
        or ``np.nan`` when no stage lists that item.
    """
    item_name = row['item']
    matching_stages = (
        stage
        for stage, stage_items in mapping.items()
        if item_name in stage_items
    )
    # next() with a default stops at the first match, like an early return
    return next(matching_stages, np.nan)

# Tag every emissions record with a supply stage so it can be joined to the
# food waste data.
#
# Invert `mapping` into an item -> stage lookup. setdefault keeps the first
# stage that lists an item, which is the same precedence map_stages() applies.
item_to_stage = {}
for stage_name, stage_items in mapping.items():
    for item_name in stage_items:
        item_to_stage.setdefault(item_name, stage_name)

# Series.map performs the lookup on the whole column at once instead of calling
# a Python function per row through DataFrame.apply(axis=1); items that are not
# in the lookup become NaN.
emissions_df['food_supply_stage'] = emissions_df['item'].map(item_to_stage)

# Check the first 5 rows of the updated emissions dataset

emissions_df.head()



Unnamed: 0,country,year,element,item,unit,value,food_supply_stage
0,Angola,2010,Carbon Dioxide (CO2),Agrifood Systems Waste Disposal,kt,1134.35,
1,Angola,2010,Carbon Dioxide (CO2),Crop Residues,kt,138.57,Farm
2,Angola,2010,Carbon Dioxide (CO2),Food Household Consumption,kt,281.49,Households
3,Angola,2010,Carbon Dioxide (CO2),Food Packaging,kt,8.13,Packing
4,Angola,2010,Carbon Dioxide (CO2),Food Processing,kt,0.0,Shelling and processing


In [27]:
# Inner-join the emissions records with the food waste records that share the
# same 'country', 'year' and 'food_supply_stage'

merged_df = emissions_df.merge(
    foodwaste_df,
    how='inner',
    on=['country', 'year', 'food_supply_stage'],
)

# Count the merged rows per supply stage

merged_df['food_supply_stage'].value_counts()

food_supply_stage
Farm                            11208
Shelling and processing          3810
Transport                        2694
Retail, wholesale and market      615
Households                         84
Packing                            78
Name: count, dtype: int64

In [45]:
# Check the first 5 rows of the merged dataset
merged_df.head(n=5)

Unnamed: 0,country,year,element,item,unit,value,food_supply_stage,commodity,loss_percentage
0,Angola,2010,Carbon Dioxide (CO2),Crop Residues,kt,138.57,Farm,Maize (corn),3.6
1,Angola,2010,Carbon Dioxide (CO2),Crop Residues,kt,138.57,Farm,Millet,1.7
2,Angola,2010,Carbon Dioxide (CO2),Crop Residues,kt,138.57,Farm,Rice,1.5
3,Angola,2010,Carbon Dioxide (CO2),Crop Residues,kt,138.57,Farm,Sorghum,3.0
4,Angola,2010,Carbon Dioxide (CO2),Crop Residues,kt,138.57,Farm,Wheat,4.2


In [47]:
# Summary statistics for the emission value and the loss percentage,
# rounded to two decimals

merged_df.loc[:, ['value', 'loss_percentage']].describe().round(decimals=2)

Unnamed: 0,value,loss_percentage
count,18489.0,18489.0
mean,1579.97,3.29
std,11347.12,4.5
min,0.0,0.0
25%,0.04,1.5
50%,2.93,2.0
75%,124.95,3.5
max,171822.5,63.0


commodity
Maize (corn)          3877
Rice                  3129
Sorghum               3069
Millet                2977
Wheat                 1629
                      ... 
Bambara beans, dry       3
Sweet potatoes           3
Plums and sloes          3
Figs                     3
Watermelons              3
Name: count, Length: 100, dtype: int64