In [1]:
import pandas as pd
import numpy as np
import csv
from pathlib import Path 
import re
import string
import matplotlib.pyplot as plt
import plotly.express as px

from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [2]:
# Build the CSV location from the current user's home directory rather than
# hard-coding one machine's absolute path (and username) into the notebook.
# NOTE(review): assumes the file still lives in ~/Downloads; point DATA_PATH
# somewhere else if the dataset is stored in a different folder.
DATA_PATH = Path.home() / 'Downloads' / 'Food_Production.csv'
df = pd.read_csv(DATA_PATH)

## First, let's have a look at the dataset

In [23]:
# Preview the first five rows (five is also DataFrame.head's default).
df.head(n=5)

Unnamed: 0,Food product,Land use change,Animal Feed,Farm,Processing,Transport,Packging,Retail,Total_emissions,Eutrophying emissions per 1000kcal (gPO₄eq per 1000kcal),...,Freshwater withdrawals per kilogram (liters per kilogram),Greenhouse gas emissions per 1000kcal (kgCO₂eq per 1000kcal),Greenhouse gas emissions per 100g protein (kgCO₂eq per 100g protein),Land use per 1000kcal (m² per 1000kcal),Land use per kilogram (m² per kilogram),Land use per 100g protein (m² per 100g protein),Scarcity-weighted water use per kilogram (liters per kilogram),Scarcity-weighted water use per 100g protein (liters per 100g protein),Scarcity-weighted water use per 1000kcal (liters per 1000 kilocalories),Category
0,Wheat & Rye (Bread),0.1,0.0,0.8,0.2,0.1,0.1,0.1,1.4,,...,,,,,,,,,,"potatoes, bread, rice, pasta and other starchy..."
1,Maize (Meal),0.3,0.0,0.5,0.1,0.1,0.1,0.0,1.1,,...,,,,,,,,,,"potatoes, bread, rice, pasta and other starchy..."
2,Barley (Beer),0.0,0.0,0.2,0.1,0.0,0.5,0.3,1.1,,...,,,,,,,,,,Other
3,Oatmeal,0.0,0.0,1.4,0.0,0.1,0.1,0.0,1.6,4.281357,...,482.4,0.945482,1.907692,2.897446,7.6,5.846154,18786.2,14450.92308,7162.104461,"potatoes, bread, rice, pasta and other starchy..."
4,Rice,0.0,0.0,3.6,0.1,0.1,0.1,0.1,4.0,9.514379,...,2248.4,1.207271,6.267606,0.759631,2.8,3.943662,49576.3,69825.77465,13449.89148,"potatoes, bread, rice, pasta and other starchy..."


In [3]:
# Print every product name in the table as a plain Python list.
print(df['Food product'].tolist())

['Wheat & Rye (Bread)', 'Maize (Meal)', 'Barley (Beer)', 'Oatmeal', 'Rice', 'Potatoes', 'Cassava', 'Cane Sugar', 'Beet Sugar', 'Other Pulses', 'Peas', 'Nuts', 'Groundnuts', 'Soymilk', 'Tofu', 'Soybean Oil', 'Palm Oil', 'Sunflower Oil', 'Rapeseed Oil', 'Olive Oil', 'Tomatoes', 'Onions & Leeks', 'Root Vegetables', 'Brassicas', 'Other Vegetables', 'Citrus Fruit', 'Bananas', 'Apples', 'Berries & Grapes', 'Wine', 'Other Fruit', 'Coffee', 'Dark Chocolate', 'Beef (beef herd)', 'Beef (dairy herd)', 'Lamb & Mutton', 'Pig Meat', 'Poultry Meat', 'Milk', 'Cheese', 'Eggs', 'Fish (farmed)', 'Shrimps (farmed)']


## We also want to add a category column

In [4]:
# Food-group labels used in the 'Category' column.
STARCHY = 'potatoes, bread, rice, pasta and other starchy carbohydrate foods'
PROTEIN = 'beans, pulses, fish, eggs, meat and other protein'
FRUIT_VEG = 'fruit and vegetables'
DAIRY = 'dairy and alternatives'
OILS = 'oils and spreads'
OTHER = 'Other'

# Food group for each product, keyed by product name. Looking labels up by name
# (instead of relying on a positional list) keeps every label attached to the
# right row even if the CSV is re-sorted, filtered, or extended.
#
# The labels below reproduce the original positional list one-for-one, so the
# resulting 'Category' column is unchanged. Entries marked NOTE(review) look
# inconsistent with the group names; they are left as they were because
# changing them alters the category charts and the percentages quoted in the
# commentary.
category_by_product = {
    'Wheat & Rye (Bread)': STARCHY,
    'Maize (Meal)': STARCHY,
    'Barley (Beer)': OTHER,
    'Oatmeal': STARCHY,
    'Rice': STARCHY,
    'Potatoes': FRUIT_VEG,  # NOTE(review): the starchy group's own name lists potatoes - confirm
    'Cassava': OTHER,
    'Cane Sugar': OTHER,
    'Beet Sugar': PROTEIN,  # NOTE(review): Cane Sugar is 'Other'; this looks like a mislabel - confirm
    'Other Pulses': FRUIT_VEG,  # NOTE(review): the protein group's own name lists pulses - confirm
    'Peas': PROTEIN,
    'Nuts': PROTEIN,
    'Groundnuts': PROTEIN,
    'Soymilk': DAIRY,
    'Tofu': DAIRY,
    'Soybean Oil': OILS,
    'Palm Oil': OILS,
    'Sunflower Oil': OILS,
    'Rapeseed Oil': OILS,
    'Olive Oil': OILS,
    'Tomatoes': FRUIT_VEG,
    'Onions & Leeks': FRUIT_VEG,
    'Root Vegetables': FRUIT_VEG,
    'Brassicas': FRUIT_VEG,
    'Other Vegetables': FRUIT_VEG,
    'Citrus Fruit': FRUIT_VEG,
    'Bananas': FRUIT_VEG,
    'Apples': FRUIT_VEG,
    'Berries & Grapes': FRUIT_VEG,
    'Wine': OTHER,
    'Other Fruit': FRUIT_VEG,
    'Coffee': OTHER,
    'Dark Chocolate': OTHER,
    'Beef (beef herd)': PROTEIN,
    'Beef (dairy herd)': DAIRY,  # NOTE(review): this is beef, not a dairy product - confirm
    'Lamb & Mutton': PROTEIN,
    'Pig Meat': PROTEIN,
    'Poultry Meat': PROTEIN,
    'Milk': DAIRY,
    'Cheese': DAIRY,
    'Eggs': PROTEIN,
    'Fish (farmed)': PROTEIN,
    'Shrimps (farmed)': PROTEIN,
}

# Look up the group of every row; products missing from the mapping become NaN.
mapped_category = df['Food product'].map(category_by_product)

# Fail loudly rather than leave rows without a category.
unmapped_products = df.loc[mapped_category.isna(), 'Food product'].unique().tolist()
if unmapped_products:
    raise ValueError(f'No category defined for food products: {unmapped_products}')

# `category` stays a plain list, one label per row, as before.
category = mapped_category.tolist()

df['Category'] = category

# Visualisation
## Let's explore the data using graphs


### The bar chart below shows emissions per kg of food product for all products in the table. As we can see, beef production has the highest level of emissions.

In [14]:
# Order the products from highest to lowest total emissions so the bars
# descend from left to right.
sorted_df = df.sort_values('Total_emissions', ascending=False)

fig = px.bar(
    sorted_df,
    x='Food product',
    y='Total_emissions',
    color='Total_emissions',
    title=' Emissions per kg of food product',
)
fig.show()

### Looking at the pie chart below, we can see that beef production alone accounts for almost a quarter of the combined per-kg emissions of all products in the table.

In [7]:
# One slice per food product, sized by its Total_emissions value.
# Columns are referenced by name so the hover labels always carry the real
# column names, and the figure gets a title so it can be read on its own.
fig = px.pie(
    df,
    values='Total_emissions',
    names='Food product',
    color='Food product',
    title='Share of total emissions by food product',
)
fig.show()

### The bar charts below show the 10 products with the highest emissions and the 10 products with the lowest emissions.

In [8]:
# The ten products with the largest Total_emissions values.
cat = df.nlargest(10, 'Total_emissions')

# Plot them from highest to lowest.
sorted_df = cat.sort_values('Total_emissions', ascending=False)
fig = px.bar(
    sorted_df,
    x='Food product',
    y='Total_emissions',
    color='Total_emissions',
    title='Top 10 High Emissions Foods',
)
fig.show()

In [9]:
# The ten products with the smallest Total_emissions values.
cat = df.nsmallest(10, 'Total_emissions')

# Plot them from highest to lowest within this low-emission group.
sorted_df = cat.sort_values('Total_emissions', ascending=False)
fig = px.bar(
    sorted_df,
    x='Food product',
    y='Total_emissions',
    color='Total_emissions',
    title='Top 10 Low Emissions Foods',
)
fig.show()

## Next, we want to look at the emissions within categories.
### As we can see from the pie chart below, foods rich in protein (almost all of them animal products) generate almost half of all emissions.

In [21]:
# `df_cat` is just another name for the full table; no copy is made.
df_cat = df

# One slice per food category; rows that share a Category label are combined
# into a single slice. Columns are referenced by name, and the figure gets a
# title so it can be read on its own.
fig = px.pie(
    df_cat,
    values='Total_emissions',
    names='Category',
    color='Category',
    title='Share of total emissions by food category',
)
fig.show()

In [22]:
# Keep only the rows labelled with the protein category.
df_protein = df[df['Category'] == 'beans, pulses, fish, eggs, meat and other protein']

# One slice per protein product, sized by its Total_emissions value.
# Columns are referenced by name, and the figure gets a title so it can be
# read on its own.
fig = px.pie(
    df_protein,
    values='Total_emissions',
    names='Food product',
    color='Food product',
    title='Share of emissions within the protein category, by food product',
)
fig.show()

### From what we saw in the previous charts, it seems that fruits and vegetables are among the products with low emissions. Let's take a closer look at them.

In [10]:
# Keep only the rows labelled as fruit and vegetables.
df_veg = df[df['Category'] == 'fruit and vegetables']

# One slice per fruit/vegetable product, sized by its Total_emissions value.
# Columns are referenced by name, and the figure gets a title so it can be
# read on its own.
fig = px.pie(
    df_veg,
    values='Total_emissions',
    names='Food product',
    color='Food product',
    title='Share of emissions within fruit and vegetables, by food product',
)
fig.show()

### Let's look at the categories in terms of Freshwater withdrawals per kilogram (liters per kilogram).
### We can see that, again, the protein category is responsible for almost half of all water withdrawals.

In [12]:
# `df_water` is just another name for the full table; no copy is made.
df_water = df

# One slice per food category, sized by the freshwater-withdrawal column; rows
# that share a Category label are combined into a single slice. Columns are
# referenced by name, and the figure gets a title so it can be read on its own.
fig = px.pie(
    df_water,
    values='Freshwater withdrawals per kilogram (liters per kilogram)',
    names='Category',
    color='Category',
    title='Share of freshwater withdrawals by food category',
)
fig.show()

### If we take a closer look, we can see that nuts and groundnuts are responsible for approximately 35% of all freshwater withdrawals within the protein category. The picture is quite different from total emissions, where nuts and groundnuts are responsible for around 3.5% of total emissions within the category.

In [13]:
# Rebind `df_water` to the protein-category rows only.
df_water = df[df['Category'] == 'beans, pulses, fish, eggs, meat and other protein']

# One slice per protein product, sized by the freshwater-withdrawal column.
# Columns are referenced by name, and the figure gets a title so it can be
# read on its own.
fig = px.pie(
    df_water,
    values='Freshwater withdrawals per kilogram (liters per kilogram)',
    names='Food product',
    color='Food product',
    title='Share of freshwater withdrawals within the protein category, by food product',
)
fig.show()