# Environmental impacts of Food

## Introduction idea - Pie Chart 

"Today’s food supply chain creates ~13.7 billion
metric tons of carbon dioxide equivalents (CO2eq),
26% of anthropogenic GHG emissions." (Poore & Nemecek, 2018)

## Setup

In [1031]:
import pandas as pd
import altair as alt
from vega_datasets import data

## Creating a simple df and plotting a pie chart

In [1032]:

df_food = pd.DataFrame({"Category": ['non-food', 'food'], "Percent": [74, 26], "Emissions": ['5,269.2 billion tons CO2eq', '13.7 billion tons CO2eq']})

In [1033]:
df_food.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Category   2 non-null      object
 1   Percent    2 non-null      int64 
 2   Emissions  2 non-null      object
dtypes: int64(1), object(2)
memory usage: 180.0+ bytes


In [1034]:
# Colors for food and non-food

df_food['Category'] = df_food['Category'].astype('category')

GHG_TYPE = df_food['Category'].cat.categories.to_list()

colors = alt.Scale(
    domain = GHG_TYPE,
    range=['darkseagreen','gainsboro']
)

colors

Scale({
  domain: ['food', 'non-food'],
  range: ['darkseagreen', 'gainsboro']
})

In [1035]:
# Plot Pie Chart

base = alt.Chart(df_food).encode(
    alt.Theta("Percent:Q").stack(True),
    alt.Color("Category:N", scale= colors).legend(None),
    alt.Tooltip(['Category','Emissions'])
)

pie = base.mark_arc(outerRadius=120)
text = base.mark_text(radius=160, size=16).encode(text="Category:N")


pie + text

## Save data

In [1036]:
# safe the data frame to use it in the quarto presentation

df_food.to_csv('cleaned-data/environmental-impact-food.csv', index = False) 

# Import data "Environmental Impact of Food Production"

### Importing sheet "Results - Retail Weight", to compare GHG emissions, land use and freshwater use per 1000g of products 

In [1037]:

df_weight = pd.read_excel('original-data/aaq0216_datas2.xls', sheet_name='Results - Retail Weight')

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)



In [1038]:
df_weight.head()

Unnamed: 0.1,Unnamed: 0,"Resampled, Randomized Data",Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,"Data Without Resampling or Randomization (original study data, after harmonizing methodology)",Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54
0,,Land Use (m2/FU),,,,,,"GHG Emissions (kg CO2eq/FU, IPCC 2013 incl. CC...",,,,,,"GHG Emissions (kg CO2eq/FU, IPCC 2007)",,,,,,"Acidifying Emissions (g SO2eq/FU, CML2 Baseline)",,,,,,"Eutrophying Emissions (g PO43-eq/FU, CML2 Base...",,,,,,Freshwater Withdrawals (L/FU),,,,,,Stress-Weighted Water Use (L/FU),,,,,,Land Use (m2/FU),,"GHG (kg CO2eq/FU, IPCC 2013 incl. CC feedbacks)",,"Acid. (kg SO2eq/FU, CML2 Baseline)",,"Eutr. (kg PO43-eq/FU, CML2 Baseline)",,Fresh W. (L/FU),,Strs. W. WU (L/FU),
1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,Min,Max,Min,Max,Min,Max,Min,Max,Min,Max,Min,Max
2,Wheat & Rye (Bread),0.98,1.11,3.85,2.7,7.87,9.96,0.71,0.79,1.57,1.27,2.31,3.07,0.72,0.79,1.58,1.28,2.38,3.11,5.85,6.68,13.35,13.28,20.24,25.03,1.02,2.27,7.16,5.37,13.41,18.15,2.2,2.2,647.5,419.2,1081,3368.5,2.7,4.5,33385.6,12821.7,54985.7,225767.7,0.412943,36.493263,0.417098,56.043627,3.407876,33.477292,0.16895,77.377291,1.140702,8675.359047,1.395022,333260.436228
3,Maize (Meal),0.99,1.14,2.94,1.84,5.7,9.01,0.66,0.73,1.7,1.18,2.31,3.52,0.67,0.74,1.68,1.19,2.3,3.53,5.63,5.93,11.68,10.15,20.89,22.83,1.2,1.31,4.03,2.4,8.12,12.58,0,0,215.7,43.9,530.8,598.3,0,0,10863.3,349.6,24375.4,28214.7,0.492814,28.949668,0.453814,72.276385,2.923807,25.476467,0.780147,20.807681,0,7190.948767,0,461216.164137
4,Barley (Beer),0.21,0.26,1.11,0.88,2.37,2.87,0.59,0.7,1.18,1.18,1.64,1.77,0.59,0.7,1.18,1.17,1.64,1.78,5.22,5.36,6.59,6.06,7.45,8.17,1.07,1.18,2.33,1.83,3.75,4.82,6.2,7,17.1,7,11.2,47.8,7.7,9.8,696.4,27.3,290.8,1620.9,0.214828,3.757177,0.439197,2.639285,4.822939,38.361277,1.044854,12.371997,6.230092,568.804918,4.422704,41483.803982


In [1039]:
df_weight.head()

Unnamed: 0.1,Unnamed: 0,"Resampled, Randomized Data",Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,"Data Without Resampling or Randomization (original study data, after harmonizing methodology)",Unnamed: 44,Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54
0,,Land Use (m2/FU),,,,,,"GHG Emissions (kg CO2eq/FU, IPCC 2013 incl. CC...",,,,,,"GHG Emissions (kg CO2eq/FU, IPCC 2007)",,,,,,"Acidifying Emissions (g SO2eq/FU, CML2 Baseline)",,,,,,"Eutrophying Emissions (g PO43-eq/FU, CML2 Base...",,,,,,Freshwater Withdrawals (L/FU),,,,,,Stress-Weighted Water Use (L/FU),,,,,,Land Use (m2/FU),,"GHG (kg CO2eq/FU, IPCC 2013 incl. CC feedbacks)",,"Acid. (kg SO2eq/FU, CML2 Baseline)",,"Eutr. (kg PO43-eq/FU, CML2 Baseline)",,Fresh W. (L/FU),,Strs. W. WU (L/FU),
1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,Min,Max,Min,Max,Min,Max,Min,Max,Min,Max,Min,Max
2,Wheat & Rye (Bread),0.98,1.11,3.85,2.7,7.87,9.96,0.71,0.79,1.57,1.27,2.31,3.07,0.72,0.79,1.58,1.28,2.38,3.11,5.85,6.68,13.35,13.28,20.24,25.03,1.02,2.27,7.16,5.37,13.41,18.15,2.2,2.2,647.5,419.2,1081,3368.5,2.7,4.5,33385.6,12821.7,54985.7,225767.7,0.412943,36.493263,0.417098,56.043627,3.407876,33.477292,0.16895,77.377291,1.140702,8675.359047,1.395022,333260.436228
3,Maize (Meal),0.99,1.14,2.94,1.84,5.7,9.01,0.66,0.73,1.7,1.18,2.31,3.52,0.67,0.74,1.68,1.19,2.3,3.53,5.63,5.93,11.68,10.15,20.89,22.83,1.2,1.31,4.03,2.4,8.12,12.58,0,0,215.7,43.9,530.8,598.3,0,0,10863.3,349.6,24375.4,28214.7,0.492814,28.949668,0.453814,72.276385,2.923807,25.476467,0.780147,20.807681,0,7190.948767,0,461216.164137
4,Barley (Beer),0.21,0.26,1.11,0.88,2.37,2.87,0.59,0.7,1.18,1.18,1.64,1.77,0.59,0.7,1.18,1.17,1.64,1.78,5.22,5.36,6.59,6.06,7.45,8.17,1.07,1.18,2.33,1.83,3.75,4.82,6.2,7,17.1,7,11.2,47.8,7.7,9.8,696.4,27.3,290.8,1620.9,0.214828,3.757177,0.439197,2.639285,4.822939,38.361277,1.044854,12.371997,6.230092,568.804918,4.422704,41483.803982


In [1040]:
# drop columns after "Data Without Resampling or Randomization"

df_weight = df_weight.drop(df_weight.columns[43:], axis=1) 

df_weight.head()



Unnamed: 0.1,Unnamed: 0,"Resampled, Randomized Data",Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42
0,,Land Use (m2/FU),,,,,,"GHG Emissions (kg CO2eq/FU, IPCC 2013 incl. CC...",,,,,,"GHG Emissions (kg CO2eq/FU, IPCC 2007)",,,,,,"Acidifying Emissions (g SO2eq/FU, CML2 Baseline)",,,,,,"Eutrophying Emissions (g PO43-eq/FU, CML2 Base...",,,,,,Freshwater Withdrawals (L/FU),,,,,,Stress-Weighted Water Use (L/FU),,,,,
1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl
2,Wheat & Rye (Bread),0.98,1.11,3.85,2.7,7.87,9.96,0.71,0.79,1.57,1.27,2.31,3.07,0.72,0.79,1.58,1.28,2.38,3.11,5.85,6.68,13.35,13.28,20.24,25.03,1.02,2.27,7.16,5.37,13.41,18.15,2.2,2.2,647.5,419.2,1081,3368.5,2.7,4.5,33385.6,12821.7,54985.7,225767.7
3,Maize (Meal),0.99,1.14,2.94,1.84,5.7,9.01,0.66,0.73,1.7,1.18,2.31,3.52,0.67,0.74,1.68,1.19,2.3,3.53,5.63,5.93,11.68,10.15,20.89,22.83,1.2,1.31,4.03,2.4,8.12,12.58,0,0,215.7,43.9,530.8,598.3,0,0,10863.3,349.6,24375.4,28214.7
4,Barley (Beer),0.21,0.26,1.11,0.88,2.37,2.87,0.59,0.7,1.18,1.18,1.64,1.77,0.59,0.7,1.18,1.17,1.64,1.78,5.22,5.36,6.59,6.06,7.45,8.17,1.07,1.18,2.33,1.83,3.75,4.82,6.2,7,17.1,7,11.2,47.8,7.7,9.8,696.4,27.3,290.8,1620.9


In [1041]:
# make row with index 1 the column names
df_weight.columns = df_weight.iloc[1]


df_weight.head()


1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl.1,10th pctl.1,Mean.1,Median.1,90th pctl.1,95th pctl.1,5th pctl.2,10th pctl.2,Mean.2,Median.2,90th pctl.2,95th pctl.2,5th pctl.3,10th pctl.3,Mean.3,Median.3,90th pctl.3,95th pctl.3,5th pctl.4,10th pctl.4,Mean.4,Median.4,90th pctl.4,95th pctl.4,5th pctl.5,10th pctl.5,Mean.5,Median.5,90th pctl.5,95th pctl.5,5th pctl.6,10th pctl.6,Mean.6,Median.6,90th pctl.6,95th pctl.6
0,,Land Use (m2/FU),,,,,,"GHG Emissions (kg CO2eq/FU, IPCC 2013 incl. CC...",,,,,,"GHG Emissions (kg CO2eq/FU, IPCC 2007)",,,,,,"Acidifying Emissions (g SO2eq/FU, CML2 Baseline)",,,,,,"Eutrophying Emissions (g PO43-eq/FU, CML2 Base...",,,,,,Freshwater Withdrawals (L/FU),,,,,,Stress-Weighted Water Use (L/FU),,,,,
1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl
2,Wheat & Rye (Bread),0.98,1.11,3.85,2.7,7.87,9.96,0.71,0.79,1.57,1.27,2.31,3.07,0.72,0.79,1.58,1.28,2.38,3.11,5.85,6.68,13.35,13.28,20.24,25.03,1.02,2.27,7.16,5.37,13.41,18.15,2.2,2.2,647.5,419.2,1081,3368.5,2.7,4.5,33385.6,12821.7,54985.7,225767.7
3,Maize (Meal),0.99,1.14,2.94,1.84,5.7,9.01,0.66,0.73,1.7,1.18,2.31,3.52,0.67,0.74,1.68,1.19,2.3,3.53,5.63,5.93,11.68,10.15,20.89,22.83,1.2,1.31,4.03,2.4,8.12,12.58,0,0,215.7,43.9,530.8,598.3,0,0,10863.3,349.6,24375.4,28214.7
4,Barley (Beer),0.21,0.26,1.11,0.88,2.37,2.87,0.59,0.7,1.18,1.18,1.64,1.77,0.59,0.7,1.18,1.17,1.64,1.78,5.22,5.36,6.59,6.06,7.45,8.17,1.07,1.18,2.33,1.83,3.75,4.82,6.2,7,17.1,7,11.2,47.8,7.7,9.8,696.4,27.3,290.8,1620.9


In [1042]:
# drop unnecessary rows
df_weight = df_weight.drop(df_weight.index[0:2], axis=0) 

df_weight


1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl.1,10th pctl.1,Mean.1,Median.1,90th pctl.1,95th pctl.1,5th pctl.2,10th pctl.2,Mean.2,Median.2,90th pctl.2,95th pctl.2,5th pctl.3,10th pctl.3,Mean.3,Median.3,90th pctl.3,95th pctl.3,5th pctl.4,10th pctl.4,Mean.4,Median.4,90th pctl.4,95th pctl.4,5th pctl.5,10th pctl.5,Mean.5,Median.5,90th pctl.5,95th pctl.5,5th pctl.6,10th pctl.6,Mean.6,Median.6,90th pctl.6,95th pctl.6
2,Wheat & Rye (Bread),0.98,1.11,3.85,2.7,7.87,9.96,0.71,0.79,1.57,1.27,2.31,3.07,0.72,0.79,1.58,1.28,2.38,3.11,5.85,6.68,13.35,13.28,20.24,25.03,1.02,2.27,7.16,5.37,13.41,18.15,2.2,2.2,647.5,419.2,1081.0,3368.5,2.7,4.5,33385.6,12821.7,54985.7,225767.7
3,Maize (Meal),0.99,1.14,2.94,1.84,5.7,9.01,0.66,0.73,1.7,1.18,2.31,3.52,0.67,0.74,1.68,1.19,2.3,3.53,5.63,5.93,11.68,10.15,20.89,22.83,1.2,1.31,4.03,2.4,8.12,12.58,0.0,0.0,215.7,43.9,530.8,598.3,0.0,0.0,10863.3,349.6,24375.4,28214.7
4,Barley (Beer),0.21,0.26,1.11,0.88,2.37,2.87,0.59,0.7,1.18,1.18,1.64,1.77,0.59,0.7,1.18,1.17,1.64,1.78,5.22,5.36,6.59,6.06,7.45,8.17,1.07,1.18,2.33,1.83,3.75,4.82,6.2,7.0,17.1,7.0,11.2,47.8,7.7,9.8,696.4,27.3,290.8,1620.9
5,Oatmeal,2.64,2.85,7.6,7.72,12.88,13.98,0.8,0.85,2.48,2.59,4.08,4.3,0.79,0.85,2.47,2.58,4.1,4.32,6.16,7.52,10.68,9.61,14.83,17.36,5.78,6.67,11.23,10.06,16.26,24.25,0.0,0.0,482.4,670.3,804.4,850.0,0.0,0.0,18786.2,24456.3,29352.2,31014.5
6,Rice,0.99,1.1,2.8,2.15,6.21,7.21,1.15,1.46,4.45,3.73,8.77,10.26,1.05,1.28,3.81,3.14,7.39,8.51,8.83,9.78,27.19,18.58,62.84,75.03,2.86,3.38,35.07,9.33,135.79,156.01,0.0,0.2,2248.4,1574.9,3936.1,10573.8,0.0,1.5,49576.3,4625.6,115317.4,191274.2
7,Potatoes,0.37,0.44,0.88,0.82,1.4,1.66,0.09,0.16,0.46,0.47,0.63,0.7,0.08,0.15,0.45,0.47,0.63,0.69,2.33,2.55,3.87,3.6,5.33,6.89,0.62,0.64,3.48,4.43,6.13,6.17,0.0,0.9,59.1,2.6,133.4,235.8,0.0,8.4,2754.2,78.3,8909.9,8977.9
8,Cassava,0.73,0.76,1.81,1.32,3.19,3.28,0.26,0.35,1.32,1.05,2.11,2.24,0.25,0.34,1.3,1.01,2.09,2.2,2.57,2.69,3.42,3.16,4.79,5.05,0.45,0.47,0.69,0.7,0.95,0.99,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,Cane Sugar,1.14,1.17,2.04,1.78,3.11,3.53,0.62,0.92,3.2,3.17,5.1,5.59,0.63,0.92,3.16,3.21,5.05,5.57,5.35,7.85,18.02,18.03,28.07,30.83,3.26,4.02,16.92,11.18,40.86,42.24,7.2,8.1,620.1,8.1,2519.7,3567.1,13.5,15.3,16438.6,15.3,34108.2,34890.8
10,Beet Sugar,1.11,1.19,1.83,1.52,3.09,3.29,1.01,1.21,1.81,1.76,2.42,2.64,1.04,1.2,1.8,1.75,2.41,2.65,4.38,4.38,12.62,12.37,18.32,20.55,2.1,2.1,5.41,4.33,14.11,16.85,10.3,12.1,217.7,12.1,506.0,1655.5,12.2,15.0,9493.3,115.1,22816.0,76242.2
11,Other Pulses,4.08,9.93,15.57,12.24,41.25,41.87,0.89,0.98,1.79,1.39,3.75,4.0,0.9,0.98,1.79,1.39,3.72,4.0,5.67,10.86,22.07,19.0,33.8,36.68,1.57,1.64,17.08,13.77,46.64,50.24,0.0,0.0,435.7,0.0,1250.1,2201.1,0.0,0.0,22477.4,0.0,45615.2,106154.3


Creating a new df with more products (like in the nutritional data chart) for the streamlit page 

In [1043]:
products_to_select = ['Rice', 'Potatoes', 'Wheat', 'Soymilk','Tofu', 'Bovine Meat (beef herd)', 'Bovine Meat (dairy herd)', 'Lamb & Mutton', 'Tomatoes', 'Crustaceans (farmed)', 'Fish (farmed)', 'Peas', 'Olive Oil', 'Pig Meat', 'Poultry Meat', 'Milk', 'Cheese']

df_streamlit = df_weight[df_weight['Product'].isin(products_to_select)]

df_streamlit

1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl.1,10th pctl.1,Mean.1,Median.1,90th pctl.1,95th pctl.1,5th pctl.2,10th pctl.2,Mean.2,Median.2,90th pctl.2,95th pctl.2,5th pctl.3,10th pctl.3,Mean.3,Median.3,90th pctl.3,95th pctl.3,5th pctl.4,10th pctl.4,Mean.4,Median.4,90th pctl.4,95th pctl.4,5th pctl.5,10th pctl.5,Mean.5,Median.5,90th pctl.5,95th pctl.5,5th pctl.6,10th pctl.6,Mean.6,Median.6,90th pctl.6,95th pctl.6
6,Rice,0.99,1.1,2.8,2.15,6.21,7.21,1.15,1.46,4.45,3.73,8.77,10.26,1.05,1.28,3.81,3.14,7.39,8.51,8.83,9.78,27.19,18.58,62.84,75.03,2.86,3.38,35.07,9.33,135.79,156.01,0.0,0.2,2248.4,1574.9,3936.1,10573.8,0.0,1.5,49576.3,4625.6,115317.4,191274.2
7,Potatoes,0.37,0.44,0.88,0.82,1.4,1.66,0.09,0.16,0.46,0.47,0.63,0.7,0.08,0.15,0.45,0.47,0.63,0.69,2.33,2.55,3.87,3.6,5.33,6.89,0.62,0.64,3.48,4.43,6.13,6.17,0.0,0.9,59.1,2.6,133.4,235.8,0.0,8.4,2754.2,78.3,8909.9,8977.9
12,Peas,2.28,2.77,7.46,6.73,14.19,20.47,0.51,0.56,0.98,0.8,1.67,1.87,0.51,0.56,0.97,0.8,1.67,1.87,3.21,3.6,8.49,10.28,10.87,11.06,0.74,0.75,7.52,1.68,33.62,33.66,0.0,0.0,396.6,0.0,3099.8,3584.0,0.0,0.0,27948.2,0.0,228332.1,263996.7
15,Soymilk,0.3,0.34,0.66,0.64,0.92,1.07,0.51,0.58,0.98,0.91,1.47,1.74,0.51,0.58,0.97,0.9,1.47,1.73,2.07,2.15,2.6,2.54,3.11,3.3,0.47,0.49,1.06,1.2,1.57,1.64,1.2,1.2,27.8,1.3,146.2,158.9,2.4,2.4,955.6,6.2,5300.7,5768.6
16,Tofu,1.57,1.77,3.52,3.41,4.94,5.87,1.41,1.6,3.16,2.58,5.55,7.27,1.39,1.6,3.14,2.57,5.47,7.19,4.99,5.13,6.7,6.0,9.04,9.85,2.9,2.92,6.16,6.64,9.09,10.32,6.1,6.3,148.6,6.6,777.7,864.4,12.4,12.4,5113.2,32.4,28226.9,31483.9
21,Olive Oil,7.85,7.85,26.31,17.29,36.32,36.32,2.13,2.86,5.42,5.09,7.63,10.79,2.14,2.88,5.25,5.04,7.72,10.7,18.78,27.47,37.58,33.89,57.92,61.96,5.78,17.09,37.26,39.11,56.34,61.19,8.5,8.5,2141.8,317.9,6907.5,6907.5,130.4,130.4,177480.2,24395.7,621151.5,621151.5
22,Tomatoes,0.07,0.09,0.8,0.17,0.93,5.62,0.37,0.39,2.09,0.65,5.95,12.62,0.37,0.39,2.01,0.65,5.17,12.28,2.89,3.21,17.21,5.21,67.95,83.38,0.62,0.78,7.51,1.92,32.1,39.51,32.6,48.3,369.8,77.0,1333.9,1993.9,270.4,384.7,5335.7,4480.7,8959.4,11842.0
35,Bovine Meat (beef herd),70.41,82.84,326.21,170.37,735.09,910.1,37.57,40.37,99.48,60.36,209.85,269.19,30.76,33.03,85.19,51.72,179.95,241.98,59.78,76.08,318.83,270.87,656.91,683.48,101.81,113.38,301.41,320.69,412.83,657.89,215.6,268.6,1451.2,740.2,2585.5,5241.3,204.6,242.1,34732.5,441.2,89872.1,190796.3
36,Bovine Meat (dairy herd),12.27,14.39,43.24,25.94,64.12,106.37,14.93,17.94,33.3,34.14,50.9,56.68,12.78,15.75,28.79,28.87,45.04,49.88,165.22,219.02,343.64,289.14,497.23,1099.17,79.78,81.38,365.29,140.93,1515.69,2509.42,187.7,191.8,2714.3,2614.2,5799.4,8744.0,42174.7,46308.5,119805.2,122176.8,181962.6,214220.5
37,Lamb & Mutton,47.85,60.06,369.81,127.41,442.34,724.65,23.7,24.52,39.72,40.61,54.44,60.16,20.54,21.25,32.71,32.99,43.21,50.51,79.19,81.79,138.97,135.16,149.78,273.61,21.95,24.64,97.13,101.92,128.72,133.36,88.0,97.8,1802.8,461.2,7133.3,7825.8,258.9,258.9,141925.0,258.9,540906.4,595278.0


In [1044]:
# for presentation
# choose rows for products

products_to_select = ['Rice', 'Potatoes', 'Wheat', 'Soymilk','Tofu', 'Bovine Meat (beef herd)', 'Bovine Meat (dairy herd)', 'Pig Meat', 'Poultry Meat', 'Milk', 'Cheese']

df_weight = df_weight[df_weight['Product'].isin(products_to_select)]

df_weight

1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl.1,10th pctl.1,Mean.1,Median.1,90th pctl.1,95th pctl.1,5th pctl.2,10th pctl.2,Mean.2,Median.2,90th pctl.2,95th pctl.2,5th pctl.3,10th pctl.3,Mean.3,Median.3,90th pctl.3,95th pctl.3,5th pctl.4,10th pctl.4,Mean.4,Median.4,90th pctl.4,95th pctl.4,5th pctl.5,10th pctl.5,Mean.5,Median.5,90th pctl.5,95th pctl.5,5th pctl.6,10th pctl.6,Mean.6,Median.6,90th pctl.6,95th pctl.6
6,Rice,0.99,1.1,2.8,2.15,6.21,7.21,1.15,1.46,4.45,3.73,8.77,10.26,1.05,1.28,3.81,3.14,7.39,8.51,8.83,9.78,27.19,18.58,62.84,75.03,2.86,3.38,35.07,9.33,135.79,156.01,0.0,0.2,2248.4,1574.9,3936.1,10573.8,0.0,1.5,49576.3,4625.6,115317.4,191274.2
7,Potatoes,0.37,0.44,0.88,0.82,1.4,1.66,0.09,0.16,0.46,0.47,0.63,0.7,0.08,0.15,0.45,0.47,0.63,0.69,2.33,2.55,3.87,3.6,5.33,6.89,0.62,0.64,3.48,4.43,6.13,6.17,0.0,0.9,59.1,2.6,133.4,235.8,0.0,8.4,2754.2,78.3,8909.9,8977.9
15,Soymilk,0.3,0.34,0.66,0.64,0.92,1.07,0.51,0.58,0.98,0.91,1.47,1.74,0.51,0.58,0.97,0.9,1.47,1.73,2.07,2.15,2.6,2.54,3.11,3.3,0.47,0.49,1.06,1.2,1.57,1.64,1.2,1.2,27.8,1.3,146.2,158.9,2.4,2.4,955.6,6.2,5300.7,5768.6
16,Tofu,1.57,1.77,3.52,3.41,4.94,5.87,1.41,1.6,3.16,2.58,5.55,7.27,1.39,1.6,3.14,2.57,5.47,7.19,4.99,5.13,6.7,6.0,9.04,9.85,2.9,2.92,6.16,6.64,9.09,10.32,6.1,6.3,148.6,6.6,777.7,864.4,12.4,12.4,5113.2,32.4,28226.9,31483.9
35,Bovine Meat (beef herd),70.41,82.84,326.21,170.37,735.09,910.1,37.57,40.37,99.48,60.36,209.85,269.19,30.76,33.03,85.19,51.72,179.95,241.98,59.78,76.08,318.83,270.87,656.91,683.48,101.81,113.38,301.41,320.69,412.83,657.89,215.6,268.6,1451.2,740.2,2585.5,5241.3,204.6,242.1,34732.5,441.2,89872.1,190796.3
36,Bovine Meat (dairy herd),12.27,14.39,43.24,25.94,64.12,106.37,14.93,17.94,33.3,34.14,50.9,56.68,12.78,15.75,28.79,28.87,45.04,49.88,165.22,219.02,343.64,289.14,497.23,1099.17,79.78,81.38,365.29,140.93,1515.69,2509.42,187.7,191.8,2714.3,2614.2,5799.4,8744.0,42174.7,46308.5,119805.2,122176.8,181962.6,214220.5
38,Pig Meat,7.39,7.76,17.36,13.44,31.11,34.09,6.91,7.41,12.31,10.57,22.26,23.79,6.58,7.0,11.54,9.82,21.42,23.02,63.23,68.98,142.66,114.8,434.05,469.04,29.47,31.64,76.38,53.53,219.71,237.55,82.9,87.6,1795.8,1810.3,3315.4,3555.6,51.0,53.5,66867.4,54242.7,134395.4,152329.6
39,Poultry Meat,6.46,6.65,12.22,11.01,16.02,20.4,3.95,4.18,9.87,7.52,20.12,20.82,3.84,4.07,9.82,7.82,19.93,20.69,39.88,43.09,102.42,64.66,192.75,197.05,22.69,25.01,48.7,34.53,101.46,101.46,18.9,19.2,660.0,370.3,1661.7,1694.1,21.0,21.2,14177.9,333.5,49726.9,66044.8
40,Milk,0.8,1.11,8.95,2.1,9.3,32.19,1.51,1.7,3.15,2.65,4.83,7.0,1.31,1.48,2.84,2.33,4.51,7.05,6.58,8.0,20.01,20.64,31.84,35.15,2.9,3.04,10.65,10.71,18.63,21.21,18.6,19.3,628.2,197.3,2592.5,2663.7,200.5,207.6,19786.3,9776.4,79193.2,81420.9
41,Cheese,7.86,9.55,87.79,20.18,239.21,323.45,10.21,10.92,23.88,18.64,39.32,58.79,8.45,9.4,21.44,16.16,36.59,57.35,45.57,57.6,165.54,173.01,267.23,304.81,26.31,29.51,98.37,99.5,167.85,192.25,158.4,178.2,5605.2,1559.3,23448.5,25756.3,1738.0,1930.1,180850.6,80463.1,718942.3,790842.0


In [1045]:
# Help from Copilot

# Define the prefix
prefixes = ['Land Use', 'GHG', 'GHG_old', 'Acid', 'Eutro', 'Freshwater', 'Water Use']

# Flatten the DataFrame to a list
columns = df_weight.columns.tolist()

# Create a new list to hold the new column names
new_columns = []

# Initialize the current prefix and column
current_prefix = 0
current_column = 0

# Iterate over the columns
for column in columns:
    # If this is the first column, skip it
    if column == columns[0]:
        new_columns.append(column)
        continue

    # Create the new column name
    new_name = prefixes[current_prefix] + '_' + column

    # Add the new name to the list
    new_columns.append(new_name)

    # Update the current column
    current_column += 1

    # If we've reached the 6th column, update the current prefix and reset the current column
    if current_column == 6:
        current_prefix += 1
        current_column = 0

# Rename the columns
df_weight.columns = new_columns

df_weight.head()

Unnamed: 0,Product,Land Use_5th pctl,Land Use_10th pctl,Land Use_Mean,Land Use_Median,Land Use_90th pctl,Land Use_95th pctl,GHG_5th pctl,GHG_10th pctl,GHG_Mean,GHG_Median,GHG_90th pctl,GHG_95th pctl,GHG_old_5th pctl,GHG_old_10th pctl,GHG_old_Mean,GHG_old_Median,GHG_old_90th pctl,GHG_old_95th pctl,Acid_5th pctl,Acid_10th pctl,Acid_Mean,Acid_Median,Acid_90th pctl,Acid_95th pctl,Eutro_5th pctl,Eutro_10th pctl,Eutro_Mean,Eutro_Median,Eutro_90th pctl,Eutro_95th pctl,Freshwater_5th pctl,Freshwater_10th pctl,Freshwater_Mean,Freshwater_Median,Freshwater_90th pctl,Freshwater_95th pctl,Water Use_5th pctl,Water Use_10th pctl,Water Use_Mean,Water Use_Median,Water Use_90th pctl,Water Use_95th pctl
6,Rice,0.99,1.1,2.8,2.15,6.21,7.21,1.15,1.46,4.45,3.73,8.77,10.26,1.05,1.28,3.81,3.14,7.39,8.51,8.83,9.78,27.19,18.58,62.84,75.03,2.86,3.38,35.07,9.33,135.79,156.01,0.0,0.2,2248.4,1574.9,3936.1,10573.8,0.0,1.5,49576.3,4625.6,115317.4,191274.2
7,Potatoes,0.37,0.44,0.88,0.82,1.4,1.66,0.09,0.16,0.46,0.47,0.63,0.7,0.08,0.15,0.45,0.47,0.63,0.69,2.33,2.55,3.87,3.6,5.33,6.89,0.62,0.64,3.48,4.43,6.13,6.17,0.0,0.9,59.1,2.6,133.4,235.8,0.0,8.4,2754.2,78.3,8909.9,8977.9
15,Soymilk,0.3,0.34,0.66,0.64,0.92,1.07,0.51,0.58,0.98,0.91,1.47,1.74,0.51,0.58,0.97,0.9,1.47,1.73,2.07,2.15,2.6,2.54,3.11,3.3,0.47,0.49,1.06,1.2,1.57,1.64,1.2,1.2,27.8,1.3,146.2,158.9,2.4,2.4,955.6,6.2,5300.7,5768.6
16,Tofu,1.57,1.77,3.52,3.41,4.94,5.87,1.41,1.6,3.16,2.58,5.55,7.27,1.39,1.6,3.14,2.57,5.47,7.19,4.99,5.13,6.7,6.0,9.04,9.85,2.9,2.92,6.16,6.64,9.09,10.32,6.1,6.3,148.6,6.6,777.7,864.4,12.4,12.4,5113.2,32.4,28226.9,31483.9
35,Bovine Meat (beef herd),70.41,82.84,326.21,170.37,735.09,910.1,37.57,40.37,99.48,60.36,209.85,269.19,30.76,33.03,85.19,51.72,179.95,241.98,59.78,76.08,318.83,270.87,656.91,683.48,101.81,113.38,301.41,320.69,412.83,657.89,215.6,268.6,1451.2,740.2,2585.5,5241.3,204.6,242.1,34732.5,441.2,89872.1,190796.3


In [1046]:
df_weight.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 6 to 41
Data columns (total 43 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Product               10 non-null     object
 1   Land Use_5th pctl     10 non-null     object
 2   Land Use_10th pctl    10 non-null     object
 3   Land Use_Mean         10 non-null     object
 4   Land Use_Median       10 non-null     object
 5   Land Use_90th pctl    10 non-null     object
 6   Land Use_95th pctl    10 non-null     object
 7   GHG_5th pctl          10 non-null     object
 8   GHG_10th pctl         10 non-null     object
 9   GHG_Mean              10 non-null     object
 10  GHG_Median            10 non-null     object
 11  GHG_90th pctl         10 non-null     object
 12  GHG_95th pctl         10 non-null     object
 13  GHG_old_5th pctl      10 non-null     object
 14  GHG_old_10th pctl     10 non-null     object
 15  GHG_old_Mean          10 non-null     object
 1

Editing the df_streamlit

In [1047]:
# Help from Copilot

# Define the prefix
prefixes = ['Land Use', 'GHG', 'GHG_old', 'Acid', 'Eutro', 'Freshwater', 'Water Use']

# Flatten the DataFrame to a list
columns2 = df_streamlit.columns.tolist()

# Create a new list to hold the new column names
new_columns = []

# Initialize the current prefix and column
current_prefix = 0
current_column = 0

# Iterate over the columns
for column in columns2:
    # If this is the first column, skip it
    if column == columns2[0]:
        new_columns.append(column)
        continue

    # Create the new column name
    new_name = prefixes[current_prefix] + '_' + column

    # Add the new name to the list
    new_columns.append(new_name)

    # Update the current column
    current_column += 1

    # If we've reached the 6th column, update the current prefix and reset the current column
    if current_column == 6:
        current_prefix += 1
        current_column = 0

# Rename the columns
df_streamlit.columns = new_columns

df_streamlit.head(20)

Unnamed: 0,Product,Land Use_5th pctl,Land Use_10th pctl,Land Use_Mean,Land Use_Median,Land Use_90th pctl,Land Use_95th pctl,GHG_5th pctl,GHG_10th pctl,GHG_Mean,GHG_Median,GHG_90th pctl,GHG_95th pctl,GHG_old_5th pctl,GHG_old_10th pctl,GHG_old_Mean,GHG_old_Median,GHG_old_90th pctl,GHG_old_95th pctl,Acid_5th pctl,Acid_10th pctl,Acid_Mean,Acid_Median,Acid_90th pctl,Acid_95th pctl,Eutro_5th pctl,Eutro_10th pctl,Eutro_Mean,Eutro_Median,Eutro_90th pctl,Eutro_95th pctl,Freshwater_5th pctl,Freshwater_10th pctl,Freshwater_Mean,Freshwater_Median,Freshwater_90th pctl,Freshwater_95th pctl,Water Use_5th pctl,Water Use_10th pctl,Water Use_Mean,Water Use_Median,Water Use_90th pctl,Water Use_95th pctl
6,Rice,0.99,1.1,2.8,2.15,6.21,7.21,1.15,1.46,4.45,3.73,8.77,10.26,1.05,1.28,3.81,3.14,7.39,8.51,8.83,9.78,27.19,18.58,62.84,75.03,2.86,3.38,35.07,9.33,135.79,156.01,0.0,0.2,2248.4,1574.9,3936.1,10573.8,0.0,1.5,49576.3,4625.6,115317.4,191274.2
7,Potatoes,0.37,0.44,0.88,0.82,1.4,1.66,0.09,0.16,0.46,0.47,0.63,0.7,0.08,0.15,0.45,0.47,0.63,0.69,2.33,2.55,3.87,3.6,5.33,6.89,0.62,0.64,3.48,4.43,6.13,6.17,0.0,0.9,59.1,2.6,133.4,235.8,0.0,8.4,2754.2,78.3,8909.9,8977.9
12,Peas,2.28,2.77,7.46,6.73,14.19,20.47,0.51,0.56,0.98,0.8,1.67,1.87,0.51,0.56,0.97,0.8,1.67,1.87,3.21,3.6,8.49,10.28,10.87,11.06,0.74,0.75,7.52,1.68,33.62,33.66,0.0,0.0,396.6,0.0,3099.8,3584.0,0.0,0.0,27948.2,0.0,228332.1,263996.7
15,Soymilk,0.3,0.34,0.66,0.64,0.92,1.07,0.51,0.58,0.98,0.91,1.47,1.74,0.51,0.58,0.97,0.9,1.47,1.73,2.07,2.15,2.6,2.54,3.11,3.3,0.47,0.49,1.06,1.2,1.57,1.64,1.2,1.2,27.8,1.3,146.2,158.9,2.4,2.4,955.6,6.2,5300.7,5768.6
16,Tofu,1.57,1.77,3.52,3.41,4.94,5.87,1.41,1.6,3.16,2.58,5.55,7.27,1.39,1.6,3.14,2.57,5.47,7.19,4.99,5.13,6.7,6.0,9.04,9.85,2.9,2.92,6.16,6.64,9.09,10.32,6.1,6.3,148.6,6.6,777.7,864.4,12.4,12.4,5113.2,32.4,28226.9,31483.9
21,Olive Oil,7.85,7.85,26.31,17.29,36.32,36.32,2.13,2.86,5.42,5.09,7.63,10.79,2.14,2.88,5.25,5.04,7.72,10.7,18.78,27.47,37.58,33.89,57.92,61.96,5.78,17.09,37.26,39.11,56.34,61.19,8.5,8.5,2141.8,317.9,6907.5,6907.5,130.4,130.4,177480.2,24395.7,621151.5,621151.5
22,Tomatoes,0.07,0.09,0.8,0.17,0.93,5.62,0.37,0.39,2.09,0.65,5.95,12.62,0.37,0.39,2.01,0.65,5.17,12.28,2.89,3.21,17.21,5.21,67.95,83.38,0.62,0.78,7.51,1.92,32.1,39.51,32.6,48.3,369.8,77.0,1333.9,1993.9,270.4,384.7,5335.7,4480.7,8959.4,11842.0
35,Bovine Meat (beef herd),70.41,82.84,326.21,170.37,735.09,910.1,37.57,40.37,99.48,60.36,209.85,269.19,30.76,33.03,85.19,51.72,179.95,241.98,59.78,76.08,318.83,270.87,656.91,683.48,101.81,113.38,301.41,320.69,412.83,657.89,215.6,268.6,1451.2,740.2,2585.5,5241.3,204.6,242.1,34732.5,441.2,89872.1,190796.3
36,Bovine Meat (dairy herd),12.27,14.39,43.24,25.94,64.12,106.37,14.93,17.94,33.3,34.14,50.9,56.68,12.78,15.75,28.79,28.87,45.04,49.88,165.22,219.02,343.64,289.14,497.23,1099.17,79.78,81.38,365.29,140.93,1515.69,2509.42,187.7,191.8,2714.3,2614.2,5799.4,8744.0,42174.7,46308.5,119805.2,122176.8,181962.6,214220.5
37,Lamb & Mutton,47.85,60.06,369.81,127.41,442.34,724.65,23.7,24.52,39.72,40.61,54.44,60.16,20.54,21.25,32.71,32.99,43.21,50.51,79.19,81.79,138.97,135.16,149.78,273.61,21.95,24.64,97.13,101.92,128.72,133.36,88.0,97.8,1802.8,461.2,7133.3,7825.8,258.9,258.9,141925.0,258.9,540906.4,595278.0


In [1048]:
# to make the data more compact, we will only use the median values for Land Use, GHG

df_weight = df_weight[['Product', 'Land Use_Median', 'GHG_Median']]

df_weight = df_weight.rename(columns={'Land Use_Median': 'Land Use', 'GHG_Median': 'Emissions'})

df_weight

Unnamed: 0,Product,Land Use,Emissions
6,Rice,2.15,3.73
7,Potatoes,0.82,0.47
15,Soymilk,0.64,0.91
16,Tofu,3.41,2.58
35,Bovine Meat (beef herd),170.37,60.36
36,Bovine Meat (dairy herd),25.94,34.14
38,Pig Meat,13.44,10.57
39,Poultry Meat,11.01,7.52
40,Milk,2.1,2.65
41,Cheese,20.18,18.64


In [1049]:
# to make the data more compact, we will only use the median values for Land Use, GHG

df_streamlit = df_streamlit[['Product', 'Land Use_Median', 'GHG_Median', 'Water Use_Median']]

df_streamlit = df_streamlit.rename(columns={'Land Use_Median': 'Land Use', 'GHG_Median': 'Emissions', 'Water Use_Median': 'Water Use' })

df_streamlit

Unnamed: 0,Product,Land Use,Emissions,Water Use
6,Rice,2.15,3.73,4625.6
7,Potatoes,0.82,0.47,78.3
12,Peas,6.73,0.8,0.0
15,Soymilk,0.64,0.91,6.2
16,Tofu,3.41,2.58,32.4
21,Olive Oil,17.29,5.09,24395.7
22,Tomatoes,0.17,0.65,4480.7
35,Bovine Meat (beef herd),170.37,60.36,441.2
36,Bovine Meat (dairy herd),25.94,34.14,122176.8
37,Lamb & Mutton,127.41,40.61,258.9


In [1050]:
df_streamlit.info()

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 6 to 44
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Product    16 non-null     object
 1   Land Use   16 non-null     object
 2   Emissions  16 non-null     object
 3   Water Use  16 non-null     object
dtypes: object(4)
memory usage: 640.0+ bytes


In [1051]:
df_weight.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 6 to 41
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Product    10 non-null     object
 1   Land Use   10 non-null     object
 2   Emissions  10 non-null     object
dtypes: object(3)
memory usage: 320.0+ bytes


In [1052]:
# column Product as category

df_weight['Product'] = df_weight['Product'].astype('category')

# Convert all columns except Product to float
df_weight = df_weight.apply(lambda col: col.astype(float) if col.name != 'Product' else col)

df_weight.info()


<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, 6 to 41
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Product    10 non-null     category
 1   Land Use   10 non-null     float64 
 2   Emissions  10 non-null     float64 
dtypes: category(1), float64(2)
memory usage: 630.0 bytes


In [1053]:
# same with the stramlit data

# column Product as category

df_streamlit['Product'] = df_streamlit['Product'].astype('category')

# Convert all columns except Product to float
df_streamlit = df_streamlit.apply(lambda col: col.astype(float) if col.name != 'Product' else col)

df_streamlit.info()

<class 'pandas.core.frame.DataFrame'>
Index: 16 entries, 6 to 44
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Product    16 non-null     category
 1   Land Use   16 non-null     float64 
 2   Emissions  16 non-null     float64 
 3   Water Use  16 non-null     float64 
dtypes: category(1), float64(3)
memory usage: 1.2 KB


In [1054]:
# new column with the Impact Type

df_weight2 = df_weight.melt(id_vars=['Product'],
             value_vars =['Land Use', 'Emissions'],
             var_name='Impact Type',  #neuer Typ
             value_name='Impact'
             )


df_weight2.head()




Unnamed: 0,Product,Impact Type,Impact
0,Rice,Land Use,2.15
1,Potatoes,Land Use,0.82
2,Soymilk,Land Use,0.64
3,Tofu,Land Use,3.41
4,Bovine Meat (beef herd),Land Use,170.37


In [1055]:

df_streamlit = df_streamlit.melt(id_vars=['Product'],
             value_vars =['Land Use', 'Emissions', 'Water Use'],
             var_name='Impact Type',  
             value_name='Impact'
             )


df_streamlit.head()

Unnamed: 0,Product,Impact Type,Impact
0,Rice,Land Use,2.15
1,Potatoes,Land Use,0.82
2,Peas,Land Use,6.73
3,Soymilk,Land Use,0.64
4,Tofu,Land Use,3.41


In [1056]:
df_weight2['Impact Type'] = df_weight2['Impact Type'].astype('category')

df_weight2.info()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   Product      20 non-null     category
 1   Impact Type  20 non-null     category
 2   Impact       20 non-null     float64 
dtypes: category(2), float64(1)
memory usage: 836.0 bytes


In [1057]:
df_streamlit['Impact Type'] = df_streamlit['Impact Type'].astype('category')

df_streamlit.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48 entries, 0 to 47
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype   
---  ------       --------------  -----   
 0   Product      48 non-null     category
 1   Impact Type  48 non-null     category
 2   Impact       48 non-null     float64 
dtypes: category(2), float64(1)
memory usage: 1.4 KB


In [1058]:
# Order of the Products
product_order = ['Bovine Meat (beef herd)', 'Bovine Meat (dairy herd)', 'Cheese','Pig Meat', 'Poultry Meat', 'Rice', 'Tofu', 'Milk',  'Soymilk','Potatoes']

impact_chart = alt.Chart(df_weight2).mark_bar().encode(
    x=alt.X('Impact:Q', scale=alt.Scale(domain=[df_weight2['Impact'].min(), df_weight2['Impact'].max()])).sort('-y').axis(   # - ist descending
        labelAngle = 0, 
        titleAnchor = 'start'),
    y=alt.Y('Impact Type:N', title=None).axis(
        labels = False, 
        titleAnchor = 'end',
        grid = False),       
    color='Impact Type:N',
).facet(
    row=alt.Row('Product:N', sort=product_order, title=None, header=alt.Header(labelAngle=0, labelAlign= 'left')),
    spacing = 5,  # Set facet label angle to 45 degrees
)

impact_chart

In [1059]:
animal_products = ['Bovine Meat (beef herd)', 'Bovine Meat (dairy herd)', 'Pig Meat', 'Poultry Meat', 'Cheese', 'Milk']

#Help from CoPiliot
# Create a new column 'Product Type' that indicates whether each product is an animal product or a plant product
df_weight2['Product Type'] = df_weight2['Product'].apply(lambda x: 'Animal Products' if x in animal_products else 'Plants')

df_weight2

Unnamed: 0,Product,Impact Type,Impact,Product Type
0,Rice,Land Use,2.15,Plants
1,Potatoes,Land Use,0.82,Plants
2,Soymilk,Land Use,0.64,Plants
3,Tofu,Land Use,3.41,Plants
4,Bovine Meat (beef herd),Land Use,170.37,Animal Products
5,Bovine Meat (dairy herd),Land Use,25.94,Animal Products
6,Pig Meat,Land Use,13.44,Animal Products
7,Poultry Meat,Land Use,11.01,Animal Products
8,Milk,Land Use,2.1,Animal Products
9,Cheese,Land Use,20.18,Animal Products


In [1060]:
animal_products = ['Bovine Meat (beef herd)', 'Bovine Meat (dairy herd)', 'Pig Meat', 'Poultry Meat', 'Cheese', 'Milk', 'Lamb & Mutton', 'Crustaceans (farmed)', 'Fish (farmed)']

#Help from CoPiliot
# Create a new column 'Product Type' that indicates whether each product is an animal product or a plant product
df_streamlit['Product Type'] = df_streamlit['Product'].apply(lambda x: 'Animal Products' if x in animal_products else 'Plants')

df_streamlit

Unnamed: 0,Product,Impact Type,Impact,Product Type
0,Rice,Land Use,2.15,Plants
1,Potatoes,Land Use,0.82,Plants
2,Peas,Land Use,6.73,Plants
3,Soymilk,Land Use,0.64,Plants
4,Tofu,Land Use,3.41,Plants
5,Olive Oil,Land Use,17.29,Plants
6,Tomatoes,Land Use,0.17,Plants
7,Bovine Meat (beef herd),Land Use,170.37,Animal Products
8,Bovine Meat (dairy herd),Land Use,25.94,Animal Products
9,Lamb & Mutton,Land Use,127.41,Animal Products


In [1061]:
df_weight2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   Product       20 non-null     category
 1   Impact Type   20 non-null     category
 2   Impact        20 non-null     float64 
 3   Product Type  20 non-null     object  
dtypes: category(2), float64(1), object(1)
memory usage: 996.0+ bytes


In [1062]:
df_streamlit.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48 entries, 0 to 47
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype   
---  ------        --------------  -----   
 0   Product       48 non-null     category
 1   Impact Type   48 non-null     category
 2   Impact        48 non-null     float64 
 3   Product Type  48 non-null     object  
dtypes: category(2), float64(1), object(1)
memory usage: 1.8+ KB


## Save df_weight2 as csv file 'environmental-impact-weight.csv'


In [1063]:
df_weight2.to_csv('cleaned-data/environmental-impact-weight.csv', index = False) 

## Save df_streamlit as csv file 'environmental-impact-streamlit.csv'

In [1064]:
df_streamlit.to_csv('cleaned-data/environmental-impact-streamlit.csv', index = False) 

### Importing sheet "Results - Nutritional Units"

In [1065]:

df_nu = pd.read_excel('original-data/aaq0216_datas2.xls', sheet_name='Results - Nutritional Units', skipfooter=3)

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

df_nu.head()

Unnamed: 0.1,Unnamed: 0,Nutr. Units / FU,"Resampled, Randomized Data",Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,"Data Without Resampling or Randomization (original study data, after harmonizing methodology)",Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54,Unnamed: 55
0,,,Land Use (m2/nutritional unit),,,,,,"GHG Emissions (kg CO2eq/NU, IPCC 2013 incl CC ...",,,,,,"GHG Emissions (kg CO2eq/NU, IPCC 2007)",,,,,,"Acidifying Emissions (g SO2eq/NU, CML2 Baseline)",,,,,,"Eutrophying Emissions (g PO43-eq/NU, CML2 Base...",,,,,,Freshwater Withdrawals (L/NU),,,,,,Stress-Weighted Water Use (L/NU),,,,,,Land Use (m2/FU),,"GHG (kg CO2eq/FU, IPCC 2013 incl. CC feedbacks)",,"Acid. (kg SO2eq/FU, CML2 Baseline)",,"Eutr. (kg PO43-eq/FU, CML2 Baseline)",,Fresh W. (L/FU),,Strs. W. WU (L/FU),
1,Product,,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,Min,Max,Min,Max,Min,Max,Min,Max,Min,Max,Min,Max
2,Wheat & Rye (Bread),2.67533,0.36631,0.414902,1.439075,1.009221,2.941693,3.722905,0.265388,0.295291,0.586843,0.474708,0.863445,1.147522,0.269126,0.295291,0.590581,0.478446,0.88961,1.162473,2.186646,2.496888,4.990039,4.963874,7.565422,9.355855,0.381261,0.848493,2.676305,2.007229,5.012466,6.78421,0.822328,0.822328,242.026217,156.69095,404.062303,1259.097009,1.009221,1.682035,12479.058658,4792.567646,20552.866375,84388.729615,0.154352,13.640659,0.155905,20.948304,1.273815,12.513332,0.063151,28.922522,0.426378,3242.72484,0.521439,124567.973382
3,Maize (Meal),4.5225,0.218905,0.252073,0.650083,0.406855,1.260365,1.992261,0.145937,0.161415,0.375898,0.260918,0.510779,0.778331,0.148148,0.163626,0.371476,0.263129,0.508568,0.780542,1.244887,1.311222,2.582642,2.244334,4.619127,5.048093,0.26534,0.289663,0.8911,0.53068,1.795467,2.781647,0,0,47.694859,9.70702,117.368712,132.294085,0,0,2402.056385,77.302377,5389.806523,6238.739635,0.108969,6.401253,0.100346,15.981511,0.646502,5.633271,0.172503,4.600925,0,1590.038423,0,101982.568079
4,Barley (Beer),5.0,0.042,0.052,0.222,0.176,0.474,0.574,0.118,0.14,0.236,0.236,0.328,0.354,0.118,0.14,0.236,0.234,0.328,0.356,1.044,1.072,1.318,1.212,1.49,1.634,0.214,0.236,0.466,0.366,0.75,0.964,1.24,1.4,3.42,1.4,2.24,9.56,1.54,1.96,139.28,5.46,58.16,324.18,0.042966,0.751435,0.087839,0.527857,0.964588,7.672255,0.208971,2.474399,1.246018,113.760984,0.884541,8296.760796


In [1066]:
##cleaning the columns and rows, which are in the excel file used as a describtion

df_nu = df_nu.drop(df_nu.columns[1:8], axis=1)

df_nu.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34,Unnamed: 35,Unnamed: 36,Unnamed: 37,Unnamed: 38,Unnamed: 39,Unnamed: 40,Unnamed: 41,Unnamed: 42,Unnamed: 43,"Data Without Resampling or Randomization (original study data, after harmonizing methodology)",Unnamed: 45,Unnamed: 46,Unnamed: 47,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52,Unnamed: 53,Unnamed: 54,Unnamed: 55
0,,"GHG Emissions (kg CO2eq/NU, IPCC 2013 incl CC ...",,,,,,"GHG Emissions (kg CO2eq/NU, IPCC 2007)",,,,,,"Acidifying Emissions (g SO2eq/NU, CML2 Baseline)",,,,,,"Eutrophying Emissions (g PO43-eq/NU, CML2 Base...",,,,,,Freshwater Withdrawals (L/NU),,,,,,Stress-Weighted Water Use (L/NU),,,,,,Land Use (m2/FU),,"GHG (kg CO2eq/FU, IPCC 2013 incl. CC feedbacks)",,"Acid. (kg SO2eq/FU, CML2 Baseline)",,"Eutr. (kg PO43-eq/FU, CML2 Baseline)",,Fresh W. (L/FU),,Strs. W. WU (L/FU),
1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,Min,Max,Min,Max,Min,Max,Min,Max,Min,Max,Min,Max
2,Wheat & Rye (Bread),0.265388,0.295291,0.586843,0.474708,0.863445,1.147522,0.269126,0.295291,0.590581,0.478446,0.88961,1.162473,2.186646,2.496888,4.990039,4.963874,7.565422,9.355855,0.381261,0.848493,2.676305,2.007229,5.012466,6.78421,0.822328,0.822328,242.026217,156.69095,404.062303,1259.097009,1.009221,1.682035,12479.058658,4792.567646,20552.866375,84388.729615,0.154352,13.640659,0.155905,20.948304,1.273815,12.513332,0.063151,28.922522,0.426378,3242.72484,0.521439,124567.973382
3,Maize (Meal),0.145937,0.161415,0.375898,0.260918,0.510779,0.778331,0.148148,0.163626,0.371476,0.263129,0.508568,0.780542,1.244887,1.311222,2.582642,2.244334,4.619127,5.048093,0.26534,0.289663,0.8911,0.53068,1.795467,2.781647,0,0,47.694859,9.70702,117.368712,132.294085,0,0,2402.056385,77.302377,5389.806523,6238.739635,0.108969,6.401253,0.100346,15.981511,0.646502,5.633271,0.172503,4.600925,0,1590.038423,0,101982.568079
4,Barley (Beer),0.118,0.14,0.236,0.236,0.328,0.354,0.118,0.14,0.236,0.234,0.328,0.356,1.044,1.072,1.318,1.212,1.49,1.634,0.214,0.236,0.466,0.366,0.75,0.964,1.24,1.4,3.42,1.4,2.24,9.56,1.54,1.96,139.28,5.46,58.16,324.18,0.042966,0.751435,0.087839,0.527857,0.964588,7.672255,0.208971,2.474399,1.246018,113.760984,0.884541,8296.760796


In [1067]:
df_nu = df_nu.drop(df_nu.columns[7:], axis=1)

df_nu.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13
0,,"GHG Emissions (kg CO2eq/NU, IPCC 2013 incl CC ...",,,,,
1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl
2,Wheat & Rye (Bread),0.265388,0.295291,0.586843,0.474708,0.863445,1.147522
3,Maize (Meal),0.145937,0.161415,0.375898,0.260918,0.510779,0.778331
4,Barley (Beer),0.118,0.14,0.236,0.236,0.328,0.354


In [1068]:
df_nu.columns = df_nu.iloc[1]


df_nu.head()

1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl
0,,"GHG Emissions (kg CO2eq/NU, IPCC 2013 incl CC ...",,,,,
1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl
2,Wheat & Rye (Bread),0.265388,0.295291,0.586843,0.474708,0.863445,1.147522
3,Maize (Meal),0.145937,0.161415,0.375898,0.260918,0.510779,0.778331
4,Barley (Beer),0.118,0.14,0.236,0.236,0.328,0.354


In [1069]:
df_nu = df_nu.drop(df_nu.index[0:2])

df_nu.head()

1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl
2,Wheat & Rye (Bread),0.265388,0.295291,0.586843,0.474708,0.863445,1.147522
3,Maize (Meal),0.145937,0.161415,0.375898,0.260918,0.510779,0.778331
4,Barley (Beer),0.118,0.14,0.236,0.236,0.328,0.354
5,Oatmeal,0.304994,0.324056,0.945482,0.987419,1.555471,1.639344
6,Rice,0.311991,0.396093,1.207271,1.011937,2.379273,2.783505


In [1070]:
df_nu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 2 to 46
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Product    44 non-null     object
 1   5th pctl   44 non-null     object
 2   10th pctl  44 non-null     object
 3   Mean       43 non-null     object
 4   Median     43 non-null     object
 5   90th pctl  43 non-null     object
 6   95th pctl  43 non-null     object
dtypes: object(7)
memory usage: 2.6+ KB


In [1071]:
df_nu['Product'] = df_nu['Product'].astype('category')

# Help from Copilot
# Convert all columns except Product to float
df_nu = df_nu.apply(lambda col: col.astype(float) if col.name != 'Product' else col)

df_nu.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45 entries, 2 to 46
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype   
---  ------     --------------  -----   
 0   Product    44 non-null     category
 1   5th pctl   44 non-null     float64 
 2   10th pctl  44 non-null     float64 
 3   Mean       43 non-null     float64 
 4   Median     43 non-null     float64 
 5   90th pctl  43 non-null     float64 
 6   95th pctl  43 non-null     float64 
dtypes: category(1), float64(6)
memory usage: 3.7 KB


## Plotting a range bar chart 

https://altair-viz.github.io/gallery/bar_chart_with_range.html

In [1072]:


bar_nu = alt.Chart(df_nu).mark_bar(cornerRadius=10, height=10).encode(
    x=alt.X('5th pctl:Q').scale(domain=[0, 150]).title('GHG Emissions (kg CO2eq/kg)'),
    x2='95th pctl:Q',
    y=alt.Y('Product:N').title('Product')
).properties(
    width=800,
    height=1000
)


bar_nu

In [1073]:
# choose rows for products

products_to_select = ['Rice', 'Potatoes', 'Wheat', 'Soymilk','Tofu', 'Bovine Meat (beef herd)', 'Bovine Meat (dairy herd)', 'Lamb & Mutton', 'Tomatoes', 'Crustaceans (farmed)', 'Fish (farmed)', 'Peas', 'Olive Oil', 'Pig Meat', 'Poultry Meat', 'Milk', 'Cheese']

df_nu = df_nu[df_nu['Product'].isin(products_to_select)]

df_nu

1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl
6,Rice,0.311991,0.396093,1.207271,1.011937,2.379273,2.783505
7,Potatoes,0.122951,0.218579,0.628415,0.642077,0.860656,0.956284
12,Peas,0.229523,0.252025,0.441044,0.360036,0.751575,0.841584
15,Soymilk,0.51,0.58,0.98,0.91,1.47,1.74
16,Tofu,0.88125,1.0,1.975,1.6125,3.46875,4.54375
21,Olive Oil,2.13,2.86,5.42,5.09,7.63,10.79
22,Tomatoes,0.37,0.39,2.09,0.65,5.95,12.62
35,Bovine Meat (beef herd),18.841525,20.245737,49.889669,30.270812,105.240722,135.0
36,Bovine Meat (dairy herd),7.563323,9.088146,16.869301,17.294833,25.785208,28.713273
37,Lamb & Mutton,11.844078,12.253873,19.850075,20.294853,27.206397,30.064968


In [1074]:
animal_products = ['Bovine Meat (beef herd)', 'Bovine Meat (dairy herd)', 'Pig Meat', 'Poultry Meat', 'Cheese', 'Milk', 'Lamb & Mutton', 'Crustaceans (farmed)', 'Fish (farmed)']

#Help from CoPiliot
# Create a new column 'Product Type' that indicates whether each product is an animal product or a plant product
df_nu['Product Type'] = df_nu['Product'].apply(lambda x: 'Animal Products' if x in animal_products else 'Plants')

# Create a new column 'Compare' to later define a scale and change the opacity of all the products that are "No" in the compare list
compare = ['Bovine Meat (dairy herd)' , 'Tofu']
df_nu['Compare'] = df_nu['Product'].apply(lambda x: 'Yes' if x in compare else 'No')

df_nu

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nu['Product Type'] = df_nu['Product'].apply(lambda x: 'Animal Products' if x in animal_products else 'Plants')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nu['Compare'] = df_nu['Product'].apply(lambda x: 'Yes' if x in compare else 'No')


1,Product,5th pctl,10th pctl,Mean,Median,90th pctl,95th pctl,Product Type,Compare
6,Rice,0.311991,0.396093,1.207271,1.011937,2.379273,2.783505,Plants,No
7,Potatoes,0.122951,0.218579,0.628415,0.642077,0.860656,0.956284,Plants,No
12,Peas,0.229523,0.252025,0.441044,0.360036,0.751575,0.841584,Plants,No
15,Soymilk,0.51,0.58,0.98,0.91,1.47,1.74,Plants,No
16,Tofu,0.88125,1.0,1.975,1.6125,3.46875,4.54375,Plants,Yes
21,Olive Oil,2.13,2.86,5.42,5.09,7.63,10.79,Plants,No
22,Tomatoes,0.37,0.39,2.09,0.65,5.95,12.62,Plants,No
35,Bovine Meat (beef herd),18.841525,20.245737,49.889669,30.270812,105.240722,135.0,Animal Products,No
36,Bovine Meat (dairy herd),7.563323,9.088146,16.869301,17.294833,25.785208,28.713273,Animal Products,Yes
37,Lamb & Mutton,11.844078,12.253873,19.850075,20.294853,27.206397,30.064968,Animal Products,No


In [1075]:
df_nu['Product Type'] = df_nu['Product Type'].astype('category')

PRODUCT_TYPE_NU = df_nu['Product Type'].cat.categories.to_list()

product_colors = alt.Scale(
    domain=PRODUCT_TYPE_NU,
    range=['red', 'green']
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nu['Product Type'] = df_nu['Product Type'].astype('category')


In [1076]:
bar_nu = alt.Chart(df_nu).mark_bar(cornerRadius=10, height=20).encode(
    x=alt.X('5th pctl:Q').scale(domain=[0, 140]).title('GHG Emissions (kg CO2eq/kg)'),
    x2='95th pctl:Q',
    y=alt.Y('Product:N', title='Product'),
    color=alt.Color('Product Type:N', scale=product_colors) 
).properties(
    width=800,
    height=600
)

# better use median!!! -> corrected in the quarto presentation
mean_points = alt.Chart(df_nu).mark_point(filled=False, color='black', size=200).encode(
  x=alt.X('Mean:Q'),
  y=alt.Y('Product:N'),
)

# Help from Copilot
# Get the '5th pctl' value of 'Bovine Meat (dairy herd)'
bovine_dairy_5th = df_nu.loc[df_nu['Product'] == 'Bovine Meat (dairy herd)', '5th pctl'].values[0]

# Create a new chart 
vline = alt.Chart().mark_rule(color='black', strokeDash=[3,3]).encode(
    x='a:Q'
).transform_calculate(
    a=str(bovine_dairy_5th)
)


bar_nu + mean_points + vline

In [1077]:
# New opaticity range for the comparison, using Column Compare
df_nu['Compare'] = df_nu['Compare'].astype('category')

COMPARE = df_nu['Compare'].cat.categories.to_list()

product_opacity = alt.Scale(
    domain=COMPARE,
    range=[0.1, 1]
)

bar_nu2 = alt.Chart(df_nu).mark_bar(cornerRadius=10, height=20).encode(
    x=alt.X('5th pctl:Q').scale(domain=[0, 140]).title('GHG Emissions (kg CO2eq/kg)'),
    x2='95th pctl:Q',
    y=alt.Y('Product:N', title='Product'),
    color=alt.Color('Product Type:N', scale=product_colors),
    opacity=alt.Opacity('Compare:N', scale=product_opacity, legend=None)  
).properties(
    width=800,
    height=600
)

bar_nu2 + mean_points + vline   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nu['Compare'] = df_nu['Compare'].astype('category')


In [1078]:
# rounding the median for the tooltip

df_nu['Median'] = df_nu['Median'].round(2)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_nu['Median'] = df_nu['Median'].round(2)


## Save df_nu as csv file 'environmental-impact-nu.csv'

In [1079]:
# safe the data frame to use it in the quarto presentation

df_nu.to_csv('cleaned-data/environmental-impact-nu.csv', index = False) 