In [280]:
# Imports
import pandas as pd
import math
import numpy as np

# Load every raw table used below. Undecodable bytes are dropped on read
# (encoding_errors="ignore") instead of raising.
production = pd.read_csv("Agriculture_Dataset_text.csv", encoding_errors="ignore")
nutrients_per_food = pd.read_csv("ABBREV_2.csv", encoding_errors="ignore")
mapping = pd.read_csv("mapping.csv", encoding_errors="ignore")
nutrient_mapping = pd.read_csv("abbrev_nutrient_map.csv", encoding_errors="ignore")

# Filter production to fit what we have (EU, most recent date)
is_europe = production["Area"] == "European Union (27)"
is_2017 = production["Year"] == 2017

# Get the relevant row.
# Both masks are combined and applied in a single indexing step: chaining
# `[is_europe][is_2017]` applies a mask built on the full frame to an already
# filtered frame, which makes pandas re-align the mask and emit a
# "Boolean Series key will be reindexed" warning.
production = production[is_europe & is_2017]

# Helpers building the wide-format column names of a given product
def as_co2_emission(product):
    """Return the name of the emissions column for ``product``."""
    return f"{product}-Emissions (CO2eq)-kilotonnes"


def as_tonnes(product):
    """Return the name of the production column for ``product``."""
    return f"{product}-Production-tonnes"

# Product names: take every second column starting at position 2 and keep
# the part of the column name that precedes "-Emissions".
every_other_column = production.columns[2::2]
products_raw = every_other_column.map(lambda column: column.split("-Emissions")[0])

display(products_raw)

  production = production[is_europe][is_2017]


Index(['Cereals excluding rice', 'Eggs, hen, in shell', 'Meat, buffalo',
       'Meat, cattle', 'Meat, chicken', 'Meat, goat', 'Meat, pig',
       'Meat, sheep', 'Milk, whole fresh buffalo', 'Milk, whole fresh camel',
       'Milk, whole fresh cow', 'Milk, whole fresh goat',
       'Milk, whole fresh sheep', 'Rice, paddy'],
      dtype='object')

In [281]:
# The base data comes in a wide format: emissions and production are encoded
# in the column names, one pair of columns per food.
# We want one row per food instead, so we reorganize it.

# After the transpose the former column names become the row labels; the
# first two rows are sliced off so only the per-product measurements remain.
production = production.transpose()[2:]

# Collect one record per product and build the frame in a single call.
# Appending with `formatted.loc[len(formatted.index)] = ...` re-allocates the
# frame on every row and starts from columns that have no numeric dtype.
records = [
    {
        "Product": product,
        "Emissions (CO2eq)-kilotonnes": production.loc[as_co2_emission(product)].iloc[0],
        "Production-tonnes": production.loc[as_tonnes(product)].iloc[0],
    }
    for product in products_raw
]
formatted = pd.DataFrame(
    records,
    columns=["Product", "Emissions (CO2eq)-kilotonnes", "Production-tonnes"],
)

display(formatted)

Unnamed: 0,Product,Emissions (CO2eq)-kilotonnes,Production-tonnes
0,Cereals excluding rice,50454.9984,265438134.6
1,"Eggs, hen, in shell",4779.2664,6392416.0
2,"Meat, buffalo",298.364,17081.0
3,"Meat, cattle",107129.2239,6947855.0
4,"Meat, chicken",2938.517,10502992.0
5,"Meat, goat",1590.7109,82855.0
6,"Meat, pig",35267.6361,22785260.0
7,"Meat, sheep",11144.7621,578175.0
8,"Milk, whole fresh buffalo",482.0265,219483.0
9,"Milk, whole fresh camel",,


In [282]:
# Build the main table: every aliment with its production, its CO2eq
# emissions and its nutritional values.
# Both joins use the default (inner) merge, so a product missing from
# `mapping` or from `nutrients_per_food` is dropped.
# Every missing value, in any column, is then replaced by 0.
production_with_nutrition = pd.merge(
    pd.merge(formatted, mapping, left_on="Product", right_on="col"),
    nutrients_per_food,
    on="NDB_No",
).fillna(0)

display(production_with_nutrition)


Unnamed: 0,Product,Emissions (CO2eq)-kilotonnes,Production-tonnes,Unnamed: 0_x,col,Shrt_Desc_x,NDB_No,df_index,Unnamed: 0_y,Shrt_Desc_y,...,Vit_K_(µg),FA_Sat_(g),FA_Mono_(g),FA_Poly_(g),Cholestrl_(mg),GmWt_1,GmWt_Desc1,GmWt_2,GmWt_Desc2,Refuse_Pct
0,Cereals excluding rice,50454.9984,265438134.6,12,Cereals excluding rice,"CORN GRAIN,YEL",20014,6484,6484,"CORN GRAIN,YEL",...,0.3,0.667,1.251,2.163,0.0,166.0,1 cup,0.0,0,0.0
1,"Eggs, hen, in shell",4779.2664,6392416.0,7,"Eggs, hen, in shell","EGG,WHL,RAW,FRSH",1123,111,111,"EGG,WHL,RAW,FRSH",...,0.3,3.126,3.658,1.911,372.0,50.0,1 large,56.0,1 extra large,12.0
2,"Meat, buffalo",298.364,17081.0,9,"Meat, buffalo","GAME MEAT,BUFFALO,H2O,RAW",17160,5258,5258,"GAME MEAT,BUFFALO,H2O,RAW",...,0.0,0.46,0.42,0.27,46.0,28.35,1 oz,453.6,1 lb,0.0
3,"Meat, cattle",107129.2239,6947855.0,1,"Meat, cattle","BEEF,GRASS-FED,GROUND,RAW",13047,3748,3748,"BEEF,GRASS-FED,GROUND,RAW",...,1.1,5.335,4.8,0.532,62.0,85.0,1 serving,0.0,0,0.0
4,"Meat, chicken",2938.517,10502992.0,8,"Meat, chicken","CHICKEN,BROILERS OR FRYERS,MEAT & SKN,RAW",5006,828,828,"CHICKEN,BROILERS OR FRYERS,MEAT & SKN,RAW",...,1.5,4.31,6.24,3.23,75.0,85.0,3 oz,276.0,"1 unit, (yield from 1 lb ready-to-cook chicken)",32.0
5,"Meat, goat",1590.7109,82855.0,4,"Meat, goat","GOAT,RAW",17168,5266,5266,"GOAT,RAW",...,0.0,0.71,1.03,0.17,57.0,28.35,1 oz,453.6,1 lb,0.0
6,"Meat, pig",35267.6361,22785260.0,10,"Meat, pig","PORK,FRESH,GROUND,RAW",10219,2666,2666,"PORK,FRESH,GROUND,RAW",...,0.0,7.87,9.44,1.91,72.0,28.35,1 oz,113.0,4 oz,0.0
7,"Meat, sheep",11144.7621,578175.0,13,"Meat, sheep","LAMB,GROUND,RAW",17224,5322,5322,"LAMB,GROUND,RAW",...,3.6,10.19,9.6,1.85,73.0,28.35,1 oz,113.0,4 oz,0.0
8,"Milk, whole fresh buffalo",482.0265,219483.0,11,"Milk, whole fresh buffalo","MILK,INDIAN BUFFALO,FLUID",1108,96,96,"MILK,INDIAN BUFFALO,FLUID",...,0.0,4.597,1.787,0.146,19.0,244.0,1 cup,976.0,1 quart,0.0
9,"Milk, whole fresh camel",0.0,0.0,2,"Milk, whole fresh camel","MILK,WHL,3.25% MILKFAT,WO/ ADDED VIT A & VITAM...",1211,179,179,"MILK,WHL,3.25% MILKFAT,WO/ ADDED VIT A & VITAM...",...,0.3,1.865,0.812,0.195,10.0,244.0,1 cup,15.0,1 tbsp,0.0


In [283]:
# Get all nutrient column names ("abbrev") and their display names ("nutrient").
# They are materialised as plain lists: the bare `.iloc` accessor is an indexer
# object, not a sequence, and could only be looped over through Python's legacy
# `__getitem__`/IndexError iteration fallback.
relevant_nutrients = nutrient_mapping["abbrev"].tolist()
relevant_nutrients_names = nutrient_mapping["nutrient"].tolist()

# Normalize CO2 Mass per Aliment Mass
# (kilotonnes * 1000 gives tonnes, divided by the tonnes produced)
production_with_nutrition["CO2eq Tn/Aliment Tn"] = (
    production_with_nutrition["Emissions (CO2eq)-kilotonnes"]
    * 1000
    / production_with_nutrition["Production-tonnes"]
)

# Keep only relevant data
production_with_nutrition = production_with_nutrition.reindex(columns=[
    "Product",
    "CO2eq Tn/Aliment Tn",
    *relevant_nutrients,
    "GmWt_1",
])

# Get rid of camel milk - none is produced so fills the table with irrelevant data.
# An explicit copy is taken so that later column assignments work on an
# independent frame rather than on a filtered selection.
production_with_nutrition = production_with_nutrition[
    ~production_with_nutrition["Product"].isin(["Milk, whole fresh camel"])
].copy()

display(production_with_nutrition)

Unnamed: 0,Product,CO2eq Tn/Aliment Tn,Protein_(g),Lipid_Tot_(g),Carbohydrt_(g),Fiber_TD_(g),Calcium_(mg),Iron_(mg),Magnesium_(mg),Phosphorus_(mg),...,Folate_Tot_(µg),Choline_Tot_(mg),Vit_B12_(µg),Vit_A_(IU),Vit_E_(mg),Vit_D_(µg),Vit_K_(µg),FA_Sat_(g),Cholestrl_(mg),GmWt_1
0,Cereals excluding rice,0.190082,9.42,4.74,74.26,7.3,7.0,2.71,127.0,210.0,...,19.0,0.0,0.0,214.0,0.49,0.0,0.3,0.667,0.0,166.0
1,"Eggs, hen, in shell",0.747646,12.56,9.51,0.72,0.0,56.0,1.75,12.0,198.0,...,47.0,293.8,0.89,540.0,1.05,2.0,0.3,3.126,372.0,50.0
2,"Meat, buffalo",17.467596,20.39,1.37,0.0,0.0,12.0,1.61,32.0,197.0,...,8.0,0.0,1.66,0.0,0.0,0.0,0.0,0.46,46.0,28.35
3,"Meat, cattle",15.419036,19.42,12.73,0.0,0.0,12.0,1.99,19.0,175.0,...,6.0,67.4,1.97,0.0,0.35,0.0,1.1,5.335,62.0,85.0
4,"Meat, chicken",0.279779,18.6,15.06,0.0,0.0,11.0,0.9,20.0,147.0,...,6.0,59.7,0.31,140.0,0.3,0.2,1.5,4.31,75.0,85.0
5,"Meat, goat",19.198732,20.6,2.31,0.0,0.0,13.0,2.83,0.0,180.0,...,5.0,0.0,1.13,0.0,0.0,0.0,0.0,0.71,57.0,28.35
6,"Meat, pig",1.547827,16.88,21.19,0.0,0.0,14.0,0.88,19.0,175.0,...,5.0,0.0,0.7,7.0,0.0,0.0,0.0,7.87,72.0,28.35
7,"Meat, sheep",19.275759,16.56,23.41,0.0,0.0,16.0,1.55,21.0,157.0,...,18.0,69.3,2.31,0.0,0.2,0.1,3.6,10.19,73.0,28.35
8,"Milk, whole fresh buffalo",2.196191,3.75,6.89,5.18,0.0,169.0,0.12,31.0,117.0,...,6.0,0.0,0.36,178.0,0.0,0.0,0.0,4.597,19.0,244.0
10,"Milk, whole fresh cow",0.530739,3.15,3.27,4.78,0.0,113.0,0.03,10.0,84.0,...,5.0,14.3,0.45,162.0,0.07,0.1,0.3,1.865,10.0,244.0


In [284]:
# Normalize nutrients: for each nutrient add two columns, the nutrient
# amount divided by GmWt_1 and the CO2eq ratio divided by that amount.
# A nutrient amount of 0 yields an infinite CO2eq cost.
# NOTE(review): GmWt_1 looks like the weight of a household serving; confirm
# it is the right denominator for the nutrient columns.
# NOTE(review): the new columns are labelled "g" although some source columns
# are in mg or µg; confirm the labels are intended.
serving_weight = production_with_nutrition["GmWt_1"]
co2_per_aliment = production_with_nutrition["CO2eq Tn/Aliment Tn"]

for nutrient_col, nutrient in zip(relevant_nutrients, relevant_nutrients_names):
    density_col = f"{nutrient} g/Aliment g"
    footprint_col = f"CO2eq g/{nutrient} g"

    # Nutrient mass per food mass
    production_with_nutrition[density_col] = (
        production_with_nutrition[nutrient_col] / serving_weight
    )
    # CO2 mass per nutrient mass
    production_with_nutrition[footprint_col] = (
        co2_per_aliment / production_with_nutrition[density_col]
    )

def flatten(list):
    """Return one flat list holding the items of every element of ``list``.

    ``list`` is any iterable of iterables; only one level of nesting is removed
    and the original order is kept.
    """
    return [item for group in list for item in group]

# Keep only the product, its CO2eq ratio and, for every nutrient, the
# pair of normalised columns (content per food mass, CO2eq per nutrient mass).
nutrition_columns = []
for nutrient_name in relevant_nutrients_names:
    nutrition_columns.append(f"{nutrient_name} g/Aliment g")
    nutrition_columns.append(f"CO2eq g/{nutrient_name} g")

nutrition_cost = production_with_nutrition.reindex(
    columns=["Product", "CO2eq Tn/Aliment Tn", *nutrition_columns]
)

# We now have, for each product, how much CO2 is used for each nutrient, and how much of that nutrient is per food
display(nutrition_cost)

Unnamed: 0,Product,CO2eq Tn/Aliment Tn,Protein g/Aliment g,CO2eq g/Protein g,Fat g/Aliment g,CO2eq g/Fat g,Total carbohydrate g/Aliment g,CO2eq g/Total carbohydrate g,Dietary Fiber g/Aliment g,CO2eq g/Dietary Fiber g,...,Vitamin E g/Aliment g,CO2eq g/Vitamin E g,Vitamin D g/Aliment g,CO2eq g/Vitamin D g,Vitamin K g/Aliment g,CO2eq g/Vitamin K g,Saturated fat g/Aliment g,CO2eq g/Saturated fat g,Cholesterol g/Aliment g,CO2eq g/Cholesterol g
0,Cereals excluding rice,0.190082,0.056747,3.349639,0.028554,6.656878,0.447349,0.424907,0.043976,4.322411,...,0.002952,64.39511,0.0,inf,0.001807,105.178679,0.004018,47.306752,0.0,inf
1,"Eggs, hen, in shell",0.747646,0.2512,2.976299,0.1902,3.930843,0.0144,51.919884,0.0,inf,...,0.021,35.602206,0.04,18.691158,0.006,124.607723,0.06252,11.958515,7.44,0.10049
2,"Meat, buffalo",17.467596,0.719224,24.286726,0.048325,361.464478,0.0,inf,0.0,inf,...,0.0,inf,0.0,inf,0.0,inf,0.016226,1076.53551,1.622575,10.765355
3,"Meat, cattle",15.419036,0.228471,67.488055,0.149765,102.955069,0.0,inf,0.0,inf,...,0.004118,3744.622942,0.0,inf,0.012941,1191.470936,0.062765,245.664111,0.729412,21.139
4,"Meat, chicken",0.279779,0.218824,1.27856,0.177176,1.579098,0.0,inf,0.0,inf,...,0.003529,79.270727,0.002353,118.906091,0.017647,15.854145,0.050706,5.517684,0.882353,0.317083
5,"Meat, goat",19.198732,0.726631,26.421555,0.081481,235.620796,0.0,inf,0.0,inf,...,0.0,inf,0.0,inf,0.0,inf,0.025044,766.597237,2.010582,9.548843
6,"Meat, pig",1.547827,0.595414,2.599579,0.747443,2.07083,0.0,inf,0.0,inf,...,0.0,inf,0.0,inf,0.0,inf,0.277601,5.575717,2.539683,0.609457
7,"Meat, sheep",19.275759,0.584127,32.999262,0.82575,23.343348,0.0,inf,0.0,inf,...,0.007055,2732.338873,0.003527,5464.677745,0.126984,151.796604,0.359436,53.627848,2.574956,7.48586
8,"Milk, whole fresh buffalo",2.196191,0.015369,142.898801,0.028238,77.77511,0.02123,103.449904,0.0,inf,...,0.0,inf,0.0,inf,0.0,inf,0.01884,116.569612,0.077869,28.203711
10,"Milk, whole fresh cow",0.530739,0.01291,41.11119,0.013402,39.602522,0.01959,27.092102,0.0,inf,...,0.000287,1850.00353,0.00041,1295.002471,0.00123,431.66749,0.007643,69.43713,0.040984,12.950025


In [285]:
# We figure out which aliment is the best for each nutrient, in
# terms of CO2. This will be useful for the "dumb and fast" algorithm.
# When several aliments share the minimum cost (including the case where
# every cost is infinite), the first one in table order is kept.
best_records = []
for nutrient in relevant_nutrients_names:
    co2 = f"CO2eq g/{nutrient} g"
    # Select the product name by column label: an integer key on a
    # label-indexed row relies on a deprecated positional fallback.
    cheapest_products = nutrition_cost.loc[
        nutrition_cost[co2] == nutrition_cost[co2].min(), "Product"
    ]
    best_records.append({"Nutrient": nutrient, "Best Aliment": cheapest_products.iloc[0]})

# Build the frame once from the collected records instead of growing it row by row.
best_ingredients = pd.DataFrame(best_records, columns=["Nutrient", "Best Aliment"])

display(best_ingredients)

Unnamed: 0,Nutrient,Best Aliment
0,Protein,"Meat, chicken"
1,Fat,"Meat, chicken"
2,Total carbohydrate,Cereals excluding rice
3,Dietary Fiber,Cereals excluding rice
4,Calcium,"Milk, whole fresh goat"
5,Iron,Cereals excluding rice
6,Magnesium,Cereals excluding rice
7,Phosphorus,Cereals excluding rice
8,Potassium,Cereals excluding rice
9,Sodium,"Eggs, hen, in shell"


In [286]:
# Daily nutritional needs of a human, joined to the nutrient name mapping
raw_nutrient_needs = pd.read_excel("daily_nutrients.xlsx")
nutrient_join = pd.read_csv("abbrev_nutrient_map.csv", encoding_errors="ignore")
nutrient_needs = pd.merge(
    nutrient_join,
    raw_nutrient_needs,
    left_on="nutrient",
    right_on="Nutrient",
)

# Strip the unit markers from the daily values (they already match "ABBREV_2.csv", so they cancel out)
unit_markers = "(mg)|(g)|(mcg)"
nutrient_needs["Daily Value"] = nutrient_needs["Daily Value"].replace(
    to_replace=unit_markers, value="", regex=True
)

display(nutrient_needs)

Unnamed: 0.1,Unnamed: 0,abbrev,nutrient,Nutrient,Daily Value
0,0,Protein_(g),Protein,Protein,50.0
1,1,Lipid_Tot_(g),Fat,Fat,78.0
2,2,Carbohydrt_(g),Total carbohydrate,Total carbohydrate,275.0
3,3,Fiber_TD_(g),Dietary Fiber,Dietary Fiber,28.0
4,4,Calcium_(mg),Calcium,Calcium,1300.0
5,5,Iron_(mg),Iron,Iron,18.0
6,6,Magnesium_(mg),Magnesium,Magnesium,420.0
7,7,Phosphorus_(mg),Phosphorus,Phosphorus,1250.0
8,8,Potassium_(mg),Potassium,Potassium,4700.0
9,9,Sodium_(mg),Sodium,Sodium,2300.0


In [287]:
# "Dumb and fast" greedy algorithm that tries to cover our nutritional
# needs while keeping CO2 emissions low.

# "Menus" produced so far
instances = list()

# Helper: the rows of nutrition_cost whose "Product" equals `alim`
def aliment_row(alim):
    return nutrition_cost[nutrition_cost["Product"] == alim]

# Takes a list of tuples (aliment, mass) and returns the mass of CO2
def calculate_cost(menu):
    """Return the total CO2eq mass emitted to produce ``menu``.

    ``menu`` is an iterable of ``(aliment, quantity)`` tuples. Each quantity is
    multiplied by the aliment's "CO2eq Tn/Aliment Tn" ratio, so the result is
    in the same mass unit as the quantities. An empty menu costs 0.

    Raises ValueError if an aliment does not match exactly one row.
    """
    cost = 0
    for aliment, quantity in menu:
        # `.item()` extracts the scalar explicitly and checks there is exactly
        # one matching row; calling float() on a Series is deprecated.
        unit_cost = float(aliment_row(aliment)["CO2eq Tn/Aliment Tn"].item())
        cost += unit_cost * quantity
    return cost

# True once no nutrient has a strictly positive remaining need
def is_done(needs):
    """Tell whether every ``[nutrient, remaining]`` entry of ``needs`` is covered."""
    return not any(entry[1] > 0 for entry in needs)

# Nutrient with the largest remaining (strictly positive) need
def next_priority(needs):
    """Return the name of the nutrient with the largest remaining need.

    Only strictly positive needs count; on a tie the first entry wins.
    Returns None when nothing is left to cover.
    """
    outstanding = [entry for entry in needs if entry[1] > 0]
    if not outstanding:
        return None
    return max(outstanding, key=lambda entry: entry[1])[0]

# Remaining need for one nutrient
def need_for(needs, nutrient):
    """Return the remaining amount of the first entry named ``nutrient``, or None if absent."""
    return next((entry[1] for entry in needs if entry[0] == nutrient), None)

# Lowest-CO2 aliment recorded for a nutrient
def best_for(nutrient):
    """Return the "Best Aliment" stored in ``best_ingredients`` for ``nutrient``."""
    matches = best_ingredients["Nutrient"] == nutrient
    return best_ingredients.loc[matches, "Best Aliment"].item()

# Add an aliment to the menu, and subtract its nutrients from the needs
def fill(needs, menu, aliment, nutrient):
    """Add enough of ``aliment`` to ``menu`` to cover the need for ``nutrient``.

    ``needs`` is a list of mutable ``[nutrient, remaining]`` entries and is
    updated in place: the content of the added food is subtracted from every
    entry, so entries may become negative. ``menu`` receives one
    ``(aliment, quantity)`` tuple, the quantity being rounded up to a whole unit.

    Raises ZeroDivisionError if ``aliment`` contains none of ``nutrient``, and
    ValueError if ``aliment`` does not match exactly one row.
    """
    aliment_data = aliment_row(aliment)

    def content_per_unit(name):
        # `.item()` extracts the scalar explicitly and checks there is exactly
        # one matching row; calling float() on a Series is deprecated.
        return float(aliment_data[f"{name} g/Aliment g"].item())

    # Know how much food we need
    need = need_for(needs, nutrient)
    food_quantity = math.ceil(need / content_per_unit(nutrient))

    # Subtract what this amount of food provides from all needs
    for n_need in needs:
        n_need[1] -= food_quantity * content_per_unit(n_need[0])

    menu.append((aliment, food_quantity))

# One menu per nutrient: first cover that nutrient with its best aliment,
# then keep covering whichever nutrient is still needed the most.
for nutrient, aliment in zip(best_ingredients["Nutrient"], best_ingredients["Best Aliment"]):
    # Fresh list of [nutrient, needed daily] entries for this menu
    needs = [
        [name, float(daily_value)]
        for name, daily_value in zip(nutrient_needs["Nutrient"], nutrient_needs["Daily Value"])
    ]

    # List of (ingredient, quantity) tuples
    menu = []

    # Start from this menu's target nutrient
    fill(needs, menu, aliment, nutrient)

    # Then repeat for the largest remaining need until everything is covered
    while not is_done(needs):
        nutrient = next_priority(needs)
        aliment = best_for(nutrient)
        fill(needs, menu, aliment, nutrient)

    instances.append(menu)

print(f"Generated {len(instances)} menus")

Generated 29 menus


In [288]:
# Pair every menu with its CO2 cost and order the pairs, cheapest first
menus_with_costs = sorted((calculate_cost(menu), menu) for menu in instances)

# Merge repeated aliments of a menu into a single entry each
def simplify(menu):
    """Sum the quantities of identical aliments in ``menu``.

    Returns a list of ``(aliment, total quantity)`` tuples, ordered by first
    appearance of each aliment.
    """
    totals = {}
    for aliment, quantity in menu:
        totals[aliment] = totals.get(aliment, 0) + quantity
    return list(totals.items())

# The cheapest menu is the first one after sorting by cost
best_menu = menus_with_costs[0]
best_cost, best_items = best_menu

print('"Best" menu found:')
for aliment, quantity in simplify(best_items):
    print(f"  - {aliment}: {quantity}g")
print(f"It releases {best_cost}g of CO2.")

"Best" menu found:
  - Meat, chicken: 6800g
  - Milk, whole fresh goat: 96g
  - Cereals excluding rice: 637g
It releases 2109.65524179775g of CO2.
