Some calculations on nutrients

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Supplementary reference data

# Reference amino acid pattern for humans, in mg of amino acid per g of protein.
# Source as noted by the author: ChatGPT.
# TODO: verify these numbers against the WHO reference pattern.
aa_ref = {
  "Histidin": 15,
  "Isoleucin": 30,
  "Leucin": 59,
  "Lysin": 45,
  "Methionin + Cystein": 22,
  "Phenylalanin + Tyrosin": 38,
  "Threonin": 23,
  "Tryptophan": 6,
  "Valin": 39,
}

# One "<amino acid> score" column name per reference entry, in reference order.
score_cols = [f"{amino_acid} score" for amino_acid in aa_ref]

In [None]:
# Load the first worksheet of the workbook; the relative path is resolved
# against the current working directory.
df = pd.read_excel(io="BLS_4_0_Daten_2025_DE.xlsx", sheet_name=0)

# Quick look at the first rows to confirm the load worked.
df.head()

In [None]:
def get_instances_by_name(name):
  """Return the rows of the global `df` whose food name contains `name`.

  The match is case-insensitive and is done on the
  'Lebensmittelbezeichnung' column; rows with a missing name never match.
  `name` is passed to `Series.str.contains` unchanged, so it is
  interpreted as a regular expression (the pandas default).
  """
  food_names = df['Lebensmittelbezeichnung']
  is_match = food_names.str.contains(name, case=False, na=False)
  return df[is_match]

def get_veggie_only(df):
  """Drop every row whose food name contains a non-vegetarian keyword.

  The keyword list has to be adapted whenever foods show up that contain
  other non-vegetarian ingredients. Some words (for example "bolognese")
  are better left out of the list, because they can appear next to
  "veggie" or similar within a single food name.

  Matching is a case-insensitive substring search on the
  'Lebensmittelbezeichnung' column; rows with a missing name are kept.
  """
  non_veggie_keywords = (
    "Fleisch", "Schwein", "pork", "Kuh", "Lamm", "Wurst", "Würst",
    "Mortadella", "Leber", "Salami", "Schinken", "Fisch", "Schnecke", "Krebs",
  )
  # Alternation pattern: a name matches if it contains any one keyword.
  pattern = "|".join(non_veggie_keywords)
  is_non_veggie = df['Lebensmittelbezeichnung'].str.contains(pattern, case=False, na=False)
  return df[~is_non_veggie]

In [None]:
def compute_aa_score(row):
  """Score the essential amino acids of one food against `aa_ref`.

  Each score is (mg amino acid per g protein) / (reference mg per g protein),
  so 1.0 means the food exactly meets the reference for that amino acid.

  Parameters:
    row: mapping or pandas Series holding the protein column and the
      amino acid columns in g per 100 g food.

  Returns:
    A tuple with one score per key of `aa_ref`, in the key order of
    `aa_ref`. That is the same order as the "<amino acid> score" column
    names derived from `aa_ref`, which callers assign positionally.
    If the protein amount is 0 or missing, every entry is NaN.
  """
  protein = row["PROT625 Protein (Nx6,25) [g/100g]"]

  # Without a usable protein amount no ratio can be formed. Return one NaN per
  # score so the result has the same length as in the regular case.
  if pd.isna(protein) or protein == 0:
    return (np.nan,) * len(aa_ref)

  # (g amino acid / 100 g food) / ((g protein / 100 g food) / 1000)
  # = mg amino acid per g protein.
  unit_convert_divisor = protein / 1000

  # The reference value is for methionine and cysteine combined, so add
  # cysteine when the row provides it; otherwise fall back to methionine alone.
  methionine = row["MET Methionin [g/100g]"]
  cysteine = row.get("CYSTE Cystein [g/100g]", np.nan)
  sulfur_amino_acids = methionine if pd.isna(cysteine) else methionine + cysteine

  amounts = {
    "Histidin": row["HIS Histidin [g/100g]"],
    "Isoleucin": row["ILE Isoleucin [g/100g]"],
    "Leucin": row["LEU Leucin [g/100g]"],
    "Lysin": row["LYS Lysin [g/100g]"],
    "Methionin + Cystein": sulfur_amino_acids,
    # TODO: the reference is for phenylalanine + tyrosine, but only
    # phenylalanine is read here; add tyrosine once its column name is confirmed.
    "Phenylalanin + Tyrosin": row["PHE Phenylalanin [g/100g]"],
    "Threonin": row["THR Threonin [g/100g]"],
    "Tryptophan": row["TRP Tryptophan [g/100g]"],
    "Valin": row["VAL Valin [g/100g]"],
  }

  # Emit the scores in aa_ref order so they line up with the score columns.
  return tuple(
    amounts[name] / unit_convert_divisor / aa_ref[name] for name in aa_ref
  )
  


# Returns the amino acid scores of every food whose name contains the given substring
def get_aa_scores(name):
  """Compute the amino acid scores for all foods matching `name`.

  Foods are looked up with `get_instances_by_name`, their protein and amino
  acid columns are coerced to numbers (unparseable values become NaN), and
  `compute_aa_score` is applied to every row.

  Returns:
    A DataFrame with the columns 'Lebensmittelbezeichnung', the protein
    column and one "<amino acid> score" column per key of `aa_ref`.
    If nothing matches, a message is printed and an empty DataFrame with
    the same columns is returned.
  """
  name_col = "Lebensmittelbezeichnung"
  protein_col = "PROT625 Protein (Nx6,25) [g/100g]"
  aa_columns = [
    protein_col,
    "HIS Histidin [g/100g]",
    "ILE Isoleucin [g/100g]",
    "LEU Leucin [g/100g]",
    "LYS Lysin [g/100g]",
    "MET Methionin [g/100g]",
    "PHE Phenylalanin [g/100g]",
    "THR Threonin [g/100g]",
    "TRP Tryptophan [g/100g]",
    "VAL Valin [g/100g]",
    "CYSTE Cystein [g/100g]"
  ]
  score_cols = [f"{k} score" for k in aa_ref.keys()]
  result_cols = [name_col, protein_col] + score_cols

  # Work on an independent copy: the columns are overwritten below, and writing
  # into a selection of the source frame triggers SettingWithCopyWarning.
  products = get_instances_by_name(name)[[name_col] + aa_columns].copy()

  if products.empty:
    print("No products found!")
    # Same schema as the regular result so callers can index the score columns.
    return products.reindex(columns=result_cols)

  products[aa_columns] = products[aa_columns].apply(
    pd.to_numeric,
    errors="coerce"
  )
  # The expanded result has positional columns 0..n-1; they are assigned to
  # score_cols in order.
  products[score_cols] = products.apply(
    compute_aa_score,
    axis=1,
    result_type="expand"
  )

  return products[result_cols]

# Preview: the ten remaining foods with the highest value in 'Lysin score'.
(
  get_veggie_only(get_aa_scores(""))
  .sort_values('Lysin score', ascending=False)
  .head(10)
)


In [None]:
def combine_products(product_names):
  """Average the amino acid scores of several product searches row by row.

  Every name is looked up with `get_aa_scores`. The i-th match of each
  search is combined with the i-th match of the others; only as many rows as
  the shortest result are used. Food names are joined with " + " and every
  score column is replaced by the mean over the products. All other columns
  (and the index) are taken from the first product's result.

  NOTE(review): a plain mean treats every product as contributing the same
  amount of protein; confirm whether a protein-weighted mix is intended.

  Parameters:
    product_names: sequence of search strings, one per product.

  Returns:
    The combined DataFrame, an empty DataFrame if any search has no
    matches, or None (after printing a message) if no names were given.
  """
  if len(product_names) == 0:
    print("No names specified")
    return

  name_col = "Lebensmittelbezeichnung"
  cols = list(score_cols)
  score_frames = [get_aa_scores(product) for product in product_names]
  n = min(len(frame) for frame in score_frames)

  if n == 0:
    # At least one search matched nothing, so there is nothing to combine.
    expected = [name_col, "PROT625 Protein (Nx6,25) [g/100g]"] + cols
    return score_frames[0].iloc[:0].reindex(columns=expected)

  # Cut every frame to the common length first; assigning an n-row array into
  # a longer frame would raise a length-mismatch error.
  combination = score_frames[0].iloc[:n].copy()
  for other in score_frames[1:]:
    other = other.iloc[:n]
    # Combine by position (not by index label), hence plain arrays.
    combination[cols] = combination[cols].to_numpy() + other[cols].to_numpy()
    combination[name_col] = (
      combination[name_col].to_numpy() + " + " + other[name_col].to_numpy()
    )
  combination[cols] = combination[cols] / len(score_frames)
  return combination

# Example call: combine the matches of two search terms.
combine_products(product_names=["Hafer flocken", "Vollmilch "])

In [48]:
# Rank the foods that pass the veggie filter by "BW", the lowest of their
# amino acid scores (missing scores are skipped by min).
# .assign builds a new frame instead of writing a column into the filtered
# result of get_veggie_only, which avoids SettingWithCopyWarning.
all_veggie_products = get_veggie_only(get_aa_scores("")).assign(
  BW=lambda frame: frame[score_cols].min(axis=1)
)

# Show the 50 foods with the highest BW together with their protein content.
(
  all_veggie_products[["Lebensmittelbezeichnung", "BW", "PROT625 Protein (Nx6,25) [g/100g]"]]
  .sort_values("BW", ascending=False)
  .head(50)
)

Unnamed: 0,Lebensmittelbezeichnung,BW,"PROT625 Protein (Nx6,25) [g/100g]"
371,"Malzgetränk, alkoholfrei",8.77193,0.38
839,Süßrahmbutter,1.723643,0.59
6871,Pastete im Brotteig gebacken,1.649008,19.53
5695,"Sahnejoghurt mind. 10 % Fett, mit Fruchtzubere...",1.623377,2.8
2699,"Hühnerei Eiklar, gekocht",1.616162,9.9
233,"Hühnerei Eiklar, roh",1.616162,9.9
3495,"Hühnerei Eiklar, pochiert",1.616043,10.1
3723,"Hühnerei Eiklar, gebraten ohne Fett (Pfanne)",1.615612,10.9
3256,"Hühnerei Eiklar, gebacken",1.61524,11.7
1798,"Sahnejoghurt mind. 10 % Fett, mit Vanillezuber...",1.614435,2.7
