Some calculations on nutriments

In [1]:
import pandas as pd
import numpy as np

In [13]:
# Provision of further data

# Amino acid reference for a human(mg/g Protein), Source: ChatGPT (->WHO?)
aa_ref = {
  "Histidin": 15,
  "Isoleucin": 30,
  "Leucin": 59,
  "Lysin": 45,
  "Methionin + Cystein": 22,
  "Phenylalanin + Tyrosin": 38,
  "Threonin": 23,
  "Tryptophan": 6,
  "Valin": 39,

}

score_cols = [f"{k} score" for k in aa_ref.keys()]

In [3]:
df = pd.read_excel("BLS_4_0_Daten_2025_DE.xlsx", sheet_name=0)

df.head()

Unnamed: 0,BLS Code,Lebensmittelbezeichnung,Food name,ENERCJ Energie (Kilojoule) [kJ/100g],ENERCJ Datenherkunft,ENERCJ Referenz,ENERCC Energie (Kilokalorien) [kcal/100g],ENERCC Datenherkunft,ENERCC Referenz,WATER Wasser [g/100g],...,TYR Tyrosin [g/100g],TYR Datenherkunft,TYR Referenz,VAL Valin [g/100g],VAL Datenherkunft,VAL Referenz,"NT Stickstoff, gesamt [g/100g]",NT Datenherkunft,NT Referenz,Hinweis
0,C131000,"Hafer ganzes Korn, roh","Oat whole grain, raw",1443,Formelberechnung,-,343,Formelberechnung,-,11.45,...,0.424,Nährstoffdatenbank,"Kirchhoff, E; Souci - Fachmann - Kraut - Die Z...",0.6425,Nährstoffdatenbank,"Kirchhoff, E; Souci - Fachmann - Kraut - Die Z...",1.724,Aggregation,"Converted value from: Biel, W., et al.; Chemic...",
1,C133000,Hafer Flocken,Oat flakes,1465,Formelberechnung,-,348,Formelberechnung,-,10.07,...,0.57,Nährstoffdatenbank,"Kirchhoff, E; Souci - Fachmann - Kraut - Die Z...",0.81,Nährstoffdatenbank,"Kirchhoff, E; Souci - Fachmann - Kraut - Die Z...",2.115,Analyse,-,
2,C141000,"Gerste ganzes Korn, roh","Barley whole grain, raw",1403,Formelberechnung,-,332,Formelberechnung,-,12.7,...,0.341,Nährstoffdatenbank,"Kirchhoff, E; Souci - Fachmann - Kraut - Die Z...",0.5965,Nährstoffdatenbank,"Kirchhoff, E; Souci - Fachmann - Kraut - Die Z...",1.72,Literatur,Converted value from: Koivistoinen P; Mineral ...,
3,C243000,Gerste Mehl,Barley flour,1434,Formelberechnung,-,340,Formelberechnung,-,6.06,...,0.301,Nährstoffdatenbank,"US Department of Agriculture, Agricultural Res...",0.515,Nährstoffdatenbank,"US Department of Agriculture, Agricultural Res...",1.6,Aggregation,"U.S. Department of Agriculture (USDA), Agricul...",
4,C341000,Mais roh,Maize raw,1418,Formelberechnung,-,336,Formelberechnung,-,11.192,...,0.3935,Nährstoffdatenbank,"Kirchhoff, E; Souci - Fachmann - Kraut - Die Z...",0.4535,Nährstoffdatenbank,"Kirchhoff, E; Souci - Fachmann - Kraut - Die Z...",1.385,Nährstoffdatenbank,"Converted value from: Kirchhoff, E; Souci - Fa...",


In [4]:
def get_instances_by_name(name):
  return df[df['Lebensmittelbezeichnung'].str.contains(name, case=False, na=False)]

# This function has to be adopted when nutriments occur that include non veggie products
# However, it may be beneficial to exclude some words for example "bolognese" because they could appear next to "veggie" or similar in one instance
def get_veggie_only(df):
  non_veggie_keywords = ("Fleisch", "Schwein", "pork", "Kuh", "Lamm", "Wurst", "Würst", "Mortadella", "Leber", "Salami", "Schinken", "Fisch", "Schnecke", "Krebs")
  keyword_str = "|".join(non_veggie_keywords)
  return df[~df['Lebensmittelbezeichnung'].str.contains(keyword_str, case=False, na=False)]

In [10]:
def compute_aa_score(row):
  protein = row["PROT625 Protein (Nx6,25) [g/100g]"]

  if protein == 0 or pd.isna(protein):
      return (np.nan) * 9    # ALWAYS two values

  unit_convert_divisor = protein / 1000 # The value needs to be converted into mg per g Protein
  scores = {
        "Isoleucin": row["ILE Isoleucin [g/100g]"] / unit_convert_divisor / aa_ref["Isoleucin"],
        "Leucin": row["LEU Leucin [g/100g]"] / unit_convert_divisor / aa_ref["Leucin"],
        "Lysin": row["LYS Lysin [g/100g]"] / unit_convert_divisor / aa_ref["Lysin"],
        "Methionin + Cystein": row["MET Methionin [g/100g]"] / unit_convert_divisor / aa_ref["Methionin + Cystein"],
        "Phenylalanin + Tyrosin": row["PHE Phenylalanin [g/100g]"] / unit_convert_divisor / aa_ref["Phenylalanin + Tyrosin"],
        "Threonin": row["THR Threonin [g/100g]"] / unit_convert_divisor / aa_ref["Threonin"],
        "Tryptophan": row["TRP Tryptophan [g/100g]"] / unit_convert_divisor / aa_ref["Tryptophan"],
        "Valin": row["VAL Valin [g/100g]"] / unit_convert_divisor / aa_ref["Valin"],
        "Histidin": row["HIS Histidin [g/100g]"] / unit_convert_divisor / aa_ref["Histidin"],
  }


  aa_name, min_score = min(scores.items(), key=lambda x: x[1])
  return scores["Isoleucin"], scores["Leucin"], scores["Lysin"], scores["Methionin + Cystein"], scores["Phenylalanin + Tyrosin"], scores["Threonin"], scores["Tryptophan"], scores["Valin"], scores["Histidin"]
  


# The function returns the approximated biological values for all instances that contains the given name as substring
def get_aa_scores(name):
  aa_columns = [
    "PROT625 Protein (Nx6,25) [g/100g]",
    "HIS Histidin [g/100g]",
    "ILE Isoleucin [g/100g]",
    "LEU Leucin [g/100g]",
    "LYS Lysin [g/100g]",
    "MET Methionin [g/100g]",
    "PHE Phenylalanin [g/100g]",
    "THR Threonin [g/100g]",
    "TRP Tryptophan [g/100g]",
    "VAL Valin [g/100g]",
    "CYSTE Cystein [g/100g]"
  ]
  products = get_instances_by_name(name)[[
    "Lebensmittelbezeichnung"] + aa_columns]

  if products.empty:
    print("No products found!")
    products["Biologische Wertigkeit"] = np.nan
    products["Limitierende Aminosäure"] = None
    return products[[
        "Lebensmittelbezeichnung",
        "Biologische Wertigkeit",
        "Limitierende Aminosäure"
    ]]

  products[aa_columns] = products[aa_columns].apply(
    pd.to_numeric,
    errors="coerce"
  )
  score_cols = [f"{k} score" for k in aa_ref.keys()]
  products[score_cols] = (
    products.apply(
        compute_aa_score,
        axis=1,
        result_type="expand"
    )
  )

  return products[["Lebensmittelbezeichnung", "PROT625 Protein (Nx6,25) [g/100g]"] + score_cols]

get_veggie_only(get_aa_scores("")).sort_values('Lysin score', ascending=False).head(10)


Unnamed: 0,Lebensmittelbezeichnung,"PROT625 Protein (Nx6,25) [g/100g]",Histidin score,Isoleucin score,Leucin score,Lysin score,Methionin + Cystein score,Phenylalanin + Tyrosin score,Threonin score,Tryptophan score,Valin score
308,Paranuss,16.96,1.041667,1.239207,0.694444,2.974914,1.101663,1.179245,1.867138,1.254838,1.611635
2932,Paranuss geröstet ohne Fett,16.96,1.041667,1.239207,0.694444,2.974914,1.101663,1.179245,1.867138,1.254838,1.611635
1078,Greyerzer mind. 50 % Fett i. Tr.,26.9,1.648079,1.531094,1.792648,2.652923,1.389161,2.424438,2.60223,1.639501,1.957869
1697,Kolbacz,20.04,1.902861,1.617951,1.291861,2.234168,2.378138,2.468975,2.112442,1.576335,3.45642
2214,Aufschnitt-Grundbrät fettreduziert,15.06,1.817618,1.542159,1.252988,2.184293,2.186832,2.286737,2.079239,1.49622,3.480921
6982,Champignonpastete,12.81,1.818891,1.54408,1.325353,2.089987,2.152923,2.307979,2.029664,1.511239,3.398387
6871,Pastete im Brotteig gebacken,19.53,1.863799,1.67669,1.813734,2.07606,1.952462,2.424364,1.996928,1.649008,3.474996
3908,Geselchte,14.21,1.844851,1.563079,1.391274,2.065831,2.125949,2.336995,2.072015,1.527978,3.336477
3147,Wachtelei gebraten ohne Fett (Pfanne),14.507,1.555571,1.455755,1.65131,2.046034,1.72512,2.50254,3.37768,1.771028,1.663565
2321,Wachtelei gekocht,13.175,1.555977,1.456276,1.651276,2.045886,1.725757,2.501444,3.377609,1.771031,1.664769


In [28]:
def combine_products(product_names):
  if len(product_names) == 0:
    print("No names specified")
    return
  product_score_matrix = []
  for product in product_names:
    product_score_matrix.append(get_aa_scores(product))
  n = min(len(df) for df in product_score_matrix)
  combination = product_score_matrix[0]
  for i in range(1, len(product_score_matrix)):
    combination[list(score_cols)] = combination[list(score_cols)].iloc[:n].values + product_score_matrix[i][score_cols].iloc[:n].values
    combination["Lebensmittelbezeichnung"] = combination["Lebensmittelbezeichnung"].iloc[:n].values + " + " + product_score_matrix[i]["Lebensmittelbezeichnung"].iloc[:n].values
  combination[list(score_cols)] /= len(product_score_matrix)
  return combination

combine_products(["Hafer flocken", "Vollmilch "])

Unnamed: 0,Lebensmittelbezeichnung,"PROT625 Protein (Nx6,25) [g/100g]",Histidin score,Isoleucin score,Leucin score,Lysin score,Methionin + Cystein score,Phenylalanin + Tyrosin score,Threonin score,Tryptophan score,Valin score
1,"Hafer Flocken + Vollmilch frisch, 3,5 % Fett, ...",13.22,1.567159,1.464411,1.327907,0.924762,1.369368,1.728856,2.347915,1.507806,1.6015
3274,"Hafer Flocken, gekocht + H-Vollmilch 3,5 % Fet...",2.499,1.71037,1.631801,1.400728,1.052479,1.477403,1.895344,2.458342,1.671883,1.808522
