In [1]:
from ydata-profiling import ProfileReport
from util import get_nutrition_data
import numpy as np
import pandas as pd
from bokeh.plotting import (
    figure,
    show,
    output_notebook,
    ColumnDataSource,
    output_file,
)
from bokeh.models import HoverTool, Span


In [2]:
nutri_df = get_nutrition_data()
nutri_df.head()

2022-11-20 21:47.30 [info     ] [get_worksheet]                workbook_name=nutrition worksheet_name=nutrition
2022-11-20 21:47.32 [info     ] [get_worksheet]                workbook_name=nutrition worksheet_name=nutrition-manual


Unnamed: 0,barcode,group,pantry_ingredient,product_name,quantity,brands,completeness,nutriscore_grade,level_fat,level_salt,level_saturated_fat,level_sugars,per_100g_carbohydrates,per_100g_energy_kcal,per_100g_fat,per_100g_fiber,per_100g_proteins,per_100g_salt,per_100g_saturated_fat,per_100g_sugars
0,4046700003756,Dairy products,15% fat cream,LAC Kochsahne,200 g,LAC,0.6875,d,moderate,low,high,low,4.1,163.0,15.0,0.0,2.9,0.11,9.4,4.1
1,4046700004234,Dairy products,30% fat cream,Schlagsahne,200 g,LAC,0.775,d,high,low,high,low,3.3,311.0,32.0,0.0,2.4,0.08,20.2,3.3
2,4008100120341,Sauces,apple cider vinegar,Bio Apfelessig,500ml,Hengstenberg,0.775,,low,low,low,low,0.7,19.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4311501711699,Canned,applesauce,Apfelmus,360g,Edeka,0.6875,a,low,low,low,moderate,12.5,57.0,0.1,0.0,0.3,0.01,0.05,11.6
4,4311501356517,Canned,artichoke hearts,Artischocken Herzen,240 g,Edeka,0.6875,a,low,moderate,low,low,4.0,27.0,0.0,0.0,1.4,0.8,0.0,1.3


In [3]:
nutri_df[nutri_df.pantry_ingredient.str.contains("ground beef-pork")]

Unnamed: 0,barcode,group,pantry_ingredient,product_name,quantity,brands,completeness,nutriscore_grade,level_fat,level_salt,level_saturated_fat,level_sugars,per_100g_carbohydrates,per_100g_energy_kcal,per_100g_fat,per_100g_fiber,per_100g_proteins,per_100g_salt,per_100g_saturated_fat,per_100g_sugars
140,4056489116066,Meats,ground beef-pork,Hackfleisch gemischt,500 g,Landjunker,0.6625,c,moderate,low,high,low,0.0,234.0,18.0,0.0,18.0,0.15,8.0,0.0


In [3]:
# https://www.omnicalculator.com/conversion/grams-to-calories#grams-to-calories-conversion
kcal_per1g_carbohydrates = 4
kcal_per1g_fat = 9
kcal_per1g_proteins = 4
kcal_per_1g_fiber = 1.7  # assumption, average

In [4]:
nutri_df["calc_per_100g_energy_kcal"] = (
    nutri_df["per_100g_carbohydrates"] * kcal_per1g_carbohydrates
    + nutri_df["per_100g_fat"] * kcal_per1g_fat
    + nutri_df["per_100g_proteins"] * kcal_per1g_proteins
    + nutri_df["per_100g_fiber"] * kcal_per_1g_fiber
)

In [5]:
# should double-check anything that differs more than e.g. 10 kCal
nutri_df["calc_diff_energy_kcal"] = (
    nutri_df["per_100g_energy_kcal"] - nutri_df["calc_per_100g_energy_kcal"]
)

nutri_df["rel_saturated_fats"] = (
    nutri_df["per_100g_saturated_fat"] / nutri_df["per_100g_fat"]
)

In [6]:
# daily recommended quantities
# general diet: around 10% from saturated fats
# heart diet: < 5-6 % of daily caloric intake should be from saturated fats
# nhs guidelines: 20/30 g per day (women/man)

daily_calories = {"alex": 2700, "ariel": 1800}

daily_max_guidelines_absolute = {
    "trans_fats_g": 5,
}

daily_max_guidelines_relative = {
    "saturated_fats_alex": 5,
    "saturated_fats_ariel": 7,
    "overall_fats": 35,
}

daily_max_guidelines_absolute["saturated_fats_alex"] = (
    daily_max_guidelines_relative["saturated_fats_alex"]
    / 100
    * daily_calories["alex"]
    / kcal_per1g_fat
)
daily_max_guidelines_absolute["saturated_fats_ariel"] = (
    daily_max_guidelines_relative["saturated_fats_ariel"]
    / 100
    * daily_calories["ariel"]
    / kcal_per1g_fat
)
daily_max_guidelines_absolute["overall_fats_alex"] = (
    daily_max_guidelines_relative["overall_fats"]
    / 100
    * daily_calories["alex"]
    / kcal_per1g_fat
)
daily_max_guidelines_absolute["overall_fats_ariel"] = (
    daily_max_guidelines_relative["overall_fats"]
    / 100
    * daily_calories["ariel"]
    / kcal_per1g_fat
)

print(daily_max_guidelines_absolute)

{'trans_fats_g': 5, 'saturated_fats_alex': 15.0, 'saturated_fats_ariel': 14.000000000000002, 'overall_fats_alex': 104.99999999999999, 'overall_fats_ariel': 70.0}


## Summary plot

In [7]:
sel_df = nutri_df[
    nutri_df.group.isin(["Dairy products", "Nuts and seeds", "Meats", "Fish"])
]

color_map = {
    "Dairy products": "green",
    "Nuts and seeds": "yellow",
    "Meats": "red",
    "Fish": "blue",
}

In [8]:
sel_df.loc[:, "marker_size"] = 35 * sel_df["rel_saturated_fats"]
sel_df.loc[:, "marker_color"] = sel_df["group"].map(color_map)

ds = ColumnDataSource(data=sel_df)

tooltips = [("(x,y)", "($x, $y)"), ("ingredient", "@pantry_ingredient")]

p = figure(
    title="Summary chart",
    x_axis_label="Energy per 100 g (kCal)",
    y_axis_label="Fat content per 100 g (g)",
    height=800,
    width=1600,
    tooltips=tooltips,
)

circles = p.circle(
    "per_100g_energy_kcal",
    "per_100g_fat",
    color="marker_color",
    size="marker_size",
    # legend="group",
    source=ds,
)
p.legend.location = "top_left"
p.legend.click_policy = "hide"

p.text(
    sel_df["per_100g_energy_kcal"],
    sel_df["per_100g_fat"] + 5,
    text=sel_df["pantry_ingredient"],
    text_color="white",
    text_align="center",
    text_font_size="8pt",
)

fat_line_alex = Span(
    location=daily_max_guidelines_absolute["overall_fats_alex"],
    dimension="width",
    line_color="red",
    line_width=3,
)
fat_line_ariel = Span(
    location=daily_max_guidelines_absolute["overall_fats_ariel"],
    dimension="width",
    line_color="blue",
    line_width=3,
)

p.renderers.extend([fat_line_ariel, fat_line_alex])

output_notebook()
show(p)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
You are attempting to set `plot.legend.location` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.

You are attempting to set `plot.legend.click_policy` on a plot that has zero legends added, this will have no effect.

Before legend properties can be set, you must add a Legend explicitly, or call a glyph method with a legend parameter set.



## Sat fat vs. fat chart

In [9]:
nutri_df.group.value_counts()


Dairy products      23
Sauces              19
Frozen goods        16
Prepared            13
Canned              13
Baking              12
Nuts and seeds       8
Meats                7
Grains               6
Juices               5
Spices and herbs     4
Fats and oils        4
Pasta                4
Beverages            2
Vegetables           2
Fish                 2
Fruits               1
Name: group, dtype: int64

In [10]:
sel_df = nutri_df[
    nutri_df.group.isin(
        [
            "Dairy products",
            "Nuts and seeds",
            "Meats",
            "Fish",
            "Grains",
            "Fats and oils",
            "Canned",
        ]
    )
]

color_map = {
    "Dairy products": "green",
    "Nuts and seeds": "yellow",
    "Meats": "red",
    "Fish": "blue",
    "Grains": "grey",
    "Fats and oils": "orange",
    "Canned": "purple",
}

In [11]:
show_text = True

sel_df.loc[:, "marker_size"] = 0.8 * sel_df["per_100g_proteins"]
sel_df.loc[:, "marker_color"] = sel_df["group"].map(color_map)

ds = ColumnDataSource(data=sel_df)

tooltips = [
    ("(x,y)", "($x, $y)"),
    ("ingredient", "@pantry_ingredient"),
    ("group", "@group"),
    ("kCal per 100g", "@per_100g_energy_kcal"),
    ("fat per 100g", "@per_100g_fat"),
    ("sat. fat per 100g", "@per_100g_saturated_fat"),
    ("proteins per 100g", "@per_100g_proteins"),
]

p = figure(
    title="Summary chart",
    x_axis_label="Fat (g) per 100 g",
    y_axis_label="Saturated fat (g) per 100 g",
    height=800,
    width=1600,
    tooltips=tooltips,
)

for group_name, group_df in sel_df.groupby("group"):
    circles = p.circle(
        "per_100g_fat",
        "per_100g_saturated_fat",
        color="marker_color",
        size="marker_size",
        legend_label=group_name,
        source=ColumnDataSource(data=group_df),
    )

    if show_text:
        p.text(
            group_df["per_100g_fat"],
            group_df["per_100g_saturated_fat"] + 0.5,
            text=group_df["pantry_ingredient"],
            text_color=group_df["marker_color"].iloc[0],
            text_align="center",
            angle=np.radians(0),
            text_font_size="6pt",
            legend_label=group_name,
        )


p.legend.location = "top_left"
p.legend.click_policy = "hide"

fat_line_alex = Span(
    location=daily_max_guidelines_absolute["saturated_fats_alex"],
    dimension="width",
    line_color="red",
    line_width=3,
)
p.text(
    [10],
    [daily_max_guidelines_absolute["saturated_fats_alex"]],
    text="Daily saturated fat limit Alex",
    text_color="red",
    text_align="center",
    text_font_size="8pt",
)

fat_line_ariel = Span(
    location=daily_max_guidelines_absolute["saturated_fats_ariel"],
    dimension="width",
    line_color="blue",
    line_width=3,
)
p.text(
    10,
    daily_max_guidelines_absolute["saturated_fats_ariel"],
    text="Daily saturated fat limit Ariel",
    text_color="blue",
    text_align="center",
    text_font_size="8pt",
)


p.renderers.extend([fat_line_ariel, fat_line_alex])

# output_notebook()
output_file("sat_vs_overall_fats.html")
show(p)
