# Estimate Ingredient Quantities from Nutrient Info

Given a food product with an FDA label listing $d$ nutrient quantities (e.g. calories, carbohydrates) and $p$ ingredients ordered by weight, we calculate the quantity of each ingredient in the product.

We take a constrained optimization approach, seeking the least-squares solution $x$ of the linear system $Ax=b$, where $A$ is a $d \times p$ matrix describing the nutrient contents of the ingredients, $b$ is a vector listing the total nutrient contents of the product, and $x$ is the vector of ingredient quantities.

The system is subject to the following constraints:
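- All ingredient quantities are non-negative ($x \ge 0$)
- The ingredient quantities are ordered ($x_{i+1} \ge x_i$): ingredients are indexed from the smallest quantity to the largest, i.e. in the reverse of the order printed in the ingredients list.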

**NOTE:** When the number of ingredients is greater than the number of nutrients on the FDA label ($p>d$), the solution is in general not unique, and further analysis is required to construct bounds on the ingredient quantities.

## Imports

In [1]:
import numpy as np
import scipy.optimize as optim

## Synthesize Data

In [2]:
def gen_data(d, p):
    """
    Build a random, noise-free test problem.

    d = Number of FDA requirements (known); number of rows of A
    p = Number of ingredients; number of columns of A

    Returns the tuple (A, x, b) where b = A @ x.
    """
    # Nutrient contents of the ingredients: one column per ingredient, one
    # row per FDA nutrient.  Columns follow increasing ingredient quantity
    # (the first column belongs to the smallest ingredient).
    nutrient_matrix = np.random.rand(d, p)

    # Ground-truth ingredient quantities, smallest first.
    quantities = np.random.rand(p)
    quantities.sort()

    # Nutrient totals of the finished product.
    totals = nutrient_matrix @ quantities

    return nutrient_matrix, quantities, totals

## Solve optimization problem

In [3]:
def estim_quantities(A, b, tol=None):
    """
    Estimate ingredient quantities from nutrient data.

    Minimizes the squared residual ||A x - b||^2 subject to
    0 <= x[0] <= x[1] <= ... <= x[p-1], where p is the number of columns
    of A.

    A   = (d, p) nutrient contents of the ingredients, one column per
          ingredient, ordered from smallest to largest quantity
    b   = length-d nutrient totals of the product
    tol = termination tolerance forwarded to scipy.optimize.minimize

    Returns the scipy OptimizeResult; the estimated quantities are in `.x`.
    """
    A = np.asarray(A, dtype=float)
    b = np.asarray(b, dtype=float)

    # The number of ingredients is a property of A; do not depend on a
    # module-level `p` that may be missing or out of sync with A.
    p = A.shape[1]

    # Objective function
    def obj(x):
        return np.sum((A @ x - b) ** 2)

    # Ordering constraint matrix, each row: c_i^T * x >= 0
    #   row 0: x_0 >= 0
    #   row i: x_i - x_{i-1} >= 0
    # i.e. ones on the diagonal and -1 on the first sub-diagonal.
    C = np.eye(p) - np.eye(p, k=-1)

    # non-negative constraint (0 <= c_i^T * x <= inf)
    lin_const = optim.LinearConstraint(C, 0, np.inf, keep_feasible=False)

    # Solve, starting from the all-zero vector
    result = optim.minimize(
        obj,
        np.zeros(p),
        method="COBYLA",
        constraints=lin_const,
        tol=tol,
    )

    return result

## Apply some noise to simulate real data

In [4]:
def apply_noise(A, z):
    """
    Return A with every entry scaled by (1 + z * u), where each u is drawn
    independently and uniformly from [-1, 1).
    """
    # rand() samples [0, 1); shift and stretch it onto [-1, 1).
    unit_noise = 2 * (np.random.rand(*np.shape(A)) - 0.5)
    return A * (1.0 + z * unit_noise)

## Test it out!

In [5]:
# Problem dimensions
d = 10  # number of FDA requirements (known)
p = 5   # number of ingredients

# Relative noise amplitudes
zA = 0.05  # applied to A (reported FDA vals. for ingredients)
zb = 0.05  # applied to b (reported FDA vals. for product)

# Exact synthetic problem, then a perturbed copy of it
A, x, b = gen_data(d, p)
A_noise = apply_noise(A, zA)
b_noise = apply_noise(b, zb)

# Both error figures below are relative to the norm of the true x
x_norm = np.linalg.norm(x)

print()

print("Without noise:")
res = estim_quantities(A, b)
x_star = res.x
print("rel. err. = {:.2e}".format(np.linalg.norm(x - x_star) / x_norm))
print()

print("With noise (zA={:.2e}, zb={:.2e}):".format(zA, zb))
res = estim_quantities(A_noise, b_noise)
x_star = res.x
print("rel. err. = {:.2e}".format(np.linalg.norm(x - x_star) / x_norm))


Without noise:
rel. err. = 2.69e-03

With noise (zA=5.00e-02, zb=5.00e-02):
rel. err. = 1.47e-01


In [6]:
import qgrid
import pandas as pd

In [7]:
import ipywidgets as ipw

In [8]:
from IPython.display import display, Markdown

## FDA Nutrient Table

In [112]:
def disp_nutrient_table(nutrients, vals):
    """
    Show nutrient names and values as a two-column Markdown table.

    nutrients = iterable of nutrient names
    vals      = iterable of numeric values, paired with `nutrients` by
                position and shown with two decimals
    """
    header = "| Nutrient | Value |\n|---|---|\n"
    rows = []
    for nutrient, val in zip(nutrients, vals):
        rows.append("| {} | {:.2f} |".format(nutrient, val))
    display(Markdown(header + '\n'.join(rows)))

    
def gen_fda_label(ingredients, nutrients, vals):
    """
    Build a widget that mimics an FDA nutrition label.

    ingredients = iterable of ingredient names, joined with ', ' in the
                  order given
    nutrients   = iterable of nutrient names for the table
    vals        = iterable of numeric values, paired with `nutrients` by
                  position

    Returns an ipywidgets VBox holding a heading, the nutrient table, a
    5px spacer and the ingredients line.
    """
    # Render the Markdown table inside an Output widget so it can be placed
    # in the VBox alongside the other widgets.
    inner_tab = ipw.Output()
    with inner_tab:
        disp_nutrient_table(nutrients, vals)

    return ipw.VBox(
        [
            ipw.HTML("<h2>Nutrition Facts</h2>"),
            inner_tab,
            ipw.Box(layout=ipw.Layout(height='5px')),
            ipw.HTML("<b>Ingredients:</b> {}".format(', '.join(ingredients))),
        ],
        layout=ipw.Layout(
            border='1px solid black',
            width='200px',
            align_items='center',
        ),
    )

In [113]:
# Placeholder ingredient names used to try out the label widget.
ingredients = [
    'tomato',
    'sugar',
    'onion',
    'garlic',
    'squash'
]

In [116]:
# NOTE(review): `nutrients` and `vals` are not defined in any visible cell —
# confirm they exist before re-running this cell.
gen_fda_label(ingredients, nutrients, vals)

VBox(children=(HTML(value='<h2>Nutrition Facts</h2>'), Output(), Box(layout=Layout(height='5px')), HTML(value=…

## Ingredients Nutrition Data

In [177]:
def parse_fda_db_csv(filename):
    """
    Read one ingredient's nutrient CSV and return its per-gram values.

    The first four lines of the file are skipped; the remainder must provide
    the columns 'Nutrient', 'Unit' and '1Value per 100 g'.

    Returns a dict mapping 'Label name (unit/g)' to the value per gram.
    Only label nutrients that occur in the file are included; when a
    nutrient occurs on several rows, the first row is used.
    """
    # FDA label name -> nutrient name as written in the CSV
    label_to_db = {
        'Calories': 'Energy',
        'Total Fat': 'Total lipid (fat)',
        'Cholesterol': 'Cholesterol',
        'Sodium': 'Sodium, Na',
        'Total Carbohydrate': 'Carbohydrate, by difference',
        'Dietary Fiber': 'Fiber, total dietary',
        'Total Sugars': 'Sugars, total',
        'Protein': 'Protein'
    }

    table = pd.read_csv(filename, skiprows=4, encoding='latin1')

    per_gram = {}
    for label_name, db_name in label_to_db.items():
        rows = table.loc[table['Nutrient'] == db_name]
        if rows.empty:
            # Nutrient not reported for this ingredient: leave it out.
            continue
        # Convert from value per 100g to value per 1g
        value = rows['1Value per 100 g'].iloc[0] / 100
        unit = rows['Unit'].iloc[0]
        per_gram['{} ({}/g)'.format(label_name, unit)] = value
    return per_gram

In [178]:
# Try the parser on a single ingredient file.
parse_fda_db_csv('garlic.csv')

{'Calories (kcal/g)': 1.49,
 'Total Fat (g/g)': 0.005,
 'Cholesterol (mg/g)': 0.0,
 'Sodium (mg/g)': 0.17,
 'Total Carbohydrate (g/g)': 0.3306,
 'Dietary Fiber (g/g)': 0.021,
 'Total Sugars (g/g)': 0.01,
 'Protein (g/g)': 0.0636}

In [179]:
# Ingredient names; each one is lower-cased to locate its CSV file when the
# nutrient data is loaded.
ingredients = [
    'Broccoli',
    'Carrots',
    'Onions',
    'Garlic',
    'Tomatoes'
]

In [184]:
# ingredient name -> per-gram nutrient dict, parsed from
# ingredient_data/<lower-cased ingredient name>.csv
nutrient_info = {
    ingredient: parse_fda_db_csv('ingredient_data/{}.csv'.format(ingredient.lower()))
    for ingredient in ingredients
}

In [185]:
# Tabular view of the parsed data: one column per ingredient, one row per
# nutrient.
nutrient_info_df = pd.DataFrame(nutrient_info)
nutrient_info_df

Unnamed: 0,Broccoli,Carrots,Onions,Garlic,Tomatoes
Calories (kcal/g),0.34,0.34,0.4,1.49,0.18
Cholesterol (mg/g),0.0,0.0,0.0,0.0,0.0
Dietary Fiber (g/g),0.026,0.026,0.017,0.021,0.012
Protein (g/g),0.0282,0.0282,0.011,0.0636,0.0088
Sodium (mg/g),0.33,0.33,0.04,0.17,0.05
Total Carbohydrate (g/g),0.0664,0.0664,0.0934,0.3306,0.0389
Total Fat (g/g),0.0037,0.0037,0.001,0.005,0.002
Total Sugars (g/g),0.017,0.017,0.0424,0.01,0.0263


In [222]:
# Example recipe: ingredient name -> quantity in grams.
example_recipe = {
    'Tomatoes': 100,
    'Onions': 50,
    'Garlic': 30,
    'Broccoli': 50,
    'Carrots': 60
}

In [223]:
def calculate_nutrition_facts(recipe, nutrient_info):
    """
    Total up the nutrients contributed by every ingredient of a recipe.

    recipe        = dict: ingredient name -> quantity in grams
    nutrient_info = dict: ingredient name -> {nutrient name: value per gram}

    Returns a dict: nutrient name -> total over the recipe.  Every nutrient
    listed for any ingredient in `nutrient_info` is present (0.0 when no
    recipe ingredient contributes to it).  Keys are copied unchanged from
    `nutrient_info`, so a '/g' suffix in a key is carried over even though
    the values are totals.

    Raises KeyError if a recipe ingredient is missing from `nutrient_info`.
    """
    # Seed the totals from the nutrients of *all* ingredients, not just the
    # first one: ingredients may report different nutrient sets, and
    # `nutrient_info` may be empty.
    nutrition_facts = {
        nutrient: 0.0
        for per_gram in nutrient_info.values()
        for nutrient in per_gram
    }

    # quantities in grams
    for ingredient, quantity in recipe.items():
        for nutrient, value in nutrient_info[ingredient].items():
            nutrition_facts[nutrient] += quantity * value

    return nutrition_facts

In [224]:
def sort_ingredient_list(recipe):
    """
    Return the recipe's ingredient names in decreasing order of quantity in
    grams.  Ingredients with equal quantities come out in the reverse of
    their order in `recipe`.
    """
    ascending = sorted(recipe.items(), key=lambda item: item[1])
    return [name for name, _ in ascending[::-1]]

In [225]:
# Ingredient names of the example recipe, largest quantity first.
ingredient_list = sort_ingredient_list(example_recipe)
ingredient_list

['Tomatoes', 'Carrots', 'Broccoli', 'Onions', 'Garlic']

In [227]:
# Nutrient totals for the example recipe.
nutrition_facts = calculate_nutrition_facts(example_recipe, nutrient_info)
nutrition_facts

{'Calories (kcal/g)': 120.10000000000001,
 'Total Fat (g/g)': 0.8069999999999999,
 'Cholesterol (mg/g)': 0.0,
 'Sodium (mg/g)': 48.400000000000006,
 'Total Carbohydrate (g/g)': 25.782000000000004,
 'Dietary Fiber (g/g)': 5.539999999999999,
 'Total Sugars (g/g)': 6.92,
 'Protein (g/g)': 6.44}

In [228]:
# Label widget for the example recipe, built from the sorted ingredient
# names and the computed totals.
gen_fda_label(ingredient_list, nutrition_facts.keys(), nutrition_facts.values())

VBox(children=(HTML(value='<h2>Nutrition Facts</h2>'), Output(), Box(layout=Layout(height='5px')), HTML(value=…

In [230]:
def disp_fda_label_from_recipe(recipe, nutrition_facts):
    """
    Display the FDA-style label for `recipe` (ingredient name -> grams).

    NOTE: the `nutrition_facts` argument is never read.  The totals are
    recomputed from `recipe` using the module-level `nutrient_info` table.
    """
    ordered_names = sort_ingredient_list(recipe)
    totals = calculate_nutrition_facts(recipe, nutrient_info)
    label = gen_fda_label(ordered_names, totals.keys(), totals.values())
    display(label)

In [232]:
# Show the label for the example recipe.
disp_fda_label_from_recipe(example_recipe, nutrition_facts)

VBox(children=(HTML(value='<h2>Nutrition Facts</h2>'), Output(), Box(layout=Layout(height='5px')), HTML(value=…

In [239]:
def recipe_wrapper(nutrition_facts, **recipe):
    """
    Keyword-argument adapter around disp_fda_label_from_recipe.

    All keyword arguments other than `nutrition_facts` are collected into
    the `recipe` dict; `nutrition_facts` is passed through unchanged.
    Returns whatever disp_fda_label_from_recipe returns.
    """
    return disp_fda_label_from_recipe(recipe, nutrition_facts)

In [242]:
# Scratch expression: one (0, 500, quantity) tuple per recipe ingredient.
# NOTE(review): the result is not assigned or used in any visible cell —
# presumably an earlier attempt at specifying sliders; confirm before removing.
{
        ingredient: (0, 500, quantity) 
        for ingredient, quantity in example_recipe.items()
    }

{'Tomatoes': (0, 500, 100),
 'Onions': (0, 500, 50),
 'Garlic': (0, 500, 30),
 'Broccoli': (0, 500, 50),
 'Carrots': (0, 500, 60)}

In [267]:
# One integer slider per ingredient, ranging over 0..150 and starting at the
# quantity given in the example recipe.
slider_dict = {
    name: ipw.IntSlider(min=0, max=150, value=grams, description=name)
    for name, grams in example_recipe.items()
}

# The sliders stacked vertically, in dictionary order.
slider_box = ipw.VBox(list(slider_dict.values()))

# Output area placed to the right of the sliders.
label_out = ipw.Output()

ui = ipw.HBox([slider_box, label_out])

In [269]:
# Interactive label: each slider is passed to recipe_wrapper as a keyword
# argument named after its ingredient, while `nutrition_facts` is wrapped in
# ipw.fixed.
ipw.interact(
    recipe_wrapper, 
    nutrition_facts=ipw.fixed(nutrition_facts), 
    **slider_dict
)

interactive(children=(IntSlider(value=100, description='Tomatoes', max=150), IntSlider(value=50, description='…

<function __main__.recipe_wrapper(nutrition_facts, **recipe)>

## Solve, too.