# Exploration of Nutrition and Costs


data sources:

- https://fdc.nal.usda.gov/download-datasets.html
- https://www.ers.usda.gov/data-products/fruit-and-vegetable-prices.aspx
- https://www.kaggle.com/datasets/trolukovich/nutritional-values-for-common-foods-and-products
- https://www.fda.gov/food/nutrition-facts-label/daily-value-nutrition-and-supplement-facts-labels


explored, but discarded
- https://www.kaggle.com/datasets/thedevastator/the-nutritional-content-of-food-a-comprehensive
- https://www.kaggle.com/datasets/thunderz/food-data


inspiration sources:

- https://nutritionj.biomedcentral.com/articles/10.1186/s12937-019-0496-5
- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2954450/
- https://www.healthline.com/nutrition/29-cheap-healthy-foods
- https://nutritionfacts.org/blog/best-nutrition-bang-for-your-buck/

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load Nutrition and Assess 

In [9]:
# data source:
# https://www.kaggle.com/datasets/trolukovich/nutritional-values-for-common-foods-and-products

nutrition = pd.read_csv('data/nutrition.csv', index_col='Unnamed: 0')

In [10]:
nutrition.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8789 entries, 0 to 8788
Data columns (total 76 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   name                         8789 non-null   object
 1   serving_size                 8789 non-null   object
 2   calories                     8789 non-null   int64 
 3   total_fat                    8789 non-null   object
 4   saturated_fat                7199 non-null   object
 5   cholesterol                  8789 non-null   object
 6   sodium                       8789 non-null   object
 7   choline                      8789 non-null   object
 8   folate                       8789 non-null   object
 9   folic_acid                   8789 non-null   object
 10  niacin                       8789 non-null   object
 11  pantothenic_acid             8789 non-null   object
 12  riboflavin                   8789 non-null   object
 13  thiamin                      8789

In [12]:
nutrition.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [13]:
nutrition.loc[683:683,].T

Unnamed: 0,683
name,"Wheat flour, whole-grain"
serving_size,100 g
calories,340
total_fat,2.5g
saturated_fat,0.4g
...,...
alcohol,0.0 g
ash,1.58 g
caffeine,0.00 mg
theobromine,0.00 mg


In [14]:
# will use this food dataset since all are 100g serving sizes
# but will need to parse the nutrients from string to numbers and units of measure

# Categorize foods, based on names' most frequent words

In [55]:
# split the name into lists in orders to find the most frequently used words

nutrition['name_split'] = nutrition['name'].apply(lambda x: x.replace(",", "").split())

words = pd.Series([word for EachElement in nutrition['name_split'] for word in EachElement])
words.value_counts()

and          2092
fat          1994
cooked       1785
lean         1508
separable    1457
             ... 
VITAMAN         1
FRUITY          1
beaked          1
Hazelnuts       1
degree          1
Length: 4514, dtype: int64

In [73]:
# top 83 words is the 50th percentile

(words.value_counts().cumsum()/words.value_counts().sum()).iloc[83]

0.49999273414226547

In [74]:
theWord = words.value_counts().index[5]
theWord

# words of interest that can lead to categorizing:

# FOODS
# Babyfood: 'Babyfood'
# Beef: 'Beef'
# Chicken: 'Chicken'
# Pork: 'Pork'
# Lamb: 'Lamb'
# Fish: 'Fish'
# Cheese: 'Cheese'
# Crackers: 'Crackers'
# Soup: 'Soup'
# Cereals: 'Cereals'

# STORAGE STATE
# Fresh: ['fresh', 'raw']
# Frozen: ['frozen', 'raw or frozen']
# Canned: 'canned'
# Dried: 'dry'

# PREPARED
# cooked: 'cooked'
# roasted: 'roasted'
# boiled: 'boiled'
# drained: 'drained'    ## typically with 'boiled'
# solids: ['drained solids', 'solids and liquids']
# prepared: 'prepared'
# uprepared: 'unprepared'
# salt: [without salt, with salt, no salt added]  ## sodium?

# ATTRIBUTES
# boneless: 'boneless'
# fat: ['low fat', 'fat free', 'reducted fat', ...]
# skin: ['skin', 'with skin, without skin', 'meat and skin', 'cooked without skin']

# GRADE
# all: ['all varieties', 'all types', 'all grades']
# choice: 'choice' #like preferred or premium?
# select: 'select' # similar to choice
# imported: imported
# ready-to-eat:ready-to-eat

##################
# explore later
# lean: 'separable lean only', X% fat Y% lean 
# meat:
# water:

'raw'

In [59]:
nutrition[nutrition['name_split'].apply(lambda x: theWord in x)]

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water,name_split
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g,"[Eggplant, raw]"
5,"Cauliflower, raw",100 g,25,0.3g,0.1g,0,30.00 mg,44.3 mg,57.00 mcg,0.00 mcg,...,0.130 g,0.034 g,0.031 g,0.00 mg,0.0 g,0.76 g,0.00 mg,0.00 mg,92.07 g,"[Cauliflower, raw]"
6,"Taro leaves, raw",100 g,42,0.7g,0.2g,0,3.00 mg,12.8 mg,126.00 mcg,0.00 mcg,...,0.151 g,0.060 g,0.307 g,0.00 mg,0.0 g,1.92 g,0.00 mg,0.00 mg,85.66 g,"[Taro, leaves, raw]"
7,"Lamb, raw, ground",100 g,282,23g,10g,73mg,59.00 mg,69.3 mg,18.00 mcg,0.00 mcg,...,10.190 g,9.600 g,1.850 g,73.00 mg,0.0 g,0.87 g,0.00 mg,0.00 mg,59.47 g,"[Lamb, raw, ground]"
15,"Quail, raw, meat only",100 g,134,4.5g,1.3g,70mg,51.00 mg,0,7.00 mcg,0.00 mcg,...,1.320 g,1.280 g,1.170 g,70.00 mg,0,1.32 g,0,0,70.03 g,"[Quail, raw, meat, only]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8783,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,124,3.3g,1.3g,62mg,54.00 mg,64.6 mg,4.00 mcg,0.00 mcg,...,1.287 g,1.481 g,0.238 g,62.00 mg,0.0 g,1.12 g,0.00 mg,0.00 mg,72.64 g,"[Beef, raw, all, grades, trimmed, to, 0"", fat,..."
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,1.353 g,1.554 g,0.244 g,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g,"[Beef, raw, all, grades, trimmed, to, 0"", fat,..."
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,11.570 g,8.720 g,0.980 g,78.00 mg,0,0.92 g,0,0,59.80 g,"[Lamb, raw, separable, lean, and, fat, composi..."
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,1.086 g,1.266 g,0.233 g,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g,"[Beef, raw, all, grades, trimmed, to, 0"", fat,..."


In [143]:
def categorize(TheSeries: pd.Series, pattern: list, category: str):
    if type(pattern) == list:
        for eachItem in pattern:
            return TheSeries.apply(lambda x: category if eachItem in x else '')
    elif type(pattern) == str:
        return TheSeries.apply(lambda x: category if pattern in x else '')
    else:
        print("'pattern' parameter dtype is expected to be either str or list")

In [145]:
def bulk_categorize(theSeries: pd.Series, theDict: dict, column_name):
    temp_df = pd.DataFrame()
    for key in theDict:
        tempSeries = categorize(theSeries, theDict[key], key)
        temp_df[key] = tempSeries
    temp_df[column_name] = temp_df.apply(lambda row: ''.join(str(val) for val in row), axis=1)
    temp_df = temp_df[[column_name]]
    return temp_df

In [151]:
food_types_dict = {
    'Babyfood': 'test',
    'Beef': 'Beef',
    'Chicken': 'Chicken',
    'Pork': 'Pork',
    'Lamb': 'Lamb',
    'Fish': 'Fish',
    'Cheese': 'Cheese',
    'Crackers': 'Crackers',
    'Soup': 'Soup',
    'Cereals': 'Cereals'
}

storage_state_dict = {
    'Fresh': ['fresh', 'raw'],
    'Frozen': ['frozen', 'raw or frozen'],
    'Canned': 'canned',
    'Dried': 'dry'
}

grade_dict = {
    'all': ['all varieties', 'all types', 'all grades'],
    'choice': 'choice',
    'select': 'select',
    'imported': 'imported',
    'ready_to_eat':'ready-to-eat'
}

In [156]:
s = bulk_categorize(nutrition.name, food_types_dict, 'food_types')
t = bulk_categorize(nutrition.name, storage_state_dict, 'storage_state')
u = bulk_categorize(nutrition.name, grade_dict, 'grade')
pd.concat([nutrition, s, t, u],axis=1 )

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water,name_split,food_types,storage_state,grade
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g,[Cornstarch],,,
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g,"[Nuts, pecans]",,,
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g,"[Eggplant, raw]",,,
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,0,0,2.37 g,0,0,8.82 g,"[Teff, uncooked]",,,
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g,"[Sherbet, orange]",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8784,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,125,3.5g,1.4g,62mg,54.00 mg,64.5 mg,4.00 mcg,0.00 mcg,...,62.00 mg,0.0 g,1.11 g,0.00 mg,0.00 mg,72.51 g,"[Beef, raw, all, grades, trimmed, to, 0"", fat,...",Beef,,
8785,"Lamb, cooked, separable lean only, composite o...",100 g,206,8.9g,3.9g,109mg,50.00 mg,0,0.00 mcg,0.00 mcg,...,109.00 mg,0,1.60 g,0,0,59.95 g,"[Lamb, cooked, separable, lean, only, composit...",Lamb,Frozen,imported
8786,"Lamb, raw, separable lean and fat, composite o...",100 g,277,23g,12g,78mg,39.00 mg,0,1.00 mcg,0.00 mcg,...,78.00 mg,0,0.92 g,0,0,59.80 g,"[Lamb, raw, separable, lean, and, fat, composi...",Lamb,Frozen,imported
8787,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",100 g,121,3g,1.1g,60mg,53.00 mg,64.2 mg,4.00 mcg,0.00 mcg,...,60.00 mg,0.0 g,1.10 g,0.00 mg,0.00 mg,73.43 g,"[Beef, raw, all, grades, trimmed, to, 0"", fat,...",Beef,,


# transform Nutrition from wide to long

In [15]:
#transform from wide to long

food = nutrition.melt(id_vars='name')
food['value'] = food['value'].values.astype(str)
food.head()

Unnamed: 0,name,variable,value
0,Cornstarch,serving_size,100 g
1,"Nuts, pecans",serving_size,100 g
2,"Eggplant, raw",serving_size,100 g
3,"Teff, uncooked",serving_size,100 g
4,"Sherbet, orange",serving_size,100 g


In [16]:
pattern = r'(\d+(?:\.\d+)?)\s?(\D+)?'
food[['amount','unit of measure']] = food['value'].str.extract(pattern)
food['amount'] = pd.to_numeric(food['amount'],errors='coerce')

In [18]:
food[food['name']=='Wheat flour, whole-grain'].head(60)

Unnamed: 0,name,variable,value,amount,unit of measure
683,"Wheat flour, whole-grain",serving_size,100 g,100.0,g
9472,"Wheat flour, whole-grain",calories,340,340.0,
18261,"Wheat flour, whole-grain",total_fat,2.5g,2.5,g
27050,"Wheat flour, whole-grain",saturated_fat,0.4g,0.4,g
35839,"Wheat flour, whole-grain",cholesterol,0,0.0,
44628,"Wheat flour, whole-grain",sodium,2.00 mg,2.0,mg
53417,"Wheat flour, whole-grain",choline,31.2 mg,31.2,mg
62206,"Wheat flour, whole-grain",folate,44.00 mcg,44.0,mcg
70995,"Wheat flour, whole-grain",folic_acid,0.00 mcg,0.0,mcg
79784,"Wheat flour, whole-grain",niacin,4.957 mg,4.957,mg


In [19]:
food = food.drop(labels=['value'], axis=1)
food

Unnamed: 0,name,variable,amount,unit of measure
0,Cornstarch,serving_size,100.00,g
1,"Nuts, pecans",serving_size,100.00,g
2,"Eggplant, raw",serving_size,100.00,g
3,"Teff, uncooked",serving_size,100.00,g
4,"Sherbet, orange",serving_size,100.00,g
...,...,...,...,...
659170,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",water,72.51,g
659171,"Lamb, cooked, separable lean only, composite o...",water,59.95,g
659172,"Lamb, raw, separable lean and fat, composite o...",water,59.80,g
659173,"Beef, raw, all grades, trimmed to 0"" fat, sepa...",water,73.43,g


# Load Price Data

In [20]:
fruit = pd.read_csv('https://www.ers.usda.gov/webdocs/DataFiles/51035/Fruit%20Prices%202020.csv')
veg = pd.read_csv('https://www.ers.usda.gov/webdocs/DataFiles/51035/Vegetable%20Prices%202020.csv')

In [22]:
fruit.rename(columns={ fruit.columns[0]: "Item" }, inplace = True)
veg.rename(columns={ veg.columns[0]: "Item" }, inplace = True)
prices = pd.concat([fruit, veg]).reset_index(drop=True)
prices.tail()

Unnamed: 0,Item,Form,RetailPrice,RetailPriceUnit,Yield,CupEquivalentSize,CupEquivalentUnit,CupEquivalentPrice
150,Tomatoes,Canned,1.0175,per pound,1.0,0.5401,pounds,0.5496
151,Turnip greens,Fresh,2.4176,per pound,0.75,0.3197,pounds,1.0304
152,Turnip greens,Canned,1.0429,per pound,0.65,0.3527,pounds,0.566
153,Turnip greens,Frozen,1.9451,per pound,0.776,0.3527,pounds,0.8841
154,Zucchini,Fresh,1.5489,per pound,0.7695,0.3968,pounds,0.7987


In [25]:
# only juice prices are per pint; everything else is per pound--convertable to grams

prices.groupby(['RetailPriceUnit','Form'])['Item'].count()

RetailPriceUnit  Form  
per pint         Juice     11
per pound        Canned    36
                 Dried     17
                 Fresh     66
                 Frozen    25
Name: Item, dtype: int64

In [26]:
prices = (prices[~(prices['RetailPriceUnit'] == 'per pint')]
          .drop(['RetailPriceUnit', 'Yield','CupEquivalentSize','CupEquivalentUnit','CupEquivalentPrice'],axis=1)
          .copy()
         )

In [27]:
gramsPerPound = 453.5924
prices['PricePer100Grams'] = prices['RetailPrice']/gramsPerPound*100
prices.head()

Unnamed: 0,Item,Form,RetailPrice,PricePer100Grams
0,Apples,Fresh,1.5193,0.334948
1,"Apples, applesauce",Canned,1.066,0.235013
4,Apricots,Fresh,2.9665,0.654001
5,"Apricots, packed in juice",Canned,1.6905,0.372691
6,"Apricots, packed in syrup or water",Canned,2.06,0.454152


# Fuzzy Match Analysis

In [39]:
# fuzzymatch food.name with prices.Item

from rapidfuzz import fuzz
from rapidfuzz import process
from rapidfuzz import utils
import time

In [40]:
fuzzyMatchEnabled = True #switch to False if running fuzzymatch is unwanted

In [41]:
def elapse(start):
    end = time.time() - start
    if end > 60:
        end = round(end/60,1)
        m = "minutes"
    else:
        end = round(end, 1)
        m = "seconds"
    print(f'{end} {m} elapsed')

In [42]:
def fuzzymatch(series1, series2, threshold=95):
    s = time.time()
    matches = []
    
    for item in series1:
        closest_match, score, ind = process.extractOne(item, series2, processor=utils.default_process)
        if score >= threshold:
            matches.append({series1.name:item,series2.name:closest_match, 'score':score})
    elapse(s)
    return pd.DataFrame(matches)

In [44]:
matched_df = fuzzymatch(nutrition['name'].drop_duplicates(), prices['Item'].drop_duplicates(), 90)

5.2 seconds elapsed


In [45]:
matched_df.head(10)

Unnamed: 0,name,Item,score
0,Cornstarch,Corn,90.0
1,"Cauliflower, raw",Cauliflower,95.0
2,"Grapes, raw, muscadine",Grapes,90.0
3,"Broccoli, raw, chinese",Broccoli,90.0
4,"Lentils, raw, sprouted",Lentils,90.0
5,"Snacks, cakes, popcorn",Corn,90.0
6,"Brussels sprouts, raw",Brussels sprouts,95.0
7,"Broccoli raab, cooked",Broccoli,90.0
8,"Broccoli, raw, leaves",Broccoli,90.0
9,"Tomatoes, raw, orange",Tomatoes,90.0


In [46]:
matched_df.groupby(['Item', 'name'])['score'].count().index.values

array([('Apples', 'Apples, boiled, cooked, without skin, raw'),
       ('Apples', 'Apples, heated, unsweetened, frozen'),
       ('Apples', 'Apples, microwave, cooked, without skin, raw'),
       ('Apples', 'Apples, uncooked, sulfured, dried'),
       ('Apples', 'Apples, unheated, unsweetened, frozen'),
       ('Apples', 'Apples, with skin, fuji, raw'),
       ('Apples', 'Apples, with skin, gala, raw'),
       ('Apples', 'Apples, with skin, golden delicious, raw'),
       ('Apples', 'Apples, with skin, granny smith, raw'),
       ('Apples', 'Apples, with skin, raw'),
       ('Apples', 'Apples, with skin, red delicious, raw'),
       ('Apples', 'Apples, without skin, raw'),
       ('Apples', 'Applesauce, with salt, sweetened, canned'),
       ('Apples', 'Babyfood, dry, rice and apples'),
       ('Apples', 'Babyfood, junior, applesauce with banana, fruit'),
       ('Apples', 'Babyfood, junior, applesauce, fruit'),
       ('Apples', 'Babyfood, strained, apples and chicken, dinner'),
     

In [47]:
matched_items = matched_df.groupby(['Item', 'name'])['score'].mean().index.get_level_values('Item').unique()
matched_items 

Index(['Apples', 'Apricots', 'Artichoke', 'Asparagus', 'Avocados', 'Bananas',
       'Beets', 'Black beans', 'Blackberries', 'Blueberries', 'Broccoli',
       'Brussels sprouts', 'Butternut squash', 'Cabbage, red', 'Cantaloupe',
       'Carrots', 'Carrots, baby', 'Carrots, raw whole', 'Cauliflower',
       'Cherries', 'Clementines', 'Collard greens', 'Corn', 'Cranberries',
       'Dates', 'Figs', 'Grapefruit', 'Grapes', 'Green beans', 'Green peppers',
       'Honeydew', 'Kale', 'Kidney beans', 'Kiwi', 'Lentils', 'Lima beans',
       'Mustard greens', 'Nectarines', 'Okra', 'Olives', 'Onions', 'Oranges',
       'Papaya', 'Peaches', 'Pears', 'Pineapple', 'Plum', 'Pomegranate',
       'Potatoes', 'Pumpkin', 'Radish', 'Raspberries', 'Spinach',
       'Strawberries', 'Sweet potatoes', 'Tomatoes', 'Turnip greens',
       'Watermelon', 'Zucchini'],
      dtype='object', name='Item')

In [48]:
matched_df.groupby(['Item', 'name'])['score'].mean().loc[(matched_items[57],)].index

# 0 , Apples,           "Apples, with skin ..."
# 1 , Apricots,         "Apricots, raw"
# 2 , Artichokes,       'Artichokes, raw, (globe or french)'
# 3 , Asparagus,        'Asparagus, raw'
# 4 , Avocados,         {all matches}
# 5 , Bananas,          'Bananas, raw'
# 6 , Beets,            'Beets, raw'
# 7 , Black beans,      {no matches}
# 8 , Blackberries,     ['Blackberries, raw','Blackberries, raw (Alaska Native), wild','Blackberries, unsweetened, frozen']
# 9 , Blueberries,      ['Blueberries, frozen (Alaska Native), wild','Blueberries, frozen, wild', 'Blueberries, raw', 'Blueberries, raw (Alaska Native), wild', 'Blueberries, unsweetened, frozen']
#10 , Broccoli,         ['Broccoli raab, raw', 'Broccoli, raw', 'Broccoli, raw, flower clusters']
#11 , Brussels sprouts, 'Brussels sprouts, raw', 'Brussels sprouts, unprepared, frozen'
#12 , Butternut squash  {no matches}
#13 , Cabbage, red      'Cabbage, raw, red'
#14 , 'Cantaloupe'      'Melons, raw, cantaloupe'
#15 , 'Carrots'         {! more Items to check instead of just 'Carrots'}
#16 , 'Carrots, baby',  ['Carrots, raw, baby']
#17 , 'Carrots, raw whole' ['Carrots, raw']
#18 , 'Cauliflower',    {there's other records for Fresh} ['Cauliflower, raw', 'Cauliflower, unprepared, frozen']
#19 , 'Cherries'        ['Cherries, drained, heavy syrup, pitted, canned, sweet', 'Cherries, drained, water pack, canned, sour','Cherries, raw, red, sour', 'Cherries, raw, sweet']
#20 , Clementines,      ['Clementines, raw']
#21 , Collard greens,   {no matches} ['Collards, raw', 'Collards, unprepared, chopped, frozen', 'Collards, without salt, drained, boiled, cooked, chopped, frozen']
#22 , Corn,             ['Corn, raw, white, sweet', 'Corn, raw, yellow, sweet','Corn, drained solids, whole kernel, canned, yellow, sweet','Corn, drained solids, whole kernel, canned, white, sweet','Corn, unprepared, kernels cut off cob, frozen, yellow, sweet','Corn, unprepared, kernels cut off cob, frozen, white, sweet']
#23 , Cranberries       Cranberries, sweetened, dried
#24 , Dates             ['Dates, deglet noor', 'Dates, medjool']
#25 , Figs              ['Figs, stewed, dried', 'Figs, uncooked, dried']
#26 , Grapefruit        ['Grapefruit, California, white, raw','Grapefruit, all areas, white, raw','Grapefruit, Florida, white, raw','Grapefruit, Florida, pink and red, raw','Grapefruit, all areas, pink and red, raw','Grapefruit, all areas, pink and red and white, raw','Grapefruit, California and Arizona, pink and red, raw']
#27 , Grapes            'Grapes, raw, american type (slip skin)'
#28 , Green beans       'Beans, raw, green, snap', Beans, unprepared, all styles, frozen, green, snap', Beans, drained solids, no salt added, canned, green, snap]
#29 , Green peppers     'Peppers, raw, green, sweet'
#30 , Honeydew          ['Honey', 'Melons, raw, honeydew']
#31 , Kale              ['Kale, raw', 'Kale, raw, scotch', 'Kale, unprepared, frozen']
#32 , Kidney beans      ['Beans, raw, mature seeds, red, kidney', 'Beans, raw, mature seeds, royal red, kidney','Beans, raw, mature seeds, all types, kidney', 'Beans, canned, mature seeds, all types, kidney']
#33 , Kiwi              ['Kiwifruit, raw, ZESPRI SunGold', 'Kiwifruit, raw, green']
#34 , Lentiles          ['Lentils, raw', 'Lentils, raw, pink or red']
#35 , Lima beans        ['Lima beans, raw, immature seeds','Lima beans, raw, mature seeds, large','Lima beans, canned, mature seeds, large', 'Lima beans, unprepared, baby, frozen, immature seeds']
#36 , 'Mustard greens'  ['Mustard greens, unprepared, frozen', ] {no match for canned}
#37 , Nectarines        'Nectarines, raw'
#38 , Okra              ['Okra, raw', 'Okra, unprepared, frozen']
#39 , Olives            ['Olives, canned (jumbo-super colossal), ripe','Olives, canned (small-extra large), ripe','Olives, green, canned or bottled, pickled']
#40 , Onions            ['Onions, raw, sweet','Onions, raw, welsh','Onions, raw']
#41 , Oranges           ['Oranges, Florida, raw','Oranges, navels, raw','Oranges, with peel, raw','Oranges, valencias, California, raw','Oranges, all commercial varieties, raw']
#42 , Papaya            'Papayas, raw' {fresh only}
#43 , Peaches           ['Peaches, raw, yellow','Peaches, sweetened, sliced, frozen','Peaches, drained, heavy syrup, canned','Peaches, solids and liquids, water pack, canned','Peaches, solids and liquids, juice pack, canned','Peaches, solids and liquids, heavy syrup pack, canned','Peaches, solids and liquids, light syrup pack, canned','Peaches, solids and liquids, extra light syrup, canned']
#44 , Pears             ['Pears, red anjou, raw','Pears, bartlett, raw','Pears, raw, asian','Pears, bosc, raw','Pears, raw','Pears, green anjou, raw','Pears, drained, heavy syrup, canned','Pears, solids and liquids, water pack, canned','Pears, solids and liquids, juice pack, canned','Pears, solids and liquids, light syrup pack, canned','Pears, solids and liquids, heavy syrup pack, canned','Pears, solids and liquids, extra light syrup pack, canned','Pears, solids and liquids, extra heavy syrup pack, canned']
#45 , Pineapple         ['Pineapple, all varieties, raw','Pineapple, drained, juice pack, canned','Pineapple, solids and liquids, juice pack, canned','Pineapple, solids and liquids, water pack, canned','Pineapple, solids and liquids, heavy syrup pack, canned','Pineapple, solids and liquids, light syrup pack, canned','Pineapple, solids and liquids, extra heavy syrup pack, canned']
#46 , Plum              ['Plums, raw','Plums, uncooked, dried (prunes)']
#47 , Pomegranate       'Pomegranates, raw'
#48 , Potatoes          ['Potatoes, skin, raw','Potatoes, raw, flesh and skin, red','Potatoes, raw, flesh and skin','Potatoes, raw, flesh and skin, russet','Potatoes, raw, flesh and skin, white',['Potatoes, unprepared, extruded, par fried, french fried, frozen','Potatoes, without salt, heated in oven, prepared, extruded, par fried, french fried, frozen','Potatoes, with salt, heated in oven, prepared, cottage-cut, par fried, french fried, frozen', 'Potatoes, drained solids, canned','Potatoes, solids and liquids, canned','Potatoes, no salt added, drained solids, canned']
#49 , Pumpkin           ['Pumpkin, without salt, canned', 'Pumpkin, with salt, canned']
#50 , Radish            ['Radishes, raw',  'Radishes, raw, white icicle', 'Radishes, raw, oriental']
#51 , Raspberries       ['Raspberries, raw','Raspberries, unsweetened, frozen','Raspberries, sweetened, red, frozen']
#52 , Spinach           ['Spinach, raw','Spinach, unprepared, chopped or leaf, frozen','Spinach, without salt, drained, boiled, cooked, chopped or leaf, frozen','Spinach, with salt, drained, boiled, cooked, chopped or leaf, frozen']
#53 , Strawberries      ['Strawberries, raw','Strawberries, unsweetened, frozen','Strawberries, sliced, sweetened, frozen','Strawberries, whole, sweetened, frozen']
#54 , Sweet potatoes    ['Sweet potato, unprepared, raw']
#55 , Tomatoes          ['Tomatoes, canned, crushed','Tomatoes, stewed, canned, ripe, red'] ... there are unmatched FRESH tomatoes
#56 , Turnip greens     ['Turnip greens, raw', 'Turnip greens, no salt added, canned','Turnip greens, solids and liquids, canned','Turnip greens, unprepared, frozen','Turnip greens, with salt, drained, boiled, cooked, frozen','Turnip greens, without salt, drained, boiled, cooked, frozen']
#57 , Watermelon        ['Watermelon, raw']
#58 , Zucchini          ['Squash, raw, baby, zucchini', 'Squash, raw, includes skin, zucchini, summer'] 

Index(['Seeds, dried, watermelon seed kernels', 'Watermelon, raw'], dtype='object', name='name')

In [49]:
prices[prices['Item'].str.contains('Zucchini')]

Unnamed: 0,Item,Form,RetailPrice,PricePer100Grams
154,Zucchini,Fresh,1.5489,0.341474


In [50]:
food[food['name'].str.contains('raw.*ucchini', regex=True)]['name'].drop_duplicates().to_list()

['Squash, raw, baby, zucchini', 'Squash, raw, includes skin, zucchini, summer']

In [51]:
all = food.merge(matched_df, on='name').merge(prices, on='Item')
all = all[['name', 'Item', 'variable', 'amount', 'unit of measure', 'Form', 'RetailPrice', 'PricePer100Grams']]
all.head()

Unnamed: 0,name,Item,variable,amount,unit of measure,Form,RetailPrice,PricePer100Grams
0,Cornstarch,Corn,serving_size,100.0,g,Fresh,1.8908,0.41685
1,Cornstarch,Corn,serving_size,100.0,g,Canned,1.0287,0.22679
2,Cornstarch,Corn,serving_size,100.0,g,Frozen,1.6642,0.366893
3,Cornstarch,Corn,calories,381.0,,Fresh,1.8908,0.41685
4,Cornstarch,Corn,calories,381.0,,Canned,1.0287,0.22679


# Cost per Nutrient

In [60]:
all['dollarsPerNutrient'] = all['PricePer100Grams']/all['amount']
all.head()

Unnamed: 0,name,Item,variable,amount,unit of measure,Form,RetailPrice,PricePer100Grams,dollarsPerNutrient
0,Cornstarch,Corn,serving_size,100.0,g,Fresh,1.8908,0.41685,0.004169
1,Cornstarch,Corn,serving_size,100.0,g,Canned,1.0287,0.22679,0.002268
2,Cornstarch,Corn,serving_size,100.0,g,Frozen,1.6642,0.366893,0.003669
3,Cornstarch,Corn,calories,381.0,,Fresh,1.8908,0.41685,0.001094
4,Cornstarch,Corn,calories,381.0,,Canned,1.0287,0.22679,0.000595


In [61]:
(all[
    (all['variable']=='calories') & 
    (all['name'].str.contains('raw')) & 
    (all['Form']=='Fresh')
    ]
    .sort_values('dollarsPerNutrient')
)

Unnamed: 0,name,Item,variable,amount,unit of measure,Form,RetailPrice,PricePer100Grams,dollarsPerNutrient
2928,"Nuts, raw, acorns",Corn,calories,387.0,,Fresh,1.8908,0.416850,0.001077
56476,"Bananas, raw",Bananas,calories,89.0,,Fresh,0.5249,0.115721,0.001300
42452,"Cereals ready-to-eat, with real strawberries, ...",Strawberries,calories,399.0,,Fresh,2.5800,0.568793,0.001426
25427,"Potatoes, raw, flesh and skin, russet",Potatoes,calories,79.0,,Fresh,0.6682,0.147313,0.001865
24677,"Potatoes, raw, flesh and skin",Potatoes,calories,77.0,,Fresh,0.6682,0.147313,0.001913
...,...,...,...,...,...,...,...,...,...
43427,"Blackberries, raw (Alaska Native), wild",Blackberries,calories,52.0,,Fresh,6.0172,1.326565,0.025511
57827,"Okra, raw",Okra,calories,33.0,,Fresh,3.9803,0.877506,0.026591
43727,"Raspberries, raw",Raspberries,calories,52.0,,Fresh,6.6391,1.463671,0.028148
51303,"Asparagus, raw",Asparagus,calories,20.0,,Fresh,2.7576,0.607947,0.030397


# Daily Value Requirements

In [62]:
import requests
from bs4 import BeautifulSoup

# URL of the webpage
url = "https://www.fda.gov/food/nutrition-facts-label/daily-value-nutrition-and-supplement-facts-labels"

# Send a GET request to the webpage
response = requests.get(url)

# Parse the HTML content of the webpage using BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Find the table by its class name
table = soup.find("table", {"class": "table table-bordered"})

# Extract table data and convert it to a DataFrame
table_data = []
for row in table.find_all("tr"):
    row_data = [cell.get_text(strip=True) for cell in row.find_all(["th", "td"])]
    table_data.append(row_data)

dv = pd.DataFrame(table_data[1:], columns=table_data[0])
dv

Unnamed: 0,Nutrient,Current Daily Value
0,Added sugars,50g
1,Biotin,30mcg
2,Calcium,1300mg
3,Chloride,2300mg
4,Choline,550mg
5,Cholesterol,300mg
6,Chromium,35mcg
7,Copper,0.9mg
8,Dietary Fiber,28g
9,Fat,78g


In [63]:
# add calories

dv.loc[dv.index.size] = ['calories', '2000Kcal']
dv

Unnamed: 0,Nutrient,Current Daily Value
0,Added sugars,50g
1,Biotin,30mcg
2,Calcium,1300mg
3,Chloride,2300mg
4,Choline,550mg
5,Cholesterol,300mg
6,Chromium,35mcg
7,Copper,0.9mg
8,Dietary Fiber,28g
9,Fat,78g


In [64]:
# split Daily Value from string into numeric amount and unit of measure

pattern = r'(\d+(?:\.\d+)?)\s?(\D+)?'
dv[['DV_amount','DV_UOM']] = dv['Current Daily Value'].str.extract(pattern)
dv['DV_amount'] = pd.to_numeric(dv['DV_amount'],errors='coerce')
dv = dv.drop(labels=['Current Daily Value'], axis=1)
dv['Nutrient'] = dv['Nutrient'].str.lower()
dv

Unnamed: 0,Nutrient,DV_amount,DV_UOM
0,added sugars,50.0,g
1,biotin,30.0,mcg
2,calcium,1300.0,mg
3,chloride,2300.0,mg
4,choline,550.0,mg
5,cholesterol,300.0,mg
6,chromium,35.0,mcg
7,copper,0.9,mg
8,dietary fiber,28.0,g
9,fat,78.0,g


In [65]:
all['variable'].unique()

array(['serving_size', 'calories', 'total_fat', 'saturated_fat',
       'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid',
       'niacin', 'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene',
       'vitamin_b12', 'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e',
       'tocopherol_alpha', 'vitamin_k', 'calcium', 'copper', 'irom',
       'magnesium', 'manganese', 'phosphorous', 'potassium', 'selenium',
       'zink', 'protein', 'alanine', 'arginine', 'aspartic_acid',
       'cystine', 'glutamic_acid', 'glycine', 'histidine',
       'hydroxyproline', 'isoleucine', 'leucine', 'lysine', 'methionine',
       'phenylalanine', 'proline', 'serine', 'threonine', 'tryptophan',
       'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose',
       'sucrose', 'fat', 'saturated_fatty_acids',
     

In [66]:
dv['Nutrient'].unique()

array(['added sugars', 'biotin', 'calcium', 'chloride', 'choline',
       'cholesterol', 'chromium', 'copper', 'dietary fiber', 'fat',
       'folate/folic acid', 'iodine', 'iron', 'magnesium', 'manganese',
       'molybdenum', 'niacin', 'pantothenic acid', 'phosphorus',
       'potassium', 'protein', 'riboflavin', 'saturated fat', 'selenium',
       'sodium', 'thiamin', 'total carbohydrate', 'vitamin a',
       'vitamin b6', 'vitamin b12', 'vitamin c', 'vitamin d', 'vitamin e',
       'vitamin k', 'zinc', 'calories'], dtype=object)

In [67]:
matches = fuzzymatch(all['variable'].drop_duplicates(), dv['Nutrient'].drop_duplicates(), 90)
matches

0.0 seconds elapsed


Unnamed: 0,variable,Nutrient,score
0,calories,calories,100.0
1,total_fat,fat,90.0
2,saturated_fat,saturated fat,100.0
3,cholesterol,cholesterol,100.0
4,sodium,sodium,100.0
5,choline,choline,100.0
6,folate,folate/folic acid,90.0
7,folic_acid,folate/folic acid,90.0
8,niacin,niacin,100.0
9,pantothenic_acid,pantothenic acid,100.0


In [68]:
all = all.merge(matches.iloc[:,:2], on='variable').merge(dv, on="Nutrient")
all

Unnamed: 0,name,Item,variable,amount,unit of measure,Form,RetailPrice,PricePer100Grams,dollarsPerNutrient,Nutrient,DV_amount,DV_UOM
0,Cornstarch,Corn,calories,381.00,,Fresh,1.8908,0.416850,0.001094,calories,2000.0,Kcal
1,Cornstarch,Corn,calories,381.00,,Canned,1.0287,0.226790,0.000595,calories,2000.0,Kcal
2,Cornstarch,Corn,calories,381.00,,Frozen,1.6642,0.366893,0.000963,calories,2000.0,Kcal
3,"Snacks, cakes, popcorn",Corn,calories,384.00,,Fresh,1.8908,0.416850,0.001086,calories,2000.0,Kcal
4,"Snacks, cakes, popcorn",Corn,calories,384.00,,Canned,1.0287,0.226790,0.000591,calories,2000.0,Kcal
...,...,...,...,...,...,...,...,...,...,...,...,...
33223,"Corn with red and green peppers, solids and li...",Green peppers,sugars,0.00,,Fresh,1.2772,0.281574,inf,added sugars,50.0,g
33224,"Tomato products, and celery, green peppers, wi...",Green peppers,sugars,7.36,g,Fresh,1.2772,0.281574,0.038257,added sugars,50.0,g
33225,"Restaurant, Arroz con frijoles negros (rice an...",Black beans,sugars,0.86,g,Canned,1.0281,0.226657,0.263555,added sugars,50.0,g
33226,"Restaurant, Arroz con frijoles negros (rice an...",Black beans,sugars,0.86,g,Dried,1.3753,0.303202,0.352560,added sugars,50.0,g


In [69]:
all['PercentOfDV'] = all['amount']/all['DV_amount']
all['CostForEntireDV'] = all['DV_amount']/all['amount'] * all['RetailPrice']
all

Unnamed: 0,name,Item,variable,amount,unit of measure,Form,RetailPrice,PricePer100Grams,dollarsPerNutrient,Nutrient,DV_amount,DV_UOM,PercentOfDV,CostForEntireDV
0,Cornstarch,Corn,calories,381.00,,Fresh,1.8908,0.416850,0.001094,calories,2000.0,Kcal,0.1905,9.925459
1,Cornstarch,Corn,calories,381.00,,Canned,1.0287,0.226790,0.000595,calories,2000.0,Kcal,0.1905,5.400000
2,Cornstarch,Corn,calories,381.00,,Frozen,1.6642,0.366893,0.000963,calories,2000.0,Kcal,0.1905,8.735958
3,"Snacks, cakes, popcorn",Corn,calories,384.00,,Fresh,1.8908,0.416850,0.001086,calories,2000.0,Kcal,0.1920,9.847917
4,"Snacks, cakes, popcorn",Corn,calories,384.00,,Canned,1.0287,0.226790,0.000591,calories,2000.0,Kcal,0.1920,5.357812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33223,"Corn with red and green peppers, solids and li...",Green peppers,sugars,0.00,,Fresh,1.2772,0.281574,inf,added sugars,50.0,g,0.0000,inf
33224,"Tomato products, and celery, green peppers, wi...",Green peppers,sugars,7.36,g,Fresh,1.2772,0.281574,0.038257,added sugars,50.0,g,0.1472,8.676630
33225,"Restaurant, Arroz con frijoles negros (rice an...",Black beans,sugars,0.86,g,Canned,1.0281,0.226657,0.263555,added sugars,50.0,g,0.0172,59.773256
33226,"Restaurant, Arroz con frijoles negros (rice an...",Black beans,sugars,0.86,g,Dried,1.3753,0.303202,0.352560,added sugars,50.0,g,0.0172,79.959302


In [70]:
all.sort_values(['variable','CostForEntireDV']).head()

Unnamed: 0,name,Item,variable,amount,unit of measure,Form,RetailPrice,PricePer100Grams,dollarsPerNutrient,Nutrient,DV_amount,DV_UOM,PercentOfDV,CostForEntireDV
23832,"Babyfood, dry, with bananas, mixed, cereal",Bananas,calcium,696.0,mg,Fresh,0.5249,0.115721,0.000166,calcium,1300.0,mg,0.535385,0.980417
23833,"Babyfood, dry, with bananas, rice, cereal",Bananas,calcium,691.0,mg,Fresh,0.5249,0.115721,0.000167,calcium,1300.0,mg,0.531538,0.987511
23830,"Babyfood, dry, with bananas, oatmeal, cereal",Bananas,calcium,651.0,mg,Fresh,0.5249,0.115721,0.000178,calcium,1300.0,mg,0.500769,1.048187
23702,"Babyfood, dry, rice and apples",Apples,calcium,850.0,mg,Fresh,1.5193,0.334948,0.000394,calcium,1300.0,mg,0.653846,2.323635
23408,"Potatoes, unprepared, dry mix, au gratin",Potatoes,calcium,311.0,mg,Fresh,0.6682,0.147313,0.000474,calcium,1300.0,mg,0.239231,2.793119


In [71]:
all['rank_CostForEntireDV'] = all.groupby(['variable', 'Form'])['CostForEntireDV'].rank()
all.sort_values(['variable', 'rank_CostForEntireDV'])[(all['variable']=='calories') & (all['Form']=='Fresh')]

  all.sort_values(['variable', 'rank_CostForEntireDV'])[(all['variable']=='calories') & (all['Form']=='Fresh')]


Unnamed: 0,name,Item,variable,amount,unit of measure,Form,RetailPrice,PricePer100Grams,dollarsPerNutrient,Nutrient,DV_amount,DV_UOM,PercentOfDV,CostForEntireDV,rank_CostForEntireDV
642,"Seeds, dried, watermelon seed kernels",Watermelon,calories,557.0,,Fresh,0.3604,0.079455,0.000143,calories,2000.0,Kcal,0.2785,1.294075,1.0
435,"Snacks, cheese-flavor, made from dried potatoe...",Potatoes,calories,551.0,,Fresh,0.6682,0.147313,0.000267,calories,2000.0,Kcal,0.2755,2.425408,2.0
391,"Snack, plain, made from dried potatoes, potato...",Potatoes,calories,545.0,,Fresh,0.6682,0.147313,0.000270,calories,2000.0,Kcal,0.2725,2.452110,3.0
758,"Babyfood, dry, with bananas, rice, cereal",Bananas,calories,404.0,,Fresh,0.5249,0.115721,0.000286,calories,2000.0,Kcal,0.2020,2.598515,4.0
447,"Snacks, reduced fat, made from dried potatoes,...",Potatoes,calories,502.0,,Fresh,0.6682,0.147313,0.000293,calories,2000.0,Kcal,0.2510,2.662151,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711,"Asparagus, without salt, drained, boiled, cook...",Asparagus,calories,18.0,,Fresh,2.7576,0.607947,0.033775,calories,2000.0,Kcal,0.0090,306.400000,383.5
528,"Turnip greens, solids and liquids, canned",Turnip greens,calories,14.0,,Fresh,2.4176,0.532990,0.038071,calories,2000.0,Kcal,0.0070,345.371429,385.0
705,"Asparagus, solids and liquids, no salt added, ...",Asparagus,calories,15.0,,Fresh,2.7576,0.607947,0.040530,calories,2000.0,Kcal,0.0075,367.680000,386.5
708,"Asparagus, solids and liquids, regular pack, c...",Asparagus,calories,15.0,,Fresh,2.7576,0.607947,0.040530,calories,2000.0,Kcal,0.0075,367.680000,386.5


In [72]:
all[all['rank_CostForEntireDV']<=1 & (all['Form']=='Fresh')]

Unnamed: 0,name,Item,variable,amount,unit of measure,Form,RetailPrice,PricePer100Grams,dollarsPerNutrient,Nutrient,DV_amount,DV_UOM,PercentOfDV,CostForEntireDV,rank_CostForEntireDV
642,"Seeds, dried, watermelon seed kernels",Watermelon,calories,557.0,,Fresh,0.3604,0.079455,0.000143,calories,2000.0,Kcal,0.2785,1.294075,1.0
1565,"Seeds, dried, watermelon seed kernels",Watermelon,total_fat,47.0,g,Fresh,0.3604,0.079455,0.001691,fat,78.0,g,0.602564,0.598111,1.0
2488,"Seeds, dried, watermelon seed kernels",Watermelon,fat,47.37,g,Fresh,0.3604,0.079455,0.001677,fat,78.0,g,0.607308,0.593439,1.0
3411,"Seeds, dried, watermelon seed kernels",Watermelon,saturated_fatty_acids,9.779,g,Fresh,0.3604,0.079455,0.008125,fat,78.0,g,0.125372,2.87465,1.0
4177,"Cake, prepared from recipe, pineapple upside-down",Pineapple,fatty_acids_total_trans,22.0,mg,Fresh,0.5685,0.125333,0.005697,fat,78.0,g,0.282051,2.015591,1.0
5257,"Seeds, dried, watermelon seed kernels",Watermelon,saturated_fat,9.8,g,Fresh,0.3604,0.079455,0.008108,saturated fat,20.0,g,0.49,0.73551,1.0
5550,"Oil, corn and canola",Corn,monounsaturated_fatty_acids,58.537,g,Fresh,1.8908,0.41685,0.007121,saturated fat,20.0,g,2.92685,0.646019,1.0
7103,"Seeds, dried, watermelon seed kernels",Watermelon,polyunsaturated_fatty_acids,28.094,g,Fresh,0.3604,0.079455,0.002828,saturated fat,20.0,g,1.4047,0.256567,1.0
7869,"Cake, prepared from recipe, pineapple upside-down",Pineapple,cholesterol,22.0,mg,Fresh,0.5685,0.125333,0.005697,cholesterol,300.0,mg,0.073333,7.752273,1.0
8640,"Potatoes, unprepared, dry mix, au gratin",Potatoes,sodium,2095.0,mg,Fresh,0.6682,0.147313,7e-05,sodium,2300.0,mg,0.91087,0.733585,1.0
