In [1]:
# Import libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

import re
from varname import nameof

import requests
from bs4 import BeautifulSoup
import html
import lxml

In [2]:
def num_cleaning(x):
    """Return the leading numeric text of ``x``, or ``x`` itself if it is not text.

    The pattern matches, at the start of the string, a run of digits followed
    by any run of digits and dots, so ``"12.5 g"`` becomes ``"12.5"``. Both
    parts are optional, so the match never fails: a string that does not start
    with a digit or a dot yields ``""``.

    Parameters
    ----------
    x : any
        Usually a string such as ``"310 mg"``; other values are tolerated.

    Returns
    -------
    str or the original value
        The matched prefix for strings; ``x`` unchanged when ``re.match``
        rejects it with ``TypeError`` (floats, NaN, None, ...).
    """
    try:
        return re.match(r'[\d]*[\.\d]*', x)[0]
    except TypeError:
        # Only "not a string" is an expected failure; anything else should surface.
        return x

def to_float(x):
    """Convert ``x`` to ``float`` when possible, otherwise return it unchanged.

    Parameters
    ----------
    x : any
        Value to convert (typically a numeric string).

    Returns
    -------
    float or the original value
        ``float(x)`` on success; ``x`` itself when the conversion is rejected
        (non-numeric text, empty string, ``None``, an int too large for a float).
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        # Best-effort conversion: leave unconvertible values as they are.
        return x

def mapper(data):
    """Apply ``num_cleaning`` followed by ``to_float`` to every element of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        A DataFrame is processed cell by cell; anything else is processed
        through its own ``.map`` method (as a Series is).

    Returns
    -------
    Same kind of object as ``data``
        A new object holding the cleaned values; ``data`` is not modified.
    """
    def _clean(value):
        # One pass per element instead of two consecutive element-wise passes.
        return to_float(num_cleaning(value))

    if isinstance(data, pd.DataFrame):
        # DataFrame.applymap is deprecated in favour of DataFrame.map (pandas >= 2.1).
        # Check the class, not the instance, so a column named "map" cannot interfere.
        if hasattr(pd.DataFrame, "map"):
            return data.map(_clean)
        return data.applymap(_clean)
    return data.map(_clean)

def gram_to_liter(x):
    """Scale ``x`` by 0.001 to express a gram amount in liters.

    NOTE(review): presumably relies on 1 g of water being about 1 mL; in this
    notebook the function is only mapped over the "water" column.
    """
    liters_per_gram = 0.001
    return x * liters_per_gram

def iu_to_mcg(x):
    """Convert vitamin A from international units (IU) to mcg of retinol.

    The conversion is a multiplication by 0.3. It is used to express vitamin A
    values in a unit that can be compared with the Australian recommendation
    for daily intake.
    """
    mcg_retinol_per_iu = 0.3
    return x * mcg_retinol_per_iu

def key_nutrients():
    """Return a new list with the nutrient column labels used across the notebook.

    The labels are used verbatim to select columns of the nutrition table, so
    the spellings "irom" and "zink" must stay exactly as they are.
    """
    labels = (
        "protein",
        "water",
        "fiber",
        "vitamin_a",
        "thiamin",
        "riboflavin",
        "niacin",
        "vitamin_b6",
        "vitamin_b12",
        "folate",
        "vitamin_c",
        "calcium",
        "irom",
        "magnesium",
        "potassium",
        "sodium",
        "zink",
    )
    return list(labels)



In [3]:
def nutrition_prep(filename):
    """Load the nutrition CSV and return a cleaned table of the key nutrients.

    Steps: read ``filename``, index the rows by the "name" column, keep only
    the columns listed by ``key_nutrients()``, run every cell through
    ``mapper``, then rescale "water" with ``gram_to_liter`` and "vitamin_a"
    with ``iu_to_mcg``.

    Parameters
    ----------
    filename : path-like
        Location of the CSV file passed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        One row per food name, one column per key nutrient.
    """
    by_name = pd.read_csv(filename).set_index("name")
    cleaned = mapper(by_name[key_nutrients()])

    # Unit conversions are applied after cleaning, on the already-cleaned columns.
    return cleaned.assign(
        water=cleaned["water"].map(gram_to_liter),
        vitamin_a=cleaned["vitamin_a"].map(iu_to_mcg),
    )

def food_selector(foodname, df):
    """Look up ``foodname`` in the index of ``df`` and return what ``.loc`` finds."""
    selected = df.loc[foodname]
    return selected

In [4]:
def dailyintake_info(url, timeout=30):
    """Scrape the daily-intake table from ``url`` into a Series.

    The page is fetched, the element with id "tbl-calc" is located, and every
    table row with more than one ``<td>`` contributes one entry: the text of
    the first cell is the label and the text of the second cell is the value.

    Parameters
    ----------
    url : str
        Address of the page containing the table.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30), so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.Series
        Raw cell texts indexed by the raw label texts (no cleaning applied).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    ValueError
        If the page has no element with id "tbl-calc".
    """
    response = requests.get(url, timeout=timeout)
    # Fail on HTTP errors here rather than trying to parse an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "lxml")

    di_table = soup.find(id = "tbl-calc")
    if di_table is None:
        raise ValueError("No element with id 'tbl-calc' found in the response")

    di_dict = {}
    for row in di_table.find_all("tr"):
        items = row.find_all("td")
        # Rows with fewer than two cells cannot hold a label/value pair; skip them.
        if len(items) > 1:
            di_dict[items[0].text] = items[1].text

    return pd.Series(di_dict)

def dailyintake_prep(serie):
    """Clean the scraped daily-intake Series and relabel it with the key nutrients.

    The values go through ``mapper``, the "Iodine" entry is removed, the Series
    is named "daily_intake", and its index is replaced by ``key_nutrients()``.

    NOTE(review): the relabelling is positional, so it presumably relies on the
    scraped entries (minus "Iodine") arriving in the same order and number as
    ``key_nutrients()`` -- confirm against the source page.
    """
    prepared = mapper(serie).drop("Iodine").rename("daily_intake")
    prepared.index = key_nutrients()
    return prepared

In [5]:
def foodquality(food, dailyintake):
    """Rank nutrients by the share of the daily intake that ``food`` provides.

    Returns a two-column frame ("nutrient", "%OfDailyIntake") sorted from the
    highest percentage to the lowest, or ``None`` when ``food`` and
    ``dailyintake`` do not have the same length.

    Note: a later cell in this notebook defines ``foodquality`` again with a
    different output layout; whichever definition ran last is the one in effect.
    """
    if len(food) != len(dailyintake):
        return None

    ranked = ((food / dailyintake) * 100).sort_values(ascending = False).reset_index()
    ranked.columns = ["nutrient", "%OfDailyIntake"]
    return ranked

In [30]:
# Inputs for the cells below: the two food names to look up in the nutrition
# table, the path of the nutrition CSV, and the page the daily intake is scraped from.
foodname1 = "Cauliflower, raw"
foodname2 = "Cornstarch"
filename = "../data/Nutritional_values.csv"
# NOTE(review): this URL embeds `sid` and `token` query parameters -- presumably tied
# to one specific calculator submission; confirm it is stable and not sensitive.
url_w_30 = "https://www.eatforhealth.gov.au/node/1813927/done?sid=806757&token=05ce5572f5618ac641c9f2395b28c59f"

In [31]:
# Build the cleaned nutrition table once, then pull out the row of each food to compare.
df = nutrition_prep(filename)
food1, food2 = (food_selector(label, df) for label in (foodname1, foodname2))

In [15]:
# Scrape the recommended daily intake and clean it in one step.
di = dailyintake_prep(dailyintake_info(url_w_30))
# Show it as a single wide row.
di.to_frame().T

Unnamed: 0,protein,water,fiber,vitamin_a,thiamin,riboflavin,niacin,vitamin_b6,vitamin_b12,folate,vitamin_c,calcium,irom,magnesium,potassium,sodium,zink
daily_intake,46.0,2.1,25.0,700.0,1.1,1.1,14.0,1.3,2.4,400.0,45.0,1000.0,18.0,310.0,2800.0,460.0,8.0


In [9]:
def foodquality(food, dailyintake):
    """Show a food, the daily intake and the percentage covered, one per row.

    ``food`` and ``dailyintake`` are merged on their index, a "%OfDailyIntake"
    column holding ``food / dailyintake * 100`` is added, and the result is
    transposed so that nutrients become columns. Returns ``None`` when the two
    inputs do not have the same length.

    Note: this redefines the ``foodquality`` function from an earlier cell of
    this notebook and replaces it once this cell has run.
    """
    if len(food) != len(dailyintake):
        return None

    side_by_side = pd.merge(food, dailyintake, how = "outer", left_index = True, right_index = True)
    percent_of_intake = (food / dailyintake) * 100
    return side_by_side.assign(**{"%OfDailyIntake": percent_of_intake}).T

In [10]:
# `food` is never assigned in this notebook, so the original call only worked with
# leftover kernel state; use the "Cauliflower, raw" row held in `food1` instead.
foodquality(food1, di)

Unnamed: 0,protein,water,fiber,vitamin_a,thiamin,riboflavin,niacin,vitamin_b6,vitamin_b12,folate,vitamin_c,calcium,irom,magnesium,potassium,sodium,zink
"Cauliflower, raw",1.92,0.09207,2.0,0.0,0.05,0.06,0.507,0.184,0.0,57.0,48.2,22.0,0.42,15.0,299.0,30.0,0.27
daily_intake,46.0,2.1,25.0,700.0,1.1,1.1,14.0,1.3,2.4,400.0,45.0,1000.0,18.0,310.0,2800.0,460.0,8.0
%OfDailyIntake,4.173913,4.384286,8.0,0.0,4.545455,5.454545,3.621429,14.153846,0.0,14.25,107.111111,2.2,2.333333,4.83871,10.678571,6.521739,3.375


In [27]:
def foodquality2(dailyintake, foods):
    """Compare several foods against the daily intake in one transposed table.

    Starting from ``dailyintake`` as a one-column frame, every food whose
    length equals that of ``dailyintake`` is merged in on the index, followed
    by a "%OfDailyIntake_<k>" column with ``food / dailyintake * 100``. ``k``
    counts accepted foods from 1; foods of a different length are skipped
    without notice. The result is transposed so nutrients become columns.
    """
    accepted = [item for item in foods if len(item) == len(dailyintake)]

    table = pd.DataFrame(dailyintake)
    for position, item in enumerate(accepted, start = 1):
        table = pd.merge(table, item, how = "outer", left_index = True, right_index = True)
        table[f"%OfDailyIntake_{position}"] = (item / dailyintake) * 100

    return table.T

In [32]:
# Compare both selected foods against the daily intake in a single table.
foodquality2(di, [food1, food2])

Unnamed: 0,protein,water,fiber,vitamin_a,thiamin,riboflavin,niacin,vitamin_b6,vitamin_b12,folate,vitamin_c,calcium,irom,magnesium,potassium,sodium,zink
daily_intake,46.0,2.1,25.0,700.0,1.1,1.1,14.0,1.3,2.4,400.0,45.0,1000.0,18.0,310.0,2800.0,460.0,8.0
"Cauliflower, raw",1.92,0.09207,2.0,0.0,0.05,0.06,0.507,0.184,0.0,57.0,48.2,22.0,0.42,15.0,299.0,30.0,0.27
%OfDailyIntake_1,4.173913,4.384286,8.0,0.0,4.545455,5.454545,3.621429,14.153846,0.0,14.25,107.111111,2.2,2.333333,4.83871,10.678571,6.521739,3.375
Cornstarch,0.26,0.00832,0.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.47,3.0,3.0,9.0,0.06
%OfDailyIntake_2,0.565217,0.39619,3.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,2.611111,0.967742,0.107143,1.956522,0.75


In [23]:
# Number of nutrients in a selected food row. (`food` itself is never defined in
# this notebook, so the check is done on `food1`.)
len(food1)

17

In [33]:
# Preview the first ten rows of the cleaned nutrition table.
df.head(10)

Unnamed: 0_level_0,protein,water,fiber,vitamin_a,thiamin,riboflavin,niacin,vitamin_b6,vitamin_b12,folate,vitamin_c,calcium,irom,magnesium,potassium,sodium,zink
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Cornstarch,0.26,0.00832,0.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.47,3.0,3.0,9.0,0.06
"Nuts, pecans",9.17,0.00352,9.6,16.8,0.66,0.13,1.167,0.21,0.0,22.0,1.1,70.0,2.53,121.0,410.0,0.0,4.53
"Eggplant, raw",0.98,0.0923,3.0,6.9,0.039,0.037,0.649,0.084,0.0,22.0,2.2,9.0,0.23,14.0,229.0,2.0,0.16
"Teff, uncooked",13.3,0.00882,8.0,2.7,0.39,0.27,3.363,0.482,0.0,0.0,0.0,180.0,7.63,184.0,427.0,12.0,3.63
"Sherbet, orange",1.1,0.0661,1.3,13.8,0.027,0.097,0.063,0.023,0.13,4.0,2.3,54.0,0.14,8.0,96.0,46.0,0.48
"Cauliflower, raw",1.92,0.09207,2.0,0.0,0.05,0.06,0.507,0.184,0.0,57.0,48.2,22.0,0.42,15.0,299.0,30.0,0.27
"Taro leaves, raw",4.98,0.08566,3.7,1447.5,0.209,0.456,1.513,0.146,0.0,126.0,52.0,107.0,2.25,45.0,648.0,3.0,0.41
"Lamb, raw, ground",16.56,0.05947,0.0,0.0,0.11,0.21,5.96,0.13,2.31,18.0,0.0,16.0,1.55,21.0,222.0,59.0,3.41
"Cheese, camembert",19.8,0.0518,0.0,246.0,0.028,0.488,0.63,0.227,1.3,62.0,0.0,388.0,0.33,20.0,187.0,842.0,2.38
Vegetarian fillets,23.0,0.045,6.1,0.0,1.1,0.9,12.0,1.5,4.2,102.0,0.0,95.0,2.0,23.0,600.0,490.0,1.4
