In [1]:
import requests
import json
import os
import pandas as pd

In [2]:
# Read the meal log from disk and echo it so the rows can be checked by eye.
rawData = pd.read_csv(filepath_or_buffer='./foodData.csv')
print(rawData)

        Meal       Food Item  Quantity Unit
0  Breakfast       Skim Milk       250   ml
1      Lunch  Chicken Breast       200   gm
2      Lunch     Cooked Rice       150   gm
3      Lunch        Broccoli       150   gm
4      Lunch         Spinach       100   gm
5      Lunch     Bell pepper       100   gm


In [4]:
# Query parameters sent with every FoodData Central request.
# The API key is read from the FDC_API_KEY environment variable so that no
# secret is stored in the notebook. When the variable is unset, the public,
# heavily rate-limited "DEMO_KEY" is used so the notebook still runs.
# NOTE(review): the key that used to be hard-coded here has been shared with
# the notebook and should be regenerated.
params = {
    "api_key": os.environ.get("FDC_API_KEY", "DEMO_KEY")
}

In [11]:
def checkIfNutrientIsMacronutrient(nutrient_name):
    """Return True when ``nutrient_name`` names a macronutrient or energy value.

    The check is a case-insensitive substring match, so a keyword such as
    "fat" matches both "Total lipid (fat)" and "Fatty acids, total saturated".

    Args:
        nutrient_name: Nutrient name as a string, e.g. "Carbohydrate, by difference".

    Returns:
        bool: True if any macronutrient keyword occurs in the name.
    """
    nutrient_name_lower = nutrient_name.lower()

    # "carbohydrate" is singular on purpose: the plural form is never a
    # substring of names like "Carbohydrate, by difference", while the singular
    # form still matches "carbohydrates".
    macro_keywords = ("calories", "protein", "fat", "carbohydrate", "fiber", "energy")

    return any(keyword in nutrient_name_lower for keyword in macro_keywords)

def checkIfNutrientNameIsMicroNutrient(nutrient_name):
    """Return True when ``nutrient_name`` names one of the tracked micronutrients.

    The check is a case-insensitive substring match against a fixed list of
    vitamins and minerals.

    Args:
        nutrient_name: Nutrient name as a string, e.g. "Magnesium, Mg".

    Returns:
        bool: True if any micronutrient keyword occurs in the name.
    """
    nutrient_name_lower = nutrient_name.lower()

    # The B vitamins are listed with and without a hyphen: nutrient databases
    # commonly write "Vitamin B-6" / "Vitamin B-12", which the unhyphenated
    # keywords alone would miss.
    micro_keywords = (
        "sodium", "potassium",
        "vitamin a", "vitamin c", "vitamin d", "vitamin e",
        "vitamin b6", "vitamin b-6", "vitamin b12", "vitamin b-12",
        "folate", "iron", "calcium", "magnesium", "zinc",
    )

    return any(keyword in nutrient_name_lower for keyword in micro_keywords)

In [14]:
# Look up every food item of the meal log in USDA FoodData Central (Foundation
# foods only) and keep the macro-/micronutrients of the first search hit.

FDC_SEARCH_URL = "https://api.nal.usda.gov/fdc/v1/foods/search"
FDC_TIMEOUT_SECONDS = 30  # without a timeout, a stalled connection blocks the cell forever

overall_meal_json = []

for food_item in rawData['Food Item']:
    # Pass the search term through `params` so requests URL-encodes it; pasting
    # it into the URL breaks on names containing '&', '#', '+', etc.
    query_params = {**params, "dataType": "Foundation", "query": food_item}

    try:
        response = requests.get(FDC_SEARCH_URL, params=query_params, timeout=FDC_TIMEOUT_SECONDS)
    except requests.RequestException as e:
        # Network failure or timeout: report it and continue with the next item.
        print(f"Error: Unable to fetch data for {food_item!r}: {e}")
        continue

    if response.status_code != 200:
        print(f"Error: Unable to fetch data for {food_item!r} (HTTP {response.status_code})")
        continue

    try:
        foods = response.json()["foods"]
        if not foods:
            # An empty result list is a normal outcome, not a malformed payload.
            print(f"Error: no Foundation food matches {food_item!r}")
            continue

        food_item_information = foods[0]
        print(food_item_information['description'])
        print(food_item_information['foodCategory'])

        important_nutrients = []
        for nutrient_information in food_item_information['foodNutrients']:
            nutrient_name = nutrient_information['nutrientName']

            # Macro classification takes precedence over micro; anything
            # matching neither keyword list is skipped.
            if checkIfNutrientIsMacronutrient(nutrient_name):
                nutrient_type = "MACRO"
            elif checkIfNutrientNameIsMicroNutrient(nutrient_name):
                nutrient_type = "MICRO"
            else:
                continue

            important_nutrients.append({
                "nutrientName": nutrient_name,
                "nutrientType": nutrient_type,
                # .get(): one nutrient lacking a unit or value must not discard
                # the whole food item; the gap is recorded as None instead.
                "unitName": nutrient_information.get("unitName"),
                "value": nutrient_information.get("value"),
            })

        overall_meal_json.append({
            "food_item": food_item,
            "food_item_description": food_item_information['description'],
            "food_item_category": food_item_information['foodCategory'],
            "nutritional_information": important_nutrients
        })
    except (KeyError, TypeError, ValueError) as e:
        # Malformed or non-JSON payload. Print the raw body: calling
        # response.json() again here would raise a second time on invalid JSON.
        print(f"Error: {e}")
        print(response.text)

print(overall_meal_json)
    

Milk, nonfat, fluid, with added vitamin A and vitamin D (fat free or skim)
Dairy and Egg Products
Chicken, breast, boneless, skinless, raw
Poultry Products
Flour, rice, brown
Cereal Grains and Pasta
Broccoli, raw
Vegetables and Vegetable Products
Spinach, baby
Vegetables and Vegetable Products
Peppers, bell, green, raw
Vegetables and Vegetable Products
[{'food_item': 'Skim Milk', 'food_item_description': 'Milk, nonfat, fluid, with added vitamin A and vitamin D (fat free or skim)', 'food_item_category': 'Dairy and Egg Products', 'nutritional_information': [{'nutrientName': 'Magnesium, Mg', 'nutrientType': 'MICRO', 'unitName': 'MG', 'value': 12.5}, {'nutrientName': 'Sodium, Na', 'nutrientType': 'MICRO', 'unitName': 'MG', 'value': 41.0}, {'nutrientName': 'Potassium, K', 'nutrientType': 'MICRO', 'unitName': 'MG', 'value': 167}, {'nutrientName': 'Zinc, Zn', 'nutrientType': 'MICRO', 'unitName': 'MG', 'value': 0.45}, {'nutrientName': 'Total lipid (fat)', 'nutrientType': 'MACRO', 'unitName': '

In [24]:
# Flatten the nested meal records into one row per (food item, nutrient).
# json_normalize turns every dict inside 'nutritional_information' into a row
# and repeats the food-level fields listed in `meta` on each of those rows.
# A food whose nutrient list is empty contributes no rows.
food_level_columns = ['food_item', 'food_item_description', 'food_item_category']
nutrient_level_columns = ['nutrientName', 'nutrientType', 'unitName', 'value']

df = pd.json_normalize(
    overall_meal_json,
    record_path='nutritional_information',
    meta=food_level_columns,
)

# Order the columns by name rather than assigning labels positionally: a
# positional assignment either does nothing or mislabels/raises as soon as the
# number or order of columns differs from what was expected.
df = df.reindex(columns=food_level_columns + nutrient_level_columns)

# Use a plain 0..n-1 row index.
df = df.reset_index(drop=True)

         food_item                                                       food_item_description                 food_item_category                              nutrientName nutrientType unitName       value
0        Skim Milk  Milk, nonfat, fluid, with added vitamin A and vitamin D (fat free or skim)             Dairy and Egg Products                             Magnesium, Mg        MICRO       MG    12.50000
1        Skim Milk  Milk, nonfat, fluid, with added vitamin A and vitamin D (fat free or skim)             Dairy and Egg Products                                Sodium, Na        MICRO       MG    41.00000
2        Skim Milk  Milk, nonfat, fluid, with added vitamin A and vitamin D (fat free or skim)             Dairy and Egg Products                              Potassium, K        MICRO       MG   167.00000
3        Skim Milk  Milk, nonfat, fluid, with added vitamin A and vitamin D (fat free or skim)             Dairy and Egg Products                                  Zinc, Zn     

In [107]:
# Load the downloaded Foundation Foods JSON file. JSON text is UTF-8, so the
# encoding is stated explicitly instead of relying on the platform default.
with open('foundationDownload.json', 'r', encoding='utf-8') as file:
    food_data = json.load(file)

# The records of interest are stored under the top-level 'FoundationFoods' key.
food_data = food_data['FoundationFoods']
food_data_foundation = pd.DataFrame(food_data)

# Explode the 'foodNutrients' column so every nutrient entry gets its own row
food_data_foundation = food_data_foundation.explode('foodNutrients')

# Expand the nested dictionary in 'foodNutrients' into separate columns.
# NOTE(review): apply(pd.Series) builds one Series per row and is slow on large
# frames; kept as-is because it tolerates rows without a nutrient dict.
food_data_foundation = pd.concat([food_data_foundation, food_data_foundation.pop('foodNutrients').apply(pd.Series)], axis=1)

# Replace the duplicated post-explode index with a plain 0..n-1 index
food_data_foundation = food_data_foundation.reset_index(drop=True)

# Count the distinct values of every column; a column with at most one
# distinct value carries no information and is dropped.
columns_to_be_removed_list = []
for column in food_data_foundation.columns:
    count_of_different_values_for_column = len(food_data_foundation[column].value_counts())
    if count_of_different_values_for_column <= 1:
        columns_to_be_removed_list.append(column)

print(columns_to_be_removed_list)
# For the current download this is:
# ['foodClass', 'foodAttributes', 'isHistoricalReference', 'dataType', 'type']
# Drop the computed list (not a hand-copied literal) so the cell stays correct
# when the download is refreshed.
food_data_foundation = food_data_foundation.drop(columns=columns_to_be_removed_list)

# nutrientConversionFactors, inputFoods and id are not used for now, so drop them as well
food_data_foundation = food_data_foundation.drop(['nutrientConversionFactors', 'inputFoods', 'id'], axis=1)

# foodCategory values look like {'description': 'Legumes and Legume Products'};
# keep only the description. Values that are not dicts (e.g. a missing
# category) are passed through unchanged instead of raising.
food_data_foundation['foodCategory'] = food_data_foundation['foodCategory'].apply(
    lambda x: x['description'] if isinstance(x, dict) else x
)


['foodClass', 'foodAttributes', 'isHistoricalReference', 'dataType', 'type']


In [108]:
# Each cell of the 'nutrient' column holds a dict such as
# {'id': 1087, 'number': '301', 'name': 'Calcium, Ca', 'rank': 5300, 'unitName': 'mg'}.
# Spread those dict keys into columns of their own.
new_columns_for_nutrients = food_data_foundation['nutrient'].apply(pd.Series)

# Attach the new columns, discard the now-redundant dict column, and expose the
# nutrient's 'name' under the clearer label 'nutrientName'.
food_data_foundation = (
    pd.concat([food_data_foundation, new_columns_for_nutrients], axis=1)
    .drop(columns='nutrient')
    .rename(columns={'name': 'nutrientName'})
)

In [122]:
# Number of rows per nutrient name, rarest first, printed without truncation.
nutrient_name_counts = food_data_foundation['nutrientName'].value_counts()
nutrient_name_counts = nutrient_name_counts.sort_values()
print(nutrient_name_counts.to_string())


nutrientName
Specific Gravity                                 1
10-Formyl folic acid (10HCOFA)                   1
PUFA 20:3                                        1
5-methyl tetrahydrofolate (5-MTHF)               1
5-Formyltetrahydrofolic acid (5-HCOH4            1
MUFA 22:1                                        2
Phytoene                                         2
Phytofluene                                      2
Stachyose                                        2
Raffinose                                        2
Verbascose                                       2
MUFA 18:1                                        3
PUFA 18:3                                        3
Stigmastadiene                                   4
Phytosterols, other                              4
Carotene, gamma                                  4
Sugars, total including NLEA                     5
Pyruvic acid                                     5
Daidzin                                          6
Genistin          