## Import libraries

In [1]:
import os
import sys

from dotenv import load_dotenv
import pandas as pd

sys.path.append(os.path.abspath('..'))
from src.data.fdc_api_functions import fetch_all, fetch_fdc_data, get_food_info

In [2]:

# Load variables from a local .env file into the process environment, then
# read the FoodData Central API key from it. The key is None when the
# variable is not set.
load_dotenv()
api_key = os.environ.get('FOOD_DATA_CENTRAL_API_KEY')

## Fetch data

In [3]:
# Nutrient names requested for every food group fetched below.
nutrients = [
    'Energy',
    'Protein',
    'Total lipid (fat)',
    'Carbohydrate, by difference',
    'Vitamin A, RAE',
    'Vitamin D3 (cholecalciferol)',
    'Calcium, Ca',
    'Magnesium, Mg',
]

# Search terms, one per food group; each one also names the CSV file that
# fetch_data writes for it.
queries = [
    'fruit',
    'vegetable',
    'grain',
    'fish',
    'bean',
    'nut',
    'poultry',
    'meat',
    'egg',
    'cheese',
]

In [15]:
def fetch_data(target_directory, nutrients, foods=None):
    """Fetch one table per food query and save each as ``<query>.csv``.

    Reads the module-level ``api_key`` and, when ``foods`` is omitted, the
    module-level ``queries`` list.

    Args:
        target_directory: Directory the CSV files are written to. It is
            created (including parents) when it does not exist yet.
        nutrients: Nutrient names passed to ``fetch_all`` as
            ``nutrients_list``.
        foods: Optional iterable of search terms. Defaults to the
            module-level ``queries`` so existing two-argument calls behave
            as before.
    """
    if foods is None:
        foods = queries

    # Every query uses the 'Foundation' data type except the ones listed here.
    # NOTE(review): presumably 'fish' is too sparse in Foundation - confirm.
    data_type_overrides = {'fish': 'SR Legacy'}

    # to_csv does not create missing directories, so make sure it exists.
    # An empty string means "current directory" and needs no creation.
    if target_directory:
        os.makedirs(target_directory, exist_ok=True)

    for food in foods:
        food_type = fetch_all(
            query=food,
            api_key=api_key,
            nutrients_list=nutrients,
            data_type=data_type_overrides.get(food, 'Foundation'),
            start_page=1,
        )

        # index=False keeps the row index out of the saved file.
        food_type.to_csv(os.path.join(target_directory, food + '.csv'), index=False)


fetch_data('../data/raw/', nutrients)

In [16]:
# Load the fish table written by fetch_data; as the last expression of the
# cell, the resulting frame is what the notebook renders below.
pd.read_csv(filepath_or_buffer='../data/raw/fish.csv')

Unnamed: 0,Description,Energy,Protein,Total lipid (fat),"Carbohydrate, by difference","Vitamin A, RAE",Vitamin D3 (cholecalciferol),"Calcium, Ca","Magnesium, Mg"
0,"Fish, fish sticks, frozen, prepared",277.0,11.00,16.20,21.70,4.0,0.0,16.0,25.0
1,Fish broth,16.0,2.00,0.60,0.40,1.0,,30.0,1.0
2,"Fish, surimi",99.0,15.20,0.90,6.85,20.0,,9.0,43.0
3,"Fish oil, herring",902.0,0.00,100.00,0.00,0.0,,0.0,0.0
4,"Fish oil, menhaden",902.0,0.00,100.00,0.00,0.0,,0.0,0.0
...,...,...,...,...,...,...,...,...,...
247,"Fish, whitefish, broad, head, eyes, cheeks and...",107.0,18.60,3.60,0.00,,,,
248,"Fish, cod, Pacific, cooked, dry heat (may cont...",85.0,18.70,0.50,0.00,2.0,0.6,10.0,24.0
249,"Fish, pollock, Alaska, cooked, dry heat (may c...",111.0,23.50,1.18,0.00,17.0,1.3,72.0,81.0
250,"Margarine-like spread, SMART BALANCE Omega Plu...",605.0,0.13,71.00,0.16,,,,


### Fruits

Whole fruits and fruit juice

In [13]:
# Query 'fruit' in the 'Foundation' data type, requesting the nutrients
# listed in `fruits_nutrients`.
# NOTE(review): `fruits_nutrients` is not assigned in any visible cell -
# confirm it is defined before this cell runs.
fruits = fetch_all(
    query='fruit',
    api_key=api_key,
    nutrients_list=fruits_nutrients,
    data_type='Foundation',
    start_page=1,
)

# Save to the working directory without the index column.
fruits.to_csv('fruits.csv', index=False)

### Vegetables

Dark green vegetables, red and orange vegetables, starchy vegetables, and other vegetables

In [27]:
# Vegetables request the proximate, vitamin and mineral nutrient groups
# (all three lists are defined outside this cell).
vegetables_nutrients = (
    proximates
    + vitamins
    + minerals
)

# Query 'vegetable' in the 'Foundation' data type.
vegetables = fetch_all(
    query='vegetable',
    api_key=api_key,
    nutrients_list=vegetables_nutrients,
    data_type='Foundation',
    start_page=1,
)

# Save to the working directory without the index column.
vegetables.to_csv('vegetables.csv', index=False)

### Grains

Whole wheat bread, brown rice, popcorn, oatmeal (whole grains); refined grains

In [28]:
# Grains request proximates and minerals (both defined outside this cell)
# plus molybdenum, dietary fiber and a set of B vitamins.
grains_nutrients = proximates + minerals + [
    'Molybdenum, Mo',
    'Fiber, total dietary',
    'Thiamin',
    'Riboflavin',
    'Niacin',
    'Vitamin B-6',
    'Biotin',
]

# Query 'grain' in the 'Foundation' data type.
grains = fetch_all(
    query='grain',
    api_key=api_key,
    nutrients_list=grains_nutrients,
    data_type='Foundation',
    start_page=1,
)

# Save to the working directory without the index column.
grains.to_csv('grains.csv', index=False)

### Protein Foods

Beans and peas, seafood, meat, poultry and eggs, nuts, seeds and soy

In [67]:
# Fish request proximates, minerals, vitamins and essential amino acids (all
# defined outside this cell) plus three named omega-3 fatty acids, the
# fatty-acid class totals and cholesterol.
fish_nutrients = (
    proximates
    + minerals
    + vitamins
    + essential_aminoacids
    + ['PUFA 18:3 n-3 c,c,c (ALA)', 'PUFA 20:5 n-3 (EPA)', 'PUFA 22:6 n-3 (DHA)']
    + [
        'Fatty acids, total saturated',
        'Fatty acids, total monounsaturated',
        'Fatty acids, total polyunsaturated',
        'Fatty acids, total trans',
        'Cholesterol',
    ]
)

# Hand the three vitamin K / lutein entries to the project helper for removal.
# NOTE(review): `remove_unnecessary_nutrients` is defined outside this view -
# confirm it returns the filtered list rather than mutating in place.
fish_nutrients = remove_unnecessary_nutrients(
    fish_nutrients,
    ['Vitamin K (Dihydrophylloquinone)', 'Vitamin K (phylloquinone)', 'Lutein + zeaxanthin'],
)

# Fish is the one group queried from 'SR Legacy' instead of 'Foundation'.
fish = fetch_all(
    query='fish',
    api_key=api_key,
    nutrients_list=fish_nutrients,
    data_type='SR Legacy',
    start_page=1,
)

# Keep only rows whose description contains 'raw'. na=False makes a missing
# description count as "no match"; without it the mask would contain NaN and
# the boolean indexing would raise.
fish = fish[fish['Description'].str.contains('raw', na=False)]

# NOTE(review): this writes to '../Foods/' while fetch_data writes fish.csv
# to '../data/raw/' - confirm which location downstream code reads.
fish.to_csv('../Foods/fish.csv', index=False)

In [66]:
# Beans request proximates and minerals (both defined outside this cell)
# plus five additional trace elements.
beans_nutrients = proximates + minerals + [
    'Sulfur, S',
    'Nickel, Ni',
    'Molybdenum, Mo',
    'Cobalt, Co',
    'Boron, B',
]

# Query 'bean' in the 'Foundation' data type.
beans = fetch_all(
    query='bean',
    api_key=api_key,
    nutrients_list=beans_nutrients,
    data_type='Foundation',
    start_page=1,
)

# The rows labelled 0 and 1 duplicate entries already in the vegetables table.
# NOTE(review): dropping by fixed index labels depends on the order of the
# returned rows - confirm these are still the duplicated rows.
beans.drop(index=[0, 1], inplace=True)

# Save to the working directory without the index column.
beans.to_csv('beans.csv', index=False)

In [31]:
# Nuts request only the proximate and mineral nutrient groups
# (both lists are defined outside this cell).
nuts_nutrients = (
    proximates
    + minerals
)

# Query 'nut' in the 'Foundation' data type.
nuts = fetch_all(
    query='nut',
    api_key=api_key,
    nutrients_list=nuts_nutrients,
    data_type='Foundation',
    start_page=1,
)

# Save to the working directory without the index column.
nuts.to_csv('nuts.csv', index=False)

In [32]:
# Poultry requests proximates and minerals (both defined outside this cell)
# plus the fatty-acid class totals and cholesterol.
poultry_nutrients = proximates + minerals + [
    'Fatty acids, total saturated',
    'Fatty acids, total monounsaturated',
    'Fatty acids, total polyunsaturated',
    'Fatty acids, total trans',
    'Cholesterol',
]

# Query 'poultry' in the 'Foundation' data type.
poultry = fetch_all(
    query='poultry',
    api_key=api_key,
    nutrients_list=poultry_nutrients,
    data_type='Foundation',
    start_page=1,
)

# Save to the working directory without the index column.
poultry.to_csv('poultry.csv', index=False)

In [33]:
# Meat requests proximates and minerals (both defined outside this cell)
# plus the fatty-acid class totals and cholesterol.
meat_nutrients = proximates + minerals + [
    'Fatty acids, total saturated',
    'Fatty acids, total monounsaturated',
    'Fatty acids, total polyunsaturated',
    'Fatty acids, total trans',
    'Cholesterol',
]

# NOTE(review): the '-restaurant' term presumably excludes restaurant items
# from the search - confirm against the FDC search syntax.
meat = fetch_all(
    query='meat -restaurant',
    api_key=api_key,
    nutrients_list=meat_nutrients,
    data_type='Foundation',
    start_page=1,
)

# Save to the working directory without the index column.
meat.to_csv('meat.csv', index=False)

In [34]:
# Stack the poultry rows on top of the meat rows and discard rows that are
# exact duplicates, keeping the first occurrence of each.
meat_and_poultry = pd.concat([poultry, meat]).drop_duplicates()

# Save to the working directory without the index column.
meat_and_poultry.to_csv('meat_and_poultry.csv', index=False)

### Dairy

Milk and yogurt, cheese

In [35]:
# Cheese requests proximates, vitamins and minerals (all defined outside this
# cell) plus the fatty-acid class totals.
cheese_nutrients = proximates + vitamins + minerals + [
    'Fatty acids, total saturated',
    'Fatty acids, total monounsaturated',
    'Fatty acids, total polyunsaturated',
    'Fatty acids, total trans',
]

# Query 'cheese' in the 'Foundation' data type.
cheese = fetch_all(
    query='cheese',
    api_key=api_key,
    nutrients_list=cheese_nutrients,
    data_type='Foundation',
    start_page=1,
)

# Save to the working directory without the index column.
cheese.to_csv('cheese.csv', index=False)

In [36]:
# Requests proximates, vitamins and minerals, then the fatty-acid class
# totals, then the amino acid list (`aminoacids` and the three groups are
# defined outside this cell).
egg_milk_yogurt_nutrients = (
    proximates
    + vitamins
    + minerals
    + [
        'Fatty acids, total saturated',
        'Fatty acids, total monounsaturated',
        'Fatty acids, total polyunsaturated',
        'Fatty acids, total trans',
    ]
    + aminoacids
)

# NOTE(review): the query only mentions 'egg' (with '-cheese', presumably an
# exclusion term) although the variable is named for milk and yogurt as well -
# confirm the search returns those items.
egg_milk_yogurt = fetch_all(
    query='egg -cheese',
    api_key=api_key,
    nutrients_list=egg_milk_yogurt_nutrients,
    data_type='Foundation',
    start_page=1,
)

# Save to the working directory without the index column.
egg_milk_yogurt.to_csv('egg_milk_yogurt.csv', index=False)