## Nutrition Dataframe

### Step 1: Select out by relevant foods to demand system

<img src="betas.png" alt="betas" width="300">

In [9]:
food_items = [
    "Banana",
    "Barley",
    "Beef",
    "Beetroot",
    "Berbere",
    "Bread/biscuit",
    "Butter/ghee",
    "Carrot",
    "Chick Pea",
    "Coffee",
    "Eggs",
    "Fenugreek",
    "Field Pea",
    "Garlic",
    "Goat & mutton meat",
    "Ground nuts",
    "Horsebeans",
    "Injera",
    "Kariya",
    "Leafy Greens",
    "Lentils",
    "Maize",
    "Mango",
    "Milk",
    "Oils",
    "Onion",
    "Orange",
    "Pasta/Macaroni",
    "Potato",
    "Rice",
    "Salt",
    "Shiro",
    "Soda",
    "Sorghum",
    "Sugar",
    "Tea",
    "Teff",
    "Tomato",
    "Wheat"
]

### Step 2: Pull in FCT Data from Sheet

In [2]:
!pip install -r requirements.txt



In [10]:
import pandas as pd
import numpy as np
from eep153_tools.sheets import read_sheets

In [11]:
Ethiopia_Data = 'https://docs.google.com/spreadsheets/d/1PVqM25qZyDz5K3jsLDu-JgPS5-vmro4wHk8k6OIB9Eo/edit?usp=sharing'

In [12]:
fullnutrients = read_sheets(Ethiopia_Data,sheet='FCT Sorted')

In [13]:
fullnutrients.head()

Unnamed: 0,index,Energy,Protein,Vitamin A,Vitamin D,Vitamin E,Vitamin C,Vitamin B-6,Vitamin B-12,Calcium,...,Zinc,Fiber,Folate,Carbohydrate,Niacin,Phosphorus,Potassium,Riboflavin,Thiamin,Vitamin K
0,Avocado,6700.0,20.0,70,0,20.7,100,2.57,0,120.0,...,6.4,67.0,810.0,85.3,17.38,520.0,4850,1.3,0.67,210
1,Banana,890.0,11.0,30,0,1.0,87,4.0,0,50.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0
2,Barley,3490.0,123.0,0,0,10.0,0,2.0,0,260.0,...,20.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0
3,Beef,3230.0,249.0,0,0,0.0,0,3.0,19,40.0,...,41.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0
4,Beer,410.0,3.0,0,0,0.0,0,1.0,0,50.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0


### Step 3: Check the list of items that are not included in the df

In [14]:
manual_search_items = []
for item in food_items:
    if item not in fullnutrients['index'].values:
        manual_search_items.append(item)

print(manual_search_items)

['Carrot', 'Chick Pea', 'Field Pea', 'Horsebeans', 'Kariya', 'Lentils', 'Shiro', 'Tomato', 'Wheat']


### Step 4: Filling in the remaining missing items

In [15]:
apikey = "LUJvyyWCR8JkuTS6nSLkEdtiC70cbEfD7TCQbvab"

In [16]:
import fooddatacentral as fdc
import warnings

In [17]:
def get_nutrients(FDC):
    food_nutrients = fdc.nutrients(apikey,FDC)
    if food_nutrients is None:
        return f'no response for {FDC}'
    q = food_nutrients.Quantity
    if "Energy" in q:
        # convert from kJ to kcal ugh
        energy_kcal = q["Energy"] / 4.184
        q["Energy"] = energy_kcal
    return pd.Series(q)

In [18]:
len(manual_search_items)

9

In [19]:
codes = {'Carrot': 2258586, 'Chick Pea': 2644282, 'Field Pea': 2644284, 
         'Horsebeans': 2707367, 'Kariya': 170497, 'Lentils': 2707425, 
         'Shiro': 2644282, 'Tomato': 2709719, 'Wheat': 169725}
#shiro is a slight stretch, as it is a garbonzo bean soup that is labeled as entirely garbanzo here.

In [21]:
get_nutrients(2644284)

Proximates                              0.0000
Water                                  11.0100
Energy (Atwater General Factors)      354.0150
Energy (Atwater Specific Factors)     345.5703
Nitrogen                                3.3960
Protein                                21.2250
Total lipid (fat)                       2.4190
Ash                                     3.5100
Carbohydrates                           0.0000
Carbohydrate, by difference            61.8360
Starch                                 37.3800
Minerals                                0.0000
Calcium, Ca                            71.4000
Iron, Fe                                5.9300
Magnesium, Mg                         183.9000
Phosphorus, P                         427.6000
Potassium, K                         1243.0000
Sodium, Na                              3.4580
Zinc, Zn                                3.6540
Copper, Cu                              0.9359
Manganese, Mn                           1.2880
Name: Quantit

#### Pulling the remaining nutrients and formatting them

In [26]:
def reindex_nutrients(input_series, target_nutrients, priority_map):
    """
    Reindexes an input Series of nutrient values based on a priority mapping.
    
    Parameters:
      input_series (pd.Series): A Series indexed by nutrient names (from the source food data).
      target_nutrients (list): A list of target nutrient names for the output.
          Example: ['Energy', 'Protein', 'Vitamin A', 'Vitamin D', 'Vitamin E',
                    'Vitamin C', 'Vitamin B-6', 'Vitamin B-12', 'Calcium', 'Magnesium',
                    'Iron', 'Zinc', 'Fiber', 'Folate', 'Carbohydrate', 'Niacin',
                    'Phosphorus', 'Potassium', 'Riboflavin', 'Thiamin', 'Vitamin K']
      priority_map (dict): A dictionary where each key is a nutrient (from the target list)
          and the value is a list of candidate keys (in order of preference) to search for 
          in the input_series.
          
    Returns:
      pd.Series: A Series with index equal to target_nutrients. For each nutrient,
                 the value is taken from the first candidate found in the input_series,
                 or 0 if none of the candidates are available.
    """
    output_dict = {}
    
    # Loop over each nutrient desired in the final output.
    for nutrient in target_nutrients:
        # If a priority mapping exists for this nutrient, use it;
        # otherwise, default to simply using the nutrient name itself.
        candidates = priority_map.get(nutrient, [nutrient])
        found_value = None
        
        # Iterate over the candidate keys and take the first that exists in the input series.
        for candidate in candidates:
            if candidate in input_series.index:
                found_value = input_series[candidate]
                break
        # If none of the candidate keys were found, assign 0.
        output_dict[nutrient] = found_value if found_value is not None else 0
        
    # Convert the dictionary to a pandas Series and return.
    return pd.Series(output_dict)

In [27]:
target_nutrients = list(fullnutrients.columns)

# priority map
priority_map = {
    'Energy': [
        "Energy", 
        "Energy (Atwater Specific Factors)", 
        "Energy (Atwater General Factors)"
    ],
    'Protein': ["Protein"],
    'Vitamin A': ["Vitamin A, RAE"],
    'Vitamin C': ["Vitamin C, total ascorbic acid"],
    'Vitamin B-6': ["Vitamin B-6"],
    'Calcium': ["Calcium, Ca"],
    'Magnesium': ["Magnesium, Mg"],
    'Iron': ["Iron, Fe"],
    'Zinc': ["Zinc, Zn"],
    'Fiber': ["Fiber, total dietary"],
    'Folate': ["Folate, total"],
    'Carbohydrate': ["Carbohydrate, by difference"],
    'Niacin': ["Niacin"],
    'Phosphorus': ["Phosphorus, P"],
    'Potassium': ["Potassium, K"],
    'Riboflavin': ["Riboflavin"],
    'Thiamin': ["Thiamin"]
}

In [28]:
def create_remaining_df(listFDCIDs, target_nutrients, priority_map):
    listdfs = []
    for id in listFDCIDs:
        output_series  = reindex_nutrients(get_nutrients(id), target_nutrients, priority_map)
        newdf = output_series.to_frame().T
        listdfs.append(newdf)
    total_df = pd.concat(listdfs)
    return total_df

In [29]:
remaining_items = create_remaining_df(list(codes.values()),target_nutrients, priority_map)
remaining_items = remaining_items.reset_index().drop(columns = ['index'])
remaining_items.insert(0, 'index', list(codes.keys()))
final = remaining_items.drop(columns = ['level_0'])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  q["Energy"] = energy_kcal
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  q["Energy"] = energy_kcal
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  q["Energy"] = energy_kcal
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  q["Energy"] = energy_kcal
A value is trying to be set on a copy of a slice from a DataFram

In [30]:
pd.options.display.float_format = '{:.2f}'.format

In [31]:
final

Unnamed: 0,index,Energy,Protein,Vitamin A,Vitamin D,Vitamin E,Vitamin C,Vitamin B-6,Vitamin B-12,Calcium,...,Zinc,Fiber,Folate,Carbohydrate,Niacin,Phosphorus,Potassium,Riboflavin,Thiamin,Vitamin K
0,Carrot,44.98,0.94,0.0,0.0,0.0,0.0,0.15,0.0,30.48,...,0.24,3.1,37.06,10.27,1.41,39.81,279.6,0.1,0.07,0.0
1,Chick Pea,371.99,21.27,0.0,0.0,0.0,0.0,0.0,0.0,111.1,...,3.12,0.0,0.0,60.36,0.0,353.1,1074.0,0.0,0.0,0.0
2,Field Pea,345.57,21.23,0.0,0.0,0.0,0.0,0.0,0.0,71.4,...,3.65,0.0,0.0,61.84,0.0,427.6,1243.0,0.0,0.0,0.0
3,Horsebeans,9.2,7.06,1.0,0.0,0.0,0.3,0.06,0.0,34.0,...,0.94,5.0,92.0,18.3,0.63,116.0,249.0,0.08,0.09,0.0
4,Kariya,39.91,2.0,59.0,0.0,0.0,242.5,0.28,0.0,18.0,...,0.3,1.5,23.0,9.46,0.95,46.0,340.0,0.09,0.09,0.0
5,Lentils,27.49,8.97,0.0,0.0,0.0,1.4,0.17,0.0,19.0,...,1.26,7.9,171.0,20.0,1.0,179.0,367.0,0.07,0.16,0.0
6,Shiro,371.99,21.27,0.0,0.0,0.0,0.0,0.0,0.0,111.1,...,3.12,0.0,0.0,60.36,0.0,353.1,1074.0,0.0,0.0,0.0
7,Tomato,4.78,0.82,32.0,0.0,0.0,16.3,0.08,0.0,10.0,...,0.15,1.2,13.0,4.04,0.6,23.0,226.0,0.02,0.05,0.0
8,Wheat,197.9,7.49,0.0,0.0,0.0,2.6,0.27,0.0,28.0,...,1.65,1.1,38.0,42.53,3.09,200.0,169.0,0.15,0.23,0.0


### Step 5: Converting units and combining the Data Frames!

**First, I need to convert out FTC table to 100gs from kgs**

In [32]:
fullnutrients.iloc[:,1:]=fullnutrients.iloc[:,1:]/10

1       3.00
2       0.00
3       0.00
4       0.00
5       2.00
6       0.00
7       0.00
8       0.00
9     925.00
10      1.00
11    366.00
12      0.00
13    810.00
14      3.00
15     43.00
16      0.00
17      0.00
18      0.00
19      0.00
20      0.00
21    819.00
22      7.00
23      7.00
24     38.00
25     27.00
26     28.00
27    378.00
28      0.00
29      0.00
30      0.00
31      8.00
32      0.00
33      3.00
34    146.00
35     47.00
36      0.00
37      0.00
38      0.00
39      0.00
40    168.00
41      5.00
42      0.00
43   1056.00
44      0.00
45      0.00
46      0.00
Name: Vitamin A, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  fullnutrients.iloc[:,1:]=fullnutrients.iloc[:,1:]/10
1     0.00
2     0.00
3     0.00
4     0.00
5     0.00
6     0.00
7     0.00
8     0.00
9     2.00
10    0.00
11    0.00
12    0.00
13    8.00
14    0.00
15   12.00
16    0.00
17    0.00
18    0.00
19    0.00
20    0.00
21    0

In [33]:
fullnutrients.head() #perfect

Unnamed: 0,index,Energy,Protein,Vitamin A,Vitamin D,Vitamin E,Vitamin C,Vitamin B-6,Vitamin B-12,Calcium,...,Zinc,Fiber,Folate,Carbohydrate,Niacin,Phosphorus,Potassium,Riboflavin,Thiamin,Vitamin K
0,Avocado,670.0,2.0,7.0,0.0,2.07,10.0,0.26,0.0,12.0,...,0.64,6.7,81.0,8.53,1.74,52.0,485.0,0.13,0.07,21.0
1,Banana,89.0,1.1,3.0,0.0,0.1,8.7,0.4,0.0,5.0,...,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Barley,349.0,12.3,0.0,0.0,1.0,0.0,0.2,0.0,26.0,...,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Beef,323.0,24.9,0.0,0.0,0.0,0.0,0.3,1.9,4.0,...,4.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Beer,41.0,0.3,0.0,0.0,0.0,0.0,0.1,0.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


**Now that all food units are 100g, lets combine**

In [34]:
final_nutrition = pd.concat([fullnutrients, final]).reset_index().drop(columns = ['level_0'])

In [37]:
final_nutrition.drop([32]).to_pickle('nutrientdffinal?')