## Creating a Complete Set of Nutrients for the Consumed Foods

In [1]:
!pip install -r requirements.txt



In [2]:
# Google Sheets URL of the workbook holding the Ethiopia data; passed to read_sheets below
Ethiopia_Data = 'https://docs.google.com/spreadsheets/d/1PVqM25qZyDz5K3jsLDu-JgPS5-vmro4wHk8k6OIB9Eo/edit?usp=sharing'

In [3]:
import pandas as pd
import numpy as np
from eep153_tools.sheets import read_sheets

# To use your own data, point `Ethiopia_Data` (defined above) at your own Google Sheet.
# Read the expenditures sheet, index it by (i, t, m, j), and squeeze the result
# (a single remaining column collapses to a Series).
x = (
    read_sheets(Ethiopia_Data, sheet='Food Expenditures (2018-19)')
    .set_index(['i', 't', 'm', 'j'])
    .squeeze()
)

In [4]:

# Prices: index by time, place, good and unit; take the median within each
# (t, m, j, u) cell; keep only prices quoted per 'Kg'; spread goods over columns.
p = (
    read_sheets(Ethiopia_Data, sheet='Food Prices (2018-19)')
    .set_index(['t', 'm', 'j', 'u'])
    .groupby(['t', 'm', 'j', 'u'])
    .median()
    .xs('Kg', level="u")
    .squeeze()
    .unstack('j')
)

# Keep only the goods present in the `j` level of the expenditure index
# (i.e. drop prices for goods without expenditures), then put goods on the rows.
p = p[x.index.levels[-1]].T

# Household characteristics: name the column axis 'k', fill blanks with zeros,
# and index by (i, t, m).
d = read_sheets(Ethiopia_Data, sheet="Household Characteristics")
d.columns.name = 'k'
d = d.replace(np.nan, 0).set_index(['i', 't', 'm'])

# Sum expenditure entries that share the same (i, t, m, j) key, then treat
# zero expenditures as missing.
x = x.T.groupby(['i', 't', 'm', 'j']).sum().replace(0, np.nan)

# y is the log of expenditures
y = np.log(x)

In [5]:
# Load the 'Food Expenditures (2018-19)' sheet again, this time leaving it un-indexed
food_expenditures = read_sheets(Ethiopia_Data,sheet='Food Expenditures (2018-19)')

In [6]:
# Distinct food labels from the `j` column, in order of first appearance
foodseaten = food_expenditures['j'].unique()
# Same labels as a plain Python list, displayed as this cell's output
listchat = list(foodseaten)
listchat

['Coffee',
 'Onion',
 'Salt',
 'Shiro',
 'Sugar',
 'Berbere',
 'Kariya',
 'Leafy Greens',
 'Lentils',
 'Oils',
 'Papaya',
 'Potato',
 'Avocado',
 'Bread/biscuit',
 'Goat & mutton meat',
 'Eggs',
 'Pasta/Macaroni',
 'Tea',
 'Tomato',
 'Horsebeans',
 'Wheat',
 'Garlic',
 'Beef',
 'Beer',
 'Maize',
 'Chick Pea',
 'Teff',
 'Soda',
 'Rice',
 'Sorghum',
 'Hops (gesho)',
 'Tella',
 'Poultry',
 'Mango',
 'Banana',
 'Millet',
 'Carrot',
 'Field Pea',
 'Other prepared food',
 'Barley',
 'Mung bean',
 'Milk',
 'Orange',
 'Injera',
 'Vetch',
 'Fenugreek',
 'Other fruit',
 'Linseed',
 'Beetroot',
 'Sesame',
 'Butter/ghee',
 'Honey',
 'Oats',
 'Sweet potato',
 'Sun Flower',
 'Cheese',
 'Ground nuts',
 'Other condiments',
 'Niger Seed',
 'Fish',
 'Other seed',
 'Haricot Beans',
 'Moringa/Shiferaw/Halloka',
 'Chat/Kat',
 'Bula',
 'Other cereal',
 'Other tuber or stem',
 'Other pulse or nut',
 'Kocho',
 'Other vegetable',
 'Godere',
 'Boye/Yam',
 'Cassava']

In [14]:
# NOTE(review): bestchatmatch.pkl is not created by any cell shown above; it must
# already exist in the working directory. Unpickling can execute arbitrary code,
# so only load a file you produced yourself.
loadmatched = pd.read_pickle("bestchatmatch.pkl")
# Lift the row-display limit so the whole table is shown (it is reset in the next cell)
pd.set_option('display.max_rows', None)
loadmatched

Unnamed: 0,ingredient,best_match,justification
0,Coffee,"Coffee, brewed from grounds, prepared with tap...",Exact match for brewed coffee.
1,Onion,"Onions, raw",Exact match for raw onions.
2,Salt,"Salt, table",Exact match for table salt.
3,Shiro,,"No good match for Shiro, a traditional Ethiopi..."
4,Sugar,"Sugar, white, granulated",Exact match for white granulated sugar.
5,Berbere,,"No good match for Berbere, a traditional Ethio..."
6,Kariya,,"No good match for Kariya, potentially a local ..."
7,Leafy Greens,"Spinach, raw",Spinach is a common leafy green.
8,Lentils,"Lentils, raw",Exact match for raw lentils.
9,Oils,"Oil, vegetable, canola or sunflower",General match for vegetable oil.


In [15]:
# Restore pandas' default row-display limit
pd.reset_option('display.max_rows')

In [16]:
# Number of non-null entries in each column of the match table
num_matches = loadmatched.count()

In [17]:
# Display the counts; a best_match count below the ingredient count means some
# ingredients have no match recorded
num_matches

ingredient       73
best_match       53
justification    73
dtype: int64

In [18]:
# Full, untruncated justification text for row 3 (Shiro in the saved output above)
loadmatched.loc[3, 'justification']

'No good match for Shiro, a traditional Ethiopian spice mix or stew.'

# Finding nutrition for the rest

In [19]:
import os

# Read the FoodData Central API key from the environment instead of committing
# it to the notebook. Set FDC_API_KEY before launching Jupyter.
# "DEMO_KEY" is api.data.gov's shared demo key: it is heavily rate-limited and
# only suitable for a quick smoke test, not for looping over many foods.
# NOTE(review): the key that used to be hardcoded here has been exposed in the
# notebook's history and should be regenerated.
apikey = os.environ.get("FDC_API_KEY") or "DEMO_KEY"

In [20]:
import fooddatacentral as fdc
import warnings

# Build a table of nutrient quantities with one column per food.
#
# NOTE(review): `df` is not defined in any cell shown above (this cell raised
# NameError when last run). It is expected to be a DataFrame with a `Food`
# column (food names) and an `FDC` column (FoodData Central IDs) -- TODO:
# define or load it before this cell.
D = {}
# Walk the Food/FDC columns row by row instead of re-selecting rows with a
# manual counter, so the lookup does not depend on df's index labels.
for food, FDC in zip(df.Food, df.FDC):
    try:
        D[food] = fdc.nutrients(apikey, FDC).Quantity
    except AttributeError:
        # Skip this food but report which code failed (code first, then name,
        # matching the placeholders in the message).
        warnings.warn("Couldn't find FDC Code %s for food %s." % (FDC, food))

FoodNutrients = pd.DataFrame(D, dtype=float)

NameError: name 'df' is not defined

In [22]:
def get_nutrients(FDC):
    """Return the nutrient quantities reported for one FoodData Central entry.

    Calls ``fdc.nutrients`` with the notebook-level ``apikey`` and the given
    ``FDC`` identifier, and returns the ``Quantity`` attribute of the result.
    """
    return fdc.nutrients(apikey, FDC).Quantity

In [23]:
# Spot-check the helper with a single FDC ID
get_nutrients(169145)

Proximates             0.000
Water                 87.580
Energy               180.000
Protein                1.610
Total lipid (fat)      0.170
                      ...   
Proline                0.042
Serine                 0.059
Alcohol, ethyl         0.000
Caffeine               0.000
Theobromine            0.000
Name: Quantity, Length: 97, dtype: float64