## Libraries

In [1]:
import os 
import numpy as np 
import pandas as pd 

from tqdm import tqdm
import warnings

# Suppress every warning category for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings raised by later
# cells; consider restricting it to specific categories.
warnings.filterwarnings("ignore")


In [2]:
# Alphabetical listing of everything stored in the nutrients folder.
sorted(entry.name for entry in os.scandir("../data/nutrients/"))

['cassava_raw.csv',
 'chicken_raw_meat_only_stewing.csv',
 'corn_raw.csv',
 'corn_raw_yellow_sweet.csv',
 'cowpeas_raw_mature_seeds_catjang.csv',
 'eggplant_raw.csv',
 'fish_raw_atlantic_mackerel.csv',
 'gari_by_golden_tropics_ltd.csv',
 'millet_raw.csv',
 'onions_raw.csv',
 'peppers_dried_ancho.csv',
 'peppers_raw_red_sweet.csv',
 'plantains_raw_green.csv',
 'rice_raw_longgrain_brown.csv',
 'sorghum_grain.csv',
 'sources.txt',
 'soybeans_raw_mature_seeds.csv',
 'tomatoes_raw.csv',
 'yam_raw.csv']

## Reading Data and Filtering Nutrients

In [3]:
def read_data(file_name, nutrient_names=None, data_dir="../data/nutrients"):
    """Load one commodity's nutrient table and keep only the selected nutrients.

    Parameters
    ----------
    file_name : str
        Name of the CSV file inside ``data_dir`` (e.g. ``"corn_raw.csv"``).
    nutrient_names : list of str, optional
        Values of the ``Nutrient`` column to keep. When omitted, the
        notebook-level ``nutrients`` list is used; it is looked up at call
        time, so it only has to exist before the first call.
    data_dir : str, optional
        Directory that holds the nutrient CSV files.

    Returns
    -------
    pandas.DataFrame
        The matching rows (original row labels preserved) with an extra
        ``commodity`` column holding the file name without its extension.
    """
    if nutrient_names is None:
        nutrient_names = nutrients

    # The first 6 and last 5 lines of each file are skipped (presumably
    # export preamble/footer -- confirm against the raw files).
    # `skipfooter` is only implemented by the Python parser, so the engine is
    # named explicitly instead of relying on pandas' fallback + ParserWarning.
    raw = pd.read_csv(
        os.path.join(data_dir, file_name),
        skiprows=6,
        skipfooter=5,
        engine="python",
    )

    # splitext drops only the final extension, so a dotted stem stays intact.
    commodity = os.path.splitext(file_name)[0]

    # `.assign` returns a new frame, so no column is written onto a slice of
    # `raw` (which is what triggers SettingWithCopyWarning).
    return raw.loc[raw["Nutrient"].isin(nutrient_names)].assign(commodity=commodity)

In [4]:
# Labels, exactly as they appear in the `Nutrient` column of the source CSVs,
# of the nutrients kept by the filtering steps below.
nutrients = [
    "Calories",
    "Protein",
    "Calcium",
    "Iron",
    "Vitamin A, RAE",
    "Thiamin [Vitamin B1]",
    "Riboflavin [Vitamin B2]",
    "Niacin [Vitamin B3]",
    "Vitamin C [Ascorbic acid]",
]

In [5]:
# Echo the list to double-check the exact labels used for filtering.
nutrients

['Calories',
 'Protein',
 'Calcium',
 'Iron',
 'Vitamin A, RAE',
 'Thiamin [Vitamin B1]',
 'Riboflavin [Vitamin B2]',
 'Niacin [Vitamin B3]',
 'Vitamin C [Ascorbic acid]']

In [6]:
# Single-file example: load the corn table, skipping the first 6 and last 5
# lines of the file. `skipfooter` is only supported by the Python parser, so
# the engine is stated explicitly rather than left to pandas' fallback.
corn_raw = pd.read_csv(
    "../data/nutrients/corn_raw.csv",
    skiprows=6,
    skipfooter=5,
    engine="python",
)

In [7]:
# Keep only the rows whose `Nutrient` label is in the `nutrients` list.
corn_raw = corn_raw[corn_raw["Nutrient"].isin(nutrients)]

In [8]:
# Display the filtered corn table to inspect the result of the filter.
corn_raw

Unnamed: 0,Nutrient,Amount,Unit,DV
0,Calories,86.0,,
1,"Vitamin A, RAE",9.0,mcg,1 %
8,Thiamin [Vitamin B1],0.155,mg,13 %
9,Riboflavin [Vitamin B2],0.055,mg,4 %
10,Niacin [Vitamin B3],1.77,mg,11 %
17,Vitamin C [Ascorbic acid],6.8,mg,8 %
25,Calcium,2.0,mg,0 %
27,Iron,0.52,mg,3 %
34,Protein,3.27,g,7 %


In [9]:
# Stack the filtered table of every commodity CSV into one long frame.
# The suffix test includes the dot so that only real `.csv` files are read
# (a name that merely ends in the letters "csv" is skipped).
commodities_nutrients = pd.concat(
    [
        read_data(f)
        for f in sorted(os.listdir("../data/nutrients/"))
        if f.endswith(".csv")
    ]
)

In [10]:
# Display the combined table (one block of rows per commodity).
commodities_nutrients

Unnamed: 0,Nutrient,Amount,Unit,DV,commodity
0,Calories,160.0,,,cassava_raw
1,"Vitamin A, RAE",1.0,mcg,0 %,cassava_raw
8,Thiamin [Vitamin B1],0.087,mg,7 %,cassava_raw
9,Riboflavin [Vitamin B2],0.048,mg,4 %,cassava_raw
10,Niacin [Vitamin B3],0.854,mg,5 %,cassava_raw
...,...,...,...,...,...
10,Niacin [Vitamin B3],0.552,mg,3 %,yam_raw
18,Vitamin C [Ascorbic acid],17.1,mg,19 %,yam_raw
26,Calcium,17.0,mg,1 %,yam_raw
28,Iron,0.54,mg,3 %,yam_raw


## Saving Data

In [11]:
# Persist the combined table; the row labels are not written to the file.
commodities_nutrients.to_csv(
    path_or_buf="../data/commodities_nutrients.csv",
    index=False,
)

In [12]:
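# Optional sanity check (disabled): reload a saved CSV to inspect it.
# NOTE(review): this path ("commodities_gha_nutrients.csv") differs from the
# file written above ("commodities_nutrients.csv") -- confirm which is intended.
# pd.read_csv("../data/commodities_gha_nutrients.csv")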