## Libraries

In [47]:
import os 
import numpy as np 
import pandas as pd 

from tqdm import tqdm
import warnings

warnings.filterwarnings("ignore")


In [48]:
# List every entry in the raw nutrient directory in alphabetical order
# (the listing is not filtered, so non-CSV files show up here too).
sorted(os.listdir("../data/nutrients/"))

['cassava_raw.csv',
 'chicken_raw_meat_only_stewing.csv',
 'corn_raw.csv',
 'corn_raw_yellow_sweet.csv',
 'cowpeas_raw_mature_seeds_catjang.csv',
 'eggplant_raw.csv',
 'fish_raw_atlantic_mackerel.csv',
 'gari_by_golden_tropics_ltd.csv',
 'millet_raw.csv',
 'onions_raw.csv',
 'peppers_dried_ancho.csv',
 'peppers_raw_red_sweet.csv',
 'plantains_raw_green.csv',
 'rice_raw_longgrain_brown.csv',
 'sorghum_grain.csv',
 'sources.txt',
 'soybeans_raw_mature_seeds.csv',
 'tomatoes_raw.csv',
 'yam_raw.csv']

## Reading Data and Filtering Nutrients

In [49]:
def read_data(file_name, nutrient_names=None):
    """Load one nutrient CSV and keep only the nutrients of interest.

    The file is read from ``../data/nutrients/``. The first 6 lines and the
    last 5 lines of the file are skipped (presumably export header/footer
    text around the table -- confirm against the raw files).

    Parameters
    ----------
    file_name : str
        Name of a file inside ``../data/nutrients/``, e.g. ``"corn_raw.csv"``.
    nutrient_names : iterable of str, optional
        Values of the ``Nutrient`` column to keep. When omitted, the
        notebook-level ``nutrients`` list is looked up at call time, so that
        list must already be defined when this function is called.

    Returns
    -------
    pandas.DataFrame
        The matching rows (original row labels preserved) plus a
        ``commodity`` column holding ``file_name`` without its extension.
    """
    if nutrient_names is None:
        # Backward-compatible default: fall back to the notebook global.
        nutrient_names = nutrients

    # skipfooter is only implemented by the python parser; naming the engine
    # explicitly avoids the C-engine fallback and its ParserWarning.
    table = pd.read_csv(
        f"../data/nutrients/{file_name}",
        skiprows=6,
        skipfooter=5,
        engine="python",
    )

    # .copy() gives an independent frame, so adding a column below does not
    # write into a slice of `table` (the SettingWithCopy pattern).
    selected = table.loc[table["Nutrient"].isin(nutrient_names)].copy()

    # splitext strips only the final extension; split(".")[0] would cut the
    # name at the first dot (identical for names with a single dot).
    selected["commodity"] = os.path.splitext(file_name)[0]
    return selected

In [59]:
# Labels of the Nutrient column that are kept from each raw file.
# read_data() looks this name up when it is called, so this cell has to be
# executed before read_data() is used.
nutrients = [
    "Calories",
    "Protein",
    "Calcium",
    "Iron",
    "Vitamin A, RAE",
    "Thiamin [Vitamin B1]",
    "Riboflavin [Vitamin B2]",
    "Niacin [Vitamin B3]",
    "Vitamin C [Ascorbic acid]",
]

In [51]:
# Echo the list to confirm which Nutrient labels will be kept.
nutrients

['Calories',
 'Protein',
 'Calcium',
 'Iron',
 'Vitamin A, RAE',
 'Thiamin [Vitamin B1]',
 'Riboflavin [Vitamin B2]',
 'Niacin [Vitamin B3]',
 'Vitamin C [Ascorbic acid]']

In [52]:
# Load a single commodity file for inspection: skip the first 6 and the last
# 5 lines of the file. skipfooter is only supported by the python parser, so
# the engine is named explicitly instead of relying on the C-engine fallback
# (which raises a ParserWarning).
corn_raw = pd.read_csv(
    "../data/nutrients/corn_raw.csv",
    skiprows=6,
    skipfooter=5,
    engine="python",
)

In [53]:
# Keep only the rows whose Nutrient label is in the tracked `nutrients` list.
corn_raw = corn_raw[corn_raw["Nutrient"].isin(nutrients)]

In [54]:
# Show the filtered corn table.
corn_raw

Unnamed: 0,Nutrient,Amount,Unit,DV
0,Calories,86.0,,
1,"Vitamin A, RAE",9.0,mcg,1 %
8,Thiamin [Vitamin B1],0.155,mg,13 %
9,Riboflavin [Vitamin B2],0.055,mg,4 %
10,Niacin [Vitamin B3],1.77,mg,11 %
17,Vitamin C [Ascorbic acid],6.8,mg,8 %
25,Calcium,2.0,mg,0 %
27,Iron,0.52,mg,3 %
34,Protein,3.27,g,7 %


In [55]:
# Run read_data() over every directory entry whose name ends in "csv"
# (alphabetical order) and stack the per-commodity tables into one frame.
# Row labels from the individual files are kept, so they repeat across
# commodities.
commodities_nutrients = pd.concat(
    [
        read_data(csv_name)
        for csv_name in sorted(os.listdir("../data/nutrients/"))
        if csv_name.endswith("csv")
    ]
)

In [56]:
# Show the combined per-commodity nutrient table.
commodities_nutrients

Unnamed: 0,Nutrient,Amount,Unit,DV,commodity
0,Calories,160.0,,,cassava_raw
1,"Vitamin A, RAE",1.0,mcg,0 %,cassava_raw
8,Thiamin [Vitamin B1],0.087,mg,7 %,cassava_raw
9,Riboflavin [Vitamin B2],0.048,mg,4 %,cassava_raw
10,Niacin [Vitamin B3],0.854,mg,5 %,cassava_raw
...,...,...,...,...,...
10,Niacin [Vitamin B3],0.552,mg,3 %,yam_raw
18,Vitamin C [Ascorbic acid],17.1,mg,19 %,yam_raw
26,Calcium,17.0,mg,1 %,yam_raw
28,Iron,0.54,mg,3 %,yam_raw


## Saving Data

In [57]:
# Write the combined table to disk; index=False leaves the row labels out of
# the file.
commodities_nutrients.to_csv(
    path_or_buf="../data/commodities_nutrients.csv",
    index=False,
)

In [58]:
# NOTE(review): this call is commented out, yet the notebook still shows a
# table beneath it -- presumably output kept from an earlier run. It reads
# commodities_gha_nutrients.csv, which is not the file this notebook writes
# (commodities_nutrients.csv); confirm where that file is produced.
# pd.read_csv("../data/commodities_gha_nutrients.csv")

Unnamed: 0,commodity,price,Calcium,Calories,Iron,Niacin [Vitamin B3],Protein,Riboflavin [Vitamin B2],Thiamin [Vitamin B1],Vitamin C [Ascorbic acid]
0,Cassava,0.652,16.0,160.0,0.27,0.854,1.36,0.048,0.087,20.6
1,Gari,0.396,71.0,357.0,2.57,0.0,0.0,0.0,0.0,0.0
2,Maize,0.456,2.0,86.0,0.52,1.77,3.27,0.055,0.155,6.8
3,Maize (yellow),0.491,2.0,86.0,0.52,1.77,3.27,0.055,0.155,6.8
4,Millet,0.651,8.0,378.0,3.01,4.72,11.02,0.29,0.421,0.0
5,Rice (imported),1.0,9.0,367.0,1.29,6.494,7.54,0.095,0.541,0.0
6,Rice (local),1.267,9.0,367.0,1.29,6.494,7.54,0.095,0.541,0.0
7,Sorghum,0.667,13.0,329.0,3.36,3.688,10.62,0.096,0.332,0.0
8,Yam,0.605,17.0,118.0,0.54,0.552,1.53,0.032,0.112,17.1
9,Yam (puna),0.646,17.0,118.0,0.54,0.552,1.53,0.032,0.112,17.1
