# 3. REFINED RESULTS AND STATISTICS

Nutrient values in the USDA database are assumed to be expressed per 100 g of the food item.
In the IASI database, `FOOD_PORTION` is given in grams.
Therefore, each nutrient value must be scaled to the actual portion size:

$ \text{Nutrient Value (scaled)} = \left( \frac{\text{Nutrient Value (per 100g)} \times \text{FOOD\_PORTION (g)}}{100} \right) $

In [1]:
# Imports
import pandas as pd

In [4]:
# Load the raw IASI-with-nutrients table and discard the columns that are not
# used in the rest of this notebook: FOOD_CODE, fdc_ids and double_check_desc.
# Reading and dropping in one chain keeps the cell idempotent (every run starts
# again from the CSV on disk) and avoids a mid-cell `.head()` whose result would
# never be displayed -- only the last expression of a cell is rendered.
refined_iasi_df = (
    pd.read_csv("./data/PROCESSED_iasi_with_nutrients_raw.csv")
    .drop(columns=['FOOD_CODE', 'fdc_ids', 'double_check_desc'])
)
refined_iasi_df.head()

Unnamed: 0,ID,MEAL_ID,FOOD_PORTION,DESC,Retinol (UG),Lycopene (UG),cis_Lycopene (UG),trans_Lycopene (UG),Carotene_beta (UG),cis_beta_Carotene (UG),...,Cryptoxanthin_beta (UG),Choline_total (MG),Carotene_alpha (UG),Vitamin_K_phylloquinone (UG),Zeaxanthin (UG),Lutein (UG),Lutein_plus_zeaxanthin (UG),cis_Lutein/Zeaxanthin (UG),Vitamin_D_D2_plus_D3 (UG),Vitamin_A_RAE (UG)
0,1,0,293.0,"Whole milk, average",31.0,0.0,0.0,0.0,7.0,7.0,...,0.0,17.8,0.0,0.3,0.0,0.0,0.0,0.0,1.1,32.0
1,1,1,0.56,"Beef, average, fat, cooked",3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,86.1,0.0,1.6,0.0,0.0,0.0,0.0,0.1,3.0
2,1,1,6.93,"Beef, rump steak, grilled, lean",1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,77.2,0.0,1.6,0.0,0.0,0.0,0.0,0.2,1.0
3,1,2,14.0,"Beefburgers, chilled/frozen, fried",3.0,0.0,0.0,0.0,0.0,0.0,...,0.0,79.4,0.0,1.9,0.0,0.0,0.0,0.0,0.0,3.0
4,1,3,4.945,"Pork, fat, cooked",1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,75.7,0.0,0.0,0.0,0.0,0.0,0.0,0.6,1.0


In [6]:
# Every column that is not an identifier, the portion size or the description
# is treated as a nutrient column.
nutrient_columns = refined_iasi_df.columns.difference(['ID', 'MEAL_ID', 'FOOD_PORTION', 'DESC'])

# USDA nutrient values are assumed to be per 100 g of food, while the IASI
# FOOD_PORTION column is in grams, so each value is rescaled to the portion:
#     scaled = value_per_100g * FOOD_PORTION / 100
#
# The frame is updated under the same name because the aggregation cell further
# down reads `refined_iasi_df`. A marker stored in `attrs` records that the
# scaling has already been applied to this frame object, so re-running this cell
# without reloading the raw CSV does not scale the values a second time.
# Reloading the CSV creates a new frame with empty `attrs`, which re-enables it.
if not refined_iasi_df.attrs.get('scaled_by_food_portion', False):
    # Row-wise multiplication (axis=0 aligns FOOD_PORTION on the row index),
    # then division by 100 -- same operation order as (value * portion) / 100.
    refined_iasi_df[nutrient_columns] = (
        refined_iasi_df[nutrient_columns]
        .mul(refined_iasi_df['FOOD_PORTION'], axis=0)
        .div(100.0)
    )
    refined_iasi_df.attrs['scaled_by_food_portion'] = True

refined_iasi_df.to_csv('./data/PROCESSED_iasi_with_nutrients_scaled_with_food_portion.csv', index=False)
refined_iasi_df.head()

Unnamed: 0,ID,MEAL_ID,FOOD_PORTION,DESC,Retinol (UG),Lycopene (UG),cis_Lycopene (UG),trans_Lycopene (UG),Carotene_beta (UG),cis_beta_Carotene (UG),...,Cryptoxanthin_beta (UG),Choline_total (MG),Carotene_alpha (UG),Vitamin_K_phylloquinone (UG),Zeaxanthin (UG),Lutein (UG),Lutein_plus_zeaxanthin (UG),cis_Lutein/Zeaxanthin (UG),Vitamin_D_D2_plus_D3 (UG),Vitamin_A_RAE (UG)
0,1,0,293.0,"Whole milk, average",90.83,0.0,0.0,0.0,20.51,20.51,...,0.0,52.154,0.0,0.879,0.0,0.0,0.0,0.0,3.223,93.76
1,1,1,0.56,"Beef, average, fat, cooked",0.0168,0.0,0.0,0.0,0.0,0.0,...,0.0,0.48216,0.0,0.00896,0.0,0.0,0.0,0.0,0.00056,0.0168
2,1,1,6.93,"Beef, rump steak, grilled, lean",0.0693,0.0,0.0,0.0,0.0,0.0,...,0.0,5.34996,0.0,0.11088,0.0,0.0,0.0,0.0,0.01386,0.0693
3,1,2,14.0,"Beefburgers, chilled/frozen, fried",0.42,0.0,0.0,0.0,0.0,0.0,...,0.0,11.116,0.0,0.266,0.0,0.0,0.0,0.0,0.0,0.42
4,1,3,4.945,"Pork, fat, cooked",0.04945,0.0,0.0,0.0,0.0,0.0,...,0.0,3.743365,0.0,0.0,0.0,0.0,0.0,0.0,0.02967,0.04945


In [10]:
# Add up every nutrient column over all rows that share the same ID, then move
# ID from the group index back into an ordinary column. The output file name
# refers to these as per-patient totals.
rows_grouped_by_id = refined_iasi_df.groupby('ID')
nutrient_sums_by_id = rows_grouped_by_id[nutrient_columns].sum()
refined_iasi_per_patient_db = nutrient_sums_by_id.reset_index()

# Persist the totals without the positional index.
refined_iasi_per_patient_db.to_csv('./data/PROCESSED_iasi_per_patient_nutrient_totals.csv', index=False)

refined_iasi_per_patient_db

Unnamed: 0,ID,Carotene_alpha (UG),Carotene_beta (UG),Choline_total (MG),Cryptoxanthin_beta (UG),Lutein (UG),Lutein_plus_zeaxanthin (UG),Lycopene (UG),Retinol (UG),Tocopherols_and_tocotrienols (MG),Vitamin_A_RAE (UG),Vitamin_D_D2_plus_D3 (UG),Vitamin_E_alpha_tocopherol (MG),Vitamin_K_phylloquinone (UG),Zeaxanthin (UG),cis_Lutein/Zeaxanthin (UG),cis_Lycopene (UG),cis_beta_Carotene (UG),trans_Lycopene (UG),trans_beta_Carotene (UG)
0,1,242.5311,1009.429600,408.573295,184.6164,548.1022,548.1022,2164.1064,737.972950,7.745296,840.82885,5.899340,7.745296,33.637110,548.1022,548.1022,2164.1064,1009.429600,2164.1064,1009.429600
1,2,630.6125,2128.977125,377.161393,220.4643,731.6432,731.6432,2363.8453,342.401175,9.178721,555.94795,5.163748,9.178721,93.043325,731.6432,731.6432,2363.8453,2128.977125,2363.8453,2128.977125
2,3,1199.1745,3538.682000,1025.751800,102.6547,1307.8918,1307.8918,1065.7300,1463.336200,7.414431,1813.00070,7.526110,7.414431,92.280410,1307.8918,1307.8918,1065.7300,3538.682000,1065.7300,3538.682000
3,4,2563.3259,7117.482900,480.861885,226.8877,1309.4101,1309.4101,6166.8730,2924.805150,5.424610,3629.44085,7.256420,5.424610,78.122070,1309.4101,1309.4101,6166.8730,7117.482900,6166.8730,7117.482900
4,5,2573.8259,7900.390900,481.450280,350.8857,1528.4101,1528.4101,6150.6475,2906.995100,6.309708,3682.13030,7.238710,6.309708,93.791170,1528.4101,1528.4101,6150.6475,7900.390900,6150.6475,7900.390900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622,623,2412.1920,6316.281100,327.880330,144.5444,811.1288,811.1288,1847.8556,439.179000,19.020370,1070.72640,6.689860,19.020370,57.635280,811.1288,811.1288,1847.8556,6316.281100,1847.8556,6316.281100
623,624,2274.4227,6234.251600,262.364530,24.2218,1243.4201,1243.4201,664.3127,324.376600,9.049313,940.59100,3.453150,9.049313,109.677430,1243.4201,1243.4201,664.3127,6234.251600,664.3127,6234.251600
624,625,2502.7039,6943.313525,471.392587,205.7507,1175.0305,1175.0305,2535.7959,462.567625,27.096342,1152.32330,3.831777,27.096342,82.016045,1175.0305,1175.0305,2535.7959,6943.313525,2535.7959,6943.313525
625,650,2370.0572,7126.048500,479.518090,110.5061,2516.3527,2516.3527,2378.3469,486.243400,7.847652,1182.71670,5.922870,7.847652,184.588630,2516.3527,2516.3527,2378.3469,7126.048500,2378.3469,7126.048500


### PLOTS AND STATISTICS - TBD