# USDA DATA SET GRAPHS
## Contributors: Nguyen, Regan, and Jagdon
### Below is a list of the food categories we were not able to obtain data for in the provided dataset, each with its name and food category id:
- Baby Foods (3)
- Soups, Sauces, and Gravies (6)
- Breakfast Cereals (8)
- Finfish and Shellfish Products (15)
- Lamb, Veal, and Game Products (17)
- Sweets (19)
- Fast Foods (21)
- Meals, Entrees, and Side Dishes (22)
- Snacks (23)
- American Indian/Alaska Native Foods (24)
- Restaurant Foods (25)

# Dairy and Egg Products

In [15]:
# THE COMMENTS IN THIS CELL DESCRIBE ESSENTIALLY THE SAME PROCESS THAT EVERY OTHER CODE CELL IN THIS NOTEBOOK FOLLOWS.
# THE MAIN DIFFERENCES ARE THE DATAFRAME NAMES, THE FOOD CATEGORY IDs THEY REFER TO, AND THE NUMBER OF ROWS KEPT FOR EACH CHART.

# FOR THIS REASON, THE OTHER CODE CELLS IN THIS IPYNB ARE NOT COMMENTED IN THE SAME DETAIL.

# Dairy

# import the libraries used to process the data (pandas) and to draw the graph (plotly express)
import pandas as pd
import plotly.express as px

# read the initial data we were given, which is 'FF_SR_ data.csv'
csv = pd.read_csv('FF_SR_ data.csv')

# from this dataframe, create a new one containing only the category a food lies within, the food's name, the nutrient in the food,
# the SR (Standard Reference) mean of that nutrient per 100 grams of that food, and the same value for FF (Foundation Foods)
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop every row whose FF_Component contains ':'
# the main reasoning for this is to filter out the SFA values, which listed times instead of actual names for FF_Component
# regex=False matches ':' as plain text and na=True makes rows with a missing FF_Component get dropped as well
# .copy() makes df an independent dataframe, so adding the PC column below does not raise pandas' SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains(':', regex=False, na=True)].copy()

# insert a new column labeled PC (Percent Change): the difference between the FF mean and the SR mean, divided by the SR mean,
# made positive with .abs() and multiplied by 100 to get an actual % value
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# create a dataframe for the desired food group, which varies from cell to cell in this ipynb file
# in this case the food group is Dairy/Egg products, which has a food_category_id of 1
DAIRY = df[df['food_category_id'] == 1]

# sort with 'ascending=False' so the rows with the greatest % change come first
DAIRY = DAIRY.sort_values(by="PC", ascending=False)

# drop rows whose percent change is exactly 0 and rows whose SR mean is 0 (a zero SR mean makes the division above meaningless)
# NOTE(review): rows where PC is missing (NaN) pass both comparisons; sort_values has already placed them last - confirm whether they should be dropped
DAIRY = DAIRY[(DAIRY['PC'] != 0) & (DAIRY['SR Mean per 100g'] != 0)]

# keep the first 11 rows of the sorted data for the graph; the number of rows kept differs from cell to cell
# NOTE(review): 11 rows although the variable is called topten - presumably chosen by hand for this food group, confirm
topten = DAIRY.iloc[:11]

# command to add each df for each food category into the same data set
# topten.to_csv('globaldata.csv',mode='a',index=False,header=False)

# make a histogram with the nutrient names on the x-axis and the percent change between the SR and FF values on the y-axis
# because y is given, plotly adds together the PC of all rows that share the same FF_Component (histfunc defaults to 'sum')
# 'text_auto' prints each bar's value on the bar ('.3s' is a d3 number format), and 'color=' gives every nutrient its own color
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Dairy and Egg Products', text_auto='.3s', color='FF_Component')

# further modifications to the graph: label font size and angle, labels placed outside the bars,
# and cliponaxis=False so that labels of very tall bars are not cropped out of the graph
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})

# present the graph using the .show() function
fig.show()

# Spices and Herbs

In [16]:
# Spices and Herbs
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 2 is the Spices and Herbs group; largest percent change first
SPICES = df[df['food_category_id'] == 2]
SPICES = SPICES.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
SPICES = SPICES[(SPICES['PC'] != 0) & (SPICES['SR Mean per 100g'] != 0)]

# keep the first 10 rows of the sorted data for the chart
topten = SPICES.iloc[:10]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Spice & Herb', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Fats and Oils

In [17]:
# Fats and Oils
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 4 is the Fats and Oils group; largest percent change first
FATS = df[df['food_category_id'] == 4]
FATS = FATS.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
FATS = FATS[(FATS['PC'] != 0) & (FATS['SR Mean per 100g'] != 0)]

# keep the first 15 rows of the sorted data for the chart
topten = FATS.iloc[:15]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Fats & Oils', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Poultry Products

In [18]:
# Poultry Products
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 5 is the Poultry Products group; largest percent change first
POULTRY = df[df['food_category_id'] == 5]
POULTRY = POULTRY.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
# both filters are applied to POULTRY itself, so zero-change rows cannot reach the chart
POULTRY = POULTRY[(POULTRY['PC'] != 0) & (POULTRY['SR Mean per 100g'] != 0)]

# keep the first 15 rows of the sorted data for the chart
topten = POULTRY.iloc[:15]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Poultry Products', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Sausages and Luncheon Meats

In [19]:
# Sausages and Luncheon meats
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 7 is the Sausages and Luncheon Meats group; largest percent change first
SAUSAGE = df[df['food_category_id'] == 7]
SAUSAGE = SAUSAGE.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
SAUSAGE = SAUSAGE[(SAUSAGE['PC'] != 0) & (SAUSAGE['SR Mean per 100g'] != 0)]

# keep the first 11 rows of the sorted data for the chart
topten = SAUSAGE.iloc[:11]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Sausages and Luncheon Meats', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()


# Fruits and Fruit Juices

In [20]:
# Fruits and Fruit Juices
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 9 is the Fruits and Fruit Juices group; largest percent change first
FRUITS = df[df['food_category_id'] == 9]
FRUITS = FRUITS.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
FRUITS = FRUITS[(FRUITS['PC'] != 0) & (FRUITS['SR Mean per 100g'] != 0)]

# keep the first 19 rows of the sorted data for the chart
topten = FRUITS.iloc[:19]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Fruits and Fruit Juices', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Pork Products

In [21]:
# Pork Products
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 10 is the Pork Products group; largest percent change first
PORK = df[df['food_category_id'] == 10]
PORK = PORK.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
PORK = PORK[(PORK['PC'] != 0) & (PORK['SR Mean per 100g'] != 0)]

# keep the first 22 rows of the sorted data for the chart
topten = PORK.iloc[:22]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Pork Products', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Vegetables and Vegetable Products

In [22]:
# Vegetables and Vegetable Products
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 11 is the Vegetables and Vegetable Products group; largest percent change first
VEG = df[df['food_category_id'] == 11]
VEG = VEG.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
VEG = VEG[(VEG['PC'] != 0) & (VEG['SR Mean per 100g'] != 0)]

# keep the first 15 rows of the sorted data for the chart
topten = VEG.iloc[:15]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Vegetables and Vegetable Products', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Nut and Seed Products

In [23]:
# Nut and Seed Products
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 12 is the Nut and Seed Products group; largest percent change first
NUT = df[df['food_category_id'] == 12]
NUT = NUT.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
NUT = NUT[(NUT['PC'] != 0) & (NUT['SR Mean per 100g'] != 0)]

# keep the first 14 rows of the sorted data for the chart
topten = NUT.iloc[:14]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Nut and Seed Products', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Beef Products

In [24]:
# Beef Products
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 13 is the Beef Products group; largest percent change first
BEEF = df[df['food_category_id'] == 13]
BEEF = BEEF.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
BEEF = BEEF[(BEEF['PC'] != 0) & (BEEF['SR Mean per 100g'] != 0)]

# keep the first 17 rows of the sorted data for the chart
topten = BEEF.iloc[:17]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Beef Products', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Beverages

In [25]:
# Beverages
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 14 is the Beverages group; largest percent change first
BEV = df[df['food_category_id'] == 14]
BEV = BEV.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
BEV = BEV[(BEV['PC'] != 0) & (BEV['SR Mean per 100g'] != 0)]

# keep the first 10 rows of the sorted data for the chart
topten = BEV.iloc[:10]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Beverages', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Legumes and Legume Products

In [26]:
# LEGUMES
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 16 is the Legumes and Legume Products group; largest percent change first
LEGUME = df[df['food_category_id'] == 16]
LEGUME = LEGUME.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
LEGUME = LEGUME[(LEGUME['PC'] != 0) & (LEGUME['SR Mean per 100g'] != 0)]

# keep the first 17 rows of the sorted data for the chart
topten = LEGUME.iloc[:17]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title='Percent Change in the Nutrients of Legumes and Legume Products', text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest, the same layout the Dairy through Beverages cells apply
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Baked Products

In [27]:
# Baked Products
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 18 is the Baked Products group; largest percent change first
BAKE = df[df['food_category_id'] == 18]
BAKE = BAKE.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
BAKE = BAKE[(BAKE['PC'] != 0) & (BAKE['SR Mean per 100g'] != 0)]

# keep the first 10 rows of the sorted data for the chart
topten = BAKE.iloc[:10]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title="Percent Change in the Nutrients of Baked Products", text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest, the same layout the Dairy through Beverages cells apply
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()

# Cereal Grains and Pasta

In [28]:
# Cereal Grains and Pasta
import pandas as pd
import plotly.express as px

# load the FF/SR comparison data and keep only the columns used below
csv = pd.read_csv('FF_SR_ data.csv')
df = csv[['food_category_id', 'FF Food description', 'FF_Component', 'SR Mean per 100g', 'FF Mean per 100g']]

# drop the components whose name contains 'SFA'; na=True drops rows with a missing component name as well
# .copy() makes df an independent dataframe, so adding "PC" below does not raise SettingWithCopyWarning
df = df[~df['FF_Component'].str.contains('SFA', regex=False, na=True)].copy()

# PC = absolute percent difference of the FF mean relative to the SR mean
df["PC"] = ((df["FF Mean per 100g"] - df["SR Mean per 100g"]) / df["SR Mean per 100g"]).abs() * 100

# food_category_id 20 is the Cereal Grains and Pasta group; largest percent change first
GRAINS = df[df['food_category_id'] == 20]
GRAINS = GRAINS.sort_values(by="PC", ascending=False)

# remove rows with no change and rows whose SR mean is 0 (the division above is not meaningful for them)
GRAINS = GRAINS[(GRAINS['PC'] != 0) & (GRAINS['SR Mean per 100g'] != 0)]

# keep the first 10 rows of the sorted data for the chart
topten = GRAINS.iloc[:10]

# one bar per nutrient; rows sharing an FF_Component are added together by px.histogram
fig = px.histogram(topten, y='PC', x='FF_Component', title="Percent Change in the Nutrients of Cereal, Grains, and Pasta", text_auto='.3s', color='FF_Component')
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)

# order the bars from tallest to shortest, the same layout the Dairy through Beverages cells apply
fig.update_layout(barmode='stack', xaxis={'categoryorder': 'total descending'})
fig.show()