In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Walk the Kaggle input directory and print the full path of every file in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

In [None]:
import missingno as msno
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

<font size=4><b>Read the Data</b></font>

In [None]:
# Load the menu nutrition table and preview its first rows.
menu_csv_path = '../input/nutrition-facts/menu.csv'
df = pd.read_csv(menu_csv_path)
df.head()

<font size=4><b>Checking for null values</b></font>

In [None]:
# Draw missingno's nullity matrix for the whole frame to spot missing values.
msno.matrix(df=df)

<font size=4><b>Unique values in Category Column</b></font>

In [None]:
# Distinct values of the Category column, in order of first appearance.
df['Category'].unique()

<font size=3>Below we count the menu items in each Category of McDonald's menu. Coffee & Tea has by far the largest number of items; note that this reflects how many items are offered, not how much customers consume.</font>

In [None]:
# Bar chart of how many menu items fall into each Category.
plt.style.use('default')
plt.figure(figsize=(7, 4), edgecolor='0.1', dpi=100)
ax = sns.countplot(x='Category', facecolor='darkgreen', data=df)

# Rotate the category names so they do not overlap, and use a serif face
# for all tick labels.
plt.xticks(rotation=55, family='serif')
plt.yticks(family='serif')

# Label the axes with fixed strings. The x label used to be
# df['Category'].all(), which is a boolean reduction of the column rather
# than the column name, so the axis was mislabelled.
plt.xlabel('Category', family='serif')
plt.ylabel('Count', family='serif')

# Soften the two axis lines that remain after despine() removes the
# top and right ones.
ax.spines['bottom'].set_color('gray')
ax.spines['left'].set_color('gray')
sns.despine()

<font size=5><b>Analysis of Number of Items per Category</b></font>
<br>
<font size=4>Below we observe that the Coffee & Tea category has the largest number of unique items on the menu, followed by Breakfast.</font>

In [None]:
# Density heatmap of Item against Category, 900 px wide.
px.density_heatmap(df, x='Category', y='Item', width=900)

<font size=5><b>Analysis of Nutrients</b></font>

<font size=4><b>Average Calories distribution for each category</b></font>
<br>
<font size=3>Below we observe the highest average calories for Chicken & Fish, followed by Smoothies & Shakes and Breakfast.</font>

In [None]:
# Mean Calories per menu category, indexed by Category.
calories = df.groupby('Category')['Calories'].mean().to_frame()

# Grey bars for every category, with the highest-average one highlighted.
# The list length and the highlighted position come from the data rather than
# hard-coded numbers, so the colours always line up with the bars.
colors = ['gray'] * len(calories)
if calories['Calories'].notna().any():
    colors[calories.index.get_loc(calories['Calories'].idxmax())] = '#AF0038'

fig = go.Figure(data=[go.Bar(
    x=calories.index,
    y=calories['Calories'],
    marker_color=colors,
)])
fig.update_layout(width=700, height=500)
fig.update_yaxes(title='Avg Calories')
fig.show()

<font size=5><b>Total Fat</b></font>
<br>
<font size=3>Below is the average Total Fat for each category. Breakfast, Chicken & Fish, and Beef & Pork are at the forefront when it comes to fat content.</font>

In [None]:
# Mean Total Fat per menu category, indexed by Category.
fat = df.groupby('Category')['Total Fat'].mean().to_frame()

# Grey bars for every category, with the highest-average one highlighted.
# The list length and the highlighted position come from the data rather than
# hard-coded numbers, so the colours always line up with the bars.
colors = ['gray'] * len(fat)
if fat['Total Fat'].notna().any():
    colors[fat.index.get_loc(fat['Total Fat'].idxmax())] = '#C4451C'

fig = go.Figure(data=[go.Bar(
    x=fat.index,
    y=fat['Total Fat'],
    marker_color=colors,
)])
fig.update_yaxes(title='Avg Total Fat')
fig.update_layout(width=700, height=500)
fig.show()

<font size=5><b>Carbohydrates</b></font>
<br>
<font size=3>We see that Smoothies & Shakes have the highest Average Carbohydrates as compared to other Categories in the menu.</font>

In [None]:
# Mean Carbohydrates per menu category, indexed by Category.
carbs = df.groupby('Category')['Carbohydrates'].mean().to_frame()

# Grey bars for every category, with the highest-average one highlighted.
# The list length and the highlighted position come from the data rather than
# hard-coded numbers, so the colours always line up with the bars.
colors = ['gray'] * len(carbs)
if carbs['Carbohydrates'].notna().any():
    colors[carbs.index.get_loc(carbs['Carbohydrates'].idxmax())] = '#B82E2E'

fig = go.Figure(data=[go.Bar(
    x=carbs.index,
    y=carbs['Carbohydrates'],
    marker_color=colors,
)])
fig.update_yaxes(title='Avg Carbohydrates')
fig.update_layout(width=700, height=500)
fig.show()

<font size=5><b>Sugars</b></font>
<br>
<font size=3>Carbohydrates are an essential macronutrient the body requires in large amounts to run smoothly, but not all carbs are created equal. Sugars account for much of the carbohydrate content, so Smoothies & Shakes also have the highest average Sugars.</font>

In [None]:
# Mean Sugars per menu category, indexed by Category.
sugars = df.groupby('Category')['Sugars'].mean().to_frame()

# Grey bars for every category, with the highest-average one highlighted.
# The list length and the highlighted position come from the data rather than
# hard-coded numbers, so the colours always line up with the bars.
colors = ['gray'] * len(sugars)
if sugars['Sugars'].notna().any():
    colors[sugars.index.get_loc(sugars['Sugars'].idxmax())] = '#B82E2E'

fig = go.Figure(data=[go.Bar(
    x=sugars.index,
    y=sugars['Sugars'],
    marker_color=colors,
)])
fig.update_yaxes(title='Avg Sugars')
fig.update_layout(width=700, height=500)
fig.show()

<font size=5><b>Protein</b></font>
<br>
<font size=3>The largest average amounts of protein are observed in Chicken & Fish, followed by Beef & Pork.</font>

In [None]:
# Mean Protein per menu category, indexed by Category.
protein = df.groupby('Category')['Protein'].mean().to_frame()

# Grey bars for every category, with the highest-average one highlighted.
# The list length and the highlighted position come from the data rather than
# hard-coded numbers, so the colours always line up with the bars.
colors = ['gray'] * len(protein)
if protein['Protein'].notna().any():
    colors[protein.index.get_loc(protein['Protein'].idxmax())] = '#1C8356'

fig = go.Figure(data=[go.Bar(
    x=protein.index,
    y=protein['Protein'],
    marker_color=colors,
)])
fig.update_yaxes(title='Avg Protein')
fig.update_layout(width=700, height=500)
fig.show()

<font size=5><b>Cholesterol</b></font>

In [None]:
# Mean Cholesterol per menu category, indexed by Category.
chol = df.groupby('Category')['Cholesterol'].mean().to_frame()

# Grey bars for every category, with the highest-average one highlighted.
# The list length and the highlighted position come from the data rather than
# hard-coded numbers, so the colours always line up with the bars.
colors = ['gray'] * len(chol)
if chol['Cholesterol'].notna().any():
    colors[chol.index.get_loc(chol['Cholesterol'].idxmax())] = '#85660D'

fig = go.Figure(data=[go.Bar(
    x=chol.index,
    y=chol['Cholesterol'],
    marker_color=colors,
)])
fig.update_yaxes(title='Avg Cholesterol')
fig.update_layout(width=700, height=500)
fig.show()

<font size=4><b>Below we analyze how Carbohydrates and Sugars are related</b></font>

In [None]:
# Scatter of Sugars against Carbohydrates for every menu item.
plt.style.use('ggplot')
fig, ax = plt.subplots(figsize=(8, 5), dpi=80)
sns.scatterplot(data=df, x='Carbohydrates', y='Sugars', alpha=0.8, s=60, ax=ax)
# Centre the title horizontally and lift it slightly above the axes.
ax.set_title('Carbohydrates vs Sugars', x=0.5, y=1.05)

<font size=5><b>Average Nutrient Analysis (% Daily Value) for each of the Categories</b></font>


In [None]:
# "% Daily Value" columns to summarise per category.
cols = [
    'Vitamin A (% Daily Value)',
    'Vitamin C (% Daily Value)',
    'Calcium (% Daily Value)',
    'Iron (% Daily Value)',
    'Total Fat (% Daily Value)',
    'Cholesterol (% Daily Value)',
    'Carbohydrates (% Daily Value)',
]

# One row per nutrient and one column per Category, aggregated with
# pivot_table's default aggregation, then shaded so larger values stand out.
table = pd.pivot_table(df, values=cols, columns='Category')
table.style.background_gradient(cmap='viridis')

<font size=4><b>For each Category below, we find the item with the highest amount of each nutrient on the McDonald's menu</b></font>

<font size=5><b>Breakfast</b></font>

In [None]:
# Keep only the Breakfast rows, then report the top item for each nutrient.
breakfast = df.loc[df['Category'] == 'Breakfast']
cols = ['Calories', 'Total Fat', 'Carbohydrates', 'Sodium', 'Protein', 'Dietary Fiber']
separator = '-' * 40
for nutrient in cols:
    # Per-item maximum, sorted descending so the first row is the largest.
    top_item = (
        breakfast.groupby('Item')[nutrient]
        .max()
        .sort_values(ascending=False)
        .head(1)
    )
    # Prints the nutrient name, the winning item, a rule and a blank gap.
    print(nutrient, top_item, separator, '\n', sep='\n')

<font size=5><b>Chicken & Fish</b></font>

In [None]:
# Keep only the Chicken & Fish rows, then report the top item for each nutrient.
chfish = df.loc[df['Category'] == 'Chicken & Fish']
cols = ['Calories', 'Total Fat', 'Carbohydrates', 'Sodium', 'Protein', 'Dietary Fiber']
separator = '-' * 40
for nutrient in cols:
    # Per-item maximum, sorted descending so the first row is the largest.
    top_item = (
        chfish.groupby('Item')[nutrient]
        .max()
        .sort_values(ascending=False)
        .head(1)
    )
    # Prints the nutrient name, the winning item, a rule and a blank gap.
    print(nutrient, top_item, separator, '\n', sep='\n')

<font size=5><b>Beverages</b></font>

In [None]:
# Keep only the Beverages rows, then report the top item for each nutrient.
bev = df.loc[df['Category'] == 'Beverages']
cols = ['Calories', 'Total Fat', 'Carbohydrates', 'Sodium', 'Protein', 'Dietary Fiber']
separator = '-' * 40
for nutrient in cols:
    # Per-item maximum, sorted descending so the first row is the largest.
    top_item = (
        bev.groupby('Item')[nutrient]
        .max()
        .sort_values(ascending=False)
        .head(1)
    )
    # Prints the nutrient name, the winning item, a rule and a blank gap.
    print(nutrient, top_item, separator, '\n', sep='\n')

<font size=5><b>Smoothies & Shakes</b></font>

In [None]:
# Keep only the Smoothies & Shakes rows, then report the top item for each nutrient.
shakes = df.loc[df['Category'] == 'Smoothies & Shakes']
cols = ['Calories', 'Total Fat', 'Carbohydrates', 'Sodium', 'Protein', 'Dietary Fiber']
separator = '-' * 40
for nutrient in cols:
    # Per-item maximum, sorted descending so the first row is the largest.
    top_item = (
        shakes.groupby('Item')[nutrient]
        .max()
        .sort_values(ascending=False)
        .head(1)
    )
    # Prints the nutrient name, the winning item, a rule and a blank gap.
    print(nutrient, top_item, separator, '\n', sep='\n')

<font size=5><b>Salads</b></font>

In [None]:
# Keep only the Salads rows, then report the top item for each nutrient
# (this section also includes Cholesterol).
salads = df.loc[df['Category'] == 'Salads']
cols = ['Calories', 'Total Fat', 'Carbohydrates', 'Sodium', 'Protein', 'Dietary Fiber', 'Cholesterol']
separator = '-' * 40
for nutrient in cols:
    # Per-item maximum, sorted descending so the first row is the largest.
    top_item = (
        salads.groupby('Item')[nutrient]
        .max()
        .sort_values(ascending=False)
        .head(1)
    )
    # Prints the nutrient name, the winning item, a rule and a blank gap.
    print(nutrient, top_item, separator, '\n', sep='\n')

<font size=5>Do upvote if you found this insightful 🙂</font>