In [70]:
# Import library

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
plt.style.use('fivethirtyeight')
warnings.filterwarnings('ignore')
%matplotlib inline

In [71]:
# Import from the public IPython.display module: importing `display` from
# IPython.core.display is deprecated and fails on recent IPython releases.
from IPython.display import display, HTML, Javascript
# ----- Notebook Theme -----
# Palette of eleven hex colours: the first entry is the main colour,
# the second the "strong" main colour and the last one the prompt colour.
color_map = [
    '#f4a261', '#e8f6f3', '#d0ece7', '#a2d9ce', '#73c6b6', '#45b39d',
    '#16a085', '#138d75', '#117a65', '#0e6655', '#e76f51',
]

main_color, strong_main_color = color_map[:2]
prompt = color_map[-1]
custom_colors = [strong_main_color, main_color]

# CSS template for the notebook theme. It contains eleven positional %s
# placeholders: the first one receives a whole CSS declaration for the
# notebook container, the remaining ten receive colour values. The template
# is filled in with the % operator when the stylesheet is written to disk.
css_file = ''' 

div #notebook {
background-color: white;
line-height: 20px;
}

#notebook-container {
%s
margin-top: 2em;
padding-top: 2em;
border-top: 4px solid %s; /* white */
-webkit-box-shadow: 0px 0px 8px 2px rgba(224, 212, 226, 0.5); /* pink */
    box-shadow: 0px 0px 8px 2px rgba(224, 212, 226, 0.5); /* pink */
}

div .input {
margin-bottom: 1em;
}

.rendered_html h1, .rendered_html h2, .rendered_html h3, .rendered_html h4, .rendered_html h5, .rendered_html h6 {
color: %s; /* orange */
font-weight: 600;
}

div.input_area {
border: none;
    background-color: %s; /* rgba(229, 143, 101, 0.1); orange [exactly #E58F65] */
    border-top: 2px solid %s; /* orange */
}

div.input_prompt {
color: %s; /* light blue */
}

div.output_prompt {
color: %s; /* strong orange */
}

div.cell.selected:before, div.cell.selected.jupyter-soft-selected:before {
background: %s; /* orange */
}

div.cell.selected, div.cell.selected.jupyter-soft-selected {
    border-color: %s; /* orange */
}

.edit_mode div.cell.selected:before {
background: %s; /* orange */
}

.edit_mode div.cell.selected {
border-color: %s; /* orange */

}
'''
def to_rgb(h):
    """Convert a six-digit hex colour string into an ``(r, g, b)`` tuple.

    Parameters
    ----------
    h : str
        Hex colour such as ``'f4a261'``. A leading ``'#'`` is accepted and
        ignored, so ``'#f4a261'`` gives the same result.

    Returns
    -------
    tuple of int
        The red, green and blue components, each parsed from two hex digits.

    Raises
    ------
    ValueError
        If a two-character slice is empty or is not valid hexadecimal.
    """
    digits = h[1:] if h.startswith('#') else h
    return tuple(int(digits[i:i + 2], 16) for i in (0, 2, 4))

# Main colour as a CSS rgba() value with 10% opacity (input-area background).
main_color_rgba = 'rgba(%s, %s, %s, 0.1)' % to_rgb(main_color[1:])

# Fill the template's placeholders positionally and write the stylesheet.
# The context manager closes (and therefore flushes) the file, so the
# stylesheet is complete on disk before anything reads it back.
with open('notebook.css', 'w') as css_out:
    css_out.write(css_file % (
        'width: 95%;',
        main_color, main_color, main_color_rgba, main_color,
        main_color, prompt,
        main_color, main_color, main_color, main_color,
    ))

def nb():
    """Return an ``HTML`` object that embeds ``notebook.css`` in a <style> tag.

    The stylesheet is read from ``notebook.css`` in the current working
    directory; the file is closed again before the function returns.

    Raises
    ------
    OSError
        If ``notebook.css`` cannot be opened.
    """
    with open("notebook.css", "r") as css_in:
        return HTML("<style>" + css_in.read() + "</style>")
nb()

In [72]:
# Load the data
# df is the DataFrame every later cell works on.
df = pd.read_csv(
    "../input/mcdonalds-india-menu-nutrition-facts/India_Menu.csv"
)
df.head()

In [73]:
# Check the data for missing values.
# DataFrame.isna() flags every missing cell; sum() counts them per column.
df.isna().sum()

# Result: missing values only in the last column, "Sodium".
# No other column has missing values; the gaps could be filled with the median or the mean.

In [74]:
# Summary statistics of the DataFrame.
df.describe()
# describe() returns a statistical description of the data in the DataFrame.

In [75]:
# Keep only the three fat-related columns for the fat plots further down.
fat_data = df[["Total fat (g)", "Sat Fat (g)", "Trans fat (g)"]]

fat_data.head()


In [76]:
# Find the menu category with the most energy.
# mean() returns the average of the values in each group.

(
    df.groupby("Menu Category")["Energy (kCal)"]
    .mean()
    .sort_values(ascending=False)
)

# Gourmet Menu contains the most energy per serving

In [77]:
# Average protein content per menu category, highest first.

(
    df.groupby("Menu Category")["Protein (g)"]
    .mean()
    .sort_values(ascending=False)
)

# Result: Gourmet Menu contains the highest protein content

In [78]:
# Average total fat per menu category, highest first.
(
    df.groupby("Menu Category")["Total fat (g)"]
    .mean()
    .sort_values(ascending=False)
)

# The Gourmet Menu has the highest fat content

In [79]:
# Total carbohydrate (g): average per menu category, highest first.

(
    df.groupby("Menu Category")["Total carbohydrate (g)"]
    .mean()
    .sort_values(ascending=False)
)

# Highest carbohydrate content in Gourmet Menu

In [80]:
# Sodium (mg): average per menu category, highest first.

(
    df.groupby("Menu Category")["Sodium (mg)"]
    .mean()
    .sort_values(ascending=False)
)

# Highest Sodium content in Gourmet Menu

In [81]:
# Total Sugars (g): average per menu category, highest first.
(
    df.groupby("Menu Category")["Total Sugars (g)"]
    .mean()
    .sort_values(ascending=False)
)
# The Beverage Menu has the highest sugar content, the McCafe Menu the second highest

In [82]:
# matplotlib.pyplot.figure() creates the figure and sets its properties
# (here the size); the seaborn plot below is drawn onto it.

plt.figure(figsize=(18, 8))
# One line per column of fat_data, plotted against the row index.
plt1 = sns.lineplot(data=fat_data)
plt.show()

In [83]:
# Saturated fat against total fat, one line per menu category.
plt.figure(figsize=(18, 8))
plt2 = sns.lineplot(
    data=df,
    x="Total fat (g)",
    y="Sat Fat (g)",
    hue="Menu Category",
)
plt.show()

In [84]:
# Trans fat against total fat, one line per menu category.
plt.figure(figsize=(18, 8))
plt3 = sns.lineplot(
    data=df,
    x="Total fat (g)",
    y="Trans fat (g)",
    hue="Menu Category",
)
plt.show()

In [85]:
# The line plots above suggest that trans fat stays low across menu items even as total fat rises,
# whereas saturated fat tends to increase together with total fat.

In [86]:
# Two-dimensional histogram of cholesterol against total fat.
plt.figure(figsize=(18, 8))
plt4 = sns.histplot(data=df, x="Cholesterols (mg)", y="Total fat (g)")
plt.show()

In [87]:
# The relationship between cholesterol and total fat seems irregular: no clear pattern is visible.

In [88]:
# Show the first rows again before deriving new columns.
df.head()

In [89]:
cols = ["Energy (kCal)","Protein (g)","Total fat (g)","Total carbohydrate (g)","Total Sugars (g)","Sodium (mg)"]

# Extract the numeric part of the serving size (e.g. "168 g" -> "168").
# Matching the number itself, instead of cutting off the last two characters,
# keeps the value correct whatever the unit's length or spacing ("168g", "300 ml").
# Entries without a number become NaN.
df["Per Serve Size_No units"] = (
    df["Per Serve Size"].astype(str).str.extract(r"(\d+(?:\.\d+)?)", expand=False)
)

# Convert the serving size once instead of once per nutrient column.
serve_size_value = df["Per Serve Size_No units"].astype(float)

# Each "<column>mean" column holds the nutrient amount per unit of serving size.
for col in cols:
    df[col+"mean"] = df[col].astype(float) / serve_size_value

df.head()

In [92]:


# Sodium against total fat, one line per menu category.
plt.figure(figsize=(18, 8))
plt5 = sns.lineplot(
    data=df,
    x="Total fat (g)",
    y="Sodium (mg)",
    hue="Menu Category",
)
plt.show()



In [93]:
# Number of distinct menu items.
df['Menu Items'].nunique()
# Called on a single column, nunique() returns the number of distinct values in that column.

In [94]:
# List the distinct menu items.
df['Menu Items'].unique()
# unique() returns the distinct values of the column as an array.

In [100]:
# Label each item as Drink or Food from the unit in its serving size.
# na=False treats a missing serving size as "no match", so the masks stay
# purely boolean and .loc does not raise on NaN entries; regex=False makes
# the search a plain substring test.
drinks = df['Per Serve Size'].str.contains('ml', na=False, regex=False)
df.loc[drinks, 'Food Category']='Drink'
foods = df['Per Serve Size'].str.contains('g', na=False, regex=False)
df.loc[foods, 'Food Category']='Food'

# .loc selects rows with a boolean mask and a column by label, so the value is assigned in place.

In [101]:
# Replace missing sodium values with 0.
# Assign the result back to the column: fillna(..., inplace=True) on a selected
# column is chained assignment and does not update df under pandas Copy-on-Write.
df["Sodium (mg)"] = df["Sodium (mg)"].fillna(0)
# The derived per-unit sodium column has the same gaps; fill it the same way.
df["Sodium (mg)mean"] = df["Sodium (mg)mean"].fillna(0)
df.head()


In [102]:
# Count the missing values per column again after filling the sodium gaps.
df.isna().sum()

In [103]:
# Find the items with the highest energy.
# NOTE(review): the ranking is over all menu items; it does not split food and drinks.
(
    df.groupby("Menu Items")["Energy (kCal)"]
    .sum()
    .sort_values(ascending=False)
)


In [104]:
# Result: The Chicken Cheese Lava Burger has the highest energy among foods and the Strawberry Green Tea among drinks.

# Find the items with the highest sugar content.
(
    df.groupby("Menu Items")["Total Sugars (g)"]
    .sum()
    .sort_values(ascending=False)
)

In [105]:
# Result: Large Fanta Orange is the highest in sugar.

# Find the product with the highest sodium content.

(
    df.groupby("Menu Items")["Sodium (mg)"]
    .sum()
    .sort_values(ascending=False)
)

In [106]:
# The Ghee Rice with Mc Spicy Fried Chicken is the product with the highest Sodium content.

# Correlations between the numeric columns.
# Text columns are excluded explicitly: pandas >= 2.0 no longer drops them
# silently, so DataFrame.corr() would raise on them.
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True,cmap='RdYlGn', linewidths=0.3)

# annot=True writes each correlation value into its cell.
# cmap – a matplotlib colormap name or object

In [None]:
# These pairs of nutrients are strongly correlated:
#   Energy and Total Fat
#   Added Sugars and Total Sugars
#   Sodium and Protein

In [111]:
# plt.subplots(nrows, ncols) creates the figure and a grid of axes in one call.
# The first two arguments give the number of rows and columns of the layout;
# the returned axes are indexed to choose the panel each plot is drawn on.

fig, ax = plt.subplots(1, 3, figsize=(20, 15))  # figsize is (width, height)

# One scatter plot per strongly correlated pair, titled right after it is drawn.
plt1 = sns.scatterplot(data=df, x="Total fat (g)", y="Energy (kCal)", ax=ax[0])
plt1.set_title("Fat vs Energy")

plt2 = sns.scatterplot(data=df, x="Protein (g)", y="Sodium (mg)", ax=ax[1])
plt2.set_title("Protein vs Sodium")

plt3 = sns.scatterplot(data=df, x="Added Sugars (g)", y="Total Sugars (g)", ax=ax[2])
plt3.set_title("Added Sugars vs Total Sugars")

plt.show()
