In [None]:
import pandas as pd
import re
import matplotlib.pyplot as plt


In [None]:
# Load the menu data; the path is relative, so menu.csv must be in the
# notebook's working directory.
df = pd.read_csv("menu.csv")
# Preview the first rows to see which columns are available.
df.head()

In [None]:
# Summarise the frame: column names, non-null counts, dtypes and memory usage.
df.info()

In [None]:
# Count fully duplicated rows (a row counts as a duplicate when every column
# matches an earlier row); 0 means there are none.
df.duplicated().sum()

In [None]:
# Dataset dimensions as (rows, columns). Printed explicitly because a notebook
# cell only displays its last expression automatically.
print(df.shape)
# Count of missing values in each column.
df.isnull().sum()

In [None]:
# Check each column's dtype for incorrectly typed data. Printed explicitly
# because a notebook cell only displays its last expression automatically.
print(df.dtypes)

# Distinct menu categories present in the data.
df["Category"].unique()

In [None]:
# analysing the dataset

# How many calories does the average McDonald's value meal contain?
# NOTE: this is the mean of the Calories column over every row of the menu
# (individual items), not over combined value meals.
# Taken directly from the column instead of via df.describe(), which would
# compute every summary statistic for every numeric column first.
df["Calories"].mean()

In [None]:
# How much do beverages, like soda or coffee, contribute to the overall caloric intake?
# Share (in percent, 2 decimals) of the menu's summed calories that comes from
# rows in the two drink categories.
drink_categories = ["Beverages", "Coffee & Tea"]
is_drink = df["Category"].isin(drink_categories)
df_filtered = df[is_drink]

drink_calories = df_filtered["Calories"].sum()
total_calories = df["Calories"].sum()
calorie_intake_drinks = round((drink_calories / total_calories) * 100, 2)
calorie_intake_drinks
        

In [None]:
# Does ordering grilled chicken instead of crispy increase a sandwich's nutritional value?
# For every sandwich sold in both a crispy and a grilled version, draw one bar
# chart per nutrient column comparing the two versions.
nutritional_columns = [
    'Calories',
    'Calories from Fat',
    'Total Fat',
    'Saturated Fat',
    'Trans Fat',
    'Cholesterol',
    'Sodium',
    'Carbohydrates',
    'Dietary Fiber',
    'Sugars',
    'Protein'
]

is_sandwich = df["Item"].str.contains("Sandwich")
df_grilled_sandwich = df[df["Item"].str.contains("Grilled") & is_sandwich]
df_crispy_sandwich = df[df["Item"].str.contains("Crispy") & is_sandwich]

# Removing the preparation word gives a base name shared by both versions,
# e.g. "X Crispy Y Sandwich" and "X Grilled Y Sandwich" -> "X Y Sandwich".
variant_pattern = r" (?:Crispy|Grilled)"


def _first_row_by_base_name(variant_df):
    """Map each sandwich base name (preparation word removed) to its first row."""
    rows = {}
    for _, row in variant_df.iterrows():
        rows.setdefault(re.sub(variant_pattern, "", row["Item"]), row)
    return rows


crispy_by_type = _first_row_by_base_name(df_crispy_sandwich)
grilled_by_type = _first_row_by_base_name(df_grilled_sandwich)

# Sorted so the charts come out in the same order on every run (iterating a
# bare set of strings can change order between kernel sessions).
sandwich_types = sorted(
    set(df.loc[is_sandwich, "Item"].str.replace(variant_pattern, "", regex=True))
)

for sandwich in sandwich_types:
    # Only sandwiches that exist in both versions can be compared.
    if sandwich not in crispy_by_type or sandwich not in grilled_by_type:
        continue
    crispy_row = crispy_by_type[sandwich]
    grilled_row = grilled_by_type[sandwich]

    for nutritional_elements in nutritional_columns:
        # Labels are tied to the row they describe, so a bar can never be
        # mislabelled because of row order in the data.
        labels = ["Crispy", "Grilled"]
        values = [crispy_row[nutritional_elements], grilled_row[nutritional_elements]]

        fig, ax = plt.subplots()
        ax.bar(labels, values, color=['blue', 'green'])
        ax.set_xlabel(nutritional_elements)
        ax.set_ylabel('Magnitude')
        ax.set_title(f"Comparing, {sandwich}")
        plt.show()
        plt.close(fig)  # many figures are created in this loop; release each one


In [None]:
# What about ordering egg whites instead of whole eggs?
# For every egg-white item that has a whole-egg counterpart, draw one bar chart
# per nutrient column comparing the two.
nutritional_columns = [
    'Calories',
    'Calories from Fat',
    'Total Fat',
    'Saturated Fat',
    'Trans Fat',
    'Cholesterol',
    'Sodium',
    'Carbohydrates',
    'Dietary Fiber',
    'Sugars',
    'Protein'
]
pattern = r"\s+whites\b"
df_egg_whites = df[df["Item"].str.contains("Egg Whites")]  # items served with egg whites
# The name each egg-white item would have as a whole-egg item
# ("... Egg Whites ..." -> "... Egg ...").
df_egg_list = df_egg_whites["Item"].str.replace(pattern, "", regex=True, flags=re.IGNORECASE)
# Keep only the whole-egg names that really exist on the menu.
# NOTE(review): an egg-white item whose whole-egg version is not named by simply
# dropping "Whites" is not paired here - confirm coverage against the data.
df_whole_eggs = df[df["Item"].isin(df_egg_list)]
egg_types = list(df_whole_eggs["Item"].unique())

# Whole-egg item name -> name of its egg-white counterpart. Pairing by exact
# name avoids building a regex from item names, which may contain characters
# such as parentheses that have a special meaning in regular expressions.
egg_white_name_for = dict(zip(df_egg_list, df_egg_whites["Item"]))

for egg in egg_types:
    whole_row = df_whole_eggs[df_whole_eggs["Item"] == egg].iloc[0]
    whites_row = df_egg_whites[df_egg_whites["Item"] == egg_white_name_for[egg]].iloc[0]

    for nutritional_elements in nutritional_columns:
        # Labels are tied to the row they describe rather than to row order.
        labels = ["Whole Egg", "Egg Whites"]
        values = [whole_row[nutritional_elements], whites_row[nutritional_elements]]

        fig, ax = plt.subplots()
        ax.bar(labels, values, color=['blue', 'green'])
        ax.set_xlabel(nutritional_elements)
        ax.set_ylabel('Magnitude')
        ax.set_title(f"Comparing, {egg}")
        plt.show()
        plt.close(fig)  # many figures are created in this loop; release each one
