In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 1. Data

In [2]:
# Load the raw drink-menu table; the path is relative to the notebook's working directory.
menu_csv_path = "data/starbucks_drinkMenu_expanded.csv"
drinks_content = pd.read_csv(menu_csv_path)

In [3]:
# Preview the first five rows to check that the file parsed as expected.
drinks_content.head(n=5)

Unnamed: 0,Beverage_category,Beverage,Beverage_prep,Calories,Total Fat (g),Trans Fat (g),Saturated Fat (g),Sodium (mg),Total Carbohydrates (g),Cholesterol (mg),Dietary Fibre (g),Sugars (g),Protein (g),Vitamin A (% DV),Vitamin C (% DV),Calcium (% DV),Iron (% DV),Caffeine (mg)
0,Coffee,Brewed Coffee,Short,3,0.1,0.0,0.0,0,5,0,0,0,0.3,0%,0%,0%,0%,175
1,Coffee,Brewed Coffee,Tall,4,0.1,0.0,0.0,0,10,0,0,0,0.5,0%,0%,0%,0%,260
2,Coffee,Brewed Coffee,Grande,5,0.1,0.0,0.0,0,10,0,0,0,1.0,0%,0%,0%,0%,330
3,Coffee,Brewed Coffee,Venti,5,0.1,0.0,0.0,0,10,0,0,0,1.0,0%,0%,2%,0%,410
4,Classic Espresso Drinks,Caffè Latte,Short Nonfat Milk,70,0.1,0.1,0.0,5,75,10,0,9,6.0,10%,0%,20%,0%,75


In [4]:
# Table dimensions as (rows, columns).
n_rows, n_cols = drinks_content.shape
(n_rows, n_cols)

(242, 18)

In [6]:
# Count missing values in every column of the raw table.
missing_per_column = drinks_content.isna().sum()
missing_per_column

Beverage_category            0
Beverage                     0
Beverage_prep                0
Calories                     0
 Total Fat (g)               0
Trans Fat (g)                0
Saturated Fat (g)            0
 Sodium (mg)                 0
 Total Carbohydrates (g)     0
Cholesterol (mg)             0
 Dietary Fibre (g)           0
 Sugars (g)                  0
 Protein (g)                 0
Vitamin A (% DV)             0
Vitamin C (% DV)             0
 Calcium (% DV)              0
Iron (% DV)                  0
Caffeine (mg)                1
dtype: int64

In [8]:
# List the column labels exactly as read from the file.
# NOTE(review): in the recorded output several labels carry leading or trailing
# spaces (e.g. ' Total Fat (g)'), so those columns must be referenced with the
# padding included unless the labels are stripped first.
column_labels = drinks_content.columns
column_labels

Index(['Beverage_category', 'Beverage', 'Beverage_prep', 'Calories',
       ' Total Fat (g)', 'Trans Fat (g) ', 'Saturated Fat (g)', ' Sodium (mg)',
       ' Total Carbohydrates (g) ', 'Cholesterol (mg)', ' Dietary Fibre (g)',
       ' Sugars (g)', ' Protein (g) ', 'Vitamin A (% DV) ', 'Vitamin C (% DV)',
       ' Calcium (% DV) ', 'Iron (% DV) ', 'Caffeine (mg)'],
      dtype='object')

# 2. Calories summary

In [15]:
# Summary statistics (count, mean, std, quartiles, min/max) for the Calories column.
calories = drinks_content["Calories"]
calories.describe()

count    242.000000
mean     193.871901
std      102.863303
min        0.000000
25%      120.000000
50%      185.000000
75%      260.000000
max      510.000000
Name: Calories, dtype: float64

# 3. Clean: keep the relevant columns and check for missing values

In [20]:
# Keep only the columns used in the rest of the analysis. The copy makes
# `drinks` independent of `drinks_content`, so adding columns later does not
# touch the raw table.
analysis_columns = ["Beverage_category", "Beverage", "Beverage_prep", "Calories"]
drinks = drinks_content.loc[:, analysis_columns].copy()

In [21]:
# Confirm that none of the retained columns contain missing values.
missing_in_drinks = drinks.isna().sum()
missing_in_drinks

Beverage_category    0
Beverage             0
Beverage_prep        0
Calories             0
dtype: int64

# 4. Calorie Difference

In [23]:
# Difference between a fixed calorie reference and each drink's calories.
# Positive: the drink is below the reference; negative: it exceeds it.
# NOTE(review): the origin of the 135-calorie reference is not stated in this notebook.
calorie_reference = 135
drinks["Calorie_diff"] = calorie_reference - drinks["Calories"]
drinks

Unnamed: 0,Beverage_category,Beverage,Beverage_prep,Calories,Calorie_diff
0,Coffee,Brewed Coffee,Short,3,132
1,Coffee,Brewed Coffee,Tall,4,131
2,Coffee,Brewed Coffee,Grande,5,130
3,Coffee,Brewed Coffee,Venti,5,130
4,Classic Espresso Drinks,Caffè Latte,Short Nonfat Milk,70,65
...,...,...,...,...,...
237,Frappuccino® Blended Crème,Strawberries & Crème (Without Whipped Cream),Soymilk,320,-185
238,Frappuccino® Blended Crème,Vanilla Bean (Without Whipped Cream),Tall Nonfat Milk,170,-35
239,Frappuccino® Blended Crème,Vanilla Bean (Without Whipped Cream),Whole Milk,200,-65
240,Frappuccino® Blended Crème,Vanilla Bean (Without Whipped Cream),Soymilk,180,-45


# 5. Summarise: mean calories per `Beverage_category`

In [36]:
# Mean calories for each beverage category.
(
    drinks
    .groupby(by="Beverage_category")["Calories"]
    .mean()
)

Beverage_category
Classic Espresso Drinks              140.172414
Coffee                                 4.250000
Frappuccino® Blended Coffee          276.944444
Frappuccino® Blended Crème           233.076923
Frappuccino® Light Blended Coffee    162.500000
Shaken Iced Beverages                114.444444
Signature Espresso Drinks            250.000000
Smoothies                            282.222222
Tazo® Tea Drinks                     177.307692
Name: Calories, dtype: float64

# 6. Which `Beverage_prep` types contain, on average, more calories than the average of all drinks?

In [45]:
# Overall mean calories across every drink row. The mean is kept as an exact
# float: truncating it with int() would lower the threshold and could report a
# prep type as above average when its mean falls between the truncated and the
# true value.
avg_cal = drinks["Calories"].mean()
# Mean calories per preparation type, as a two-column frame
# (Beverage_prep, Calories).
grouped_drinks = (
    drinks
    .groupby("Beverage_prep")["Calories"]
    .mean()
    .reset_index(name="Calories")
)
# Keep only the preparation types whose mean is strictly above the overall mean.
grouped_drinks.loc[grouped_drinks["Calories"] > avg_cal]

Unnamed: 0,Beverage_prep,Calories
0,2% Milk,218.0
3,Grande Nonfat Milk,209.615385
7,Soymilk,207.272727
11,Venti Nonfat Milk,260.0
12,Whole Milk,283.75


# 7. Best coffee drink if you are concerned about calories

In [42]:
# Distinct beverage categories, in order of first appearance.
drinks["Beverage_category"].unique()

array(['Coffee', 'Classic Espresso Drinks', 'Signature Espresso Drinks',
       'Tazo® Tea Drinks', 'Shaken Iced Beverages', 'Smoothies',
       'Frappuccino® Blended Coffee', 'Frappuccino® Light Blended Coffee',
       'Frappuccino® Blended Crème'], dtype=object)

In [47]:
# Rows whose category name contains "Coffee" or "Espresso" (regex alternation).
# This also matches the Frappuccino categories with "Coffee" in their name.
is_coffee_or_espresso = drinks["Beverage_category"].str.contains("Coffee|Espresso")
# Sort ascending by calories and keep the single lowest-calorie row.
drinks.loc[is_coffee_or_espresso].sort_values("Calories").head(1)

Unnamed: 0,Beverage_category,Beverage,Beverage_prep,Calories,Calorie_diff
0,Coffee,Brewed Coffee,Short,3,132


# 8. Espresso drinks

In [48]:
# Select the drinks whose category name contains "Espresso".
espresso_mask = drinks["Beverage_category"].str.contains("Espresso")
espresso_drinks = drinks[espresso_mask]
espresso_drinks

Unnamed: 0,Beverage_category,Beverage,Beverage_prep,Calories,Calorie_diff
4,Classic Espresso Drinks,Caffè Latte,Short Nonfat Milk,70,65
5,Classic Espresso Drinks,Caffè Latte,2% Milk,100,35
6,Classic Espresso Drinks,Caffè Latte,Soymilk,70,65
7,Classic Espresso Drinks,Caffè Latte,Tall Nonfat Milk,100,35
8,Classic Espresso Drinks,Caffè Latte,2% Milk,150,-15
...,...,...,...,...,...
97,Signature Espresso Drinks,Hot Chocolate (Without Whipped Cream),Soymilk,330,-195
98,Signature Espresso Drinks,Caramel Apple Spice (Without Whipped Cream),Short,140,-5
99,Signature Espresso Drinks,Caramel Apple Spice (Without Whipped Cream),Tall,210,-75
100,Signature Espresso Drinks,Caramel Apple Spice (Without Whipped Cream),Grande,280,-145


# 9. Group espresso drinks by `Beverage_prep` and find the average calories

In [50]:
# Mean calories of the espresso drinks for each preparation type, returned as
# a frame with the mean in an "Average Calories" column.
espresso_prep_means = espresso_drinks.groupby("Beverage_prep")["Calories"].mean()
espresso_prep_means.reset_index(name="Average Calories")

Unnamed: 0,Beverage_prep,Average Calories
0,2% Milk,223.571429
1,Doppio,10.0
2,Grande,147.5
3,Grande Nonfat Milk,191.25
4,Short,72.5
5,Short Nonfat Milk,100.0
6,Solo,5.0
7,Soymilk,190.0
8,Tall,110.0
9,Tall Nonfat Milk,147.5


# 10. Sort the average calories per `Beverage_prep` in descending order

In [51]:
# Mean calories of the espresso drinks per preparation type, highest first.
(
    espresso_drinks
    .groupby("Beverage_prep")["Calories"]
    .mean()
    .reset_index(name="Average Calories")
    .sort_values(by="Average Calories", ascending=False)
)

Unnamed: 0,Beverage_prep,Average Calories
11,Venti Nonfat Milk,247.5
0,2% Milk,223.571429
10,Venti,192.5
3,Grande Nonfat Milk,191.25
7,Soymilk,190.0
2,Grande,147.5
9,Tall Nonfat Milk,147.5
8,Tall,110.0
5,Short Nonfat Milk,100.0
4,Short,72.5
