# 02. Serving_size

- Convert the serving size into grams (g)
- Normalize the data to 100 g

In [1]:
import pandas as pd
import numpy as np


## McDonald's Serving unit

In [2]:
# Load the cleaned McDonald's table from the project's data folder.
# NOTE(review): read_pickle can execute arbitrary code while loading, so this file
# should only ever be one the project produced itself — confirm its provenance.
mcd = pd.read_pickle("../00_data/02_mcdonalds_clean_v1.pkl")

In [3]:
# Show the loaded frame; serving_size_g still holds free text such as "4.8 oz (136 g)".
mcd

Unnamed: 0,category,item_name,serving_size_g,calories_kcal,total_fat_g,saturated_fat_g,trans_fat_g,cholesterol_mg,sodium_mg,carbohydrates_g,dietary_fiber_g,sugars_g,protein_g,category_clean
0,Breakfast,Egg McMuffin,4.8 oz (136 g),300,13,5,0,260,750,31,4,3,17,breakfast
1,Breakfast,Egg White Delight,4.8 oz (135 g),250,8,3,0,25,770,30,4,3,18,breakfast
2,Breakfast,Sausage McMuffin,3.9 oz (111 g),370,23,8,0,45,780,29,4,2,14,breakfast
3,Breakfast,Sausage McMuffin with Egg,5.7 oz (161 g),450,28,10,0,285,860,30,4,2,21,breakfast
4,Breakfast,Sausage McMuffin with Egg Whites,5.7 oz (161 g),400,23,8,0,50,880,30,4,2,21,breakfast
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Medium),14.2 oz (403 g),810,32,15,1,60,400,114,2,103,21,beverage
259,Smoothies & Shakes,McFlurry with Reese's Peanut Butter Cups (Snack),7.1 oz (202 g),410,16,8,0,30,200,57,1,51,10,beverage
260,sauce,home made sauce,30,41,3.4,0.3,,,0.5,2.3,0.2,1.5,0.4,sauce
261,sauce,balsamico,30,22,0.8,0.1,,,0.6,3.2,0.1,3.2,0.0,sauce


In [4]:
# Break each serving-size string into space-separated tokens, one column per
# token, e.g. "4.8 oz (136 g)" -> "4.8", "oz", "(136", "g)".
serving_text = mcd["serving_size_g"]
mcd_split = serving_text.str.split(" ", expand=True)

In [5]:
# Inspect the token columns (still integer-named 0-4 at this point).
mcd_split

Unnamed: 0,0,1,2,3,4
0,4.8,oz,(136,g),
1,4.8,oz,(135,g),
2,3.9,oz,(111,g),
3,5.7,oz,(161,g),
4,5.7,oz,(161,g),
...,...,...,...,...,...
258,14.2,oz,(403,g),
259,7.1,oz,(202,g),
260,30,,,,
261,30,,,,


In [6]:
# Strip the opening parenthesis from the gram token, e.g. "(136" -> "136".
# regex=False states the literal match explicitly: a lone "(" is not a valid
# regular expression, and the default of `regex` differs between pandas versions.
mcd_split.iloc[:, 2] = mcd_split.iloc[:, 2].str.replace("(", "", regex=False)

In [7]:
# Raise the row display limit so the whole token table is rendered.
pd.options.display.max_rows = 500

mcd_split

Unnamed: 0,0,1,2,3,4
0,4.8,oz,136,g),
1,4.8,oz,135,g),
2,3.9,oz,111,g),
3,5.7,oz,161,g),
4,5.7,oz,161,g),
5,6.5,oz,185,g),
6,5.3,oz,150,g),
7,5.8,oz,164,g),
8,5.4,oz,153,g),
9,5.9,oz,167,g),


In [8]:
# Give the positional token columns 0-4 the short letter names a-e.
letter_names = dict(zip(range(5), "abcde"))
mcd_split = mcd_split.rename(columns=letter_names)

In [9]:
# Keep the raised row display limit and show the renamed token table.
pd.options.display.max_rows = 500

mcd_split

Unnamed: 0,a,b,c,d,e
0,4.8,oz,136,g),
1,4.8,oz,135,g),
2,3.9,oz,111,g),
3,5.7,oz,161,g),
4,5.7,oz,161,g),
5,6.5,oz,185,g),
6,5.3,oz,150,g),
7,5.8,oz,164,g),
8,5.4,oz,153,g),
9,5.9,oz,167,g),


#### Row 132 has messy data --> manually adjust the values so that it follows the same pattern as the other beverages. The 177 ml is not lost as information, because 6 fl oz = 177 ml.

In [10]:
# Row 132 reads "6 fl oz (177 ml)", so its tokens sit one column further right than usual.
mcd_split.loc[132,:]

a       6
b      fl
c      oz
d    (177
e     ml)
Name: 132, dtype: object

In [11]:
# Overwrite the "(177" token of row 132 with the placeholder "cup".
mcd_split.at[132, "d"] = "cup"

In [12]:
# Overwrite the "ml)" token of row 132. Note this is the literal text "None",
# not a missing value; column "e" is not carried into the final table.
mcd_split.at[132, "e"] = "None"

In [13]:
# Confirm row 132 after the manual fix: "oz" is in column c, as the fl oz rule expects.
mcd_split.loc[132,:]

a       6
b      fl
c      oz
d     cup
e    None
Name: 132, dtype: object

#### Same approach for rows 260 - 262

In [14]:
# These rows hold a bare number only, so columns b-e are empty.
# The label slice is inclusive; 263 is past the last label, so rows 260-262 come back.
mcd_split.loc[260:263,:]

Unnamed: 0,a,b,c,d,e
260,30,,,,
261,30,,,,
262,50,,,,


In [15]:
# Copy the bare gram amount of each row into column c and mark the unit in d,
# so these rows look like the regular "<oz> oz (<g> g)" rows.
for row_label, grams in ((260, "30"), (261, "30"), (262, "50")):
    mcd_split.loc[row_label, ["c", "d"]] = [grams, "g"]

In [16]:
# Confirm columns c and d are now filled for rows 260-262.
mcd_split.loc[260:263,:]

Unnamed: 0,a,b,c,d,e
260,30,,30,g,
261,30,,30,g,
262,50,,50,g,


#### Same approach for rows 130 and 131

In [17]:
# Rows 130 and 131 are given as "1 carton" with a millilitre amount instead of grams.
mcd_split.loc[130:131,:]

Unnamed: 0,a,b,c,d,e
130,1,carton,236,ml),
131,1,carton,236,ml),


In [18]:
# Express the 236 ml cartons as 8 fl oz so they follow the "<amount> fl oz" layout.
# Column d receives the literal text "None" (a placeholder string, not a missing value).
for carton_row in (130, 131):
    mcd_split.loc[carton_row, ["a", "b", "c", "d"]] = ["8", "fl", "oz", "None"]

In [19]:
# Confirm rows 130 and 131 now read as 8 fl oz.
mcd_split.loc[130:131,:]

Unnamed: 0,a,b,c,d,e
130,8,fl,oz,,
131,8,fl,oz,,


In [20]:
# Full view of the token table after the manual fixes.
mcd_split

Unnamed: 0,a,b,c,d,e
0,4.8,oz,136,g),
1,4.8,oz,135,g),
2,3.9,oz,111,g),
3,5.7,oz,161,g),
4,5.7,oz,161,g),
5,6.5,oz,185,g),
6,5.3,oz,150,g),
7,5.8,oz,164,g),
8,5.4,oz,153,g),
9,5.9,oz,167,g),


## Calculation logic

The logic to get the correct g-value is:
- if column c contains a numeric value --> value in c is the g value -> unit = g
- if column c does not contain a numeric value --> value in a is the right value --> unit = fl oz

In [21]:
# Add an empty text column that will receive the chosen serving amount.
blank_amount = ""
mcd_split = mcd_split.assign(serve_size_new=blank_amount)
mcd_split.head()

Unnamed: 0,a,b,c,d,e,serve_size_new
0,4.8,oz,136,g),,
1,4.8,oz,135,g),,
2,3.9,oz,111,g),,
3,5.7,oz,161,g),,
4,5.7,oz,161,g),,


#### filter all rows with column c containing a numeric value --> write the value into column "serve_size_new"

In [22]:
# Rows whose column c does not contain "oz" carry the gram amount in c:
# copy it over and label the unit as "g".
is_gram_row = ~mcd_split["c"].str.contains("oz")
mcd_split.loc[is_gram_row, "serve_size_new"] = mcd_split["c"]
mcd_split.loc[is_gram_row, "serve_size_unit"] = "g"
mcd_split.head()

Unnamed: 0,a,b,c,d,e,serve_size_new,serve_size_unit
0,4.8,oz,136,g),,136,g
1,4.8,oz,135,g),,135,g
2,3.9,oz,111,g),,111,g
3,5.7,oz,161,g),,161,g
4,5.7,oz,161,g),,161,g


#### filter all rows with column c containing "oz" --> write the value of column a into column "serve_size_new"

In [23]:
# Rows whose column c contains "oz" are "<amount> fl oz" entries:
# the amount sits in column a and the unit is fluid ounces.
is_floz_row = mcd_split["c"].str.contains("oz")
mcd_split.loc[is_floz_row, "serve_size_new"] = mcd_split["a"]
mcd_split.loc[is_floz_row, "serve_size_unit"] = "fl oz"
mcd_split

Unnamed: 0,a,b,c,d,e,serve_size_new,serve_size_unit
0,4.8,oz,136,g),,136.0,g
1,4.8,oz,135,g),,135.0,g
2,3.9,oz,111,g),,111.0,g
3,5.7,oz,161,g),,161.0,g
4,5.7,oz,161,g),,161.0,g
5,6.5,oz,185,g),,185.0,g
6,5.3,oz,150,g),,150.0,g
7,5.8,oz,164,g),,164.0,g
8,5.4,oz,153,g),,153.0,g
9,5.9,oz,167,g),,167.0,g


#### merge back to the main dataframe mcd

In [24]:
# Row count of the original frame, to compare with the token table before merging.
len(mcd)

263

In [25]:
# Row count of the token table; it should equal the row count of mcd.
len(mcd_split)

263

In [26]:
# Attach the token and serving-size columns to the original rows by matching index labels.
mcdonalds = mcd.merge(mcd_split, left_index=True, right_index=True)

In [27]:
# Inspect the merged frame: original columns followed by a-e and the two new serving columns.
mcdonalds

Unnamed: 0,category,item_name,serving_size_g,calories_kcal,total_fat_g,saturated_fat_g,trans_fat_g,cholesterol_mg,sodium_mg,carbohydrates_g,...,sugars_g,protein_g,category_clean,a,b,c,d,e,serve_size_new,serve_size_unit
0,Breakfast,Egg McMuffin,4.8 oz (136 g),300,13.0,5.0,0.0,260.0,750.0,31.0,...,3.0,17.0,breakfast,4.8,oz,136,g),,136.0,g
1,Breakfast,Egg White Delight,4.8 oz (135 g),250,8.0,3.0,0.0,25.0,770.0,30.0,...,3.0,18.0,breakfast,4.8,oz,135,g),,135.0,g
2,Breakfast,Sausage McMuffin,3.9 oz (111 g),370,23.0,8.0,0.0,45.0,780.0,29.0,...,2.0,14.0,breakfast,3.9,oz,111,g),,111.0,g
3,Breakfast,Sausage McMuffin with Egg,5.7 oz (161 g),450,28.0,10.0,0.0,285.0,860.0,30.0,...,2.0,21.0,breakfast,5.7,oz,161,g),,161.0,g
4,Breakfast,Sausage McMuffin with Egg Whites,5.7 oz (161 g),400,23.0,8.0,0.0,50.0,880.0,30.0,...,2.0,21.0,breakfast,5.7,oz,161,g),,161.0,g
5,Breakfast,Steak & Egg McMuffin,6.5 oz (185 g),430,23.0,9.0,1.0,300.0,960.0,31.0,...,3.0,26.0,breakfast,6.5,oz,185,g),,185.0,g
6,Breakfast,"Bacon, Egg & Cheese Biscuit (Regular Biscuit)",5.3 oz (150 g),460,26.0,13.0,0.0,250.0,1300.0,38.0,...,3.0,19.0,breakfast,5.3,oz,150,g),,150.0,g
7,Breakfast,"Bacon, Egg & Cheese Biscuit (Large Biscuit)",5.8 oz (164 g),520,30.0,14.0,0.0,250.0,1410.0,43.0,...,4.0,19.0,breakfast,5.8,oz,164,g),,164.0,g
8,Breakfast,"Bacon, Egg & Cheese Biscuit with Egg Whites (R...",5.4 oz (153 g),410,20.0,11.0,0.0,35.0,1300.0,36.0,...,3.0,20.0,breakfast,5.4,oz,153,g),,153.0,g
9,Breakfast,"Bacon, Egg & Cheese Biscuit with Egg Whites (L...",5.9 oz (167 g),470,25.0,12.0,0.0,35.0,1420.0,42.0,...,4.0,20.0,breakfast,5.9,oz,167,g),,167.0,g


#### Reshuffle the order of the columns and drop irrelevant columns

In [28]:
# List the merged column names before choosing the final column order.
mcdonalds.columns

Index(['category', 'item_name', 'serving_size_g', 'calories_kcal',
       'total_fat_g', 'saturated_fat_g', 'trans_fat_g', 'cholesterol_mg',
       'sodium_mg', 'carbohydrates_g', 'dietary_fiber_g', 'sugars_g',
       'protein_g', 'category_clean', 'a', 'b', 'c', 'd', 'e',
       'serve_size_new', 'serve_size_unit'],
      dtype='object')

In [29]:
# Final column layout: cleaned category and item first, then the new serving
# columns, the nutrition values, and finally the original category / serving text.
# The helper token columns a-e are left out.
mcd_column_order = [
    "category_clean",
    "item_name",
    "serve_size_new",
    "serve_size_unit",
    "calories_kcal",
    "total_fat_g",
    "saturated_fat_g",
    "trans_fat_g",
    "cholesterol_mg",
    "sodium_mg",
    "carbohydrates_g",
    "dietary_fiber_g",
    "sugars_g",
    "protein_g",
    "category",
    "serving_size_g",
]
mcdonalds = mcdonalds.loc[:, mcd_column_order].copy()

In [30]:
# Inspect the reordered McDonald's table.
mcdonalds

Unnamed: 0,category_clean,item_name,serve_size_new,serve_size_unit,calories_kcal,total_fat_g,saturated_fat_g,trans_fat_g,cholesterol_mg,sodium_mg,carbohydrates_g,dietary_fiber_g,sugars_g,protein_g,category,serving_size_g
0,breakfast,Egg McMuffin,136.0,g,300,13.0,5.0,0.0,260.0,750.0,31.0,4.0,3.0,17.0,Breakfast,4.8 oz (136 g)
1,breakfast,Egg White Delight,135.0,g,250,8.0,3.0,0.0,25.0,770.0,30.0,4.0,3.0,18.0,Breakfast,4.8 oz (135 g)
2,breakfast,Sausage McMuffin,111.0,g,370,23.0,8.0,0.0,45.0,780.0,29.0,4.0,2.0,14.0,Breakfast,3.9 oz (111 g)
3,breakfast,Sausage McMuffin with Egg,161.0,g,450,28.0,10.0,0.0,285.0,860.0,30.0,4.0,2.0,21.0,Breakfast,5.7 oz (161 g)
4,breakfast,Sausage McMuffin with Egg Whites,161.0,g,400,23.0,8.0,0.0,50.0,880.0,30.0,4.0,2.0,21.0,Breakfast,5.7 oz (161 g)
5,breakfast,Steak & Egg McMuffin,185.0,g,430,23.0,9.0,1.0,300.0,960.0,31.0,4.0,3.0,26.0,Breakfast,6.5 oz (185 g)
6,breakfast,"Bacon, Egg & Cheese Biscuit (Regular Biscuit)",150.0,g,460,26.0,13.0,0.0,250.0,1300.0,38.0,2.0,3.0,19.0,Breakfast,5.3 oz (150 g)
7,breakfast,"Bacon, Egg & Cheese Biscuit (Large Biscuit)",164.0,g,520,30.0,14.0,0.0,250.0,1410.0,43.0,3.0,4.0,19.0,Breakfast,5.8 oz (164 g)
8,breakfast,"Bacon, Egg & Cheese Biscuit with Egg Whites (R...",153.0,g,410,20.0,11.0,0.0,35.0,1300.0,36.0,2.0,3.0,20.0,Breakfast,5.4 oz (153 g)
9,breakfast,"Bacon, Egg & Cheese Biscuit with Egg Whites (L...",167.0,g,470,25.0,12.0,0.0,35.0,1420.0,42.0,3.0,4.0,20.0,Breakfast,5.9 oz (167 g)


## Burger King's serving unit

In [31]:
# Load the cleaned Burger King table from the project's data folder.
# NOTE(review): read_pickle can execute arbitrary code while loading, so this file
# should only ever be one the project produced itself — confirm its provenance.
bk = pd.read_pickle("../00_data/01_burger_king_clean_v1.pkl")

In [32]:
# Show the loaded Burger King frame.
bk

Unnamed: 0,item_name,category,serving_size_g,calories_kcal,total_fat_g,saturated_fat_g,trans_fat_g,cholesterol_mg,sodium_mg,carbohydrates_g,dietary_fiber_g,sugars_g,protein_g,category_clean
0,WHOPPER® Sandwich,WHOPPER® SANDWICHES,270,660,40.0,12.0,1.5,90,980,49,2,11,28,sides
1,WHOPPER® Sandwich with Cheese,WHOPPER® SANDWICHES,292,740,46.0,16.0,2.0,115,1340,50,2,11,32,sides
2,Bacon & Cheese WHOPPER® Sandwich,WHOPPER® SANDWICHES,303,790,51.0,17.0,2.0,125,1560,50,2,11,35,sides
3,DOUBLE WHOPPER® Sandwich,WHOPPER® SANDWICHES,354,900,58.0,20.0,3.0,175,1050,49,2,11,48,sides
4,DOUBLE WHOPPER® Sandwich with Cheese,WHOPPER® SANDWICHES,377,980,64.0,24.0,3.0,195,1410,50,2,11,52,sides
5,TRIPLE WHOPPER® Sandwich,WHOPPER® SANDWICHES,438,1130,75.0,28.0,4.0,255,1120,49,2,11,67,sides
6,TRIPLE WHOPPER® Sandwich with Cheese,WHOPPER® SANDWICHES,461,1220,82.0,32.0,4.5,280,1470,50,2,11,71,sides
7,WHOPPER JR.®,WHOPPER® SANDWICHES,134,310,18.0,5.0,0.5,40,390,27,1,7,13,sides
8,BACON KING™ Sandwich,FLAME BROILED BURGERS,356,1150,79.0,31.0,3.5,240,2150,49,2,10,61,burger
9,Cheddar BACON KING™ Sandwich,FLAME BROILED BURGERS,366,1190,84.0,33.0,3.5,235,1930,50,2,11,64,burger


In [33]:
# Break each Burger King serving-size string into space-separated tokens,
# one column per token.
bk_serving_text = bk["serving_size_g"]
bk_split = bk_serving_text.str.split(" ", expand=True)

In [34]:
# Give the positional token columns 0-2 the short letter names a-c.
bk_letter_names = dict(enumerate("abc"))
bk_split = bk_split.rename(columns=bk_letter_names)


In [35]:
# Add two empty text columns that will receive the serving amount and its unit.
empty_serving_columns = {"serve_size_new": "", "serve_size_unit": ""}
bk_split = bk_split.assign(**empty_serving_columns)
bk_split.head()

Unnamed: 0,a,b,c,serve_size_new,serve_size_unit
0,270,,,,
1,292,,,,
2,303,,,,
3,354,,,,
4,377,,,,


--> Row 155 has messy data: the serving size is given as "1 pouch". One pouch of Capri Sun = 6.2 fl oz.

In [36]:
# Row 155 holds "1 pouch": a count and a container, with no third token.
bk_split.loc[155]

a                      1
b                  pouch
c                   None
serve_size_new          
serve_size_unit         
Name: 155, dtype: object

In [37]:
# Row 155 is listed as "1 pouch"; rewrite it as 6.2 fl oz. Filling column c
# with "oz" gives the row a third token, which the later masks on column c
# use to label a row as "fl oz".
# The amount is written as the string "6.2" rather than a float: every other
# entry in column a is a string token produced by str.split, and this column
# is copied into serve_size_new afterwards, so a float here would leave a
# mixed-type column.
bk_split.loc[155, ["a", "b", "c"]] = ["6.2", "fl", "oz"]
bk_split.loc[155]

a                  6.2
b                   fl
c                   oz
serve_size_new        
serve_size_unit       
Name: 155, dtype: object

In [38]:
# Rows with no third token are treated as gram amounts: take the number from
# column a and label the unit as "g".
no_third_token = bk_split["c"].isna()
bk_split.loc[no_third_token, "serve_size_new"] = bk_split["a"]
bk_split.loc[no_third_token, "serve_size_unit"] = "g"

In [39]:
# Rows that do have a third token are labelled as fluid ounces; the amount
# again comes from column a.
has_third_token = bk_split["c"].notna()
bk_split.loc[has_third_token, "serve_size_unit"] = "fl oz"
bk_split.loc[has_third_token, "serve_size_new"] = bk_split["a"]

In [40]:
# Inspect the Burger King token table with the filled serving amount and unit.
bk_split

Unnamed: 0,a,b,c,serve_size_new,serve_size_unit
0,270.0,,,270.0,g
1,292.0,,,292.0,g
2,303.0,,,303.0,g
3,354.0,,,354.0,g
4,377.0,,,377.0,g
5,438.0,,,438.0,g
6,461.0,,,461.0,g
7,134.0,,,134.0,g
8,356.0,,,356.0,g
9,366.0,,,366.0,g


#### join the bk_split into the main df

In [41]:
# Attach the token and serving-size columns to every Burger King row by index label.
burger_king = bk.merge(bk_split, how="left", left_index=True, right_index=True)

In [42]:
# List the merged column names before choosing the final column order.
burger_king.columns

Index(['item_name', 'category', 'serving_size_g', 'calories_kcal',
       'total_fat_g', 'saturated_fat_g', 'trans_fat_g', 'cholesterol_mg',
       'sodium_mg', 'carbohydrates_g', 'dietary_fiber_g', 'sugars_g',
       'protein_g', 'category_clean', 'a', 'b', 'c', 'serve_size_new',
       'serve_size_unit'],
      dtype='object')

In [43]:
# Final column layout: cleaned category and item first, then the new serving
# columns, the nutrition values, and finally the original category / serving text.
# The helper token columns a-c are left out.
bk_column_order = [
    "category_clean",
    "item_name",
    "serve_size_new",
    "serve_size_unit",
    "calories_kcal",
    "total_fat_g",
    "saturated_fat_g",
    "trans_fat_g",
    "cholesterol_mg",
    "sodium_mg",
    "carbohydrates_g",
    "dietary_fiber_g",
    "sugars_g",
    "protein_g",
    "category",
    "serving_size_g",
]
burger_king = burger_king.loc[:, bk_column_order].copy()

In [44]:
# Inspect the reordered Burger King table.
burger_king

Unnamed: 0,category_clean,item_name,serve_size_new,serve_size_unit,calories_kcal,total_fat_g,saturated_fat_g,trans_fat_g,cholesterol_mg,sodium_mg,carbohydrates_g,dietary_fiber_g,sugars_g,protein_g,category,serving_size_g
0,sides,WHOPPER® Sandwich,270.0,g,660,40.0,12.0,1.5,90,980,49,2,11,28,WHOPPER® SANDWICHES,270
1,sides,WHOPPER® Sandwich with Cheese,292.0,g,740,46.0,16.0,2.0,115,1340,50,2,11,32,WHOPPER® SANDWICHES,292
2,sides,Bacon & Cheese WHOPPER® Sandwich,303.0,g,790,51.0,17.0,2.0,125,1560,50,2,11,35,WHOPPER® SANDWICHES,303
3,sides,DOUBLE WHOPPER® Sandwich,354.0,g,900,58.0,20.0,3.0,175,1050,49,2,11,48,WHOPPER® SANDWICHES,354
4,sides,DOUBLE WHOPPER® Sandwich with Cheese,377.0,g,980,64.0,24.0,3.0,195,1410,50,2,11,52,WHOPPER® SANDWICHES,377
5,sides,TRIPLE WHOPPER® Sandwich,438.0,g,1130,75.0,28.0,4.0,255,1120,49,2,11,67,WHOPPER® SANDWICHES,438
6,sides,TRIPLE WHOPPER® Sandwich with Cheese,461.0,g,1220,82.0,32.0,4.5,280,1470,50,2,11,71,WHOPPER® SANDWICHES,461
7,sides,WHOPPER JR.®,134.0,g,310,18.0,5.0,0.5,40,390,27,1,7,13,WHOPPER® SANDWICHES,134
8,burger,BACON KING™ Sandwich,356.0,g,1150,79.0,31.0,3.5,240,2150,49,2,10,61,FLAME BROILED BURGERS,356
9,burger,Cheddar BACON KING™ Sandwich,366.0,g,1190,84.0,33.0,3.5,235,1930,50,2,11,64,FLAME BROILED BURGERS,366


In [45]:
# Row count of the final Burger King table.
len(burger_king)

176

## Subway serving size

In [46]:
# Load the cleaned Subway table from the project's data folder.
# NOTE(review): read_pickle can execute arbitrary code while loading, so this file
# should only ever be one the project produced itself — confirm its provenance.
sub = pd.read_pickle("../00_data/03_subway_clean_v1.pkl")

In [47]:
# Show the loaded Subway frame; serving_size_g already holds plain numbers here.
sub

Unnamed: 0,category,item_name,serving_size_g,calories_kcal,total_fat_g,saturated_fat_g,trans_fat_g,cholesterol_mg,sodium_mg,carbohydrates_g,dietary_fiber_g,sugars_g,protein_g,category_clean
0,Sandwich,BBQ Rib,208,580,31,10.0,0.0,60.0,1260.0,54,3.0,18.0,21,sandwich
1,Sandwich,Black Forest Ham,219,260,4,1.0,0.0,30.0,750.0,42,5.0,7.0,19,sandwich
2,Sandwich,Chicken & Bacon Ranch Melt,284,540,26,10.0,5.0,100.0,1100.0,41,3.0,5.0,36,sandwich
3,Sandwich,Chicken Mango Curry,234,330,7,15.0,0.0,50.0,840.0,43,3.0,8.0,24,sandwich
4,Sandwich,Chicken Tikka,205,290,5,1.0,0.0,50.0,720.0,39,2.0,5.0,23,sandwich
5,Sandwich,Chicken Vindaloo,234,340,9,1.0,0.0,50.0,880.0,42,3.0,6.0,24,sandwich
6,Sandwich,Classic Tuna,223,450,25,45.0,0.0,40.0,610.0,38,2.0,5.0,19,sandwich
7,Sandwich,Cold Cut Combo,213,330,12,35.0,0.0,45.0,1060.0,40,2.0,5.0,16,sandwich
8,Sandwich,Genoa Salami,208,430,23,8.0,0.0,60.0,1180.0,40,2.0,5.0,18,sandwich
9,Sandwich,Italian B.M.T.®,213,380,17,6.0,0.0,50.0,1100.0,40,2.0,5.0,19,sandwich


--> data already clean. Just need to add 2 new columns, so it has the same data structure as the other 2

In [48]:
# The Subway serving sizes need no parsing, so the new column is a straight copy.
sub = sub.assign(serve_size_new=lambda frame: frame["serving_size_g"])

In [49]:
# Every Subway row is labelled with the unit "g".
subway_unit = "g"
sub = sub.assign(serve_size_unit=subway_unit)

In [50]:
# List the column names, now including the two new serving columns.
sub.columns

Index(['category', 'item_name', 'serving_size_g', 'calories_kcal',
       'total_fat_g', 'saturated_fat_g', 'trans_fat_g', 'cholesterol_mg',
       'sodium_mg', 'carbohydrates_g', 'dietary_fiber_g', 'sugars_g',
       'protein_g', 'category_clean', 'serve_size_new', 'serve_size_unit'],
      dtype='object')

In [51]:
# Final column layout, in the same order as the other two restaurant tables.
subway_column_order = [
    "category_clean",
    "item_name",
    "serve_size_new",
    "serve_size_unit",
    "calories_kcal",
    "total_fat_g",
    "saturated_fat_g",
    "trans_fat_g",
    "cholesterol_mg",
    "sodium_mg",
    "carbohydrates_g",
    "dietary_fiber_g",
    "sugars_g",
    "protein_g",
    "category",
    "serving_size_g",
]
subway = sub.loc[:, subway_column_order].copy()

In [52]:
# Inspect the reordered Subway table.
subway

Unnamed: 0,category_clean,item_name,serve_size_new,serve_size_unit,calories_kcal,total_fat_g,saturated_fat_g,trans_fat_g,cholesterol_mg,sodium_mg,carbohydrates_g,dietary_fiber_g,sugars_g,protein_g,category,serving_size_g
0,sandwich,BBQ Rib,208,g,580,31,10.0,0.0,60.0,1260.0,54,3.0,18.0,21,Sandwich,208
1,sandwich,Black Forest Ham,219,g,260,4,1.0,0.0,30.0,750.0,42,5.0,7.0,19,Sandwich,219
2,sandwich,Chicken & Bacon Ranch Melt,284,g,540,26,10.0,5.0,100.0,1100.0,41,3.0,5.0,36,Sandwich,284
3,sandwich,Chicken Mango Curry,234,g,330,7,15.0,0.0,50.0,840.0,43,3.0,8.0,24,Sandwich,234
4,sandwich,Chicken Tikka,205,g,290,5,1.0,0.0,50.0,720.0,39,2.0,5.0,23,Sandwich,205
5,sandwich,Chicken Vindaloo,234,g,340,9,1.0,0.0,50.0,880.0,42,3.0,6.0,24,Sandwich,234
6,sandwich,Classic Tuna,223,g,450,25,45.0,0.0,40.0,610.0,38,2.0,5.0,19,Sandwich,223
7,sandwich,Cold Cut Combo,213,g,330,12,35.0,0.0,45.0,1060.0,40,2.0,5.0,16,Sandwich,213
8,sandwich,Genoa Salami,208,g,430,23,8.0,0.0,60.0,1180.0,40,2.0,5.0,18,Sandwich,208
9,sandwich,Italian B.M.T.®,213,g,380,17,6.0,0.0,50.0,1100.0,40,2.0,5.0,19,Sandwich,213


# pickle all dataframes 

In [53]:
# Burger King: save the table with the new serving-size columns as a pickle.
burger_king.to_pickle("../00_data/01_burger_king_clean_new_serving_size.pkl")

In [54]:
# McDonald's: save the table with the new serving-size columns as a pickle.
mcdonalds.to_pickle("../00_data/02_mcdonalds_clean_new_serving_size.pkl")

In [55]:
# Subway: save the table with the new serving-size columns as a pickle.
subway.to_pickle("../00_data/03_subway_clean_new_serving_size.pkl")

# join all 3 into 1 dataframe for further processing

In [56]:
# Stack the three per-restaurant tables (McDonald's, Subway, Burger King) into
# one frame. DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement and needs no empty starter frame.
# Row labels are kept as they are (no ignore_index), as with the previous
# append chain, so index labels repeat across the three restaurants.
complete = pd.concat([mcdonalds, subway, burger_king])

In [59]:
# Save the combined table as a pickle.
# NOTE(review): the file name spells "comibined"; it is left untouched because other
# notebooks may load the file by this exact name — confirm before renaming.
complete.to_pickle("../00_data/00_comibined_clean_new_serving_size.pkl")

In [60]:
# Save the combined table as CSV as well; index=False leaves the row labels out of the file.
# NOTE(review): the file name spells "comibined" — confirm no other notebook relies on it before renaming.
complete.to_csv("../00_data/00_comibined_clean_new_serving_size.csv", index=False)