In [1]:
import pandas as pd
import numpy as np
from fractions import Fraction


# Location of the input CSV, given relative to the notebook's working directory.
csv_path = "Data/mr_boston_flattened.csv"

In [2]:
# Read the input CSV into a DataFrame, decoding the file as UTF-8.
# NOTE(review): the displayed frame carries an "Unnamed: 0" column, which suggests the
# CSV was written together with its row index -- confirm whether that column is needed.
cocktail_df = pd.read_csv(csv_path, encoding="utf-8")

In [3]:
# Names of the paired ingredient / measurement columns of each recipe row.
ingredient_cols = ["ingredient-1","ingredient-2","ingredient-3","ingredient-4","ingredient-5","ingredient-6"]
measurement_cols = ["measurement-1", "measurement-2", "measurement-3", "measurement-4", "measurement-5", "measurement-6"]

# Unit conversions to US fluid ounces.
TSP_PER_OZ = 6
ML_PER_OZ = 29.5735


def _parse_quantity(text):
    """Return the float value of a quantity such as "2", "1/2" or "1 1/2".

    A two-token quantity is read as a mixed number and its tokens are summed.

    Raises:
        ValueError: if the text is empty, has more than two tokens, or a
            token is not a valid number / fraction.
        ZeroDivisionError: if a fraction has a zero denominator.
    """
    tokens = text.split()
    if not 1 <= len(tokens) <= 2:
        raise ValueError("not a quantity: " + repr(text))
    return sum(float(Fraction(token)) for token in tokens)


def _to_ounces(value):
    """Convert one raw measurement cell to fluid ounces where a rule applies.

    Rules are tried in this order on the lower-cased text, after " oz" has
    been removed:
      * contains "tsp"  -> quantity before " tsp", divided by TSP_PER_OZ
      * contains "ml"   -> first two numbers are read as count and volume in
                           millilitres (e.g. "1 750-ml ..."), giving
                           count * volume / ML_PER_OZ
      * contains " or"  -> quantity before " or" (the first alternative)
      * contains "/"    -> a plain or mixed fraction of one or two tokens

    Returns:
        A float rounded to 2 decimals when a rule applies and parses.
        The lower-cased text when a rule matches but parsing fails.
        The value with only " oz" removed when no rule matches.
        Non-string cells (NaN, numbers already converted) unchanged, so the
        cell can be re-run without destroying earlier results.
    """
    if not isinstance(value, str):
        return value

    stripped = value.replace(" oz", "")
    text = stripped.lower()

    try:
        if "tsp" in text:
            return round(_parse_quantity(text.split(" tsp")[0]) / TSP_PER_OZ, 2)

        if "ml" in text:
            tokens = text.replace("-", "").replace("ml", "").split()
            count, volume_ml = float(tokens[0]), float(tokens[1])
            # Millilitres are divided by ML_PER_OZ; multiplying the ml total by a
            # per-bottle ounce figure would overstate the amount many times over.
            return round(count * volume_ml / ML_PER_OZ, 2)

        if " or" in text:
            return round(_parse_quantity(text.split(" or")[0]), 2)

        if "/" in text and len(text.split()) in (1, 2):
            return round(_parse_quantity(text), 2)

    except (ValueError, ZeroDivisionError, IndexError):
        # Best effort: keep the readable text instead of a half-parsed value.
        return text

    return stripped


# Convert every measurement column. Assigning the whole column avoids chained
# indexing (cocktail_df[col][index] = ...), which may write to a temporary copy.
for col in measurement_cols:
    cocktail_df[col] = cocktail_df[col].map(_to_ounces)

cocktail_df.head()

Unnamed: 0,name,category,measurement-1,ingredient-1,measurement-2,ingredient-2,measurement-3,ingredient-3,measurement-4,ingredient-4,measurement-5,ingredient-5,measurement-6,ingredient-6,instructions,glass,glass-size
0,Gauguin,Cocktail Classics,2.0,Light Rum,1.0,Passion Fruit Syrup,1.0,Lemon Juice,1.0,Lime Juice,,,,,Combine ingredients with a cup of crushed ice ...,Old-Fashioned Glass,6 to 8 ounces
1,Fort Lauderdale,Cocktail Classics,1.5,Light Rum,0.5,Sweet Vermouth,0.25,Juice of Orange,0.25,Juice of a Lime,,,,,Shake with ice and strain into old-fashioned g...,Old-Fashioned Glass,6 to 8 ounces
2,Apple Pie,Cordials and Liqueurs,3.0,Apple schnapps,1.0,Cinnamon schnapps,,Apple slice,,,,,,,Pour into ice-filled old-fashioned glass. Garn...,Old-Fashioned Glass,6 to 8 ounces
3,Cuban Cocktail No. 1,Cocktail Classics,0.5,Juice of a Lime,0.5,Powdered Sugar,2.0,Light Rum,,,,,,,Shake with ice and strain into cocktail glass.,Cocktail Glass,6 or more ounces
4,Cool Carlos,Cocktail Classics,1.5,Dark rum,2.0,Cranberry Juice,2.0,Pineapple Juice,1.0,Orange curacao,1.0,Sour Mix,,,"Mix all ingredients except curacao with ice, s...",Collins Glass,14 to 16 ounces


In [4]:
# Collect each row's measurement cells into one Python list per row, and do the
# same for its ingredient cells.
measurements_list = cocktail_df.loc[:, measurement_cols].to_numpy().tolist()
ingredients_list = cocktail_df.loc[:, ingredient_cols].to_numpy().tolist()

# Store the per-row lists as two new list-valued columns.
cocktail_df["all_measurements"] = measurements_list
cocktail_df["all_ingredients"] = ingredients_list

cocktail_df.head()

Unnamed: 0,name,category,measurement-1,ingredient-1,measurement-2,ingredient-2,measurement-3,ingredient-3,measurement-4,ingredient-4,measurement-5,ingredient-5,measurement-6,ingredient-6,instructions,glass,glass-size,all_measurements,all_ingredients
0,Gauguin,Cocktail Classics,2.0,Light Rum,1.0,Passion Fruit Syrup,1.0,Lemon Juice,1.0,Lime Juice,,,,,Combine ingredients with a cup of crushed ice ...,Old-Fashioned Glass,6 to 8 ounces,"[2, 1, 1, 1, nan, nan]","[ Light Rum, Passion Fruit Syrup, Lemon Juic..."
1,Fort Lauderdale,Cocktail Classics,1.5,Light Rum,0.5,Sweet Vermouth,0.25,Juice of Orange,0.25,Juice of a Lime,,,,,Shake with ice and strain into old-fashioned g...,Old-Fashioned Glass,6 to 8 ounces,"[1.5, 0.5, 0.25, 0.25, nan, nan]","[ Light Rum, Sweet Vermouth, Juice of Orange..."
2,Apple Pie,Cordials and Liqueurs,3.0,Apple schnapps,1.0,Cinnamon schnapps,,Apple slice,,,,,,,Pour into ice-filled old-fashioned glass. Garn...,Old-Fashioned Glass,6 to 8 ounces,"[3, 1, nan, nan, nan, nan]","[ Apple schnapps, Cinnamon schnapps, Apple s..."
3,Cuban Cocktail No. 1,Cocktail Classics,0.5,Juice of a Lime,0.5,Powdered Sugar,2.0,Light Rum,,,,,,,Shake with ice and strain into cocktail glass.,Cocktail Glass,6 or more ounces,"[0.5, 0.5, 2, nan, nan, nan]","[ Juice of a Lime, Powdered Sugar, Light Rum..."
4,Cool Carlos,Cocktail Classics,1.5,Dark rum,2.0,Cranberry Juice,2.0,Pineapple Juice,1.0,Orange curacao,1.0,Sour Mix,,,"Mix all ingredients except curacao with ice, s...",Collins Glass,14 to 16 ounces,"[1.5, 2, 2, 1, 1, nan]","[ Dark rum, Cranberry Juice, Pineapple Juice..."


In [7]:
# Write the frame to a new CSV file; index=False leaves the row index out of the file.
cocktail_df.to_csv("Data/mr_boston_flattened_cleaned.csv", index=False)