## TOC:
* [Converting measures to grams.](#1)
* [Converting standard measures to ml.](#2)
* [oz, ml, gal glass, etc.](#3)
* [Individual cases](#4)
* [Lemons](#5)
* [Limes](#6)
* [Oranges](#7)
* [Apples](#8)
* [Pineapples](#9)
* [Garnishes (remaning after fruit fix)](#10)
* [Missing measures](#11)

In [1]:
# import os
import re
# import math
import pandas as pd
import numpy as np
from fractions import Fraction
# import matplotlib.pyplot as plt
# import seaborn as sns
# from sqlalchemy import create_engine
# import pymysql
# import requests
# from bs4 import BeautifulSoup
# from sklearn.linear_model import LinearRegression
# from statsmodels.tsa.ar_model import AR
# from statsmodels.tools.eval_measures import rmse
# from scipy import stats

In [2]:
pd.set_option('display.max_columns', 100)

In [3]:
# Split ingredients from description, also include shape of glass, type of ice and way to mix as separate columns.
# Align measurements.
# Add labels: type of alcohol, tastes of other ingredients.

In [4]:
### Collect cocktails raiting.
# Cross-check with dataset above. Align names. Add missing cocktails?

**Code below is pretty unstructured because original 'Measures' data is very messy and unstructured. So whatever catches an eye first is corrected first.**

\* Note: all individual adjustments are either verified with instructions included in this dataset or with most common recipe available in the Internet.

In [5]:
df_cocktails = pd.read_csv('./raw_data/stage0.csv')
df_cocktails.drop('Unnamed: 0', axis=1, inplace=True)

df_cons = pd.read_csv('./raw_data/stage1.csv')
df_cons.drop('Unnamed: 0', axis=1, inplace=True)

In [6]:
# Function to print all ingredients with measures and instructions for given cocktail. Comes handy for spot check.

def recipe(name):
    ingredient = []
    measure = []
    for i in range (1, 13):
        ingredient.append(df_cocktails.loc[df_cocktails['strDrink'] == name]['strIngredient'+str(i)].values[0])
        measure.append(df_cocktails.loc[df_cocktails['strDrink'] == name]['strMeasure'+str(i)].values[0])
    df_recipe = pd.DataFrame({'Ingredient': ingredient, 'Measure': measure})
    
    print(df_cocktails.loc[df_cocktails['strDrink'] == name]['strInstructions'].values[0])
    return df_recipe

In [7]:
def repl_from_dic(dic, row):
    '''Function should be applied only on filtered data to avoid unexpected text values in field "Value".'''
    '''Dictionary might include only text that need to be explicitly translated to a number by a human.'''
    if row.Value in dic.keys():
        return dic[row.Value]
    else:
        try:
            number = float(Fraction(row.Value.split()[-1]))
            if len(row.Value.split()) > 1:
                return float(row.Value.split()[0])+number
            else:
                return number
        except (ValueError, AttributeError):
            pass
# print(repl_from_dic(values_replace, df_cons.iloc[23]))   # Test
# print(repl_from_dic(values_replace, df_cons.iloc[1]))
# print(repl_from_dic(values_replace, df_cons.iloc[2]))

In [8]:
print(len(df_cons['MeasureName'].unique()))
df_cons['MeasureName'].unique()

121


array(['white', 'oz', 'shot', 'bacardi', nan, 'dry', 'part', 'jigger',
       'shots', 'bottle', 'cl', 'dashes', 'cup', 'scoops', 'hot',
       'chopped', 'qt', 'parts', 'blue', 'ml', 'tsp', 'black', 'smirnoff',
       'can', 'cubes', 'pint', 'pale', 'cups', 'superfine', 'fifth',
       'large', 'instant', 'glass', 'l', 'hard', 'strip', 'jiggers',
       'light', 'grape', 'handful', 'tblsp', '1/2', 'dark', 'jamaican',
       'cream', 'chilled', 'bottles', 'measures', 'or', 'stoli', 'splash',
       'dash', 'gr', 'red', 'ginger', 'lemon', 'quart', 'hill', 'slice',
       'label', 'whole', '1', 'piece', 'tbsp', 'chunks', 'sweet', 'cold',
       'packages', 'tropical', 'apple', 'blended', 'mild', 'cans',
       'drops', 'with', 'of', '1/4', 'pure', 'cracked', 'frozen', 'gal',
       'wedges', 'muscatel', 'leaves', 'skimmed', 'cube', 'taste',
       'sweetened', 'needed)', 'wedge', 'double', 'splashes', 'spoons',
       'unsweetened', 'top', 'one-inch', 'crushed', 'berry', 'topping',
     

Values will be converted to numberic in batches related to other characteristics, this way it's easier to process the mess and avoid mistakes.

In [9]:
df_cons.drop(df_cons.loc[(df_cons['strDrink'] == 'Swedish Coffee')
                         & (df_cons['MeasureName'] == 'taste')].index, axis=0, inplace=True)

df_cons.drop(df_cons.loc[(df_cons['strDrink']=='Kurant Tea') 
                         & (df_cons['MeasureName']=="needed)")].index, axis=0, inplace=True)

df_cons.drop(df_cons.loc[df_cons['strIngredients']=="Ice"].index, axis=0, inplace=True)

## Converting measures to grams. <a class="anchor" id="1"></a>

In [10]:
df_cons['MeasureName'] = np.where((df_cons['strDrink'] == 'Adam Bomb') & (df_cons['MeasureName'] == 'pint'),
                                 'pinch', df_cons['MeasureName'])

In [11]:
dict_gr = {'chunks':30, 'gr':1, 'pinch':0.36, 'pinches':0.36, 'tsp':4, 'tblsp':14, 'cup':128, 'cube':4, 'piece':4, 'packages':90}
values_replace = {'1-3':2}
for key, value in dict_gr.items():
# First convert value as it is to numeric format
    df_cons['Value_numeric'] = np.where((df_cons['MeasureName'] == key) & (df_cons['Value_numeric'].isnull()),
                                        df_cons.apply(lambda row: repl_from_dic(values_replace, row), axis=1),
                                        df_cons['Value_numeric'])
# Then convert them to ml
    df_cons['Value_gr'] = np.where((df_cons['MeasureName'] == key) & (df_cons['Value_gr'].isnull()),
                                   df_cons['Value_numeric']*dict_gr[key],
                                   df_cons['Value_gr'])

In [12]:
# Processing all exceptionals from function in a cell above.

In [13]:
df_cons['Value_gr'] = np.where(df_cons['strIngredients'] == 'Banana', 120, df_cons['Value_gr'])

In [14]:
df_cons.loc[(df_cons['strIngredients'] == 'Sugar') & (df_cons['MeasureName'].isnull())]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
1135,Classic Old-Fashioned,Ordinary Drink,Old-fashioned glass,Sugar,"In an old-fashioned glass, muddle the bitters ...",1,1,,,,,,,
1559,Lemon Shot,Shot,Shot glass,Sugar,Mix Galliano and Absolut Citron in a shot glas...,bacardi,bacardi,,,,,,,
1671,Atomic Lokade,Ordinary Drink,Collins Glass,Sugar,"In a shaker, place lemonade, vodka, blue Curac...",,,,,,,,,


In [15]:
cond1 = (df_cons['Value_ml'].isnull())
cond2 = (df_cons['Value_gr'].isnull())
cond3 = (df_cons['Garnish_type'].isnull())
print(len(df_cons.loc[cond1 & cond2 & cond3]['MeasureName']))
print(len(df_cons.loc[cond1 & cond2 & cond3]['MeasureName'].unique()))
df_cons.loc[cond1 & cond2 & cond3]['MeasureName'].unique()

1681
111


array(['white', 'oz', 'shot', 'bacardi', nan, 'dry', 'part', 'jigger',
       'shots', 'bottle', 'cl', 'dashes', 'scoops', 'hot', 'chopped',
       'qt', 'parts', 'blue', 'ml', 'black', 'smirnoff', 'can', 'pint',
       'pale', 'cups', 'superfine', 'fifth', 'large', 'instant', 'glass',
       'l', 'hard', 'strip', 'jiggers', 'light', 'grape', 'handful',
       '1/2', 'dark', 'jamaican', 'cream', 'chilled', 'bottles',
       'measures', 'or', 'stoli', 'splash', 'dash', 'red', 'ginger',
       'lemon', 'quart', 'hill', 'slice', 'label', 'whole', '1', 'tbsp',
       'sweet', 'cold', 'tropical', 'apple', 'blended', 'mild', 'cans',
       'drops', 'cup', 'with', 'of', '1/4', 'pure', 'cracked', 'frozen',
       'gal', 'wedges', 'muscatel', 'leaves', 'skimmed', 'taste',
       'sweetened', 'wedge', 'double', 'tblsp', 'splashes', 'spoons',
       'unsweetened', 'top', 'one-inch', 'berry', 'topping', 'schweppes',
       'package', 'grated', 'crushed', 'fresh', 'lime', 'up', 'fill',
       'tabl

## Converting standard measures to ml.<a class="anchor" id="2"></a>

## Some measures are ending up with description of a product, not the name of the measure.

In [16]:
# Separately as can't be fitted into loop below

df_cons['Value'] = np.where(df_cons['MeasureName'] == 'fifth', df_cons['Value']+'/5', df_cons['Value'])


df_cons['MeasureName'] = np.where((df_cons['strIngredients'].str.contains('Egg')) & (df_cons['Value_ml'].isnull()),
                                    'egg', df_cons['MeasureName'])


df_cons['Value_numeric'] = np.where((df_cons['strDrink'] == 'Artillery Punch')&(df_cons['MeasureName_copy'] == 'black'),
                               1, df_cons['Value_numeric'])
df_cons['MeasureName'] = np.where((df_cons['strDrink'] == 'Artillery Punch')&(df_cons['MeasureName'] == 'black'),
                               'quart', df_cons['MeasureName'])

df_cons['Value_numeric'] = np.where((df_cons['strDrink'] == 'Jamaican Coffee')&(df_cons['MeasureName_copy'] == 'black'),
                               1/6, df_cons['Value_numeric'])
df_cons['MeasureName'] = np.where((df_cons['strDrink'] == 'Jamaican Coffee')&(df_cons['MeasureName'] == 'black'),
                               'glass', df_cons['MeasureName'])

df_cons['Value'] = np.where((df_cons['MeasureName'] == 'pale'), '1', df_cons['Value'])
df_cons['MeasureName'] = np.where((df_cons['MeasureName'] == 'pale'), 'part', df_cons['MeasureName'])

In [17]:
df_cons['MeasureName'] = np.where(df_cons['MeasureName'].isin(['part', 'parts']), 'oz', df_cons['MeasureName'])

In [18]:
to_fix = ['white', 'dry', 'hot', 'blue', 'smirnoff', 'superfine', 'hard', 'light', 'grape', 'jamaican', 'cream', 'red', 'lemon',
          'cold', 'tropical', 'blended', 'frozen', 'muscatel', 'skimmed', 'double', 'sweetened', 'unsweetened', 'schweppes',
          'grated', 'fresh', 'hazlenut', 'plain', 'ground']

In [19]:
for i in to_fix:
    df_cons['strIngredients'] = np.where(df_cons['MeasureName'] == i,
                                         (df_cons['strIngredients']+' '+i.title()),
                                         df_cons['strIngredients'])
    
    df_cons['Value'] = np.where(df_cons['MeasureName'] == i,
                                 df_cons['Value'].str.rsplit(n=1, expand=True)[0],
                                 df_cons['Value'])
    
    df_cons['MeasureName'] = np.where(df_cons['MeasureName'] == i,
                                     df_cons['Value'].str.rsplit(n=1, expand=True)[1],
                                     df_cons['MeasureName'])

In [20]:
# Separately as we don't need to add key-words to ingredient's name

to_fix = ['bacardi', 'chilled', 'stoli', 'of']

for i in to_fix:
    df_cons['MeasureName'] = np.where(df_cons['MeasureName_copy'] == i,
                                     df_cons['Value'].str.rsplit(n=1, expand=True)[1],
                                     df_cons['MeasureName'])

    df_cons['Value'] = np.where(df_cons['MeasureName_copy'] == i,
                                 df_cons['Value'].str.rsplit(n=1, expand=True)[0],
                                 df_cons['Value'])

In [21]:
dic = {'dark':['Rub Light Or Dark', '2', 'oz'],
        'or':['White Or Red Wine', '2', 'oz'],
        'ginger':['Green Ginger Wine', '1', 'oz'],
       'hill':["Boone's Strawberry Hill Wine", '1', 'bottle'],
       'label':['Vodka Smirnoff Red Label', '1/5', 'bottle'],
       'apple':['Turkish Apple Tea', '1', 'cup'],
       'taste':['Lemon Juice', '1/2', 'oz'],
       'berry':['Kool-Aid Tropical Berry', '1', 'gal']}

for key, value in dic.items():
    df_cons['strIngredients'] = np.where(df_cons['MeasureName_copy'] == key, dic[key][0], df_cons['strIngredients'])
    df_cons['Value'] = np.where(df_cons['MeasureName_copy'] == key, dic[key][1], df_cons['Value'])
    df_cons['MeasureName'] = np.where(df_cons['MeasureName_copy'] == key, dic[key][2], df_cons['MeasureName'])

## oz, cl, qt, ml, l, shot(s), pint, jigger(s), dash(es), drops, can(s), cup(s), spoons, glass, splash(es), scoops, measures, gal, dl<a class="anchor" id="3"></a>

In [22]:
df_cons['Value'] = np.where(df_cons['strMeasures'] == '1 1/2', '1 1/2', df_cons['Value'])

In [23]:
measures_dict = {'oz':30, 'cl':10, 'ml':1, 'shot':25, 'shots':25, 'pint':473, 'jigger':44, 'jiggers':44, 'dash':1, 'dashes':1,
                 'drops':1, 'can':330, 'cans':330, 'cup':237, 'cups':237, 'tblsp':15, 'tbsp':15, 'tablespoons':15, 'tsp':5,
                 'spoons':5, 'l':1000, 'qt':946, 'quart':946, 'glass':350, 'splash':6, 'splashes':6, 'scoops':60, 'measures':30,
                 'gal':3785, 'dl':100, 'package':60, 'fifth':750, 'egg':45, '1/2':25}

In [24]:
values_replace = {'2-3':2.5, '70ml/2fl':70, 'add 10':10, '3-4':3.5, '10-12':11, 'add 250':250, '1-2':1.5, '1-3':2,
                  'add 1/2':0.5, 'about 8':8, 'add':1, '8-10':9}

In [25]:
for key, value in measures_dict.items():
# First convert value as it is to numeric format
    df_cons['Value_numeric'] = np.where((df_cons['MeasureName'] == key) & (df_cons['Value_numeric'].isnull()),
                                        df_cons.apply(lambda row: repl_from_dic(values_replace, row), axis=1),
                                        df_cons['Value_numeric'])
# Then convert them to ml
    df_cons['Value_ml'] = np.where((df_cons['MeasureName'] == key)&(df_cons['Value_ml'].isnull())&(df_cons['Value_gr'].isnull()),
                                   df_cons['Value_numeric']*measures_dict[key],
                                   df_cons['Value_ml'])

### Individual cases <a class="anchor" id="4"></a>

**Issues with 'glass' measure**

In [26]:
df_cons['Value_ml'] = np.where((df_cons['strDrink'] == 'Jamaican Coffee') & (df_cons['strIngredients'] == 'Rum'),
                                    180/6, df_cons['Value_ml'])

In [27]:
df_cons['Value_ml'] = np.where((df_cons['strDrink'] == 'Tequila Surprise') & (df_cons['MeasureName'] == 'glass'),
                                    44, df_cons['Value_ml'])

In [28]:
df_cons['Value_ml'] = np.where((df_cons['strDrink'] == 'Butter Baby') & (df_cons['strIngredients'] == 'Vanilla Ice-Cream'),
                                    60*2, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where((df_cons['strDrink'] == 'Butter Baby') & (df_cons['strIngredients'] == 'Butterscotch schnapps'),
                                    30, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where((df_cons['strDrink'] == 'Butter Baby') & (df_cons['strIngredients'] == 'Milk'),
                                    300, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where((df_cons['strDrink'] == 'Butter Baby') & (df_cons['strIngredients'] == 'Vodka'),
                                    30*2, df_cons['Value_ml'])

**Issues with 'bottle' measure**

In [29]:
df_cons['Value_ml'] = np.where((df_cons['strCategory'] == 'Beer') & (df_cons['MeasureName'] == 'bottle'),
                                    330, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where((df_cons['strCategory'] == 'Punch / Party Drink') & (df_cons['MeasureName'] == 'bottle'),
                                    750, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where((df_cons['strDrink'] == 'Brain Fart') & (df_cons['MeasureName'] == 'bottle'),
                                    125, df_cons['Value_ml'])

**Separate cocktails fixed individually**

In [30]:
# Snake Bite (UK)

df_cons['Value_ml'] = np.where((df_cons['strDrink'] == 'Snake Bite (UK)') & (df_cons['MeasureName_copy'] == 'dry'),
                                    473/2, df_cons['Value_ml'])
df_cons['strIngredients'] = np.where((df_cons['strDrink'] == 'Snake Bite (UK)') & (df_cons['MeasureName_copy'] == 'dry'),
                                    'Cider Sweet Or Dry', df_cons['strIngredients'])

In [31]:
# Snakebite and Black

df_cons['Value_ml'] = np.where((df_cons['MeasureName'] == 'bit'), 5, df_cons['Value_ml'])

In [32]:
# Herbal flame

df_cons['strIngredients'] = np.where((df_cons['MeasureName'] == 'sweet'), 'Very Sweet Tea', df_cons['strIngredients'])

## Lemons.<a class="anchor" id="5"></a>

In [33]:
df_cons.loc[df_cons['strIngredients'] == 'Lemon']

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
509,A True Amaretto Sour,Cocktail,Old-fashioned glass,Lemon,Rub the rim of an old fashioned glass with lem...,juice of 1/2,juice of,1/2,,,,,,1/2
636,Boston Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake juice of lemon, powdered sugar, blended ...",juice of 1/2,juice of,1/2,,,,,,1/2
647,Brandy Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake brandy, juice of lemon, and powdered sug...",juice of 1/2,juice of,1/2,,,,,,1/2
658,California Lemonade,Ordinary Drink,Collins glass,Lemon,Shake all ingredients (except carbonated water...,juice of 1,juice of,1,,,,,,1
731,Gin Fizz,Ordinary Drink,Highball glass,Lemon,"Shake all ingredients with ice cubes, except s...",juice of 1/2,juice of,1/2,,,,,,1/2
733,Gin Sling,Ordinary Drink,Old-fashioned glass,Lemon,Dissolve powdered sugar in mixture of water an...,juice of 1/2,juice of,1/2,,,,,,1/2
777,Japanese Fizz,Ordinary Drink,Highball glass,Lemon,Shake all ingredients (except carbonated water...,juice of 1/2,juice of,1/2,,,,,,1/2
835,New York Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake blended whiskey, juice of lemon, and pow...",juice of 1/2,juice of,1/2,,,,,,1/2
875,Royal Gin Fizz,Ordinary Drink,Highball glass,Lemon,Shake all ingredients (except carbonated water...,juice of 1/2,juice of,1/2,,,,,,1/2
929,Tequila Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake tequila, juice of lemon, and powdered su...",juice of 1/2,juice of,1/2,,,,,,1/2


In [34]:
cond1 = df_cons['strIngredients'] == 'Lemon'
cond2 = df_cons['Value'] == 'juice of'
cond3 = df_cons['MeasureName'] == '1'
cond4 = df_cons['MeasureName'] == '1/2'
cond5 = df_cons['MeasureName'] == '1/4'

df_cons['Value_ml'] = np.where(cond1 & cond2 & cond3, 45, df_cons['Value_ml'])  #45 is aprox amount of ml of juice in 1 lemon
df_cons['Value_ml'] = np.where(cond1 & cond2 & cond4, 45/2, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where(cond1 & cond2 & cond5, 45/4, df_cons['Value_ml'])

In [35]:
df_cons.loc[(df_cons['strIngredients'] == 'Lemon') & df_cons['Value_ml'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
962,Wine Punch,Punch / Party Drink,Collins Glass,Lemon,Combine all of the ingredients and pour over a...,2,2,,,,,,,
1111,Brandy Cobbler,Ordinary Drink,Old-fashioned glass,Lemon,"In an old-fashioned glass, dissolve the sugar ...",1,1,,,,,,,
1238,Lemon Drop,Cocktail,Cocktail glass,Lemon,Shake and strain into a chilled cocktail glass...,juice of 1 wedge,juice of 1,wedge,,,,,,wedge
1239,Lemon Shot,Shot,Shot glass,Lemon,Mix Galliano and Absolut Citron in a shot glas...,wedge,wedge,,,,,,,
1307,Rum Cobbler,Ordinary Drink,Old-fashioned glass,Lemon,"In an old-fashioned glass, dissolve the sugar ...",1,1,,,,,,,
1308,Rum Cooler,Ordinary Drink,Collins glass,Lemon,Pour the rum and soda into a collins glass alm...,1,1,,,,,,,
1320,Sangria - The World's Best,Punch / Party Drink,Pitcher,Lemon,"Mix wine, sugar and fruit, and let sit in the ...",1 large,1,large,,,,,,large
1340,Snowball,Ordinary Drink,Highball glass,Lemon,Place one ice cube in the glass and add 1 1/2 ...,1 slice,1,slice,,,,,,slice
1479,Brandy Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake brandy, juice of lemon, and powdered sug...",1/2 slice,1/2,slice,,,,,,slice
1501,Dirty Martini,Cocktail,Cocktail glass,Lemon,"Pour the vodka, dry vermouth and olive brine i...",1 wedge,1,wedge,,,,,,wedge


In [36]:
cond1 = df_cons['strIngredients'] == 'Lemon'
cond2 = df_cons['MeasureName'].isnull()
cond3 = df_cons['MeasureName'] == 'large'
cond4 = df_cons['Value'] == '1'
cond5 = df_cons['Value'] == '2'
cond6 = df_cons['Value'] == '1/2'
cond7 = df_cons['MeasureName'] == 'lime'

df_cons['Value_ml'] = np.where(cond1 & (cond2 | cond3) & cond4, 45, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where(cond1 & (cond2 | cond3) & cond5, 45*2, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where(cond1 & (cond2 | cond3) & cond6, 45/2, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where(cond1 & cond7, 20, df_cons['Value_ml'])  # Recipe verified via Internet manually

In [37]:
df_cons.loc[(df_cons['strIngredients'] == 'Lemon') & df_cons['Value_ml'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
1238,Lemon Drop,Cocktail,Cocktail glass,Lemon,Shake and strain into a chilled cocktail glass...,juice of 1 wedge,juice of 1,wedge,,,,,,wedge
1239,Lemon Shot,Shot,Shot glass,Lemon,Mix Galliano and Absolut Citron in a shot glas...,wedge,wedge,,,,,,,
1340,Snowball,Ordinary Drink,Highball glass,Lemon,Place one ice cube in the glass and add 1 1/2 ...,1 slice,1,slice,,,,,,slice
1479,Brandy Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake brandy, juice of lemon, and powdered sug...",1/2 slice,1/2,slice,,,,,,slice
1501,Dirty Martini,Cocktail,Cocktail glass,Lemon,"Pour the vodka, dry vermouth and olive brine i...",1 wedge,1,wedge,,,,,,wedge
1613,Scotch Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake scotch, juice of lime, and powdered suga...",1/2 slice,1/2,slice,,,,,,slice
1630,Tequila Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake tequila, juice of lemon, and powdered su...",1/2 slice,1/2,slice,,,,,,slice
1655,Absolut Summertime,Cocktail,Collins glass,Lemon,Add all ingredients except lemon to shaker fil...,1 slice,1,slice,,,,,,slice
1685,Boston Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake juice of lemon, powdered sugar, blended ...",1 slice,1,slice,,,,,,slice
1714,Frisco Sour,Ordinary Drink,Whiskey sour glass,Lemon,Shake all ingredients (except slices of lemon ...,1 slice,1,slice,,,,,,slice


In [38]:
df_cons.loc[df_cons['MeasureName'] == 'wedge']

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
1238,Lemon Drop,Cocktail,Cocktail glass,Lemon,Shake and strain into a chilled cocktail glass...,juice of 1 wedge,juice of 1,wedge,,,,,,wedge
1501,Dirty Martini,Cocktail,Cocktail glass,Lemon,"Pour the vodka, dry vermouth and olive brine i...",1 wedge,1,wedge,,,,,,wedge
1810,Bloody Mary,Ordinary Drink,Old-fashioned glass,Lime,"Stirring gently, pour all ingredients into hig...",1 wedge,1,wedge,,,,,,wedge
1889,3-Mile Long Island Iced Tea,Ordinary Drink,Collins Glass,Bitters,Fill 14oz glass with ice and alcohol. Fill 2/3...,1 wedge,1,wedge,,,,,,wedge
1904,Arizona Twister,Cocktail,Hurricane glass,Pineapple,"Just mix in the shots of rum, vodka, and tequi...",1 wedge,1,wedge,,,,,,wedge


In [39]:
df_cons.loc[df_cons['MeasureName'] == 'slice']

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
623,Bleeding Surgeon,Soft Drink / Soda,Collins glass,Orange,Pour Shot of Rum over slice of orange. Fill th...,1 slice,1,slice,,,,,,slice
1340,Snowball,Ordinary Drink,Highball glass,Lemon,Place one ice cube in the glass and add 1 1/2 ...,1 slice,1,slice,,,,,,slice
1479,Brandy Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake brandy, juice of lemon, and powdered sug...",1/2 slice,1/2,slice,,,,,,slice
1612,Scotch Cobbler,Ordinary Drink,Old-fashioned glass,Orange,"Pour scotch, brandy, and curacao over ice in a...",1 slice,1,slice,,,,,,slice
1613,Scotch Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake scotch, juice of lime, and powdered suga...",1/2 slice,1/2,slice,,,,,,slice
1630,Tequila Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake tequila, juice of lemon, and powdered su...",1/2 slice,1/2,slice,,,,,,slice
1655,Absolut Summertime,Cocktail,Collins glass,Lemon,Add all ingredients except lemon to shaker fil...,1 slice,1,slice,,,,,,slice
1685,Boston Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake juice of lemon, powdered sugar, blended ...",1 slice,1,slice,,,,,,slice
1714,Frisco Sour,Ordinary Drink,Whiskey sour glass,Lemon,Shake all ingredients (except slices of lemon ...,1 slice,1,slice,,,,,,slice
1721,Gin Smash,Ordinary Drink,Old-fashioned glass,Orange,Muddle sugar with carbonated water and mint sp...,1 slice,1,slice,,,,,,slice


In [40]:
cond1 = df_cons['strIngredients'] == 'Lemon'
cond2 = df_cons['strDrink'] == 'Lemon Drop'
df_cons['Value_ml'] = np.where(cond1 & cond2, 45/2, df_cons['Value_ml']) # Although it says juice of 1 wedge, most common recipe
                                                                        # is to use juice of 1/2 of lemon.

In [41]:
cond1 = df_cons['MeasureName'].isin(['slice', 'wedge'])
cond2 = ~df_cons['strDrink'].isin(['Lemon Drop', '3-Mile Long Island Iced Tea'])

df_cons['Garnish_type'] = np.where(cond1 & cond2, df_cons['MeasureName'], df_cons['Garnish_type'])
df_cons['Garnish_amount'] = np.where(cond1 & cond2, df_cons['Value'], df_cons['Garnish_amount'])

In [42]:
df_cons.loc[(df_cons['strIngredients'] == 'Lemon') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
1239,Lemon Shot,Shot,Shot glass,Lemon,Mix Galliano and Absolut Citron in a shot glas...,wedge,wedge,,,,,,,
1752,New York Sour,Ordinary Drink,Whiskey sour glass,Lemon,"Shake blended whiskey, juice of lemon, and pow...",,,,,,,,,
1885,Sweet Sangria,Punch / Party Drink,Pitcher,Lemon,Dissolve the sugar in hot water and cool. Peel...,,,,,,,,,
1886,Zoksel,Soft Drink / Soda,Beer pilsner,Lemon,"No specific mixinginstructions, just poor ever...",,,,,,,,,
1901,3-Mile Long Island Iced Tea,Ordinary Drink,Collins Glass,Lemon,Fill 14oz glass with ice and alcohol. Fill 2/3...,,,,,,,,,


In [43]:
df_cons.loc[df_cons['strDrink'] == 'Lemon Shot']

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
310,Lemon Shot,Shot,Shot glass,Galliano,Mix Galliano and Absolut Citron in a shot glas...,1/2 oz,1/2,oz,0.5,15.0,,,,oz
798,Lemon Shot,Shot,Shot glass,Absolut Citron,Mix Galliano and Absolut Citron in a shot glas...,1/2 oz,1/2,oz,0.5,15.0,,,,oz
1239,Lemon Shot,Shot,Shot glass,Lemon,Mix Galliano and Absolut Citron in a shot glas...,wedge,wedge,,,,,,,
1559,Lemon Shot,Shot,Shot glass,Sugar,Mix Galliano and Absolut Citron in a shot glas...,bacardi,bacardi,,,,,,,
1737,Lemon Shot,Shot,Shot glass,151 Proof Rum,Mix Galliano and Absolut Citron in a shot glas...,,,,,,,,,


In [44]:
cond1 = df_cons['strIngredients'] == 'Lemon'
cond2 = df_cons['strDrink'] == 'Lemon Shot'
df_cons['Garnish_amount'] = np.where(cond1 & cond2, 1, df_cons['Garnish_amount'])
df_cons['Garnish_type'] = np.where(cond1 & cond2, 'wedge', df_cons['Garnish_type'])

In [45]:
cond1 = df_cons['strIngredients'] == 'Lemon'
cond2 = df_cons['strDrink'] == 'New York Sour'
df_cons['Garnish_amount'] = np.where(cond1 & cond2, '1/2', df_cons['Garnish_amount'])
df_cons['Garnish_type'] = np.where(cond1 & cond2, 'slice', df_cons['Garnish_type'])

Check remaining cocktails one by one.

In [46]:
recipe('Sweet Sangria')

Dissolve the sugar in hot water and cool. Peel the citrus fruits and break into wedges. Mix the wine, sugar syrup, fruit, and Fresca in a pitcher and put in the fridge for a few hours. Serve in tall glasses with a straw.


Unnamed: 0,Ingredient,Measure
0,Red wine,2 bottles
1,Sugar,1 cup
2,Water,2 cups hot
3,Apple,1 cup
4,Orange,wedges\n
5,Lime,wedges\n
6,Lemon,
7,Fresca,
8,,
9,,


In [47]:
# Should be dropped as measures are too messy and it's impossible to recover original recipe.
df_cons.drop(df_cons.loc[df_cons['strDrink'] == 'Sweet Sangria'].index, axis=0, inplace=True)

In [48]:
recipe('Zoksel')

No specific mixinginstructions, just poor every ingredient in one glass. The lemon goes with it.


Unnamed: 0,Ingredient,Measure
0,Beer,
1,Root beer,
2,Lemonade,
3,Coca-Cola,slice\n
4,7-Up,
5,Creme de Cassis,
6,Lemon,
7,,
8,,
9,,


In [49]:
# This cocktail should be dropped due to lack of guidance.

df_cons.drop(df_cons.loc[df_cons['strDrink'] == 'Zoksel'].index, axis=0, inplace=True)

In [50]:
recipe('3-Mile Long Island Iced Tea')

Fill 14oz glass with ice and alcohol. Fill 2/3 glass with cola and remainder with sweet & sour. Top with dash of bitters and lemon wedge.


Unnamed: 0,Ingredient,Measure
0,Gin,1/2 oz
1,Light rum,1/2 oz
2,Tequila,1/2 oz
3,Triple sec,1/2 oz
4,Vodka,1/2 oz
5,Coca-Cola,
6,Sweet and sour,1-2 dash
7,Bitters,1 wedge
8,Lemon,
9,,


In [51]:
# Measures of this cocktail shifted, also some measures are missing.

cond1 = df_cons['strDrink'] == '3-Mile Long Island Iced Tea'

df_cons['Value_ml'] = np.where(cond1 & (df_cons['strIngredients'] == 'Coca-Cola'), 200, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where(cond1 & (df_cons['strIngredients'] == 'Sweet And Sour'), 30, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where(cond1 & (df_cons['strIngredients'] == 'Bitters'), 1, df_cons['Value_ml'])
df_cons['Garnish_amount'] = np.where(cond1 & (df_cons['strIngredients'] == 'Lemon'), 1, df_cons['Garnish_amount'])
df_cons['Garnish_type'] = np.where(cond1 & (df_cons['strIngredients'] == 'Lemon'), 'wedge', df_cons['Garnish_type'])

In [52]:
df_cons.loc[(df_cons['strIngredients'] == 'Lemon') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()]
# We're done with lemon and can move on.

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy


## Limes.<a class="anchor" id="6"></a>

In [53]:
df_cons.loc[(df_cons['strIngredients'] == 'Lime') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
169,Caipirissima,Ordinary Drink,Collins Glass,Lime,Same as Caipirinha but instead of cachaca you ...,2,2,,,,,,,
560,Amaretto Mist,Ordinary Drink,Old-fashioned glass,Lime,Pour amaretto in an old-fashioned glass over c...,1,1,,,,,,,
656,Caipirinha,Ordinary Drink,Old-fashioned glass,Lime,Place lime and sugar into old fashioned glass ...,1,1,,,,,,,
684,Cuba Libra,Ordinary Drink,Highball glass,Lime,Fill tall glass with ice cubes. Add rum. Rub c...,squeeze,squeeze,,,,,,,
685,Cuba Libre,Ordinary Drink,Highball glass,Lime,Build all ingredients in a Collins glass fille...,juice of 1/2,juice of,1/2,,,,,,1/2
686,Daiquiri,Ordinary Drink,Cocktail glass,Lime,Pour all ingredients into shaker with ice cube...,juice of 1/2,juice of,1/2,,,,,,1/2
689,Dark Caipirinha,Cocktail,Highball glass,Lime,Muddle the sugar into the lime wedges in an ol...,1,1,,,,,,,
806,Long vodka,Ordinary Drink,Highball glass,Lime,Shake a tall glass with ice cubes and Angostur...,1/2,1/2,,,,,,,
823,Mojito,Cocktail,Highball glass,Lime,Muddle mint leaves with sugar and lime juice. ...,juice of 1,juice of,1,,,,,,1
895,Scotch Sour,Ordinary Drink,Whiskey sour glass,Lime,"Shake scotch, juice of lime, and powdered suga...",juice of 1/2,juice of,1/2,,,,,,1/2


In [54]:
cond1 = (df_cons['strIngredients'] == 'Lime') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()
cond2 = (df_cons['Value'] == '1/2')|(df_cons['MeasureName'] == '1/2')
cond3 = (df_cons['Value'] == '1')|(df_cons['MeasureName'] == '1')
cond4 = (df_cons['Value'] == '2')

df_cons['Value_ml'] = np.where(cond1 & cond2, 30/2, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where(cond1 & cond3, 30, df_cons['Value_ml'])
df_cons['Value_ml'] = np.where(cond1 & cond4, 30*2, df_cons['Value_ml'])

In [55]:
df_cons.loc[(df_cons['strIngredients'] == 'Lime') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
684,Cuba Libra,Ordinary Drink,Highball glass,Lime,Fill tall glass with ice cubes. Add rum. Rub c...,squeeze,squeeze,,,,,,,
1203,Happy Skipper,Ordinary Drink,Highball glass,Lime,"Pour Captain Morgan's Spiced Rum over ice, fil...",,,,,,,,,
1719,Gin Rickey,Cocktail,Highball glass,Lime,Half-fill a tall glass with ice. Mix the gin a...,garnish,garnish,,,,,,,


Again, let's check remaining cocktails one by one.

In [56]:
recipe('Cuba Libra')

Fill tall glass with ice cubes. Add rum. Rub cut edge of lime on rim of glass then squeeze juice into glass. Fill with Coca-Cola. Garnish with lime slice. Enjoy!


Unnamed: 0,Ingredient,Measure
0,Dark rum,1-2 shot
1,Lime,Squeeze
2,Coca-Cola,Fill with
3,Ice,
4,,
5,,
6,,
7,,
8,,
9,,


In [57]:
cond1 = (df_cons['strIngredients'] == 'Lime') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()
cond2 = df_cons['strDrink'] == 'Cuba Libra'

df_cons['Value_ml'] = np.where(cond1 & cond2, 30, df_cons['Value_ml'])

In [58]:
recipe('Happy Skipper')

Pour Captain Morgan's Spiced Rum over ice, fill glass to top with Ginger Ale. Garnish with lime. Tastes like a cream soda. Named for the Gilligan's Island reference ("The Captain" *in* "Ginger" is a Happy Skipper!)


Unnamed: 0,Ingredient,Measure
0,Spiced rum,1 1/2 cl
1,Ginger ale,
2,Lime,
3,Ice,
4,,
5,,
6,,
7,,
8,,
9,,


In [59]:
recipe('Gin Rickey')

Half-fill a tall glass with ice. Mix the gin and Grenadine together and pour over the ice. Add the lime or lemon juice and top off with soda water. Decorate the glass with lime and/or lemon slices.


Unnamed: 0,Ingredient,Measure
0,Gin,2 oz
1,Grenadine,1 tsp
2,lemon,Juice of 1/2
3,Soda Water,Top up with
4,Lime,Garnish
5,,
6,,
7,,
8,,
9,,


In [60]:
cond1 = (df_cons['strIngredients'] == 'Lime') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()
cond2 = df_cons['strDrink'].isin(['Happy Skipper', 'Gin Rickey'])

df_cons['Garnish_amount'] = np.where(cond1 & cond2, 1, df_cons['Garnish_amount'])
df_cons['Garnish_type'] = np.where(cond1 & cond2, 'slice', df_cons['Garnish_type'])

In [61]:
df_cons.loc[(df_cons['strIngredients'] == 'Lime') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy


## Oranges.<a class="anchor" id="7"></a>

In [62]:
df_cons.loc[(df_cons['strIngredients'] == 'Orange') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
174,Caribbean Orange Liqueur,Homemade Liqueur,Collins Glass,Orange,Pare very thinly the bright-colored rind from ...,3 large,3,large,,,,,,large
999,Abbey Cocktail,Ordinary Drink,Cocktail glass,Orange,Shake all ingredients (except for the cherry) ...,juice of 1/4,juice of,1/4,,,,,,1/4
1472,Bourbon Sour,Ordinary Drink,Whiskey sour glass,Orange,"In a shaker half-filled with ice cubes, combin...",1,1,,,,,,,
1528,Gin Sour,Ordinary Drink,Whiskey sour glass,Orange,"In a shaker half-filled with ice cubes, combin...",1,1,,,,,,,
1605,Rum Sour,Ordinary Drink,Whiskey sour glass,Orange,"In a shaker half-filled with ice cubes, combin...",1,1,,,,,,,
1610,Sangria - The World's Best,Punch / Party Drink,Pitcher,Orange,"Mix wine, sugar and fruit, and let sit in the ...",1 large,1,large,,,,,,large
1643,Wine Punch,Punch / Party Drink,Collins Glass,Orange,Combine all of the ingredients and pour over a...,3,3,,,,,,,
1700,Classic Old-Fashioned,Ordinary Drink,Old-fashioned glass,Orange,"In an old-fashioned glass, muddle the bitters ...",1,1,,,,,,,
1712,"French ""75""",Ordinary Drink,Collins glass,Orange,"In a shaker half-filled with ice cubes, combin...",1,1,,,,,,,
1713,French 75,Ordinary Drink,Collins glass,Orange,"Combine gin, sugar, and lemon juice in a cockt...",1,1,,,,,,,


In [63]:
cond1 = (df_cons['strDrink'] == 'Abbey Cocktail')
cond2 = (df_cons['strIngredients'] == 'Orange')
df_cons['Value_ml'] = np.where(cond1 & cond2, 75/4, df_cons['Value_ml'])

In [64]:
recipe('John Collins')

Pour all ingredients directly into highball glass filled with ice. Stir gently. Garnish. Add a dash of Angostura bitters.


Unnamed: 0,Ingredient,Measure
0,Bourbon,2 oz
1,Lemon juice,1 oz
2,Sugar,1 tsp superfine
3,Club soda,3 oz
4,Maraschino cherry,1
5,Orange,1
6,,
7,,
8,,
9,,


In [65]:
recipe('Caribbean Orange Liqueur')

Pare very thinly the bright-colored rind from the oranges (no white). Blot the peel on paper towels to remove any excess oil. Put peel in a 4 cup screw-top jar. Add 2 cups vodka. Close jar. Store in a cool, dark place for 2 days or until the vodka has absorbed the flavor. Remove peel and add remaining vodka. Close jar and add remaining cup of vodka. Close the jar and store in a cool dark place at least 1 month to age.


Unnamed: 0,Ingredient,Measure
0,Orange,3 large
1,Vodka,3 cups
2,Sugar,1 1/3 cup superfine
3,,
4,,
5,,
6,,
7,,
8,,
9,,


In [66]:
# It's actually not a cocktail and should be dropped together with all other homemage liqueurs.
df_cons.drop(df_cons.loc[df_cons['strCategory']=='Homemade Liqueur'].index, axis=0, inplace=True)

In [67]:
recipe("Sangria - The World's Best")

Mix wine, sugar and fruit, and let sit in the fridge for 18-24 hours. The mixture will have a somewhat syrupy consistency. Before serving stir in brandy and cut the mixture with soda water until it have a thinner, more wine like consistency. Serve from a pitcher in wine glasses.


Unnamed: 0,Ingredient,Measure
0,Red wine,1 1/2 L
1,Sugar,1 cup
2,Lemon,1 large
3,Orange,1 large
4,Apple,1 large
5,Brandy,3-4 oz plain
6,Soda water,
7,,
8,,
9,,


In [68]:
recipe('Wine Punch')

Combine all of the ingredients and pour over a block of ice.


Unnamed: 0,Ingredient,Measure
0,Red wine,1 bottle
1,Lemon,2
2,Orange juice,1 cup
3,Orange,3
4,Pineapple juice,1 cup
5,,
6,,
7,,
8,,
9,,


In [69]:
# For Sangria amount of oranges specified stays for juice but for the rest it's for garnish by slices.
cond1 = (df_cons['strDrink'] == "Sangria - The World's Best")
cond2 = (df_cons['strIngredients'] == 'Orange')
df_cons['Value_ml'] = np.where(cond1 & cond2, 75*3, df_cons['Value_ml'])

In [70]:
# Cocktail 'French "75"' appeares two times.

df_cons.drop(df_cons.loc[df_cons['strDrink']=='French "75"'].index, axis=0, inplace=True)

In [71]:
# Add orange slice as a garnish for remaining cocktails.

cond1 = (df_cons['strIngredients'] == 'Orange') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()

df_cons['Garnish_amount'] = np.where(cond1, df_cons['Value'], df_cons['Garnish_amount'])
df_cons['Garnish_type'] = np.where(cond1, 'slice', df_cons['Garnish_type'])

In [72]:
df_cons.loc[(df_cons['strIngredients'] == 'Orange') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy


## Apples.<a class="anchor" id="8"></a>

In [73]:
df_cons.loc[(df_cons['strIngredients'] == 'Apple') & (df_cons['Value_ml'].isnull()) & df_cons['Garnish_type'].isnull()]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
1767,Sangria - The World's Best,Punch / Party Drink,Pitcher,Apple,"Mix wine, sugar and fruit, and let sit in the ...",1 large,1,large,,,,,,large


In [74]:
cond1 = (df_cons['strDrink'] == "Sangria - The World's Best")
cond2 = (df_cons['strIngredients'] == 'Apple')
df_cons['Value_ml'] = np.where(cond1 & cond2, 100, df_cons['Value_ml'])

## Pineapple.<a class="anchor" id="9"></a>

In [75]:
cond1 = df_cons['strIngredients'] == 'Pineapple'
cond2 = df_cons['MeasureName'].isnull()
df_cons['Value_ml'] = np.where(cond1 & cond2, 900, df_cons['Value_ml'])

## Garnishes (remaning from fruit's fix).<a class="anchor" id="10"></a>

In [76]:
lst = ['strip', 'handful', 'twist', 'whole', 'leaves', 'sprigs', 'sticks']

In [77]:
df_cons['Value'] = np.where(df_cons['Value'] == '1 long', '1', df_cons['Value'])
df_cons['Value'] = np.where(df_cons['Value'] == '2 fresh', '2', df_cons['Value'])

In [78]:
df_cons['Garnish_type'] = np.where(df_cons['MeasureName'].isin(lst), df_cons['MeasureName'], df_cons['Garnish_type'])
df_cons['Garnish_amount'] = np.where(df_cons['MeasureName'].isin(lst), df_cons['Value'], df_cons['Garnish_amount'])

In [79]:
df_cons['Garnish_type'] = np.where(df_cons['strIngredients'] == 'Whipped Cream', 'ml', df_cons['Garnish_type'])
df_cons['Garnish_amount'] = np.where(df_cons['strIngredients'] == 'Whipped Cream', '60', df_cons['Garnish_amount'])

In [80]:
df_cons['Garnish_type'] = np.where(df_cons['strIngredients'] == 'Cloves', 'whole', df_cons['Garnish_type'])
df_cons['Garnish_amount'] = np.where(df_cons['strIngredients'] == 'Cloves', df_cons['Value'], df_cons['Garnish_amount'])

In [81]:
lst = ['fill', 'top', 'up', 'with', 'sweet']
df_cons['Garnish_type'] = np.where(df_cons['MeasureName'].isin(lst), 'top up', df_cons['Garnish_type'])
df_cons['Garnish_amount'] = np.where(df_cons['MeasureName'].isin(lst), 0, df_cons['Garnish_amount'])

In [82]:
df_cons['Garnish_type'] = np.where((df_cons['MeasureName'] == 'pinch')&(df_cons['Value_gr'].isnull()), 'around the rim', df_cons['Garnish_type'])
df_cons['Garnish_type'] = np.where((df_cons['strIngredients'] == 'Salt')&(df_cons['Value_gr'].isnull()), 'rim', df_cons['Garnish_type'])
df_cons['Garnish_amount'] = np.where((df_cons['Garnish_type'] == 'around the rim')&(df_cons['Value_gr'].isnull()), 0.36, df_cons['Garnish_amount'])

## Dealing with missing measures.<a class="anchor" id="11"></a>

In [83]:
cond1 = (df_cons['Value_ml'].isnull())
cond2 = (df_cons['Value_gr'].isnull())
cond3 = (df_cons['Garnish_type'].isnull())
print(len(df_cons.loc[cond1 & cond2 & cond3]['MeasureName']))
print(len(df_cons.loc[cond1 & cond2 & cond3]['MeasureName'].unique()))
df_cons.loc[cond1 & cond2 & cond3]['MeasureName'].unique()

265
2


array([None, nan], dtype=object)

# CHECK NONE MEASURES WITH VALUES, PROBABLY ALL CAN BE REPLACED TO 'PART' , I.E. TO 'OZ'

In [84]:
df_cons.loc[(df_cons['MeasureName'].isnull()) & (~df_cons['Value'].isnull())]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
0,'57 Chevy with a White License Plate,Cocktail,Highball glass,Creme De Cacao White,1. Fill a rocks glass with ice 2.add white cre...,1 oz white,1,,,,,,,white
12,69 Special,Ordinary Drink,Collins Glass,Gin Dry,Pour 2 oz. gin. Add 4 oz. 7-up. Add Lemon Juic...,2 oz dry,2,,,,,,,dry
26,ABC,Shot,Shot glass,Amaretto,Layered in a shot glass.,1/3,1/3,,,,,,,
37,Absolut limousine,Other/Unknown,Collins Glass,Absolut Citron,Fill Absolut into a glass. Add Lime juice. Add...,2/3,2/3,,,,,,,
46,Adam Sunrise,Ordinary Drink,Collins Glass,Vodka,Fill blender up with ice. Fill half with Barto...,1/2,1/2,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1862,Tuxedo Cocktail,Ordinary Drink,Cocktail glass,Cherry,Stir all ingredients with ice and strain into ...,1,1,,,,,,,
1872,Apple Cider Punch #1,Punch / Party Drink,Collins Glass,Nutmeg Ground,"If you use the whole all spice and cloves, tie...",1 tsp ground,1,,,,,,,ground
1875,Boozy Snickers Milkshake,Milk / Float / Shake,Mason jar,Mini-Snickers Bars,Place the snickers bars in a plastic bag and r...,15,15,,,,,,,
1877,Chocolate Monkey,Milk / Float / Shake,Parfait glass,Cherry,"blend liqeuors with ice-cream, milk and syrup....",1,1,,,,,,,


In [94]:
df_cons.loc[(df_cons['MeasureName_copy'] == 'fifth')]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
152,Brain Fart,Punch / Party Drink,Punch bowl,Everclear,Mix all ingredients together. Slowly and gentl...,1 fifth,1/5,fifth,0.2,150.0,,,,fifth
581,Apricot punch,Punch / Party Drink,Punch bowl,Champagne,Pour all ingrediants into a large punch bowl. ...,4 fifth,4/5,fifth,0.8,600.0,,,,fifth
1056,Apricot punch,Punch / Party Drink,Punch bowl,Vodka,Pour all ingrediants into a large punch bowl. ...,1 fifth,1/5,fifth,0.2,150.0,,,,fifth
1065,Artillery Punch,Punch / Party Drink,Punch bowl,Red Wine,Combine all the ingredients in a large punch b...,1 fifth,1/5,fifth,0.2,150.0,,,,fifth


In [85]:
df_cons.loc[(df_cons['MeasureName'].isnull()) & (~df_cons['Value'].isnull()) & (df_cons['strIngredients'].str.contains('Egg'))]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy


In [86]:
df_cons.loc[df_cons['strDrink'] == 'Ace']

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
42,Ace,Cocktail,Martini Glass,Gin,Shake all the ingredients in a cocktail shaker...,2 shots,2,shots,2.0,50.0,,,,shots
530,Ace,Cocktail,Martini Glass,Grenadine,Shake all the ingredients in a cocktail shaker...,1/2 shot,1/2,shot,0.5,12.5,,,,shot
1012,Ace,Cocktail,Martini Glass,Heavy Cream,Shake all the ingredients in a cocktail shaker...,1/2 shot,1/2,shot,0.5,12.5,,,,shot
1415,Ace,Cocktail,Martini Glass,Milk,Shake all the ingredients in a cocktail shaker...,1/2 shot,1/2,shot,0.5,12.5,,,,shot
1657,Ace,Cocktail,Martini Glass,Egg White,Shake all the ingredients in a cocktail shaker...,1/2 fresh,1/2,egg,0.5,22.5,,,,fresh


In [87]:
recipe("Ace")

Shake all the ingredients in a cocktail shaker and ice then strain in a cold glass.


Unnamed: 0,Ingredient,Measure
0,Gin,2 shots
1,Grenadine,1/2 shot
2,Heavy cream,1/2 shot
3,Milk,1/2 shot
4,Egg White,1/2 Fresh
5,,
6,,
7,,
8,,
9,,


In [88]:
df_cons.loc[(df_cons['MeasureName'].isnull())
            & (df_cons['Value_ml'].isnull())
            & (df_cons['Value_gr'].isnull())
            & (df_cons['Garnish_amount'].isnull())]['strIngredients'].unique()

array(['Creme De Cacao White', 'Blue Curacao', 'Gin Dry', 'Amaretto',
       'Absolut Citron', 'Vodka', 'Peppermint Schnapps', 'Tea Hot',
       'Maui Blue', 'Champagne', 'Vodka Smirnoff', "Bailey'S Irish Cream",
       'Guinness Stout', 'Sugar Superfine', 'Sambuca White', 'Grenadine',
       'Corona', 'Coffee', 'Kahlua', 'Cider Hard', '151 Proof Rum Light',
       'Kool-Aid Grape', 'Carbonated Water', 'Absinthe',
       'Light Rum Jamaican', 'Sherry Cream', 'Gin', 'Coca-Cola',
       'Blueberry Schnapps', 'Lime Juice', 'Sweet And Sour',
       'Sweet Vermouth Red', 'Iced Tea Lemon', 'Grand Marnier',
       'Chocolate Syrup', 'Root Beer', 'Banana Liqueur', 'Ginger Ale',
       'Sambuca', 'Peach Schnapps', 'Coffee Cold', 'Kool-Aid Tropical',
       'Pina Colada Mix', 'Tequila', 'Orange Juice', 'Lemonade Cold',
       'Milk', 'Tonic Water', 'Schweppes Russchian', 'Triple Sec',
       'Cognac', 'Water', 'Club Soda', 'Midori Melon Liqueur',
       'Lemon Peel', 'Lemonade Frozen', 'Soda Wat

In [89]:
liquid = ['Blue Curacao', 'Amaretto', 'Absolut Citron', 'Vodka', 'Peppermint Schnapps', 'Champagne', "Bailey'S Irish Cream",
          'Guinness Stout', 'Grenadine', 'Corona', 'Coffee', 'Kahlua', 'Egg Yolk', 'Carbonated Water', 'Absinthe', 'Gin',
          'Coca-Cola', 'Blueberry Schnapps', 'Lime Juice', 'Sweet And Sour', 'Grand Marnier', 'Chocolate Syrup', 'Root Beer',
          'Banana Liqueur',
       'Ginger Ale', 'Sambuca', 'Peach Schnapps', 'Coffee Cold',
       'Pina Colada Mix', 'Tequila', 'Orange Juice', 'Milk',
       'Tonic Water', 'Schweppes Russchian', 'Triple Sec', 'Cognac',
       'Water', 'Egg White', 'Club Soda', 'Midori Melon Liqueur',
       'Lemon Peel', 'Soda Water', 'Cream', 'Cherry Brandy', 'Lemonade',
       'Sugar', 'Mint', 'Powdered Sugar', 'Dry Vermouth',
       'Angostura Bitters', 'Olive', 'Daiquiri Mix', 'Goldschlager',
       'Lemon-Lime Soda', 'Cranberry Juice', 'Lemon Juice', 'Light Cream',
       'Sour Mix', 'Maraschino Cherry', 'Strawberries', 'Cherry',
       'Bitter Lemon', 'Nutmeg', 'Pineapple Juice', 'Orange Peel', 'Salt',
       'Brandy', '151 Proof Rum', 'Orange Spiral', 'Cinnamon', 'Red Wine',
       'Grapefruit Juice', '7-Up', 'Egg White Fresh', 'Fruit',
       'Oreo Cookie', 'Cloves', 'Pepper', 'Lime Peel', 'Cherries',
       'Mini-Snickers Bars', 'Fruit Juice']

In [90]:
# Check if there are values in both ml and garnish.

In [91]:
# Drop 'strMeasures', 'Value', 'MeasureName', 'Value_numeric'.

In [92]:
df_cons.to_csv('./raw_data/stage3.csv')

In [93]:
# df_cons = pd.read_csv('./raw_data/stage3.csv')
# df_cons.drop('Unnamed: 0', axis=1, inplace=True)