In [1]:
import re
import pandas as pd
import numpy as np
from fractions import Fraction

In [2]:
# Load the raw cocktail table and the table with normalised measures.
# Both files were saved with their index, which comes back as an
# 'Unnamed: 0' column; it is dropped right after reading.
# (df_cocktails is loaded here but not referenced by the cells visible below.)
df_cocktails = pd.read_csv('./raw_data/stage0.csv').drop(columns='Unnamed: 0')

df_cons = pd.read_csv('./clean_data/data_fixed_measures.csv').drop(columns='Unnamed: 0')

In [3]:
# Preview the first rows of df_cons (read from data_fixed_measures.csv).
df_cons.head()

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy
0,'57 Chevy with a White License Plate,Cocktail,Highball glass,Creme De Cacao White,1. Fill a rocks glass with ice 2.add white cre...,1 oz white,1,oz,1.0,30.0,,,,white
1,1-900-FUK-MEUP,Shot,Old-fashioned glass,Absolut Kurant,Shake ingredients in a mixing tin filled with ...,1/2 oz,1/2,oz,0.5,15.0,,,,oz
2,110 in the shade,Beer,Beer Glass,Lager,Drop shooter in glass. Fill with beer,16 oz,16,oz,16.0,480.0,,,,oz
3,151 Florida Bushwacker,Milk / Float / Shake,Beer mug,Malibu Rum,Combine all ingredients. Blend until smooth. G...,1/2 oz,1/2,oz,0.5,15.0,,,,oz
4,155 Belmont,Cocktail,White wine glass,Dark Rum,Blend with ice. Serve in a wine glass. Garnish...,1 shot,1,shot,1.0,25.0,,,,shot


In [4]:
# Add the two label columns that later cells fill in.
# They start out as NaN and are cast to object dtype so string labels can be
# written into them; the final selection places them right after the
# ingredient name.
label_columns = ['Alc_type', 'Basic_taste']

df_cons = (
    df_cons
    .assign(**{column: np.nan for column in label_columns})
    .astype({column: 'object' for column in label_columns})
    [[
        'strDrink', 'strCategory', 'strGlass', 'strIngredients',
        'Alc_type', 'Basic_taste',
        'strInstructions', 'strMeasures', 'Value', 'MeasureName',
        'Value_numeric', 'Value_ml', 'Value_gr',
        'Garnish_amount', 'Garnish_type', 'MeasureName_copy',
    ]]
)

In [5]:
# Maps a substring of an ingredient name to a coarse alcohol category.
#
# The labelling loop further down walks this dict in insertion order and every
# match overwrites the previous label, so when several keys occur in the same
# ingredient the key listed LAST here decides the category (e.g. 'Creme De' is
# listed before 'Creme De Cassis', so the more specific key wins).
# Matching is a case-sensitive substring test, so short keys such as 'Gin' or
# 'Ale' also hit longer names that merely contain them ('Ginger ...', '... Ale').
# Do not reorder entries without checking which label each ingredient ends up with.
# NOTE(review): 'Rub' -> 'Rum' presumably covers a misspelled ingredient in the
# data -- confirm against the source file.
alc_type = {'Vodka':'Vodka', 'Rum':'Rum', 'Tequila':'Tequila', 'Schnapps':'Schnapps', 'Gin':'Gin', 'Triple Sec':'Triple Sec',
            'Scotch':'Whisky', 'Wine':'Wine', 'Absolut':'Vodka', 'Champagne':'Champagne', 'Cider':'Cider', 'Port':'Port',
            'Prosecco':'Prosecco', 'Brandy':'Brandy', 'Bacardi':'Rum', 'Whiskey':'Whisky', 'Absinthe':'Absinthe',
            'Creme De':'Creamy Liqueur', 'Bitter':'Bitter', 'Bitters':'Bitter', 'Vermouth':'Vermouth',
            'Cointreau':'Triple Sec', "Irish Cream":'Creamy Liqueur', 'Sambuca':'Sambuca', 'Lager':'Beer',
            'Jack Daniels':'Whisky', 'Goldschlager':'Schnapps', 'Kahlua':'Creamy Liqueur', 'Applejack':'Brandy',
            'Menthe':'Sweet Liqueur', 'Southern Comfort':'Sweet Liqueur', 'Chocolate Liqueur':'Creamy Liqueur',
            'Campari':'Campari', 'Maui Blue':'Schnapps', 'Jägermeister':'Bitter', 'Crown Royal':'Whisky', 'Everclear':'Vodka',
            'Ale':'Beer', 'Stout':'Beer', 'Midori Melon Liqueur':'Sweet Liqueur', 'Bourbon':'Whisky', 'Corona':'Beer',
            'Banana Liqueur':'Sweet Liqueur', 'Cherry Heering':'Sweet Liqueur', 'Whisky':'Whisky', 'Dubonnet Rouge':'Wine',
            'Pisco':'Pisco', 'Cognac':'Brandy', 'Firewater':'Vodka', 'Galliano':'Sweet Liqueur', 'Cachaca':'Cachaca',
            'Curacao':'Triple Sec', 'Amaretto':'Sweet Liqueur', 'Creme De Cassis':'Sweet Liqueur',
            'Frangelico':'Sweet Liqueur', 'Kiwi Liqueur':'Sweet Liqueur', 'Ricard':'Sweet Liqueur', 'Jim Beam':'Whisky',
            'Advocaat':'Creamy Liqueur', 'Ouzo':'Ouzo', 'Godiva Liqueur':'Creamy Liqueur', 'Grand Marnier':'Triple Sec',
            'Yukon Jack':'Sweet Liqueur', 'Wild Turkey':'Whisky', 'Zima':'Beer', 'Raspberry Liqueur':'Sweet Liqueur',
            'Johnnie Walker':'Whisky', 'Kirschwasser':'Brandy', 'Pisang Ambon':'Sweet Liqueur', 'Sherry Dry':'Brandy',
            'Creme De Banane':'Sweet Liqueur', 'Coconut Liqueur':'Sweet Liqueur', 'Tia Maria':'Creamy Liqueur',
            'Maraschino Liqueur':'Sweet Liqueur', 'Benedictine':'Sweet Liqueur', 'Chartreuse':'Sweet Liqueur',
            'Alcohol':'Vodka', 'Apfelkorn':'Sweet Liqueur', 'Drambuie':'Whisky', 'Cherry Liqueur':'Sweet Liqueur',
            'Aquavit':'Vodka', 'Strawberry Liqueur':'Sweet Liqueur', 'Aperol':'Aperol', 'Lillet Blanc':'Sweet Liqueur',
            'Melon Liqueur':'Sweet Liqueur', 'Hot Damn':'Schnapps', 'Rub':'Rum'
           }

In [6]:
# Maps a substring of a non-alcoholic ingredient name to its basic taste.
#
# The labelling loop walks this dict in insertion order and every match
# overwrites the previous label, so when several keys occur in the same
# ingredient the key listed LAST decides the taste. A specific key must
# therefore come AFTER any shorter key it contains with a different value:
# 'Dr. Pepper' is placed after 'Pepper' so that the soft drink is labelled
# 'sweet' instead of being overwritten with 'spicy'.
# Matching is a case-sensitive substring test.
non_alc_taste = {'Cranberry':'sour', 'Cream':'cream', 'Tea':'bitter', 'Coffee':'bitter', 'Lemon':'sour', 'Lime':'sour',
                 'Apple':'sweet', 'Sour Mix':'sour', '7-Up':'sweet', 'Sugar':'sweet', 'Grenadine':'sweet', 'Egg':'egg',
                 'Kool-Aid':'sweet', 'Water':'water', 'Mint':'mint', 'Orange Juice':'sweet', 'Sprite':'sweet',
                 'Grapefruit Juice':'bitter', 'Pineapple Juice':'sweet', 'Cola':'sweet', 'Peach Nectar':'sweet',
                 'Sweet And Sour':'sweet', 'Soda':'sweet', 'Mountain Dew':'sweet', 'Milk':'cream', 'Syrup':'sweet',
                 'Root Beer':'sweet', 'Orange':'sweet', 'Tomato Juice':'salty', 'Tabasco':'spicy', 'Sarsaparilla':'sweet',
                 'Pineapple':'sweet', 'Jello':'sweet', 'Anis':'spicy', 'Schweppes':'bitter', 'Tropicana':'sweet', 'Surge':'sweet',
                 'Cinnamon':'spicy', 'Banana':'sweet', 'Olive Brine':'salty','Olive':'salty', 'Salt':'salty',
                 'Vanilla Extract':'sweet', 'Fruit Punch':'sweet', 'Cocoa Powder':'sweet', 'Cloves':'spicy',
                 'Blackcurrant Squash':'sweet', 'Blackcurrant Cordial':'sweet', 'Grape Juice Unsweetened':'sour',
                 'Cherry':'sweet', 'Strawberries':'sweet', 'Hot Chocolate':'cream', 'Nutmeg':'spicy',
                 'Worcestershire Sauce':'salty', 'St. Germain':'sweet', 'Sirup':'sweet',
                 'Oreo Cookie':'cream', 'Berries':'sweet', 'Snickers':'sweet', 'Allspice':'spicy', 'Lavender':'spicy',
                 'Caramel Sauce':'sweet', 'Chocolate Sauce':'sweet', 'Kummel':'spicy', 'Daiquiri Mix':'sweet',
                 'Half-And-Half':'cream', 'Fruit Juice':'sweet', 'Cherries':'sweet', 'Candy':'sweet',
                 'Pepper':'spicy',
                 # Must stay after 'Pepper': the last matching key wins.
                 'Dr. Pepper':'sweet'
                }

In [7]:
# A single function could process both dictionaries in one pass; due to time pressure they are handled by two separate loops below.

In [8]:
# Label alcoholic ingredients: every row whose ingredient name contains a key
# of alc_type gets that key's category. Keys are applied in dict order and a
# later match overwrites an earlier one.
for key, value in alc_type.items():
    # regex=False: the keys are literal text, not patterns.
    # na=False: a missing ingredient name must count as "no match". Without it
    # str.contains yields NaN, which np.where treats as True, so such a row
    # would receive every label in turn.
    is_match = df_cons['strIngredients'].str.contains(key, regex=False, na=False)
    df_cons['Alc_type'] = np.where(is_match, value, df_cons['Alc_type'])

In [9]:
# Label non-alcoholic ingredients with a basic taste. Only rows that did not
# receive an alcohol category are eligible. Alc_type is not modified inside
# this loop, so the eligibility mask is computed once up front.
is_non_alcoholic = df_cons['Alc_type'].isnull()

for key, value in non_alc_taste.items():
    # regex=False: keys such as 'St. Germain' or 'Dr. Pepper' contain '.',
    # which would act as a wildcard under the default regex matching.
    # na=False: a missing ingredient name counts as "no match".
    is_match = df_cons['strIngredients'].str.contains(key, regex=False, na=False)
    df_cons['Basic_taste'] = np.where(is_match & is_non_alcoholic, value, df_cons['Basic_taste'])

In [10]:
# Ingredients that need to be handled separately from the substring matching.
ingredient_name = df_cons['strIngredients']

# Beer: only the ingredient named exactly 'Beer' is labelled as beer.
is_plain_beer = ingredient_name == 'Beer'
df_cons['Alc_type'] = np.where(is_plain_beer, 'Beer', df_cons['Alc_type'])

# Ginger Beer: clear any alcohol category it picked up and label it as a
# sweet non-alcoholic ingredient instead.
# NOTE(review): 'Ginger Ale' also contains the alc_type keys 'Gin' and 'Ale'
# and is not reset here -- confirm whether it occurs in the data.
is_ginger_beer = ingredient_name == 'Ginger Beer'
df_cons['Alc_type'] = np.where(is_ginger_beer, np.nan, df_cons['Alc_type'])
df_cons['Basic_taste'] = np.where(is_ginger_beer, 'sweet', df_cons['Basic_taste'])

In [11]:
# Sanity checks: look for rows that received both labels, and for rows that received neither.

In [12]:
# Rows that carry both an alcohol category and a basic taste.
has_both_labels = df_cons['Alc_type'].notna() & df_cons['Basic_taste'].notna()
df_cons[has_both_labels]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,Alc_type,Basic_taste,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy


In [13]:
# Rows that received neither an alcohol category nor a basic taste.
has_no_label = df_cons[['Alc_type', 'Basic_taste']].isna().all(axis=1)
df_cons[has_no_label]

Unnamed: 0,strDrink,strCategory,strGlass,strIngredients,Alc_type,Basic_taste,strInstructions,strMeasures,Value,MeasureName,Value_numeric,Value_ml,Value_gr,Garnish_amount,Garnish_type,MeasureName_copy


In [14]:
# Persist the labelled table. The index is written as the first, unnamed
# column, which pandas reads back as 'Unnamed: 0' (the column the loading cell
# at the top of this notebook drops from its inputs).
df_cons.to_csv(path_or_buf='./clean_data/clean_data.csv', index=True)