In [1]:
import pandas as pd
import numpy as np
from database import engine

%load_ext nb_black

<IPython.core.display.Javascript object>

In [18]:
# Functions
def query_data():
    """Read drink names, ingredients and cleaned measures from the database.

    Selects ``strdrink``, ``stringredient1`` .. ``stringredient12`` and
    ``strmeasure1_clean`` .. ``strmeasure12_clean`` from the ``all_cocktails``
    table through the ``engine`` imported from ``database``.

    Returns:
        pandas.DataFrame: the query result as produced by
        ``pd.read_sql_query``.
    """
    sql = """
    select
	    strdrink,
	    stringredient1,
	    stringredient2,
	    stringredient3,
	    stringredient4,
	    stringredient5,
	    stringredient6,
	    stringredient7,
	    stringredient8,
	    stringredient9,
	    stringredient10,
	    stringredient11,
	    stringredient12,
	    strmeasure1_clean,
	    strmeasure2_clean,
	    strmeasure3_clean,
	    strmeasure4_clean,
	    strmeasure5_clean,
	    strmeasure6_clean,
	    strmeasure7_clean,
	    strmeasure8_clean,
	    strmeasure9_clean,
	    strmeasure10_clean,
	    strmeasure11_clean,
        strmeasure12_clean
	    
	from 
	    all_cocktails
	;
        """

    cocktails = pd.read_sql_query(sql, engine)
    return cocktails


def cols_to_lower(df, columns):
    """Return the selected columns of ``df`` with their strings lowercased.

    Args:
        df: DataFrame holding the columns.
        columns: list of column names to lowercase; each column must support
            the ``.str`` accessor.

    Returns:
        A new DataFrame containing only ``columns``, lowercased. ``df`` itself
        is not modified.
    """

    def _lowercase(series):
        return series.str.lower()

    selected = df[columns]
    return selected.apply(_lowercase)


def get_cols_list(df, starts_with_criteria):
    """List the column names of ``df`` that begin with a given prefix.

    Args:
        df: DataFrame whose column names are inspected.
        starts_with_criteria: prefix passed to ``str.startswith``.

    Returns:
        list: matching column names, in the order they appear in ``df``.
    """
    matching = []
    for name in df.columns:
        if name.startswith(starts_with_criteria):
            matching.append(name)
    return matching


def shape_data_long(df, list_of_cols, col_start_string, col_end_string, new_name):
    """Reshape numbered ingredient or measurement columns into long format.

    Each column in ``list_of_cols`` is expected to be named
    ``<col_start_string><number><col_end_string>``. The columns are melted so
    that every (drink, column) pair becomes one row, and the number embedded in
    the column name is extracted into ``ingred_num``.

    Args:
        df: DataFrame containing ``strdrink`` and the columns in
            ``list_of_cols``.
        list_of_cols: list of column names to unpivot.
        col_start_string: literal text to strip from the column names (may be
            empty).
        col_end_string: literal text to strip from the column names (may be
            empty).
        new_name: name given to the melted value column.

    Returns:
        DataFrame with columns ``strdrink``, ``new_name`` and ``ingred_num``
        (integer).

    Raises:
        ValueError: if what remains of a column name after stripping is not an
            integer.
    """
    melted = df[["strdrink"] + list_of_cols].melt(
        id_vars="strdrink",
        value_vars=list_of_cols,
        # Pin the output names so a named columns index cannot change them.
        var_name="variable",
        value_name="value",
    )

    position = melted["variable"]
    for fragment in (col_start_string, col_end_string):
        # Skip the no-op replacement of an empty fragment, and always treat the
        # fragment as literal text: column names are not regular expressions,
        # and the default of ``regex`` differs between pandas versions.
        if fragment:
            position = position.str.replace(fragment, "", regex=False)

    return (
        melted.assign(ingred_num=position.astype("int"))
        .drop(columns="variable")
        .rename(columns={"value": new_name})
    )


def merge_long(table_1, table_2):
    """Pair each ingredient row with its measurement row.

    Joins the two long tables on ``strdrink`` and ``ingred_num``, orders the
    rows by those keys, drops ``ingred_num``, removes rows containing any
    missing value and renumbers the index. The result stays in long format.

    Args:
        table_1: long DataFrame with ``strdrink`` and ``ingred_num`` columns.
        table_2: long DataFrame with ``strdrink`` and ``ingred_num`` columns.

    Returns:
        DataFrame of the joined rows without ``ingred_num`` and without
        missing values, indexed ``0..n-1``.
    """
    join_keys = ["strdrink", "ingred_num"]

    paired = table_1.merge(table_2, on=join_keys)
    ordered = paired.sort_values(join_keys)
    complete = ordered.drop("ingred_num", axis=1).dropna()
    return complete.reset_index(drop=True)


def pivot_wide(df):
    """Pivot the long ingredient table into one row per drink.

    Args:
        df: long DataFrame with ``strdrink``, ``ingredient`` and ``amount``
            columns.

    Returns:
        DataFrame with a ``strdrink`` column followed by one column per
        distinct ingredient. Each cell is the summed ``amount`` of that
        ingredient for the drink, or 0 where the drink does not use it.
    """
    return (
        df.pivot_table(
            index="strdrink",
            columns="ingredient",
            values="amount",
            # The string alias is the supported spelling; passing ``np.sum``
            # is deprecated in recent pandas releases.
            aggfunc="sum",
        )
        .fillna(0)
        .reset_index()
    )


def recode_ingredients(df, dictionary):
    """Replace values in the ``ingredient`` column according to a mapping.

    Args:
        df: DataFrame with an ``ingredient`` column.
        dictionary: mapping passed to ``Series.replace``; values not present
            as keys are left unchanged.

    Returns:
        A new DataFrame with the recoded ``ingredient`` column; ``df`` is not
        modified.
    """
    recoded = df["ingredient"].replace(dictionary)
    return df.assign(ingredient=recoded)


def calculate_row_sum(df):
    """Add a ``row_sum`` column holding the per-row total of numeric columns.

    Args:
        df: DataFrame whose numeric columns are summed across each row.

    Returns:
        A new DataFrame equal to ``df`` plus a ``row_sum`` column; ``df`` is
        not modified.
    """
    numeric_part = df.select_dtypes(include=np.number)
    totals = numeric_part.sum(axis=1)
    return df.assign(row_sum=totals)


def get_prop_cols(df):
    """List the columns of ``df`` that should be turned into proportions.

    Returns:
        list: every column name except ``strdrink`` and ``row_sum``, in the
        order they appear in ``df``.
    """
    excluded = ("strdrink", "row_sum")
    selected = []
    for name in df.columns:
        if name == excluded[0] or name == excluded[1]:
            continue
        selected.append(name)
    return selected


def calculate_row_prop(df, prop_cols):
    """Divide the given columns by each row's ``row_sum``.

    Args:
        df: DataFrame containing ``prop_cols`` and a ``row_sum`` column.
        prop_cols: list of column names to convert to proportions.

    Returns:
        A new DataFrame containing only ``prop_cols``, each value divided by
        the ``row_sum`` of its row.
    """
    row_totals = df["row_sum"]
    # axis=0 aligns the divisor with the row index, i.e. one total per row.
    return df[prop_cols].div(row_totals, axis=0)


def query_and_preprocess_data():
    """Run the full pipeline from database query to per-drink proportions.

    Steps: query the data, lowercase the ingredient columns, reshape
    ingredients and measures to long format, join them, recode ingredients
    with ``create_ingredient_map()``, pivot to one row per drink, and divide
    every ingredient column by the row total.

    Returns:
        DataFrame with ``strdrink`` plus one column per (recoded) ingredient
        holding that ingredient's share of the drink's row total.
    """
    raw = query_data()

    ingredient_columns = get_cols_list(raw, "stringredient")
    measure_columns = get_cols_list(raw, "strmeasure")

    raw[ingredient_columns] = cols_to_lower(raw, ingredient_columns)

    ingredients = shape_data_long(
        raw, ingredient_columns, "stringredient", "", "ingredient"
    )
    amounts = shape_data_long(raw, measure_columns, "strmeasure", "_clean", "amount")

    # Join, recode, pivot wide and total each row in one chain.
    totals = (
        merge_long(ingredients, amounts)
        .pipe(recode_ingredients, create_ingredient_map())
        .pipe(pivot_wide)
        .pipe(calculate_row_sum)
    )

    share_columns = get_prop_cols(totals)
    totals[share_columns] = calculate_row_prop(totals, share_columns)

    return totals.drop("row_sum", axis=1)

<IPython.core.display.Javascript object>

In [19]:
def create_ingredient_map():
    """Return the mapping from specific ingredient names to broader categories.

    Keys are lowercase ingredient names and values are the category each one
    is recoded to. Commented-out names are not part of the mapping, so they
    are not remapped.

    Returns:
        dict: ``{ingredient_name: category}``.
    """
    return {
        "151 proof rum": "rum",
        "7-up": "soda",
        # "absinthe",
        "absolut citron": "flavored vodka",
        "absolut kurant": "flavored vodka",
        "absolut peppar": "flavored vodka",
        "absolut vodka": "vodka",
        # "advocaat",
        "agave syrup": "sugar",
        "ale": "beer",
        "allspice": "spice",
        "almond": "nut",
        "almond flavoring": "nut",
        "amaretto": "liqueur",
        "amaro montenegro": "liqueur",
        "angelica root": "herb",
        "angostura bitters": "bitters",
        "anis": "spice",
        "anise": "spice",
        "anisette": "liqueur",
        "aperol": "aperitif",
        "apfelkorn": "liqueur",
        "apple": "fruit",
        "apple brandy": "brandy",
        "apple cider": "fruit juice",
        "apple juice": "fruit juice",
        "apple schnapps": "schnapps",
        "applejack": "brandy",
        "apricot": "fruit",
        "apricot brandy": "brandy",
        "apricot nectar": "fruit juice",
        # "aquavit" ,
        "asafoetida": "spice",
        "añejo rum": "rum",
        "bacardi limon": "flavored rum",
        "baileys irish cream": "baileys",
        "banana": "fruit",
        "banana liqueur": "liqueur",
        "basil": "herb",
        # "beer",
        "benedictine": "liqueur",
        "berries": "fruit",
        "bitter lemon": "fruit",
        "bitters": "bitters",
        "black pepper": "spice",
        "black sambuca": "liqueur",
        "blackberries": "fruit",
        "blackberry brandy": "brandy",
        "blackcurrant cordial": "liqueur",
        "blackcurrant squash": "liqueur",
        "blackstrap rum": "rum",
        "blended scotch": "scotch",
        "blended whiskey": "whiskey",
        "blood orange": "fruit",
        "blue curacao": "liqueur",
        "blueberries": "fruit",
        "bourbon": "bourbon",
        "brandy": "brandy",
        "brown sugar": "sugar",
        # "butter",
        "butterscotch schnapps": "schnapps",
        # "cachaca",
        "campari": "aperitif",
        "candy": "sugar",
        "cantaloupe": "fruit",
        "caramel coloring": "sugar",
        "caramel sauce": "sugar",
        "carbonated soft drink": "soda",
        # "carbonated water",
        "cardamom": "spice",
        "carrot": "fruit",
        "cayenne pepper": "spice",
        "celery salt": "spice",
        "chambord raspberry liqueur": "liqueur",
        # "champagne",
        "cherries": "fruit",
        "cherry": "fruit",
        "cherry brandy": "brandy",
        "cherry grenadine": "sugar",
        # "cherry heering",
        "cherry juice": "fruit juice",
        "cherry liqueur": "liqueur",
        "chocolate": "sugar",
        "chocolate ice-cream": "ice cream",
        "chocolate liqueur": "liqueur",
        "chocolate milk": "milk",
        "chocolate sauce": "sugar",
        "chocolate syrup": "sugar",
        "cider": "fruit juice",
        "cinnamon": "spice",
        "cloves": "spice",
        # "club soda",
        "coca-cola": "soda",
        "cocoa powder": "spice",
        "coconut liqueur": "liqueur",
        "coconut milk": "milk",
        "coconut rum": "rum",
        "coconut syrup": "sugar",
        # "coffee",
        "coffee brandy": "brandy",
        "coffee liqueur": "liqueur",
        # "cognac",
        "cointreau": "liqueur",
        "condensed milk": "milk",
        "coriander": "spice",
        "corn syrup": "sugar",
        # "cornstarch",
        "corona": "beer",
        "cranberries": "fruit",
        "cranberry juice": "fruit juice",
        "cranberry vodka": "flavored vodka",
        "cream": "milk",
        "cream of coconut": "milk",
        "creme de banane": "liqueur",
        "creme de cacao": "liqueur",
        "creme de cassis": "liqueur",
        "creme de mure": "liqueur",
        "crown royal": "whiskey",
        "cucumber": "fruit",
        "cumin seed": "spice",
        "curacao": "liqueur",
        "daiquiri mix": "mix",
        "dark creme de cacao": "liqueur",
        "dark rum": "rum",
        "demerara sugar": "sugar",
        "dr. pepper": "soda",
        # "drambuie",
        # "dry vermouth",
        # "dubonnet rouge",
        # "egg",
        "egg white": "egg",
        "egg yolk": "egg",
        "elderflower cordial": "liqueur",
        # "erin cream",
        "espresso": "coffee",
        # "everclear",
        "falernum": "liqueur",
        "fennel seeds": "spice",
        "figs": "fruit",
        "firewater": "whiskey",
        # "food coloring" ,
        # "frangelico",
        "fresca": "soda",
        "fresh lemon juice": "fruit juice",
        "fresh lime juice": "fruit juice",
        # "fruit",
        # "fruit juice",
        "fruit punch": "fruit juice",
        # "galliano",
        # "gin",
        "ginger": "spice",
        "ginger ale": "soda",
        "ginger beer": "soda",
        "ginger syrup": "sugar",
        # "glycerine",
        "godiva liqueur": "liqueur",
        "gold rum": "rum",
        "gold tequila": "tequila",
        "goldschlager": "schnapps",
        # "grain alcohol",
        "grand marnier": "liqueur",
        "grape juice": "fruit juice",
        "grape soda": "soda",
        "grapefruit juice": "fruit juice",
        "grapes": "fruit",
        "green chartreuse": "liqueur",
        "green creme de menthe": "liqueur",
        "grenadine": "sugar",
        "guava juice": "fruit juice",
        "guinness stout": "beer",
        "half-and-half": "milk",
        "heavy cream": "milk",
        "honey": "sugar",
        "honey syrup": "sugar",
        "hot chocolate": "milk",
        "hot damn": "schnapps",
        # "hot sauce",
        "hpnotiq": "fruit juice",
        # "ice",
        "iced tea": "fruit juice",
        "irish cream": "milk",
        "irish whiskey": "whiskey",
        "islay single malt scotch": "scotch",
        "jack daniels": "whiskey",
        # "jagermeister",
        "jello": "sugar",
        "jim beam": "bourbon",
        "johnnie walker": "scotch",
        "jägermeister": "jagermeister",
        "kahlua": "liqueur",
        "kirschwasser": "brandy",
        "kiwi": "fruit",
        "kiwi liqueur": "liqueur",
        "kool-aid": "fruit juice",
        "kummel": "liqueur",
        "lager": "beer",
        "lavender": "spice",
        "lemon": "fruit",
        "lemon juice": "fruit juice",
        "lemon peel": "fruit",
        "lemon vodka": "flavored vodka",
        "lemon-lime soda": "soda",
        "lemonade": "soda",
        "licorice root": "spice",
        "light cream": "milk",
        "light rum": "rum",
        "lillet": "aperitif",
        "lillet blanc": "aperitif",
        "lime": "fruit",
        "lime juice": "fruit juice",
        "lime juice cordial": "liqueur",
        "lime peel": "fruit",
        "lime vodka": "flavored vodka",
        "limeade": "fruit juice",
        "malibu rum": "rum",
        "mango": "fruit",
        "maple syrup": "sugar",
        "maraschino cherry": "fruit",
        "maraschino liqueur": "liqueur",
        "marjoram leaves": "spice",
        "marshmallows": "sugar",
        "maui": "schnapps",
        "melon liqueur": "liqueur",
        "mezcal": "tequila",
        "midori melon liqueur": "liqueur",
        # "milk",
        "mini-snickers bars": "sugar",
        "mint": "spice",
        "mint syrup": "sugar",
        "mountain dew": "soda",
        "nutmeg": "spice",
        "olive": "fruit",
        # "olive brine",
        "orange": "fruit",
        "orange bitters": "bitters",
        "orange curacao": "liqueur",
        "orange juice": "fruit juice",
        "orange peel": "fruit",
        "orange spiral": "fruit",
        "oreo cookie": "sugar",
        "orgeat syrup": "sugar",
        "ouzo": "aperitif",
        "papaya": "fruit",
        "passion fruit juice": "fruit juice",
        "passion fruit syrup": "sugar",
        "passoa": "liqueur",
        "peach bitters": "bitters",
        "peach brandy": "brandy",
        "peach nectar": "fruit juice",
        "peach schnapps": "schnapps",
        "peach vodka": "flavored vodka",
        "peachtree schnapps": "schnapps",
        "pepper": "spice",
        "peppermint extract": "spice",
        "pepsi cola": "soda",
        # "pernod",
        "peychaud bitters": "bitters",
        "pina colada mix": "mix",
        "pineapple": "fruit",
        "pineapple juice": "fruit juice",
        "pineapple syrup": "sugar",
        "pink lemonade": "soda",
        # "pisang ambon",
        # "pisco",
        "pomegranate juice": "fruit juice",
        # "port",
        "powdered sugar": "sugar",
        # "prosecco",
        "raisins": "fruit",
        "raspberry liqueur": "liqueur",
        "raspberry syrup": "sugar",
        "raspberry vodka": "flavored vodka",
        "red chili flakes": "spice",
        # "red wine",
        # "ricard",
        "root beer": "soda",
        "rose": "spice",
        "rosemary": "spice",
        "rosemary syrup": "sugar",
        "roses sweetened lime juice": "fruit juice",
        "rosso vermouth": "vermouth",
        "ruby port": "port",
        # "rum",
        "rumple minze": "schnapps",
        "rye whiskey": "whiskey",
        "salt": "spice",
        "salted chocolate": "sugar",
        # "sambuca",
        # "sarsaparilla",
        "schweppes russchian": "soda",
        # "scotch",
        "sherbet": "sugar",
        # "sherry",
        "sirup of roses": "sugar",
        "sloe gin": "gin",
        "soda water": "carbonated water",
        "sour mix": "mix",
        "southern comfort": "whiskey",
        # "soy sauce",
        "spiced rum": "rum",
        "sprite": "soda",
        "st. germain": "liqueur",
        "strawberries": "fruit",
        "strawberry liqueur": "liqueur",
        "strawberry schnapps": "schnapps",
        # "sugar",
        "sugar syrup": "sugar",
        "surge": "soda",
        # "sweet and sour",
        "sweet vermouth": "vermouth",
        "tabasco sauce": "spice",
        "tea": "fruit juice",
        "tennessee whiskey": "whiskey",
        # "tequila",
        "thyme": "spice",
        "tia maria": "liqueur",
        "tomato juice": "fruit juice",
        # "tonic water",
        "triple sec": "liqueur",
        "tropicana": "fruit juice",
        "vanilla": "spice",
        "vanilla extract": "spice",
        "vanilla ice-cream": "milk",
        "vanilla syrup": "sugar",
        "vanilla vodka": "flavored vodka",
        # "vermouth",
        # "vodka",
        # "water",
        "watermelon": "fruit",
        "whipped cream": "milk",
        "whipping cream": "milk",
        # "whiskey",
        "whisky": "whiskey",
        "white creme de menthe": "liqueur",
        "white rum": "rum",
        "white wine": "wine",
        "wild turkey": "bourbon",
        # "wine",
        "worcestershire sauce": "spice",
        "wormwood": "spice",
        "yellow chartreuse": "liqueur",
        "yoghurt": "milk",
        "yukon jack": "whiskey",
        # "zima",
    }

<IPython.core.display.Javascript object>

In [21]:
# Run the full query-and-preprocess pipeline and keep the resulting table.
df = query_and_preprocess_data()

<IPython.core.display.Javascript object>