In [1]:
import pandas as pd
import numpy as np
from database import engine

%load_ext nb_black

<IPython.core.display.Javascript object>

In [47]:
# Functions
def query_data():
    """Read drink names, ingredients 1-12 and cleaned measures 1-12 from all_cocktails into a Data Frame"""
    sql = """
    select
	    strdrink,
	    stringredient1,
	    stringredient2,
	    stringredient3,
	    stringredient4,
	    stringredient5,
	    stringredient6,
	    stringredient7,
	    stringredient8,
	    stringredient9,
	    stringredient10,
	    stringredient11,
	    stringredient12,
	    strmeasure1_clean,
	    strmeasure2_clean,
	    strmeasure3_clean,
	    strmeasure4_clean,
	    strmeasure5_clean,
	    strmeasure6_clean,
	    strmeasure7_clean,
	    strmeasure8_clean,
	    strmeasure9_clean,
	    strmeasure10_clean,
	    strmeasure11_clean,
        strmeasure12_clean
	    
	from 
	    all_cocktails
	;
        """

    # `engine` is the connection object imported from the local `database` module.
    cocktails = pd.read_sql_query(sql, engine)
    return cocktails


def cols_to_lower(df, columns):
    """Return the selected columns of df with their text lower-cased (df itself is not modified)"""

    def _lowercase(series):
        # Relies on the .str accessor, so each selected column must hold strings.
        return series.str.lower()

    subset = df[columns]
    return subset.apply(_lowercase)


def get_cols_list(df, starts_with_criteria):
    """Collect the column names of df that begin with the given prefix, in column order, as a list"""
    matching = []
    for name in df.columns:
        if name.startswith(starts_with_criteria):
            matching.append(name)
    return matching


def shape_data_long(df, list_of_cols, col_start_string, col_end_string, new_name):
    """Reshapes ingredients or measurements in long format

    Parameters
    ----------
    df : DataFrame
        Must contain a ``strdrink`` column plus every column in ``list_of_cols``.
    list_of_cols : list of str
        Numbered columns to unpivot (e.g. ``stringredient1`` ... ``stringredient12``).
    col_start_string, col_end_string : str
        Literal text removed from each column name so that only the number
        remains. An empty string means there is nothing to remove.
    new_name : str
        Name given to the column holding the unpivoted values.

    Returns
    -------
    DataFrame
        Columns ``strdrink``, ``new_name`` and the integer ``ingred_num``
        taken from the original column name.
    """

    def _column_number(names):
        numbers = names
        for fragment in (col_start_string, col_end_string):
            # Skip empty fragments, and match the rest literally: without
            # regex=False, pandas < 2.0 interprets the fragment as a regular
            # expression, so the result would depend on the installed version.
            if fragment:
                numbers = numbers.str.replace(fragment, "", regex=False)
        return numbers.astype("int")

    return (
        df[["strdrink"] + list_of_cols]
        .melt(id_vars="strdrink", value_vars=list_of_cols)
        .assign(ingred_num=lambda df_: _column_number(df_["variable"]))
        .drop("variable", axis=1)
        .rename({"value": new_name}, axis=1)
    )


def merge_long(table_1, table_2):
    """Joins the long ingredients and measurements tables on drink and ingredient number, returns a long Data Frame without incomplete rows"""
    join_keys = ["strdrink", "ingred_num"]

    paired = table_1.merge(table_2, on=join_keys)
    ordered = paired.sort_values(join_keys)

    # The position number is only needed for matching and ordering; rows with
    # any missing value are discarded once it has been removed.
    complete = ordered.drop("ingred_num", axis=1).dropna()
    return complete.reset_index(drop=True)


def pivot_wide(df):
    """Pivot the long drink/ingredient/amount table into one row per drink.

    Each distinct ``ingredient`` becomes a column holding the summed ``amount``
    for that drink; ingredients a drink does not use are filled with 0.
    ``strdrink`` is returned as a regular column.
    """
    return (
        df.pivot_table(
            # The string "sum" selects the built-in aggregation. Passing the
            # np.sum callable raises a FutureWarning on pandas >= 2.1.
            index="strdrink",
            columns="ingredient",
            values="amount",
            aggfunc="sum",
        )
        .fillna(0)
        .reset_index()
    )


def calculate_row_sum(df):
    """Function returns a copy of the dataframe with a row_sum column holding each row's total over its numeric columns"""
    numeric_part = df.select_dtypes(include=np.number)
    totals = numeric_part.sum(axis=1)
    return df.assign(row_sum=totals)


def get_prop_cols(df):
    """Function takes in dataframe, returns every column name except strdrink and row_sum"""
    not_proportional = ("strdrink", "row_sum")
    return [name for name in df.columns if name not in not_proportional]


def calculate_row_prop(df, prop_cols):
    """Function divides each of the given columns by the row_sum column, returns a dataframe of only those columns"""
    row_totals = df["row_sum"]
    selected = df[prop_cols]
    # axis=0 lines the totals up with the rows, so every column is divided by
    # the total of its own row.
    return selected.div(row_totals, axis=0)


def query_and_preprocess_data():
    """Query the cocktails, recode ingredients and return one row per drink with each ingredient as a proportion of the row total"""
    raw = query_data()

    ingredient_names = get_cols_list(raw, "stringredient")
    measure_names = get_cols_list(raw, "strmeasure")

    raw[ingredient_names] = cols_to_lower(raw, ingredient_names)

    long_ingredients = shape_data_long(
        raw, ingredient_names, "stringredient", "", "ingredient"
    )
    long_measures = shape_data_long(
        raw, measure_names, "strmeasure", "_clean", "amount"
    )

    # NOTE(review): recode_ingredients is not defined in the cells shown here and
    # ingredient_dict is a global assigned in a later cell; both must already
    # exist in the kernel when this function is called. Confirm where
    # recode_ingredients lives.
    recoded = recode_ingredients(
        merge_long(long_ingredients, long_measures), ingredient_dict
    )

    wide = calculate_row_sum(pivot_wide(recoded))

    share_cols = get_prop_cols(wide)
    wide[share_cols] = calculate_row_prop(wide, share_cols)

    return wide.drop("row_sum", axis=1)

<IPython.core.display.Javascript object>

In [44]:
# Lookup table from a raw, lower-case ingredient name to a coarser category
# label. query_and_preprocess_data() passes it to recode_ingredients(), so this
# cell has to be run before that function is called.
#
# Names that appear only as comments have no category assigned yet.
# NOTE(review): how unmapped names are treated depends on recode_ingredients,
# which is not shown here; presumably they are left unchanged. Confirm.
ingredient_dict = {
    "151 proof rum": "rum",
    "7-up": "soda",
    # "absinthe",
    "absolut citron": "flavored vodka",
    "absolut kurant": "flavored vodka",
    "absolut peppar": "flavored vodka",
    "absolut vodka": "vodka",
    # "advocaat",
    "agave syrup": "sugar",
    "ale": "beer",
    "allspice": "spice",
    "almond": "nut",
    "almond flavoring": "nut",
    "amaretto": "liqueur",
    "amaro montenegro": "liqueur",
    "angelica root": "herb",
    "angostura bitters": "bitters",
    "anis": "spice",
    "anise": "spice",
    "anisette": "liqueur",
    # "aperol",
    "apfelkorn": "liqueur",
    "apple": "fruit",
    "apple brandy": "brandy",
    "apple cider": "fruit juice",
    "apple juice": "fruit juice",
    "apple schnapps": "schnapps",
    "applejack": "brandy",
    "apricot": "fruit",
    "apricot brandy": "brandy",
    "apricot nectar": "fruit juice",
    # "aquavit" ,
    "asafoetida": "spice",
    "añejo rum": "rum",
    "bacardi limon": "flavored rum",
    "baileys irish cream": "baileys",
    "banana": "fruit",
    "banana liqueur": "liqueur",
    "basil": "herb",
    # "beer",
    "benedictine": "liqueur",
    "berries": "fruit",
    # NOTE(review): "bitter lemon" may refer to the soft drink rather than a fruit; confirm the category.
    "bitter lemon": "fruit",
    "bitters": "bitters",
    "black pepper": "spice",
    "black sambuca": "liqueur",
    "blackberries": "fruit",
    "blackberry brandy": "brandy",
    "blackcurrant cordial": "liqueur",
    "blackcurrant squash": "liqueur",
    "blackstrap rum": "rum",
    "blended scotch": "scotch",
    "blended whiskey": "whiskey",
    "blood orange": "fruit",
    "blue curacao": "liqueur",
    "blueberries": "fruit",
    "bourbon": "bourbon",
    "brandy": "brandy",
    "brown sugar": "sugar",
    # "butter",
    "butterscotch schnapps": "schnapps",
    # "cachaca",
    # "campari",
    "candy": "sugar",
    "cantaloupe": "fruit",
    "caramel coloring": "sugar",
    "caramel sauce": "sugar",
    "carbonated soft drink": "soda",
    # "carbonated water",
    "cardamom": "spice",
    "carrot": "fruit",
    "cayenne pepper": "spice",
    "celery salt": "spice",
    "chambord raspberry liqueur": "liqueur",
    # "champagne",
    "cherries": "fruit",
    "cherry": "fruit",
    "cherry brandy": "brandy",
    "cherry grenadine": "sugar",
    # "cherry heering",
    "cherry juice": "fruit juice",
    "cherry liqueur": "liqueur",
    "chocolate": "sugar",
    "chocolate ice-cream": "ice cream",
    "chocolate liqueur": "liqueur",
    "chocolate milk": "milk",
    "chocolate sauce": "sugar",
    "chocolate syrup": "sugar",
    "cider": "fruit juice",
    "cinnamon": "spice",
    "cloves": "spice",
    # "club soda",
    "coca-cola": "soda",
    "cocoa powder": "spice",
    "coconut liqueur": "liqueur",
    "coconut milk": "milk",
    # NOTE(review): "bacardi limon" maps to "flavored rum" but "coconut rum" maps to plain "rum"; confirm this is intended.
    "coconut rum": "rum",
    "coconut syrup": "sugar",
    # "coffee",
    "coffee brandy": "brandy",
    "coffee liqueur": "liqueur",
    # "cognac",
    "cointreau": "liqueur",
    "condensed milk": "milk",
    "coriander": "spice",
    "corn syrup": "sugar",
    # "cornstarch",
    "corona": "beer",
    "cranberries": "fruit",
    "cranberry juice": "fruit juice",
    "cranberry vodka": "flavored vodka",
    "cream": "milk",
    "cream of coconut": "milk",
    "creme de banane": "liqueur",
    "creme de cacao": "liqueur",
    "creme de cassis": "liqueur",
    "creme de mure": "liqueur",
    "crown royal": "whiskey",
    "cucumber": "fruit",
    "cumin seed": "spice",
    "curacao": "liqueur",
    "daiquiri mix": "mix",
    "dark creme de cacao": "liqueur",
    "dark rum": "rum",
    "demerara sugar": "sugar",
    "dr. pepper": "soda",
    # "drambuie",
    # "dry vermouth",
    # "dubonnet rouge",
    # "egg",
    "egg white": "egg",
    "egg yolk": "egg",
    "elderflower cordial": "liqueur",
    # "erin cream",
    "espresso": "coffee",
    # "everclear",
    "falernum": "liqueur",
    "fennel seeds": "spice",
    "figs": "fruit",
    # ---- From here to the end of the dict, no name has a category yet. ----
    #     "firewater",
    #     "food coloring",
    #     "frangelico",
    #     "fresca",
    #     "fresh lemon juice",
    #     "fresh lime juice",
    #     "fruit",
    #     "fruit juice",
    #     "fruit punch",
    #     "galliano",
    #     "gin",
    #     "ginger",
    #     "ginger ale",
    #     "ginger beer",
    #     "ginger syrup",
    #     "glycerine",
    #     "godiva liqueur",
    #     "gold rum",
    #     "gold tequila",
    #     "goldschlager",
    #     "grain alcohol",
    #     "grand marnier",
    #     "grape juice",
    #     "grape soda",
    #     "grapefruit juice",
    #     "grapes",
    #     "green chartreuse",
    #     "green creme de menthe",
    #     "grenadine",
    #     "guava juice",
    #     "guinness stout",
    #     "half-and-half",
    #     "heavy cream",
    #     "honey",
    #     "honey syrup",
    #     "hot chocolate",
    #     "hot damn",
    #     "hot sauce",
    #     "hpnotiq",
    #     "ice",
    #     "iced tea",
    #     "irish cream",
    #     "irish whiskey",
    #     "islay single malt scotch",
    #     "jack daniels",
    #     "jagermeister",
    #     "jello",
    #     "jim beam",
    #     "johnnie walker",
    #     "jägermeister",
    #     "kahlua",
    #     "kirschwasser",
    #     "kiwi",
    #     "kiwi liqueur",
    #     "kool-aid",
    #     "kummel",
    #     "lager",
    #     "lavender",
    #     "lemon",
    #     "lemon juice",
    #     "lemon peel",
    #     "lemon vodka",
    #     "lemon-lime soda",
    #     "lemonade",
    #     "licorice root",
    #     "light cream",
    #     "light rum",
    #     "lillet",
    #     "lillet blanc",
    #     "lime",
    #     "lime juice",
    #     "lime juice cordial",
    #     "lime peel",
    #     "lime vodka",
    #     "limeade",
    #     "malibu rum",
    #     "mango",
    #     "maple syrup",
    #     "maraschino cherry",
    #     "maraschino liqueur",
    #     "marjoram leaves",
    #     "marshmallows",
    #     "maui",
    #     "melon liqueur",
    #     "mezcal",
    #     "midori melon liqueur",
    #     "milk",
    #     "mini-snickers bars",
    #     "mint",
    #     "mint syrup",
    #     "mountain dew",
    #     "nutmeg",
    #     "olive",
    #     "olive brine",
    #     "orange",
    #     "orange bitters",
    #     "orange curacao",
    #     "orange juice",
    #     "orange peel",
    #     "orange spiral",
    #     "oreo cookie",
    #     "orgeat syrup",
    #     "ouzo",
    #     "papaya",
    #     "passion fruit juice",
    #     "passion fruit syrup",
    #     "passoa",
    #     "peach bitters",
    #     "peach brandy",
    #     "peach nectar",
    #     "peach schnapps",
    #     "peach vodka",
    #     "peachtree schnapps",
    #     "pepper",
    #     "peppermint extract",
    #     "pepsi cola",
    #     "pernod",
    #     "peychaud bitters",
    #     "pina colada mix",
    #     "pineapple",
    #     "pineapple juice",
    #     "pineapple syrup",
    #     "pink lemonade",
    #     "pisang ambon",
    #     "pisco",
    #     "pomegranate juice",
    #     "port",
    #     "powdered sugar",
    #     "prosecco",
    #     "raisins",
    #     "raspberry liqueur",
    #     "raspberry syrup",
    #     "raspberry vodka",
    #     "red chili flakes",
    #     "red wine",
    #     "ricard",
    #     "root beer",
    #     "rose",
    #     "rosemary",
    #     "rosemary syrup",
    #     "roses sweetened lime juice",
    #     "rosso vermouth",
    #     "ruby port",
    #     "rum",
    #     "rumple minze",
    #     "rye whiskey",
    #     "salt",
    #     "salted chocolate",
    #     "sambuca",
    #     "sarsaparilla",
    #     "schweppes russchian",
    #     "scotch",
    #     "sherbet",
    #     "sherry",
    #     "sirup of roses",
    #     "sloe gin",
    #     "soda water",
    #     "sour mix",
    #     "southern comfort",
    #     "soy sauce",
    #     "spiced rum",
    #     "sprite",
    #     "st. germain",
    #     "strawberries",
    #     "strawberry liqueur",
    #     "strawberry schnapps",
    #     "sugar",
    #     "sugar syrup",
    #     "surge",
    #     "sweet and sour",
    #     "sweet vermouth",
    #     "tabasco sauce",
    #     "tea",
    #     "tennessee whiskey",
    #     "tequila",
    #     "thyme",
    #     "tia maria",
    #     "tomato juice",
    #     "tonic water",
    #     "triple sec",
    #     "tropicana",
    #     "vanilla",
    #     "vanilla extract",
    #     "vanilla ice-cream",
    #     "vanilla syrup",
    #     "vanilla vodka",
    #     "vermouth",
    #     "vodka",
    #     "water",
    #     "watermelon",
    #     "whipped cream",
    #     "whipping cream",
    #     "whiskey",
    #     "whisky",
    #     "white creme de menthe",
    #     "white rum",
    #     "white wine",
    #     "wild turkey",
    #     "wine",
    #     "worcestershire sauce",
    #     "wormwood",
    #     "yellow chartreuse",
    #     "yoghurt",
    #     "yukon jack",
    #     "zima",
}

<IPython.core.display.Javascript object>