In [18]:
import requests
import json
import pandas as pd
import numpy as np
import requests
import plotly.graph_objects as go
from datetime import datetime as dt
from bs4 import BeautifulSoup
from math import log, log2, ceil
from re import search, sub, IGNORECASE
from os import environ
import sys
import pickle
from sqlalchemy import Table, Column, Integer, String, MetaData, create_engine
from psycopg2 import connect
from unidecode import unidecode

In [292]:
def get_env_creds():
    return dict(
        DB_HOST=environ["COCKTAILS_HOST"],
        DB_PW=environ["COCKTAILS_PWD"],
        DB_PORT=environ["COCKTAILS_PORT"],
        DB_USER=environ["COCKTAILS_USER"],
        DB_NAME=environ["COCKTAILS_DB"],
    )


def get_creds():
    creds = get_env_creds()
    return (
        creds.get("DB_USER"),
        creds.get("DB_PW"),
        creds.get("DB_HOST"),
        creds.get("DB_NAME"),
        creds.get("DB_PORT"),
    )


def create_conn_string(library="psycopg2"):
    DB_USER, DB_PW, DB_HOST, DB_NAME, DB_PORT = get_creds()
    conn_string = (
        f"postgresql+{library}://{DB_USER}:{DB_PW}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
    )
    return conn_string


def save_object(obj, filename):
    with open(filename, "wb") as outp:
        dump(obj, outp, HIGHEST_PROTOCOL)


def load_object(filename):
    with open(filename, "rb") as file:
        return load(file)


def reconcile_dropdown_filter(in_filter):
    return "" if in_filter is None else in_filter


def create_OR_filter_string(filters):
    filters = [
        "|".join(reconcile_dropdown_filter(filter))
        if isinstance(filter, list)
        else reconcile_dropdown_filter(filter)
        for filter in filters
    ]
    filters = [i for i in filters if i]

    ret_filters = "|".join(filters)
    ret_filters = sub("\|{2,}", "|", ret_filters)
    ret_filters = sub("\|$|^\|", "", ret_filters)

    return ret_filters


def apply_AND_filters(filters, df):
    filters = create_OR_filter_string(filters)
    filter_list = filters.split("|")
    if "" in filter_list:
        filter_list.remove("")

    if len(filter_list) > 0:
        filtered_df = df.copy()
        for f in filter_list:
            cocktail_ids = filtered_df.loc[
                (
                    filtered_df["mapped_ingredient"].str.contains(
                        f, regex=True, flags=IGNORECASE
                    )
                )
                | (
                    filtered_df["recipe_name"].str.contains(
                        f, regex=True, flags=IGNORECASE
                    )
                ),
                "cocktail_id",
            ].values.tolist()
            filtered_df = filtered_df.loc[
                filtered_df["cocktail_id"].isin(cocktail_ids), :
            ]

        cocktail_ids = filtered_df["cocktail_id"].unique().tolist()
    else:
        cocktail_ids = df["cocktail_id"].unique().tolist()

    return df.loc[df["cocktail_id"].isin(cocktail_ids), :]


def create_set_from_series(ser):
    return set(ser.unique())


def convert_set_to_sorted_list(s):
    ret = [i for i in s if i]
    ret.sort()
    return ret


def create_filter_lists(df):
    df_non_null = df.loc[(isnull(df["alcohol_type"])) & (~isnull(df["ingredient"])), :]
    ingredient_set = create_set_from_series(df_non_null["mapped_ingredient"])
    garnish_set = create_set_from_series(
        df_non_null.loc[
            (
                df_non_null["ingredient"].str.contains(
                    "^Garnish: ", regex=True, flags=IGNORECASE
                )
            )
            | (df_non_null["unit"] == "garnish"),
            "mapped_ingredient",
        ]
    )

    bitter_set = create_set_from_series(
        df_non_null.loc[
            (
                df_non_null["mapped_ingredient"].str.contains(
                    "bitter", regex=True, flags=IGNORECASE
                )
            )
            & (isnull(df_non_null["alcohol_type"])),
            "mapped_ingredient",
        ]
    )

    syrup_set = create_set_from_series(
        df_non_null.loc[
            (
                df_non_null["mapped_ingredient"].str.contains(
                    "syrup", regex=True, flags=IGNORECASE
                )
            )
            & (isnull(df_non_null["alcohol_type"])),
            "mapped_ingredient",
        ]
    )

    other_ingredients = ingredient_set - garnish_set - bitter_set - syrup_set

    other_list = convert_set_to_sorted_list(other_ingredients)
    garnish_list = convert_set_to_sorted_list(garnish_set)
    bitter_list = convert_set_to_sorted_list(bitter_set)
    syrup_list = convert_set_to_sorted_list(syrup_set)

    return {
        "other": other_list,
        "garnish": garnish_list,
        "bitter": bitter_list,
        "syrup": syrup_list,
    }


def get_favorite(user_id, cocktail_id):
    sql = f"""
    SELECT favorite 
    FROM user_favorites 
    WHERE user_id={user_id} AND cocktail_id={cocktail_id}
    """

    return run_query(sql)


def get_bookmark(user_id, cocktail_id):
    sql = f"""
    SELECT bookmark 
    FROM user_bookmarks 
    WHERE user_id={user_id} AND cocktail_id={cocktail_id}
    """

    return run_query(sql)


def get_cocktail_nps(cocktail_id):
    sql = (
        f"select cocktail_nps from vw_cocktail_ratings where cocktail_id={cocktail_id}"
    )

    return run_query(sql)


def update_bookmark(user_id, cocktail_id, bool_val, sql_only=False, sqls_to_run=None):
    sql = f"""
            insert into user_bookmarks(user_id,cocktail_id,bookmark,last_updated_ts)
            values({user_id}, {cocktail_id}, {bool_val}, now())
            on conflict(user_id, cocktail_id)
            do
            update set bookmark=EXCLUDED.bookmark;
            """
    if sqls_to_run is not None:
        run_query(sqls_to_run)
    else:
        if sql_only:
            return sql
        else:
            run_query(sql)

    return None


def update_favorite(user_id, cocktail_id, bool_val, sql_only=False, sqls_to_run=None):
    sql = f"""
            insert into user_favorites(user_id,cocktail_id,favorite,last_updated_ts)
            values({user_id}, {cocktail_id}, {bool_val}, now())
            on conflict(user_id, cocktail_id)
            do
            update set favorite=EXCLUDED.favorite;
            """
    if sqls_to_run is not None:
        run_query(sqls_to_run)
    else:
        if sql_only:
            return sql
        else:
            run_query(sql)

    return None


def update_rating(user_id, cocktail_id, rating, sql_only=False, sqls_to_run=None):
    sql = f"""
            insert into user_ratings(user_id,cocktail_id,rating,last_updated_ts)
            values({user_id}, {cocktail_id}, {rating}, now())
            on conflict(user_id, cocktail_id)
            do
            update set rating=EXCLUDED.rating;
            """
    if sqls_to_run is not None:
        run_query(sqls_to_run)
    else:
        if sql_only:
            return sql
        else:
            run_query(sql)

    return None


def get_my_bar(user_id, return_df=False):
    my_bar, columns = run_query(
        f"""
            with user_bar_ids as (
                select unnest(ingredient_list) as ingredient_id
                from user_bar
                where user_id = {user_id}
            )
            select i.*
            from ingredients i
            join user_bar_ids ubi
            ON i.ingredient_id = ubi.ingredient_id
        """,
        True,
    )

    if return_df:
        return DataFrame(my_bar, columns=columns)

    return my_bar, columns


def get_available_cocktails(user_id, include_garnish=True, return_df=True):
    my_bar = get_my_bar(user_id, True)
    my_ingredients = my_bar["ingredient_id"].to_list()
    my_ingredients_str = ",".join(map(str, my_ingredients))
    if len(my_ingredients) == 0:
        ingredient_where = "where ingredient_id < 0"
    else:
        ingredient_where = f"where ingredient_id IN ({my_ingredients_str})"

    if include_garnish:
        sql_str = ""
    else:
        sql_str = "and lower(i.ingredient) NOT LIKE 'garnish%'"

    available_cocktails, columns = run_query(
        f"""
            with my_bar as (
                select distinct ingredient_id, ingredient, mapped_ingredient
                from ingredients
                {ingredient_where}
            ), my_cocktails as (
            select c.*,
                   i.ingredient, 
                   i.mapped_ingredient, 
                   my_bar.ingredient_id IS NOT NULL as have_ingredient
            from cocktails c
            join cocktails_ingredients ci
                on c.cocktail_id = ci.cocktail_id
            join ingredients i
                on ci.ingredient_id = i.ingredient_id
                {sql_str}
            left join my_bar
                on i.ingredient_id = my_bar.ingredient_id
            )
            select cocktail_id,
                   recipe_name,
                   link,
                   have_ingredient,
                   COUNT(*) as num_ingredients,
                   ARRAY_AGG(ingredient) as ingredients,
                   ARRAY_AGG(mapped_ingredient) as mapped_ingredients
            from my_cocktails
            group by 1,2,3,4
        """,
        return_df,
    )

    available_cocktails_df = DataFrame(available_cocktails, columns=columns)

    pivoted = available_cocktails_df.pivot_table(
        index=["cocktail_id", "recipe_name", "link"],
        columns="have_ingredient",
        values=["ingredients", "mapped_ingredients", "num_ingredients"],
        aggfunc="max",
    ).reset_index()

    empty_list = [[] for s in range(pivoted.shape[0])]
    if ("ingredients", True) not in pivoted.columns:
        pivoted[("ingredients", True)] = empty_list
        pivoted[("mapped_ingredients", True)] = empty_list
        pivoted[("num_ingredients", True)] = 0

    if ("ingredients", False) not in pivoted.columns:
        pivoted[("ingredients", False)] = empty_list
        pivoted[("mapped_ingredients", False)] = empty_list
        pivoted[("num_ingredients", False)] = 0

    pivoted.columns = [
        "_".join(map(str, col)) if col[1] != "" else col[0] for col in pivoted.columns
    ]

    pivoted["perc_ingredients_in_bar"] = where(
        isnull(pivoted["num_ingredients_True"]), 0, pivoted["num_ingredients_True"]
    ) / (
        where(
            isnull(pivoted["num_ingredients_False"]),
            0,
            pivoted["num_ingredients_False"],
        )
        + where(
            isnull(pivoted["num_ingredients_True"]),
            0,
            pivoted["num_ingredients_True"],
        )
    )
    pivoted["perc_ingredients_in_bar"] = where(
        isnull(pivoted["perc_ingredients_in_bar"]),
        0,
        pivoted["perc_ingredients_in_bar"],
    )

    return pivoted


def update_bar(user_id, ingredient_list, sql_only=False, sqls_to_run=None):
    ingredient_sql = "{" + ",".join(map(str, ingredient_list)) + "}"
    sql = f"""
            insert into user_bar(user_id,ingredient_list,last_updated_ts)
            values({user_id}, '{ingredient_sql}', now())
            on conflict(user_id)
            do
            update set ingredient_list=EXCLUDED.ingredient_list;
            """
    if sqls_to_run is not None:
        run_query(sqls_to_run)
    else:
        if sql_only:
            return sql
        else:
            run_query(sql)

    return None


def run_query(sql, ret_columns=False):
    DB_USER, DB_PW, DB_HOST, DB_NAME, DB_PORT = get_creds()
    with connect(
        database=DB_NAME,
        user=DB_USER,
        password=DB_PW,
        host=DB_HOST,
        port=DB_PORT,
        sslmode="require",
    ) as conn:
        with conn.cursor() as cursor:
            cursor.execute(sql)
            # columns = [desc[0] for desc in cursor.description]
            if search("insert|update|create|delete", sql, flags=IGNORECASE):
                results = None
                columns = None
            else:
                columns = [desc[0] for desc in cursor.description]
                results = cursor.fetchall()

    if ret_columns:
        return results, columns

    return results

In [58]:
liquor_links = {
    "bourbon": "https://www.liquor.com/bourbon-cocktails-4779435",
    "vodka": "https://www.liquor.com/vodka-cocktails-4779437",
    "rum": "https://www.liquor.com/rum-cocktails-4779434",
    "scotch": "https://www.liquor.com/scotch-cocktails-4779431",
    "rye_whiskey": "https://www.liquor.com/rye-whiskey-cocktails-4779433",
    "other_whiskey": "https://www.liquor.com/whiskey-cocktails-4779430",
    "tequila_mezcal": "https://www.liquor.com/tequila-and-mezcal-cocktails-4779429",
    "cognac_brandy": "https://www.liquor.com/brandy-cocktails-4779428",
    "other": "https://www.liquor.com/other-cocktails-4779427",
    "gins": "https://www.liquor.com/gin-cocktails-4779436"
}

In [30]:
get_recipe_links("bourbon")

In [59]:
master_list = list()
for liquor, liquor_link in liquor_links.items():
    print(liquor)
    master_list.append(get_recipe_links(liquor))

bourbon
vodka
rum
scotch
rye_whiskey
other_whiskey
tequila_mezcal
cognac_brandy
other
gins


In [61]:
len(master_list)

10

In [63]:
master_list_flat = list()
for liquor in master_list:
    for recipe in liquor.get("recipes"):
        if recipe not in master_list:
            master_list_flat.append(recipe)

In [64]:
len(master_list_flat)

941

In [66]:
current_list = pd.read_parquet("../data/cocktails.parquet")

In [67]:
current_list

Unnamed: 0,recipe_name,image,link,ingredients
0,Left Hand,https://www.liquor.com/thmb/AiDRNEsVOf0BcyZyZR...,https://www.liquor.com/left-hand-cocktail-reci...,"[{'name': 'bourbon', 'quantity': '1 1/2', 'uni..."
1,Kentucky Buck,https://www.liquor.com/thmb/eBDxCvX1UkYRrDBRWO...,https://www.liquor.com/kentucky-buck-cocktail-...,"[{'name': 'small strawberries, hulled', 'quant..."
2,Improved Whiskey,https://www.liquor.com/thmb/Sitwqz4ThZZcrRIOC0...,https://www.liquor.com/improved-whiskey-cockta...,"[{'name': 'bourbon or rye whiskey', 'quantity'..."
3,Mint Julep,https://www.liquor.com/thmb/Q4PaMi8kooZ_ZGpvro...,https://www.liquor.com/recipes/mint-julep/,"[{'name': 'mint leaves', 'quantity': '8', 'uni..."
4,Whiskey Smash,https://www.liquor.com/thmb/IjKQql6LcjTyCWuqbi...,https://www.liquor.com/recipes/whiskey-smash/,"[{'name': 'lemon wedges', 'quantity': '3', 'un..."
...,...,...,...,...
645,Bear With Me Honey,https://www.liquor.com/thmb/XuqefIn6mQtfumOium...,https://www.liquor.com/recipes/bear-with-me-ho...,"[{'name': 'Barr Hill Tom Cat gin', 'quantity':..."
646,Dark Side,https://www.liquor.com/thmb/pMRUE9SQYE9wgZRfxF...,https://www.liquor.com/recipes/darkside/,"[{'name': 'gin', 'quantity': '2 1/2', 'unit': ..."
647,CBD Gin & Tonic,https://www.liquor.com/thmb/7bSy9_lFVe_Z7UrfLq...,https://www.liquor.com/recipes/cbd-gin-and-tonic/,"[{'name': 'gin', 'quantity': '2', 'unit': 'oun..."
648,Crystal Ramos Gin Fizz,https://www.liquor.com/thmb/m9fHbIA9T73NCoVmNJ...,https://www.liquor.com/recipes/crystal-ramos-g...,"[{'name': 'Tanqueray gin', 'quantity': '1 1/4'..."


In [72]:
new_list = pd.DataFrame(master_list_flat).merge(
    current_list.assign(index=1)[["recipe_name", "index"]], on="recipe_name", how="left"
)

In [75]:
new_list.to_parquet("../data/cocktail.parquet")

In [6]:
DB_HOST = os.environ['COCKTAILS_HOST']
DB_PW = os.environ["COCKTAILS_PWD"]
DB_PORT = os.environ["COCKTAILS_PORT"]
DB_USER = os.environ["COCKTAILS_USER"]
DB_NAME = os.environ["COCKTAILS_DB"]
# DATABASE_URL = "/Users/jeremycolon/jc_git/cocktails-database/data/db.sqlite"

In [7]:
try:
    # link to your database
    conn = psycopg2.connect(
        database=DB_NAME,
        user=DB_USER,
        password=DB_PW,
        host=DB_HOST,
        port=DB_PORT,
        sslmode="require"
    )
except Exception as e:
    print("I am unable to connect to the database", e)

In [21]:
res, columns = run_query(
    "select * from public.cocktails", True
)
current_list = pd.DataFrame(res, columns=columns)

In [32]:
current_list

Unnamed: 0,cocktail_id,recipe_name,image,link
0,0,Left Hand,https://www.liquor.com/thmb/AiDRNEsVOf0BcyZyZR...,https://www.liquor.com/left-hand-cocktail-reci...
1,1,Kentucky Buck,https://www.liquor.com/thmb/eBDxCvX1UkYRrDBRWO...,https://www.liquor.com/kentucky-buck-cocktail-...
2,2,Improved Whiskey,https://www.liquor.com/thmb/Sitwqz4ThZZcrRIOC0...,https://www.liquor.com/improved-whiskey-cockta...
3,3,Mint Julep,https://www.liquor.com/thmb/Q4PaMi8kooZ_ZGpvro...,https://www.liquor.com/recipes/mint-julep/
4,4,Whiskey Smash,https://www.liquor.com/thmb/IjKQql6LcjTyCWuqbi...,https://www.liquor.com/recipes/whiskey-smash/
...,...,...,...,...
645,646,Dark Side,https://www.liquor.com/thmb/pMRUE9SQYE9wgZRfxF...,https://www.liquor.com/recipes/darkside/
646,647,CBD Gin & Tonic,https://www.liquor.com/thmb/7bSy9_lFVe_Z7UrfLq...,https://www.liquor.com/recipes/cbd-gin-and-tonic/
647,648,Crystal Ramos Gin Fizz,https://www.liquor.com/thmb/m9fHbIA9T73NCoVmNJ...,https://www.liquor.com/recipes/crystal-ramos-g...
648,649,CBD Dirty Martini,https://www.liquor.com/thmb/gD8FDqXJDIr1fJFXS5...,https://www.liquor.com/recipes/cbd-dirty-martini/


In [56]:
new_list = pd.read_parquet("../data/cocktail.parquet")

In [57]:
new_list["rank"] = new_list.groupby(["recipe_name", "image", "link"])["index"].rank(method="first")

In [58]:
new_list = new_list.fillna(0)

In [59]:
new_list = new_list.loc[new_list["rank"] <= 1]

In [60]:
new_list

Unnamed: 0,recipe_name,image,link,ingredients,index,rank
0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,"[{'name': 'bonded bourbon, preferably Evan Wil...",0.0,0.0
1,A Little Chili Punch,https://www.liquor.com/thmb/07fw3CG6OUX4XcF3nX...,https://www.liquor.com/a-little-chili-punch-re...,"[{'name': 'whole lemons', 'quantity': '5', 'un...",0.0,0.0
2,Night Tripper,https://www.liquor.com/thmb/StBH0_s9AhJb-j5nF-...,https://www.liquor.com/night-tripper-cocktail-...,"[{'name': 'bourbon', 'quantity': '1 3/4 ', 'un...",0.0,0.0
3,Fanciulli,https://www.liquor.com/thmb/GjMtP-2krR9EwwXQFq...,https://www.liquor.com/fanciulli-cocktail-reci...,"[{'name': 'bourbon or rye whiskey', 'quantity'...",0.0,0.0
4,Left Hand,https://www.liquor.com/thmb/AiDRNEsVOf0BcyZyZR...,https://www.liquor.com/left-hand-cocktail-reci...,"[{'name': 'bourbon', 'quantity': '1 1/2', 'uni...",1.0,1.0
...,...,...,...,...,...,...
939,Champagne Holiday Punch,https://www.liquor.com/thmb/K_rEEH-OC80eoXAPep...,https://www.liquor.com/recipes/champagne-holid...,"[{'name': 'genever', 'quantity': '1 1/2', 'uni...",1.0,1.0
940,Bear With Me Honey,https://www.liquor.com/thmb/XuqefIn6mQtfumOium...,https://www.liquor.com/recipes/bear-with-me-ho...,"[{'name': 'Barr Hill Tom Cat gin', 'quantity':...",1.0,1.0
941,Dark Side,https://www.liquor.com/thmb/pMRUE9SQYE9wgZRfxF...,https://www.liquor.com/recipes/darkside/,"[{'name': 'gin', 'quantity': '2 1/2', 'unit': ...",1.0,1.0
942,CBD Gin & Tonic,https://www.liquor.com/thmb/7bSy9_lFVe_Z7UrfLq...,https://www.liquor.com/recipes/cbd-gin-and-tonic/,"[{'name': 'gin', 'quantity': '2', 'unit': 'oun...",1.0,1.0


In [203]:
new_cocktails = new_list.loc[new_list["index"]==0]

In [204]:
new_cocktails

Unnamed: 0,recipe_name,image,link,ingredients,index,rank
0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,"[{'name': 'bonded bourbon, preferably Evan Wil...",0.0,0.0
1,A Little Chili Punch,https://www.liquor.com/thmb/07fw3CG6OUX4XcF3nX...,https://www.liquor.com/a-little-chili-punch-re...,"[{'name': 'whole lemons', 'quantity': '5', 'un...",0.0,0.0
2,Night Tripper,https://www.liquor.com/thmb/StBH0_s9AhJb-j5nF-...,https://www.liquor.com/night-tripper-cocktail-...,"[{'name': 'bourbon', 'quantity': '1 3/4 ', 'un...",0.0,0.0
3,Fanciulli,https://www.liquor.com/thmb/GjMtP-2krR9EwwXQFq...,https://www.liquor.com/fanciulli-cocktail-reci...,"[{'name': 'bourbon or rye whiskey', 'quantity'...",0.0,0.0
11,High King Highball,https://www.liquor.com/thmb/I3XnI6PVyP4Ma5UzQ0...,https://www.liquor.com/recipes/high-king-highb...,"[{'name': 'raspberries', 'quantity': '3', 'uni...",0.0,0.0
101,Lavender Mule,https://www.liquor.com/thmb/eNaqKuFm1OPqkHMC1m...,https://www.liquor.com/lavender-mule-cocktail-...,"[{'name': 'vodka', 'quantity': '1 1/2', 'unit'...",0.0,0.0
105,Homemade Zima,https://www.liquor.com/thmb/SmonZZmJEZtn3_Rg-4...,https://www.liquor.com/homemade-zima-cocktail-...,"[{'name': 'vodka', 'quantity': '1 1/2', 'unit'...",0.0,0.0
146,Kir Royale,https://www.liquor.com/thmb/ug3rioghdw9lQNRjHa...,https://www.liquor.com/recipes/how-to-make-a-k...,"[{'name': 'creme de cassis', 'quantity': '1/2'...",0.0,0.0
173,Vegan Milk Punch,https://www.liquor.com/thmb/Z0rE-ELkuKeMEFPsbS...,https://www.liquor.com/vegan-milk-punch-7973822,"[{'name': 'cachaça', 'quantity': '1 1/2', 'uni...",0.0,0.0
174,Frozen Piña Colada,https://www.liquor.com/thmb/DBqU37WdOMZCcoXVud...,https://www.liquor.com/frozen-pina-colada-cock...,"[{'name': 'light rum', 'quantity': '2', 'unit'...",0.0,0.0


In [205]:
cocktails_exploded = new_cocktails.explode("ingredients")

In [206]:
cocktails_exploded.reset_index()

Unnamed: 0,level_0,recipe_name,image,link,ingredients,index,rank
0,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,"{'name': 'bonded bourbon, preferably Evan Will...",0.0,0.0
1,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,"{'name': 'lemon juice, freshly squeezed', 'qua...",0.0,0.0
2,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,"{'name': 'rich simple syrup', 'quantity': '3/4...",0.0,0.0
3,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,"{'name': 'apricot brandy', 'quantity': '1', 'u...",0.0,0.0
4,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,"{'name': 'egg white', 'quantity': '1/2', 'unit...",0.0,0.0
...,...,...,...,...,...,...,...
557,853,Ramos Melon Fizz,https://www.liquor.com/thmb/WtGYp6YvCU90VXMyJk...,https://www.liquor.com/ramos-melon-fizz-cockta...,"{'name': 'Soda water, preferably Topo Chico, t...",0.0,0.0
558,853,Ramos Melon Fizz,https://www.liquor.com/thmb/WtGYp6YvCU90VXMyJk...,https://www.liquor.com/ramos-melon-fizz-cockta...,"{'name': 'Garnish: Imperial-grade matcha', 'qu...",0.0,0.0
559,924,Kir Royale,https://www.liquor.com/thmb/ug3rioghdw9lQNRjHa...,https://www.liquor.com/recipes/how-to-make-a-k...,"{'name': 'creme de cassis', 'quantity': '1/2',...",0.0,0.0
560,924,Kir Royale,https://www.liquor.com/thmb/ug3rioghdw9lQNRjHa...,https://www.liquor.com/recipes/how-to-make-a-k...,"{'name': 'sparkling wine', 'quantity': None, '...",0.0,0.0


In [250]:
cocktails_ingredients = pd.concat(
    [cocktails_exploded.reset_index(), pd.json_normalize(cocktails_exploded['ingredients'])], 
    axis=1
).drop(["index", "ingredients"], axis=1).rename(columns={"name": "ingredient"})

In [251]:
cocktails_ingredients.loc[cocktails_ingredients["ingredient"].str.lower()=="peel"]

Unnamed: 0,level_0,recipe_name,image,link,rank,ingredient,quantity,unit
388,648,Le Grand Louis,https://www.liquor.com/thmb/2uNjyZOQVjBzLZR6z8...,https://www.liquor.com/le-grand-louis-cocktail...,0.0,peel,1,lemon


In [252]:
cocktails_ingredients.loc[cocktails_ingredients["recipe_name"]=="Le Grand Louis", :]

Unnamed: 0,level_0,recipe_name,image,link,rank,ingredient,quantity,unit
385,648,Le Grand Louis,https://www.liquor.com/thmb/2uNjyZOQVjBzLZR6z8...,https://www.liquor.com/le-grand-louis-cocktail...,0.0,Grand Marnier Cuvée Louis-Alexandre (or origin...,2,ounces
386,648,Le Grand Louis,https://www.liquor.com/thmb/2uNjyZOQVjBzLZR6z8...,https://www.liquor.com/le-grand-louis-cocktail...,0.0,dry vermouth,3/4,ounce
387,648,Le Grand Louis,https://www.liquor.com/thmb/2uNjyZOQVjBzLZR6z8...,https://www.liquor.com/le-grand-louis-cocktail...,0.0,orange bitters,1,dash
388,648,Le Grand Louis,https://www.liquor.com/thmb/2uNjyZOQVjBzLZR6z8...,https://www.liquor.com/le-grand-louis-cocktail...,0.0,peel,1,lemon
389,648,Le Grand Louis,https://www.liquor.com/thmb/2uNjyZOQVjBzLZR6z8...,https://www.liquor.com/le-grand-louis-cocktail...,0.0,rosemary sprig,,garnish


In [253]:
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Mental Note Non-Alcoholic") &
    (cocktails_ingredients["ingredient"]=="heaping barspoon"), 
    "ingredient"] = "raspberry preserves"

In [254]:
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Le Grand Louis") &
    (cocktails_ingredients["ingredient"]=="peel"), 
    "unit"] = None
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Le Grand Louis") &
    (cocktails_ingredients["ingredient"]=="peel"),  
    "ingredient"] = "lemon peel"

In [255]:
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Tequila Manhattan") &
    (cocktails_ingredients["ingredient"]=="Garnish:"), 
    "unit"] = "garnish"
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Tequila Manhattan") &
    (cocktails_ingredients["ingredient"]=="Garnish:"), 
    "ingredient"] = "cherry"

In [256]:
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Campari Spritz") &
    (cocktails_ingredients["ingredient"]=="Garnish:"), 
    "unit"] = "garnish"
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Campari Spritz") &
    (cocktails_ingredients["ingredient"]=="Garnish:"), 
    "ingredient"] = "orange"

In [262]:
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Siesta")
    & (cocktails_ingredients["ingredient"]=="grapefruit"),
"ingredient"] = "grapefruit peel"
# cocktails_ingredients.loc[
#     (cocktails_ingredients["recipe_name"]=="Siesta")
#     & (cocktails_ingredients["ingredient"]=="Garnish:"),
# "unit"] = "garnish"

In [258]:
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Yuzu Midori Sour")
    & (cocktails_ingredients["ingredient"]=="Garnish:"),
"ingredient"] = "edible flower"
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Yuzu Midori Sour")
    & (cocktails_ingredients["ingredient"]=="Garnish:"),
"unit"] = "garnish"

In [259]:
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Mexican Martini")
    & (cocktails_ingredients["ingredient"]=="2 "), "ingredient"
] = "tequila"

In [260]:
cocktails_ingredients.loc[
    (cocktails_ingredients["recipe_name"]=="Mexican Martini"), "ingredient"]

189                            tequila
190                          Cointreau
191       lime juice, freshly squeezed
192    green olive brine, from the jar
193                salt rim (optional)
194                         lime wedge
195                             olives
Name: ingredient, dtype: object

In [261]:
cocktails_ingredients.loc[cocktails_ingredients["ingredient"].str.lower()=="garnish:", "link"].tolist()

[]

In [160]:
ingredients_set = set(
    [
        re.sub("[*]|,$","",unidecode(ig.get("name").lower()).strip()) if ig.get("name") is not None else None 
        for cocktail in new_cocktails.to_dict("records") 
        for ig in cocktail.get("ingredients")
    ]
)

In [161]:
ingredients_set

{'100 proof rye whiskey',
 '2',
 'absinthe',
 'acid league wine proxies sauvage',
 'agave caramel',
 'agave syrup',
 'agave syrup (recipe below)',
 'allspice berries, whole',
 'allspice tincture',
 'amaro santoni',
 'amer picon',
 'amontillado sherry',
 'angostura bitters',
 'anis del mono or other imported anisette (the drier the better)',
 'aperol',
 'apple bitters',
 'apple brandy',
 'apple cider vinegar',
 'apple slice',
 'apple, cored and diced',
 'applejack',
 'appleton estate reserve rum',
 'apricot brandy',
 'apricot liqueur',
 'aquafaba',
 'august uncommon',
 'bitters',
 'blackberry',
 'blanc verjus',
 'blanc vermouth',
 'blanco tequila',
 'blanco tequila, preferably el tesoro',
 'blended scotch whisky',
 'bonded bourbon, preferably evan williams bottled-in-bond',
 'bourbon',
 'bourbon or rye whiskey',
 'brandy',
 'brewed hibiscus tea',
 'brut champagne',
 'cachaca',
 'calpico',
 'calvados',
 'campari',
 'candied ginger',
 'carnation flower (optional)',
 'carpano antica formul

In [None]:
list(os.environ.keys())

In [136]:
cocktails_ingredients.drop(["image","link","recipe_name"], axis=1)

Unnamed: 0,level_0,ingredient,unit,quantity
0,0,"bonded bourbon, preferably Evan Williams Bottl...",ounces,1 1/2
1,0,"lemon juice, freshly squeezed",ounce,3/4
2,0,rich simple syrup,ounce,3/4
3,0,apricot brandy,barspoon,1
4,0,egg white,ounce,1/2
...,...,...,...,...
557,853,"Soda water, preferably Topo Chico, to top",,
558,853,Garnish: Imperial-grade matcha,,
559,924,creme de cassis,ounce,1/2
560,924,sparkling wine,,


### Need to pull IDs from database and adjust new recipe IDs accordingly

In [102]:
current_cocktails_ingredients_list, cols = run_query("select * from public.cocktails_ingredients;", True)
current_cocktails_ingredients = pd.DataFrame(current_cocktails_ingredients_list, columns=cols)
current_ingredients_list, cols = run_query("select * from public.ingredients;", True)
current_ingredient = pd.DataFrame(current_ingredients_list, columns=cols)
current_cocktails_list, cols = run_query("select * from public.cocktails;", True)
current_cocktails = pd.DataFrame(current_cocktails_list, columns=cols)

In [308]:
ingredients = pd.read_excel("../data/ingredient_map.xlsx", sheet_name="Sheet1").fillna("None")

In [309]:
ingredients

Unnamed: 0,ingredient_id,ingredient,mapped_ingredient,alcohol_type
0,1,blended scotch whisky,scotch,scotch
1,2,orange bitters,orange bitters,
2,3,london dry gin,dry gin,gin
3,4,orange curaçao,orange curaçao,liqueur
4,5,cachaça (such as leblon),cachaça,liqueur
...,...,...,...,...
1441,1443,yuzu juice,yuzu,
1442,1444,zucca rabarbaro amaro,amaro,liqueur
1443,1445,pimm's no. 1,pimm's no. 1 liqueur,liqueur
1444,1446,laird's applejack,apple brandy,brandy


In [263]:
cocktails_ingredients

Unnamed: 0,level_0,recipe_name,image,link,rank,ingredient,quantity,unit
0,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,0.0,"bonded bourbon, preferably Evan Williams Bottl...",1 1/2,ounces
1,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,0.0,"lemon juice, freshly squeezed",3/4,ounce
2,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,0.0,rich simple syrup,3/4,ounce
3,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,0.0,apricot brandy,1,barspoon
4,0,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...,0.0,egg white,1/2,ounce
...,...,...,...,...,...,...,...,...
557,853,Ramos Melon Fizz,https://www.liquor.com/thmb/WtGYp6YvCU90VXMyJk...,https://www.liquor.com/ramos-melon-fizz-cockta...,0.0,"Soda water, preferably Topo Chico, to top",,
558,853,Ramos Melon Fizz,https://www.liquor.com/thmb/WtGYp6YvCU90VXMyJk...,https://www.liquor.com/ramos-melon-fizz-cockta...,0.0,Garnish: Imperial-grade matcha,,
559,924,Kir Royale,https://www.liquor.com/thmb/ug3rioghdw9lQNRjHa...,https://www.liquor.com/recipes/how-to-make-a-k...,0.0,creme de cassis,1/2,ounce
560,924,Kir Royale,https://www.liquor.com/thmb/ug3rioghdw9lQNRjHa...,https://www.liquor.com/recipes/how-to-make-a-k...,0.0,sparkling wine,,


In [108]:
current_cocktails["cocktail_id"].max()

649

In [264]:
new_cocktails["rank"] = new_cocktails.groupby("recipe_name").cumcount()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_cocktails["rank"] = new_cocktails.groupby("recipe_name").cumcount()


In [265]:
new_cocktails = new_cocktails.loc[new_cocktails["rank"] == 0]

In [266]:
new_cocktails = new_cocktails.reset_index().drop("level_0", axis=1).reset_index()

In [267]:
new_cocktails["cocktail_id"] = new_cocktails["level_0"]+current_cocktails["cocktail_id"].max()+1

In [268]:
new_cocktails_final = new_cocktails.drop(
    ["level_0", "index", "rank", "ingredients"], 
    axis=1
)[["cocktail_id", "recipe_name", "image", "link"]]

In [269]:
new_cocktails_final

Unnamed: 0,cocktail_id,recipe_name,image,link
0,650,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...
1,651,A Little Chili Punch,https://www.liquor.com/thmb/07fw3CG6OUX4XcF3nX...,https://www.liquor.com/a-little-chili-punch-re...
2,652,Night Tripper,https://www.liquor.com/thmb/StBH0_s9AhJb-j5nF-...,https://www.liquor.com/night-tripper-cocktail-...
3,653,Fanciulli,https://www.liquor.com/thmb/GjMtP-2krR9EwwXQFq...,https://www.liquor.com/fanciulli-cocktail-reci...
4,654,High King Highball,https://www.liquor.com/thmb/I3XnI6PVyP4Ma5UzQ0...,https://www.liquor.com/recipes/high-king-highb...
5,655,Lavender Mule,https://www.liquor.com/thmb/eNaqKuFm1OPqkHMC1m...,https://www.liquor.com/lavender-mule-cocktail-...
6,656,Homemade Zima,https://www.liquor.com/thmb/SmonZZmJEZtn3_Rg-4...,https://www.liquor.com/homemade-zima-cocktail-...
7,657,Kir Royale,https://www.liquor.com/thmb/ug3rioghdw9lQNRjHa...,https://www.liquor.com/recipes/how-to-make-a-k...
8,658,Vegan Milk Punch,https://www.liquor.com/thmb/Z0rE-ELkuKeMEFPsbS...,https://www.liquor.com/vegan-milk-punch-7973822
9,659,Frozen Piña Colada,https://www.liquor.com/thmb/DBqU37WdOMZCcoXVud...,https://www.liquor.com/frozen-pina-colada-cock...


In [270]:
ingredients

Unnamed: 0,ingredient_id,ingredient,ingredient_map,alcohol_type
0,1,blended scotch whisky,scotch,scotch
1,2,orange bitters,orange bitters,
2,3,london dry gin,dry gin,gin
3,4,orange curaçao,orange curaçao,liqueur
4,5,cachaça (such as leblon),cachaça,liqueur
...,...,...,...,...
1438,1440,whiskey barrel-aged bitters,whiskey barrel-aged bitters,
1439,1441,whole grapefruits,grapefruit,
1440,1442,whole lemons,lemon,
1441,1443,yuzu juice,yuzu,


In [280]:
cocktails_ingredients = cocktails_ingredients.drop(
    ["image", "link", "level_0", "rank"], 
    axis=1
).drop_duplicates().merge(
    new_cocktails_final[["recipe_name", "cocktail_id"]], 
    on="recipe_name"
).assign(
    ingredient=lambda row: row["ingredient"].str.lower().str.replace(",|[*'']","", regex=True).apply(unidecode).str.strip()
).merge(
    ingredients, on="ingredient", how="inner"
)[["cocktail_id", "ingredient_id", "unit", "quantity"]]

In [283]:
current_cocktails_ingredients["cocktail_ingredient_id"].max()

3656

In [287]:
cocktails_ingredients = cocktails_ingredients.reset_index().rename(
    columns={"index": "cocktail_ingredient_id"}
) #current_cocktails_ingredients["cocktail_ingredient_id"].max()

In [289]:
cocktails_ingredients["cocktail_ingredient_id"] = cocktails_ingredients["cocktail_ingredient_id"] + \
    current_cocktails_ingredients["cocktail_ingredient_id"].max()+1

In [290]:
cocktails_ingredients

Unnamed: 0,cocktail_ingredient_id,cocktail_id,ingredient_id,unit,quantity
0,3657,650,1313,ounces,1 1/2
1,3658,650,1380,ounce,3/4
2,3659,651,1380,ounces,6
3,3660,655,1380,ounce,1/2
4,3661,656,1380,ounce,1/2
...,...,...,...,...,...
398,4055,714,1387,ounce,3/4
399,4056,714,1416,ounce,1/2
400,4057,714,1414,ounces,1 3/4
401,4058,714,1423,,


In [295]:
engine = create_engine(url=create_conn_string())

In [298]:
cocktails_ingredients.to_sql(
    "cocktails_ingredients", con=engine, if_exists="append", index=False
)

403

In [303]:
new_cocktails.drop(
    ["ingredients", "level_0", "index", "rank"], 
    axis=1
)[["cocktail_id", "recipe_name", "image", "link"]]

Unnamed: 0,cocktail_id,recipe_name,image,link
0,650,Stone Fruit Sour,https://www.liquor.com/thmb/6Vj-P1MQv899YMpA-N...,https://www.liquor.com/stone-fruit-sour-cockta...
1,651,A Little Chili Punch,https://www.liquor.com/thmb/07fw3CG6OUX4XcF3nX...,https://www.liquor.com/a-little-chili-punch-re...
2,652,Night Tripper,https://www.liquor.com/thmb/StBH0_s9AhJb-j5nF-...,https://www.liquor.com/night-tripper-cocktail-...
3,653,Fanciulli,https://www.liquor.com/thmb/GjMtP-2krR9EwwXQFq...,https://www.liquor.com/fanciulli-cocktail-reci...
4,654,High King Highball,https://www.liquor.com/thmb/I3XnI6PVyP4Ma5UzQ0...,https://www.liquor.com/recipes/high-king-highb...
5,655,Lavender Mule,https://www.liquor.com/thmb/eNaqKuFm1OPqkHMC1m...,https://www.liquor.com/lavender-mule-cocktail-...
6,656,Homemade Zima,https://www.liquor.com/thmb/SmonZZmJEZtn3_Rg-4...,https://www.liquor.com/homemade-zima-cocktail-...
7,657,Kir Royale,https://www.liquor.com/thmb/ug3rioghdw9lQNRjHa...,https://www.liquor.com/recipes/how-to-make-a-k...
8,658,Vegan Milk Punch,https://www.liquor.com/thmb/Z0rE-ELkuKeMEFPsbS...,https://www.liquor.com/vegan-milk-punch-7973822
9,659,Frozen Piña Colada,https://www.liquor.com/thmb/DBqU37WdOMZCcoXVud...,https://www.liquor.com/frozen-pina-colada-cock...


In [304]:
new_cocktails.drop(
    ["ingredients", "level_0", "index", "rank"], 
    axis=1
)[["cocktail_id", "recipe_name", "image", "link"]].to_sql(
    "cocktails", con=engine, if_exists="append", index=False
)

65

In [312]:
ingredients = pd.read_excel("../data/ingredient_map.xlsx", sheet_name="Sheet1")

In [313]:
ingredients

Unnamed: 0,ingredient_id,ingredient,mapped_ingredient,alcohol_type
0,1,blended scotch whisky,scotch,scotch
1,2,orange bitters,orange bitters,
2,3,london dry gin,dry gin,gin
3,4,orange curaçao,orange curaçao,liqueur
4,5,cachaça (such as leblon),cachaça,liqueur
...,...,...,...,...
1441,1443,yuzu juice,yuzu,
1442,1444,zucca rabarbaro amaro,amaro,liqueur
1443,1445,pimm's no. 1,pimm's no. 1 liqueur,liqueur
1444,1446,laird's applejack,apple brandy,brandy


In [314]:
ingredients.to_sql(
    "ingredients", con=engine, if_exists="replace", index=False
)

446

In [None]:
res = conn.execute(
        """
        SELECT ci.ingredient, count(distinct(c.cocktail_id))
        FROM cocktails c
        JOIN cocktails_ingredients ci
        ON c.cocktail_id = ci.cocktail_id
        where ci.ingredient = '1'
        group by 1
        order by 2 desc
        """
    ).fetchall()
res

In [None]:
res = conn.execute(
        """
        SELECT ci.ingredient, c.cocktail_id, c.link
        FROM cocktails c
        JOIN cocktails_ingredients ci
        ON c.cocktail_id = ci.cocktail_id
        where ci.ingredient = '8 to 10'
        """
    ).fetchall()
res

In [None]:
conn.close()

In [None]:
garnishes = set(
    [
        re.sub("[*]|,$","",unidecode(ig.get("name").lower()).title().strip()) if re.search("^Garnish: ", str(ig.get("name")), flags=re.IGNORECASE) or ig.get("unit") == "garnish" else None 
        for cocktail in cocktails 
        for ig in cocktail.get("ingredients")
    ]
)

In [None]:
bitters = set(
    [
        re.sub("[*]|,$","",unidecode(ig.get("name").lower()).title().strip()) if re.search("bitter", str(ig.get("name"))) else None 
        for cocktail in cocktails 
        for ig in cocktail.get("ingredients")
    ]
)

In [None]:
syrups = set(
    [
        re.sub("[*]|,$","",unidecode(ig.get("name").lower()).title().strip()) if re.search("syrup", str(ig.get("name"))) else None 
        for cocktail in cocktails 
        for ig in cocktail.get("ingredients")
    ]
)

In [None]:
ingredients = ingredients - garnishes - bitters - syrups
# ingredients = ingredients - bitters
# ingredients = ingredients - syrups

In [None]:
[i for i in ingredients if re.search("\\bgin\\b", i, flags=re.IGNORECASE)]

In [None]:
pd.set_option("display.max_rows", 100)

In [None]:
alc = "vodka"
res = conn.execute(
        f"""
        with alcohol as (
            select distinct cocktail_id
            from cocktails_ingredients ci
            LEFT JOIN ingredient_map im
            ON ci.ingredient = im.ingredient
            -- where alcohol_type = '{alc}'
        ), ingredients as (
            SELECT ci.*, 
            COALESCE(im.mapped_ingredient, ci.ingredient) as mapped_ingredient, 
            im.alcohol_type
            FROM cocktails_ingredients  ci
            LEFT JOIN ingredient_map im
            ON ci.ingredient = im.ingredient
        )
        select i.*
        from ingredients i
        join alcohol a
        on i.cocktail_id = a.cocktail_id
        where i.alcohol_type != '{alc}' or i.alcohol_type is null;
        """
    ).fetchall()

In [None]:
results = pd.DataFrame(
    res, 
    columns=["index", "cocktail_id", "ingredient", "unit", "quantity", "mapped_ingredient", "alcohol_type"]
)

In [None]:
results

In [None]:
results.groupby("mapped_ingredient").agg({
    "cocktail_id": "nunique",
    "index": "count"
}).reset_index().assign(
    total=len(results["cocktail_id"].unique()),
    perc_of_total=lambda row: row["cocktail_id"]/row["total"]
).sort_values("cocktail_id", ascending=False).head(100)