In [42]:
import requests
import json
import pandas as pd
import numpy as np
import requests
import plotly.graph_objects as go
from datetime import datetime as dt
from bs4 import BeautifulSoup
from math import log, log2, ceil
import re
import os
import sys
import os
import pickle
from sqlalchemy import Table, Column, Integer, String, MetaData, create_engine
import psycopg2
from unidecode import unidecode

In [2]:
def save_object(obj, filename):
    with open(filename, 'wb') as outp:
        pickle.dump(obj, outp, pickle.HIGHEST_PROTOCOL)
    
    
def load_object(filename):
    with open(filename, "rb") as file:
        return pickle.load(file)
    

def get_ingredient_list(link, html):
    ingredient_list = html.find("ul", attrs={"class": "ingredient-list"})

    ret = list()
    if ingredient_list is None:
        ingredients = [li for li in html.find(
            "ul", attrs={"class": "structured-ingredients__list"}).find_all("li")]
        for el in ingredients:
            ret.append({
                "name": el.find("p").find("span", attrs={"data-ingredient-name": "true"}).text if el.find("p").find("span", attrs={"data-ingredient-name": "true"}) is not None else None,
                "unit": el.find("p").find("span", attrs={"data-ingredient-unit": "true"}).text if el.find("p").find("span", attrs={"data-ingredient-unit": "true"}) is not None else None,
                "quantity": el.find("p").find("span", attrs={"data-ingredient-quantity": "true"}).text if el.find("p").find("span", attrs={"data-ingredient-quantity": "true"}) is not None else None
            })
    else:
        ingredients = [
            re.sub("\n", "", li.text) for li in html.find("ul", attrs={"class": "ingredient-list"}).find_all("li")
        ]
        for el in ingredients:
            if re.search("^garnish", el, flags=re.IGNORECASE):
                quantity = None
                unit = "garnish"
                name = re.sub("^Garnish: ", "", el).strip()
            else:
                quantity = re.search("^[0-9/ ]+", el).group(0).strip()
                unit = re.sub(quantity, "", re.search(
                    "^[0-9/ ]+ [A-Za-z]+ ", el).group(0).strip()).strip()
                name = re.sub(f"{quantity}|{unit}", "", el).strip()
            ret.append({
                "name": name,
                "unit": unit,
                "quantity": quantity
            })
    image = html.find(
        "figure",
        attrs={"class": "comp figure figure--primary-image"}
    ).find(
        "div",
        attrs={"class", "img-placeholder"}
    ).find("img")["src"]

    return {
        "recipe_name": re.sub(" Cocktail Recipe$", "", html.find("title").text),
        "image": image,
        "link": link,
        "ingredients": ret
    }


def get_recipe_ingredients(recipe_links, recipes):
    for link in recipe_links:
        text = requests.get(link).text
        html = BeautifulSoup(text, 'html.parser')

        try:
            recipe = get_ingredient_list(link, html)
            recipes.append(recipe)

        except AttributeError as e:
            try:
                sub_links = [a["href"] for a in html.find_all(
                    "a", attrs={"class": "mntl-sc-block-heading__link"})]
                get_recipe_ingredients(sub_links, recipes)
            except AttributeError as e:
                print(f"{link} - No recipe or sub-links")

    return recipes

In [36]:
liquor_links = {
    "bourbon": "https://www.liquor.com/bourbon-cocktails-4779435",
    "vodka": "https://www.liquor.com/vodka-cocktails-4779437",
    "rum": "https://www.liquor.com/rum-cocktails-4779434",
    "scotch": "https://www.liquor.com/scotch-cocktails-4779431",
    "rye_whiskey": "https://www.liquor.com/rye-whiskey-cocktails-4779433",
    "other_whiskey": "https://www.liquor.com/whiskey-cocktails-4779430",
    "tequila_mezcal": "https://www.liquor.com/tequila-and-mezcal-cocktails-4779429",
    "cognac_brandy": "https://www.liquor.com/brandy-cocktails-4779428",
    "other": "https://www.liquor.com/other-cocktails-4779427",
    "gins": "https://www.liquor.com/gin-cocktails-4779436"
}

In [59]:
recipes = list()
all_recipes = list()
for liquor, liquor_link in liquor_links.items():
    print(liquor)
    text = requests.get(liquor_link).text
    html = BeautifulSoup(text, 'html.parser')
    
    recipe_links = [a["href"] for a in html.find_all("a", attrs={"class": "comp card"})]
    
    all_recipes.append({"liquor": liquor, "recipes": get_recipe_ingredients(recipe_links, recipes)})

bourbon
vodka
rum
scotch
rye_whiskey
other_whiskey
tequila_mezcal
cognac_brandy
other
gins


In [60]:
master_list = list()
for liquor in all_recipes:
    for recipe in liquor.get("recipes"):
        if recipe not in master_list:
            master_list.append(recipe)

In [61]:
master_list[400]

{'recipe_name': 'Sidecar',
 'image': 'https://www.liquor.com/thmb/SPwtnGzW6q81adT1Ohr36hZ8464=/720x0/filters:no_upscale():max_bytes(150000):strip_icc()/sidecar-720x720-primary-a107a98a86dc4ebd87581f16ccb8c0d5.jpg',
 'link': 'https://www.liquor.com/recipes/sidecar/',
 'ingredients': [{'name': 'cognac', 'unit': 'ounces', 'quantity': '1 1/2'},
  {'name': 'orange liqueur', 'unit': 'ounce', 'quantity': '3/4'},
  {'name': 'lemon juice', 'unit': 'ounce', 'quantity': '3/4'},
  {'name': 'orange twist', 'unit': None, 'quantity': None},
  {'name': 'sugar', 'unit': None, 'quantity': None}]}

In [62]:
pd.DataFrame(master_list).to_parquet("cocktail.parquet")

In [64]:
save_object(master_list, "cocktails.pkl")

In [3]:
cocktails = load_object("../data/cocktails.pkl")

In [6]:
cocktails_df = pd.DataFrame(cocktails)

In [7]:
cocktails_df = cocktails_df.reset_index().rename(columns={"index":"cocktail_id"})

In [8]:
cocktails_df

Unnamed: 0,cocktail_id,recipe_name,image,link,ingredients
0,0,Benton’s Old Fashioned,https://www.liquor.com/thmb/yJ29_z002k54RrDas-...,https://www.liquor.com/recipes/bentons-old-fas...,"[{'name': 'fat-washed Four Roses bourbon', 'un..."
1,1,Normandie Old Fashioned,https://www.liquor.com/thmb/jXKKgCjYLMGVJYtPfo...,https://www.liquor.com/recipes/normandie-old-f...,"[{'name': 'coconut-infused bourbon*', 'unit': ..."
2,2,Bourbon & Berries,https://www.liquor.com/thmb/Iy7vDE2gYGePZx4D80...,https://www.liquor.com/recipes/bourbon-and-ber...,"[{'name': 'bourbon', 'unit': 'ounces', 'quanti..."
3,3,Zapatero,https://www.liquor.com/thmb/7LRPVNlYxn5AVLfMh8...,https://www.liquor.com/recipes/the-zapatero/,"[{'name': 'mezcal', 'unit': 'ounces', 'quantit..."
4,4,Brown Butter Old Fashioned,https://www.liquor.com/thmb/c0nqh8XJtL-oHqJoIm...,https://www.liquor.com/recipes/brown-butter-ol...,"[{'name': 'bitters', 'unit': 'dashes', 'quanti..."
...,...,...,...,...,...
489,489,Last Word,https://www.liquor.com/thmb/Kymj4IyIe3CvBisTJE...,https://www.liquor.com/recipes/the-last-word/,"[{'name': 'gin', 'unit': 'ounce', 'quantity': ..."
490,490,Cotter Swizzle,https://www.liquor.com/thmb/To1_eWD9ELMox05PH6...,https://www.liquor.com/cotter-swizzle-cocktail...,"[{'name': 'Ransom Old Tom gin', 'unit': 'ounce..."
491,491,Coffee & Cigarettes,https://www.liquor.com/thmb/3wp13Z-MAHMB5dlYwG...,https://www.liquor.com/recipes/coffee-and-ciga...,"[{'name': 'The Block Autumn gin', 'unit': 'oun..."
492,492,Golden Purl,https://www.liquor.com/thmb/IsrrKcWyhDeo3d6wlo...,https://www.liquor.com/recipes/golden-purl/,"[{'name': 'cider', 'unit': 'cup', 'quantity': ..."


In [9]:
cocktails_exploded = cocktails_df.explode("ingredients")

In [10]:
cocktails_exploded.reset_index()

Unnamed: 0,index,cocktail_id,recipe_name,image,link,ingredients
0,0,0,Benton’s Old Fashioned,https://www.liquor.com/thmb/yJ29_z002k54RrDas-...,https://www.liquor.com/recipes/bentons-old-fas...,"{'name': 'fat-washed Four Roses bourbon', 'uni..."
1,0,0,Benton’s Old Fashioned,https://www.liquor.com/thmb/yJ29_z002k54RrDas-...,https://www.liquor.com/recipes/bentons-old-fas...,"{'name': 'maple syrup', 'unit': 'ounce', 'quan..."
2,0,0,Benton’s Old Fashioned,https://www.liquor.com/thmb/yJ29_z002k54RrDas-...,https://www.liquor.com/recipes/bentons-old-fas...,"{'name': 'bitters', 'unit': 'dashes', 'quantit..."
3,0,0,Benton’s Old Fashioned,https://www.liquor.com/thmb/yJ29_z002k54RrDas-...,https://www.liquor.com/recipes/bentons-old-fas...,"{'name': 'orange twist', 'unit': None, 'quanti..."
4,1,1,Normandie Old Fashioned,https://www.liquor.com/thmb/jXKKgCjYLMGVJYtPfo...,https://www.liquor.com/recipes/normandie-old-f...,"{'name': 'coconut-infused bourbon*', 'unit': '..."
...,...,...,...,...,...,...
2741,493,493,"Curiosity Killed the Cat, You Know",https://www.liquor.com/thmb/-nQMlGE-mkqMWCVnbD...,https://www.liquor.com/recipes/curiosity-kille...,"{'name': 'Macvin du Jura liqueur wine', 'unit'..."
2742,493,493,"Curiosity Killed the Cat, You Know",https://www.liquor.com/thmb/-nQMlGE-mkqMWCVnbD...,https://www.liquor.com/recipes/curiosity-kille...,"{'name': 'honey syrup', 'unit': 'ounce', 'quan..."
2743,493,493,"Curiosity Killed the Cat, You Know",https://www.liquor.com/thmb/-nQMlGE-mkqMWCVnbD...,https://www.liquor.com/recipes/curiosity-kille...,"{'name': 'lemon juice, freshly squeezed', 'uni..."
2744,493,493,"Curiosity Killed the Cat, You Know",https://www.liquor.com/thmb/-nQMlGE-mkqMWCVnbD...,https://www.liquor.com/recipes/curiosity-kille...,"{'name': 'chamomile tea, heated', 'unit': 'oun..."


In [11]:
cocktails_ingredients = pd.concat(
    [cocktails_exploded.reset_index(), pd.json_normalize(cocktails_exploded['ingredients'])], 
    axis=1
).drop(["index", "ingredients"], axis=1).rename(columns={"name": "ingredient"})

In [38]:
DB_URL = os.environ['DATABASE_URL']
DB_PW = os.environ["COCKTAILS_PWD"]
DB_PORT = os.environ["COCKTAILS_PORT"]
DB_USER = os.environ["COCKTAILS_USER"]
DB_NAME = os.environ["COCKTAILS_DB"]
DATABASE_URL = os.environ["DATABASE_URL"].replace(
        "postgres://", "postgresql://"
    )

In [43]:
try:
    # link to your database
    conn = psycopg2.connect(DATABASE_URL, sslmode='require')
except Exception as e:
    print("I am unable to connect to the database", e)

In [60]:
cur = conn.cursor()  # creating a cursor
 
with conn.cursor() as cur:
    cur.execute("""
    CREATE TABLE
    """)

SyntaxError: syntax error at or near "user"
LINE 2:     DROP TABLE user;
                       ^


In [59]:
res

[('akbmflnjbjqrvh',)]

In [23]:
cocktails_ingredients.drop(["image","link","recipe_name"], axis=1).to_sql(
    "cocktails_ingredients", con=engine, if_exists="replace"
)

NameError: name 'cocktails_ingredients' is not defined

In [23]:
cocktails_df.drop(["ingredients"], axis=1).to_sql(
    "cocktails", con=engine, if_exists="replace", index=False
)

In [24]:
res = engine.execute(
        """
        SELECT ci.ingredient, count(distinct(c.cocktail_id))
        FROM cocktails c
        JOIN cocktails_ingredients ci
        ON c.cocktail_id = ci.cocktail_id
        group by 1
        order by 2 desc
        """
    ).fetchall()
res

[('bitters', 75),
 ('lime juice', 74),
 ('lemon juice', 69),
 ('simple syrup', 63),
 ('vodka', 51),
 ('bourbon', 49),
 ('lemon twist', 48),
 ('gin', 46),
 ('whiskey', 43),
 ('rum', 41),
 ('lemon juice, freshly squeezed', 40),
 ('orange twist', 34),
 ('sweet vermouth', 33),
 ('mint sprig', 30),
 ('lime juice, freshly squeezed', 29),
 ('tequila', 26),
 ('lime wheel', 24),
 ('Angostura bitters', 20),
 ('Campari', 20),
 ('lime wedge', 19),
 ('orange juice', 19),
 ('Benedictine', 18),
 ('grenadine', 17),
 ('maraschino cherry', 17),
 ('mezcal', 16),
 ('absinthe', 16),
 ('dry vermouth', 16),
 ('pineapple juice', 16),
 ('Club soda', 15),
 ('scotch', 15),
 ('agave nectar', 15),
 ('sugar', 15),
 ('egg white', 15),
 ('honey syrup', 14),
 ('mint leaves', 14),
 ('Cointreau', 14),
 ('nutmeg', 13),
 ('brandied cherry', 13),
 ('maple syrup', 12),
 ('lemon peel', 11),
 ('orange wheel', 11),
 ('brandy', 11),
 ('cognac', 11),
 ('lemon wheel', 11),
 ('grapefruit juice, freshly squeezed', 11),
 ('orange pe

In [61]:
meta = MetaData()

user = Table(
   'users', meta, 
   Column('id', Integer, primary_key = True), 
   Column('email', String), 
   Column('pwd', String), 
)

In [62]:
meta.create_all(engine)

In [31]:
engine.execute(
    """DELETE FROM `user`"""
).fetchall()

ProgrammingError: (psycopg2.errors.SyntaxError) syntax error at or near "`"
LINE 1: DELETE FROM `user`
                    ^

[SQL: DELETE FROM `user`]
(Background on this error at: https://sqlalche.me/e/14/f405)

In [27]:
res = engine.execute(
        """
        SELECT * FROM user;
        """
    ).fetchall()
res

[('akbmflnjbjqrvh',)]

In [20]:
res

[('akbmflnjbjqrvh',)]

In [21]:
engine.dispose()

In [12]:
ingredients = set(
    [
        re.sub("[*]|,$","",unidecode(ig.get("name").lower()).title().strip()) if ig.get("name") is not None else None 
        for cocktail in cocktails 
        for ig in cocktail.get("ingredients")
    ]
)

In [13]:
garnishes = set(
    [
        re.sub("[*]|,$","",unidecode(ig.get("name").lower()).title().strip()) if re.search("^Garnish: ", str(ig.get("name")), flags=re.IGNORECASE) or ig.get("unit") == "garnish" else None 
        for cocktail in cocktails 
        for ig in cocktail.get("ingredients")
    ]
)

In [14]:
bitters = set(
    [
        re.sub("[*]|,$","",unidecode(ig.get("name").lower()).title().strip()) if re.search("bitter", str(ig.get("name"))) else None 
        for cocktail in cocktails 
        for ig in cocktail.get("ingredients")
    ]
)

In [15]:
syrups = set(
    [
        re.sub("[*]|,$","",unidecode(ig.get("name").lower()).title().strip()) if re.search("syrup", str(ig.get("name"))) else None 
        for cocktail in cocktails 
        for ig in cocktail.get("ingredients")
    ]
)

In [16]:
ingredients = ingredients - garnishes - bitters - syrups
# ingredients = ingredients - bitters
# ingredients = ingredients - syrups

In [18]:
[i for i in ingredients if re.search("\\bgin\\b", i, flags=re.IGNORECASE)]

['Lemongrass-Infused Gin',
 'Ki No Bi Gin (Or Other Herbaceous Dry Gin)',
 'Gin',
 'Gin (Plymouth Or London Dry)',
 'Plymouth Gin',
 'London Dry Or Old Tom Gin',
 'Scapegrace Black Gin',
 'Aviation Gin',
 "Gin (Such As Hendrick'S)",
 'Monkey 47 Gin',
 'London Dry Gin',
 'Clear Spirit Of Choice (Such As Aquavit, Gin, Tequila, Vodka Or White Rum)',
 'Monopolowa Dry Gin (Or Other Citrus-Forward Gin)',
 'Sloe Gin',
 'Opihr Gin',
 'Beefeater Gin',
 'The Block Autumn Gin',
 'Ransom Old Tom Gin']

In [20]:
cocktails_df

Unnamed: 0,cocktail_id,recipe_name,image,link,ingredients
0,0,Benton’s Old Fashioned,https://www.liquor.com/thmb/yJ29_z002k54RrDas-...,https://www.liquor.com/recipes/bentons-old-fas...,"[{'name': 'fat-washed Four Roses bourbon', 'un..."
1,1,Normandie Old Fashioned,https://www.liquor.com/thmb/jXKKgCjYLMGVJYtPfo...,https://www.liquor.com/recipes/normandie-old-f...,"[{'name': 'coconut-infused bourbon*', 'unit': ..."
2,2,Bourbon & Berries,https://www.liquor.com/thmb/Iy7vDE2gYGePZx4D80...,https://www.liquor.com/recipes/bourbon-and-ber...,"[{'name': 'bourbon', 'unit': 'ounces', 'quanti..."
3,3,Zapatero,https://www.liquor.com/thmb/7LRPVNlYxn5AVLfMh8...,https://www.liquor.com/recipes/the-zapatero/,"[{'name': 'mezcal', 'unit': 'ounces', 'quantit..."
4,4,Brown Butter Old Fashioned,https://www.liquor.com/thmb/c0nqh8XJtL-oHqJoIm...,https://www.liquor.com/recipes/brown-butter-ol...,"[{'name': 'bitters', 'unit': 'dashes', 'quanti..."
...,...,...,...,...,...
489,489,Last Word,https://www.liquor.com/thmb/Kymj4IyIe3CvBisTJE...,https://www.liquor.com/recipes/the-last-word/,"[{'name': 'gin', 'unit': 'ounce', 'quantity': ..."
490,490,Cotter Swizzle,https://www.liquor.com/thmb/To1_eWD9ELMox05PH6...,https://www.liquor.com/cotter-swizzle-cocktail...,"[{'name': 'Ransom Old Tom gin', 'unit': 'ounce..."
491,491,Coffee & Cigarettes,https://www.liquor.com/thmb/3wp13Z-MAHMB5dlYwG...,https://www.liquor.com/recipes/coffee-and-ciga...,"[{'name': 'The Block Autumn gin', 'unit': 'oun..."
492,492,Golden Purl,https://www.liquor.com/thmb/IsrrKcWyhDeo3d6wlo...,https://www.liquor.com/recipes/golden-purl/,"[{'name': 'cider', 'unit': 'cup', 'quantity': ..."
