In [1]:
import requests
import json
import pandas as pd
import numpy as np
import requests
import plotly.graph_objects as go
from datetime import datetime as dt
from bs4 import BeautifulSoup
from math import log, log2, ceil
import re
import os
import sys
import os
import pickle
from unidecode import unidecode

In [2]:
def save_object(obj, filename):
    """Pickle ``obj`` into ``filename`` using the highest available protocol.

    The file is opened in binary write mode, so an existing file at that
    path is overwritten.
    """
    payload = pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)
    with open(filename, mode='wb') as sink:
        sink.write(payload)
    
    
def load_object(filename):
    """Read ``filename`` in binary mode and return the object pickled in it.

    Unpickling can execute arbitrary code, so only use this on files you
    produced yourself.
    """
    with open(filename, mode="rb") as source:
        payload = source.read()
    return pickle.loads(payload)
    

def _structured_field(item, field):
    """Return the text of the ``data-ingredient-<field>`` span in ``item``, or None."""
    paragraph = item.find("p")
    if paragraph is None:
        return None
    span = paragraph.find("span", attrs={f"data-ingredient-{field}": "true"})
    return None if span is None else span.text


def _parse_legacy_ingredient(line):
    """Split one free-text ingredient line into a name/unit/quantity dict."""
    garnish = re.match(r"\s*garnish(?:es)?\b\s*:?\s*", line, flags=re.IGNORECASE)
    if garnish:
        # Everything after the "Garnish:" label is the garnish itself.
        return {"name": line[garnish.end():].strip(), "unit": "garnish", "quantity": None}

    # Leading run of digits, slashes and spaces is the quantity; the next
    # alphabetic word (when more text follows it) is the unit; the remainder
    # is the name. Only this leading prefix is consumed, so digits or the
    # unit word appearing later in the name are left intact.
    measured = re.match(
        r"\s*(?P<quantity>[0-9/ ]*[0-9/])\s+(?:(?P<unit>[A-Za-z]+)\s+)?(?P<name>\S.*?)\s*$",
        line,
    )
    if measured:
        return {
            "name": measured.group("name"),
            "unit": measured.group("unit"),
            "quantity": measured.group("quantity").strip(),
        }

    # No leading quantity at all: keep the raw line as the name instead of
    # failing, so one odd line does not discard the whole recipe.
    return {"name": line.strip(), "unit": None, "quantity": None}


def _primary_image_src(html):
    """Return the ``src`` of the primary recipe image, or None when it is absent."""
    figure = html.find("figure", attrs={"class": "comp figure figure--primary-image"})
    placeholder = None if figure is None else figure.find("div", attrs={"class": "img-placeholder"})
    img = None if placeholder is None else placeholder.find("img")
    return None if img is None else img.get("src")


def get_ingredient_list(link, html):
    """Extract the recipe name, image URL and ingredients from a parsed recipe page.

    Two page layouts are handled: a ``ul.ingredient-list`` whose items are
    free-text lines, and a ``ul.structured-ingredients__list`` whose items
    carry ``data-ingredient-*`` spans.

    Parameters
    ----------
    link : str
        URL the page was fetched from; copied into the result unchanged.
    html : bs4.BeautifulSoup
        Parsed page.

    Returns
    -------
    dict
        Keys ``recipe_name``, ``image`` (None when the primary image is
        missing), ``link`` and ``ingredients`` (a list of dicts with keys
        ``name``, ``unit`` and ``quantity``).

    Raises
    ------
    AttributeError
        When the page has neither ingredient list (or no ``<title>``).
        ``get_recipe_ingredients`` relies on this to recognise pages that
        are not recipes.
    """
    legacy_list = html.find("ul", attrs={"class": "ingredient-list"})

    if legacy_list is None:
        structured_list = html.find("ul", attrs={"class": "structured-ingredients__list"})
        if structured_list is None:
            raise AttributeError(f"{link} has no recognizable ingredient list")
        ret = [
            {field: _structured_field(item, field) for field in ("name", "unit", "quantity")}
            for item in structured_list.find_all("li")
        ]
    else:
        ret = [
            _parse_legacy_ingredient(re.sub("\n", "", li.text))
            for li in legacy_list.find_all("li")
        ]

    return {
        "recipe_name": re.sub(" Cocktail Recipe$", "", html.find("title").text),
        "image": _primary_image_src(html),
        "link": link,
        "ingredients": ret
    }


def get_recipe_ingredients(recipe_links, recipes, _visited=None):
    """Fetch each link and append the recipes found to ``recipes``.

    A page that ``get_ingredient_list`` can parse is appended as one recipe.
    A page it rejects with ``AttributeError`` is treated as a hub page: its
    ``a.mntl-sc-block-heading__link`` anchors are followed recursively.

    Parameters
    ----------
    recipe_links : iterable of str
        URLs to fetch.
    recipes : list
        Accumulator; mutated in place and also returned.
    _visited : set, optional
        Internal. URLs already handled in this crawl, so a link is fetched at
        most once and hub pages that link to each other cannot recurse forever.

    Returns
    -------
    list
        The same ``recipes`` object that was passed in.
    """
    if _visited is None:
        _visited = set()

    for link in recipe_links:
        if link in _visited:
            continue
        _visited.add(link)

        try:
            # A timeout keeps one stalled connection from hanging the crawl.
            text = requests.get(link, timeout=30).text
        except requests.RequestException as e:
            print(f"{link} - request failed: {e}")
            continue
        html = BeautifulSoup(text, 'html.parser')

        try:
            recipes.append(get_ingredient_list(link, html))
            continue
        except AttributeError:
            # Not a recipe page; fall through and look for sub-links.
            pass

        anchors = html.find_all("a", attrs={"class": "mntl-sc-block-heading__link"})
        sub_links = [a["href"] for a in anchors if a.get("href")]
        if sub_links:
            get_recipe_ingredients(sub_links, recipes, _visited)
        else:
            print(f"{link} - No recipe or sub-links")

    return recipes

In [36]:
# Category name -> listing page that is scraped for recipe links.
liquor_links = dict(
    bourbon="https://www.liquor.com/bourbon-cocktails-4779435",
    vodka="https://www.liquor.com/vodka-cocktails-4779437",
    rum="https://www.liquor.com/rum-cocktails-4779434",
    scotch="https://www.liquor.com/scotch-cocktails-4779431",
    rye_whiskey="https://www.liquor.com/rye-whiskey-cocktails-4779433",
    other_whiskey="https://www.liquor.com/whiskey-cocktails-4779430",
    tequila_mezcal="https://www.liquor.com/tequila-and-mezcal-cocktails-4779429",
    cognac_brandy="https://www.liquor.com/brandy-cocktails-4779428",
    other="https://www.liquor.com/other-cocktails-4779427",
    gins="https://www.liquor.com/gin-cocktails-4779436",
)

In [59]:
# Scrape every category listing page and collect its recipes.
# `recipes` is the flat list across all categories; `all_recipes` groups
# them per category.
recipes = list()
all_recipes = list()
for liquor, liquor_link in liquor_links.items():
    print(liquor)
    text = requests.get(liquor_link, timeout=30).text
    html = BeautifulSoup(text, 'html.parser')

    recipe_links = [a["href"] for a in html.find_all("a", attrs={"class": "comp card"})]

    # Use a fresh accumulator per category. Passing the shared `recipes`
    # list made every entry of `all_recipes` point at the same list, which
    # held the recipes of all categories.
    liquor_recipes = get_recipe_ingredients(recipe_links, [])
    recipes.extend(liquor_recipes)
    all_recipes.append({"liquor": liquor, "recipes": liquor_recipes})

bourbon
vodka
rum
scotch
rye_whiskey
other_whiskey
tequila_mezcal
cognac_brandy
other
gins


In [60]:
# Flatten the per-category results into one list, keeping the first
# occurrence of each recipe dict and skipping exact repeats.
master_list = []
for liquor_entry in all_recipes:
    for candidate in liquor_entry.get("recipes"):
        if candidate in master_list:
            continue
        master_list.append(candidate)

In [61]:
# Display a single entry of master_list as a spot check.
# NOTE(review): index 400 looks like an arbitrary sample position — confirm.
master_list[400]

{'recipe_name': 'Sidecar',
 'image': 'https://www.liquor.com/thmb/SPwtnGzW6q81adT1Ohr36hZ8464=/720x0/filters:no_upscale():max_bytes(150000):strip_icc()/sidecar-720x720-primary-a107a98a86dc4ebd87581f16ccb8c0d5.jpg',
 'link': 'https://www.liquor.com/recipes/sidecar/',
 'ingredients': [{'name': 'cognac', 'unit': 'ounces', 'quantity': '1 1/2'},
  {'name': 'orange liqueur', 'unit': 'ounce', 'quantity': '3/4'},
  {'name': 'lemon juice', 'unit': 'ounce', 'quantity': '3/4'},
  {'name': 'orange twist', 'unit': None, 'quantity': None},
  {'name': 'sugar', 'unit': None, 'quantity': None}]}

In [62]:
# Write the de-duplicated recipe records to a Parquet file.
(
    pd.DataFrame(data=master_list)
    .to_parquet("cocktail.parquet")
)

In [64]:
# Also keep a pickled copy of the recipe list.
save_object(obj=master_list, filename="cocktails.pkl")

In [3]:
# Reload the recipe list from the pickle written earlier in this notebook.
cocktails = load_object(filename="cocktails.pkl")

In [4]:
# One row per recipe; the `ingredients` column holds each recipe's list of dicts.
cocktails_df = pd.DataFrame(data=cocktails)

In [5]:
# Recipes whose stringified ingredient list mentions orange juice.
cocktails_df[cocktails_df["ingredients"].astype(str).str.contains("orange juice")]

Unnamed: 0,recipe_name,image,link,ingredients
13,Wassail Sour,https://www.liquor.com/thmb/8PuSO3jqlRDwT5d4HG...,https://www.liquor.com/wassail-sour-cocktail-r...,"[{'name': 'bourbon (or brandy)', 'unit': 'ounc..."
65,Two-Hit Fig Punch,https://www.liquor.com/thmb/fRyW415gnEC4f72vRn...,https://www.liquor.com/recipes/two-hit-fig-punch/,"[{'name': 'fresh figs', 'unit': None, 'quantit..."
92,Summerthyme Screwdriver,https://www.liquor.com/thmb/7avcnHgHmzSDhqH3D8...,https://www.liquor.com/recipes/summerthyme-scr...,"[{'name': None, 'unit': 'orange wheels', 'quan..."
101,Hairy Navel,https://www.liquor.com/thmb/vsItaRRfzCU2ZmLilL...,https://www.liquor.com/hairy-navel-cocktail-re...,"[{'name': 'vodka', 'unit': 'ounces', 'quantity..."
106,Cabana Boy,https://www.liquor.com/thmb/vy9fG6mRsuojc1QgLP...,https://www.liquor.com/recipes/cabana-boy/,"[{'name': 'vodka', 'unit': 'ounce', 'quantity'..."
112,Harvey Wallbanger,https://www.liquor.com/thmb/Xsd65hW-6LlvLb6rp0...,https://www.liquor.com/recipes/harvey-wallbanger/,"[{'name': 'vodka', 'unit': 'ounces', 'quantity..."
120,Holiday at the Hive,https://www.liquor.com/thmb/kuOf59nBkINZUPLo-_...,https://www.liquor.com/holiday-at-the-hive-coc...,"[{'name': 'water', 'unit': 'ounce', 'quantity'..."
123,Cherry Revolution,https://www.liquor.com/thmb/uxGxxs6U4f-V9bt8Pz...,https://www.liquor.com/cherry-revolution-cockt...,"[{'name': 'vodka', 'unit': 'ounces', 'quantity..."
127,Mimosa,https://www.liquor.com/thmb/Yx8cv4ajREJG3nTr3J...,https://www.liquor.com/recipes/mimosa/,"[{'name': 'orange juice', 'unit': 'ounces', 'q..."
157,Beachbum’s Own,https://www.liquor.com/thmb/P5hbpeaFafaB5jPJ7q...,https://www.liquor.com/recipes/beachbums-own/,"[{'name': 'rum', 'unit': 'ounces', 'quantity':..."


In [68]:
# Preview the first two recipes only; echoing the whole list floods the
# cell output.
cocktails[:2]

[{'recipe_name': 'Benton’s Old Fashioned',
  'image': 'https://www.liquor.com/thmb/yJ29_z002k54RrDas-xSJ4VcMNI=/720x0/filters:no_upscale():max_bytes(150000):strip_icc()/__opt__aboutcom__coeus__resources__content_migration__liquor__2018__08__14074619__bentons-old-fashioned-720x720-recipe-acc67854ebf54e9597329cc81f75e4c5.jpg',
  'link': 'https://www.liquor.com/recipes/bentons-old-fashioned/',
  'ingredients': [{'name': 'fat-washed Four Roses bourbon',
    'unit': 'ounces',
    'quantity': '2'},
   {'name': 'maple syrup', 'unit': 'ounce', 'quantity': '1/4'},
   {'name': 'bitters', 'unit': 'dashes', 'quantity': '2'},
   {'name': 'orange twist', 'unit': None, 'quantity': None}]},
 {'recipe_name': 'Normandie Old Fashioned',
  'image': 'https://www.liquor.com/thmb/jXKKgCjYLMGVJYtPfoed9aXKR-g=/720x0/filters:no_upscale():max_bytes(150000):strip_icc()/normandie-old-fashioned-720x720-primary-a8dd117866b9451da646684ce2a1680a.jpg',
  'link': 'https://www.liquor.com/recipes/normandie-old-fashioned/'

In [37]:
# Every ingredient name, normalised: lower-cased, transliterated to ASCII,
# title-cased, stripped, with "*" and a trailing comma removed. Ingredients
# without a name contribute None.
ingredients = {
    None
    if item.get("name") is None
    else re.sub("[*]|,$", "", unidecode(item.get("name").lower()).title().strip())
    for recipe in cocktails
    for item in recipe.get("ingredients")
}

In [38]:
# Normalised names of ingredients that are garnishes: either the name starts
# with "Garnish: " (any case) or the unit is exactly "garnish". Every other
# ingredient contributes None.
garnishes = {
    None
    if not (
        re.search("^Garnish: ", str(item.get("name")), flags=re.IGNORECASE)
        or item.get("unit") == "garnish"
    )
    else re.sub("[*]|,$", "", unidecode(item.get("name").lower()).title().strip())
    for recipe in cocktails
    for item in recipe.get("ingredients")
}

In [39]:
# Normalised names of ingredients whose name mentions "bitter".
# The match ignores case so capitalised names (e.g. "Angostura Bitters") are
# caught too, consistent with the case-insensitive garnish and gin searches
# elsewhere in this notebook. Non-matching ingredients still contribute None,
# exactly as before.
bitters = {
    re.sub("[*]|,$", "", unidecode(ig.get("name").lower()).title().strip())
    if re.search("bitter", str(ig.get("name")), flags=re.IGNORECASE)
    else None
    for cocktail in cocktails
    for ig in cocktail.get("ingredients")
}

In [40]:
# Normalised names of ingredients whose name mentions "syrup".
# The match ignores case so capitalised names (e.g. "Simple Syrup") are caught
# too, consistent with the case-insensitive garnish and gin searches elsewhere
# in this notebook. Non-matching ingredients still contribute None, exactly as
# before.
syrups = {
    re.sub("[*]|,$", "", unidecode(ig.get("name").lower()).title().strip())
    if re.search("syrup", str(ig.get("name")), flags=re.IGNORECASE)
    else None
    for cocktail in cocktails
    for ig in cocktail.get("ingredients")
}

In [41]:
# Drop garnishes, bitters and syrups from the ingredient names.
ingredients = ingredients.difference(garnishes, bitters, syrups)

In [None]:
# Liquor categories (the keys of liquor_links):
# bourbon
# vodka
# rum
# scotch
# rye_whiskey
# other_whiskey
# tequila_mezcal
# cognac_brandy
# other
# gins

In [52]:
# Ingredient names containing "gin" as a whole word, ignoring case.
list(filter(lambda name: re.search("\\bgin\\b", name, flags=re.IGNORECASE), ingredients))

['Ransom Old Tom Gin',
 'Aviation Gin',
 'London Dry Or Old Tom Gin',
 'Monkey 47 Gin',
 'Ki No Bi Gin (Or Other Herbaceous Dry Gin)',
 'Beefeater Gin',
 'Sloe Gin',
 'Lemongrass-Infused Gin',
 'Clear Spirit Of Choice (Such As Aquavit, Gin, Tequila, Vodka Or White Rum)',
 'Scapegrace Black Gin',
 'Gin (Plymouth Or London Dry)',
 "Gin (Such As Hendrick'S)",
 'The Block Autumn Gin',
 'Opihr Gin',
 'Gin',
 'Plymouth Gin',
 'Monopolowa Dry Gin (Or Other Citrus-Forward Gin)',
 'London Dry Gin']