# Abstract

*A problem well stated is a problem half-solved.*

*This is your space to describe your intentions for the project, before writing a single line of code. What are you studying? What are you hoping to build? If you can't explain that clearly before you start digging into the data, you're going to have a hard time planning where to go with this.*

# Obtain the Data

*Describe your data sources here and explain why they are relevant to the problem you are trying to solve.*

*Your code should download the data and save it in data/raw. If you've got the data from an offline source, describe where it came from and what the files look like. Don't do anything to the raw data files just yet; that comes in the next step.*

*After completing this step, be sure to edit `references/data_dictionary` to include descriptions of where you obtained your data and what information it contains.*

In [184]:
## %%writefile ../src/data/make_dataset.py

# Imports
from bs4 import BeautifulSoup
import requests
import json


# Parameters
drizly_drinks_path = '../data/raw/drizly_drinks.pickle'

def get_soup(url):
    """
    Given url, return soup. Return None if host unresponsive
    """
    soup = None
    r = requests.get(url, timeout=5)
    if r.ok:
        soup = BeautifulSoup(r.content, 'html.parser')
    return soup


def get_ingredients(soup):
    """
    """
    d = soup.find_all('div', attrs={'data-integration-name':'react-component'})
    info = json.loads(d[1].get('data-payload'))
    productIds = info['props']['productIDs']

    base = 'https://drizly.com/async_catalog_fetch?category_overrides'
    ids = '&product_ids[]='.join(map(str, productIds))
    req = f'{base}&product_ids[]={ids}&product_overrides'
    r = requests.get(req)
    if not r.ok:
        return {}
    products = r.json()
    
    ingr_path = '../data/raw/ingredients.pickle'
    if file_exists(ingr_path):
        ingredients = load_from(ingr_path)
    else:
        ingredients = {}
    drink_ingrs = {}
        
    for p in products['catalogItems']:
        name = p['name']
        attrs = {
            'description': p['description'],
            'url': f"https://drizly.com/{p['click_url']}", 
            'categories': p['category_names']}
        ingredients.setdefault(name, {})
        ingredients[name].update(attrs)
        drink_ingrs[name] = attrs
    save_to(ingredients, ingr_path)
    return drink_ingrs


def get_cocktail_data():
    """
    """
    drink_list = load_from(drizly_drinks_path)
    num_drinks = len(drink_list)
    failures = 0
    for drink, stats in drink_list.items():
        num_drinks -= 1
        f = ['description', 'recipeIngredient', 'name', 'recipeInstructions']
        if len(set(f) & set(stats.keys())) == len(f):
            continue
        try:
            soup = get_soup(stats['url'])
        except:
            failures += 1
            continue

        # Get fields
        data = json.loads(soup.find('script', type='application/ld+json').text)
        stats.update({key: data[key] for key in f})

        def scrape_text(field, tag, tag_class):
            tags = soup.findAll(tag, attrs={'class': tag_class})
            cleaned = lambda t: t.text.replace('\n', '').replace('\xa0', ' ').strip()
            text = [cleaned(t) for t in tags]
            stats.update({field: text})
            return
        
        items = [
            ('tags', 'li', 'Recipe__tag'),
            ('stats', 'li', 'stat'),
            ('tools', 'li', 'Tools__tool')
        ]
        
        # Get items
        for item in items:
            scrape_text(*item)
            
        # Get ingredient data
        drink_ingrs = get_ingredients(soup)
        stats.update({k: v for k, v in drink_ingrs.items()})
        
        print(f'Drinks remaining: {num_drinks}. Got a {drink}.', 
              '\r', end='')
        save_to(drink_list, drizly_drinks_path)
    return drink_list


def get_drink_list(verbose=False):
    """
    """
    url = "https://drizly.com/recipes#all-recipes"
    soup = get_soup(url)
    link_class = "recipe-link"
    data = soup.findAll('li',attrs={'class': link_class})
    drink_list = {}
    for li in data:
        l = li.find('a')
        if l:
            name = l.text.replace('\n', '').replace('\xa0', ' ').strip()
            slug = l['href']
            url = f'https://drizly.com{slug}'
            drink_list[name] = {'url': url}
    if verbose:
        for drink in drink_list.values():
            print(drink['url'])
    save_to(drink_list, drizly_drinks_path)
    return drink_list


@load_or_make(drizly_drinks_path, overwrite='y')
def scrape_drizly():
    """
    """
    get_drink_list()
    drizly_drinks = get_cocktail_data()
    print(f'Collected data on {len(drizly_drinks)} cocktails from Drizly')
    return drizly_drinks

    
def run():
    """
    Executes a set of helper functions that download data from one or more sources
    and saves those datasets to the data/raw directory.
    """
    scrape_drizly()
    # download_dataset_1(url)
    # download_dataset_2(url)
    # save_dataset_1('data/raw', filename)
    # save_dataset_2('data/raw', filename)
    drink_list = load_from(drizly_drinks_path)
    return drink_list

In [185]:
drink_list = run()

Are you sure you want to overwrite ../data/raw/drizly_drinks.pickle? y/n: y
Collected data on 671 cocktails from Drizlym Float. ioned. n. d Vodka.  . 


In [186]:
drink_list

{'Absolut Pears Mule': {'url': 'https://drizly.com/absolut-pears-mule/r-e8ac3756cf8d8709',
  'description': '<p>Mellow out your mule with the subtle sweetness of pears. The Absolut Pears Mule: sit back, sip, and repeat.</p>',
  'recipeIngredient': ['1 part Absolut Pear\n',
   '3 parts Ginger Beer\n',
   'Splash Fresh Lime Juice\n'],
  'name': 'Absolut Pears Mule',
  'recipeInstructions': 'Pour Absolut, ginger beer, and lime juice into a mule mug filled with ice cubes. Garnish with a slice of Pear\n',
  'tags': ['vodka', 'sweet'],
  'stats': ['Level: Easy', 'Prep: 3 min'],
  'tools': ['Ice cube tray', 'Swizzle stick', 'Citrus juicer'],
  'Goslings Ginger Beer': {'description': 'We didn’t steer you wrong on the whole shorts thing. Trust us on this one, too. We spent over a year to achieve the ideal balance of sweetness and spiciness and matching it to the flavour profile of our award-winning Black Seal Rum.    Only our Black Seal Rum makes a Dark ‘n Stormy genuine. Only our Ginger Beer m

## Scraping Spruce Eats

In [2]:
from bs4 import BeautifulSoup
import requests

### Get Drink List

In [40]:
def get_spruce_drinks(verbose=False):
    """
    """
    url = "https://www.thespruceeats.com/a-to-z-cocktail-recipes-3962886"
    soup = get_soup(url)
    link_class = "comp text-passage ordered-list__content-description mntl-sc-block mntl-sc-block-html"
    data = soup.findAll('div',attrs={'class': link_class})
    spruce_drinks = {}
    for div in data:
        links = div.findAll('li')
        for a in links:
            l = a.find('a')
            if l:
                name = l.text
                spruce_drinks[name] = {'url': l['href']}
    if verbose:
        for drink in spruce_drinks.values():
            print(drink['url'])
    return spruce_drinks

In [41]:
spruce_drinks = get_spruce_drinks()
spruce_drinks

{'Adult Hot Chocolate': {'url': 'https://www.thespruceeats.com/adult-hot-chocolate-recipe-759570'},
 'Alabama Slammer': {'url': 'https://www.thespruceeats.com/alabama-slammer-shooter-recipe-759572'},
 'Alexander': {'url': 'https://www.thespruceeats.com/brandy-alexander-cocktail-recipe-759278'},
 'Americano': {'url': 'https://www.thespruceeats.com/americano-cocktail-recipe-759279'},
 'Anejo Highball': {'url': 'https://www.thespruceeats.com/dale-degroffs-anejo-highball-cocktail-recipe-760491'},
 'Aperol Spritz': {'url': 'https://www.thespruceeats.com/aperol-spritz-cocktail-recipe-759259'},
 'Apple Margarita': {'url': 'https://www.thespruceeats.com/apple-margarita-recipe-759577'},
 'Apple Martini': {'url': 'https://www.thespruceeats.com/apple-martini-recipe-759579'},
 'Aviation Cocktail': {'url': 'https://www.thespruceeats.com/aviation-cocktail-recipe-760055'},
 'After Five': {'url': 'https://www.thespruceeats.com/girl-scout-cookie-shots-and-drinks-759571'},
 'Apocalypse Now': {'url': 'ht

In [38]:
link_class = "comp text-passage ordered-list__content-description mntl-sc-block mntl-sc-block-html"
data = soup.findAll('div',attrs={'class': link_class})
spruce_drinks = {}
for div in data:
    links = div.findAll('li')
    for a in links:
        l = a.find('a')
        if l:
            name = l.text
            spruce_drinks[name] = {'url': l['href']}
for drink in spruce_drinks.values():
    print(drink['url'])

https://www.thespruceeats.com/adult-hot-chocolate-recipe-759570
https://www.thespruceeats.com/alabama-slammer-shooter-recipe-759572
https://www.thespruceeats.com/brandy-alexander-cocktail-recipe-759278
https://www.thespruceeats.com/americano-cocktail-recipe-759279
https://www.thespruceeats.com/dale-degroffs-anejo-highball-cocktail-recipe-760491
https://www.thespruceeats.com/aperol-spritz-cocktail-recipe-759259
https://www.thespruceeats.com/apple-margarita-recipe-759577
https://www.thespruceeats.com/apple-martini-recipe-759579
https://www.thespruceeats.com/aviation-cocktail-recipe-760055
https://www.thespruceeats.com/girl-scout-cookie-shots-and-drinks-759571
https://www.thespruceeats.com/apocalypse-now-shooter-759576
https://www.thespruceeats.com/arnold-palmer-mocktail-recipe-760357
https://www.thespruceeats.com/autumnal-temptations-recipe-non-alcoholic-760341
https://www.thespruceeats.com/50-50-gin-martini-recipe-759550
https://www.thespruceeats.com/spring-cocktail-recipes-759873
https

In [116]:
len(spruce_drinks)

936

### Get Drink Text

In [17]:
manhattan = spruce_drinks['Manhattan']
r = requests.get(manhattan['url'], timeout=5)
manhattan_soup = BeautifulSoup(r.content, 'html.parser')

In [23]:
text_class = "comp mntl-sc-block mntl-sc-block-html"
data = manhattan_soup.findAll('div',attrs={'class': text_class})
manhattan_text = {}
for div in data:
    text = div.findAll('p')
    for t in text:
        print(t.text)

One of the finest and oldest cocktails, the Manhattan is truly a classic cocktail. It is a simple drink recipe that requires just a few ingredients. You can choose between rye whiskey and bourbon, though some drinkers still prefer a smooth Canadian whisky. There is no doubt, however, that this is one of the essential whiskey cocktails that everyone should know.
As with the gin martini, there are many ways that you can adapt the Manhattan to your personal taste. It has also inspired countless variations, but before you give those a taste, it's best to start off with the original. Even though it is an easy cocktail, there are a number of choices to be made and it all begins with deciding which whiskey to pour.
No matter how you mix up your Manhattan, you will find that it's an ideal drink for any occasion. It is perfect for a dinner party and pairs nicely with a great variety of foods. It's also a fantastic drink for a casual night with friends.
Gather the ingredients.
Pour the ingredien

### Get Drink Ingredients

In [31]:
li_class = "simple-list__item js-checkbox-trigger ingredient"
data = manhattan_soup.findAll('li',attrs={'class': li_class})
ingredients = [li.text.replace('\n', '').replace('\xa0', ' ') for li in data]
ingredients

['2 ounces rye whiskey, bourbon, or Canadian whiskey',
 '1 ounce sweet vermouth',
 '2 to 3 dashes Angostura Bitters',
 'Garnish: cherry']

## Drizly

### Get Drink List

In [2]:
def get_drizly_drink_list(verbose=False):
    """
    """
    url = "https://drizly.com/recipes#all-recipes"
    soup = get_soup(url)
    link_class = "recipe-link"
    data = soup.findAll('li', attrs={'class': link_class})
    drizly_drink_list = {}
    for li in data:
        l = li.find('a')
        if l:
            name = l.text.replace('\n', '').replace('\xa0', ' ').strip()
            slug = l['href']
            url = f'https://drizly.com{slug}'
            drizly_drink_list[name] = {'url': url}
    if verbose:
        for drink in drizly_drink_list.values():
            print(drink['url'])
    return drizly_drink_list

In [34]:
drink_list = get_drink_list()
drink_list

Are you sure you want to overwrite ../data/raw/drizly_drink_list.pickle? y/n: n


{'Absolut Pears Mule': {'url': 'https://drizly.com/absolut-pears-mule/r-e8ac3756cf8d8709'},
 'Adults Only-Frozen Hot Chocolate': {'url': 'https://drizly.com/adults-only-frozen-hot-chocolate/r-0779721198d731dd'},
 'Alabama Slammer': {'url': 'https://drizly.com/alabama-slammer/r-f0ed6be5a64cf337'},
 'All American Lemonade': {'url': 'https://drizly.com/all-american-lemonade/r-0b86893ab36abcc1'},
 'Amaretto Sour': {'url': 'https://drizly.com/amaretto-sour/r-481d961d37ae7dcb'},
 'American Mule': {'url': 'https://drizly.com/american-mule/r-dd6a2c48a6594cdc'},
 'American Sour': {'url': 'https://drizly.com/american-sour/r-9eb324bdf7f70cdb'},
 'Americano': {'url': 'https://drizly.com/americano/r-edc93c907ca56ce6'},
 'Americran Harvest': {'url': 'https://drizly.com/americran-harvest/r-96a93beb550d95bb'},
 'Amethyst Anchor': {'url': 'https://drizly.com/Amethyst_Anchor/r-18c10afe0f12f825'},
 'Anejo Rum Old-Fashioned': {'url': 'https://drizly.com/anejo-rum-old-fashioned/r-beb0675a6a7d6241'},
 'Anej

### Get Drink Text

In [162]:
def get_ingredients(soup):
    """
    """
    d = soup.find_all('div', attrs={'data-integration-name':'react-component'})
    info = json.loads(d[1].get('data-payload'))
    productIds = info['props']['productIDs']

    base = 'https://drizly.com/async_catalog_fetch?category_overrides'
    ids = '&product_ids[]='.join(map(str, productIds))
    req = f'{base}&product_ids[]={ids}&product_overrides'
    products = requests.get(req).json()
    
    ingr_path = '../data/raw/ingredients.pickle'
    if file_exists(ingr_path):
        ingredients = load_from(ingr_path)
    else:
        ingredients = {}
    drink_ingrs = {}
        
    for p in products['catalogItems']:
        name = p['name']
        attrs = {
            'description': p['description'],
            'url': f"https://drizly.com/{p['click_url']}", 
            'categories': p['category_names']}
        ingredients.setdefault(name, {})
        ingredients[name].update(attrs)
        drink_ingrs[name] = attrs
    save_to(ingredients, ingr_path)
    return drink_ingrs


def get_cocktail_data(drink_list):
    """
    """
    num_drinks = len(drink_list)
    failures = 0
    for drink, stats in drink_list.items():
        num_drinks -= 1
        f = ['description', 'recipeIngredient', 'name', 'recipeInstructions']
        if len(set(f) & set(stats.keys())) == len(f):
            continue
        try:
            soup = get_soup(stats['url'])
        except:
            failures += 1
            continue

        # Get fields
        data = json.loads(soup.find('script', type='application/ld+json').text)
        stats.update({key: data[key] for key in f})

        def scrape_text(field, tag, tag_class):
            tags = soup.findAll(tag, attrs={'class': tag_class})
            cleaned = lambda t: t.text.replace('\n', '').replace('\xa0', ' ').strip()
            text = [cleaned(t) for t in tags]
            stats.update({field: text})
            return
        
        items = [
            ('tags', 'li', 'Recipe__tag'),
            ('stats', 'li', 'stat'),
            ('tools', 'li', 'Tools__tool')
        ]
        
        # Get items
        for item in items:
            scrape_text(*item)
            
        # Get ingredient data
        drink_ingrs = get_ingredients(soup)
        stats.update({k: v for k, v in drink_ingrs.items()})
        
        print(f'Drinks remaining: {num_drinks}. Got a {drink}.', 
              '\r', end='')
            
    return drink_list

In [163]:
drink_list = {
    "Absolut Pears Mule": {"url": "https://drizly.com/absolut-pears-mule/r-e8ac3756cf8d8709"}, 
    "Adults Only-Frozen Hot Chocolate": {"url": "https://drizly.com/adults-only-frozen-hot-chocolate/r-0779721198d731dd"}, 
    "Alabama Slammer": {"url": "https://drizly.com/alabama-slammer/r-f0ed6be5a64cf337"}, 
    "All American Lemonade": {"url": "https://drizly.com/all-american-lemonade/r-0b86893ab36abcc1"}, 
    "Amaretto Sour": {"url": "https://drizly.com/amaretto-sour/r-481d961d37ae7dcb"}, 
    "American Mule": {"url": "https://drizly.com/american-mule/r-dd6a2c48a6594cdc"}
}

In [168]:
pprint(get_cocktail_data(drink_list), width=100)

{'Absolut Pears Mule': {'Absolut Pears': {'categories': ['Liquor', 'Vodka', 'Flavored Vodka'],
                                          'description': 'Introduced in 2007, Absolut Pears is '
                                                         'quite a newcomer in the Absolut family. '
                                                         'And as with all talented rookies, the '
                                                         'audience soon took it to their hearts. '
                                                         'Probably because it’s such a good '
                                                         'alternative to other overly sugared '
                                                         'alternatives around.    Absolut Pears is '
                                                         'made exclusively from natural '
                                                         'ingredients, and unlike some other '
                                   

In [63]:
url = 'https://drizly.com/alabama-slammer/r-f0ed6be5a64cf337'
f = ['description', 'recipeIngredient', 'name', 'recipeInstructions']
soup = get_soup(url)

data = json.loads(soup.find('script', type='application/ld+json').text)        
print({key: data[key] for key in f})

{'description': 'This fruity and somewhat nutty flavored highball drink is said to have been created at the University of Alabama. The Alabama Slammer gets its name from the fact that the drink can be served on the rocks or  "slammed" as a shooter.', 'recipeIngredient': ['1 oz Southern Comfort Liqueur\n', '1 oz Sloe Gin', '1 oz Amaretto', '2 oz Orange Juice'], 'name': 'Alabama Slammer', 'recipeInstructions': 'Add all ingredients to a shaker filled with ice and shake until cold. Strain into a highball glass with fresh ice then garnish with an orange and cherry'}


In [61]:
ingrs = soup.findAll('li', attrs={'class': 'CatalogItem'})
[ingr.text for ingr in ingrs]

[]

In [64]:
soup

<!DOCTYPE html>

<html lang="en">
<!--

    .___      .__       .__
  __| _/______|__|______|  | ___.__.
 / __ |\_  __ \  \___   /  |<   |  |
/ /_/ | |  | \/  |/    /|  |_\___  |
\____ | |__|  |__/_____ \____/ ____|
     \/                \/    \/

We're hiring @ https://jobs.lever.co/drizly

-->
<head prefix="og: http://ogp.me/ns#">
<title>
  Alabama Slammer Recipe | Drizly
  </title>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta charset="utf-8"/>
<meta content="Simple and Delicious Alabama Slammer Recipe. We'll Even Deliver all of the Ingredients to Your Door in the Next Hour! Let's Get the Party Started." name="description"/>
<meta content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no" name="viewport"/>
<meta content="BB7A7E6CB8BCDABBADBD207627642FCF" name="msvalidate.01">
<meta content="nopin" name="pinterest">
<meta content="3579689062cae3b8ea070dc0ced2c9a9" name="p:domain_verify">
<!-- App Link Tags, used for iOS Universal Links -->
<meta 

In [66]:
productIds = soup.find('productIds')
print(productIds)

None


In [None]:
soup.find('div', attrs)

In [81]:
for i in soup.find_all('div', attrs={'data-integration-name':'react-component'}):
    info = json.loads(i.get('data-payload'))
info.keys()

dict_keys(['props', 'name'])

In [109]:
d = soup.find_all('div', attrs={'data-integration-name':'react-component'})
info = json.loads(d[1].get('data-payload'))
productIds = info['props']['productIDs']
info['props'].keys()
ingredients = {}


In [122]:
base = 'https://drizly.com/async_catalog_fetch?category_overrides'
ids = '&product_ids[]='.join(map(str, productIds))
req = f'{base}&product_ids[]={ids}&product_overrides'
req

'https://drizly.com/async_catalog_fetch?category_overrides&product_ids[]=4784&product_ids[]=7107&product_ids[]=650&product_ids[]=364&product_overrides'

In [142]:
products = requests.get(req).json()

In [124]:
products = ings['catalogItems'][1]
print(juice.keys())


dict_keys(['catalog_item_id', 'default_variant_id', 'name', 'brand_name', 'variants', 'sizes', 'description', 'image_url', 'category_ids', 'categories', 'category_names', 'category_path', 'related', 'click_url', 'abv', 'available_now', 'price_range', 'price_raw', 'extra_data', 'popular', 'special_offer', 'brand', 'availability_estimate', 'badges', 'list', 'position', 'region_path', 'tagline', 'flexible_attributes', 'attribute_class_id', 'deal_stats', 'product_rating'])


In [156]:
def get_ingredients(soup):
    """
    """
    d = soup.find_all('div', attrs={'data-integration-name':'react-component'})
    info = json.loads(d[1].get('data-payload'))
    productIds = info['props']['productIDs']

    base = 'https://drizly.com/async_catalog_fetch?category_overrides'
    ids = '&product_ids[]='.join(map(str, productIds))
    req = f'{base}&product_ids[]={ids}&product_overrides'
    products = requests.get(req).json()
    
    ingr_path = '../data/raw/ingredients.pickle'
    if file_exists(ingr_path):
        ingredients = load_from(ingr_path)
    else:
        ingredients = {}
    drink_ingrs = {}
        
    for p in products['catalogItems']:
        name = p['name']
        attrs = {
            'description': p['description'],
            'url': f"https://drizly.com/{p['click_url']}", 
            'categories': p['category_names']}
        ingredients.setdefault(name, {})
        ingredients[name].update(attrs)
        drink_ingrs[name] = attrs
    save_to(ingredients, ingr_path)
    return drink_ingrs

In [158]:
ings = get_ingredients(soup)

In [None]:
a

In [52]:
def scrape_text(field, tag, tag_class):
    tags = soup.findAll(tag, attrs={'class': tag_class})
    cleaned = lambda t: t.text.replace('\n', '').replace('\xa0', ' ').strip()
    text = [cleaned(t) for t in tags]
#     stats.update({field: text})
    return {field: text}

items = [
    ('tags', 'li', 'Recipe__tag'),
    ('stats', 'li', 'stat'),
    ('tools', 'li', 'Tools__tool')
]

# Get items
for item in items:
    print(scrape_text(*item))

{'tags': ['liqueur', 'fruity']}
{'stats': ['Level: Easy', 'Prep: 3 min']}
{'tools': ['Shaker', 'Jigger', 'Hawthorne strainer']}


In [49]:
scrape_text('li', 'Tools__tool')

['Shaker', 'Jigger', 'Hawthorne strainer']

In [46]:
li_class = 'Tools__tool'
lis = soup.findAll('li', attrs={'class': li_class})
print([li.text.replace('\n', '') for li in lis])

['Shaker', 'Jigger', 'Hawthorne strainer']


In [41]:
tags

[<li class="Recipe__tag">liqueur</li>, <li class="Recipe__tag">fruity</li>]

In [7]:
drizly_drinks = get_drizly_descriptions(drizly_drink_list)

with open('../data/raw/drizly_drinks.json', 'w') as outfile:  
    json.dump(drizly_drinks, outfile)

Drinks remaining: 0. Got a 7UP Pineapple Rum Float. ioned. n. d Vodka.  . 

In [184]:
drizly_drinks

{'Absolut Pears Mule': {'url': 'https://drizly.com/absolut-pears-mule/r-e8ac3756cf8d8709',
  'description': '<p>Mellow out your mule with the subtle sweetness of pears. The Absolut Pears Mule: sit back, sip, and repeat.</p>',
  'recipeIngredient': ['1 part Absolut Pear\n',
   '3 parts Ginger Beer\n',
   'Splash Fresh Lime Juice\n'],
  'name': 'Absolut Pears Mule',
  'recipeInstructions': 'Pour Absolut, ginger beer, and lime juice into a mule mug filled with ice cubes. Garnish with a slice of Pear\n'},
 'Adults Only-Frozen Hot Chocolate': {'url': 'https://drizly.com/adults-only-frozen-hot-chocolate/r-0779721198d731dd',
  'recipeIngredient': ['1 oz Kahlua',
   '2 oz Bourbon\n',
   '2-3 drops of Vanilla Extract',
   '3-4 Frozen Hot Chocolate Ice Cubes'],
  'name': 'Adults Only-Frozen Hot Chocolate',
  'recipeInstructions': 'In a blender combine all ingredients and blend until smooth. Serve in mugs'},
 'Alabama Slammer': {'url': 'https://drizly.com/alabama-slammer/r-f0ed6be5a64cf337',
  'd

In [177]:
k = ['description', 'recipeIngredient', 'name', 'recipeInstructions']
len(set(['description', 'recipeIngredient']) & (set(k)))

2

In [117]:
boy = {'cat': 3, 'dog': 4}

In [137]:
if 'monkey' not in boy.keys():
    boy.update({'cat': 5})
boy['monkey'] = 'raaaah'

In [136]:
boy

{'cat': 5, 'dog': 4, 'cow': 5, 'monkey': 'raaaah'}

In [133]:
boy['cat'] = 8

In [138]:
continue?

Object `continue` not found.


In [150]:
for i in range(100000):
    print(i, '\r', end='')

99999                       

# Scrub the Data

*Look through the raw data files and see what you will need to do to them in order to have a workable data set. If your source data is already well-formatted, you may want to ask yourself why it hasn't already been analyzed and what other people may have overlooked when they were working on it. Are there other data sources that might give you more insights on some of the data you have here?*

*The end goal of this step is to produce a [design matrix](https://en.wikipedia.org/wiki/Design_matrix), containing one column for every variable that you are modeling, including a column for the outputs, and one row for every observation in your data set. It needs to be in a format that won't cause any problems as you visualize and model your data.*

In [247]:
## %%writefile ../src/features/build_features.py

# Imports
import re

# Defaults
int_drink_path = '../data/interim/drink_list.pickle'
# Helper functions go here
@load_or_make(int_drink_path, overwrite='y')
def make_dirty_bags(drink_list):
    
    non_ingrs = ['url', 'description', 'recipeIngredient', 'name', 
                 'recipeInstructions', 'tags', 'stats', 'tools']
    strs = ['description', 'name', 'recipeInstructions']
    lists = ['recipeIngredient', 'tags', 'tools']
    for drink, stats in drink_list.items():
        print(drink, '\r', end='')
        bag = []
        ingrs = [key for key in stats.keys() if key not in non_ingrs]
        for k, v in stats:
            if k in ['url', 'recipeIngredient']:
                continue
            if type(k) == str and k not in ingrs:
                bag.append(v)
            if type(k) == list:
                bag.append(' '.join(v))
            if k in ingrs:
                if v['description']:
                    bag.append(v['description']
                if v['categories']:
                    bag.append(' '.join(v['categories']))

#         bag.append(stats['name'])
#         bag.append(stats['description'])
#         for l in lists:
#             bag.append(' '.join(stats[l]))
#         ingrs = [k for k in stats.keys() if k not in non_ingrs]
#         for ingr in ingrs:
#             bag.append(stats[ingr]['description'])
#             bag.append(' '.join(stats[ingr]['categories']))
        bag = ' '.join(map(str, bag)).lower()
        patterns = ['<.*>', '\n', '\xa0', '   ', '  ']
        for pattern in patterns:
            bag = re.sub(pattern, ' ', bag)
        drink_list[drink].update({'bag': bag})
    save_to(drink_list, int_drink_path)
    return drink_list


def clean_drizly_drinks():
    drizly_drinks_path = '../data/raw/drizly_drinks.pickle'
    drink_list = load_from(drizly_drinks_path)
    drink_list = make_dirty_bags(drink_list)
    return drink_list
    

def run():
    """
    Executes a set of helper functions that read files from data/raw, cleans them,
    and converts the data into a design matrix that is ready for modeling.
    """
    
    clean_drizly_drinks()
    # clean_dataset_2('data/raw', filename)
    # save_cleaned_data_1('data/interim', filename)
    # save_cleaned_data_2('data/interim', filename)
    # build_features()
    # save_features('data/processed')
    return drink_list


SyntaxError: invalid syntax (<ipython-input-247-b68a0a1b7430>, line 30)

In [246]:
drink_list = run()

KeyboardInterrupt: 

In [215]:
drizly_drinks_path = '../data/raw/drizly_drinks.pickle'
drink_list = load_from(drizly_drinks_path)

In [189]:
pprint(drink_list)

{'': {'url': 'https://drizly.com/r-f212ef6243b6396a'},
 '1-2 Punch': {'7UP': {'categories': ['Extras',
                                      'Soda, Water, & Soft Beverages',
                                      'Soda & Sparkling Beverages',
                                      'Lemon-Lime Soda'],
                       'description': 'The light, crisp, lemon and lime flavor '
                                      'of 7UP makes it a refreshing drink on '
                                      'its own, and the perfect addition to '
                                      'recipes for every occasion. From '
                                      'cocktails to mocktails and even baked '
                                      'goods, mix it up a little at your next '
                                      'get-together with 7UP.',
                       'url': 'https://drizly.com//extras/beverages/soda-sparkling-drinks/lemon-lime-soda/7up/p563'},
               'Canada Dry Ginger Ale': {'categ

                                              'fact, let’s just say testing '
                                              'this particular cocktail recipe '
                                              'was super fun...because I could '
                                              'not stop drinking them.',
                               'name': 'Apple Cider Old Fashioned',
                               'recipeIngredient': ['½ teaspoon sugar',
                                                    '2 dashes of bitters\n',
                                                    '4 oz apple cider',
                                                    '3 oz bourbon\n',
                                                    'apple slices for garnish, '
                                                    'optional',
                                                    '4 ice cubes'],
                               'recipeInstructions': 'In a glass add the sugar '
                                  

                                                             'for relaxing and '
                                                             'recharging. Its '
                                                             'ginger flavor '
                                                             'and carbonation '
                                                             'make it a crowd '
                                                             'pleasing '
                                                             'beverage choice, '
                                                             'while also being '
                                                             'a versatile '
                                                             'mixer that can '
                                                             'spark any '
                                                             'occasion. With '
                                                          

                                   'Solerno Blood Orange Liqueur': {'categories': ['Liquor',
                                                                                   'Liqueur, '
                                                                                   'Cordials, '
                                                                                   '& '
                                                                                   'Schnapps',
                                                                                   'Citrus '
                                                                                   '& '
                                                                                   'Triple '
                                                                                   'Sec '
                                                                                   'Liqueur'],
                                                                    'descr

                                                                           'a '
                                                                           'spoon '
                                                                           'or '
                                                                           'a '
                                                                           'classic '
                                                                           'twisted '
                                                                           'swizzle '
                                                                           'stick. '
                                                                           "Bailey's "
                                                                           'is '
                                                                           'an '
                                                                           'infinit

                                                                    'is an '
                                                                    'infinitely '
                                                                    'usable '
                                                                    'bar '
                                                                    'pantry '
                                                                    'ingredient '
                                                                    'you '
                                                                    'should '
                                                                    'always '
                                                                    'have '
                                                                    'around. '
                                                                    'Don’t '
                                                                 

                                  'tools': ['Shaker',
                                            'Old fashioned',
                                            'Jigger'],
                                  'url': 'https://drizly.com/cazadores-jalapeno-margarita/r-641bf39be5852581'},
 'Cazadores Paloma': {'Cazadores Tequila Blanco': {'categories': ['Liquor',
                                                                  'Tequila',
                                                                  'Silver / '
                                                                  'Blanco '
                                                                  'Tequila'],
                                                   'description': 'Awarded a '
                                                                  '93-point '
                                                                  'rating & '
                                                                  'Top 100 '
                     

                                                                           'takes '
                                                                           'some '
                                                                           'practice '
                                                                           'and '
                                                                           'touch, '
                                                                           'but '
                                                                           "it's "
                                                                           'an '
                                                                           'old-school '
                                                                           'skill '
                                                                           'that '
                                                                           'w

                      'recipeInstructions': 'Pour Cointreau and fresh lime '
                                            'juice into a glass. Add ice, top '
                                            'with club soda and stir. Garnish '
                                            'with a lime and orange zest',
                      'stats': ['Level: Easy', 'Prep: 3 min.'],
                      'tags': ['liqueur', 'sweet'],
                      'tools': ['Lowball glass', 'Bar spoon', 'Citrus juicer'],
                      'url': 'https://drizly.com/Cointreau-Rickey/r-12ad61708eeb568b'},
 'Cointreau Strawberry Basil Rickey': {'Canada Dry Club Soda': {'categories': ['Extras',
                                                                               'Soda, '
                                                                               'Water, '
                                                                               '& '
                                                      

 'Double Oaked Old Fashioned': {'Angostura Bitters': {'categories': ['Extras',
                                                                     'Mixers, '
                                                                     'Syrups, '
                                                                     '& '
                                                                     'Bitters',
                                                                     'Bitters'],
                                                      'description': 'You '
                                                                     'can’t—or '
                                                                     'at least '
                                                                     'you '
                                                                     'shouldn’t—make '
                                                                     'an Old '
                                              

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)




                                                 'lighter, drier and less '
                                                 'pungent than their larger '
                                                 'commercial counterparts. The '
                                                 'particular mixture of plants '
                                                 'found near Chambéry give a '
                                                 'fresh, restrained and '
                                                 'elegant nose, with a subtle, '
                                                 'complex bittersweet palate. '
                                                 'Even the Blanc and Rouge '
                                                 'retain great balance, with '
                                                 'the sugar never cloying, and '
                                                 'just enough bitterness to '
                                                 'whet

                                                                                'and '
                                                                                'Roman '
                                                                                'camomile. '
                                                                                'Used '
                                                                                'as '
                                                                                'an '
                                                                                'ingredient '
                                                                                'in '
                                                                                'many '
                                                                                'cocktails, '
                                                                                'the '
                      

                                                  'juice drink.',
                                   'url': 'https://drizly.com//extras/mixers-syrups-bitters/bar-mixers/bar-mixers/roses-lime-juice/p4304'},
             'Stirrings Simple Syrup': {'categories': ['Extras',
                                                       'Mixers, Syrups, & '
                                                       'Bitters',
                                                       'Bar Mixers'],
                                        'description': 'Stirrings Cocktail '
                                                       'Simple Syrup is a '
                                                       'starting point for '
                                                       'many great cocktails. '
                                                       'Set the foundation of '
                                                       'your delicious drink '
                                                

                                                                             'sour/sweet/spirit '
                                                                             'formula '
                                                                             'and '
                                                                             'do '
                                                                             'it '
                                                                             'your '
                                                                             'way.\n'
                                                                             '\n'
                                                                             'Buy '
                                                                             'Pinnacle '
                                                                             'Original '
                                                 

                                        'Tequila\n',
                                        '1 part Blood Orange Juice',
                                        '\n1/2 part Lime Juice',
                                        '\n1/2 part Simple Syrup\n',
                                        '\n1 Blood Orange Wedge\n'],
                   'recipeInstructions': 'Combine all\n'
                                         'ingredients into a shaker with ice '
                                         'and shake. Strain into glass over '
                                         'ice and\n'
                                         'serve\n',
                   'stats': ['Level: Easy', 'Prep: 3 min'],
                   'tags': ['tequila', 'margarita'],
                   'tools': ['Lowball glass', 'Shaker', 'Hawthorne strainer'],
                   'url': 'https://drizly.com/red-red-rita/r-a5853853217aac75'},
 'Red, White, and Rosé': {'Canada Dry Club Soda': {'categories': ['Extras',
       

                                                                   'an '
                                                                   'alcoholic, '
                                                                   'highly '
                                                                   'concentrated, '
                                                                   'botanical '
                                                                   'concoction '
                                                                   'used to '
                                                                   'give your '
                                                                   'cocktail a '
                                                                   'bitter, '
                                                                   'sour, or '
                                                                   'bittersweet '
                                              

                                                                           'to '
                                                                           'our '
                                                                           'Master '
                                                                           'Brewers '
                                                                           'using '
                                                                           'our '
                                                                           'famous '
                                                                           'Guinness '
                                                                           'yeast '
                                                                           'and '
                                                                           'Mosaic '
                                                                           'a

                                                   'and apples to create the '
                                                   'ultimate cocktail for all '
                                                   'season long.&nbsp; With a '
                                                   'little bourbon kick, this '
                                                   'drink is sure to keep your '
                                                   'blood warm and the ghosts '
                                                   'and goblins away!',
                                    'name': 'Spiced Caramel Apple Hot Toddy',
                                    'recipeIngredient': ['3 Oz. Bulleit '
                                                         'Bourbon',
                                                         '1 Cup Apple Cider',
                                                         '1/8 Cup Fresh Orange '
                                                         'Juice + 1 w

                                'Stirrings Simple Syrup': {'categories': ['Extras',
                                                                          'Mixers, '
                                                                          'Syrups, '
                                                                          '& '
                                                                          'Bitters',
                                                                          'Bar '
                                                                          'Mixers'],
                                                           'description': 'Stirrings '
                                                                          'Cocktail '
                                                                          'Simple '
                                                                          'Syrup '
                                                                          'i

                                                          'Soda, Water, & Soft '
                                                          'Beverages',
                                                          'Juice',
                                                          'Lemonade'],
                                           'description': 'Enjoy refreshing, '
                                                          'all-natural '
                                                          'lemonade with no '
                                                          'added preservatives '
                                                          'colors or '
                                                          'artificial flavors.',
                                           'url': 'https://drizly.com//extras/beverages/juice/lemonade/simply-lemonade/p4633'},
                       'description': '<p>Not a drink to end the day, but one '
                                      '

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [210]:
non_ingrs = ['url', 'description', 'recipeIngredient', 'name', 
             'recipeInstructions', 'tags', 'stats', 'tools']
ingrs = []
drink_list['Absolut Pears Mule'].keys()


dict_keys(['url', 'description', 'recipeIngredient', 'name', 'recipeInstructions', 'tags', 'stats', 'tools', 'Goslings Ginger Beer', 'Absolut Pears'])

In [231]:
import re
drizly_drinks_path = '../data/raw/drizly_drinks.pickle'
drink_list = load_from(drizly_drinks_path)

non_ingrs = ['url', 'description', 'recipeIngredient', 'name', 
             'recipeInstructions', 'tags', 'stats', 'tools']
lists = ['recipeIngredient', 'tags', 'tools']
for drink, stats in drink_list.items():
    bag = []
    bag.append(stats['name'])
    bag.append(stats['description'])
    for l in lists:
        bag.append(' '.join(stats[l]))
    ingrs = [k for k in stats.keys() if k not in non_ingrs]
    for ingr in ingrs:
        bag.append(stats[ingr]['description'])
        bag.append(' '.join(stats[ingr]['categories']))
    bag = ' '.join(bag).lower()
    patterns = ['<.*>', '\n', '\xa0', '   ', '  ']
    for pattern in patterns:
        bag = re.sub(pattern, ' ', bag)
    drink_list[drink].update({'bag': bag})
    break
print(drink_list['Absolut Pears Mule']['bag'])

absolut pears mule 1 part absolut pear 3 parts ginger beer splash fresh lime juice vodka sweet ice cube tray swizzle stick citrus juicer we didn’t steer you wrong on the whole shorts thing. trust us on this one, too. we spent over a year to achieve the ideal balance of sweetness and spiciness and matching it to the flavour profile of our award-winning black seal rum. only our black seal rum makes a dark ‘n stormy genuine. only our ginger beer makes it perfect. extras soda, water, & soft beverages soda & sparkling beverages ginger beer introduced in 2007, absolut pears is quite a newcomer in the absolut family. and as with all talented rookies, the audience soon took it to their hearts. probably because it’s such a good alternative to other overly sugared alternatives around. absolut pears is made exclusively from natural ingredients, and unlike some other flavored vodkas, it doesn’t contain any added sugar. absolut pears is fresh and delicate, with a smack of mellow pears and a long fr

In [217]:
import re

In [228]:
x = 'Im a boy and <brdfg> your \n a grl'
re.sub('\n', ' ', x)

'Im a boy and <brdfg> your   a grl'

*Before moving on to exploratory analysis, write down some notes about challenges encountered while working with this data that might be helpful for anyone else (including yourself) who may work through this later on.*

# Explore the Data

*Before you start exploring the data, write out your thought process about what you're looking for and what you expect to find. Take a minute to confirm that your plan actually makes sense.*

*Calculate summary statistics and plot some charts to give you an idea what types of useful relationships might be in your dataset. Use these insights to go back and download additional data or engineer new features if necessary. Not now though... remember we're still just trying to finish the MVP!*

In [None]:
## %%writefile ../src/visualization/visualize.py

# imports
# helper functions go here

def run():
    """
    Executes a set of helper functions that read files from data/processed,
    calculates descriptive statistics for the population, and plots charts
    that visualize interesting relationships between features.
    """
    # data = load_features('data/processed')
    # describe_features(data, 'reports/')
    # generate_charts(data, 'reports/figures/')
    pass


*What did you learn? What relationships do you think will be most helpful as you build your model?*

# Model the Data

*Describe the algorithm or algorithms that you plan to use to train with your data. How do these algorithms work? Why are they good choices for this data and problem space?*

In [None]:
## %%writefile ../src/models/train_model.py

# imports
# helper functions go here

def run():
    """
    Executes a set of helper functions that read files from data/processed,
    calculates descriptive statistics for the population, and plots charts
    that visualize interesting relationships between features.
    """
    # data = load_features('data/processed/')
    # train, test = train_test_split(data)
    # save_train_test(train, test, 'data/processed/')
    # model = build_model()
    # model.fit(train)
    # save_model(model, 'models/')
    pass


In [None]:
## %%writefile ../src/models/predict_model.py

# imports
# helper functions go here

def run():
    """
    Executes a set of helper functions that read files from data/processed,
    calculates descriptive statistics for the population, and plots charts
    that visualize interesting relationships between features.
    """
    # test_X, test_y = load_test_data('data/processed')
    # trained_model = load_model('models/')
    # predictions = trained_model.predict(test_X)
    # metrics = evaluate(test_y, predictions)
    # save_metrics('reports/')
    pass



_Write down any thoughts you may have about working with these algorithms on this data. What other ideas do you want to try out as you iterate on this pipeline?_

# Interpret the Model

_Write up the things you learned, and how well your model performed. Be sure address the model's strengths and weaknesses. What types of data does it handle well? What types of observations tend to give it a hard time? What future work would you or someone reading this might want to do, building on the lessons learned and tools developed in this project?_

# Universal Helpers

In [179]:
import os
import pickle

def load_or_make(filepath, overwrite='n'):
    def decorator(func):
        def wraps(*args, **kwargs):
            if overwrite == 'y':
                ow = input(f'Are you sure you want to overwrite {filepath}? y/n: ')
                if (os.path.exists(filepath)) and (ow == 'y'):
                    os.remove(filepath)
            try:
                with open(filepath, 'rb') as f:
                    data = pickle.load(f)
            except:
                data = func(*args, **kwargs)
                with open(filepath, 'wb') as to_write:
                    pickle.dump(data, to_write)
            return data
        return wraps
    return decorator


def save_to(item, filepath):
    """
    Pickles item sand saves it to path
    Input: object to be pickled, string containing directory and filename
    Output: pickled object stored to provided path
    """
    with open(filepath, 'wb') as to_write:
        pickle.dump(item, to_write)
    return


def load_from(filepath):
    """
    Unpickles item and returns item from path
    Input: filepath to pickled object
    Output: unpickled object
    """
    with open(filepath, 'rb') as f:
        item = pickle.load(f)
    return item


def file_exists(filepath):
    """
    Returns True if specified file already exists, else False
    Input:
        path (str), path to directory containing file
        filename (str), name of file to check for
    Output: True if specified file already exists, else False
    """
    file_exists = os.path.isfile(filepath)
    return file_exists



In [29]:
@load_or_make('../data/raw/test.pickle', overwrite='y')
def make_long_list(n):
    return [x**3 for x in range(1, n + 1)]

In [31]:
make_long_list(10)

Are you sure you want to overwrite ../data/raw/test.pickle? y/n: y


[1, 8, 27, 64, 125, 216, 343, 512, 729, 1000]

In [26]:
load_from('../data/raw/test.pickle')

[1, 8, 27, 64, 125, 216, 343, 512, 729, 1000]