# Categorize Ingredients

In [2]:
import re
import json
import time
import pickle
from collections import defaultdict

import spacy
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

In [3]:
%%time
OPTIONS = Options()
OPTIONS.add_argument('--headless')
# make sure Homebrew is installed
# then run 'brew tap homebrew/cask && brew cask install chromedriver'
driver = webdriver.Chrome(options=OPTIONS)

CPU times: user 5.62 ms, sys: 8.55 ms, total: 14.2 ms
Wall time: 1.33 s


## Parse Ingredients

In [4]:
# python3 -m spacy download en
nlp = spacy.load('en')

def tokenize(line):
    return [(token.text, token.tag_) for token in nlp(line)]

In [151]:
type_exceptions = ['can', 'tablespoon', 'oz', 'clove', 'flanken', 'chili', 'chile', 'cilantro', 'rosemary', 'nutmeg', 'Parmesan']

for exception in type_exceptions:
    print(tokenize(exception), spacy.explain(tokenize(exception)[0][1]))

[('can', 'MD')] verb, modal auxiliary
[('tablespoon', 'VB')] verb, base form
[('oz', 'XX')] unknown
[('clove', 'VB')] verb, base form
[('flanken', 'JJ')] adjective
[('chili', 'JJ')] adjective
[('chile', 'JJ')] adjective
[('cilantro', 'UH')] interjection
[('rosemary', 'JJ')] adjective
[('nutmeg', 'JJ')] adjective
[('Parmesan', 'NNP')] noun, proper singular


In [158]:
NOUN_TYPES = ['NN', 'NNS', 'NNP', 'NNPS']
NOUN_TYPE_EXCEPTIONS = ['gochujang', 'parsley', 'garlic', 'chili', 'chile', 'substitute', 'cream', 'flanken', 'such']
ADJECTIVE_TYPE_EXCEPTIONS = ['ground', 'skinless', 'boneless', 'Parmesan']

In [159]:
def numerical(line):
    # replace everything to '' except whitespace, alphanumeric character
    line = re.sub(r'[^\w\s]', '', line)
    token_tag_pairs = tokenize(line)
    for pair in token_tag_pairs:
        # if the word is not numerical
        if not pair[1] == "CD":
            return False
    return True

def nouns_only(line):
    # replace everything to '' except whitespace, alphanumeric character
    line = re.sub(r'[^\w\s]', '', line)
    token_tag_pairs = tokenize(line)
    for pair in token_tag_pairs:
        # if the word is not a noun or cardinal number
        if (not (pair[1] in NOUN_TYPES) or pair[0] in ADJECTIVE_TYPE_EXCEPTIONS) and pair[0] not in NOUN_TYPE_EXCEPTIONS:
            return False
    return True

In [160]:
def extract_brackets(line):
    # find '(abc)' where 'abc' is in arbitrary length and 'abc' does not contain brackets
    pattern = re.compile(r'\([^\(\)]*\)') 
    match = re.findall(pattern, line)
    if len(match) != 0:
        return match
    
def extract_preparation(line):
    # find ', abc' or ' - abc' where 'abc' is in arbitrary length
#     match = re.findall(re.compile(r'\b[,-] [^\(\)]*'), line)
    match = re.findall(re.compile(r'[^.], .*| - .*'), line)
    if len(match) != 0:
        if '(' not in match[-1] and match[-1][-1] == ')':
            return match[-1][1:-1]
        else:
            return match[-1][1:]
    
def extract_descriptor(ingredient_name):
    descriptor = []
    token_tag_pairs = []
    
    for element in ingredient_name.split():
        # treat compound word with hyphen as an adjective
        if '-' in element:
            token_tag_pairs.append((element, 'JJ'))
        else:
            token_tag_pairs.append([(token.text, token.tag_) for token in nlp(element)][0])

    for pair in token_tag_pairs:
        # if the word is an adjective, an adverb, or a past participle of a verb, or exception like 'ground'
        if pair[1] == "JJ" or pair[1] == "RB" or pair[1] == "VBN" or pair[0] in ADJECTIVE_TYPE_EXCEPTIONS:
            if pair[0] not in NOUN_TYPE_EXCEPTIONS:
                descriptor.append(pair[0])
    if len(descriptor) != 0:
        return ' '.join(descriptor)
        
def extract_all(line):
    noun_type_exceptions = ['can', 'tablespoon', 'oz', 'clove']
    not_measurements = ['jalapeno', 'roma']
    measurement = None
    quantity_in_brackets = None
    quantity_split = []
    pre_preparation = []
    
    # extract preparation
    preparation = extract_preparation(line)
    if preparation:
        line = line.replace(preparation, '')
        # remove 'x, ' prefix
        preparation = preparation[2:].strip()
    
    # extract backets
    brackets = extract_brackets(line)
    if brackets:
        # check the first bracket
        # if no numerical value or line_split length > 3 
        if not any(char.isdigit() for char in brackets[0]) or len(brackets[0].split()) > 3:
            pre_preparation.append(brackets[0][1:-1])
        else:
            quantity_in_brackets = brackets[0]
        # check the rest brackets if any
        if len(brackets) > 1:
            for b in brackets[1:]:
                pre_preparation.append(b[1:-1])
        for b in brackets:
            line = re.sub(r'\({0}\)'.format(b), '', line)  
        
    line_split = line.split()
    # extract quantity from the first word if the word contains a digit
    if any(char.isdigit() for char in line_split[0]):
        quantity_split.append(line_split[0])
    
        # extract quantity from the second word if the word contains a digit
        if any(char.isdigit() for char in line_split[1]):
            quantity_split.append(line_split[1])
            # measurement index
            i = 2
            # check for special case
            if line_split[2] == 'oz':
                quantity_split.append('oz')
                i = 3
            # check measurement type
            if (nouns_only(line_split[i]) or line_split[i] in noun_type_exceptions) and line_split[i] not in not_measurements:
                measurement = line_split[i]
        else:
            # check line_split length and measurement type for cases like '1 egg' or '1/2 onion, chopped' or '1 large tomato, seeded and chopped'
            if len(line_split) > 2 and (nouns_only(line_split[1]) or line_split[1] in noun_type_exceptions) and line_split[1] not in not_measurements:
                measurement = line_split[1]
        line = re.sub(r'{0}'.format(' '.join(quantity_split)), '', line)
    
    if measurement:
        line = re.sub(r'{0}'.format(measurement), '', line)
    
    # append quantity in backets at the end
    if quantity_in_brackets:
        quantity_split.append(quantity_in_brackets)
    
    ingredient_name = line.strip()

    # extract descriptor from ingredient_name
    descriptor = extract_descriptor(ingredient_name)

    # extract ingredient
    ingredient = ingredient_name
    if descriptor:
        for i in descriptor.split():
            ingredient = re.sub(r'[ ]?\b{0}\b'.format(i), '', ingredient).strip()
    if ingredient == '':
        ingredient = ingredient_name

    # cases to add to descriptor
    if pre_preparation:
        if descriptor is None:
            descriptor = ', '.join(pre_preparation)
        else:
            descriptor += ', ' + ', '.join(pre_preparation)
    
    if preparation and 'or' in preparation:
        if descriptor is None:
            descriptor = preparation
        else:
            descriptor += ', ' + preparation
        preparation = None
    
    # add 'to taste' to quantity if any
    if 'to taste' in ingredient:
        quantity_split.append('to taste')
    quantity = ' '.join(quantity_split)
    if quantity == '':
        quantity = None
    
    # remove ' to taste' in ingredient if any
    ingredient = re.sub(r'(or)? to taste', '', ingredient)
    ingredient = ' '.join(ingredient.split())
    
    # if the extracted ingredient is not noun
    if not nouns_only(ingredient):
        ingredient_name = ingredient
        if preparation:
            ingredient_name += ' ' + preparation
        ingredient_name = ingredient_name.replace(' -', ',')
        preparation = extract_preparation(ingredient_name)
        ingredient_name = re.sub(r'{0}'.format(preparation), '', ingredient_name)
        if preparation:
            preparation = preparation[2:].strip()

        descriptor = extract_descriptor(ingredient_name)

        ingredient = ingredient_name
        if descriptor:
            for i in descriptor.split():
                ingredient = re.sub(r'[ ]?\b{0}\b'.format(i), '', ingredient).strip()
            if ingredient == '':
                ingredient = ingredient_name
    
    return quantity, measurement, descriptor, ingredient, preparation

In [161]:
# test on tricky cases
# extract_all('1 egg')
# extract_all('1 jalapeno pepper, diced')
# extract_all('4 roma (plum) tomatoes, chopped')
# extract_all('1 1/2 pounds colossal shrimp, EZ-peel type (deveined and shells split down the back)')
# extract_all('4 pounds Korean-style short ribs (beef chuck flanken, cut 1/3 to 1/2 inch thick across bones)')
# extract_all('1/2 (14 ounce) package vegetarian ground beef (e.g., Gimme Lean TM)')
# extract_all('1 0.42 oz packet concentrated vegetable base (such as Swanson® Vegetable Flavor Boost®) (optional)')
# extract_all('1 large tomato, seeded and chopped')
# extract_all('3 whole skinless, boneless chicken breasts')
# extract_all('5 skinless, boneless chicken breast halves - cut into 1 inch cubes')
# extract_all('1 1/2 pounds boneless beef chuck, cut into 2-inch pieces')
# extract_all('2 dried chipotle chile pepper (optional)')
# extract_all('2 cups uncooked sushi (sticky) or medium-grain rice')
# extract_all('2 pounds elk loin (backstrap), cut into 2 ounce pieces')
# extract_all('3 green onions, whites cut thinly across and greens sliced into thin lengths - divided')
# extract_all('1 (12 ounce) package firm tofu - drained, patted dry, and sliced into 4 slices')
# extract_all('1 pound shredded, cooked chicken')
# extract_all('1/2 cup shredded Parmesan cheese, or more to taste')
extract_all('2 teaspoons gochujang (Korean hot pepper paste), divided (optional)')
# extract_all('2 tablespoons minced Thai chilies, Serrano, or other hot pepper')

('2',
 'teaspoons',
 'Korean hot pepper paste',
 'gochujang',
 'divided (optional)')

In [162]:
# test on tricky cases
extract_descriptor('Parmesan cheese') 
# extract_descriptor('package vegetarian ground beef substitute') 
# extract_descriptor('1 1/2 pounds colossal shrimp, EZ-peel type (deveined and shells split down the back)')

'Parmesan'

In [163]:
def get_ingredient_list(url):
    global driver
    try:
        driver.get(url)
    except:
        print('relaunching webdriver...')
        driver = webdriver.Chrome(options=OPTIONS)
        driver.get(url)
        
    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # extract ingredients section from the webpage
    ingredient_list = [element.label.text.strip() for element in soup.find_all(class_='checkList__line')]
    # remove exceptions like 'topping:'
    for i in ingredient_list:
        if ':' in i:
            ingredient_list.remove(i)
    ingredient_list = set(ingredient_list)
    # remove unnecessary elements
    unnecessary = ['', 'Add all ingredients to list']
    for i in unnecessary:
        if i in ingredient_list:
            ingredient_list.remove(i)
    return ingredient_list

In [105]:
%%time
# test
get_ingredient_list('https://www.allrecipes.com/recipe/13928/enchilada-casserole/')

CPU times: user 105 ms, sys: 2.82 ms, total: 107 ms
Wall time: 2.6 s


{'1 (15 ounce) can black beans, rinsed and drained',
 '1 (19 ounce) can enchilada sauce',
 '1 (4 ounce) can diced green chile peppers',
 '1 (6 ounce) can sliced black olives',
 '1 (8 ounce) package tempeh, crumbled',
 '1 jalapeno pepper, seeded and minced',
 '1 onion, chopped',
 '2 cloves garlic, minced',
 '6 (6 inch) corn tortillas',
 '8 ounces shredded Cheddar cheese'}

In [106]:
def get_recipe_urls(url):
    global driver
    try:
        driver.get(url)
    except:
        print('relaunching webdriver...')
        driver = webdriver.Chrome(options=OPTIONS)
        driver.get(url)
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    recipe_cards = soup.find_all('div', class_='fixed-recipe-card__info')
    recipe_urls = []
    for recipe_card in recipe_cards:
        url = recipe_card.find('a')['href']
        recipe_urls.append(url)
    return recipe_urls

In [107]:
def lemmatize(line):
    if len(line.split()) == 1:
        return [token.lemma_ for token in nlp(line)][0]
    return line

##  Retrieve all ingredients lists

In [97]:
# 4 pages of vegetarian protein recipes
# 6 pages of meat recipes
# 2 pages of seafood recipes

In [98]:
def get_all_vegetarian_protein_ingredient_lists():
    try:
        with open('../data/all_vegetarian_ingredient_lists.pickle', 'rb') as file:
            all_vegetarian_ingredient_lists = pickle.load(file)
            print('loaded all_vegetarian_ingredient_lists successfully')
    except:
        all_vegetarian_ingredient_lists = []
        for page in range(4):
            vegetarian_protein_url = 'https://www.allrecipes.com/recipes/16778/everyday-cooking/vegetarian/protein/?page=' + str(page+1)
            for recipe_url in get_recipe_urls(vegetarian_protein_url):
                all_vegetarian_ingredient_lists += get_ingredient_list(recipe_url)
        # save retrieved data
        with open('../data/all_vegetarian_ingredient_lists.pickle', 'wb') as file:
            pickle.dump(all_vegetarian_ingredient_lists, file, protocol=pickle.HIGHEST_PROTOCOL)
    return all_vegetarian_ingredient_lists

def get_all_meat_ingredient_lists():
    try:
        with open('../data/all_meat_ingredient_lists.pickle', 'rb') as file:
            all_meat_ingredient_lists = pickle.load(file)
            print('loaded all_meat_ingredient_lists successfully')
    except:
        all_meat_ingredient_lists = []
        for index in range(200, 206):
            meat_url = 'https://www.allrecipes.com/recipes/{}/meat-and-poultry/'.format(index)
            for recipe_url in get_recipe_urls(meat_url):
                all_meat_ingredient_lists += get_ingredient_list(recipe_url)
        # save retrieved data
        with open('../data/all_meat_ingredient_lists.pickle', 'wb') as file:
            pickle.dump(all_meat_ingredient_lists, file, protocol=pickle.HIGHEST_PROTOCOL)
    return all_meat_ingredient_lists

def get_all_seafood_ingredient_lists():
    try:
        with open('../data/all_seafood_ingredient_lists.pickle', 'rb') as file:
            all_seafood_ingredient_lists = pickle.load(file)
            print('loaded all_seafood_ingredient_lists successfully')
    except:
        all_seafood_ingredient_lists = []
        for page in range(2):
            seafood_url = 'https://www.allrecipes.com/recipes/93/seafood/' + str(page+1)
            for recipe_url in get_recipe_urls(seafood_url):
                all_seafood_ingredient_lists += get_ingredient_list(recipe_url)
        # save retrieved data
        with open('../data/all_seafood_ingredient_lists.pickle', 'wb') as file:
            pickle.dump(all_seafood_ingredient_lists, file, protocol=pickle.HIGHEST_PROTOCOL)
    return all_seafood_ingredient_lists

In [99]:
%%time
all_vegetarian_ingredient_lists = get_all_vegetarian_protein_ingredient_lists()
all_meat_ingredient_lists = get_all_meat_ingredient_lists()
all_seafood_ingredient_lists = get_all_seafood_ingredient_lists()

loaded all_vegetarian_ingredient_lists successfully
loaded all_meat_ingredient_lists successfully
loaded all_seafood_ingredient_lists successfully
CPU times: user 3.74 ms, sys: 2.88 ms, total: 6.62 ms
Wall time: 4.11 ms


In [109]:
%%time
vegetarian_protein_ingredients_freq_dict = dict()
vegetarian_protein_ingredients_measurement_dict = defaultdict(set)
vegetarian_protein_ingredients_descriptor_dict = defaultdict(set)
vegetarian_protein_ingredients_preparation_dict = defaultdict(set)

for line in all_vegetarian_ingredient_lists:
    quantity, measurement, descriptor, ingredient, preparation = extract_all(line)
    
    vegetarian_protein_ingredients_measurement_dict[ingredient] |= {measurement}
    vegetarian_protein_ingredients_descriptor_dict[ingredient] |= {descriptor}
    vegetarian_protein_ingredients_preparation_dict[ingredient] |= {preparation}
    if ingredient not in vegetarian_protein_ingredients_freq_dict:
        vegetarian_protein_ingredients_freq_dict[ingredient] = 1
    else:
        vegetarian_protein_ingredients_freq_dict[ingredient] += 1

    # test
    print(line)
    print('quantity:', quantity)
    print('measurement:', measurement)
    print('descriptor:', descriptor)
    print('ingredient:', ingredient)
    print('preparation:', preparation)
    print()

virgin olive oil
2 tablespoons extra virgin olive oil
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: virgin olive oil
preparation: None

1 teaspoon dried oregano
quantity: 1
measurement: teaspoon
descriptor: dried
ingredient: oregano
preparation: None

1 small onion, minced
quantity: 1
measurement: None
descriptor: small
ingredient: onion
preparation: minced

1/2 cup vegetable broth
quantity: 1/2
measurement: cup
descriptor: vegetable
ingredient: broth
preparation: None

2 cloves garlic, minced
quantity: 2
measurement: cloves
descriptor: None
ingredient: garlic
preparation: minced

1/2 teaspoon ground red pepper (optional)
quantity: 1/2
measurement: teaspoon
descriptor: ground red, optional
ingredient: pepper
preparation: None

taco seasoning mix
2 tablespoons taco seasoning mix
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: taco seasoning mix
preparation: None

1 (8 ounce) package spicy flavored tempeh, coarsely grated
quantity: 1 (8 ounce)
measur

1 (10 ounce) package frozen chopped spinach, thawed and drained
quantity: 1 (10 ounce)
measurement: package
descriptor: frozen chopped
ingredient: spinach
preparation: thawed and drained

1/4 cup diced onion
quantity: 1/4
measurement: cup
descriptor: diced
ingredient: onion
preparation: None

1/2 cup shredded Swiss cheese
quantity: 1/2
measurement: cup
descriptor: shredded Swiss
ingredient: cheese
preparation: None

1/2 teaspoon salt, or to taste
quantity: 1/2
measurement: teaspoon
descriptor: or to taste
ingredient: salt
preparation: None

9 inch pie crust
1 unbaked 9 inch pie crust
quantity: 1
measurement: None
descriptor: None
ingredient: 9 inch pie crust
preparation: None

2/3 cup shredded Cheddar cheese
quantity: 2/3
measurement: cup
descriptor: shredded
ingredient: Cheddar cheese
preparation: None

1 teaspoon minced garlic
quantity: 1
measurement: teaspoon
descriptor: minced
ingredient: garlic
preparation: None

1 tablespoon chopped green chile peppers
quantity: 1
measurement: ta

3 tablespoons soy sauce
quantity: 3
measurement: tablespoons
descriptor: None
ingredient: soy sauce
preparation: None

1 teaspoon cornstarch
quantity: 1
measurement: teaspoon
descriptor: None
ingredient: cornstarch
preparation: None

1 red onion, sliced
quantity: 1
measurement: None
descriptor: red
ingredient: onion
preparation: sliced

white
1 egg white
quantity: 1
measurement: egg
descriptor: white
ingredient: white
preparation: None

1 teaspoon salt
quantity: 1
measurement: teaspoon
descriptor: None
ingredient: salt
preparation: None

3 tablespoons olive oil
quantity: 3
measurement: tablespoons
descriptor: None
ingredient: olive oil
preparation: None

1 (16 ounce) package extra firm tofu
quantity: 1 (16 ounce)
measurement: package
descriptor: extra firm
ingredient: tofu
preparation: None

1/2 teaspoon pepper
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: pepper
preparation: None

1 tablespoon barbeque sauce
quantity: 1
measurement: tablespoon
descriptor: barbeque
i

4 ounces shredded mozzarella cheese
quantity: 4
measurement: ounces
descriptor: shredded
ingredient: mozzarella cheese
preparation: None

2 tablespoons chopped fresh parsley
quantity: 2
measurement: tablespoons
descriptor: chopped fresh
ingredient: parsley
preparation: None

8 ounces textured vegetable protein
quantity: 8
measurement: ounces
descriptor: textured vegetable
ingredient: protein
preparation: None

1 onion, chopped
quantity: 1
measurement: None
descriptor: None
ingredient: onion
preparation: chopped

1/2 cup uncooked white rice
quantity: 1/2
measurement: cup
descriptor: uncooked white
ingredient: rice
preparation: None

1 (4 ounce) can diced green chile peppers
quantity: 1 (4 ounce)
measurement: can
descriptor: diced green
ingredient: chile peppers
preparation: None

6 (6 inch) corn tortillas
quantity: 6 (6 inch)
measurement: corn
descriptor: None
ingredient: tortillas
preparation: None

2 cloves garlic, minced
quantity: 2
measurement: cloves
descriptor: None
ingredient: ga

1 cup barbecue sauce, your choice
quantity: 1
measurement: cup
descriptor: None
ingredient: barbecue sauce
preparation: your choice

1 green bell pepper, seeded and chopped
quantity: 1
measurement: None
descriptor: green
ingredient: bell pepper
preparation: seeded and chopped

1 red bell pepper, seeded and chopped
quantity: 1
measurement: None
descriptor: red
ingredient: bell pepper
preparation: seeded and chopped

1 medium onion, chopped
quantity: 1
measurement: None
descriptor: medium
ingredient: onion
preparation: chopped

1 (8 ounce) package tempeh, crumbled
quantity: 1 (8 ounce)
measurement: package
descriptor: None
ingredient: tempeh
preparation: crumbled

1 tablespoon vegetable oil
quantity: 1
measurement: tablespoon
descriptor: vegetable
ingredient: oil
preparation: None

4 kaiser rolls, split and toasted
quantity: 4
measurement: kaiser
descriptor: None
ingredient: rolls
preparation: split and toasted

1/2 lime, juiced
quantity: 1/2
measurement: None
descriptor: None
ingredient

2 cloves garlic, minced
quantity: 2
measurement: cloves
descriptor: None
ingredient: garlic
preparation: minced

salt and pepper
salt and pepper to taste
quantity: to taste
measurement: None
descriptor: None
ingredient: salt and pepper
preparation: None

or cheese
1 cup Parmesan or Asiago cheese
quantity: 1
measurement: cup
descriptor: None
ingredient: or cheese
preparation: None

1 tablespoon olive oil
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: olive oil
preparation: None

1 small onion, chopped
quantity: 1
measurement: None
descriptor: small
ingredient: onion
preparation: chopped

1 tablespoon butter
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: butter
preparation: None

garlic powder to taste
quantity: to taste
measurement: None
descriptor: None
ingredient: garlic powder
preparation: None

2 pounds fresh spinach, washed and chopped
quantity: 2
measurement: pounds
descriptor: fresh
ingredient: spinach
preparation: washed and chopped

milk or s

1 pound firm tofu
quantity: 1
measurement: pound
descriptor: firm
ingredient: tofu
preparation: None

1/4 teaspoon red pepper flakes
quantity: 1/4
measurement: teaspoon
descriptor: red
ingredient: pepper flakes
preparation: None

1/4 cup rice vinegar
quantity: 1/4
measurement: cup
descriptor: None
ingredient: rice vinegar
preparation: None

1/3 cup canola oil
quantity: 1/3
measurement: cup
descriptor: None
ingredient: canola oil
preparation: None

sesame oil
4 teaspoons dark sesame oil
quantity: 4
measurement: teaspoons
descriptor: None
ingredient: sesame oil
preparation: None

1 tablespoon minced fresh ginger root
quantity: 1
measurement: tablespoon
descriptor: minced fresh
ingredient: ginger root
preparation: None

1 green onions, cut into 1-inch strips
quantity: 1
measurement: None
descriptor: green
ingredient: onions
preparation: cut into 1-inch strips

1/4 cup coarsely chopped fresh cilantro
quantity: 1/4
measurement: cup
descriptor: coarsely chopped fresh
ingredient: cilantro
pre

1 (8 ounce) container dairy sour cream
quantity: 1 (8 ounce)
measurement: container
descriptor: sour
ingredient: dairy cream
preparation: None

1 green bell pepper, chopped
quantity: 1
measurement: None
descriptor: green
ingredient: bell pepper
preparation: chopped

2 teaspoons ground cumin
quantity: 2
measurement: teaspoons
descriptor: ground
ingredient: cumin
preparation: None

2 teaspoons chili powder
quantity: 2
measurement: teaspoons
descriptor: None
ingredient: chili powder
preparation: None

1 bunch green onions, chopped
quantity: 1
measurement: bunch
descriptor: green
ingredient: onions
preparation: chopped

2 carrots, finely chopped
quantity: 2
measurement: None
descriptor: None
ingredient: carrots
preparation: finely chopped

1/2 cup texturized vegetable protein (TVP)
quantity: 1/2
measurement: cup
descriptor: texturized vegetable, TVP
ingredient: protein
preparation: None

tomatoes with juice
2 (12 ounce) cans diced tomatoes with juice
quantity: 2 (12 ounce)
measurement: can

1 tablespoon vanilla extract
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: vanilla extract
preparation: None

Dijon mustard
1/4 teaspoon Dijon mustard
quantity: 1/4
measurement: teaspoon
descriptor: None
ingredient: Dijon mustard
preparation: None

4 cloves garlic, minced
quantity: 4
measurement: cloves
descriptor: None
ingredient: garlic
preparation: minced

1 pound broccoli, chopped
quantity: 1
measurement: pound
descriptor: None
ingredient: broccoli
preparation: chopped

ground nutmeg
1/4 teaspoon ground nutmeg
quantity: 1/4
measurement: teaspoon
descriptor: ground nutmeg
ingredient: ground nutmeg
preparation: None

1/2 teaspoon ground red pepper
quantity: 1/2
measurement: teaspoon
descriptor: ground red
ingredient: pepper
preparation: None

1 (9 inch) unbaked pie crust
quantity: 1 (9 inch)
measurement: None
descriptor: unbaked
ingredient: pie crust
preparation: None

1 tablespoon dried parsley
quantity: 1
measurement: tablespoon
descriptor: dried
ingredient: pars

1 1/2 teaspoons minced garlic
quantity: 1 1/2
measurement: teaspoons
descriptor: minced
ingredient: garlic
preparation: None

1/4 teaspoon dried marjoram
quantity: 1/4
measurement: teaspoon
descriptor: dried
ingredient: marjoram
preparation: None

2 tablespoons butter
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: butter
preparation: None

1/4 cup raisins
quantity: 1/4
measurement: cup
descriptor: None
ingredient: raisins
preparation: None

1/4 teaspoon poultry seasoning
quantity: 1/4
measurement: teaspoon
descriptor: None
ingredient: poultry seasoning
preparation: None

1 1/2 cups chopped celery
quantity: 1 1/2
measurement: cups
descriptor: chopped
ingredient: celery
preparation: None

1/2 teaspoon dried basil
quantity: 1/2
measurement: teaspoon
descriptor: dried
ingredient: basil
preparation: None

2 cups egg noodles
quantity: 2
measurement: cups
descriptor: None
ingredient: egg noodles
preparation: None

3 tablespoons cold water
quantity: 3
measurement: tablespoon

4 teaspoons ground coriander
quantity: 4
measurement: teaspoons
descriptor: ground
ingredient: coriander
preparation: None

1 teaspoon honey mustard
quantity: 1
measurement: teaspoon
descriptor: None
ingredient: honey mustard
preparation: None

dried rosemary
1 1/2 teaspoons dried rosemary
quantity: 1 1/2
measurement: teaspoons
descriptor: dried rosemary
ingredient: dried rosemary
preparation: None

5 tablespoons orange juice
quantity: 5
measurement: tablespoons
descriptor: orange
ingredient: juice
preparation: None

zest
1/2 teaspoon orange zest
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: zest
preparation: None

sesame oil
1/2 cup sesame oil
quantity: 1/2
measurement: cup
descriptor: None
ingredient: sesame oil
preparation: None

1 red onion, finely diced
quantity: 1
measurement: None
descriptor: red
ingredient: onion
preparation: finely diced

salt and pepper
salt and pepper to taste
quantity: to taste
measurement: None
descriptor: None
ingredient: salt and peppe

1 cup vital wheat gluten
quantity: 1
measurement: cup
descriptor: vital gluten
ingredient: wheat
preparation: None

tamari
1/4 cup tamari
quantity: 1/4
measurement: cup
descriptor: tamari
ingredient: tamari
preparation: None

1/4 cup liquid amino acid (such as Bragg®)
quantity: 1/4
measurement: cup
descriptor: liquid, such as Bragg®
ingredient: amino acid
preparation: None

4 cups water
quantity: 4
measurement: cups
descriptor: None
ingredient: water
preparation: None

3 cloves garlic, minced
quantity: 3
measurement: cloves
descriptor: None
ingredient: garlic
preparation: minced

1/3 cup soy sauce
quantity: 1/3
measurement: cup
descriptor: None
ingredient: soy sauce
preparation: None

sesame oil
1 teaspoon sesame oil
quantity: 1
measurement: teaspoon
descriptor: None
ingredient: sesame oil
preparation: None

1 (16 ounce) package extra firm tofu
quantity: 1 (16 ounce)
measurement: package
descriptor: extra firm
ingredient: tofu
preparation: None

1 teaspoon white sugar
quantity: 1
measu

1/4 teaspoon dried marjoram
quantity: 1/4
measurement: teaspoon
descriptor: dried
ingredient: marjoram
preparation: None

1/2 teaspoon sage
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: sage
preparation: None

2/3 cup water
quantity: 2/3
measurement: cup
descriptor: None
ingredient: water
preparation: None

1/4 teaspoon pepper
quantity: 1/4
measurement: teaspoon
descriptor: None
ingredient: pepper
preparation: None

1 slice bread, cubed
quantity: 1
measurement: slice
descriptor: None
ingredient: bread
preparation: cubed

1 (12 ounce) package dry bread stuffing mix
quantity: 1 (12 ounce)
measurement: package
descriptor: dry
ingredient: bread stuffing mix
preparation: None

2 tablespoons water
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: water
preparation: None

1 teaspoon barbeque sauce
quantity: 1
measurement: teaspoon
descriptor: barbeque
ingredient: sauce
preparation: None

1/4 teaspoon dried savory
quantity: 1/4
measurement: teaspoon
descripto

olive oil
quantity: None
measurement: None
descriptor: None
ingredient: olive oil
preparation: None

1 small onion, quartered
quantity: 1
measurement: None
descriptor: small
ingredient: onion
preparation: quartered

4 cloves garlic, peeled
quantity: 4
measurement: cloves
descriptor: None
ingredient: garlic
preparation: peeled

8 ounces tofu, diced
quantity: 8
measurement: ounces
descriptor: None
ingredient: tofu
preparation: diced

3 teaspoons light brown sugar
quantity: 3
measurement: teaspoons
descriptor: light brown
ingredient: sugar
preparation: None

1/2 cup chopped fresh basil
quantity: 1/2
measurement: cup
descriptor: chopped fresh
ingredient: basil
preparation: None

2 tablespoons lime juice
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: lime juice
preparation: None

1 tablespoon chopped fresh cilantro
quantity: 1
measurement: tablespoon
descriptor: chopped fresh
ingredient: cilantro
preparation: None

3 fresh green chile peppers
quantity: 3
measurement: None

In [110]:
sorted(vegetarian_protein_ingredients_freq_dict.items(), key=lambda pair: pair[1], reverse=True)

[('tofu', 65),
 ('onion', 40),
 ('garlic', 38),
 ('salt', 35),
 ('pepper', 30),
 ('oil', 28),
 ('water', 23),
 ('soy sauce', 22),
 ('bell pepper', 20),
 ('olive oil', 18),
 ('mushrooms', 14),
 ('onions', 14),
 ('sugar', 12),
 ('sesame oil', 11),
 ('cilantro', 11),
 ('garlic powder', 10),
 ('broth', 9),
 ('cumin', 9),
 ('carrots', 9),
 ('tomato sauce', 9),
 ('basil', 9),
 ('tomatoes', 9),
 ('tempeh', 8),
 ('celery', 8),
 ('cheese', 7),
 ('coconut milk', 7),
 ('beans', 7),
 ('salt and pepper', 7),
 ('ginger root', 7),
 ('oregano', 6),
 ('soy milk', 6),
 ('spinach', 6),
 ('mozzarella cheese', 6),
 ('curry powder', 6),
 ('cayenne pepper', 6),
 ('cornstarch', 6),
 ('parsley', 6),
 ('chili powder', 6),
 ('butter', 6),
 ('jalapeno pepper', 5),
 ('yeast', 5),
 ('tamari', 5),
 ('ginger', 5),
 ('barbecue sauce', 5),
 ('Cheddar cheese', 5),
 ('lime juice', 5),
 ('vinegar', 5),
 ('sauce', 5),
 ('rice', 5),
 ('honey', 5),
 ('vanilla extract', 5),
 ('zucchini', 4),
 ('coriander', 4),
 ('potatoes', 4

In [111]:
%%time
meat_ingredients_freq_dict = dict()
meat_ingredients_measurement_dict = defaultdict(set)
meat_ingredients_descriptor_dict = defaultdict(set)
meat_ingredients_preparation_dict = defaultdict(set)

for line in all_meat_ingredient_lists:
    quantity, measurement, descriptor, ingredient, preparation = extract_all(line)
    
    meat_ingredients_measurement_dict[ingredient] |= {measurement}
    meat_ingredients_descriptor_dict[ingredient] |= {descriptor}
    meat_ingredients_preparation_dict[ingredient] |= {preparation}
    if ingredient not in meat_ingredients_freq_dict:
        meat_ingredients_freq_dict[ingredient] = 1
    else:
        meat_ingredients_freq_dict[ingredient] += 1

    # test
    print(line)
    print('quantity:', quantity)
    print('measurement:', measurement)
    print('descriptor:', descriptor)
    print('ingredient:', ingredient)
    print('preparation:', preparation)
    print()

2 pounds thinly sliced Scotch fillet (chuck eye steaks)
quantity: 2
measurement: pounds
descriptor: thinly sliced fillet, chuck eye steaks
ingredient: Scotch
preparation: None

1/2 cup soy sauce
quantity: 1/2
measurement: cup
descriptor: None
ingredient: soy sauce
preparation: None

sesame oil
2 tablespoons sesame oil
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: sesame oil
preparation: None

5 tablespoons white sugar
quantity: 5
measurement: tablespoons
descriptor: white
ingredient: sugar
preparation: None

2 cloves garlic, crushed
quantity: 2
measurement: cloves
descriptor: None
ingredient: garlic
preparation: crushed

2 1/2 tablespoons sesame seeds
quantity: 2 1/2
measurement: tablespoons
descriptor: None
ingredient: sesame seeds
preparation: None

5 tablespoons mirin (Japanese sweet wine)
quantity: 5
measurement: tablespoons
descriptor: Japanese sweet wine
ingredient: mirin
preparation: None

3 shallots, thinly sliced
quantity: 3
measurement: None
descriptor: No

2 tablespoons minced garlic
quantity: 2
measurement: tablespoons
descriptor: minced
ingredient: garlic
preparation: None

cooking wine
1/2 cup red cooking wine
quantity: 1/2
measurement: cup
descriptor: None
ingredient: cooking wine
preparation: None

1 tablespoon chopped fresh parsley
quantity: 1
measurement: tablespoon
descriptor: chopped fresh
ingredient: parsley
preparation: None

1/2 green bell pepper, chopped
quantity: 1/2
measurement: None
descriptor: green
ingredient: bell pepper
preparation: chopped

1/2 teaspoon ground black pepper
quantity: 1/2
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1 pound ground beef
quantity: 1
measurement: pound
descriptor: ground
ingredient: beef
preparation: None

cooking spray
cooking spray
quantity: None
measurement: None
descriptor: None
ingredient: cooking spray
preparation: None

ground veal
1/2 pound ground veal
quantity: 1/2
measurement: pound
descriptor: ground veal
ingredient: ground veal
preparati

1 pound sweet Italian sausage
quantity: 1
measurement: pound
descriptor: sweet Italian
ingredient: sausage
preparation: None

4 tablespoons chopped fresh parsley
quantity: 4
measurement: tablespoons
descriptor: chopped fresh
ingredient: parsley
preparation: None

2 tablespoons white sugar
quantity: 2
measurement: tablespoons
descriptor: white
ingredient: sugar
preparation: None

16 ounces ricotta cheese
quantity: 16
measurement: ounces
descriptor: ricotta
ingredient: cheese
preparation: None

3/4 cup grated Parmesan cheese
quantity: 3/4
measurement: cup
descriptor: grated Parmesan
ingredient: cheese
preparation: None

3/4 pound mozzarella cheese, sliced
quantity: 3/4
measurement: pound
descriptor: None
ingredient: mozzarella cheese
preparation: sliced

2 cloves garlic, crushed
quantity: 2
measurement: cloves
descriptor: None
ingredient: garlic
preparation: crushed

3/4 pound lean ground beef
quantity: 3/4
measurement: pound
descriptor: lean ground
ingredient: beef
preparation: None

12

1/4 cup chopped onion
quantity: 1/4
measurement: cup
descriptor: chopped
ingredient: onion
preparation: None

1/4 cup chopped green bell pepper
quantity: 1/4
measurement: cup
descriptor: chopped green
ingredient: bell pepper
preparation: None

3 teaspoons brown sugar
quantity: 3
measurement: teaspoons
descriptor: brown
ingredient: sugar
preparation: None

1/2 teaspoon garlic powder
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: garlic powder
preparation: None

1 cup milk
quantity: 1
measurement: cup
descriptor: None
ingredient: milk
preparation: None

2 tablespoons brown sugar
quantity: 2
measurement: tablespoons
descriptor: brown
ingredient: sugar
preparation: None

2 tablespoons prepared mustard
quantity: 2
measurement: tablespoons
descriptor: prepared
ingredient: mustard
preparation: None

1/3 cup ketchup
quantity: 1/3
measurement: cup
descriptor: None
ingredient: ketchup
preparation: None

1 cup dried bread crumbs
quantity: 1
measurement: cup
descriptor: dried
ing

1 onion, chopped
quantity: 1
measurement: None
descriptor: None
ingredient: onion
preparation: chopped

1/4 cup olive oil
quantity: 1/4
measurement: cup
descriptor: None
ingredient: olive oil
preparation: None

MEATBALLS
quantity: None
measurement: None
descriptor: None
ingredient: MEATBALLS
preparation: None

1/2 teaspoon ground black pepper
quantity: 1/2
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1 cup fresh bread crumbs
quantity: 1
measurement: cup
descriptor: fresh
ingredient: bread crumbs
preparation: None

2 teaspoons salt
quantity: 2
measurement: teaspoons
descriptor: None
ingredient: salt
preparation: None

1/4 teaspoon ground black pepper
quantity: 1/4
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1 tablespoon grated Parmesan cheese
quantity: 1
measurement: tablespoon
descriptor: grated Parmesan
ingredient: cheese
preparation: None

2 (28 ounce) cans whole peeled tomatoes
quantity: 2 (28 ounce)
me

1/2 teaspoon white sugar
quantity: 1/2
measurement: teaspoon
descriptor: white
ingredient: sugar
preparation: None

1 (46 fluid ounce) can tomato juice
quantity: 1 (46 fluid ounce)
measurement: can
descriptor: None
ingredient: tomato juice
preparation: None

pinto beans
1 (15 ounce) can pinto beans, drained and rinsed
quantity: 1 (15 ounce)
measurement: can
descriptor: drained rinsed
ingredient: pinto beans and
preparation: None

2 pounds lean ground beef
quantity: 2
measurement: pounds
descriptor: lean ground
ingredient: beef
preparation: None

1/4 cup chopped green bell pepper
quantity: 1/4
measurement: cup
descriptor: chopped green
ingredient: bell pepper
preparation: None

1/2 teaspoon dried oregano
quantity: 1/2
measurement: teaspoon
descriptor: dried
ingredient: oregano
preparation: None

1 (15 ounce) can kidney beans, drained and rinsed
quantity: 1 (15 ounce)
measurement: can
descriptor: None
ingredient: kidney beans
preparation: drained and rinsed

1 1/2 cups chopped onion
quan

2 pounds red potatoes, cut in 1-inch pieces
quantity: 2
measurement: pounds
descriptor: red
ingredient: potatoes
preparation: cut in 1-inch pieces

2 tablespoons celery salt
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: celery salt
preparation: None

1 tablespoon ground black pepper
quantity: 1
measurement: tablespoon
descriptor: ground black
ingredient: pepper
preparation: None

kernel corn
1 (16 ounce) package frozen whole kernel corn
quantity: 1 (16 ounce)
measurement: package
descriptor: None
ingredient: kernel corn
preparation: None

8 ounces diced onion
quantity: 8
measurement: ounces
descriptor: diced
ingredient: onion
preparation: None

1 pound beef stew meat, cut into bite-size pieces (optional)
quantity: 1
measurement: pound
descriptor: stew
ingredient: beef meat)
preparation: cut into bite-size pieces (optional

1 (16 ounce) package frozen cut carrots
quantity: 1 (16 ounce)
measurement: package
descriptor: frozen cut
ingredient: carrots
preparation: None


1/4 teaspoon celery seed
quantity: 1/4
measurement: teaspoon
descriptor: None
ingredient: celery seed
preparation: None

2 (9 inch) unbaked pie crusts
quantity: 2 (9 inch)
measurement: None
descriptor: unbaked
ingredient: pie crusts
preparation: None

1/2 teaspoon salt
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: salt
preparation: None

1/3 cup chopped onion
quantity: 1/3
measurement: cup
descriptor: chopped
ingredient: onion
preparation: None

1 cup sliced carrots
quantity: 1
measurement: cup
descriptor: sliced
ingredient: carrots
preparation: None

skinless
1 pound skinless, boneless chicken breast halves - cubed
quantity: 1
measurement: pound
descriptor: skinless boneless
ingredient: chicken breast halves
preparation: cubed

skinless
4 skinless, boneless chicken breast halves
quantity: 4
measurement: None
descriptor: skinless boneless
ingredient: chicken breast halves
preparation: None

1 onion, finely diced
quantity: 1
measurement: None
descriptor: None
ingredie

1 bay leaf
quantity: 1
measurement: bay
descriptor: None
ingredient: leaf
preparation: None

4 tablespoons butter
quantity: 4
measurement: tablespoons
descriptor: None
ingredient: butter
preparation: None

4 tablespoons olive oil
quantity: 4
measurement: tablespoons
descriptor: None
ingredient: olive oil
preparation: None

skinless
4 skinless, boneless chicken breast halves - pounded 1/4 inch thick
quantity: 4
measurement: None
descriptor: skinless boneless
ingredient: chicken breast halves
preparation: pounded 1/4 inch thick

flour for coating
1/4 cup all-purpose flour for coating
quantity: 1/4
measurement: cup
descriptor: None
ingredient: flour for coating
preparation: None

1/4 teaspoon ground black pepper
quantity: 1/4
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1/2 cup Marsala wine
quantity: 1/2
measurement: cup
descriptor: None
ingredient: Marsala wine
preparation: None

cooking sherry
1/4 cup cooking sherry
quantity: 1/4
measurement: cup


2 tablespoons honey
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: honey
preparation: None

2 teaspoons chopped garlic
quantity: 2
measurement: teaspoons
descriptor: chopped
ingredient: garlic
preparation: None

1 tablespoon brown sugar
quantity: 1
measurement: tablespoon
descriptor: brown
ingredient: sugar
preparation: None

2 teaspoons chopped fresh ginger root
quantity: 2
measurement: teaspoons
descriptor: chopped fresh
ingredient: ginger root
preparation: None

2 tablespoons hot sauce
quantity: 2
measurement: tablespoons
descriptor: hot
ingredient: sauce
preparation: None

1 tablespoon vegetable oil
quantity: 1
measurement: tablespoon
descriptor: vegetable
ingredient: oil
preparation: None

salad dressing mix
1 (.7 ounce) package dry Italian-style salad dressing mix
quantity: 1 (.7 ounce)
measurement: package
descriptor: None
ingredient: salad dressing mix
preparation: None

skinless
4 skinless, boneless chicken breast halves - cubed
quantity: 4
measurement: None

1 cup red wine
quantity: 1
measurement: cup
descriptor: red
ingredient: wine
preparation: None

1/2 cup finely chopped shallots
quantity: 1/2
measurement: cup
descriptor: finely chopped
ingredient: shallots
preparation: None

1 clove garlic, finely chopped
quantity: 1
measurement: clove
descriptor: None
ingredient: garlic
preparation: finely chopped

1 tablespoon chicken bouillon granules
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: chicken bouillon granules
preparation: None

1/3 cup all-purpose flour
quantity: 1/3
measurement: cup
descriptor: all-purpose
ingredient: flour
preparation: None

3 tablespoons water
quantity: 3
measurement: tablespoons
descriptor: None
ingredient: water
preparation: None

1/8 teaspoon dried thyme, crushed
quantity: 1/8
measurement: teaspoon
descriptor: dried
ingredient: thyme
preparation: crushed

dried rosemary
1/4 teaspoon dried rosemary, crushed
quantity: 1/4
measurement: teaspoon
descriptor: dried rosemary crushed
ingredient: dried 

1/2 teaspoon cayenne pepper
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: cayenne pepper
preparation: None

1 pound ground buffalo
quantity: 1
measurement: pound
descriptor: ground
ingredient: buffalo
preparation: None

tomatoes with chiles
1 (10 ounce) can diced tomatoes with green chiles
quantity: 1 (10 ounce)
measurement: can
descriptor: None
ingredient: tomatoes with chiles
preparation: None

1 1/2 teaspoons ground cumin
quantity: 1 1/2
measurement: teaspoons
descriptor: ground
ingredient: cumin
preparation: None

1/2 teaspoon onion powder, or to taste
quantity: 1/2
measurement: teaspoon
descriptor: or to taste
ingredient: onion powder
preparation: None

16 slices thick cut bacon
quantity: 16
measurement: slices
descriptor: thick cut
ingredient: bacon
preparation: None

1 tablespoon liquid smoke flavoring
quantity: 1
measurement: tablespoon
descriptor: liquid
ingredient: smoke flavoring
preparation: None

1/2 teaspoon black pepper, or to taste
quantity: 1/2
measu

1 teaspoon unsweetened cocoa powder
quantity: 1
measurement: teaspoon
descriptor: unsweetened
ingredient: cocoa powder
preparation: None

1 (28 ounce) can crushed tomatoes
quantity: 1 (28 ounce)
measurement: can
descriptor: crushed
ingredient: tomatoes
preparation: None

boiling water
1 cup boiling water
quantity: 1
measurement: cup
descriptor: None
ingredient: boiling water
preparation: None

1 tablespoon chili powder
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: chili powder
preparation: None

Worcestershire sauce
1 teaspoon Worcestershire sauce
quantity: 1
measurement: teaspoon
descriptor: None
ingredient: Worcestershire sauce
preparation: None

1 cup chopped green bell pepper
quantity: 1
measurement: cup
descriptor: chopped green
ingredient: bell pepper
preparation: None

1/2 teaspoon ground black pepper
quantity: 1/2
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1 cup chopped red bell pepper
quantity: 1
measurement: cup
des

1 orange, sliced in rounds
quantity: 1
measurement: None
descriptor: orange sliced
ingredient: in rounds
preparation: None

3 tablespoons soy sauce
quantity: 3
measurement: tablespoons
descriptor: None
ingredient: soy sauce
preparation: None

1 tablespoon honey
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: honey
preparation: None

1 tablespoon chopped fresh parsley, for garnish
quantity: 1
measurement: tablespoon
descriptor: chopped fresh, for garnish
ingredient: parsley
preparation: None

2 tablespoons minced garlic
quantity: 2
measurement: tablespoons
descriptor: minced
ingredient: garlic
preparation: None

1 tablespoon prepared Dijon-style mustard
quantity: 1
measurement: tablespoon
descriptor: prepared Dijon-style
ingredient: mustard
preparation: None

1 large yellow onion, chopped
quantity: 1
measurement: None
descriptor: large yellow
ingredient: onion
preparation: chopped

salt and pepper
salt and pepper to taste
quantity: to taste
measurement: None
descriptor:

2 pounds boneless lamb shoulder, cut into 2 inch pieces
quantity: 2
measurement: pounds
descriptor: boneless
ingredient: lamb shoulder
preparation: cut into 2 inch pieces

1 teaspoon ground cumin
quantity: 1
measurement: teaspoon
descriptor: ground
ingredient: cumin
preparation: None

1 tablespoon freshly grated ginger
quantity: 1
measurement: tablespoon
descriptor: freshly grated
ingredient: ginger
preparation: None

1/4 cup finely minced onion
quantity: 1/4
measurement: cup
descriptor: finely minced
ingredient: onion
preparation: None

2 pounds lamb meat, cut into 1 1/2 inch cubes
quantity: 2
measurement: pounds
descriptor: None
ingredient: lamb meat
preparation: cut into 1 1/2 inch cubes

1/2 teaspoon ground cumin
quantity: 1/2
measurement: teaspoon
descriptor: ground
ingredient: cumin
preparation: None

3 tablespoons olive oil, divided
quantity: 3
measurement: tablespoons
descriptor: None
ingredient: olive oil
preparation: divided

1/2 teaspoon ground cardamom
quantity: 1/2
measure

1 slice bread, toasted and crumbled
quantity: 1
measurement: slice
descriptor: None
ingredient: bread
preparation: toasted and crumbled

1/2 teaspoon ground black pepper
quantity: 1/2
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1/2 teaspoon salt
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: salt
preparation: None

1/2 onion, grated
quantity: 1/2
measurement: None
descriptor: None
ingredient: onion
preparation: grated

1 dash ground cumin
quantity: 1
measurement: dash
descriptor: ground
ingredient: cumin
preparation: None

1/2 teaspoon ground coriander
quantity: 1/2
measurement: teaspoon
descriptor: ground
ingredient: coriander
preparation: None

parsnip
1 parsnip, peeled and diced
quantity: 1
measurement: None
descriptor: parsnip peeled diced
ingredient: and
preparation: None

1 teaspoon salt
quantity: 1
measurement: teaspoon
descriptor: None
ingredient: salt
preparation: None

1 stalk celery, chopped
quantity: 1
measurement: 

1 1/2 pounds thickly sliced bacon, diced
quantity: 1 1/2
measurement: pounds
descriptor: thickly sliced
ingredient: bacon
preparation: diced

2 bay leaves
quantity: 2
measurement: bay
descriptor: None
ingredient: leaves
preparation: None

1 teaspoon dried thyme
quantity: 1
measurement: teaspoon
descriptor: dried
ingredient: thyme
preparation: None

4 cups diced carrots
quantity: 4
measurement: cups
descriptor: diced
ingredient: carrots
preparation: None

3 potatoes
quantity: 3
measurement: None
descriptor: None
ingredient: potatoes
preparation: None

4 cups beef stock
quantity: 4
measurement: cups
descriptor: None
ingredient: beef stock
preparation: None

1/2 cup all-purpose flour
quantity: 1/2
measurement: cup
descriptor: all-purpose
ingredient: flour
preparation: None

1 large onion, chopped
quantity: 1
measurement: None
descriptor: large
ingredient: onion
preparation: chopped

2 teaspoons white sugar
quantity: 2
measurement: teaspoons
descriptor: white
ingredient: sugar
preparation:

ground dried rosemary
1 teaspoon ground dried rosemary
quantity: 1
measurement: teaspoon
descriptor: ground dried rosemary
ingredient: ground dried rosemary
preparation: None

1 teaspoon dried marjoram
quantity: 1
measurement: teaspoon
descriptor: dried
ingredient: marjoram
preparation: None

1 teaspoon ground black pepper
quantity: 1
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1 tablespoon minced garlic
quantity: 1
measurement: tablespoon
descriptor: minced
ingredient: garlic
preparation: None

1/2 onion, cut into chunks
quantity: 1/2
measurement: None
descriptor: None
ingredient: onion
preparation: cut into chunks

1 teaspoon ground cumin
quantity: 1
measurement: teaspoon
descriptor: ground
ingredient: cumin
preparation: None

1 teaspoon ground dried thyme
quantity: 1
measurement: teaspoon
descriptor: ground dried
ingredient: thyme
preparation: None

1/2 teaspoon black pepper
quantity: 1/2
measurement: teaspoon
descriptor: black
ingredient: pe

1 tablespoon soy sauce
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: soy sauce
preparation: None

peeled
2 cups peeled, seeded, and sliced butternut squash
quantity: 2
measurement: cups
descriptor: peeled seeded
ingredient: peeled seeded
preparation: and sliced butternut squash

1 pound lamb stew meat, cubed
quantity: 1
measurement: pound
descriptor: stew
ingredient: lamb meat
preparation: cubed

1 tablespoon chopped fresh thyme
quantity: 1
measurement: tablespoon
descriptor: chopped fresh
ingredient: thyme
preparation: None

1 cup sliced celery
quantity: 1
measurement: cup
descriptor: sliced
ingredient: celery
preparation: None

2 cups beef broth
quantity: 2
measurement: cups
descriptor: None
ingredient: beef broth
preparation: None

peeled
1 cup peeled, chopped sweet potatoes
quantity: 1
measurement: cup
descriptor: peeled chopped sweet
ingredient: potatoes
preparation: None

2 cloves garlic, minced
quantity: 2
measurement: cloves
descriptor: None
ingredient: garli

1 1/2 cups milk
quantity: 1 1/2
measurement: cups
descriptor: None
ingredient: milk
preparation: None

1/4 teaspoon garlic powder
quantity: 1/4
measurement: teaspoon
descriptor: None
ingredient: garlic powder
preparation: None

and potatoes
1 cup peeled and diced potatoes
quantity: 1
measurement: cup
descriptor: None
ingredient: and potatoes
preparation: None

2 tablespoons chicken bouillon granules
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: chicken bouillon granules
preparation: None

1 1/2 ounces guanciale (cured pork cheek), sliced
quantity: 1 1/2
measurement: ounces
descriptor: cured pork cheek
ingredient: guanciale
preparation: sliced

5 ounces bucatini pasta
quantity: 5
measurement: ounces
descriptor: None
ingredient: bucatini pasta
preparation: None

3 crushed garlic cloves
quantity: 3
measurement: None
descriptor: crushed
ingredient: garlic cloves
preparation: None

1/2 (8 ounce) can crushed San Marzano tomatoes
quantity: 1/2 (8 ounce)
measurement: can
de

1/4 teaspoon ground black pepper
quantity: 1/4
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1 pound sweet Italian sausage
quantity: 1
measurement: pound
descriptor: sweet Italian
ingredient: sausage
preparation: None

4 tablespoons chopped fresh parsley
quantity: 4
measurement: tablespoons
descriptor: chopped fresh
ingredient: parsley
preparation: None

2 tablespoons white sugar
quantity: 2
measurement: tablespoons
descriptor: white
ingredient: sugar
preparation: None

16 ounces ricotta cheese
quantity: 16
measurement: ounces
descriptor: ricotta
ingredient: cheese
preparation: None

3/4 cup grated Parmesan cheese
quantity: 3/4
measurement: cup
descriptor: grated Parmesan
ingredient: cheese
preparation: None

3/4 pound mozzarella cheese, sliced
quantity: 3/4
measurement: pound
descriptor: None
ingredient: mozzarella cheese
preparation: sliced

2 cloves garlic, crushed
quantity: 2
measurement: cloves
descriptor: None
ingredient: garlic
preparation:

1 1/2 cups heavy cream
quantity: 1 1/2
measurement: cups
descriptor: heavy
ingredient: cream
preparation: None

3 tablespoons minced fresh parsley
quantity: 3
measurement: tablespoons
descriptor: minced fresh
ingredient: parsley
preparation: None

1/2 cup diced onion
quantity: 1/2
measurement: cup
descriptor: diced
ingredient: onion
preparation: None

2 tablespoons olive oil
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: olive oil
preparation: None

1 pound sweet Italian sausage, casings removed and crumbled
quantity: 1
measurement: pound
descriptor: sweet Italian
ingredient: sausage
preparation: casings removed and crumbled

1/2 teaspoon red pepper flakes
quantity: 1/2
measurement: teaspoon
descriptor: red
ingredient: pepper flakes
preparation: None

bow tie pasta
1 (12 ounce) package bow tie pasta
quantity: 1 (12 ounce)
measurement: package
descriptor: None
ingredient: bow tie pasta
preparation: None

1/2 teaspoon salt
quantity: 1/2
measurement: teaspoon
descriptor

1 pound bacon
quantity: 1
measurement: pound
descriptor: None
ingredient: bacon
preparation: None

1 cup brown sugar, or to taste
quantity: 1
measurement: cup
descriptor: brown, or to taste
ingredient: sugar
preparation: None

3 cloves garlic, minced
quantity: 3
measurement: cloves
descriptor: None
ingredient: garlic
preparation: minced

3 tablespoons butter
quantity: 3
measurement: tablespoons
descriptor: None
ingredient: butter
preparation: None

3 teaspoons chopped fresh cilantro
quantity: 3
measurement: teaspoons
descriptor: chopped fresh
ingredient: cilantro
preparation: None

4 cups chicken stock, or enough to cover potatoes
quantity: 4
measurement: cups
descriptor: or enough to cover potatoes
ingredient: chicken stock
preparation: None

1/4 cup all-purpose flour
quantity: 1/4
measurement: cup
descriptor: all-purpose
ingredient: flour
preparation: None

1 pound bacon, chopped
quantity: 1
measurement: pound
descriptor: None
ingredient: bacon
preparation: chopped

1 teaspoon dried 

In [112]:
sorted(meat_ingredients_freq_dict.items(), key=lambda pair: pair[1], reverse=True)

[('onion', 70),
 ('pepper', 69),
 ('garlic', 64),
 ('salt', 64),
 ('olive oil', 36),
 ('sugar', 35),
 ('water', 31),
 ('oregano', 28),
 ('garlic powder', 25),
 ('salt and pepper', 25),
 ('butter', 24),
 ('Worcestershire sauce', 22),
 ('parsley', 22),
 ('cumin', 22),
 ('beef', 20),
 ('flour', 20),
 ('thyme', 19),
 ('cayenne pepper', 19),
 ('tomatoes', 18),
 ('cheese', 18),
 ('chicken broth', 16),
 ('oil', 16),
 ('paprika', 16),
 ('chicken breast halves', 16),
 ('chili powder', 15),
 ('bell pepper', 15),
 ('celery', 15),
 ('carrots', 15),
 ('soy sauce', 14),
 ('ketchup', 14),
 ('beef broth', 13),
 ('milk', 13),
 ('wine', 13),
 ('tomato sauce', 12),
 ('potatoes', 12),
 ('cream', 12),
 ('bacon', 12),
 ('Cheddar cheese', 11),
 ('bread crumbs', 11),
 ('onions', 11),
 ('lamb', 11),
 ('basil', 10),
 ('egg', 10),
 ('ginger', 10),
 ('eggs', 9),
 ('cilantro', 9),
 ('chops', 9),
 ('pepper sauce', 8),
 ('leaf', 8),
 ('vinegar', 8),
 ('sauce', 8),
 ('tomato paste', 8),
 ('cinnamon', 8),
 ('shallots'

In [114]:
%%time
seafood_ingredients_freq_dict = dict()
seafood_ingredients_measurement_dict = defaultdict(set)
seafood_ingredients_descriptor_dict = defaultdict(set)
seafood_ingredients_preparation_dict = defaultdict(set)

for line in all_seafood_ingredient_lists:
    quantity, measurement, descriptor, ingredient, preparation = extract_all(line)
    
    seafood_ingredients_measurement_dict[ingredient] |= {measurement}
    seafood_ingredients_descriptor_dict[ingredient] |= {descriptor}
    seafood_ingredients_preparation_dict[ingredient] |= {preparation}
    if ingredient not in seafood_ingredients_freq_dict:
        seafood_ingredients_freq_dict[ingredient] = 1
    else:
        seafood_ingredients_freq_dict[ingredient] += 1

    # test
    print(line)
    print('quantity:', quantity)
    print('measurement:', measurement)
    print('descriptor:', descriptor)
    print('ingredient:', ingredient)
    print('preparation:', preparation)
    print()

1/3 cup soy sauce
quantity: 1/3
measurement: cup
descriptor: None
ingredient: soy sauce
preparation: None

3/4 cup honey
quantity: 3/4
measurement: cup
descriptor: None
ingredient: honey
preparation: None

2 (12 inch) untreated cedar planks
quantity: 2 (12 inch)
measurement: None
descriptor: untreated
ingredient: cedar planks
preparation: None

1/4 teaspoon garlic powder
quantity: 1/4
measurement: teaspoon
descriptor: None
ingredient: garlic powder
preparation: None

1 tablespoon olive oil
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: olive oil
preparation: None

1 teaspoon ground black pepper
quantity: 1
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1/2 teaspoon cayenne pepper
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: cayenne pepper
preparation: None

1/4 cup pineapple juice
quantity: 1/4
measurement: cup
descriptor: None
ingredient: pineapple juice
preparation: None

2 tablespoons white vinegar
quantity:

1 clove garlic, crushed
quantity: 1
measurement: clove
descriptor: None
ingredient: garlic
preparation: crushed

8 ounces Monterey Jack cheese, shredded
quantity: 8
measurement: ounces
descriptor: None
ingredient: Monterey Jack cheese
preparation: shredded

2 tablespoons fine bread crumbs
quantity: 2
measurement: tablespoons
descriptor: fine
ingredient: bread crumbs
preparation: None

1 onion, chopped
quantity: 1
measurement: None
descriptor: None
ingredient: onion
preparation: chopped

7 black peppercorns
quantity: 7
measurement: None
descriptor: black
ingredient: peppercorns
preparation: None

1 1/2 cups beer
quantity: 1 1/2
measurement: cups
descriptor: None
ingredient: beer
preparation: None

2 cups flaked coconut
quantity: 2
measurement: cups
descriptor: flaked
ingredient: coconut
preparation: None

baking powder
1 1/2 teaspoons baking powder
quantity: 1 1/2
measurement: teaspoons
descriptor: None
ingredient: baking powder
preparation: None

1 egg
quantity: 1
measurement: None
des

4 tablespoons water
quantity: 4
measurement: tablespoons
descriptor: None
ingredient: water
preparation: None

2 tablespoons ketchup
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: ketchup
preparation: None

1/4 teaspoon ground ginger
quantity: 1/4
measurement: teaspoon
descriptor: ground
ingredient: ginger
preparation: None

4 cloves garlic, minced
quantity: 4
measurement: cloves
descriptor: None
ingredient: garlic
preparation: minced

2 teaspoons cornstarch
quantity: 2
measurement: teaspoons
descriptor: None
ingredient: cornstarch
preparation: None

1/4 cup sliced green onions
quantity: 1/4
measurement: cup
descriptor: sliced green
ingredient: onions
preparation: None

1/2 teaspoon crushed red pepper
quantity: 1/2
measurement: teaspoon
descriptor: crushed red
ingredient: pepper
preparation: None

12 ounces cooked shrimp, tails removed
quantity: 12
measurement: ounces
descriptor: cooked
ingredient: shrimp
preparation: tails removed

1 tablespoon soy sauce
quantity: 1

1 teaspoon dried parsley flakes
quantity: 1
measurement: teaspoon
descriptor: dried
ingredient: parsley flakes
preparation: None

1 tablespoon butter, melted
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: butter
preparation: melted

pepper to taste
quantity: to taste
measurement: None
descriptor: None
ingredient: pepper
preparation: None

3 tablespoons fresh lemon juice
quantity: 3
measurement: tablespoons
descriptor: fresh
ingredient: lemon juice
preparation: None

4 tilapia fillets
quantity: 4
measurement: tilapia
descriptor: None
ingredient: fillets
preparation: None

3/4 cup all-purpose flour
quantity: 3/4
measurement: cup
descriptor: all-purpose
ingredient: flour
preparation: None

3 (6.5 ounce) cans minced clams
quantity: 3 (6.5 ounce)
measurement: cans
descriptor: minced
ingredient: clams
preparation: None

1 cup minced onion
quantity: 1
measurement: cup
descriptor: minced
ingredient: onion
preparation: None

ground black pepper to taste
quantity: to taste
meas

1 cup shredded Cheddar cheese
quantity: 1
measurement: cup
descriptor: shredded
ingredient: Cheddar cheese
preparation: None

1/3 cup soy sauce
quantity: 1/3
measurement: cup
descriptor: None
ingredient: soy sauce
preparation: None

3/4 cup honey
quantity: 3/4
measurement: cup
descriptor: None
ingredient: honey
preparation: None

2 (12 inch) untreated cedar planks
quantity: 2 (12 inch)
measurement: None
descriptor: untreated
ingredient: cedar planks
preparation: None

1/4 teaspoon garlic powder
quantity: 1/4
measurement: teaspoon
descriptor: None
ingredient: garlic powder
preparation: None

1 tablespoon olive oil
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: olive oil
preparation: None

1 teaspoon ground black pepper
quantity: 1
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

1/2 teaspoon cayenne pepper
quantity: 1/2
measurement: teaspoon
descriptor: None
ingredient: cayenne pepper
preparation: None

1/4 cup pineapple juice
quanti

2 teaspoons salt, or to taste
quantity: 2
measurement: teaspoons
descriptor: or to taste
ingredient: salt
preparation: None

1 clove garlic, crushed
quantity: 1
measurement: clove
descriptor: None
ingredient: garlic
preparation: crushed

8 ounces Monterey Jack cheese, shredded
quantity: 8
measurement: ounces
descriptor: None
ingredient: Monterey Jack cheese
preparation: shredded

2 tablespoons fine bread crumbs
quantity: 2
measurement: tablespoons
descriptor: fine
ingredient: bread crumbs
preparation: None

1 onion, chopped
quantity: 1
measurement: None
descriptor: None
ingredient: onion
preparation: chopped

7 black peppercorns
quantity: 7
measurement: None
descriptor: black
ingredient: peppercorns
preparation: None

1 1/2 cups beer
quantity: 1 1/2
measurement: cups
descriptor: None
ingredient: beer
preparation: None

2 cups flaked coconut
quantity: 2
measurement: cups
descriptor: flaked
ingredient: coconut
preparation: None

baking powder
1 1/2 teaspoons baking powder
quantity: 1 1/2

1 teaspoon ground black pepper
quantity: 1
measurement: teaspoon
descriptor: ground black
ingredient: pepper
preparation: None

2 (6 ounce) fillets salmon
quantity: 2 (6 ounce)
measurement: fillets
descriptor: None
ingredient: salmon
preparation: None

1 teaspoon dried basil
quantity: 1
measurement: teaspoon
descriptor: dried
ingredient: basil
preparation: None

4 tablespoons water
quantity: 4
measurement: tablespoons
descriptor: None
ingredient: water
preparation: None

2 tablespoons ketchup
quantity: 2
measurement: tablespoons
descriptor: None
ingredient: ketchup
preparation: None

1/4 teaspoon ground ginger
quantity: 1/4
measurement: teaspoon
descriptor: ground
ingredient: ginger
preparation: None

4 cloves garlic, minced
quantity: 4
measurement: cloves
descriptor: None
ingredient: garlic
preparation: minced

2 teaspoons cornstarch
quantity: 2
measurement: teaspoons
descriptor: None
ingredient: cornstarch
preparation: None

1/4 cup sliced green onions
quantity: 1/4
measurement: cup


cream of mushroom soup
2 (10.75 ounce) cans condensed cream of mushroom soup
quantity: 2 (10.75 ounce)
measurement: cans
descriptor: None
ingredient: cream of mushroom soup
preparation: None

2 cups shredded Cheddar cheese
quantity: 2
measurement: cups
descriptor: shredded
ingredient: Cheddar cheese
preparation: None

1 (12 ounce) package egg noodles
quantity: 1 (12 ounce)
measurement: package
descriptor: None
ingredient: egg noodles
preparation: None

1 clove garlic, finely chopped
quantity: 1
measurement: clove
descriptor: None
ingredient: garlic
preparation: finely chopped

1 teaspoon dried parsley flakes
quantity: 1
measurement: teaspoon
descriptor: dried
ingredient: parsley flakes
preparation: None

1 tablespoon butter, melted
quantity: 1
measurement: tablespoon
descriptor: None
ingredient: butter
preparation: melted

pepper to taste
quantity: to taste
measurement: None
descriptor: None
ingredient: pepper
preparation: None

3 tablespoons fresh lemon juice
quantity: 3
measurement: 

6 cloves garlic, minced
quantity: 6
measurement: cloves
descriptor: None
ingredient: garlic
preparation: minced

1 1/2 cups French fried onions
quantity: 1 1/2
measurement: cups
descriptor: French fried
ingredient: onions
preparation: None

3 cups cooked macaroni
quantity: 3
measurement: cups
descriptor: cooked
ingredient: macaroni
preparation: None

cream of chicken soup
1 (10.75 ounce) can condensed cream of chicken soup
quantity: 1 (10.75 ounce)
measurement: can
descriptor: None
ingredient: cream of chicken soup
preparation: None

1 (5 ounce) can tuna, drained
quantity: 1 (5 ounce)
measurement: can
descriptor: None
ingredient: tuna
preparation: drained

1 cup shredded Cheddar cheese
quantity: 1
measurement: cup
descriptor: shredded
ingredient: Cheddar cheese
preparation: None

CPU times: user 1min 15s, sys: 18.7 s, total: 1min 34s
Wall time: 16.1 s


In [115]:
sorted(seafood_ingredients_freq_dict.items(), key=lambda pair: pair[1], reverse=True)

[('garlic', 30),
 ('pepper', 26),
 ('olive oil', 22),
 ('butter', 22),
 ('salt', 22),
 ('parsley', 18),
 ('lemon juice', 16),
 ('cayenne pepper', 14),
 ('shrimp', 14),
 ('soy sauce', 12),
 ('onion', 12),
 ('honey', 10),
 ('basil', 10),
 ('salt and pepper', 8),
 ('salmon', 8),
 ('mayonnaise', 8),
 ('flour', 8),
 ('garlic powder', 6),
 ('salmon fillets', 6),
 ('lemon', 6),
 ('bread crumbs', 6),
 ('Dijon mustard', 6),
 ('beer', 6),
 ('cheese', 6),
 ('oil', 6),
 ('fillets', 6),
 ('tuna', 6),
 ('vinegar', 4),
 ('sugar', 4),
 ('paprika', 4),
 ('capers', 4),
 ('wine', 4),
 ('thyme', 4),
 ('baking powder', 4),
 ('egg', 4),
 ('oil for frying', 4),
 ('water', 4),
 ('wine vinegar', 4),
 ('cornstarch', 4),
 ('onions', 4),
 ('oregano', 4),
 ('bell pepper', 4),
 ('celery', 4),
 ('chicken broth', 4),
 ('dill weed', 4),
 ('Cheddar cheese', 4),
 ('cream', 4),
 ('cedar planks', 2),
 ('pineapple juice', 2),
 ('crackers', 2),
 ('eggs', 2),
 ('shallots', 2),
 ('chives', 2),
 ('halibut', 2),
 ('mussels', 2)

## Store Ingredients Categorized

In [116]:
ingredients_categorized = {}
ingredients_categorized['vegetarian_protein'] = {}
ingredients_categorized['vegetarian_protein']['ingredients'] = vegetarian_protein_ingredients_freq_dict
ingredients_categorized['vegetarian_protein']['measurement'] = vegetarian_protein_ingredients_measurement_dict
ingredients_categorized['vegetarian_protein']['descriptor'] = vegetarian_protein_ingredients_descriptor_dict
ingredients_categorized['vegetarian_protein']['preparation'] = vegetarian_protein_ingredients_preparation_dict
ingredients_categorized['meat'] = {}
ingredients_categorized['meat']['ingredients'] = meat_ingredients_freq_dict
ingredients_categorized['meat']['measurement'] = meat_ingredients_measurement_dict
ingredients_categorized['meat']['descriptor'] = meat_ingredients_descriptor_dict
ingredients_categorized['meat']['preparation'] = meat_ingredients_preparation_dict
ingredients_categorized['seafood'] = {}
ingredients_categorized['seafood']['ingredients'] = seafood_ingredients_freq_dict
ingredients_categorized['seafood']['measurement'] = seafood_ingredients_measurement_dict
ingredients_categorized['seafood']['descriptor'] = seafood_ingredients_descriptor_dict
ingredients_categorized['seafood']['preparation'] = seafood_ingredients_preparation_dict

In [117]:
# save scraped data
with open('data/ingredients_categorized.pickle', 'wb') as file:
    pickle.dump(ingredients_categorized, file, protocol=pickle.HIGHEST_PROTOCOL)
file.closed

True

In [118]:
# check saved data
with open('data/ingredients_categorized.pickle', 'rb') as file:
    ingredients_categorized = pickle.load(file)

sorted(ingredients_categorized['meat']['ingredients'].items(), key=lambda pair: pair[1], reverse=True)

[('onion', 70),
 ('pepper', 69),
 ('garlic', 64),
 ('salt', 64),
 ('olive oil', 36),
 ('sugar', 35),
 ('water', 31),
 ('oregano', 28),
 ('garlic powder', 25),
 ('salt and pepper', 25),
 ('butter', 24),
 ('Worcestershire sauce', 22),
 ('parsley', 22),
 ('cumin', 22),
 ('beef', 20),
 ('flour', 20),
 ('thyme', 19),
 ('cayenne pepper', 19),
 ('tomatoes', 18),
 ('cheese', 18),
 ('chicken broth', 16),
 ('oil', 16),
 ('paprika', 16),
 ('chicken breast halves', 16),
 ('chili powder', 15),
 ('bell pepper', 15),
 ('celery', 15),
 ('carrots', 15),
 ('soy sauce', 14),
 ('ketchup', 14),
 ('beef broth', 13),
 ('milk', 13),
 ('wine', 13),
 ('tomato sauce', 12),
 ('potatoes', 12),
 ('cream', 12),
 ('bacon', 12),
 ('Cheddar cheese', 11),
 ('bread crumbs', 11),
 ('onions', 11),
 ('lamb', 11),
 ('basil', 10),
 ('egg', 10),
 ('ginger', 10),
 ('eggs', 9),
 ('cilantro', 9),
 ('chops', 9),
 ('pepper sauce', 8),
 ('leaf', 8),
 ('vinegar', 8),
 ('sauce', 8),
 ('tomato paste', 8),
 ('cinnamon', 8),
 ('shallots'