In [228]:
import pandas as pd
import nltk
import re
import string
from collections import Counter
import json

In [229]:
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

In [230]:
from parse_url import *
from mapping_urls_make import *

In [231]:
def get_test_ingredients(og_ingredients):
    test_ingredients = [x['name'] for x in og_ingredients]
    
    return test_ingredients

In [232]:
def clean_text(s):
    s = re.sub("[^a-zA-Z ]", ' ', s)
    s = s.lower()
    s = word_tokenize(s)
    
    # filter out stop words
    words = [w for w in s if not w in stop_words]
    
    return words

In [233]:
def get_italian_ingredients(all_recipes):
    cuisine_df = pd.read_json(all_recipes)
    italian_df = cuisine_df[cuisine_df['cuisine'] == 'italian']
    
    italian = []
    #print(italian_df['ingredients'].tolist())
    for i in italian_df['ingredients'].tolist():
        for j in i:
            #print(j)
            italian.append(j)
            
    return italian

In [234]:
temp = get_italian_ingredients('train.json')

In [235]:
temp

['sugar',
 'pistachio nuts',
 'white almond bark',
 'flour',
 'vanilla extract',
 'olive oil',
 'almond extract',
 'eggs',
 'baking powder',
 'dried cranberries',
 'chopped tomatoes',
 'fresh basil',
 'garlic',
 'extra-virgin olive oil',
 'kosher salt',
 'flat leaf parsley',
 'pimentos',
 'sweet pepper',
 'dried oregano',
 'olive oil',
 'garlic',
 'sharp cheddar cheese',
 'pepper',
 'swiss cheese',
 'provolone cheese',
 'canola oil',
 'mushrooms',
 'black olives',
 'sausages',
 'Italian parsley leaves',
 'walnuts',
 'hot red pepper flakes',
 'extra-virgin olive oil',
 'fresh lemon juice',
 'trout fillet',
 'garlic cloves',
 'chipotle chile',
 'fine sea salt',
 'flat leaf parsley',
 'fresh parmesan cheese',
 'butter',
 'all-purpose flour',
 'fat free less sodium chicken broth',
 'chopped fresh chives',
 'gruyere cheese',
 'ground black pepper',
 'bacon slices',
 'gnocchi',
 'fat free milk',
 'cooking spray',
 'salt',
 'italian seasoning',
 'broiler-fryer chicken',
 'mayonaise',
 'zesty 

In [236]:
def get_ngrams(i_list):
    all_grams = []
    
    for i in i_list:
        l = clean_text(i)
        grams = list(nltk.everygrams(l, max_len=len(l)))
        grams.sort(key=len, reverse=True)        
        grams = [' '.join(g) for g in grams]
        all_grams.append(grams)
    
    return all_grams

In [237]:
stop_words = set(stopwords.words('english'))
get_ngrams(temp)

[['sugar'],
 ['pistachio nuts', 'pistachio', 'nuts'],
 ['white almond bark',
  'white almond',
  'almond bark',
  'white',
  'almond',
  'bark'],
 ['flour'],
 ['vanilla extract', 'vanilla', 'extract'],
 ['olive oil', 'olive', 'oil'],
 ['almond extract', 'almond', 'extract'],
 ['eggs'],
 ['baking powder', 'baking', 'powder'],
 ['dried cranberries', 'dried', 'cranberries'],
 ['chopped tomatoes', 'chopped', 'tomatoes'],
 ['fresh basil', 'fresh', 'basil'],
 ['garlic'],
 ['extra virgin olive oil',
  'extra virgin olive',
  'virgin olive oil',
  'extra virgin',
  'virgin olive',
  'olive oil',
  'extra',
  'virgin',
  'olive',
  'oil'],
 ['kosher salt', 'kosher', 'salt'],
 ['flat leaf parsley', 'flat leaf', 'leaf parsley', 'flat', 'leaf', 'parsley'],
 ['pimentos'],
 ['sweet pepper', 'sweet', 'pepper'],
 ['dried oregano', 'dried', 'oregano'],
 ['olive oil', 'olive', 'oil'],
 ['garlic'],
 ['sharp cheddar cheese',
  'sharp cheddar',
  'cheddar cheese',
  'sharp',
  'cheddar',
  'cheese'],
 ['pe

In [238]:
def get_italian_kb(italian, ingredients_kb):
    italian_kb = {}
    
    italian_grams = get_ngrams(italian)

    for category in ingredients_kb:
        counter = Counter()
        l = [ps.stem(j) for j in ingredients_kb[category]]
        
        for i in italian_grams:
            for j in i:
                if ps.stem(j) in l:                    
                    counter[j] += 1
                    break
                
        if counter:
            italian_kb[category] = counter.most_common()
            
    return italian_kb

In [239]:
with open('to_italian_kb.json') as json_file:
    ingredients_kb = json.load(json_file)

In [240]:
ps = PorterStemmer()

In [241]:
italian_kb = get_italian_kb(temp, ingredients_kb)

In [259]:
with open('italian_freq.json', 'w') as f:
    json.dump(italian_kb, f)

In [242]:
italian_kb

{'carb': [('pasta', 815),
  ('bread', 456),
  ('rice', 338),
  ('spaghetti', 337),
  ('bread crumbs', 300),
  ('lasagna', 273),
  ('penne', 263),
  ('linguine', 196),
  ('wheat', 173),
  ('baguette', 144),
  ('polenta', 139),
  ('fettucine', 136),
  ('doughs', 103),
  ('bread crumb', 85),
  ('dough', 85),
  ('cornmeal', 78),
  ('crust', 77),
  ('noodles', 66),
  ('rolls', 61),
  ('toasted', 54),
  ('fettuccine', 52),
  ('breadcrumbs', 51),
  ('sourdough', 40),
  ('macaroni', 34),
  ('grain', 34),
  ('manicotti', 28),
  ('risotto', 20),
  ('croutons', 20),
  ('shells', 19),
  ('tagliatelle', 19),
  ('barley', 17),
  ('vermicelli', 16),
  ('buns', 15),
  ('shell', 14),
  ('crusts', 14),
  ('tortillas', 13),
  ('quinoa', 10),
  ('biscuits', 10),
  ('stuffed', 10),
  ('shelled', 9),
  ('oats', 8),
  ('crackers', 7),
  ('pita', 7),
  ('roll', 7),
  ('couscous', 6),
  ('toast', 6),
  ('pitas', 5),
  ('graham', 5),
  ('stuffing', 5),
  ('muffins', 4),
  ('cornflake', 4),
  ('biscuit', 4),
  (

In [243]:
def transform_ingredients(test_ingredients, italian_kb, ingredients_kb):
    transformed_ingredients = []
    og_simplified_ingredients = []
    counter = Counter()
    
    test_grams = get_ngrams(test_ingredients)

    for i in test_grams:    
        match = False
        
        for g in i:        
            for category in ingredients_kb:
                ingredients_stemmed = [ps.stem(x) for x in ingredients_kb[category]]
                italian_stemmed = [ps.stem(x[0]) for x in italian_kb[category]]

                if ps.stem(g) in ingredients_stemmed:
                    og = g
                    new = g 
                      
                    if (len(italian_kb[category]) > counter[category]) and ps.stem(g) not in italian_stemmed[:2]:
                        new = italian_kb[category][counter[category]][0]
                        #og_simplified_ingredients.append(g)
                        #transformed_ingredients.append(italian_kb[category][counter[category]][0])
                        counter[category] += 1
                        #break
                        
                    if ps.stem(new) in [ps.stem(x) for x in transformed_ingredients]:
                        if (len(italian_kb[category]) > counter[category]):
                            counter[category] += 1
                            new = italian_kb[category][counter[category]][0]
                    
                    transformed_ingredients.append(new)
                    og_simplified_ingredients.append(og)
                    match = True
                    break
                    
            if match:
                break
                
        if not match:
            og_simplified_ingredients.append(i[0])
            transformed_ingredients.append(i[0])
            
    return transformed_ingredients, og_simplified_ingredients

In [244]:
og_ingredients, og_directions = main_parse('https://www.allrecipes.com/recipe/255865/slow-cooker-thai-curried-beef/',check="single")

https://www.allrecipes.com/recipe/255865/slow-cooker-thai-curried-beef/


In [245]:
test_ingredients = get_test_ingredients(og_ingredients)

In [246]:
test_ingredients

['lean stew beef',
 'salt',
 'diced onion',
 'garlic, minced',
 '(13.5 ounce) can coconut milk',
 'beef broth',
 'red curry paste',
 'lime juice',
 'peanut oil',
 'jalapeno chile peppers, seeded and minced',
 'brown sugar',
 'baby spinach',
 'water',
 'jasmine rice',
 'fresh basil leaves (optional)']

In [247]:
transformed_ingredients, og_simplified_ingredients = transform_ingredients(test_ingredients, italian_kb, ingredients_kb)

In [248]:
transformed_ingredients

['chicken',
 'salt',
 'tomatoes',
 'pepper',
 'milk',
 'eggs',
 'marinara',
 'basil',
 'olive oil',
 'lemon juice',
 'sugar',
 'parsley',
 'water',
 'pasta',
 'rosemary']

In [249]:
og_simplified_ingredients

['beef',
 'salt',
 'onion',
 'garlic',
 'coconut milk',
 'beef',
 'curry',
 'lime juice',
 'peanut oil',
 'jalapeno',
 'sugar',
 'spinach',
 'water',
 'rice',
 'basil']

In [250]:
# takes our created italian ingredients KB and list of all recipes, writes dict of italian ingredients + frequencies divided by food group to json
def get_italian_ingredients_dict(italian_json, all_recipes):
    # import stop words
    stop_words = set(stopwords.words('english'))
    
    # create PorterStemmer object for stemming words
    ps = PorterStemmer()
    
    # import italian ingredients KB (dict of food categories)
    with open(italian_json) as json_file:
        ingredients_kb = json.load(json_file)
        
    # import all_recipes json, get ingredients from only italian recipes
    italian = get_italian_ingredients(all_recipes)
    
    # calculate dict of italian ingredients + frequencies (keys=food categories)
    italian_kb = get_italian_freq(italian, ingredients_kb)
    
    # write dict to json file
    with open('italian_freq.json', 'w') as f:
        json.dump(italian_kb, f)
        
    return italian_kb

In [251]:
# takes list of og_ingredients (from main_parse) + name of italian ingredients json file, returns list of transformed ingredients
def cuisine_to_italian_ingredients(og_ingredients, italian_kb, ingredients_kb):
    # retrieve dict of italian ingredients + frequencies from json
    with open(italian_kb) as json_file:
        italian_kb = json.load(json_file)
        
    # get ingredients from recipe being transformed
    test_ingredients = get_test_ingredients(og_ingredients)
    
    # gets list of transformed ingredients
    transformed_ingredients, og_simplified_ingredients = transform_ingredients(test_ingredients, italian_kb, ingredients_kb)
    
    return transformed_ingredients, og_simplified_ingredients

In [252]:
def cuisine_to_italian_directions(og_simplified_ingredients, og_directions, transformed_ingredients):    
    new_directions = '@'.join(og_directions)
    
    print(og_directions)
    
    for i in og_simplified_ingredients:
        if i in new_directions:
            new_directions = new_directions.replace(i, transformed_ingredients[og_simplified_ingredients.index(i)])

    new_directions = new_directions.split('@')
                           
    return new_directions

In [253]:
cuisine_to_italian_directions(og_simplified_ingredients, og_directions, transformed_ingredients)

['Preheat a large skillet over medium-high heat. Cook and stir beef until browned, about 2 minutes per side. Drain excess grease. Transfer beef to a 4-quart slow cooker and sprinkle with salt.\n                            ', 'Combine onion and garlic in the same skillet over medium-high heat; saute until tender, about 5 minutes. Add to beef in the slow cooker.\n                            ', 'Stir coconut milk, beef broth, red curry paste, lime juice, peanut oil, jalapeno chile peppers, and brown sugar into the slow cooker.\n                            ', 'Cover and cook on Low until flavors combine, 6 to 10 hours.\n                            ', 'Bring water and rice to a boil in a saucepan. Reduce heat to medium-low, cover, and simmer until rice is tender and liquid has been absorbed, 20 to 25 minutes.\n                            ', 'Stir spinach into the slow cooker and cook until wilted, about 15 minutes. Serve beef mixture over rice and garnish with basil leaves.\n               

['Preheat a large skillet over medium-high heat. Cook and stir chicken until browned, about 2 minutes per side. Drain excess grease. Transfer chicken to a 4-quart slow cooker and sprinkle with salt.\n                            ',
 'Combine tomatoes and pepper in the same skillet over medium-high heat; saute until tender, about 5 minutes. Add to chicken in the slow cooker.\n                            ',
 'Stir milk, chicken broth, red marinara paste, rosemary, olive oil, lemon juice chile peppers, and brown sugar into the slow cooker.\n                            ',
 'Cover and cook on Low until flavors combine, 6 to 10 hours.\n                            ',
 'Bring water and pasta to a boil in a saucepan. Reduce heat to medium-low, cover, and simmer until pasta is tender and liquid has been absorbed, 20 to 25 minutes.\n                            ',
 'Stir parsley into the slow cooker and cook until wilted, about 15 minutes. Serve chicken mixture over pasta and garnish with rosemary 

In [254]:
og_ingredients, og_directions = main_parse('https://www.allrecipes.com/recipe/236805/king-ranch-chicken-casserole/',check="single")

https://www.allrecipes.com/recipe/236805/king-ranch-chicken-casserole/


In [255]:
get_test_ingredients(og_ingredients)

['vegetable oil',
 'white onion, diced',
 'red bell pepper, diced',
 'green bell pepper, diced',
 '(10.75 ounce) can condensed cream of mushroom soup',
 '(10.75 ounce) can condensed cream of chicken soup',
 '(10 ounce) can diced tomatoes with green chile peppers (such as RO*TEL®)',
 'chicken broth',
 'sour cream',
 'ground cumin',
 'ancho chile powder',
 'dried oregano',
 'chipotle chile powder',
 'cooked chicken, torn into shreds or cut into chunks',
 'shredded Cheddar cheese',
 'corn tortillas, cut into quarters']

In [256]:
og_directions

['Preheat oven to 350 degrees F (175 degrees C).\n                            ',
 'Heat oil in a large skillet over high heat. Saute onion, red bell pepper, and green bell pepper in hot oil until warmed through, about 2 minutes.\n                            ',
 'Combine onion-pepper mixture, cream of mushroom soup, cream of chicken soup, diced tomatoes, chicken broth, sour cream, cumin, ancho chile powder, oregano, and chipotle chile powder together in a large bowl and stir until sauce is well-combined.\n                            ',
 'Spread a few tablespoons of the sauce in the bottom of a 9x13-inch baking dish. Spread 1/2 the chicken over the sauce. Spread about half the sauce over the chicken and top with 1/3 the cheese. Spread a layer of tortillas over the cheese. Spread remaining 1/2 the chicken over the tortillas, and top with almost all of the remaining sauce, reserving 1/2 cup sauce. Top with 1/3 the cheese, remaining tortillas, the reserved 1/2 cup sauce, and remaining 1/3 c

In [257]:
test.replace('tuna','chicken')

'Preheat oven to 350 degrees F (175 degrees C). Line a baking sheet with aluminum foil, and spray with cooking spray.\n                            In a large bowl, thoroughly mix the chicken, bread crumbs, zucchini, green pepper, onion, green onions, garlic, jalapeno pepper, cottage cheese, sour cream, 2 eggs, lime juice, dried basil, pepper, and salt.\n                            Beat 2 eggs in a shallow bowl, and place the cornmeal on a plate.\n                            Scoop up about 1/4 cup of the chicken mixture, and gently form it into a compact patty. Dip both sides of each cake into beaten egg and then press into cornmeal, and place the cakes onto the prepared baking sheet. Spray the tops of the cakes with cooking oil spray.\n                            Bake in the preheated oven until the tops of the cakes are beginning to brown, about 20 minutes. Flip each cake, spray with cooking spray, and bake until the cakes are cooked through and lightly browned, about 20 more minutes.

In [258]:
with open('train.json') as json_file:
    all_recipes = json.load(json_file)