# Importing the data

In [1]:
import pandas as pd

In [2]:
df= pd.read_csv("Datasets/recipes.csv")

In [3]:
df.head(3)

Unnamed: 0.1,Unnamed: 0,recipe_name,prep_time,cook_time,total_time,servings,yield,ingredients,directions,rating,url,cuisine_path,nutrition,timing,img_src
0,0,Apple-Cranberry Crostada,,,,8,6 to 8 - servings,"3 tablespoons butter, 2 pounds Granny Smith ap...",Heat butter in a large skillet over medium-hig...,4.4,https://www.allrecipes.com/recipe/76931/apple-...,/Desserts/Fruit Desserts/Apple Dessert Recipes/,"Total Fat 18g 23%, Saturated Fat 7g 34%, Chole...","Servings: 8, Yield: 6 to 8 - servings",https://www.allrecipes.com/thmb/Tf1wH73bfH6Oql...
1,1,Apple Pie by Grandma Ople,30 mins,1 hrs,1 hrs 30 mins,8,1 9-inch pie,"8 small Granny Smith apples, or as needed, ½ c...","Peel and core apples, then thinly slice. Set a...",4.8,https://www.allrecipes.com/recipe/12682/apple-...,/Desserts/Pies/Apple Pie Recipes/,"Total Fat 19g 24%, Saturated Fat 9g 46%, Chole...","Prep Time: 30 mins, Cook Time: 1 hrs, Total Ti...",https://www.allrecipes.com/thmb/1I95oiTGz6aEpu...
2,2,Sarah's Homemade Applesauce,10 mins,15 mins,25 mins,4,,"4 apples - peeled, cored and chopped, ¾ cup w...","Combine apples, water, sugar, and cinnamon in ...",4.8,https://www.allrecipes.com/recipe/51301/sarahs...,/Side Dish/Applesauce Recipes/,"Total Fat 0g 0%, Sodium 3mg 0%, Total Carbohyd...","Prep Time: 10 mins, Cook Time: 15 mins, Total ...",https://www.allrecipes.com/thmb/VY5d0tZHB8xz6y...


In [4]:
df.columns

Index(['Unnamed: 0', 'recipe_name', 'prep_time', 'cook_time', 'total_time',
       'servings', 'yield', 'ingredients', 'directions', 'rating', 'url',
       'cuisine_path', 'nutrition', 'timing', 'img_src'],
      dtype='object')

# Goal: Obtaining the full list of individual ingredients used

### Combining the elements of the 'ingredients' column into a big string

In [5]:
raw_list = ','.join(df['ingredients'])
raw_list

"3 tablespoons butter, 2 pounds Granny Smith apples (or other firm, crisp apples), peeled, quartered, cored and sliced 1/4-inch thick, 1 pound Macintosh apples (or other soft-textured apples that fall apart when cooked), peeled, quartered, cored, and sliced 1/4-inch thick, ½ cup sugar, ½ cup dried cranberries, 1 sheet frozen puff pastry, thawed but still cold (follow package directions), 1  egg white, lightly beaten, 1 tablespoon sugar, 1 cup Optional: Ice cream or lightly sweetened whipped cream,8 small Granny Smith apples, or as needed, ½ cup unsalted butter, 3 tablespoons all-purpose flour, ½ cup white sugar, ½ cup packed brown sugar, ¼ cup water, 1 (9 inch) double-crust pie pastry, thawed,4  apples - peeled, cored and chopped, ¾ cup water, ¼ cup white sugar, ½ teaspoon ground cinnamon,10 cups all-purpose apples, peeled, cored and sliced, 1 cup white sugar, 1 tablespoon all-purpose flour, 1 teaspoon ground cinnamon, ½ cup water, 1 cup quick-cooking oats, 1 cup all-purpose flour, 1 c

## Filtering the raw list

In [6]:
# Optional: If punkt, wordnet or omw-1.4 are not downloaded

#nltk.download('punkt')
#nltk.download('wordnet')
#nltk.download('omw-1.4')

In [7]:
from nltk.corpus import stopwords
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer

### Lemmatization

In [8]:
def lemmatize(to_lemmatize):
    lemmatizer = WordNetLemmatizer()
    word_list = nltk.word_tokenize(to_lemmatize)
    return  ' '.join([lemmatizer.lemmatize(w) for w in word_list])
    
lemmatized_list = lemmatize(raw_list)
lemmatized_list 

"3 tablespoon butter , 2 pound Granny Smith apple ( or other firm , crisp apple ) , peeled , quartered , cored and sliced 1/4-inch thick , 1 pound Macintosh apple ( or other soft-textured apple that fall apart when cooked ) , peeled , quartered , cored , and sliced 1/4-inch thick , ½ cup sugar , ½ cup dried cranberry , 1 sheet frozen puff pastry , thawed but still cold ( follow package direction ) , 1 egg white , lightly beaten , 1 tablespoon sugar , 1 cup Optional : Ice cream or lightly sweetened whipped cream,8 small Granny Smith apple , or a needed , ½ cup unsalted butter , 3 tablespoon all-purpose flour , ½ cup white sugar , ½ cup packed brown sugar , ¼ cup water , 1 ( 9 inch ) double-crust pie pastry , thawed,4 apple - peeled , cored and chopped , ¾ cup water , ¼ cup white sugar , ½ teaspoon ground cinnamon,10 cup all-purpose apple , peeled , cored and sliced , 1 cup white sugar , 1 tablespoon all-purpose flour , 1 teaspoon ground cinnamon , ½ cup water , 1 cup quick-cooking oat ,

#### Verifying how much is the lemmatized list similar to the original one

In [9]:
from difflib import SequenceMatcher

SequenceMatcher(None, raw_list, lemmatized_list).ratio()

0.6936660169354855

### Removing standard stop words, punctuations and numbers

--> Now that I think about it, I could have skipped this step

In [10]:
import string  
stop_words = stopwords.words('english') 
additional_words = ['1/4-inch', '1/4']
custom_stop_words = stop_words + additional_words
word_tokens = word_tokenize(lemmatized_list)


filtered_list = [w for w in word_tokens if not w.lower() in custom_stop_words]

filtered_list = [w for w in filtered_list if not (w.isnumeric() or w in string.punctuation)]
  

  
filtered_list

['tablespoon',
 'butter',
 'pound',
 'Granny',
 'Smith',
 'apple',
 'firm',
 'crisp',
 'apple',
 'peeled',
 'quartered',
 'cored',
 'sliced',
 'thick',
 'pound',
 'Macintosh',
 'apple',
 'soft-textured',
 'apple',
 'fall',
 'apart',
 'cooked',
 'peeled',
 'quartered',
 'cored',
 'sliced',
 'thick',
 'cup',
 'sugar',
 'cup',
 'dried',
 'cranberry',
 'sheet',
 'frozen',
 'puff',
 'pastry',
 'thawed',
 'still',
 'cold',
 'follow',
 'package',
 'direction',
 'egg',
 'white',
 'lightly',
 'beaten',
 'tablespoon',
 'sugar',
 'cup',
 'Optional',
 'Ice',
 'cream',
 'lightly',
 'sweetened',
 'whipped',
 'cream,8',
 'small',
 'Granny',
 'Smith',
 'apple',
 'needed',
 'cup',
 'unsalted',
 'butter',
 'tablespoon',
 'all-purpose',
 'flour',
 'cup',
 'white',
 'sugar',
 'cup',
 'packed',
 'brown',
 'sugar',
 'cup',
 'water',
 'inch',
 'double-crust',
 'pie',
 'pastry',
 'thawed,4',
 'apple',
 'peeled',
 'cored',
 'chopped',
 'cup',
 'water',
 'cup',
 'white',
 'sugar',
 'teaspoon',
 'ground',
 'cinn

### Selecting only food-related words from the preprocessed list

In [11]:
def extract_ingredients(to_filter):
    food_list=[]
    for w in to_filter:
        try:
            lexname = wn.synsets(w)[0].lexname()
            if (lexname == 'noun.food') or (lexname == 'noun.plant') or (lexname == "noun.object") or (lexname == "noun.animal"):
                food_list.append(w)
        except:
            pass
        
    return food_list


        

food_list = extract_ingredients(filtered_list)
food_list

['butter',
 'apple',
 'crisp',
 'apple',
 'apple',
 'apple',
 'sugar',
 'cranberry',
 'sheet',
 'pastry',
 'egg',
 'sugar',
 'apple',
 'butter',
 'flour',
 'sugar',
 'sugar',
 'pie',
 'pastry',
 'apple',
 'sugar',
 'ground',
 'apple',
 'sugar',
 'flour',
 'ground',
 'cinnamon',
 'oat',
 'flour',
 'sugar',
 'butter',
 'apple',
 'lemon',
 'juice',
 'sugar',
 'ground',
 'cinnamon',
 'ground',
 'apple',
 'sugar',
 'ground',
 'cinnamon',
 'sugar',
 'oat',
 'flour',
 'apple',
 'cider',
 'cinnamon',
 'allspice',
 'clove',
 'apple',
 'cranberry',
 'sugar',
 'ground',
 'cinnamon',
 'ground',
 'pastry',
 'pie',
 'apple',
 'butter',
 'sugar',
 'ground',
 'cinnamon',
 'ground',
 'nutmeg',
 'sugar',
 'vanilla',
 'butter',
 'sugar',
 'ground',
 'cinnamon',
 'apple',
 'lemon',
 'juice',
 'apple',
 'butter',
 'sugar',
 'ground',
 'cinnamon',
 'pastry',
 'sheet',
 'sugar',
 'milk',
 'vanilla',
 'apple',
 'flour',
 'sugar',
 'ground',
 'cinnamon',
 'butter',
 'apple',
 'sugar',
 'milk',
 'corn',
 'syrup

## Unique values of the food list (List of individual ingredients used)

In [12]:
import numpy as np
ingredients = np.unique(food_list)
ingredients

array(['Almond', 'Bosc', 'Brie', 'Chardonnay', 'Cheese', 'Cocoa',
       'Damson', 'Delicious', 'Dressing', 'Fruit', 'Gorgonzola', 'Ground',
       'Gruyere', 'Honey', 'Kosher', 'Marsala', 'Oats', 'Ocean',
       'Parmesan', 'Pineapple', 'Rice', 'Rosa', 'Tapioca', 'Vanilla',
       'Worcestershire', 'acinus', 'adobo', 'agave', 'alfalfa',
       'allspice', 'almond', 'amaretto', 'apple', 'applesauce', 'apricot',
       'arugula', 'avocado', 'bacon', 'baguette', 'bamboo', 'banana',
       'barbeque', 'basil', 'bay', 'bean', 'beef', 'beer', 'berry',
       'beverage', 'biscuit', 'bitter', 'bittersweet', 'blackberry',
       'blade', 'blossom', 'blueberry', 'bouillon', 'brandy', 'bread',
       'broccoli', 'broth', 'buckwheat', 'butter', 'buttermilk',
       'butternut', 'butterscotch', 'cabbage', 'cactus', 'canola',
       'cantaloupe', 'caper', 'caramel', 'cardamom', 'carrot', 'cashew',
       'castor', 'cayenne', 'celery', 'cereal', 'champagne', 'chard',
       'cheese', 'cherry', 'ches

# Adding an 'Ingredients only column' 

In [13]:
table = df.copy()

In [14]:
table['ingredients only'] = table['ingredients'].map(lemmatize).map(word_tokenize).map(extract_ingredients).map(np.unique)
table['ingredients only']

0       [apple, butter, cranberry, crisp, egg, pastry,...
1              [apple, butter, flour, pastry, pie, sugar]
2                        [apple, cinnamon, ground, sugar]
3       [apple, butter, cinnamon, flour, ground, oat, ...
4       [apple, cinnamon, ground, juice, lemon, nutmeg...
                              ...                        
1085           [Rice, butter, cereal, pecan, rice, sugar]
1086    [banana, egg, flour, juice, pecan, pineapple, ...
1087    [basil, beef, bouillon, clove, garlic, juice, ...
1088                  [butter, egg, flour, sugar, walnut]
1089    [Cocoa, almond, cashew, cayenne, cocoa, coconu...
Name: ingredients only, Length: 1090, dtype: object

In [15]:
table

Unnamed: 0.1,Unnamed: 0,recipe_name,prep_time,cook_time,total_time,servings,yield,ingredients,directions,rating,url,cuisine_path,nutrition,timing,img_src,ingredients only
0,0,Apple-Cranberry Crostada,,,,8,6 to 8 - servings,"3 tablespoons butter, 2 pounds Granny Smith ap...",Heat butter in a large skillet over medium-hig...,4.4,https://www.allrecipes.com/recipe/76931/apple-...,/Desserts/Fruit Desserts/Apple Dessert Recipes/,"Total Fat 18g 23%, Saturated Fat 7g 34%, Chole...","Servings: 8, Yield: 6 to 8 - servings",https://www.allrecipes.com/thmb/Tf1wH73bfH6Oql...,"[apple, butter, cranberry, crisp, egg, pastry,..."
1,1,Apple Pie by Grandma Ople,30 mins,1 hrs,1 hrs 30 mins,8,1 9-inch pie,"8 small Granny Smith apples, or as needed, ½ c...","Peel and core apples, then thinly slice. Set a...",4.8,https://www.allrecipes.com/recipe/12682/apple-...,/Desserts/Pies/Apple Pie Recipes/,"Total Fat 19g 24%, Saturated Fat 9g 46%, Chole...","Prep Time: 30 mins, Cook Time: 1 hrs, Total Ti...",https://www.allrecipes.com/thmb/1I95oiTGz6aEpu...,"[apple, butter, flour, pastry, pie, sugar]"
2,2,Sarah's Homemade Applesauce,10 mins,15 mins,25 mins,4,,"4 apples - peeled, cored and chopped, ¾ cup w...","Combine apples, water, sugar, and cinnamon in ...",4.8,https://www.allrecipes.com/recipe/51301/sarahs...,/Side Dish/Applesauce Recipes/,"Total Fat 0g 0%, Sodium 3mg 0%, Total Carbohyd...","Prep Time: 10 mins, Cook Time: 15 mins, Total ...",https://www.allrecipes.com/thmb/VY5d0tZHB8xz6y...,"[apple, cinnamon, ground, sugar]"
3,3,Apple Crisp,30 mins,45 mins,1 hrs 15 mins,12,1 9x13-inch pan,"10 cups all-purpose apples, peeled, cored and ...",Preheat the oven to 350 degrees F (175 degrees...,4.7,https://www.allrecipes.com/recipe/12409/apple-...,/Desserts/Crisps and Crumbles Recipes/Apple Cr...,"Total Fat 8g 11%, Saturated Fat 5g 25%, Choles...","Prep Time: 30 mins, Cook Time: 45 mins, Total ...",https://www.allrecipes.com/thmb/uAzhPOh86PfR-N...,"[apple, butter, cinnamon, flour, ground, oat, ..."
4,4,Apple Pie Filling,20 mins,20 mins,2 hrs 40 mins,40,5 9-inch pies,"18 cups thinly sliced apples, 3 tablespoons le...",Toss apples with lemon juice in a large bowl a...,4.7,https://www.allrecipes.com/recipe/12681/apple-...,/Desserts/Pies/Apple Pie Recipes/,"Total Fat 0g 0%, Sodium 61mg 3%, Total Carbohy...","Prep Time: 20 mins, Cook Time: 20 mins, Additi...",https://www.allrecipes.com/thmb/c0bbYaS1V_mTt_...,"[apple, cinnamon, ground, juice, lemon, nutmeg..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,1085,Date-Nut Balls,15 mins,10 mins,30 mins,48,4 dozen balls,"14 tablespoons butter, 1 cup pitted chopped da...",Melt butter in a saucepan over medium heat; co...,3.8,https://www.allrecipes.com/recipe/240653/date-...,/Desserts/Cookies/Fruit Cookie Recipes/Date/,"Total Fat 5g 6%, Saturated Fat 2g 12%, Cholest...","Prep Time: 15 mins, Cook Time: 10 mins, Additi...",https://www.allrecipes.com/thmb/LXm_15zLymhZ7c...,"[Rice, butter, cereal, pecan, rice, sugar]"
1086,1086,Jamaica Cake,,,,14,1 - 13x9 inch pan,"2 cups white sugar, 1 ½ cups vegetable oil, 1 ...","Mix together sugar, vegetable oil, pecans, flo...",4.6,https://www.allrecipes.com/recipe/7509/jamaica...,/Desserts/Fruit Desserts/Banana Dessert Recipes/,"Total Fat 33g 43%, Saturated Fat 4g 21%, Chole...","Servings: 14, Yield: 1 - 13x9 inch pan",https://www.allrecipes.com/thmb/tI7g5xY5-qPV5v...,"[banana, egg, flour, juice, pecan, pineapple, ..."
1087,1087,Best Hot Sauce,10 mins,20 mins,30 mins,100,2 cups,"10 fresh hot chile peppers, stems removed, 1 ...","Place peppers, onion, dates, basil, parsley, t...",4.9,https://www.allrecipes.com/recipe/242153/best-...,/Side Dish/Sauces and Condiments/,"Total Fat 1g 1%, Saturated Fat 0g 1%, Sodium 2...","Prep Time: 10 mins, Cook Time: 20 mins, Total ...",https://www.allrecipes.com/thmb/80wUYzFvbycfxx...,"[basil, beef, bouillon, clove, garlic, juice, ..."
1088,1088,Moist Date Nut Bread,15 mins,50 mins,1 hrs 35 mins,12,1 9x5-inch loaf,"2 ½ cups chopped dates, ¼ cup butter, 1 cup bo...",Preheat the oven to 350 degrees F (175 degrees...,4.5,https://www.allrecipes.com/recipe/51446/moist-...,/Breakfast and Brunch/Breakfast Bread Recipes/,"Total Fat 10g 13%, Saturated Fat 3g 16%, Chole...","Prep Time: 15 mins, Cook Time: 50 mins, Additi...",https://www.allrecipes.com/thmb/bYsnompJZ6UAF9...,"[butter, egg, flour, sugar, walnut]"


In [16]:
table.to_csv("Datasets/Ingredients.csv")

'3 tablespoons butter, 2 pounds Granny Smith apples (or other firm, crisp apples), peeled, quartered, cored and sliced 1/4-inch thick, 1 pound Macintosh apples (or other soft-textured apples that fall apart when cooked), peeled, quartered, cored, and sliced 1/4-inch thick, ½ cup sugar, ½ cup dried cranberries, 1 sheet frozen puff pastry, thawed but still cold (follow package directions), 1  egg white, lightly beaten, 1 tablespoon sugar, 1 cup Optional: Ice cream or lightly sweetened whipped cream'