In [1]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag

In [2]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /Users/student/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/student/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/student/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/student/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [12]:
# Read measure units from file and create a list
with open("measure_units.txt", "r") as file:
    measure_units = [unit.strip() for unit in file.readlines()]

In [4]:
# Define the recipes as strings
recipes = [
    "Chocolate Chip Cookies: Combine 2 cups of flour, 1 cup of sugar, 1 cup of brown sugar, 1 tsp of baking soda, and a pinch of salt. Cream together with 1 cup of softened butter, add 2 eggs and 2 tsp of vanilla extract. Fold in 2 cups of chocolate chips. Drop spoonfuls on a baking sheet and bake at 350°F for 10 minutes until golden brown.",
    "Vegetable Stir Fry: Heat 2 tbsp of olive oil in a pan, add minced garlic and 1 inch of grated ginger, then add sliced vegetables like broccoli, carrots, and bell peppers. Stir-fry until tender. Add 2 tbsp of soy sauce and serve hot with rice or noodles.",
    "Banana Bread: Mash 3 ripe bananas, mix with 1/3 cup melted butter. Stir in 1 cup of sugar, 1 beaten egg, and 1 tsp of vanilla extract. Mix in 1 tsp of baking soda and a pinch of salt. Gently fold in 1.5 cups of flour. Pour into a loaf pan and bake at 350°F for 1 hour.",
    # Add more recipes here
]

In [18]:
# Add "tsp" and "cups" to the list of measure units
measure_units.extend(["tsp", "cups"])

In [14]:
# Initialize empty lists to store data
recipe_names = []
ingredients = []
amounts = []
measures = []
instructions = []

In [19]:
# Iterate through each recipe
for recipe in recipes:
    # Split recipe into sentences based on full stop
    sentences = recipe.split(".")
    
    # Iterate through each sentence
    for sentence in sentences:
        words = word_tokenize(sentence)
        
        # Check if the sentence is not empty
        if len(words) > 0:
            # Extract recipe name
            if ":" in sentence:
                recipe_names.append(sentence.split(":")[0])
            else:
                recipe_names.append(None)
            
            # Extract ingredients, amounts, measures, and instructions
            ingredient = []
            amount = []
            measure = []
            instruction = []
            
            # Perform POS tagging
            pos_tags = pos_tag(words)
            
            for word, tag in pos_tags:
                if tag.startswith('NN'):  # Noun
                    ingredient.append(word)
                elif tag.startswith('VB'):  # Verb
                    instruction.append(word)
                elif word.lower() in measure_units:
                    measure.append(word)
                elif word.isdigit():
                    amount.append(word)
            
            # Append measure units to the measure list
            for unit in measure_units:
                if unit in sentence:
                    measure.append(unit)
            
            ingredients.append(" ".join(ingredient))
            amounts.append(" ".join(amount))
            measures.append(" ".join(measure))
            instructions.append(" ".join(instruction))

In [20]:
# Create a DataFrame
data = {
    "Recipe Name": recipe_names,
    "Ingredients": ingredients,
    "Amount": amounts,
    "Measure": measures,
    "Instructions": instructions
}

df = pd.DataFrame(data)

# Display the DataFrame
print(df)

               Recipe Name                                        Ingredients  \
0   Chocolate Chip Cookies  Chocolate Chip Cookies cups flour cup sugar cu...   
1                     None          Cream cup butter eggs tsp vanilla extract   
2                     None                          Fold cups chocolate chips   
3                     None            Drop spoonfuls sheet bake minutes brown   
4       Vegetable Stir Fry  Stir Fry Heat tbsp oil pan inch ginger vegetab...   
5                     None                                    Stir-fry tender   
6                     None                        tbsp soy sauce rice noodles   
7             Banana Bread      Banana Bread Mash ripe bananas mix cup butter   
8                     None      Stir cup sugar beaten egg tsp vanilla extract   
9                     None                            Mix tsp soda pinch salt   
10                    None                                                      
11                    None  

In [21]:
df

Unnamed: 0,Recipe Name,Ingredients,Amount,Measure,Instructions
0,Chocolate Chip Cookies,Chocolate Chip Cookies cups flour cup sugar cu...,2 1 1 1,tsp,Combine baking
1,,Cream cup butter eggs tsp vanilla extract,1 2 2,tsp,add
2,,Fold cups chocolate chips,2,,
3,,Drop spoonfuls sheet bake minutes brown,10,,
4,Vegetable Stir Fry,Stir Fry Heat tbsp oil pan inch ginger vegetab...,2 1,,add minced add
5,,Stir-fry tender,,,
6,,tbsp soy sauce rice noodles,2,,serve
7,Banana Bread,Banana Bread Mash ripe bananas mix cup butter,3,,melted
8,,Stir cup sugar beaten egg tsp vanilla extract,1 1 1,tsp,
9,,Mix tsp soda pinch salt,1,tsp,baking
