## Static Data Collection (RecipeNLG Dataset)

In [None]:
# ! pip install pandas numpy spacy

# python -m spacy download en_core_web_sm

In [1]:
import pandas as pd
import numpy as np
import re
import spacy

In [2]:
receipenlg = pd.read_csv("/Users/sandhyakilari/Desktop/MLOps Project/dataset/RecipeNLG_dataset.csv")

In [4]:
receipenlg.head()

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER
0,0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. eva...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"[""brown sugar"", ""milk"", ""vanilla"", ""nuts"", ""bu..."
1,1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned ...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"[""beef"", ""chicken breasts"", ""cream of mushroom..."
2,2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"[""frozen corn"", ""cream cheese"", ""butter"", ""gar..."
3,3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"[""chicken"", ""chicken gravy"", ""cream of mushroo..."
4,4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker ...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"[""peanut butter"", ""graham cracker crumbs"", ""bu..."


In [5]:
receipenlg.reset_index(drop=True, inplace=True)

In [6]:
receipenlg.duplicated().sum()

0

In [7]:
receipenlg.dtypes

Unnamed: 0      int64
title          object
ingredients    object
directions     object
link           object
source         object
NER            object
dtype: object

In [8]:
receipenlg.shape

(2231142, 7)

In [9]:
receipenlg.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2231142 entries, 0 to 2231141
Data columns (total 7 columns):
 #   Column       Dtype 
---  ------       ----- 
 0   Unnamed: 0   int64 
 1   title        object
 2   ingredients  object
 3   directions   object
 4   link         object
 5   source       object
 6   NER          object
dtypes: int64(1), object(6)
memory usage: 119.2+ MB


In [10]:
receipenlg.describe(include=object)

Unnamed: 0,title,ingredients,directions,link,source,NER
count,2231141,2231142,2231142,2231142,2231142,2231142
unique,1312870,2226362,2211644,2231142,2,2133496
top,Chicken Casserole,"[""1 c. peanut butter"", ""1 c. sugar"", ""1 egg""]","[""Mix all ingredients together.""]",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,[]
freq,4099,28,274,1,1643098,573


In [11]:
receipenlg.columns

Index(['Unnamed: 0', 'title', 'ingredients', 'directions', 'link', 'source',
       'NER'],
      dtype='object')

### Data cleaning

In [21]:
# Drop unnamed index column
receipenlg.drop(columns=["Unnamed: 0"], inplace=True)

In [23]:
# Remove null values
receipenlg.dropna(inplace=True)

In [24]:
# Remove duplicates
receipenlg.drop_duplicates(inplace=True)

In [26]:
# Drop rows where essential columns are missing
receipenlg.dropna(subset=["title", "ingredients", "directions"], inplace=True)

In [27]:
# Clean text fields
for col in ["title", "ingredients", "directions", "link", "source", "NER"]:
    receipenlg[col] = receipenlg[col].astype(str).str.strip()
    receipenlg[col] = receipenlg[col].apply(lambda x: re.sub(r"\s+", " ", x))  # Remove excessive spaces
    receipenlg[col] = receipenlg[col].apply(lambda x: re.sub(r"[^\w\s.,-]", "", x))  # Remove unwanted characters
    receipenlg[col] = receipenlg[col].str.lower()

In [28]:
receipenlg.head()

Unnamed: 0,title,ingredients,directions,link,source,NER
0,no-bake nut cookies,"1 c. firmly packed brown sugar, 12 c. evaporat...","in a heavy 2-quart saucepan, mix brown sugar, ...",www.cookbooks.comrecipe-details.aspxid44874,gathered,"brown sugar, milk, vanilla, nuts, butter, bite..."
1,jewell balls chicken,"1 small jar chipped beef, cut up, 4 boned chic...","place chipped beef on bottom of baking dish., ...",www.cookbooks.comrecipe-details.aspxid699419,gathered,"beef, chicken breasts, cream of mushroom soup,..."
2,creamy corn,"2 16 oz. pkg. frozen corn, 1 8 oz. pkg. cream ...","in a slow cooker, combine all ingredients. cov...",www.cookbooks.comrecipe-details.aspxid10570,gathered,"frozen corn, cream cheese, butter, garlic powd..."
3,chicken funny,"1 large whole chicken, 2 10 12 oz. cans chicke...","boil and debone chicken., put bite size pieces...",www.cookbooks.comrecipe-details.aspxid897570,gathered,"chicken, chicken gravy, cream of mushroom soup..."
4,reeses cupscandy,"1 c. peanut butter, 34 c. graham cracker crumb...",combine first four ingredients and press in 13...,www.cookbooks.comrecipe-details.aspxid659239,gathered,"peanut butter, graham cracker crumbs, butter, ..."


In [29]:
receipenlg.dtypes

title          object
ingredients    object
directions     object
link           object
source         object
NER            object
dtype: object

In [30]:
receipenlg.isnull().sum()

title          0
ingredients    0
directions     0
link           0
source         0
NER            0
dtype: int64

In [31]:
receipenlg.shape

(2231141, 6)

## Dynamic API Data - Spoonacular API

In [35]:
# ! pip install requests pandas streamlit

In [36]:
import requests

# Your Spoonacular API Key
API_KEY = "709a04ae6b8d4e21ab961fdfc4f4a205"
BASE_URL = "https://api.spoonacular.com/recipes/findByIngredients"

In [44]:
def fetch_recipes(ingredients, num_recipes=5):
    """
    Fetches recipes from Spoonacular API based on input ingredients.
    """
    params = {
        "ingredients": ",".join(ingredients),  # Convert list to comma-separated string
        "number": num_recipes,  # Number of recipes to fetch
        "apiKey": API_KEY
    }
    response = requests.get(BASE_URL, params=params)
    
    if response.status_code == 200:
        return response.json()  # Returns JSON response
    else:
        print("Error:", response.status_code, response.text)
        return None

In [46]:
# Example Test Run
test_ingredients = ["tomato", "cheese", "pasta"]
recipes = fetch_recipes(test_ingredients)

# Print sample results
if recipes:
    for idx, recipe in enumerate(recipes, 1):
        print(f"{idx}. {recipe['title']} (ID: {recipe['id']})")

1. Cheesy Pasta in a Pot (ID: 637670)
2. Farfalle with fresh tomatoes, basil and mozzarella (ID: 642585)
3. Pasta Margherita (ID: 511728)
4. Mozzarella Pesto Salad (ID: 652508)
5. Garden Fresh Tomato Sauce & Spaghetti (ID: 644192)


In [48]:
#### Retreiving receipe instructions for a given ID

In [50]:
def get_recipe_details(recipe_id):
    """
    Fetches detailed instructions and ingredients for a given recipe ID.
    """
    url = f"https://api.spoonacular.com/recipes/{recipe_id}/information"
    params = {"apiKey": API_KEY}
    
    response = requests.get(url, params=params)
    
    if response.status_code == 200:
        return response.json()
    else:
        print("Error:", response.status_code, response.text)
        return None

# Example: Fetch details for the first recipe from previous output
if recipes:
    first_recipe_id = recipes[0]["id"]
    details = get_recipe_details(first_recipe_id)
    
    # Print summary
    if details:
        print(f"Recipe: {details['title']}")
        print("Ingredients:")
        for ingredient in details['extendedIngredients']:
            print(f"- {ingredient['original']}")
        print("\nInstructions:")
        for step in details.get("analyzedInstructions", [{}])[0].get("steps", []):
            print(f"{step['number']}. {step['step']}")

Recipe: Cheesy Pasta in a Pot
Ingredients:
- 2 lbs lean ground beef or turkey
- 2 onions chopped
- 1 garlic clove crushed
- 1 14 oz jar spaghetti sauce
- 1 lb can stewed tomatoes
- 1 can sliced mushrooms
- 8 oz shell pasta
- 1/2 lb sliced provolone
- 1/2 lb sliced mozzarella

Instructions:
1. Preheat oven at 350 degrees.
2. Cook the ground meat in a little oil in a large pan stirring often.
3. Drain fat.
4. Add onions, garlic, spaghetti sauce, stewed tomatoes, & undrained mushrooms.
5. Mix well.
6. Simmer 20 mins or until onions are soft.
7. Cook pasta according to package directons.
8. Drain & rinse with cold water.
9. Pour 1/2 of the shells in deep casserole.
10. Cover with 1/2 meat sauce mixture. Top with provolone.
11. Repeat & end with mozzarella.
12. Cover casserole & bake at 350 for 35-40 min.
13. Remove cover & continue baking until mozzarella melts & browns slightly, about 5 mins more.
14. Serve.


In [54]:
def api_to_dataframe(api_response):
    """
    Converts API recipe response into a structured Pandas DataFrame.
    """
    recipes_list = []
    
    for recipe in api_response:
        recipe_id = recipe["id"]
        title = recipe["title"]
        ingredients = ", ".join([ing["name"] for ing in recipe["usedIngredients"] + recipe["missedIngredients"]])
        
        recipes_list.append([recipe_id, title, ingredients])
    
    # Create DataFrame
    api_df = pd.DataFrame(recipes_list, columns=["id", "title", "ingredients"])
    return api_df

# Convert API data to DataFrame
if recipes:
    df_api = api_to_dataframe(recipes)
    print(df_api.head())

       id                                              title  \
0  637670                              Cheesy Pasta in a Pot   
1  642585  Farfalle with fresh tomatoes, basil and mozzar...   
2  511728                                   Pasta Margherita   
3  652508                             Mozzarella Pesto Salad   
4  644192              Garden Fresh Tomato Sauce & Spaghetti   

                                         ingredients  
0  canned tomatoes, shell pasta, provolone, mozza...  
1  farfalle pasta by barilla, mozzarella, tomatoe...  
2  mozzarella cheese, grape tomatoes, linguine pa...  
3  mozzarella cheese, cherry tomatoes, pasta see ...  
4  romano cheese, of- spaghetti, vine tomatoes, b...  


In [56]:
df_api.head()

Unnamed: 0,id,title,ingredients
0,637670,Cheesy Pasta in a Pot,"canned tomatoes, shell pasta, provolone, mozza..."
1,642585,"Farfalle with fresh tomatoes, basil and mozzar...","farfalle pasta by barilla, mozzarella, tomatoe..."
2,511728,Pasta Margherita,"mozzarella cheese, grape tomatoes, linguine pa..."
3,652508,Mozzarella Pesto Salad,"mozzarella cheese, cherry tomatoes, pasta see ..."
4,644192,Garden Fresh Tomato Sauce & Spaghetti,"romano cheese, of- spaghetti, vine tomatoes, b..."


In [64]:
print(df_api.columns)

Index(['id', 'title', 'ingredients'], dtype='object')


In [60]:
receipenlg.head()

Unnamed: 0,title,ingredients,directions,link,source,NER
0,no-bake nut cookies,"1 c. firmly packed brown sugar, 12 c. evaporat...","in a heavy 2-quart saucepan, mix brown sugar, ...",www.cookbooks.comrecipe-details.aspxid44874,gathered,"brown sugar, milk, vanilla, nuts, butter, bite..."
1,jewell balls chicken,"1 small jar chipped beef, cut up, 4 boned chic...","place chipped beef on bottom of baking dish., ...",www.cookbooks.comrecipe-details.aspxid699419,gathered,"beef, chicken breasts, cream of mushroom soup,..."
2,creamy corn,"2 16 oz. pkg. frozen corn, 1 8 oz. pkg. cream ...","in a slow cooker, combine all ingredients. cov...",www.cookbooks.comrecipe-details.aspxid10570,gathered,"frozen corn, cream cheese, butter, garlic powd..."
3,chicken funny,"1 large whole chicken, 2 10 12 oz. cans chicke...","boil and debone chicken., put bite size pieces...",www.cookbooks.comrecipe-details.aspxid897570,gathered,"chicken, chicken gravy, cream of mushroom soup..."
4,reeses cupscandy,"1 c. peanut butter, 34 c. graham cracker crumb...",combine first four ingredients and press in 13...,www.cookbooks.comrecipe-details.aspxid659239,gathered,"peanut butter, graham cracker crumbs, butter, ..."


In [62]:
print(receipenlg.columns)

Index(['title', 'ingredients', 'directions', 'link', 'source', 'NER'], dtype='object')


## Data Integration

In [70]:
# Ensure both DataFrames have the same structure
receipenlg_subset = receipenlg[["title", "ingredients"]]  # Select relevant columns

# Merge the datasets
combined_df = pd.concat([df_api, receipenlg_subset], ignore_index=True)

# Check merged dataset
combined_df.head()

Unnamed: 0,id,title,ingredients
0,637670.0,Cheesy Pasta in a Pot,"canned tomatoes, shell pasta, provolone, mozza..."
1,642585.0,"Farfalle with fresh tomatoes, basil and mozzar...","farfalle pasta by barilla, mozzarella, tomatoe..."
2,511728.0,Pasta Margherita,"mozzarella cheese, grape tomatoes, linguine pa..."
3,652508.0,Mozzarella Pesto Salad,"mozzarella cheese, cherry tomatoes, pasta see ..."
4,644192.0,Garden Fresh Tomato Sauce & Spaghetti,"romano cheese, of- spaghetti, vine tomatoes, b..."


## Store Integrated Data in a Database

In [None]:
import sqlite3

# Connect to SQLite Database
conn = sqlite3.connect("pantrypalette.db")
cursor = conn.cursor()

# Create Table for Storing Recipes
cursor.execute("""
CREATE TABLE IF NOT EXISTS recipes (
    id INTEGER PRIMARY KEY,
    title TEXT,
    ingredients TEXT,
    instructions TEXT
)
""")

conn.commit()
conn.close()

In [None]:
def save_recipe_to_db(recipe_id, title, ingredients, instructions):
    """
    Saves fetched recipes to the SQLite database.
    """
    conn = sqlite3.connect("pantrypalette.db")
    cursor = conn.cursor()

    cursor.execute("INSERT INTO recipes (id, title, ingredients, instructions) VALUES (?, ?, ?, ?)",
                   (recipe_id, title, ingredients, instructions))

    conn.commit()
    conn.close()

# Example: Save fetched recipe details
if details:
    ingredients_list = ", ".join([ing['original'] for ing in details['extendedIngredients']])
    instructions_text = "\n".join([step['step'] for step in details.get("analyzedInstructions", [{}])[0].get("steps", [])])

    save_recipe_to_db(details['id'], details['title'], ingredients_list, instructions_text)
    print("Recipe saved to database successfully!")