In [1]:
import pandas as pd
from collections import Counter
import re

In [2]:
def set_pandas_display_options() -> None:
    """Raise pandas' global display limits (columns, rows, column width, total width)."""
    display_settings = {
        'display.max_columns': 1000,
        'display.max_rows': 10_000,
        'display.max_colwidth': 199,
        'display.width': 1000,
    }
    for option_name, option_value in display_settings.items():
        pd.set_option(option_name, option_value)


# Apply the settings once for the whole notebook session.
set_pandas_display_options()

### Read in and clean data

In [3]:
# Count the data rows in the CSV so that a fixed share of the file can be loaded.
# The file is opened in binary mode inside a `with` block: the handle is closed
# deterministically, and no text decoding is attempted just to count newlines
# (the platform default encoding may not be able to decode the file).
# Physical lines are counted, so a quoted field spanning several lines would
# inflate the count slightly.
with open('data/RecipeNLG_dataset.csv', 'rb') as csv_file:
    line_count = sum(1 for _ in csv_file)

# The first line is the column header, not a recipe.
total_rows = max(line_count - 1, 0)

# NOTE: despite its name, `percentage` is a row count: one tenth of the file.
percentage = total_rows // 10

# Load only the first tenth of the recipes to keep the notebook fast.
df = pd.read_csv('data/RecipeNLG_dataset.csv', nrows=percentage)
df.head(5)

Unnamed: 0.1,Unnamed: 0,title,ingredients,directions,link,source,NER
0,0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. evaporated milk"", ""1/2 tsp. vanilla"", ""1/2 c. broken nuts (pecans)"", ""2 Tbsp. butter or margarine"", ""3 1/2 c. bite size shredded rice biscuits""]","[""In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine."", ""Stir over medium heat until mixture bubbles all over top."", ""Boil and stir 5 minutes more. Take o...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"[""brown sugar"", ""milk"", ""vanilla"", ""nuts"", ""butter"", ""bite size shredded rice biscuits""]"
1,1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned chicken breasts"", ""1 can cream of mushroom soup"", ""1 carton sour cream""]","[""Place chipped beef on bottom of baking dish."", ""Place chicken on top of beef."", ""Mix soup and cream together; pour over chicken. Bake, uncovered, at 275\u00b0 for 3 hours.""]",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"[""beef"", ""chicken breasts"", ""cream of mushroom soup"", ""sour cream""]"
2,2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg. cream cheese, cubed"", ""1/3 c. butter, cubed"", ""1/2 tsp. garlic powder"", ""1/2 tsp. salt"", ""1/4 tsp. pepper""]","[""In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings.""]",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"[""frozen corn"", ""cream cheese"", ""butter"", ""garlic powder"", ""salt"", ""pepper""]"
3,3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans chicken gravy"", ""1 (10 1/2 oz.) can cream of mushroom soup"", ""1 (6 oz.) box Stove Top stuffing"", ""4 oz. shredded cheese""]","[""Boil and debone chicken."", ""Put bite size pieces in average size square casserole dish."", ""Pour gravy and cream of mushroom soup over chicken; level."", ""Make stuffing according to instructions ...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"[""chicken"", ""chicken gravy"", ""cream of mushroom soup"", ""shredded cheese""]"
4,4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker crumbs"", ""1 c. melted butter"", ""1 lb. (3 1/2 c.) powdered sugar"", ""1 large pkg. chocolate chips""]","[""Combine first four ingredients and press in 13 x 9-inch ungreased pan."", ""Melt chocolate chips and spread over mixture. Refrigerate for about 20 minutes and cut into pieces before chocolate get...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"[""peanut butter"", ""graham cracker crumbs"", ""butter"", ""powdered sugar"", ""chocolate chips""]"


In [4]:
# Cleaning
# Drop the columns that are not needed further down.
df = df.drop(columns=['Unnamed: 0', 'source', 'NER'])

df['ingredients'] = df['ingredients'].str.lower()

# Token frequencies over the lower-cased ingredient strings
# (computed before the list syntax and unit abbreviations are rewritten).
words = df['ingredients'].str.findall(r'\w+').explode()
word_counts = Counter(words)

# Strip the list syntax (square brackets and double quotes) from both text columns.
# `regex=False` is passed explicitly everywhere a literal match is intended: the
# default of `Series.str.replace(regex=...)` changed in pandas 2.0, and as regular
# expressions these patterns would either be invalid ('[') or match far too much.
for column in ('ingredients', 'directions'):
    for list_char in ('[', ']', '"'):
        df[column] = df[column].str.replace(list_char, '', regex=False)

# Spell out unit abbreviations in the ingredients.
# Each abbreviation is matched literally (via `re.escape`) and only when it is not
# preceded by a letter, so that e.g. "doz." or "garlic." are not rewritten to
# "dounces" / "garlicups". The text is already lower-cased, so [a-z] is sufficient.
# Digits and punctuation may precede the unit ("16oz.", "(8 oz.)").
UNIT_ABBREVIATIONS = {
    'tsp.': 'teaspoon',
    'tbsp.': 'tablespoon',
    'oz.': 'ounces',
    'lb.': 'pound',
    'pkg.': 'package',
    'c.': 'cups',
}
for abbreviation, full_name in UNIT_ABBREVIATIONS.items():
    unit_pattern = r'(?<![a-z])' + re.escape(abbreviation)
    df['ingredients'] = df['ingredients'].str.replace(unit_pattern, full_name, regex=True)

# Turn backslashes into forward slashes.
df['ingredients'] = df['ingredients'].str.replace('\\', '/', regex=False)

# Once the quotes are gone, steps are separated by "., "; drop the full stop so the
# steps read as a comma-separated sequence.
df['directions'] = df['directions'].str.replace('.,', ',', regex=False)

# The text contains the literal escape sequence \u00b0 (backslash + "u00b0"), not the
# degree character itself; a regex would interpret the pattern as the character.
df['directions'] = df['directions'].str.replace('\\u00b0', ' degree', regex=False)

### Restructuring and saving 

In [5]:
# Keep only the text columns that make up the recipe store.
feature_columns = ['title', 'ingredients', 'directions']
feature_df = df.loc[:, feature_columns]

# Semicolon-separated export; the DataFrame index is written as the first column.
feature_df.to_csv('data/recipe_store.csv', sep=';')

feature_df.head(5)

Unnamed: 0,title,ingredients,directions
0,No-Bake Nut Cookies,"1 cups firmly packed brown sugar, 1/2 cups evaporated milk, 1/2 teaspoon vanilla, 1/2 cups broken nuts (pecans), 2 tablespoon butter or margarine, 3 1/2 cups bite size shredded rice biscuits","In a heavy 2-quart saucepan, mix brown sugar, nuts, evaporated milk and butter or margarine, Stir over medium heat until mixture bubbles all over top, Boil and stir 5 minutes more. Take off heat,..."
1,Jewell Ball'S Chicken,"1 small jar chipped beef, cut up, 4 boned chicken breasts, 1 can cream of mushroom soup, 1 carton sour cream","Place chipped beef on bottom of baking dish, Place chicken on top of beef, Mix soup and cream together; pour over chicken. Bake, uncovered, at 275 degree for 3 hours."
2,Creamy Corn,"2 (16 ounces) package frozen corn, 1 (8 ounces) package cream cheese, cubed, 1/3 cups butter, cubed, 1/2 teaspoon garlic powder, 1/2 teaspoon salt, 1/4 teaspoon pepper","In a slow cooker, combine all ingredients. Cover and cook on low for 4 hours or until heated through and cheese is melted. Stir well before serving. Yields 6 servings."
3,Chicken Funny,"1 large whole chicken, 2 (10 1/2 ounces) cans chicken gravy, 1 (10 1/2 ounces) can cream of mushroom soup, 1 (6 ounces) box stove top stuffing, 4 ounces shredded cheese","Boil and debone chicken, Put bite size pieces in average size square casserole dish, Pour gravy and cream of mushroom soup over chicken; level, Make stuffing according to instructions on box (do ..."
4,Reeses Cups(Candy),"1 cups peanut butter, 3/4 cups graham cracker crumbs, 1 cups melted butter, 1 pound (3 1/2 cups) powdered sugar, 1 large package chocolate chips","Combine first four ingredients and press in 13 x 9-inch ungreased pan, Melt chocolate chips and spread over mixture. Refrigerate for about 20 minutes and cut into pieces before chocolate gets har..."
