**Name**: Michelle Adeline

**Student ID**: 31989101

### Download required packages

In [1]:
# !pip3 install torch numpy matplotlib pandas tqdm

--2024-05-20 11:51:36--  https://raw.githubusercontent.com/williamLyh/RecipeWithPlans/main/ingredient_set.json
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8000::154, 2606:50c0:8003::154, 2606:50c0:8002::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8000::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 16759 (16K) [text/plain]
Saving to: 'ingredient_set.json'

     0K .......... ......                                     100% 1.50M=0.01s

2024-05-20 11:51:37 (1.50 MB/s) - 'ingredient_set.json' saved [16759/16759]



### Imports and Constants

In [1]:
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from tqdm import tqdm

SEED = 31989101
HIDDEN_SIZE = 256
TEACHER_FORCING_RATIO = 1.0
DROPOUT_RATE = 0.1
MAX_INGR_LEN = 150
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## ensuring reproducibility
torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

print(f"Using device: {DEVICE}")

Using device: cpu


In [2]:
# to easily read ingredients and instructions
pd.set_option('display.max_colwidth', 2000)

# Data Loading and Preprocessing

In [3]:
data_root = "./Cooking_Dataset"

In [4]:
train = pd.read_csv(os.path.join(data_root, "train.csv"), usecols=['Ingredients', 'Recipe'])
dev = pd.read_csv(os.path.join(data_root, "dev.csv"), usecols=['Ingredients', 'Recipe'])
test = pd.read_csv(os.path.join(data_root, "test.csv"), usecols=['Ingredients', 'Recipe'])

In [5]:
train.head()

Unnamed: 0,Ingredients,Recipe
0,6 tb butter or margarine - softened\t3/4 c c and h powdered sugar\t1 c all-purpose flour\t1 tb milk\t2 eggs\t1 c c and h granulated sugar\t1/2 c cocoa\t2 tb flour\t1/2 ts baking powder\t1/2 ts salt\t1 ts vanilla\t1/4 ts almond extract (optional)\t1 c chopped almonds or pecans,"cream together butter and powdered sugar . blend in 1 cup flour and milk . spread evenly in bottom of ungreased 9-inch square pan . bake in 350 degree oven 10 to 12 minutes . beat eggs slightly ; combine dry ingredients and add to eggs . blend in vanilla and almond extract ; fold in almonds . spread over hot baked layer ; return to oven and bake 20 minutes longer . cool ; while warm , cut into 24 bars ."
1,"1 c vanilla wafer cookies, finely crushed\t1 c confectioner's sugar\t2 tb cocoa\t1 c pecans, finely chopped\t2 tb corn syrup\t1/2 c bourbon","combine dry ingredients and mix well . add corn syrup and bourbon and mix well . if too moist add a few cookie crumbs , if too dry add a little more bourbon . shape into small balls and roll in fruit sugar or fonely chopped pecans . store in an airtight container . also good made with dark rum ."
2,1 c all-purpose flour\t1/2 c sugar\t2 ts baking powder\t1/2 ts salt\t2 tb unsweetened cocoa powder\t1 1/2 c all-bran? cereal\t3/4 c skim milk\t2 egg whites\t1/4 c vegetable oil\t1 c sliced ripe banana; (about 1\tvegetable cooking spray,"kellogg 's all-bran 1 . stir together flour , sugar , baking powder , salt , and cocoa powder . set aside . in large mixing bowl , combine all-bran cereal and milk . let stand 5 minutes or until cereal softens . add egg whites and oil . beat well . stir in bananas . add flour mixture stirring only until combined . portion batte evenly into twelve 2-1/2 inch muffin-pan cups coated with cooking spray . bake at 400 f. about 25 minutes or until lightly browned . serve warm ."
3,"5 1/2 c flour\t1 cake compressed yeast\tor 1 cake dry yeast\t2 c milk, scalded and cooled\t1/2 ts salt\t1/2 c cocoa\t1/4 c shortening\t2 eggs, well beaten\t1/2 c sugar","soften yeast and dissolve 1 tablespoon sugar in lukewarm milk . add 3 cups flour and beat until smooth . cover and set aside to rise in a warm place until light . add shortening and sugar , which have been creamed together , eggs , cocoa , salt , and remainder of flour or enough to make a soft dough . knead lightly and place in well-oiled bowl . cover and set in a warm place until double in bulk about 2 hours . form into loaves . place in well-oiled bread pans , filling them 1/2 full . cover and let rise again until double in bulk . bake in hot oven -lrb- 425 f -rrb- 40 50 minutes . 2 loaves ."
4,bread -- to fill dish 2/3\tful\t2 c skim milk\t2/3 c sugar\t2 eggs -- or 4 egg whites\t2 tb cocoa\t1 ts vanilla,"into a greased casserole dish break up enough bread to fill it 2/3 full . over this pour 2/3 cup sugar , 2 heaping tbsp cocoa that have been mixed together . toss all together lightly -lrb- to coat the bread -rrb- . to 2 cups of milk add 2 well beaten eggs and 1 tsp vanilla . pour this over the bread and it should just cover the pieces . bake at 350 fo r about 45 minutes ."


`dev` and `test` are all lowercase already. `train` supposedly has 2 rows containing non-lowercase letters. Let's investigate them

Only 2 rows in Ingredients have non lowercase letters but upon investigation, both are invalid rows so can simply remove them.

In [6]:
def check_all_lowercase(df):
    ingrnonlower = (~df.Ingredients.dropna().str.islower()).sum()
    recipenonlower = (~df.Recipe.dropna().str.islower()).sum()
    print(f"Number of rows with a non-lowercase letter:\nIngredients: {ingrnonlower}\nRecipe: {recipenonlower}")

In [7]:
check_all_lowercase(train)

Number of rows with a non-lowercase letter:
Ingredients: 2
Recipe: 0


In [8]:
check_all_lowercase(dev)

Number of rows with a non-lowercase letter:
Ingredients: 0
Recipe: 0


In [9]:
check_all_lowercase(test)

Number of rows with a non-lowercase letter:
Ingredients: 0
Recipe: 0


We can see that the only two rows with non-lowercase letters have invalid Ingredients lists so we can simply filter them out.

In [10]:
train[~train.Ingredients.str.islower()]

Unnamed: 0,Ingredients,Recipe
7415,=-=-=-=-=-=-=-=-=-=-=-=-=-=-\t--=-=-=-=-=-= =-=-=-=-=-=-=-=-=-=-=-=-=-=,"follow these 20 `` wellness '' facts and you have a better chance of living a longer and healthier life . michaelangelo was carving the rondanini just before he died at 89 . verdi finished his opera falstaff at 80 . but among non-smokers the risk of one form of lung cancer , adenocarcinoma , increases with the amount of saturated consumed daily . according to the `` new england journal of medicine '' women who have a female physician are twice as likely to receive pap smears . air bags increase the chance of surviving a frontal collision 29 % . about half of adult pedestrians killed in traffic accidents have been drinking . and more than a third were legally drunk . even a two-inch heel increases the pressure to 57 % . half a cantaloupe contains nearly double the amount of vitamin c than an orange . wild animals normally do n't get fat , and their meat is only slightly marbled . try increased fiber and prunes for constipation . researchers believe prunes contain a substance that stimulates intestinal contractions . but if you have asthma , they 're a special hazard . but because it 's `` low-fat '' does n't mean you can eat all you want . so high in calories . growing up is optional"
11772,..........................,"though it 's been around a long time , there 's something almost quintessentially american about the sandwich . after all it was the gambling table that prompted the fourth earl of sandwich -lrb- 1718-1792 -rrb- to have his food served to him between two slices of bread so he would n't have to interrupt his gaming to eat . actually though people were munching on various foods stuffed between slabs of bread long before the earl came into the picture ."


make sure to:
- remove brackets
- make all lowercase
- add spaces around non-words (punctuations, numbers, etc.)
- remove >1 space (only leave behind 1 space), 
- remove leading and trailing whitespace
- add tags
- remove cases where <INGREDIENT>[0 or more whitespace]<INGREDIENT>, or <INGREDIENT>[0 or more whitespace]<INGREDIENT_END>, or <RECIPE_STEP>[0 or more whitespace]<RECIPE_STEP>, or <RECIPE_STEP>[0 or more whitespace]<RECIPE_END>.
- filter instructions more than length X (find percentile len), or less than 3 words.
- remove those with <1 ingredient


maybe try removing punctuation all together. (but in the sample there is punctuation hmmm)

In [11]:
def replace(df, patterns, replacements, columns, regex=False):
    # patterns: List[str]
    # replacements: List[str]
    # columns: List[str]
    if type(replacements) == str:
        replacements = [replacements] * len(patterns)
    if type(columns) == str:
        columns = [columns] * len(patterns)

    for pat, rep, col in zip(patterns, replacements, columns):
        df[col] = df[col].str.replace(pat, rep, regex=regex)


def add_tags(df):
    assert 'Ingredients' in df.columns and 'Recipe' in df.columns

    replace(df, ['\t'], ' <INGREDIENT> ', 'Ingredients')
    df.Ingredients = '<INGREDIENT_START> ' + df.Ingredients + ' <INGREDIENT_END>'

    replace(df, ['.', ';'], ' <RECIPE_STEP> ', 'Recipe')
    df.Recipe = '<RECIPE_START> ' + df.Recipe + ' <RECIPE_END>'

def preprocess_data(orig_df, max_ingr_len=150, max_recipe_len=600, min_recipe_len=5, min_ingredients=1):
    df = orig_df.copy() # ensure original data is not mutated (mostly for verification purposes)

    ## drop NA
    df = df.dropna()

    ## keep only rows with all lowercase (Recipe column is all lowercase already)
    df = df[df.Ingredients.str.islower()]

    ## replace brackets with space
    replace(df, ['[()]'], ' ', ['Ingredients', 'Recipe'], regex=True)

    ## add spaces around non-words (exclude whitespace, apostrophe, period (treated separately below))
    replace(df, ["([^0-9a-zA-Z.'\"/ ])"]*2, r" \1 ", ['Ingredients', 'Recipe'], regex=True)
    # add spaces around periods (excluding decimal places)
    replace(df, [r"\.(?!\d)"]*2, r" . ", ['Ingredients', 'Recipe'], regex=True)

    ## add tags for ingredients and recipes
    add_tags(df)

    ## replace >1 whitespace with a single space
    replace(df, ['[ ]{2,}']*2, " ", ['Ingredients', 'Recipe'], regex=True)

    ## remove leading and trailing whitespace
    df.Ingredients = df.Ingredients.str.strip()
    df.Recipe = df.Recipe.str.strip()

    ## remove consecutive tags, for ex. <INGREDIENT>[0 or more whitespace]<INGREDIENT>
    replace(df, ["<INGREDIENT>[ \t\n]*([ \t\n]*<INGREDIENT>)+", "<RECIPE_STEP>[ \t\n]*([ \t\n]*<RECIPE_STEP>)+"], 
    ["<INGREDIENT>", "<RECIPE_STEP>"], ["Ingredients", "Recipe"], regex=True)

    ## filter out recipes and ingredients above/below limit
    recipe_lens = df.Recipe.apply(lambda r: len(r.split()))
    df = df[(recipe_lens > min_recipe_len) & (recipe_lens < max_recipe_len)]
    df = df[df.Ingredients.apply(lambda i: len(i.split())) < max_ingr_len]

    ## filter out those with <1 ingredients
    df = df[df.Ingredients.str.count('<INGREDIENT>') >= min_ingredients]

    print(f"Number of data samples before preprocessing: {len(orig_df)}\n"
          f"Number of data samples after preprocessing: {len(df)} ({len(df) * 100/len(orig_df):.3f}%)")

    return df

In [12]:
df = preprocess_data(train)

Number of data samples before preprocessing: 101340
Number of data samples after preprocessing: 99036 (97.726%)


#### Preprocess testing (remove later)

In [None]:
df = train.copy()

In [None]:
df = df.dropna()

In [None]:
df = df[df.Ingredients.apply(lambda i: len(i.split())) < 150]

In [None]:
(df.Ingredients.apply(lambda i: len(i.split())) >= 150).sum()

661

In [None]:
df = df[df.Ingredients.str.islower()]

In [None]:
replace(df, ['[()]'], ' ', ['Ingredients', 'Recipe'], regex=True)

In [None]:
replace(df, ["([^0-9a-zA-Z.'\"/ ])"]*2, r" \1 ", ['Ingredients', 'Recipe'], regex=True)

In [None]:
replace(df, [r"\.(?!\d)"]*2, r" . ", ['Ingredients', 'Recipe'], regex=True)

In [None]:
add_tags(df)

In [None]:
replace(df, ['[ ]{2,}']*2, " ", ['Ingredients', 'Recipe'], regex=True)

In [None]:
df.Ingredients = df.Ingredients.str.strip()
df.Recipe = df.Recipe.str.strip()

In [None]:
replace(df, ["<INGREDIENT>[ \t\n]*([ \t\n]*<INGREDIENT>)+", "<RECIPE_STEP>[ \t\n]*([ \t\n]*<RECIPE_STEP>)+"], 
    ["<INGREDIENT>", "<RECIPE_STEP>"], ["Ingredients", "Recipe"], regex=True)

In [None]:
min_len, max_len = 20, 2000
df = df[(df.Recipe.str.len() >= min_len) & (df.Recipe.str.len() <= max_len)]

In [None]:
df[df.Ingredients.str.count('<INGREDIENT>') >= 1].shape

(98524, 2)

In [None]:
len(df)

99697

In [None]:
len(train)

101340

In [None]:
df

Unnamed: 0,Ingredients,Recipe
0,<INGREDIENT_START> 6 tb butter or margarine - softened <INGREDIENT> 3/4 c c and h powdered sugar <INGREDIENT> 1 c all - purpose flour <INGREDIENT> 1 tb milk <INGREDIENT> 2 eggs <INGREDIENT> 1 c c and h granulated sugar <INGREDIENT> 1/2 c cocoa <INGREDIENT> 2 tb flour <INGREDIENT> 1/2 ts baking powder <INGREDIENT> 1/2 ts salt <INGREDIENT> 1 ts vanilla <INGREDIENT> 1/4 ts almond extract optional <INGREDIENT> 1 c chopped almonds or pecans <INGREDIENT_END>,"<RECIPE_START> cream together butter and powdered sugar <RECIPE_STEP> blend in 1 cup flour and milk <RECIPE_STEP> spread evenly in bottom of ungreased 9 - inch square pan <RECIPE_STEP> bake in 350 degree oven 10 to 12 minutes <RECIPE_STEP> beat eggs slightly <RECIPE_STEP> combine dry ingredients and add to eggs <RECIPE_STEP> blend in vanilla and almond extract <RECIPE_STEP> fold in almonds <RECIPE_STEP> spread over hot baked layer <RECIPE_STEP> return to oven and bake 20 minutes longer <RECIPE_STEP> cool <RECIPE_STEP> while warm , cut into 24 bars <RECIPE_STEP> <RECIPE_END>"
1,"<INGREDIENT_START> 1 c vanilla wafer cookies , finely crushed <INGREDIENT> 1 c confectioner's sugar <INGREDIENT> 2 tb cocoa <INGREDIENT> 1 c pecans , finely chopped <INGREDIENT> 2 tb corn syrup <INGREDIENT> 1/2 c bourbon <INGREDIENT_END>","<RECIPE_START> combine dry ingredients and mix well <RECIPE_STEP> add corn syrup and bourbon and mix well <RECIPE_STEP> if too moist add a few cookie crumbs , if too dry add a little more bourbon <RECIPE_STEP> shape into small balls and roll in fruit sugar or fonely chopped pecans <RECIPE_STEP> store in an airtight container <RECIPE_STEP> also good made with dark rum <RECIPE_STEP> <RECIPE_END>"
2,<INGREDIENT_START> 1 c all - purpose flour <INGREDIENT> 1/2 c sugar <INGREDIENT> 2 ts baking powder <INGREDIENT> 1/2 ts salt <INGREDIENT> 2 tb unsweetened cocoa powder <INGREDIENT> 1 1/2 c all - bran ? cereal <INGREDIENT> 3/4 c skim milk <INGREDIENT> 2 egg whites <INGREDIENT> 1/4 c vegetable oil <INGREDIENT> 1 c sliced ripe banana ; about 1 <INGREDIENT> vegetable cooking spray <INGREDIENT_END>,"<RECIPE_START> kellogg 's all - bran 1 <RECIPE_STEP> stir together flour , sugar , baking powder , salt , and cocoa powder <RECIPE_STEP> set aside <RECIPE_STEP> in large mixing bowl , combine all - bran cereal and milk <RECIPE_STEP> let stand 5 minutes or until cereal softens <RECIPE_STEP> add egg whites and oil <RECIPE_STEP> beat well <RECIPE_STEP> stir in bananas <RECIPE_STEP> add flour mixture stirring only until combined <RECIPE_STEP> portion batte evenly into twelve 2 - 1/2 inch muffin - pan cups coated with cooking spray <RECIPE_STEP> bake at 400 f <RECIPE_STEP> about 25 minutes or until lightly browned <RECIPE_STEP> serve warm <RECIPE_STEP> <RECIPE_END>"
3,"<INGREDIENT_START> 5 1/2 c flour <INGREDIENT> 1 cake compressed yeast <INGREDIENT> or 1 cake dry yeast <INGREDIENT> 2 c milk , scalded and cooled <INGREDIENT> 1/2 ts salt <INGREDIENT> 1/2 c cocoa <INGREDIENT> 1/4 c shortening <INGREDIENT> 2 eggs , well beaten <INGREDIENT> 1/2 c sugar <INGREDIENT_END>","<RECIPE_START> soften yeast and dissolve 1 tablespoon sugar in lukewarm milk <RECIPE_STEP> add 3 cups flour and beat until smooth <RECIPE_STEP> cover and set aside to rise in a warm place until light <RECIPE_STEP> add shortening and sugar , which have been creamed together , eggs , cocoa , salt , and remainder of flour or enough to make a soft dough <RECIPE_STEP> knead lightly and place in well - oiled bowl <RECIPE_STEP> cover and set in a warm place until double in bulk about 2 hours <RECIPE_STEP> form into loaves <RECIPE_STEP> place in well - oiled bread pans , filling them 1/2 full <RECIPE_STEP> cover and let rise again until double in bulk <RECIPE_STEP> bake in hot oven - lrb - 425 f - rrb - 40 50 minutes <RECIPE_STEP> 2 loaves <RECIPE_STEP> <RECIPE_END>"
4,<INGREDIENT_START> bread - - to fill dish 2/3 <INGREDIENT> ful <INGREDIENT> 2 c skim milk <INGREDIENT> 2/3 c sugar <INGREDIENT> 2 eggs - - or 4 egg whites <INGREDIENT> 2 tb cocoa <INGREDIENT> 1 ts vanilla <INGREDIENT_END>,"<RECIPE_START> into a greased casserole dish break up enough bread to fill it 2/3 full <RECIPE_STEP> over this pour 2/3 cup sugar , 2 heaping tbsp cocoa that have been mixed together <RECIPE_STEP> toss all together lightly - lrb - to coat the bread - rrb - <RECIPE_STEP> to 2 cups of milk add 2 well beaten eggs and 1 tsp vanilla <RECIPE_STEP> pour this over the bread and it should just cover the pieces <RECIPE_STEP> bake at 350 fo r about 45 minutes <RECIPE_STEP> <RECIPE_END>"
...,...,...
101335,"<INGREDIENT_START> 6 oz smoked bacon or pancetta , cut into 1/2"" dice <INGREDIENT> 4 tb olive oil <INGREDIENT> 6 oz fresh shiitake mushrooms , sliced <INGREDIENT> 1 lb asparagus , cut into 2"" piece <INGREDIENT> 12 oz fresh fettucine <INGREDIENT> 1 tb fresh thyme , finely chopped <INGREDIENT> 2 tb fresh oregano , finely chopped <INGREDIENT> salt to taste <INGREDIENT> pepper to taste <INGREDIENT_END>","<RECIPE_START> saute bacon in skillet until crisp , remove to a plate <RECIPE_STEP> drain off excess fat , then add olive oil to the skillet and cook the mushrooms until softened <RECIPE_STEP> lightly blanch the asparagus <RECIPE_STEP> cook the pasta briefly in boiling salted water <RECIPE_STEP> drain the noodles and toos with the asparagus , bacon , shiitake and fresh herbs <RECIPE_STEP> season with salt and pepper <RECIPE_STEP> 35 grams total fat <RECIPE_STEP> - lrb - 9 <RECIPE_STEP> 9 grams saturated fat - rrb - <RECIPE_STEP> 175 <RECIPE_STEP> 5 milligrams cholesterol <RECIPE_STEP> 709 <RECIPE_STEP> 7 milligrams sodium <RECIPE_STEP> <RECIPE_END>"
101336,"<INGREDIENT_START> 2 ts sesame oil <INGREDIENT> 1 lb halibut or lean fish steaks , cut into l - inch pieces <INGREDIENT> 1 md onion , thinly sliced <INGREDIENT> 3 cloves garlic , finely chopped <INGREDIENT> 1 ts finely chopped gingerroot <INGREDIENT> 1 pk 10 ounces frozen asparagus cuts , thawed and drained <INGREDIENT> 1 cn 4 ounces sliced mushrooms , drained <INGREDIENT> 1 md tomato , cut into thin wedges <INGREDIENT> 2 tb soy sauce <INGREDIENT> 1 tb lemon juice <INGREDIENT_END>","<RECIPE_START> heat oil in 10 - inch nonstick skillet over medium - high heat <RECIPE_STEP> add fish , onion , garlic , gingerroot and asparagus <RECIPE_STEP> stir - fry 2 to 3 minutes or until fish almost flakes with fork <RECIPE_STEP> carefully stir in remaining ingredients <RECIPE_STEP> heat through <RECIPE_STEP> serve with additional soy sauce if desired <RECIPE_STEP> 4 servings <RECIPE_STEP> nutrition information per serving 1 serving percent of u <RECIPE_STEP> s <RECIPE_STEP> rda <RECIPE_END>"
101337,"<INGREDIENT_START> apple , artichoke , asparagus , vocado , string bean , beet , roccoli , russels spourts , cabbage , arrot , cauliflower , celery , orn , cucumber , ggplant , grape , lettuce , nion , parsnip , pea , peach , ear , pepper , lum , potato , radish , aspberry , squash , tomato , atermelon <INGREDIENT_END>","<RECIPE_START> in the name of sportsmanship , let 's consider one more way to look at fruits and vegetables <RECIPE_STEP> artichoke , asparagus , avocado , bean , beet , broccoli , brussels sprouts , cabbage , carrot , cauliflower , celery , corn , cucumber , eggplant , lettuce , onion , parsnip , pea , pepper , potato , radish , squash , and tomato are vegetables , all the others are fruits <RECIPE_STEP> fine with us <RECIPE_STEP> and , deep in our hearts , we suspect that 's the best way to play , according to hoyle <RECIPE_STEP> <RECIPE_END>"
101338,"<INGREDIENT_START> herb vinaigrette see below <INGREDIENT> truffle juice vinaigrette see below <INGREDIENT> 32 asparagus tips <INGREDIENT> 50 g chanterelles cleaned <INGREDIENT> 12 qt breasts <INGREDIENT> 1 or 2 uncooked yellow zucchini sliced into 48 very thin circles <INGREDIENT> 1 lg tomato brunoised <INGREDIENT> 15 g frisee lettuce cleaned <INGREDIENT> 12 italian parsley leaves <INGREDIENT> 30 g truffle sliced into 12 very thin circles <INGREDIENT> butter <INGREDIENT> seasoning - salt , pepper , chicken stock <INGREDIENT> white wine <INGREDIENT_END>","<RECIPE_START> little dabs of food artfully arranged here and there on a plate , sometimes connected by skeins of sauces <RECIPE_STEP> expensive too <RECIPE_STEP> patrick lin - lrb - the regent hotel - rrb - make herb vinaigrette from onion , chives , italian parsley , parsley and a pinch of tarragon , all finely chopped together <RECIPE_STEP> - lrb - keep some chopped herbs aside to use in chanterelles preparation see below <RECIPE_STEP> - rrb - add salt , pepper , walnut oil , sherry and vinegar , according to taste <RECIPE_STEP> make truffle juice vinaigrette from bottled truffle juice , salt , olive oil and vinegar <RECIPE_STEP> lightly salt chanterelles and saute over a low flame in butter , with a drop of white wine , a few drops of chicken stock , and a mixture of the same herbs that were used to make herb vinaigrette - lrb - see above - rrb - <RECIPE_STEP> reduce sauteed mixture completely , and then marinate chanterelles in herb vinaigrette for 30 minutes <RECIPE_STEP> clean quail breasts and season with salt and pepper <RECIPE_STEP> create central garnish by arranging a wheel of eleven overlapping yellow zucchini slices , placing a small heap of tomato brunoise on one central slice <RECIPE_STEP> place asparagus tips pointing outwards from centre <RECIPE_STEP> create three distinct islands of chanterelles topped with frisee lettuce and italian parsley <RECIPE_STEP> place three truffle slices between chanterelle islands and lay one quail breast on each <RECIPE_STEP> pour a little truffle juice vinaigrette over quails and central garnish , and a little herb vinaigrette over lettuce <RECIPE_STEP> <RECIPE_END>"


In [None]:
df[:5]

Unnamed: 0,Ingredients,Recipe
0,<INGREDIENT_START> 6 tb butter or margarine - softened <INGREDIENT> 3/4 c c and h powdered sugar <INGREDIENT> 1 c all - purpose flour <INGREDIENT> 1 tb milk <INGREDIENT> 2 eggs <INGREDIENT> 1 c c and h granulated sugar <INGREDIENT> 1/2 c cocoa <INGREDIENT> 2 tb flour <INGREDIENT> 1/2 ts baking powder <INGREDIENT> 1/2 ts salt <INGREDIENT> 1 ts vanilla <INGREDIENT> 1/4 ts almond extract optional <INGREDIENT> 1 c chopped almonds or pecans <INGREDIENT_END>,<RECIPE_START> cream together butter and powdered sugar <RECIPE_STEP> blend in 1 cup flour and milk <RECIPE_STEP> spread evenly in bottom of ungreased 9 - inch square pan <RECIPE_STEP> bake in 350 degree oven 10 to 12 minutes <RECIPE_STEP> beat eggs slightly <RECIPE_STEP> combine dry ingredients and add to eggs <RECIPE_STEP> blend in vanilla and almond extract <RECIPE_STEP> fold in almonds <RECIPE_STEP> spread over hot baked layer <RECIPE_STEP> return to o...
1,"<INGREDIENT_START> 1 c vanilla wafer cookies , finely crushed <INGREDIENT> 1 c confectioner's sugar <INGREDIENT> 2 tb cocoa <INGREDIENT> 1 c pecans , finely chopped <INGREDIENT> 2 tb corn syrup <INGREDIENT> 1/2 c bourbon <INGREDIENT_END>","<RECIPE_START> combine dry ingredients and mix well <RECIPE_STEP> add corn syrup and bourbon and mix well <RECIPE_STEP> if too moist add a few cookie crumbs , if too dry add a little more bourbon <RECIPE_STEP> shape into small balls and roll in fruit sugar or fonely chopped pecans <RECIPE_STEP> store in an airtight container <RECIPE_STEP> also good made with dark rum <RECIPE_STEP> <RECIPE_END>"
2,<INGREDIENT_START> 1 c all - purpose flour <INGREDIENT> 1/2 c sugar <INGREDIENT> 2 ts baking powder <INGREDIENT> 1/2 ts salt <INGREDIENT> 2 tb unsweetened cocoa powder <INGREDIENT> 1 1/2 c all - bran ? cereal <INGREDIENT> 3/4 c skim milk <INGREDIENT> 2 egg whites <INGREDIENT> 1/4 c vegetable oil <INGREDIENT> 1 c sliced ripe banana ; about 1 <INGREDIENT> vegetable cooking spray <INGREDIENT_END>,"<RECIPE_START> kellogg 's all - bran 1 <RECIPE_STEP> stir together flour , sugar , baking powder , salt , and cocoa powder <RECIPE_STEP> set aside <RECIPE_STEP> in large mixing bowl , combine all - bran cereal and milk <RECIPE_STEP> let stand 5 minutes or until cereal softens <RECIPE_STEP> add egg whites and oil <RECIPE_STEP> beat well <RECIPE_STEP> stir in bananas <RECIPE_STEP> add flour mixture stirring only until combined <RECIPE_STEP> portion..."
3,"<INGREDIENT_START> 5 1/2 c flour <INGREDIENT> 1 cake compressed yeast <INGREDIENT> or 1 cake dry yeast <INGREDIENT> 2 c milk , scalded and cooled <INGREDIENT> 1/2 ts salt <INGREDIENT> 1/2 c cocoa <INGREDIENT> 1/4 c shortening <INGREDIENT> 2 eggs , well beaten <INGREDIENT> 1/2 c sugar <INGREDIENT_END>","<RECIPE_START> soften yeast and dissolve 1 tablespoon sugar in lukewarm milk <RECIPE_STEP> add 3 cups flour and beat until smooth <RECIPE_STEP> cover and set aside to rise in a warm place until light <RECIPE_STEP> add shortening and sugar , which have been creamed together , eggs , cocoa , salt , and remainder of flour or enough to make a soft dough <RECIPE_STEP> knead lightly and place in well - oiled bowl <RECIPE_STEP> cover and set in a warm place until doubl..."
4,<INGREDIENT_START> bread - - to fill dish 2/3 <INGREDIENT> ful <INGREDIENT> 2 c skim milk <INGREDIENT> 2/3 c sugar <INGREDIENT> 2 eggs - - or 4 egg whites <INGREDIENT> 2 tb cocoa <INGREDIENT> 1 ts vanilla <INGREDIENT_END>,"<RECIPE_START> into a greased casserole dish break up enough bread to fill it 2/3 full <RECIPE_STEP> over this pour 2/3 cup sugar , 2 heaping tbsp cocoa that have been mixed together <RECIPE_STEP> toss all together lightly - lrb - to coat the bread - rrb - <RECIPE_STEP> to 2 cups of milk add 2 well beaten eggs and 1 tsp vanilla <RECIPE_STEP> pour this over the bread and it should just cover the pieces <RECIPE_STEP> bake at 350 fo r about 45 minutes <RECIPE_STEP> ..."


In [None]:
df[:20]

Unnamed: 0,Ingredients,Recipe
0,6 tb butter or margarine - softened \t 3/4 c c and h powdered sugar \t 1 c all - purpose flour \t 1 tb milk \t 2 eggs \t 1 c c and h granulated sugar \t 1/2 c cocoa \t 2 tb flour \t 1/2 ts baking powder \t 1/2 ts salt \t 1 ts vanilla \t 1/4 ts almond extract optional \t 1 c chopped almonds or pecans,"cream together butter and powdered sugar . blend in 1 cup flour and milk . spread evenly in bottom of ungreased 9 - inch square pan . bake in 350 degree oven 10 to 12 minutes . beat eggs slightly ; combine dry ingredients and add to eggs . blend in vanilla and almond extract ; fold in almonds . spread over hot baked layer ; return to oven and bake 20 minutes longer . cool ; while warm , cut into 24 bars ."
1,"1 c vanilla wafer cookies , finely crushed \t 1 c confectioner's sugar \t 2 tb cocoa \t 1 c pecans , finely chopped \t 2 tb corn syrup \t 1/2 c bourbon","combine dry ingredients and mix well . add corn syrup and bourbon and mix well . if too moist add a few cookie crumbs , if too dry add a little more bourbon . shape into small balls and roll in fruit sugar or fonely chopped pecans . store in an airtight container . also good made with dark rum ."
2,1 c all - purpose flour \t 1/2 c sugar \t 2 ts baking powder \t 1/2 ts salt \t 2 tb unsweetened cocoa powder \t 1 1/2 c all - bran ? cereal \t 3/4 c skim milk \t 2 egg whites \t 1/4 c vegetable oil \t 1 c sliced ripe banana ; about 1 \t vegetable cooking spray,"kellogg 's all - bran 1 . stir together flour , sugar , baking powder , salt , and cocoa powder . set aside . in large mixing bowl , combine all - bran cereal and milk . let stand 5 minutes or until cereal softens . add egg whites and oil . beat well . stir in bananas . add flour mixture stirring only until combined . portion batte evenly into twelve 2 - 1/2 inch muffin - pan cups coated with cooking spray . bake at 400 f . about 25 minutes or until lightly bro..."
3,"5 1/2 c flour \t 1 cake compressed yeast \t or 1 cake dry yeast \t 2 c milk , scalded and cooled \t 1/2 ts salt \t 1/2 c cocoa \t 1/4 c shortening \t 2 eggs , well beaten \t 1/2 c sugar","soften yeast and dissolve 1 tablespoon sugar in lukewarm milk . add 3 cups flour and beat until smooth . cover and set aside to rise in a warm place until light . add shortening and sugar , which have been creamed together , eggs , cocoa , salt , and remainder of flour or enough to make a soft dough . knead lightly and place in well - oiled bowl . cover and set in a warm place until double in bulk about 2 hours . form into loaves . place in well - oiled bread pans , ..."
4,bread - - to fill dish 2/3 \t ful \t 2 c skim milk \t 2/3 c sugar \t 2 eggs - - or 4 egg whites \t 2 tb cocoa \t 1 ts vanilla,"into a greased casserole dish break up enough bread to fill it 2/3 full . over this pour 2/3 cup sugar , 2 heaping tbsp cocoa that have been mixed together . toss all together lightly - lrb - to coat the bread - rrb - . to 2 cups of milk add 2 well beaten eggs and 1 tsp vanilla . pour this over the bread and it should just cover the pieces . bake at 350 fo r about 45 minutes ."
5,4 egg whites \t 1/3 c oil \t 1/4 c nonfat yogurt \t 1 ts vanilla \t 1 1/3 c sugar \t 1/2 c cocoa powder \t 1 1/4 c flour \t 1/4 ts salt \t 1 tb confectioners sugar,"heat oven to 350 degrees . oil bottom of 9 - inch square pan . place egg whites in a large bowl . beat until slightly frothy . add oil , yogurt and vanilla , mix well . add sugar and cocoa , mix well . add flour and salt . mix until blended . pour into pan . bake at 360 degrees for 26 - 28 minutes . avoid over baking . dust with confectioners sugar ."
6,2/3 c sifted all - purpose flour \t 1/3 c cocoa \t 1/2 ts baking powder \t 1/2 ts salt \t 2 eggs ; unbeaten \t 1 c c & h golden brown sugar \t 1 ts vanilla \t 1/3 c salad oil \t 1 c coarsely chopped nuts,"sift together flour , cocoa , baking powder , and salt ; set aside . in mixing bowl slightly beat the eggs . gradually stir in sugar . add vanilla and oil and mix well . stir in flour - cocoa mixture , then nuts . spread in greased shallow 11x7 or 9 - inch square pan . bake at 350 degrees for 22 to 25 minutes . cool in pan , then cut in bars , squares , or diamonds ."
7,1 c melted butter or margarine \t 2 eggs \t 2 c brown sugar \t 6 tb cocoa \t 1 1/2 c flour \t 1 ts baking powder \t 1 ts vanilla,mix sugar and cocoa together . add butter - lrb - or margarine - rrb - and eggs . mix well . add the rest of the ingredients . bake at 350 for 25 minutes .
8,1 2/3 c all - purpose flour \t 1 1/2 c sugar \t 1/2 c hershey cocoa \t 1 1/2 ts baking soda \t 1/2 ts baking powder \t 1 ts salt \t 2 eggs \t 1/2 c shortening \t 1 1/3 c buttermilk or sour milk \t 1 ts vanilla \t chocolate glaze recipe foll ws,"heat oven to 350 degrees . generously grease and flour 12 cup bundt pan . in large mixer bowl , blend flour , sugar , cocoa , baking soda , baking powder and salt ; add remaining ingredients except chocolate glaze . beat on low speed 1 minute , scraping bowl constantly . beat on high speed 3 minutes , scraping bowl occasionally . pour into prepared pan . bake 50 to 55 minutes or until wooden toothpick inserted in center comes out clean . cool 10 minutes ; remo..."
9,1/2 c cocoa \t 1/2 c boiling water \t 3/4 c 1 1/2 sticks margerine @ \t room temperature \t liquid sugar substitue equal \t to 1/2 cup sugar \t 2 ts vinilla ; \t 3 large egg whites ; @ rm temp ; \t 1/2 ts cream of tartar \t 1/3 c sugar \t 2 1/2 c cake flour \t 1 ts baking soda \t 2 ts baking powder \t 1/4 ts salt \t 1/2 ts cinnamon \t 1 c cool water,"mix together cocoa and boiling water to blend and set aside to cool to room temperature . cream margarine at medium speed until light and fluffy . add sweetener and vanilla to creamed misture , along with cooled cocoa mixture . mix at medium speed until well blended . beat egg whites at medium speed until foamy . add cream of tartar and beat at high speed , gradually adding sugar , to form a meringue . set aside for alter use . stir together flour , soda , baking powd..."


In [None]:
df[69:79]

Unnamed: 0,Ingredients,Recipe
69,1/2 c butter\t2 c sugar\t4 eggs\t3 ts vanilla\t1 1/2 c sifted flour\t1/2 c unsweetened cocoa\t1/2 ts salt\t1 c flaked coconut\t1/2 c chocolate chips\t2 tb sugar\t1/2 c walnuts; chopped opt,"melt butter over low heat , then remove . add 2 c sugar . beat . add eggs and vanilla . blend . add flour , cocoa , salt . mix . stir in coconut . pour into greased 13x9 '' pan . sprinkle remaining ingredients over top . bake at 350 for 30 minutes . cool in pan and cut into bars ."
70,24 2/3 oz sugar\t2 coconuts\t1 1/4 oz cashews\t8 cardam seeds; powdered\t7000 cg ghee recipe,"shred the coconut . break the cashewnuts into small pieces & fry them in ghee . powder the cardamom . heat water in a vessel containing 1/4 ltr . of water and add the sugar to it . after the sugary liquid is no longer thin , add the coconut shreds and heat it until it turns thick . after sufficient stirring , add the fried cashewnut pieces and ghee and stir the mixture well . add the powdered cardamom and mix it thoroughly and stop heating ."
71,"base:\t2 c flour; all purpose\t1/3 c sugar; granulated\t1 c butter; cold, cubed\ttopping:\t1/4 c butter\t3 eggs; lightly beaten\t2 c sugar; brown\t1/2 c rolled oats\t3 ts flour; all purpose\t1 ts baking powder\t1 ts vanilla extract\t1/4 ts salt\t1/2 c raisins\t1/2 c coconut; unsweetened, flake\t1/2 c pecans; chopped","preheat oven to 350f . in bowl , combine flour and sugar , cut in butter till mixture ressembles coarse crumbs . press into 13 '' x 9 '' baking pan . bake in 350f 15 minutes or till golden . in bowl , mix together butter and eggs ; blend in sugar , rolled oats , flour , baking powder , vanilla and salt . stir in raisins , coconut and pecans ; pour over base . bake in preheated 350f oven 20 , to 25 minutes or till top is golden brown and springs back when lightly touched . run knife around ed..."
72,6 tb butter\t1 c dark brown sugar\t2 eggs\t1 ts vanilla extract\t2/3 c flour\t1 ts baking powder\t1/4 ts salt\t1/2 c chopped pecans\t1/2 c shredded coconut,"preheat oven to 350 ` f. grease an 8 '' square baking pan . combine the butter and brown sugar in a sturdy saucepan and set over moderate heat . stir frequently until the mixture is bubbly and the sugar is melted , then set aside to cool slightly . beat the eggs and vanilla into the butter and brown sugar mixture . combine the flour , baking powder and salt , then stir and toss them together . add to the first mixture and beat just until thoroughly mixed . stir in the pecans and the coconut ..."
73,1 1/2 c all-purpose flour\t1/2 ts baking soda\t1/2 c butter or shortening\t1/2 c firmly packed brown sugar\t1 pk butterscotch pudding mix 4 serving \t1 egg\t1/2 c flaked coconut,mix flour with baking soda . cream butter and beat in sugar and pudding mix . add egg and blend well . stir in flour mixture . form into small balls about 1 inch in diameter . roll in coconut . place on ungreased baking sheets and press with bottom of glass that has been dipped in flour . bake at 350 degrees f for 10 minutes . remove from sheets and cool on racks . 70 cal ;
74,4 eggs\t1/2 c oil\t1/2 c water\t1/2 ts vanilla\t1/2 ts salt\t1 1/2 ts baking powder\t1 c cake flour\t3/4 c sugar\t2 c milk; plus...\t2 tb milk\t3/4 c sugar\t1/2 ts vanilla\t1 tb cornstarch\t2 eggs; beaten\t1 1/2 c whipping cream\t3 tb sugar\t3 c freshly grated coconut,"separate the first 4 eggs . mix egg yolks , oil , water and 1/2 teaspoon vanilla . add salt , baking powder , cake flour and 3/4 cup sugar and mix until smooth . beat 4 egg whites until stiff and carefully fold into batter . pour into 10-inch springform pan that has been greased and floured or waxed paper lined . bake at 325f about 50 minutes , or until cake tests done . cool . cut cake into 4 very thin or 3 thicker layers . to make filling , combine 2 cups milk , 3/4 cup sugar and 1/2 teasp..."
75,"4 x eggs\t1/2 c oil\t1/2 c water\t1/2 ts vanilla\t1/2 ts salt\t1 1/2 ts baking powder\t1 c cake flour\t3/4 c sugar\t2 c plus 2 t milk\t3/4 c sugar\t1/2 ts vanilla\t1 tb cornstarch\t2 x eggs, beaten\t1 1/2 c whipping cream\t3 tb sugar\t3 c freshly grated coconut","separate 4 eggs . mix eggs yolks , oil , water and 1/2 tsp vanilla . add salt , baking powder , cake flour and 3.4 c. sugar and mix until smooth . beat 4 egg whites until stiff and carefully fold into batter . pour into 10 in springform pan that has been greased and floured and waxed paper lined . bake at 325f about 50 minutes , or until cake tests done . cool . cut cake into 4 very thin or 3 thicker layers . to make filling , combine 2 cups milk , 3/4 c. sugar and 1/2 tsp vanilla in saucepa..."
76,"1 lg coconut \t1 pn salt\t1 pt milk, sweet\t1 c sugar\t1 tb flour, heaping",to coconut milk to make 1 pint . -- cook all ingredients in a 3-quart saucepan except coconut until thick . add coconut and cook another 10 minutes on low-heat . cool before putting on any white cake .
77,"1 ea coconut, large \t1 x salt, pinch\t1 pt milk, sweet\t1 c sugar\t1 tb flour, heaping",quart saucepan except coconut until thick . add coconut and cook another 10 minutes on low-heat . cool before putting on any white cake .
78,3 c granulated sugar\t1 c milk\t3 c grated coconut,"boil the sugar and milk over a medium heat until it forms a thin syrup . -lrb- 232 to 240 degrees on your candy thermometer -rrb- add grated coconut , stirring frequently to prevent burning . when mixture thickens , drop by the spoonful onto a piece of waxed paper and allow to cool ."


In [None]:
df[:10]

Unnamed: 0,Ingredients,Recipe
0,6 tb butter or margarine - softened\t3/4 c c and h powdered sugar\t1 c all-purpose flour\t1 tb milk\t2 eggs\t1 c c and h granulated sugar\t1/2 c cocoa\t2 tb flour\t1/2 ts baking powder\t1/2 ts salt\t1 ts vanilla\t1/4 ts almond extract (optional)\t1 c chopped almonds or pecans,"cream together butter and powdered sugar . blend in 1 cup flour and milk . spread evenly in bottom of ungreased 9-inch square pan . bake in 350 degree oven 10 to 12 minutes . beat eggs slightly ; combine dry ingredients and add to eggs . blend in vanilla and almond extract ; fold in almonds . spread over hot baked layer ; return to oven and bake 20 minutes longer . cool ; while warm , cut into 24 bars ."
1,"1 c vanilla wafer cookies, finely crushed\t1 c confectioner's sugar\t2 tb cocoa\t1 c pecans, finely chopped\t2 tb corn syrup\t1/2 c bourbon","combine dry ingredients and mix well . add corn syrup and bourbon and mix well . if too moist add a few cookie crumbs , if too dry add a little more bourbon . shape into small balls and roll in fruit sugar or fonely chopped pecans . store in an airtight container . also good made with dark rum ."
2,1 c all-purpose flour\t1/2 c sugar\t2 ts baking powder\t1/2 ts salt\t2 tb unsweetened cocoa powder\t1 1/2 c all-bran? cereal\t3/4 c skim milk\t2 egg whites\t1/4 c vegetable oil\t1 c sliced ripe banana; (about 1\tvegetable cooking spray,"kellogg 's all-bran 1 . stir together flour , sugar , baking powder , salt , and cocoa powder . set aside . in large mixing bowl , combine all-bran cereal and milk . let stand 5 minutes or until cereal softens . add egg whites and oil . beat well . stir in bananas . add flour mixture stirring only until combined . portion batte evenly into twelve 2-1/2 inch muffin-pan cups coated with cooking spray . bake at 400 f. about 25 minutes or until lightly browned . serve warm ."
3,"5 1/2 c flour\t1 cake compressed yeast\tor 1 cake dry yeast\t2 c milk, scalded and cooled\t1/2 ts salt\t1/2 c cocoa\t1/4 c shortening\t2 eggs, well beaten\t1/2 c sugar","soften yeast and dissolve 1 tablespoon sugar in lukewarm milk . add 3 cups flour and beat until smooth . cover and set aside to rise in a warm place until light . add shortening and sugar , which have been creamed together , eggs , cocoa , salt , and remainder of flour or enough to make a soft dough . knead lightly and place in well-oiled bowl . cover and set in a warm place until double in bulk about 2 hours . form into loaves . place in well-oiled bread pans , filling them 1/2 full . cover..."
4,bread -- to fill dish 2/3\tful\t2 c skim milk\t2/3 c sugar\t2 eggs -- or 4 egg whites\t2 tb cocoa\t1 ts vanilla,"into a greased casserole dish break up enough bread to fill it 2/3 full . over this pour 2/3 cup sugar , 2 heaping tbsp cocoa that have been mixed together . toss all together lightly -lrb- to coat the bread -rrb- . to 2 cups of milk add 2 well beaten eggs and 1 tsp vanilla . pour this over the bread and it should just cover the pieces . bake at 350 fo r about 45 minutes ."
5,4 egg whites\t1/3 c oil\t1/4 c nonfat yogurt\t1 ts vanilla\t1 1/3 c sugar\t1/2 c cocoa powder\t1 1/4 c flour\t1/4 ts salt\t1 tb confectioners sugar,"heat oven to 350 degrees . oil bottom of 9-inch square pan . place egg whites in a large bowl . beat until slightly frothy . add oil , yogurt and vanilla , mix well . add sugar and cocoa , mix well . add flour and salt . mix until blended . pour into pan . bake at 360 degrees for 26-28 minutes . avoid over baking . dust with confectioners sugar ."
6,2/3 c sifted all-purpose flour\t1/3 c cocoa\t1/2 ts baking powder\t1/2 ts salt\t2 eggs; unbeaten\t1 c c&h golden brown sugar\t1 ts vanilla\t1/3 c salad oil\t1 c coarsely chopped nuts,"sift together flour , cocoa , baking powder , and salt ; set aside . in mixing bowl slightly beat the eggs . gradually stir in sugar . add vanilla and oil and mix well . stir in flour-cocoa mixture , then nuts . spread in greased shallow 11x7 or 9-inch square pan . bake at 350 degrees for 22 to 25 minutes . cool in pan , then cut in bars , squares , or diamonds ."
7,1 c melted butter or margarine\t2 eggs\t2 c brown sugar\t6 tb cocoa\t1 1/2 c flour\t1 ts baking powder\t1 ts vanilla,mix sugar and cocoa together . add butter -lrb- or margarine -rrb- and eggs . mix well . add the rest of the ingredients . bake at 350 for 25 minutes .
8,1 2/3 c all-purpose flour\t1 1/2 c sugar\t1/2 c hershey cocoa\t1 1/2 ts baking soda\t1/2 ts baking powder\t1 ts salt\t2 eggs\t1/2 c shortening\t1 1/3 c buttermilk or sour milk \t1 ts vanilla\tchocolate glaze (recipe foll ws),"heat oven to 350 degrees . generously grease and flour 12 cup bundt pan . in large mixer bowl , blend flour , sugar , cocoa , baking soda , baking powder and salt ; add remaining ingredients except chocolate glaze . beat on low speed 1 minute , scraping bowl constantly . beat on high speed 3 minutes , scraping bowl occasionally . pour into prepared pan . bake 50 to 55 minutes or until wooden toothpick inserted in center comes out clean . cool 10 minutes ; remove from pan to wire rack . cool ..."
9,1/2 c cocoa\t1/2 c boiling water\t3/4 c (1 1/2 sticks) margerine @\troom temperature\tliquid sugar substitue equal\tto 1/2 cup sugar\t2 ts vinilla;\t3 large egg whites;@ rm temp;\t1/2 ts cream of tartar\t1/3 c sugar\t2 1/2 c cake flour\t1 ts baking soda\t2 ts baking powder\t1/4 ts salt\t1/2 ts cinnamon\t1 c cool water,"mix together cocoa and boiling water to blend and set aside to cool to room temperature . cream margarine at medium speed until light and fluffy . add sweetener and vanilla to creamed misture , along with cooled cocoa mixture . mix at medium speed until well blended . beat egg whites at medium speed until foamy . add cream of tartar and beat at high speed , gradually adding sugar , to form a meringue . set aside for alter use . stir together flour , soda , baking powder , salt and cinnamon t..."


In [None]:
df.Recipe.str.islower().sum()

101338

In [None]:
df[~(df.Recipe.str.islower())]

Unnamed: 0,Ingredients,Recipe


In [None]:
df.Ingredients = df.Ingredients.str.lower()
df.Recipe = df.Recipe.str.lower()

In [None]:
len(df)

101340

In [None]:
df.dropna().shape

(101338, 2)

In [None]:
df.shape

(101340, 2)

In [None]:
df[(df != train)['Recipe']]

Unnamed: 0,Ingredients,Recipe
38064,4 regular tea bags;\t-torani\t3 ts peach syrup; such as monin\tsugar to taste =or=- (sugar sub)\t3 ts mango syrup; davinci or let cool. add flavoring. add ice. sweeten to taste.,
38681,"3 (6 ot 7 inch) whole wheat\t-shredded (about 2 cups) pocket (pita) breads 1 tb chopped fresh mint leaves\t3/4 c lowfat plain yogurt\t2 ts chopped fresh cilantro\t2 tb light sour cream\t-(chinese parsley or fresh\t1 garlic clove, minced\t-coriander)\t1 cucumber, peeled and m pillsbury's fast and easy magazine, july/august 1993 posted by",


In [None]:
df

Unnamed: 0,Ingredients,Recipe
0,6 tb butter or margarine - softened\t3/4 c c and h powdered sugar\t1 c all-purpose flour\t1 tb milk\t2 eggs\t1 c c and h granulated sugar\t1/2 c cocoa\t2 tb flour\t1/2 ts baking powder\t1/2 ts salt\t1 ts vanilla\t1/4 ts almond extract (optional)\t1 c chopped almonds or pecans,"cream together butter and powdered sugar . blend in 1 cup flour and milk . spread evenly in bottom of ungreased 9-inch square pan . bake in 350 degree oven 10 to 12 minutes . beat eggs slightly ; combine dry ingredients and add to eggs . blend in vanilla and almond extract ; fold in almonds . spread over hot baked layer ; return to oven and bake 20 minutes longer . cool ; while warm , cut into 24 bars ."
1,"1 c vanilla wafer cookies, finely crushed\t1 c confectioner's sugar\t2 tb cocoa\t1 c pecans, finely chopped\t2 tb corn syrup\t1/2 c bourbon","combine dry ingredients and mix well . add corn syrup and bourbon and mix well . if too moist add a few cookie crumbs , if too dry add a little more bourbon . shape into small balls and roll in fruit sugar or fonely chopped pecans . store in an airtight container . also good made with dark rum ."
2,1 c all-purpose flour\t1/2 c sugar\t2 ts baking powder\t1/2 ts salt\t2 tb unsweetened cocoa powder\t1 1/2 c all-bran? cereal\t3/4 c skim milk\t2 egg whites\t1/4 c vegetable oil\t1 c sliced ripe banana; (about 1\tvegetable cooking spray,"kellogg 's all-bran 1 . stir together flour , sugar , baking powder , salt , and cocoa powder . set aside . in large mixing bowl , combine all-bran cereal and milk . let stand 5 minutes or until cereal softens . add egg whites and oil . beat well . stir in bananas . add flour mixture stirring only until combined . portion batte evenly into twelve 2-1/2 inch muffin-pan cups coated with cooking spray . bake at 400 f. about 25 minutes or until lightly browned . serve warm ."
3,"5 1/2 c flour\t1 cake compressed yeast\tor 1 cake dry yeast\t2 c milk, scalded and cooled\t1/2 ts salt\t1/2 c cocoa\t1/4 c shortening\t2 eggs, well beaten\t1/2 c sugar","soften yeast and dissolve 1 tablespoon sugar in lukewarm milk . add 3 cups flour and beat until smooth . cover and set aside to rise in a warm place until light . add shortening and sugar , which have been creamed together , eggs , cocoa , salt , and remainder of flour or enough to make a soft dough . knead lightly and place in well-oiled bowl . cover and set in a warm place until double in bulk about 2 hours . form into loaves . place in well-oiled bread pans , filling them 1/2 full . cover..."
4,bread -- to fill dish 2/3\tful\t2 c skim milk\t2/3 c sugar\t2 eggs -- or 4 egg whites\t2 tb cocoa\t1 ts vanilla,"into a greased casserole dish break up enough bread to fill it 2/3 full . over this pour 2/3 cup sugar , 2 heaping tbsp cocoa that have been mixed together . toss all together lightly -lrb- to coat the bread -rrb- . to 2 cups of milk add 2 well beaten eggs and 1 tsp vanilla . pour this over the bread and it should just cover the pieces . bake at 350 fo r about 45 minutes ."
...,...,...
101335,"6 oz smoked bacon or pancetta, cut into 1/2"" dice\t4 tb olive oil\t6 oz fresh shiitake mushrooms, sliced\t1 lb asparagus, cut into 2"" piece\t12 oz fresh fettucine\t1 tb fresh thyme, finely chopped\t2 tb fresh oregano, finely chopped\tsalt to taste\tpepper to taste","saute bacon in skillet until crisp , remove to a plate . drain off excess fat , then add olive oil to the skillet and cook the mushrooms until softened . lightly blanch the asparagus . cook the pasta briefly in boiling salted water . drain the noodles and toos with the asparagus , bacon , shiitake and fresh herbs . season with salt and pepper . 35 grams total fat ; -lrb- 9.9 grams saturated fat -rrb- ; 175.5 milligrams cholesterol ; 709.7 milligrams sodium ."
101336,"2 ts sesame oil\t1 lb halibut or lean fish steaks, cut into l-inch pieces\t1 md onion, thinly sliced\t3 cloves garlic, finely chopped\t1 ts finely chopped gingerroot\t1 pk (10 ounces) frozen asparagus cuts, thawed and drained\t1 cn (4 ounces) sliced mushrooms, drained\t1 md tomato, cut into thin wedges\t2 tb soy sauce\t1 tb lemon juice","heat oil in 10-inch nonstick skillet over medium-high heat . add fish , onion , garlic , gingerroot and asparagus . stir-fry 2 to 3 minutes or until fish almost flakes with fork . carefully stir in remaining ingredients ; heat through . serve with additional soy sauce if desired . 4 servings . nutrition information per serving 1 serving percent of u.s. rda"
101337,"apple, artichoke, asparagus, vocado, string bean, beet, roccoli, russels spourts, cabbage, arrot, cauliflower, celery, orn, cucumber, ggplant, grape, lettuce, nion, parsnip, pea, peach, ear, pepper, lum, potato, radish, aspberry, squash, tomato, atermelon","in the name of sportsmanship , let 's consider one more way to look at fruits and vegetables . artichoke , asparagus , avocado , bean , beet , broccoli , brussels sprouts , cabbage , carrot , cauliflower , celery , corn , cucumber , eggplant , lettuce , onion , parsnip , pea , pepper , potato , radish , squash , and tomato are vegetables , all the others are fruits . fine with us . and , deep in our hearts , we suspect that 's the best way to play , according to hoyle ."
101338,"herb vinaigrette (see below)\ttruffle juice vinaigrette (see below)\t32 asparagus tips\t50 g chanterelles (cleaned)\t12 qt breasts\t1 or 2 uncooked yellow zucchini (sliced into 48 very thin circles)\t1 lg tomato (brunoised)\t15 g frisee lettuce (cleaned)\t12 italian parsley leaves\t30 g truffle (sliced into 12 very thin circles)\tbutter\tseasoning - salt, pepper, chicken stock\twhite wine","little dabs of food artfully arranged here and there on a plate , sometimes connected by skeins of sauces . expensive too . patrick lin -lrb- the regent hotel -rrb- make herb vinaigrette from onion , chives , italian parsley , parsley and a pinch of tarragon , all finely chopped together . -lrb- keep some chopped herbs aside to use in chanterelles preparation see below . -rrb- add salt , pepper , walnut oil , sherry and vinegar , according to taste . make truffle juice vinaigrette from bottl..."


In [None]:
s = pd.Series(['    1. Ant.  ', '    2. Bee!\n', '3. Cat?\t', np.nan, 10, True])
s

0        1. Ant.  
1        2. Bee!\n
2        3. Cat?\t
3              NaN
4               10
5             True
dtype: object

In [None]:
s.str.split()

0    [1., Ant.]
1    [2., Bee!]
2    [3., Cat?]
3           NaN
4           NaN
5           NaN
dtype: object

In [None]:
(train.Recipe.str.len() < 20).sum()

284

In [None]:
(10 <= train.Recipe.str.len() <= 2000).all()

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
train.Recipe[train.Recipe.str.len() < 20][:40]

549       use sweet cycle . 
716       eating raw eggs . 
961       use basic cycle . 
963       use basic cycle . 
964       use basic cycle . 
967      add nuts at beep . 
969       use basic cycle . 
970       use basic cycle . 
972       use basic cycle . 
983       use basic cycle . 
1027         instant hair ! 
1238      see ingredients . 
1992     chill until firm . 
2307             mix well . 
2381          for 1 hrs . . 
2566          -- gecho 1.11 
2763        dried peppers ; 
2984          5 cups cooked 
4139     strain and serve . 
4518         pop the corn . 
4540         karen mintzias 
5144          walter jetton 
5146       mix thoroughly . 
5749          taste of home 
7116              combine . 
7365           what to do ? 
7839                home co 
8440       rhonda guilbeaux 
8533                  add m 
8998                1/2 cup 
9881          pizza dough . 
9882             ted taylor 
9889             ted taylor 
10055         liz parkinson 
10354       co

In [None]:
train.Recipe.str.len().quantile(0.99)

1843.0

In [None]:
train.Recipe[train.Recipe.str.islower() == False]

Series([], Name: Recipe, dtype: object)

In [None]:
with_tags = add_tags(train)

In [None]:
with_tags.head()

Unnamed: 0,Ingredients,Recipe
0,<INGREDIENT_START> <INGREDIENT_START> 6 tb butter or margarine - softened <INGREDIENT> 3/4 c c and h powdered sugar <INGREDIENT> 1 c all-purpose flour <INGREDIENT> 1 tb milk <INGREDIENT> 2 eggs <INGREDIENT> 1 c c and h granulated sugar <INGREDIENT> 1/2 c cocoa <INGREDIENT> 2 tb flour <INGREDIENT> 1/2 ts baking powder <INGREDIENT> 1/2 ts salt <INGREDIENT> 1 ts vanilla <INGREDIENT> 1/4 ts almond extract (optional) <INGREDIENT> 1 c chopped almonds or pecans <INGREDIENT_END> <INGREDIENT_...,<RECIPE_START> cream together butter and powdered sugar <RECIPE_STEP> blend in 1 cup flour and milk <RECIPE_STEP> spread evenly in bottom of ungreased 9-inch square pan <RECIPE_STEP> bake in 350 degree oven 10 to 12 minutes <RECIPE_STEP> beat eggs slightly <RECIPE_STEP> combine dry ingredients and add to eggs <RECIPE_STEP> blend in vanilla and almond extract <RECIPE_STEP> fold in almonds <RECIPE_STEP> spread over hot baked layer <RECIPE_STEP> return to oven and bake 20 minu...
1,"<INGREDIENT_START> <INGREDIENT_START> 1 c vanilla wafer cookies, finely crushed <INGREDIENT> 1 c confectioner's sugar <INGREDIENT> 2 tb cocoa <INGREDIENT> 1 c pecans, finely chopped <INGREDIENT> 2 tb corn syrup <INGREDIENT> 1/2 c bourbon <INGREDIENT_END> <INGREDIENT_END>","<RECIPE_START> combine dry ingredients and mix well <RECIPE_STEP> add corn syrup and bourbon and mix well <RECIPE_STEP> if too moist add a few cookie crumbs , if too dry add a little more bourbon <RECIPE_STEP> shape into small balls and roll in fruit sugar or fonely chopped pecans <RECIPE_STEP> store in an airtight container <RECIPE_STEP> also good made with dark rum <RECIPE_STEP> <RECIPE_END>"
2,<INGREDIENT_START> <INGREDIENT_START> 1 c all-purpose flour <INGREDIENT> 1/2 c sugar <INGREDIENT> 2 ts baking powder <INGREDIENT> 1/2 ts salt <INGREDIENT> 2 tb unsweetened cocoa powder <INGREDIENT> 1 1/2 c all-bran? cereal <INGREDIENT> 3/4 c skim milk <INGREDIENT> 2 egg whites <INGREDIENT> 1/4 c vegetable oil <INGREDIENT> 1 c sliced ripe banana; (about 1 <INGREDIENT> vegetable cooking spray <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> kellogg 's all-bran 1 <RECIPE_STEP> stir together flour , sugar , baking powder , salt , and cocoa powder <RECIPE_STEP> set aside <RECIPE_STEP> in large mixing bowl , combine all-bran cereal and milk <RECIPE_STEP> let stand 5 minutes or until cereal softens <RECIPE_STEP> add egg whites and oil <RECIPE_STEP> beat well <RECIPE_STEP> stir in bananas <RECIPE_STEP> add flour mixture stirring only until combined <RECIPE_STEP> portion batte evenly into twelve 2-1/2 ..."
3,"<INGREDIENT_START> <INGREDIENT_START> 5 1/2 c flour <INGREDIENT> 1 cake compressed yeast <INGREDIENT> or 1 cake dry yeast <INGREDIENT> 2 c milk, scalded and cooled <INGREDIENT> 1/2 ts salt <INGREDIENT> 1/2 c cocoa <INGREDIENT> 1/4 c shortening <INGREDIENT> 2 eggs, well beaten <INGREDIENT> 1/2 c sugar <INGREDIENT_END> <INGREDIENT_END>","<RECIPE_START> soften yeast and dissolve 1 tablespoon sugar in lukewarm milk <RECIPE_STEP> add 3 cups flour and beat until smooth <RECIPE_STEP> cover and set aside to rise in a warm place until light <RECIPE_STEP> add shortening and sugar , which have been creamed together , eggs , cocoa , salt , and remainder of flour or enough to make a soft dough <RECIPE_STEP> knead lightly and place in well-oiled bowl <RECIPE_STEP> cover and set in a warm place until double in bulk about 2 hour..."
4,<INGREDIENT_START> <INGREDIENT_START> bread -- to fill dish 2/3 <INGREDIENT> ful <INGREDIENT> 2 c skim milk <INGREDIENT> 2/3 c sugar <INGREDIENT> 2 eggs -- or 4 egg whites <INGREDIENT> 2 tb cocoa <INGREDIENT> 1 ts vanilla <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> into a greased casserole dish break up enough bread to fill it 2/3 full <RECIPE_STEP> over this pour 2/3 cup sugar , 2 heaping tbsp cocoa that have been mixed together <RECIPE_STEP> toss all together lightly -lrb- to coat the bread -rrb- <RECIPE_STEP> to 2 cups of milk add 2 well beaten eggs and 1 tsp vanilla <RECIPE_STEP> pour this over the bread and it should just cover the pieces <RECIPE_STEP> bake at 350 fo r about 45 minutes <RECIPE_STEP> <RECIPE_END>"


filter out 
"see directions"
"text only"
"none"
"no ingredients"
"no ingredients found"
"see below"

In [None]:
with_tags.shape

(101340, 2)

In [None]:
(with_tags.Ingredients.str.count('<INGREDIENT>') < 1).sum()

1234

In [None]:
1234 / 101340

0.012176830471679495

In [None]:
with_tags[with_tags.Ingredients.str.count('<INGREDIENT>') < 1][60:80]

Unnamed: 0,Ingredients,Recipe
4766,<INGREDIENT_START> <INGREDIENT_START> see below <INGREDIENT_END> <INGREDIENT_END>,<RECIPE_START> use 2 ts loose tea or 2 tea bags for each cup of cold water <RECIPE_STEP> place tea in glass container <RECIPE_STEP> add water <RECIPE_STEP> cover and refrigerate at least 24 hours <RECIPE_STEP> serve over crushed ice <RECIPE_STEP> <RECIPE_END>
4777,<INGREDIENT_START> <INGREDIENT_START> round robin exchange <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> 6 ea eggs , separated 11/4 c sugar 2 t lemon juice 3/4 c sifted pastry flour 1/4 c cornstarch 1/2 ts salt beat egg yolks until creamy and light , gradually add sugar , beating well after each addition and adding with the last portion 1 tablespoon lemon juice <RECIPE_STEP> mix and sift flour , cornstarch and salt , add half of it to the egg yolk mixture , stirring gently till blended and adding while blending another tablespooon of lemon juice <RECIPE_STEP> fold in stiffly ..."
4835,<INGREDIENT_START> <INGREDIENT_START> text only <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> in a saucepan , make a syrup with 2 cups water and 1 cup sugar <RECIPE_STEP> stir over heat until the sugar dissolves , then add the juice of half a lemon and boil the syrup vigorously for 5 minutes <RECIPE_STEP> set aside to cool <RECIPE_STEP> in a food processor , puree 3 punnets of hulled , strawberries with quarter of a cup of milk and a couple of drops of red food colouring <RECIPE_STEP> combine puree with the syrup and pour into a narrow rectangular cake tin or a..."
4887,<INGREDIENT_START> <INGREDIENT_START> 2 1/2 qt chicken stock; (see other recipe) <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> put the previously prepared , cold , fat-free , 2-1/2 quarts of chicken stock in an 8 quart pan <RECIPE_STEP> add a whole stewing fowl or roasting chicken weighing between 4 and 5 pounds <RECIPE_STEP> bring slowly to a boil <RECIPE_STEP> skim off any scum that forms on the surface <RECIPE_STEP> reduce heat <RECIPE_STEP> cover and simmer gently until the chicken is very tender , about 1 hour for a young chicken , or 2 to 2-1/2 hours for a fowl <RECIPE_STEP> remove t..."
5056,<INGREDIENT_START> <INGREDIENT_START> 12 ears white corn in milky stage <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> scrape corn with sharp knife three times <RECIPE_STEP> first scrape corn just to break off kernals <RECIPE_STEP> second scrape remainder of corn halfway <RECIPE_STEP> third scrape off rest of kernels off cob <RECIPE_STEP> then use potato masher and mash all kernels until milk comes out , take loaf pan which is about 11/2 '' deep , grease and put in all corn that has been mashed and bake in oven until all kernels are golden brown <RECIPE_STEP> use low heat and bake ap..."
5068,<INGREDIENT_START> <INGREDIENT_START> 2 tbsp) evenly in single layer on plate. microwave on high 2-4 minutes <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> sprinkle grated peel from one medium orange or two small lemons -lrb- about or until dry to the touch , turning plate and lifting and tossing peel with fingers after each minute <RECIPE_STEP> let stand about 2 hours or until completly cooled and brittle <RECIPE_STEP> store in airtight container no longer than 2 months <RECIPE_STEP> 1/2 tsp dried peel is equivalent to 1 tsp grated fresh peel <RECIPE_END>"
5069,<INGREDIENT_START> <INGREDIENT_START> 12 med-skinned oranges or lemons <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> use the coarse side of a cheese grater to grate off the citrus skin in strips <RECIPE_STEP> use only the orange -lrb- or yellow -rrb- part of the skin avoiding the bitter white part <RECIPE_STEP> spread the peel strips on two ceramic plates and let dry , uncovered , at room temperature , for 3 to 4 days <RECIPE_STEP> when the peels become brittle and shriveled , store them in a small glass jar <RECIPE_STEP> whirl the whole dried peels in a blender or food processor unt..."
5147,<INGREDIENT_START> <INGREDIENT_START> 1 information <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> for use in cajun dishes , gravies and as an all around flavorful thickening agent <RECIPE_STEP> make several cups at a time and store in tightly closed jar in refrigerator <RECIPE_STEP> put 1 to 3 cups flour into heavy skillet and place over moderate heat <RECIPE_STEP> stir the flour around often with a wooden spoon as it cooks <RECIPE_STEP> continue until all the flour is desired color , probably about like a light peanut butter color <RECIPE_STEP> extracted from ``..."
5858,<INGREDIENT_START> <INGREDIENT_START> shaping of breads <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> grease 6 6x3 '' sheets to make free form loaves <RECIPE_STEP> divide dough into 6 parts <RECIPE_STEP> remove air pockets by working dough with hands and rolling into 6 10x5 '' rectangles <RECIPE_STEP> starting from shorter side , roll up , pinching edges firmly to seal <RECIPE_STEP> for free-form loaves , taper ends slightly <RECIPE_STEP> place seam-side-down on prepared pan <RECIPE_STEP> cover <RECIPE_STEP> let rise in warm place until light and double in size ,..."
5905,<INGREDIENT_START> <INGREDIENT_START> 1 lb tofu <INGREDIENT_END> <INGREDIENT_END>,"<RECIPE_START> slice 1/4 '' thick <RECIPE_STEP> lightly sprinkle salt on both sides <RECIPE_STEP> or , sprinkle with onion or garlic salt <RECIPE_STEP> bake in a toster oven or put cake rackson regular grates in oven <RECIPE_STEP> lay slices on racks <RECIPE_STEP> bake at 350f <RECIPE_STEP> <RECIPE_END>"


In [None]:
with_tags.Ingredients.str.count('<INGREDIENT>').quantile(0.01)

0.0

In [None]:
train

Unnamed: 0,Ingredients,Recipe
0,6 tb butter or margarine - softened\t3/4 c c and h powdered sugar\t1 c all-purpose flour\t1 tb milk\t2 eggs\t1 c c and h granulated sugar\t1/2 c cocoa\t2 tb flour\t1/2 ts baking powder\t1/2 ts salt\t1 ts vanilla\t1/4 ts almond extract (optional)\t1 c chopped almonds or pecans,"cream together butter and powdered sugar . blend in 1 cup flour and milk . spread evenly in bottom of ungreased 9-inch square pan . bake in 350 degree oven 10 to 12 minutes . beat eggs slightly ; combine dry ingredients and add to eggs . blend in vanilla and almond extract ; fold in almonds . spread over hot baked layer ; return to oven and bake 20 minutes longer . cool ; while warm , cut into 24 bars ."
1,"1 c vanilla wafer cookies, finely crushed\t1 c confectioner's sugar\t2 tb cocoa\t1 c pecans, finely chopped\t2 tb corn syrup\t1/2 c bourbon","combine dry ingredients and mix well . add corn syrup and bourbon and mix well . if too moist add a few cookie crumbs , if too dry add a little more bourbon . shape into small balls and roll in fruit sugar or fonely chopped pecans . store in an airtight container . also good made with dark rum ."
2,1 c all-purpose flour\t1/2 c sugar\t2 ts baking powder\t1/2 ts salt\t2 tb unsweetened cocoa powder\t1 1/2 c all-bran? cereal\t3/4 c skim milk\t2 egg whites\t1/4 c vegetable oil\t1 c sliced ripe banana; (about 1\tvegetable cooking spray,"kellogg 's all-bran 1 . stir together flour , sugar , baking powder , salt , and cocoa powder . set aside . in large mixing bowl , combine all-bran cereal and milk . let stand 5 minutes or until cereal softens . add egg whites and oil . beat well . stir in bananas . add flour mixture stirring only until combined . portion batte evenly into twelve 2-1/2 inch muffin-pan cups coated with cooking spray . bake at 400 f. about 25 minutes or until lightly browned . serve warm ."
3,"5 1/2 c flour\t1 cake compressed yeast\tor 1 cake dry yeast\t2 c milk, scalded and cooled\t1/2 ts salt\t1/2 c cocoa\t1/4 c shortening\t2 eggs, well beaten\t1/2 c sugar","soften yeast and dissolve 1 tablespoon sugar in lukewarm milk . add 3 cups flour and beat until smooth . cover and set aside to rise in a warm place until light . add shortening and sugar , which have been creamed together , eggs , cocoa , salt , and remainder of flour or enough to make a soft dough . knead lightly and place in well-oiled bowl . cover and set in a warm place until double in bulk about 2 hours . form into loaves . place in well-oiled bread pans , filling them 1/2 full . cover..."
4,bread -- to fill dish 2/3\tful\t2 c skim milk\t2/3 c sugar\t2 eggs -- or 4 egg whites\t2 tb cocoa\t1 ts vanilla,"into a greased casserole dish break up enough bread to fill it 2/3 full . over this pour 2/3 cup sugar , 2 heaping tbsp cocoa that have been mixed together . toss all together lightly -lrb- to coat the bread -rrb- . to 2 cups of milk add 2 well beaten eggs and 1 tsp vanilla . pour this over the bread and it should just cover the pieces . bake at 350 fo r about 45 minutes ."
...,...,...
101335,"6 oz smoked bacon or pancetta, cut into 1/2"" dice\t4 tb olive oil\t6 oz fresh shiitake mushrooms, sliced\t1 lb asparagus, cut into 2"" piece\t12 oz fresh fettucine\t1 tb fresh thyme, finely chopped\t2 tb fresh oregano, finely chopped\tsalt to taste\tpepper to taste","saute bacon in skillet until crisp , remove to a plate . drain off excess fat , then add olive oil to the skillet and cook the mushrooms until softened . lightly blanch the asparagus . cook the pasta briefly in boiling salted water . drain the noodles and toos with the asparagus , bacon , shiitake and fresh herbs . season with salt and pepper . 35 grams total fat ; -lrb- 9.9 grams saturated fat -rrb- ; 175.5 milligrams cholesterol ; 709.7 milligrams sodium ."
101336,"2 ts sesame oil\t1 lb halibut or lean fish steaks, cut into l-inch pieces\t1 md onion, thinly sliced\t3 cloves garlic, finely chopped\t1 ts finely chopped gingerroot\t1 pk (10 ounces) frozen asparagus cuts, thawed and drained\t1 cn (4 ounces) sliced mushrooms, drained\t1 md tomato, cut into thin wedges\t2 tb soy sauce\t1 tb lemon juice","heat oil in 10-inch nonstick skillet over medium-high heat . add fish , onion , garlic , gingerroot and asparagus . stir-fry 2 to 3 minutes or until fish almost flakes with fork . carefully stir in remaining ingredients ; heat through . serve with additional soy sauce if desired . 4 servings . nutrition information per serving 1 serving percent of u.s. rda"
101337,"apple, artichoke, asparagus, vocado, string bean, beet, roccoli, russels spourts, cabbage, arrot, cauliflower, celery, orn, cucumber, ggplant, grape, lettuce, nion, parsnip, pea, peach, ear, pepper, lum, potato, radish, aspberry, squash, tomato, atermelon","in the name of sportsmanship , let 's consider one more way to look at fruits and vegetables . artichoke , asparagus , avocado , bean , beet , broccoli , brussels sprouts , cabbage , carrot , cauliflower , celery , corn , cucumber , eggplant , lettuce , onion , parsnip , pea , pepper , potato , radish , squash , and tomato are vegetables , all the others are fruits . fine with us . and , deep in our hearts , we suspect that 's the best way to play , according to hoyle ."
101338,"herb vinaigrette (see below)\ttruffle juice vinaigrette (see below)\t32 asparagus tips\t50 g chanterelles (cleaned)\t12 qt breasts\t1 or 2 uncooked yellow zucchini (sliced into 48 very thin circles)\t1 lg tomato (brunoised)\t15 g frisee lettuce (cleaned)\t12 italian parsley leaves\t30 g truffle (sliced into 12 very thin circles)\tbutter\tseasoning - salt, pepper, chicken stock\twhite wine","little dabs of food artfully arranged here and there on a plate , sometimes connected by skeins of sauces . expensive too . patrick lin -lrb- the regent hotel -rrb- make herb vinaigrette from onion , chives , italian parsley , parsley and a pinch of tarragon , all finely chopped together . -lrb- keep some chopped herbs aside to use in chanterelles preparation see below . -rrb- add salt , pepper , walnut oil , sherry and vinegar , according to taste . make truffle juice vinaigrette from bottl..."


In [None]:
'Ingredients' in train.columns

True

In [None]:
train.Ingredients = '<INGREDIENT_START> ' + train.Ingredients.str.replace('\t', ' <INGREDIENT> ', regex=False) + ' <INGREDIENT_END>'

### Building dictionaries

In [13]:
# index for custom tags
tags = {
    "<INGREDIENT_START>": 0,
    "<INGREDIENT>": 1,
    "<INGREDIENT_END>": 2,
    "<RECIPE_START>": 3,
    "<RECIPE_STEP>": 4,
    "<RECIPE_END>": 5
}

PAD_WORD = "<PAD>"
class Vocabulary:
    def __init__(self):
        """Vocabulary class which can convert a valid word to unique index and converting the index back to word."""
        ## initialize
        self.word2index = tags
        self.word2count = {k: 0 for k in tags.keys()}
        self.index2word = {v:k for k,v in tags.items()}
        self.n_unique_words = len(self.index2word) # total number of words in the dictionary.

    def add_word(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_unique_words
            self.index2word[self.n_unique_words] = word
            self.n_unique_words += 1
            self.word2count[word] = 1
        else:
            self.word2count[word] += 1

    def add_sentence(self, sentence):
        for word in sentence.split(' '):
            self.add_word(word)

    def add_padding(self):
        # NOTE: should be called after finished with building vocab
        self.add_word(PAD_WORD)

    def populate(self, df):
        for rowid in tqdm(range(len(df))):
            df_row = df.iloc[rowid]
            for i in range(2):
                self.add_sentence(df_row.iloc[i])
        self.add_padding() # padding should be last in the vocabulary (for convenience in decoder)

Use a combined vocabulary for both ingredients and recipes because the ingredients will appear in the recipes.

In [14]:
vocab = Vocabulary()
# populate_vocabulary(df, vocab)
vocab.populate(df)
vocab.n_unique_words

100%|██████████| 99036/99036 [00:24<00:00, 4055.90it/s]


44683

In [15]:
class RecipeDataset(Dataset):
    def __init__(self, df, vocab):
        """
        Args:
            df (pd.DataFrame): dataframe with two columns: "Ingredients" and "Recipe"
            vocab (Vocabulary): to convert word2index
        """
        super().__init__()
        self.ingredient_recipe_df = df
        self.vocab = vocab

    def __len__(self):
        return len(self.ingredient_recipe_df)
    
    def __getitem__(self, index):
        row = self.ingredient_recipe_df.iloc[index]
        ingredient_tens = torch.tensor([vocab.word2index[w] for w in row.Ingredients.split(" ")],
                                       dtype=torch.long, device=DEVICE)
        recipe_tens = torch.tensor([vocab.word2index[w] for w in row.Recipe.split(" ")],
                                       dtype=torch.long, device=DEVICE)
        return (ingredient_tens, recipe_tens)

In [16]:
recipe_ds = RecipeDataset(df, vocab)

For efficiently performing our model operations, the input should be batched. However, input ingredients and recipes are of different lengths. Therefore, to batch them, we need to pad with a padding value which we reserve to be 0 (see Vocabulary).

In [17]:
# inspired by https://suzyahyah.github.io/pytorch/2019/07/01/DataLoader-Pad-Pack-Sequence.html
def pad_collate(vocab):

    def _pad_collate(batch):
        # print(len(batch))
        # print(batch[0])
        # ingredients: tuple of len batch_size with Tensor elements containing all ingredients in batch
        # recipes: tuple of len batch_size with Tensor elements containing all recipes in batch
        ingredients, recipes = zip(*batch)
        ingr_lens = torch.tensor([len(x) for x in ingredients], dtype=torch.long, device=DEVICE)
        recipe_lens = torch.tensor([len(r) for r in recipes], dtype=torch.long, device=DEVICE)

        ingredients_padded = pad_sequence(ingredients, batch_first=True, padding_value=vocab.word2index[PAD_WORD])
        recipes_padded = pad_sequence(recipes, batch_first=True, padding_value=vocab.word2index[PAD_WORD])

        return ingredients_padded, recipes_padded, ingr_lens, recipe_lens
    
    return _pad_collate

def pack(x_embed, x_lens):
    # convert tensor with padding to a PackedSequence, this allows rnns to ignore paddings
    return pack_padded_sequence(x_embed, x_lens.cpu().int(), batch_first=True, enforce_sorted=False)

def unpack(out_packed, padding_val):
    out_padded, out_lens = pad_packed_sequence(out_packed, batch_first=True, padding_value=padding_val)
    return out_padded, out_lens

# Implementation of Baseline 1

In [18]:
class EncoderRNN(nn.Module):
    def __init__(self,
                 input_size,
                 hidden_size,
                 padding_value,
                 ):
        """Encoder LSTM to encode input sequence.

        input_size (int): size of vocabulary
        hidden_size (int): size of hidden dimension, referred to as H
        """
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.padding_value = padding_value

    def forward(self, ingredients, ing_lens):
        """Embed ingredients and feed through LSTM. 
        Batch process all words in sequence at once for efficiency rather than one word one batch at a time.

        Args:
            ingredients (torch.Tensor): padded ingredients of shape [N, L], where N=batch size and L=longest sequence length in batch
        """
        ## embed ingredients
        ingredients_embed = self.embedding(ingredients) # [N, L, H]

        ## pack padded ingredients tensor before feeding through LSTM (this allows the lstm to optimize operations, ignoring padding)
        ingredients_packed = pack(ingredients_embed, ing_lens)

        ## feed through LSTM
        # by default, initial hidden state and initial cell state are zeros
        # output: PackedSequence containing hidden state for each token in sequence
        # final hidden state: Tensor [num_layers=1, N, H] NOTE: this is the last non-padded hidden state for each input sequence
        # c_final: last cell state Tensor [num_layers=1, N, H]
        output, (h_final, _) = self.lstm(ingredients_packed)

        ## unpack PackedSequence to get back our padded tensor
        # output_padded: padded output tensor which masks out encoder outputs for padding to 0; shape [N, L, H] NOTE: output_padded[:, -1] != h_final because of padding
        # output_lens: unpadded sequence lengths; tensor of shape [N]
        output_padded, output_lens = unpack(output, padding_val=self.padding_value)

        return output_padded, output_lens, h_final

In [19]:
#! IMPORTANT: MAKE SURE DECODER'S OUTPUT SIZE IS VOCAB SIZE - 1
class DecoderRNN(nn.Module):
    def __init__(self,
                 hidden_size,
                 output_size
                 ):
        """Decoder to generate recipes based on encoder output (hidden state(s)).

        Args:
            hidden_size (int): size of hidden dimension
            output_size (int): size of target language vocabulary - 1 (doesn't need to encode padding), |Vocab| - 1
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, batch_first=False)
        self.nonlinear_activation = nn.Tanh()
        self.out_fc = nn.Linear(hidden_size, output_size)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, inp, hidden):
        """Decode one word at a time. Batch processed.

        Args:
            inp (torch.Tensor): start token or previous generation (non teacher-forcing) or 
                                previous ground truth token (teacher-forcing);
                                shape [N]
            hidden (torch.Tensor): encoder last hidden state; shape [1, N, H]
        """
        ## embed token input
        inp_embedded = self.embedding(inp)[None] # [L=1, N, H]

        ## apply non-linear activation
        inp_embedded = self.nonlinear_activation(inp_embedded)
        
        ## feed embedded input and hidden state through LSTM
        # out: output features; shape [L=1, N, H]
        # h_final: final updated hidden state; shape [num_layers=1, N, H]
        # c_final: last cell state Tensor [num_layers=1, N, H]
        out, (h_final, _) = self.lstm(inp_embedded, (hidden, torch.zeros_like(hidden)))

        ## linear projection
        out = self.out_fc(out[0]) # [N, H] -> [N, |Vocab|]

        ## log softmax to get log probability distribution over vocabulary words
        out = self.logsoftmax(out) # [N, |Vocab|]

        return out, h_final

In [20]:
def train_iter(ingredients, recipes, ing_lens, rec_lens, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    """Single training iteration. Processes batched data.

    Args:
        ingredients (torch.Tensor): padded ingredients tensor in idx form; 
                                    shape [N, L_i], where L_i = max ingredients length in batch
        recipes (torch.Tensor): padded recipes tensor in idx form;
                                shape [N, L_r], where L_r = max recipes length in batch
        ing_lens (torch.Tensor): unpadded length of ingredients; shape [N]
        rec_lens (torch.Tensor): unpadded length of recipes; shape [N]
        encoder (EncoderRNN): encoder RNN module
        decoder (DecoderRNN): decoder RNN module
        encoder_optimizer (torch.optim)
        decoder_optimizer (torch.optim)
        criterion (torch.nn.NLLLoss): loss function
    """

    ## reset gradients
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    N = ingredients.size(0)
    padded_ing_len = ingredients.size(1) # L_i
    padded_rec_len = recipes.size(1) # L_r


    ## feed ingredients through encoder
    # enc_out: padded encoder output tensor with shape [N, L, H]
    # enc_out_lens: unpadded sequence lengths; tensor with shape [N]
    # enc_h_final: final hidden state: [num_layers=1, N, H]
    enc_out, enc_out_lens, enc_h_final = encoder(ingredients, ing_lens)

    # decoder_input = torch.full((N, 1), fill_value=vocab.word2index["<RECIPE_START>"],
    #                            dtype=torch.long, device=DEVICE) 
    # initialize decoder hidden state as final encoder hidden state
    decoder_hidden = enc_h_final

    if TEACHER_FORCING_RATIO < 1:
        raise ValueError("Non-teacher forcing is not implemented")
    
    loss = 0
    all_decoder_outs = [] # List of [N, |Vocab|-1]
    all_gt = [] # List of [N]

    ## teacher forcing
    curr_rec_lens = rec_lens.clone()
    ## NOTE: recipes already contain start token no need to add manually
    ## TODO IMPORTANT: MAKE SURE DECODER'S OUTPUT SIZE IS VOCAB SIZE - 1
    for di in range(padded_rec_len-1):
        # get batches which have valid (non-padding and non ending) tokens as input
        valid = (rec_lens - 1) > di
        decoder_input_i = recipes[valid, di] # [N_valid]
        decoder_hidden_i = decoder_hidden[:,valid] # [1, N_valid, H]

        # decoder_out: log probabilities over vocab; [N_valid, |Vocab|-1]
        # decoder_hfinal: final hidden state; [num_layers=1, N_valid, H]
        decoder_out, decoder_hidden_i = decoder(decoder_input_i, decoder_hidden_i)

        all_decoder_outs.append(decoder_out)

        # because we ensured that input cannot be end token, there is a guaranteed non-padding token
        # for each valid batch sample
        gt_i = recipes[valid, di+1] # [N_valid]
        assert (gt_i != vocab.word2index[PAD_WORD]).all(), f"gt_i should not have padding but got: {gt_i}"
        all_gt.append(gt_i)

        # update only valid decoder_hidden
        decoder_hidden[:, valid] = decoder_hidden_i
    
    all_decoder_outs = torch.cat(all_decoder_outs, dim=0)
    all_gt = torch.cat(all_gt, dim=0)

    # mean Negative Log Likelihood Loss
    loss = criterion(all_decoder_outs, all_gt)

    ## backpropagation
    loss.backward()

    ## update params
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item()

In [21]:
def train(encoder, decoder, dataset, n_epochs, batch_size=4, learning_rate=0.01, verbose=True, verbose_iter_interval=10):
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=pad_collate(vocab))
    total_iters = len(dataloader)
    epoch_losses = torch.zeros(size=[n_epochs], dtype=torch.double, device=DEVICE, requires_grad=False)
    
    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    for epoch in range(n_epochs):
        if verbose: print(f"Starting epoch {epoch+1}/{n_epochs}")
        epoch_loss = 0 # accumulate total loss during epoch
        print_epoch_loss = 0 # accumulate losses for printing
        for iter_idx, (ingredients, recipes, ing_lens, rec_lens) in enumerate(dataloader):
            if verbose and iter_idx > 0  and iter_idx % verbose_iter_interval == 0:
                print(f"(Epoch {epoch}, iter {iter_idx}/{total_iters}) Average loss so far: {print_epoch_loss/verbose_iter_interval:.3f}")
                print_epoch_loss = 0
            loss = train_iter(ingredients, recipes, ing_lens, rec_lens, encoder, decoder, 
                                   encoder_optimizer, decoder_optimizer, criterion)
            epoch_loss += loss
            print_epoch_loss += loss
        epoch_loss /= total_iters # get average epoch loss
        if verbose: print(f"Average epoch loss: {epoch_loss:.3f}")
        epoch_losses.append(epoch_loss)

    return epoch_losses

### Instantiate Encoder and Decoder

In [22]:
encoder = EncoderRNN(vocab.n_unique_words, hidden_size=HIDDEN_SIZE, padding_value=vocab.word2index[PAD_WORD]).to(DEVICE)
# in the training script, decoder is always fed a non-end token and thus never needs to generate padding
decoder = DecoderRNN(hidden_size=HIDDEN_SIZE, output_size=vocab.n_unique_words-1).to(DEVICE)

In [23]:
epoch_losses = train(encoder, decoder, recipe_ds, n_epochs=5, batch_size=4, learning_rate=0.01, verbose_iter_interval=1)

Starting epoch 1/5


(Epoch 0, iter 1/24759) Average loss so far: 10.711
(Epoch 0, iter 2/24759) Average loss so far: 10.707
(Epoch 0, iter 3/24759) Average loss so far: 10.713
(Epoch 0, iter 4/24759) Average loss so far: 10.706
(Epoch 0, iter 5/24759) Average loss so far: 10.713
(Epoch 0, iter 6/24759) Average loss so far: 10.710
(Epoch 0, iter 7/24759) Average loss so far: 10.708
(Epoch 0, iter 8/24759) Average loss so far: 10.710
(Epoch 0, iter 9/24759) Average loss so far: 10.707
(Epoch 0, iter 10/24759) Average loss so far: 10.708
(Epoch 0, iter 11/24759) Average loss so far: 10.709
(Epoch 0, iter 12/24759) Average loss so far: 10.708
(Epoch 0, iter 13/24759) Average loss so far: 10.707
(Epoch 0, iter 14/24759) Average loss so far: 10.707
(Epoch 0, iter 15/24759) Average loss so far: 10.706
(Epoch 0, iter 16/24759) Average loss so far: 10.704
(Epoch 0, iter 17/24759) Average loss so far: 10.708
(Epoch 0, iter 18/24759) Average loss so far: 10.704
(Epoch 0, iter 19/24759) Average loss so far: 10.708
(E

KeyboardInterrupt: 

In [51]:
dataloader = DataLoader(recipe_ds, batch_size=4, shuffle=True, collate_fn=pad_collate(vocab))

In [52]:
ingredients, recipes, ing_lens, rec_lens = next(iter(dataloader))

In [53]:
rec_lens

tensor([263, 134, 126,  16])

In [54]:
criterion = nn.NLLLoss()

In [55]:
encoder_optimizer = optim.SGD(encoder.parameters(), lr=1e-3)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=1e-3)

In [56]:
loss = train_iter(ingredients, recipes, ing_lens, rec_lens, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)

In [57]:
loss

10.712065696716309

In [62]:
l = [torch.Tensor([1, 2, 3]), torch.Tensor([4]), torch.Tensor([5, 6])]

pad_l = pad_sequence(l, batch_first=True, padding_value=0)

In [63]:
pad_l

tensor([[1., 2., 3.],
        [4., 0., 0.],
        [5., 6., 0.]])

In [66]:
packed_l = pack(pad_l.unsqueeze(-1), torch.tensor([3, 1, 2]))
packed_l

PackedSequence(data=tensor([[1.],
        [5.],
        [4.],
        [2.],
        [6.],
        [3.]]), batch_sizes=tensor([3, 2, 1]), sorted_indices=tensor([0, 2, 1]), unsorted_indices=tensor([0, 2, 1]))

In [67]:
lstm = nn.LSTM(1, 5, batch_first=True)

In [69]:
out, (h_final, _) = lstm(packed_l)

In [70]:
out

PackedSequence(data=tensor([[ 0.0794, -0.0027, -0.0813, -0.0836,  0.0903],
        [-0.0352, -0.1354, -0.1496, -0.0808,  0.3205],
        [ 0.0136, -0.1145, -0.1331, -0.1055,  0.2563],
        [ 0.1176, -0.0716, -0.1257, -0.1492,  0.2094],
        [ 0.0733, -0.2670, -0.1957, -0.0674,  0.5118],
        [ 0.1314, -0.1688, -0.1590, -0.1596,  0.3382]], grad_fn=<CatBackward0>), batch_sizes=tensor([3, 2, 1]), sorted_indices=tensor([0, 2, 1]), unsorted_indices=tensor([0, 2, 1]))

In [71]:
h_final

tensor([[[ 0.1314, -0.1688, -0.1590, -0.1596,  0.3382],
         [ 0.0136, -0.1145, -0.1331, -0.1055,  0.2563],
         [ 0.0733, -0.2670, -0.1957, -0.0674,  0.5118]]],
       grad_fn=<IndexSelectBackward0>)

In [72]:
unpacked_out, out_lens = unpack(out)

In [73]:
unpacked_out

tensor([[[ 0.0794, -0.0027, -0.0813, -0.0836,  0.0903],
         [ 0.1176, -0.0716, -0.1257, -0.1492,  0.2094],
         [ 0.1314, -0.1688, -0.1590, -0.1596,  0.3382]],

        [[ 0.0136, -0.1145, -0.1331, -0.1055,  0.2563],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]],

        [[-0.0352, -0.1354, -0.1496, -0.0808,  0.3205],
         [ 0.0733, -0.2670, -0.1957, -0.0674,  0.5118],
         [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]],
       grad_fn=<IndexSelectBackward0>)

In [24]:
dec_out = torch.rand(4, 10)
dec_out

tensor([[0.1699, 0.7829, 0.8439, 0.3110, 0.8584, 0.2562, 0.5113, 0.8993, 0.5475,
         0.7520],
        [0.5721, 0.1622, 0.4188, 0.2749, 0.1484, 0.5337, 0.9014, 0.8269, 0.8428,
         0.9507],
        [0.9826, 0.6929, 0.9554, 0.2149, 0.3530, 0.0090, 0.9106, 0.2185, 0.1463,
         0.2705],
        [0.9207, 0.7776, 0.3821, 0.9539, 0.6005, 0.0750, 0.3464, 0.9162, 0.5391,
         0.2886]])

In [28]:
dec_out.topk(k=1, dim=-1)

torch.return_types.topk(
values=tensor([[0.8993],
        [0.9507],
        [0.9826],
        [0.9539]]),
indices=tensor([[7],
        [9],
        [0],
        [3]]))

In [27]:
dec_out.argmax(dim=-1)

tensor([7, 9, 0, 3])

In [15]:
lens = torch.tensor([2, 9, 1, 3])
lens

tensor([2, 9, 1, 3])

In [18]:
decoder_input = torch.rand((4,1))

In [19]:
decoder_input

tensor([[0.6875],
        [0.3673],
        [0.5379],
        [0.4929]])

In [20]:
lens > 1

tensor([ True,  True, False,  True])

In [21]:
decoder_input[lens > 1]

tensor([[0.6875],
        [0.3673],
        [0.4929]])

In [153]:
lstm = nn.LSTM(256, 256, batch_first=True)

In [164]:
ingr_embedded = emb(ingr)

In [165]:
ingr_embedded.shape

torch.Size([2, 93, 256])

In [160]:
ilen

[69, 93]

In [187]:
pack(ingr_embedded, out_lens)

PackedSequence(data=tensor([[-1.0380,  0.9075, -1.1193,  ..., -0.8712,  0.6987, -1.6537],
        [-1.0380,  0.9075, -1.1193,  ..., -0.8712,  0.6987, -1.6537],
        [-0.8490, -0.2949,  2.2047,  ..., -0.5509, -0.6989, -0.3990],
        ...,
        [ 0.6973,  0.6226,  1.1423,  ...,  2.4458,  1.2086, -1.6040],
        [ 0.2775,  0.4690,  1.6982,  ...,  1.9954, -2.0233, -0.0757],
        [ 0.0966,  0.8704, -0.1913,  ...,  0.4736, -0.1003, -0.5020]],
       grad_fn=<PackPaddedSequenceBackward0>), batch_sizes=tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), sorted_indices=tensor([1, 0]), unsorted_indices=tensor([1, 0]))

In [188]:
pack(ingr_embedded, ilen)

PackedSequence(data=tensor([[-1.0380,  0.9075, -1.1193,  ..., -0.8712,  0.6987, -1.6537],
        [-1.0380,  0.9075, -1.1193,  ..., -0.8712,  0.6987, -1.6537],
        [-0.8490, -0.2949,  2.2047,  ..., -0.5509, -0.6989, -0.3990],
        ...,
        [ 0.6973,  0.6226,  1.1423,  ...,  2.4458,  1.2086, -1.6040],
        [ 0.2775,  0.4690,  1.6982,  ...,  1.9954, -2.0233, -0.0757],
        [ 0.0966,  0.8704, -0.1913,  ...,  0.4736, -0.1003, -0.5020]],
       grad_fn=<PackPaddedSequenceBackward0>), batch_sizes=tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), sorted_indices=tensor([1, 0]), unsorted_indices=tensor([1, 0]))

In [168]:
ingr_packed = pack(ingr_embedded, out_lens)

In [169]:
ingr_packed

PackedSequence(data=tensor([[-1.0380,  0.9075, -1.1193,  ..., -0.8712,  0.6987, -1.6537],
        [-1.0380,  0.9075, -1.1193,  ..., -0.8712,  0.6987, -1.6537],
        [-0.8490, -0.2949,  2.2047,  ..., -0.5509, -0.6989, -0.3990],
        ...,
        [ 0.6973,  0.6226,  1.1423,  ...,  2.4458,  1.2086, -1.6040],
        [ 0.2775,  0.4690,  1.6982,  ...,  1.9954, -2.0233, -0.0757],
        [ 0.0966,  0.8704, -0.1913,  ...,  0.4736, -0.1003, -0.5020]],
       grad_fn=<PackPaddedSequenceBackward0>), batch_sizes=tensor([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]), sorted_indices=tensor([1, 0]), unsorted_indices=tensor([1, 0]))

In [170]:
output, (h_final, c_final) = lstm(ingr_packed)

In [189]:
c_final.shape

torch.Size([1, 2, 256])

In [173]:
out_padded, out_lens = unpack(output)

In [175]:
out_padded.shape

torch.Size([2, 93, 256])

In [181]:
ilen

[69, 93]

In [183]:
out_lens

tensor([69, 93])

In [144]:
output, (h_final, c_final) = lstm(torch.rand(2, 96, 256))

In [149]:
output[:, -1].shape

torch.Size([2, 256])

In [151]:
(output[:, -1] == h_final[0]).all()

tensor(True)

In [147]:
h_final.shape

torch.Size([1, 2, 256])

In [21]:
emb = nn.Embedding(vocab.n_unique_words, 256)

In [52]:
test = [torch.tensor([5, 18, 29]), torch.tensor([1]), torch.tensor([10, 9])]

In [53]:
test_padded = pad_sequence(test, batch_first=True, padding_value=-1)

In [54]:
test_padded

tensor([[ 5, 18, 29],
        [ 1, -1, -1],
        [10,  9, -1]])

In [None]:
emb

# Implementation of Baseline 2

# Implementation of Extension 1

# Implementation of Extension 2