In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# !pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 --extra-index-url https://download.pytorch.org/whl/cu116
# !pip install matplotlib numpy pandas tqdm nltk

# Ingredient list used to separate ingredients from non-ingredients.
# NOTE: on Windows, GNU Wget must be installed first to run the command below.
# !wget -c https://raw.githubusercontent.com/williamLyh/RecipeWithPlans/main/ingredient_set.json -O ingredient_set.json

In [3]:
import os
import re
import string
import numpy as np
import pandas as pd
import random
import json
from torch.optim.lr_scheduler import CosineAnnealingLR, StepLR, MultiStepLR
import nltk
from nltk.translate.bleu_score import corpus_bleu, sentence_bleu
from nltk.translate import meteor

from data import *
from encoder_decoder import *
from train import *
from eval import *
from utils import *

# WordNet is required by NLTK's METEOR implementation (BLEU does not need it)
# nltk.download("wordnet")

  from .autonotebook import tqdm as notebook_tqdm


---

In [4]:
# Notebook-wide configuration.
SEED = 31989101  # applied to the torch, numpy and random RNGs by reset_rng()
HIDDEN_SIZE = 256  # hidden_size passed to every encoder/decoder built in this notebook
MAX_INGR_LEN = 150 # fixed from assignment
MAX_RECIPE_LEN = 600  # passed as max_recipe_len to preprocess_data and eval
DROPOUT = 0.1  # dropout passed to the attention decoder
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## ensuring reproducibility
def reset_rng(seed=None):
    """Seed the torch, numpy and ``random`` generators for reproducible runs.

    Args:
        seed: Integer seed applied to all three generators. When omitted
            (``None``) the notebook-level ``SEED`` is used, which matches the
            original zero-argument behaviour.
    """
    # Resolve the default at call time so a later change to SEED is honoured,
    # exactly as the original body (which read SEED on every call) did.
    if seed is None:
        seed = SEED
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

# Seed all RNGs once up front, before any data or model object is created.
reset_rng()

# to easily read ingredients and instructions
pd.set_option('display.max_colwidth', 2000)

# Report which device was selected for DEVICE.
print(f"Using device: {DEVICE}")

Using device: cuda


In [5]:
# Directory holding the train/dev/test CSV files.
data_root = "./Cooking_Dataset"
# Forwarded to preprocess_data and Vocabulary in later cells.
add_intermediate_tag = True


def _read_split(csv_name):
    """Read one split from data_root, keeping only the two columns used here."""
    return pd.read_csv(os.path.join(data_root, csv_name), usecols=['Ingredients', 'Recipe'])


train_df_orig = _read_split("train.csv")
dev_df_orig = _read_split("dev.csv")
test_df_orig = _read_split("test.csv")

In [6]:
# Preprocess the training split with the notebook-wide length limits; the
# call prints the sample counts before and after preprocessing.
train_df = preprocess_data(
    train_df_orig,
    max_ingr_len=MAX_INGR_LEN,
    max_recipe_len=MAX_RECIPE_LEN,
    add_intermediate_tag=add_intermediate_tag,
)

Number of data samples before preprocessing: 101340
Number of data samples after preprocessing: 99035 (97.725%)


In [7]:
# Preprocess the dev split with the notebook-wide length limits; the call
# prints the sample counts before and after preprocessing.
dev_df = preprocess_data(
    dev_df_orig,
    max_ingr_len=MAX_INGR_LEN,
    max_recipe_len=MAX_RECIPE_LEN,
    add_intermediate_tag=add_intermediate_tag,
)

Number of data samples before preprocessing: 797
Number of data samples after preprocessing: 775 (97.240%)


In [8]:
# Preprocess the test split with the notebook-wide length limits; the call
# prints the sample counts before and after preprocessing.
test_df = preprocess_data(
    test_df_orig,
    max_ingr_len=MAX_INGR_LEN,
    max_recipe_len=MAX_RECIPE_LEN,
    add_intermediate_tag=add_intermediate_tag,
)

Number of data samples before preprocessing: 778
Number of data samples after preprocessing: 757 (97.301%)


In [9]:
# Build the vocabulary from the preprocessed training split only (dev/test are
# not passed to populate), then display its size as the cell output.
vocab = Vocabulary(add_intermediate_tag=add_intermediate_tag)
vocab.populate(train_df)
vocab.n_unique_words

  0%|          | 0/99035 [00:00<?, ?it/s]

100%|██████████| 99035/99035 [00:07<00:00, 13596.13it/s]


42650

In [10]:
# vocab = Vocabulary()
# vocab.populate(train_df)
# vocab.n_unique_words

In [11]:
# vocab = Vocabulary()
# vocab.populate(train_df)
# vocab.n_unique_words

In [12]:
# Wrap each preprocessed split in a RecipeDataset that shares the training
# vocabulary; the dev and test datasets are created with train=False.
train_ds = RecipeDataset(train_df, vocab)
dev_ds = RecipeDataset(dev_df, vocab, train=False)
test_ds = RecipeDataset(test_df, vocab, train=False)

## Encoder-Decoder (Base)

In [13]:
embedding_size = 300

# Baseline encoder: its input size is the full vocabulary, with the padding
# token's index supplied as padding_value.
encoder = EncoderRNN(
    vocab.n_unique_words,
    embedding_size=embedding_size,
    hidden_size=HIDDEN_SIZE,
    padding_value=vocab.word2index(PAD_WORD),
).to(DEVICE)

# in the training script, decoder is always fed a non-end token and thus never needs to generate padding
# also it should never generate "<UNKNOWN>"
# -> hence the output size is the vocabulary size minus those two tokens.
decoder = DecoderRNN(
    embedding_size=embedding_size,
    hidden_size=HIDDEN_SIZE,
    output_size=vocab.n_unique_words - 2,
).to(DEVICE)

In [15]:
# Optimisation settings for the baseline model.
initial_lr = 0.8
min_lr = 0.01  # only referenced by the commented-out cosine schedule below
n_epochs = 30
batch_size = 128

# Encoder and decoder each get their own plain-SGD optimizer.
encoder_optimizer = optim.SGD(encoder.parameters(), lr=initial_lr)
decoder_optimizer = optim.SGD(decoder.parameters(), lr=initial_lr)

# Alternative schedule kept for reference:
# enc_scheduler = CosineAnnealingLR(encoder_optimizer, T_max=n_epochs, eta_min=min_lr)
# dec_scheduler = CosineAnnealingLR(decoder_optimizer, T_max=n_epochs, eta_min=min_lr)

# Scale the learning rate by 0.1 at milestone 15 (presumably epochs; this
# assumes train() steps the schedulers once per epoch - TODO confirm).
enc_scheduler = MultiStepLR(encoder_optimizer, milestones=[15], gamma=0.1)
dec_scheduler = MultiStepLR(decoder_optimizer, milestones=[15], gamma=0.1)

epoch_losses = train(
    encoder,
    decoder,
    encoder_optimizer,
    decoder_optimizer,
    train_ds,
    n_epochs=n_epochs,
    vocab=vocab,
    decoder_mode="basic",
    batch_size=batch_size,
    enc_lr_scheduler=enc_scheduler,
    dec_lr_scheduler=dec_scheduler,
    dev_ds=dev_ds,
    identifier="test",
    verbose_iter_interval=10,
)

Starting epoch 1/30, enc lr scheduler: [0.8], dec lr scheduler: [0.8]
(Epoch 0, iter 10/774) Average loss so far: 10.564


KeyboardInterrupt: 

## Encoder-Decoder (Attention)

In [14]:
embedding_size = 300

# Separate encoder instance for the attention model, configured like the
# baseline encoder.
encoder_attn = EncoderRNN(
    vocab.n_unique_words,
    embedding_size=embedding_size,
    hidden_size=HIDDEN_SIZE,
    padding_value=vocab.word2index(PAD_WORD),
).to(DEVICE)

# in the training script, decoder is always fed a non-end token and thus never needs to generate padding
# also it should never generate "<UNKNOWN>"
# -> hence the output size is the vocabulary size minus those two tokens.
decoder_attn = AttnDecoderRNN(
    embedding_size,
    hidden_size=HIDDEN_SIZE,
    output_size=vocab.n_unique_words - 2,
    padding_val=vocab.word2index(PAD_WORD),
    dropout=DROPOUT,
).to(DEVICE)

In [15]:
# Optimisation settings for the attention model.
initial_lr = 0.8
min_lr = 0.01  # only referenced by the commented-out cosine schedule below
n_epochs = 30
batch_size = 128

# Encoder and decoder each get their own plain-SGD optimizer.
encoder_attn_optimizer = optim.SGD(encoder_attn.parameters(), lr=initial_lr)
decoder_attn_optimizer = optim.SGD(decoder_attn.parameters(), lr=initial_lr)

# Alternative schedule kept for reference:
# enc_attn_scheduler = CosineAnnealingLR(encoder_attn_optimizer, T_max=n_epochs, eta_min=min_lr)
# dec_attn_scheduler = CosineAnnealingLR(decoder_attn_optimizer, T_max=n_epochs, eta_min=min_lr)

# Scale the learning rate by 0.1 at milestone 15 (presumably epochs; this
# assumes train() steps the schedulers once per epoch - TODO confirm).
enc_attn_scheduler = MultiStepLR(encoder_attn_optimizer, milestones=[15], gamma=0.1)
dec_attn_scheduler = MultiStepLR(decoder_attn_optimizer, milestones=[15], gamma=0.1)

# Train on train_ds: the previously used name `recipe_ds` is never defined in
# this notebook, so the cell failed with NameError on a fresh kernel.
epoch_losses = train(
    encoder_attn,
    decoder_attn,
    encoder_attn_optimizer,
    decoder_attn_optimizer,
    train_ds,
    n_epochs=n_epochs,
    vocab=vocab,
    decoder_mode="attention",
    batch_size=batch_size,
    enc_lr_scheduler=enc_attn_scheduler,
    dec_lr_scheduler=dec_attn_scheduler,
    verbose_iter_interval=10,
)

Starting epoch 1/30, enc lr scheduler: [0.8], dec lr scheduler: [0.8]
(Epoch 0, iter 10/774) Average loss so far: 10.356
(Epoch 0, iter 20/774) Average loss so far: 8.718
(Epoch 0, iter 30/774) Average loss so far: 7.806
(Epoch 0, iter 40/774) Average loss so far: 7.802
(Epoch 0, iter 50/774) Average loss so far: 7.435
(Epoch 0, iter 60/774) Average loss so far: 7.306
(Epoch 0, iter 70/774) Average loss so far: 6.935
(Epoch 0, iter 80/774) Average loss so far: 6.662
(Epoch 0, iter 90/774) Average loss so far: 6.400
(Epoch 0, iter 100/774) Average loss so far: 6.532
(Epoch 0, iter 110/774) Average loss so far: 6.366
(Epoch 0, iter 120/774) Average loss so far: 6.335
(Epoch 0, iter 130/774) Average loss so far: 6.155
(Epoch 0, iter 140/774) Average loss so far: 6.100


KeyboardInterrupt: 

## Encoder-Decoder (Extension: pretrained embeddings)

In [13]:
# Load the GloVe 840B/300d vectors from the local text file; the result is
# passed to the encoder and decoder constructors in the next cell.
pretrained_embedding_dict = create_pretrained_embedding_dict("./glove.840B.300d.txt")

In [14]:
# embedding_size matches the 300-d GloVe file loaded into pretrained_embedding_dict.
embedding_size=300
# Encoder that additionally receives the pretrained embedding dict and the vocab.
encoder_pretrained_embed = EncoderRNN(
    input_size=vocab.n_unique_words, embedding_size=embedding_size, hidden_size=HIDDEN_SIZE, 
    padding_value=vocab.word2index(PAD_WORD), pretrained_embedding_dict=pretrained_embedding_dict, 
    vocab=vocab).to(DEVICE)
# in the training script, decoder is always fed a non-end token and thus never needs to generate padding
# also it should never generate "<UNKNOWN>"
# -> hence output_size is the vocabulary size minus those two tokens.
decoder_pretrained_embed = DecoderRNN(
    embedding_size=embedding_size,hidden_size=HIDDEN_SIZE, output_size=vocab.n_unique_words-2,
    pretrained_embedding_dict=pretrained_embedding_dict, vocab=vocab).to(DEVICE)

  0%|          | 0/42650 [00:00<?, ?it/s]

100%|██████████| 42650/42650 [00:00<00:00, 386465.32it/s]


28250/42650 (0.662) words have pretrained embeddings


100%|██████████| 42648/42648 [00:00<00:00, 360630.13it/s]

28250/42650 (0.662) words have pretrained embeddings





In [15]:
# Optimisation settings for the pretrained-embedding model.
initial_lr = 0.8
min_lr = 0.01  # only referenced by the commented-out cosine schedule below
n_epochs = 20
batch_size = 128

# Encoder and decoder each get their own plain-SGD optimizer.
encoder_pretrained_embed_optimizer = optim.SGD(encoder_pretrained_embed.parameters(), lr=initial_lr)
decoder_pretrained_embed_optimizer = optim.SGD(decoder_pretrained_embed.parameters(), lr=initial_lr)

# Alternative schedule kept for reference:
# enc_scheduler = CosineAnnealingLR(encoder_pretrained_embed_optimizer, T_max=n_epochs, eta_min=min_lr)
# dec_scheduler = CosineAnnealingLR(decoder_pretrained_embed_optimizer, T_max=n_epochs, eta_min=min_lr)

# Scale the learning rate by 0.2 at milestone 15 (presumably epochs; this
# assumes train() steps the schedulers once per epoch - TODO confirm).
enc_pretrained_embed_scheduler = MultiStepLR(encoder_pretrained_embed_optimizer, milestones=[15], gamma=0.2)
dec_pretrained_embed_scheduler = MultiStepLR(decoder_pretrained_embed_optimizer, milestones=[15], gamma=0.2)

# Train on train_ds: the previously used name `recipe_ds` is never defined in
# this notebook, so the cell failed with NameError on a fresh kernel.
epoch_losses = train(
    encoder_pretrained_embed,
    decoder_pretrained_embed,
    encoder_pretrained_embed_optimizer,
    decoder_pretrained_embed_optimizer,
    train_ds,
    n_epochs=n_epochs,
    vocab=vocab,
    decoder_mode="basic",
    batch_size=batch_size,
    enc_lr_scheduler=enc_pretrained_embed_scheduler,
    dec_lr_scheduler=dec_pretrained_embed_scheduler,
    verbose_iter_interval=10,
)

Starting epoch 1/20, enc lr scheduler: [0.8], dec lr scheduler: [0.8]
(Epoch 0, iter 10/774) Average loss so far: 10.462
(Epoch 0, iter 20/774) Average loss so far: 8.980
(Epoch 0, iter 30/774) Average loss so far: 8.117
(Epoch 0, iter 40/774) Average loss so far: 7.807
(Epoch 0, iter 50/774) Average loss so far: 7.399
(Epoch 0, iter 60/774) Average loss so far: 7.265
(Epoch 0, iter 70/774) Average loss so far: 6.978
(Epoch 0, iter 80/774) Average loss so far: 6.980
(Epoch 0, iter 90/774) Average loss so far: 6.677
(Epoch 0, iter 100/774) Average loss so far: 6.663
(Epoch 0, iter 110/774) Average loss so far: 6.488
(Epoch 0, iter 120/774) Average loss so far: 6.277
(Epoch 0, iter 130/774) Average loss so far: 6.548
(Epoch 0, iter 140/774) Average loss so far: 6.164


KeyboardInterrupt: 

In [35]:
# NOTE(review): this cell repeats the pretrained-embedding training setup of
# the preceding training cell. It creates fresh optimizers and schedulers but
# does not re-create the models themselves.
initial_lr = 0.8
min_lr = 0.01  # only referenced by the commented-out cosine schedule below
n_epochs = 20
batch_size = 128

# Encoder and decoder each get their own plain-SGD optimizer.
encoder_pretrained_embed_optimizer = optim.SGD(encoder_pretrained_embed.parameters(), lr=initial_lr)
decoder_pretrained_embed_optimizer = optim.SGD(decoder_pretrained_embed.parameters(), lr=initial_lr)

# Alternative schedule kept for reference:
# enc_scheduler = CosineAnnealingLR(encoder_pretrained_embed_optimizer, T_max=n_epochs, eta_min=min_lr)
# dec_scheduler = CosineAnnealingLR(decoder_pretrained_embed_optimizer, T_max=n_epochs, eta_min=min_lr)

# Scale the learning rate by 0.2 at milestone 15 (presumably epochs; this
# assumes train() steps the schedulers once per epoch - TODO confirm).
enc_pretrained_embed_scheduler = MultiStepLR(encoder_pretrained_embed_optimizer, milestones=[15], gamma=0.2)
dec_pretrained_embed_scheduler = MultiStepLR(decoder_pretrained_embed_optimizer, milestones=[15], gamma=0.2)

# Train on train_ds: the previously used name `recipe_ds` is never defined in
# this notebook, so the cell failed with NameError on a fresh kernel.
epoch_losses = train(
    encoder_pretrained_embed,
    decoder_pretrained_embed,
    encoder_pretrained_embed_optimizer,
    decoder_pretrained_embed_optimizer,
    train_ds,
    n_epochs=n_epochs,
    vocab=vocab,
    decoder_mode="basic",
    batch_size=batch_size,
    enc_lr_scheduler=enc_pretrained_embed_scheduler,
    dec_lr_scheduler=dec_pretrained_embed_scheduler,
    verbose_iter_interval=10,
)

Starting epoch 1/20, enc lr scheduler: [0.8], dec lr scheduler: [0.8]
(Epoch 0, iter 10/774) Average loss so far: 10.469
(Epoch 0, iter 20/774) Average loss so far: 9.036
(Epoch 0, iter 30/774) Average loss so far: 8.055
(Epoch 0, iter 40/774) Average loss so far: 7.621
(Epoch 0, iter 50/774) Average loss so far: 7.625
(Epoch 0, iter 60/774) Average loss so far: 7.021
(Epoch 0, iter 70/774) Average loss so far: 6.919
(Epoch 0, iter 80/774) Average loss so far: 7.049
(Epoch 0, iter 90/774) Average loss so far: 6.751
(Epoch 0, iter 100/774) Average loss so far: 6.528
(Epoch 0, iter 110/774) Average loss so far: 6.701
(Epoch 0, iter 120/774) Average loss so far: 6.385


KeyboardInterrupt: 

---

### Run testing

In [None]:
# decoder_attn = AttnDecoderRNN(embedding_size=embedding_size, hidden_size=HIDDEN_SIZE,
#                               output_size=vocab.n_unique_words-2, 
#                               padding_val=vocab.word2index(PAD_WORD)).to(DEVICE)

In [None]:
# dataloader = DataLoader(recipe_ds, batch_size=4, shuffle=True, collate_fn=pad_collate(vocab))
# ingredients, recipes, ing_lens, rec_lens = next(iter(dataloader))

In [None]:
# rec_lens

tensor([ 30, 164, 150,  20], device='cuda:0')

In [None]:
# initial_lr=0.8
# min_lr = 0.01
# n_epochs = 30
# batch_size=128
# encoder_optimizer = optim.SGD(encoder.parameters(), lr=initial_lr)
# decoder_optimizer = optim.SGD(decoder.parameters(), lr=initial_lr)

In [None]:
# train_iter(ingredients, recipes, ing_lens, rec_lens, encoder, decoder_attn, 
#            encoder_optimizer, decoder_optimizer, criterion=nn.NLLLoss(),
#            decoder_mode="attention", vocab=vocab)

10.720215797424316

---

## Evaluation

In [74]:
# Dev-split dataset created with train=False, used by the evaluation cells.
# NOTE(review): built with the same arguments as dev_ds earlier in the notebook.
dev_recipe_ds = RecipeDataset(dev_df, vocab, train=False)

In [75]:
# Dev-split loader: batches of 4 in dataset order (shuffle=False), collated by
# pad_collate in non-training mode.
# NOTE(review): no later cell visible here references dev_dataloader.
dev_dataloader = DataLoader(dev_recipe_ds, batch_size=4, shuffle=False, 
                            collate_fn=pad_collate(vocab, train=False))

In [39]:
# encoder.eval()
# decoder.eval()
# with torch.no_grad():
#     all_decoder_outs = get_predictions_iter(ingredients_padded, ingr_lens,
#                                         encoder, decoder, vocab, max_recipe_len=MAX_RECIPE_LEN)

In [77]:
# Run the baseline encoder/decoder over the dev dataset and collect the
# generated recipes together with the ground-truth recipes.
# NOTE(review): `eval` here is presumably the project function brought in by
# `from eval import *`, shadowing Python's builtin eval - confirm.
all_decoder_outs, all_gt_recipes = eval(encoder, decoder, dev_recipe_ds, vocab,
                                        max_recipe_len=MAX_RECIPE_LEN)

  0%|          | 0/194 [00:00<?, ?it/s]

100%|██████████| 194/194 [01:39<00:00,  1.95it/s]


In [99]:
# BLEU of the generated dev recipes against the ground truth.
# NOTE(review): the recorded output shows NLTK warning about zero 3-gram and
# 4-gram overlaps, so the score is effectively 0; consider whether calc_bleu
# should apply a SmoothingFunction, as the warning suggests.
calc_bleu(all_gt_recipes, all_decoder_outs)

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


3.4558993159943767e-156

In [98]:
# METEOR of the generated dev recipes against the ground truth, called with
# split_gt=False.
calc_meteor(all_gt_recipes, all_decoder_outs, split_gt=False)

100%|██████████| 775/775 [00:44<00:00, 17.37it/s]


0.0491881954127885

---

## Metric Sample

In [None]:
# Known-ingredient list (from ingredient_set.json) and the regex derived from it.
all_ings = get_all_ingredients("./ingredient_set.json")
all_ings_regex = get_ingredients_regex(all_ings)

# The metric sample file yields three items: the input ingredients, the gold
# recipe and a generated recipe.
(
    metric_sample_ings,
    metric_sample_gold_recipe,
    metric_sample_generated_recipe,
) = load_metric_sample("./metric_sample.txt")

In [None]:
# For the metric sample, compute the proportion of input ingredients found in
# the generated recipe and the number of extra ingredients, then print both.
# With verbose=True the call also prints the ingredient lists it matched.
prop_inp_ings, n_extra_ings = get_prop_input_num_extra_ingredients(
    metric_sample_ings, metric_sample_generated_recipe, all_ings_regex, verbose=True,
    metric_sample=True)
print(f"\nproportion of input ingredients: {prop_inp_ings}\nnumber of extra ingredients: {n_extra_ings}")

=====Input ingredients in text=====
['orange juice', 'strawberries', 'lemon juice', 'sugar', 'water']

=====All ingredients in text===== 
['vanilla ice cream', 'orange juice', 'strawberries', 'cantaloupe', 'lemon juice', 'sugar', 'water']

proportion of input ingredients: 1.0
number of extra ingredients: 2


In [None]:
# Score the single metric-sample pair: each recipe is wrapped in a one-element
# list and both metrics are called with split_gen=True.
bleu_score = calc_bleu([metric_sample_gold_recipe], [metric_sample_generated_recipe], split_gen=True)
meteor_score = calc_meteor([metric_sample_gold_recipe], [metric_sample_generated_recipe], split_gen=True)
print(f"BLEU score: {bleu_score}, METEOR score: {meteor_score}")

BLEU score: 0.14346607531819988, METEOR score: 0.5736654804270463
