In [1]:
# Reload imported project modules automatically, so edits to the local .py files
# are picked up by later cells without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import numpy as np
import pandas as pd
import random
from torch.optim.lr_scheduler import CosineAnnealingLR

from data import *
from encoder_decoder import *
from train import *

In [3]:
# Seed applied to every random number generator by reset_rng().
SEED = 31989101
# Hidden size handed to both EncoderRNN and DecoderRNN.
HIDDEN_SIZE = 256
# Forwarded to preprocess_data as max_ingr_len.
# NOTE(review): presumably an upper bound on ingredient length; the unit (tokens vs. characters) is not visible here -- confirm.
MAX_INGR_LEN = 150
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
# NOTE(review): `torch` is not imported explicitly in this notebook; it presumably comes in via one of the star imports -- confirm.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## ensuring reproducibility
def reset_rng(seed=None):
    """Re-seed the PyTorch, NumPy and built-in ``random`` generators.

    Args:
        seed: Integer seed applied to all three generators. When omitted
            (``None``), the notebook-wide ``SEED`` constant is used, which is
            exactly what a plain ``reset_rng()`` call did before.
    """
    # Resolve the default at call time so a later change to SEED is honored.
    if seed is None:
        seed = SEED
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

# Seed all generators once up front so the cells below start from a known state.
reset_rng()

# Report which device the models will be moved to.
print(f"Using device: {DEVICE}")

Using device: cpu


In [4]:
# Show up to 2000 characters per cell so ingredients and instructions can be read in full.
pd.options.display.max_colwidth = 2000

In [5]:
data_root = "./Cooking_Dataset"

# Read the three splits in train/dev/test order, keeping only the two text columns.
train_df_orig, dev_df_orig, test_df_orig = [
    pd.read_csv(os.path.join(data_root, csv_name), usecols=['Ingredients', 'Recipe'])
    for csv_name in ("train.csv", "dev.csv", "test.csv")
]

In [6]:
# Preprocess the raw training split; MAX_INGR_LEN is forwarded as max_ingr_len.
# The recorded run kept 99036 of 101340 samples (97.726%).
# NOTE(review): presumably samples with over-long ingredient lists are dropped -- confirm in preprocess_data.
train_df = preprocess_data(train_df_orig, max_ingr_len=MAX_INGR_LEN)

Number of data samples before preprocessing: 101340
Number of data samples after preprocessing: 99036 (97.726%)


In [7]:
# Build the vocabulary from the preprocessed training split only.
vocab = Vocabulary()
vocab.populate(train_df)
# Display the resulting vocabulary size (44683 in the recorded run).
vocab.n_unique_words

  0%|          | 0/99036 [00:00<?, ?it/s]

100%|██████████| 99036/99036 [00:42<00:00, 2303.51it/s]


44683

In [8]:
# Pair the preprocessed training frame with the vocabulary; this dataset is what train() receives.
recipe_ds = RecipeDataset(train_df, vocab)

In [9]:
# The encoder is sized to the full vocabulary and is given the padding token's index.
encoder = EncoderRNN(
    vocab.n_unique_words,
    hidden_size=HIDDEN_SIZE,
    padding_value=vocab.word2index[PAD_WORD],
).to(DEVICE)

# The decoder's output is one entry smaller than the vocabulary: in the training script
# the decoder is always fed a non-end token, so it never needs to generate padding.
# NOTE(review): this presumably relies on PAD_WORD holding the highest vocabulary index -- confirm in Vocabulary.
decoder = DecoderRNN(
    hidden_size=HIDDEN_SIZE,
    output_size=vocab.n_unique_words - 1,
).to(DEVICE)

In [11]:
# Learning-rate schedule: start at initial_lr and cosine-anneal towards min_lr over n_epochs steps.
initial_lr = 0.01
min_lr = 1e-5
n_epochs = 5

# One plain-SGD optimizer per network, built identically (encoder first, then decoder).
# NOTE(review): `optim` is not imported explicitly in this notebook; presumably torch.optim via a star import -- confirm.
encoder_optimizer, decoder_optimizer = (
    optim.SGD(network.parameters(), lr=initial_lr)
    for network in (encoder, decoder)
)

# One cosine-annealing scheduler per optimizer, in the same order.
enc_scheduler, dec_scheduler = (
    CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=min_lr)
    for optimizer in (encoder_optimizer, decoder_optimizer)
)

In [13]:
# Train both networks on the training dataset. The schedulers are handed to train(),
# and the recorded output shows their learning rates at the start of every epoch.
# NOTE(review): the recorded run reports "Average epoch loss: 0.000" for all five epochs --
# check how train() accumulates and averages the loss before trusting these numbers.
epoch_losses = train(
    encoder,
    decoder,
    encoder_optimizer,
    decoder_optimizer,
    recipe_ds,
    n_epochs=n_epochs,
    vocab=vocab,
    batch_size=4,
    enc_lr_scheduler=enc_scheduler,
    dec_lr_scheduler=dec_scheduler,
    verbose_iter_interval=1,
)

Starting epoch 1/5, enc lr scheduler: [0.01], dec lr scheduler: [0.01]
Average epoch loss: 0.000
Starting epoch 2/5, enc lr scheduler: [0.009046039886902862], dec lr scheduler: [0.009046039886902862]
Average epoch loss: 0.000
Starting epoch 3/5, enc lr scheduler: [0.006548539886902862], dec lr scheduler: [0.006548539886902862]
Average epoch loss: 0.000
Starting epoch 4/5, enc lr scheduler: [0.0034614601130971375], dec lr scheduler: [0.0034614601130971375]
Average epoch loss: 0.000
Starting epoch 5/5, enc lr scheduler: [0.0009639601130971379], dec lr scheduler: [0.0009639601130971379]
Average epoch loss: 0.000
