# Training cycle debugging

This notebook runs code copied from `train_seq2seq.py` cell by cell, so that bugs in the training cycle can be caught and debugged interactively.

In [3]:
import os
import sys

# Root of the ALFRED checkout. A value of ALFRED_ROOT that is already exported
# in the environment takes precedence; the hard-coded path is only the
# fallback default for the original author's machine.
os.environ.setdefault('ALFRED_ROOT', '/home/hoyeung/alfred/')

# Make the repository root and its `models` directory importable.
# The membership guard keeps sys.path free of duplicates when this cell is re-run.
for _path in (os.environ['ALFRED_ROOT'],
              os.path.join(os.environ['ALFRED_ROOT'], 'models')):
    if _path not in sys.path:
        sys.path.append(_path)

# from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
import torch
import pprint
import json
from data.preprocess import Dataset
from importlib import import_module, reload
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from models.utils.helper_utils import optimizer_to

In [4]:
# import these if we want to debug a model such as seq2seq_nl_baseline.py

import torch
import pprint
import json
from data.preprocess import Dataset
from importlib import import_module, reload
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from models.utils.helper_utils import optimizer_to

# Parser

In [5]:
# These are the default flags present in train_seq2seq.py, rebuilt here as a
# plain namespace because a notebook has no command line to parse.
# Entries tagged `#!` carry the marker from the original cell
# (NOTE(review): presumably the boolean switch flags -- confirm against
# train_seq2seq.py).

parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
args = parser.parse_args([])  # empty argument list -> empty namespace

default_flags = {
    # settings
    'seed': 123,
    'data': 'data/json_feat_2.1.0',
    'splits': 'data/splits/oct21.json',
    'preprocess': False,  #!
    'pp_folder': 'pp',
    'save_every_epoch': False,  #!
    'model': 'seq2seq_nl_baseline',
    'gpu': True,
    'dout': 'exp/model:seq2seq_nl_baseline',
    'resume': False,  #!

    # hyper parameters
    'batch': 8,
    'epoch': 20,
    'lr': 1e-4,
    'decay_epoch': 10,
    'dhid': 512,
    'dframe': 2500,
    'demb': 100,
    'pframe': 300,
    'mask_loss_wt': 1.0,
    'action_loss_wt': 1.0,
    'subgoal_aux_loss_wt': 0.0,
    'pm_aux_loss_wt': 0.0,

    # dropouts
    'zero_goal': False,  #!
    'zero_instr': False,  #!
    'act_dropout': 0.0,
    'lang_dropout': 0.0,
    'input_dropout': 0.0,
    'vis_dropout': 0.3,
    'hstate_dropout': 0.3,
    'attn_dropout': 0.0,
    'actor_dropout': 0.0,
    'word_dropout': 0.0,

    # other settings
    'dec_teacher_forcing': False,  #!
    'temp_no_history': False,  #!

    # debugging
    'fast_epoch': False,  #!
    'dataset_fraction': 0,
}

# Copy every default onto the namespace, in the order listed above.
for flag_name, flag_value in default_flags.items():
    setattr(args, flag_name, flag_value)

In [6]:
# Notebook-specific overrides, applied on top of the default flags.
# args.gpu is deliberately not touched in this cell.
overrides = {
    'preprocess': False,  # Turn this to True if running for the first time

    'model': 'seq2seq_nl_baseline',  # found under models/model/ directory
    'dout': 'exp/model:seq2seq_nl_baseline',

    'dec_teacher_forcing': True,

    # light setup for debugging
    'fast_epoch': True,  # Turn this to False if running for the first time to preprocess data properly
    'epoch': 5,
}

for override_name, override_value in overrides.items():
    setattr(args, override_name, override_value)

In [7]:
# Seed the random number generators for a repeatable debugging run.
# torch.manual_seed only covers torch's generator; Python's `random` module is
# seeded as well in case the training code draws from it.
# NOTE(review): confirm which RNGs the model's run_train actually uses.
import random

random.seed(args.seed)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x7fe0c803b5b0>

# Setup and load data splits

In [8]:
# Make the output directory. exist_ok=True creates it (and any missing
# parents) when absent and does nothing when it is already a directory,
# without a separate check-then-create step.
os.makedirs(args.dout, exist_ok=True)

print('args.out ', args.dout)

args.out  exp/model:seq2seq_nl_baseline


In [9]:
# Read the train/valid/tests split definitions from the JSON file named by args.splits.
with open(args.splits) as splits_file:
    splits = json.load(splits_file)

# Show how many entries each split holds.
split_sizes = {split_name: len(entries) for split_name, entries in splits.items()}
pprint.pprint(split_sizes)

{'tests_seen': 1533,
 'tests_unseen': 1529,
 'train': 21023,
 'valid_seen': 820,
 'valid_unseen': 821}


In [10]:
# Preprocess and save -- only needed once. Afterwards the saved vocabulary is
# simply loaded from disk.
if not args.preprocess:
    # Load the existing vocab file from the data directory.
    vocab_dir = args.data
else:
    print("\nPreprocessing dataset and saving to %s folders ... This will take a while. Do this once as required." % args.pp_folder)
    dataset = Dataset(args, None)
    dataset.preprocess_splits(splits)
    # After preprocessing the vocab is read from the output directory
    # (NOTE(review): presumably written there by preprocess_splits -- confirm).
    vocab_dir = args.dout

vocab = torch.load(os.path.join(vocab_dir, "%s.vocab" % args.pp_folder))

print(vocab)

{'word': Vocab(2360), 'action_low': Vocab(15), 'action_high': Vocab(93)}


# Model and Training

In [11]:
# Load the model architecture.
# This cell forces CPU execution, overriding whatever args.gpu held before.
args.gpu = False

# Import the module named by args.model from the `model` package and reload
# it so that edits made to the file since the last import take effect.
M = import_module('model.{}'.format(args.model))
reload(M)
model = M.Module(args, vocab)
optimizer = None

if args.gpu:
    # Move the model, and the optimizer if one exists, onto the GPU.
    cuda_device = torch.device('cuda')
    model = model.to(cuda_device)
    if optimizer is not None:
        optimizer_to(optimizer, cuda_device)

In [12]:
# Examine the model layers: the bare `model` expression at the end of the
# cell makes the notebook display the module's repr.

model

Module(
  (emb_word): Embedding(2360, 100)
  (emb_action_low): Embedding(15, 100)
  (enc): LSTM(100, 512, batch_first=True, bidirectional=True)
  (enc_att): SelfAttn(
    (scorer): Linear(in_features=1024, out_features=1, bias=True)
  )
  (dec): LanguageDecoder(
    (emb): Embedding(2360, 100)
    (cell): LSTMCell(1124, 1024)
    (attn): DotAttn()
    (input_dropout): Dropout(p=0.0, inplace=False)
    (attn_dropout): Dropout(p=0.0, inplace=False)
    (hstate_dropout): Dropout(p=0.3, inplace=False)
    (word_dropout): Dropout(p=0.0, inplace=False)
    (word): Linear(in_features=2148, out_features=100, bias=True)
    (h_tm1_fc): Linear(in_features=1024, out_features=1024, bias=True)
  )
  (act_dropout): Dropout(p=0.0, inplace=True)
)

In [13]:
# Main training loop -- debug here if breakpoints were inserted.
# `optimizer` is None at this point (it is set to None in the model-loading
# cell), so this notebook does not supply an optimizer of its own.
# NOTE(review): presumably run_train creates one when given None -- confirm
# in the model code.
model.run_train(splits, optimizer=optimizer)

epoch:   0%|          | 0/5 [00:00<?, ?it/s]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

Saving to: exp/model:seq2seq_nl_baseline



batch:  50%|█████     | 1/2 [00:08<00:08,  8.22s/it][A
batch: 100%|██████████| 2/2 [00:14<00:00,  7.43s/it][A
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.09s/it][A
batch: 100%|██████████| 2/2 [00:04<00:00,  2.41s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
ba


Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...


epoch:  20%|██        | 1/5 [00:27<01:51, 27.83s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 0,
 'train': {'lang_instr_bleu': 6.683068037262772e-232,
           'loss_lang_instr': 5.071922063827515,
           'total_loss': 5.071922063827515},
 'valid_seen': {'lang_instr_bleu': 3.9290016210209286e-232,
                'loss_lang_instr': 4.792473793029785,
                'total_loss': 4.792473793029785},
 'valid_unseen': {'lang_instr_bleu': 3.1867248496889685e-232,
                  'loss_lang_instr': 4.029761552810669,
                  'total_loss': 4.029761552810669}}



batch:  50%|█████     | 1/2 [00:04<00:04,  4.71s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.95s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:02<00:02,  2.05s/it][A
batch: 100%|██████████| 2/2 [00:03<00:00,  1.60s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:01<00:01,  1.89s/it][A
batch: 100%|██████████| 2/2 [00:02<00:00,  1.49s/it][A



Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...


epoch:  40%|████      | 2/5 [00:46<01:14, 24.93s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 1,
 'train': {'lang_instr_bleu': 5.088483292973611e-156,
           'loss_lang_instr': 4.661640405654907,
           'total_loss': 4.661640405654907},
 'valid_seen': {'lang_instr_bleu': 1.5549792577983414e-156,
                'loss_lang_instr': 4.327688097953796,
                'total_loss': 4.327688097953796},
 'valid_unseen': {'lang_instr_bleu': 7.194683268642414e-232,
                  'loss_lang_instr': 3.788380742073059,
                  'total_loss': 3.788380742073059}}



batch:  50%|█████     | 1/2 [00:05<00:05,  5.00s/it][A
batch: 100%|██████████| 2/2 [00:08<00:00,  4.47s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:01<00:01,  1.33s/it][A
batch: 100%|██████████| 2/2 [00:02<00:00,  1.23s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:01<00:01,  1.59s/it][A
batch: 100%|██████████| 2/2 [00:03<00:00,  1.51s/it][A



Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...


epoch:  60%|██████    | 3/5 [01:02<00:44, 22.48s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 2,
 'train': {'lang_instr_bleu': 7.362170604348541e-156,
           'loss_lang_instr': 4.332609176635742,
           'total_loss': 4.332609176635742},
 'valid_seen': {'lang_instr_bleu': 1.3501496389559961e-156,
                'loss_lang_instr': 4.21867561340332,
                'total_loss': 4.21867561340332},
 'valid_unseen': {'lang_instr_bleu': 1.4440599808465304e-156,
                  'loss_lang_instr': 3.5581694841384888,
                  'total_loss': 3.5581694841384888}}



batch:  50%|█████     | 1/2 [00:04<00:04,  4.87s/it][A
batch: 100%|██████████| 2/2 [00:11<00:00,  5.64s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:01<00:01,  1.88s/it][A
batch: 100%|██████████| 2/2 [00:03<00:00,  1.65s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:01<00:01,  1.17s/it][A
batch: 100%|██████████| 2/2 [00:03<00:00,  1.51s/it][A


Found new best valid_unseen!! Saving...


epoch:  80%|████████  | 4/5 [01:22<00:21, 21.65s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 3,
 'train': {'lang_instr_bleu': 7.671953962491568e-156,
           'loss_lang_instr': 4.126596212387085,
           'total_loss': 4.126596212387085},
 'valid_seen': {'lang_instr_bleu': 1.3501496389559961e-156,
                'loss_lang_instr': 4.300110816955566,
                'total_loss': 4.300110816955566},
 'valid_unseen': {'lang_instr_bleu': 2.813714642243753e-156,
                  'loss_lang_instr': 3.5424221754074097,
                  'total_loss': 3.5424221754074097}}



batch:  50%|█████     | 1/2 [00:04<00:04,  4.22s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.57s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:02<00:02,  2.28s/it][A
batch: 100%|██████████| 2/2 [00:04<00:00,  2.08s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:02<00:02,  2.08s/it][A
batch: 100%|██████████| 2/2 [00:04<00:00,  2.30s/it][A



Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...


epoch: 100%|██████████| 5/5 [01:45<00:00, 21.12s/it]

{'epoch': 4,
 'train': {'lang_instr_bleu': 7.371685413995356e-156,
           'loss_lang_instr': 4.101462244987488,
           'total_loss': 4.101462244987488},
 'valid_seen': {'lang_instr_bleu': 5.5860632126028606e-80,
                'loss_lang_instr': 4.090612888336182,
                'total_loss': 4.090612888336182},
 'valid_unseen': {'lang_instr_bleu': 8.81515743544235e-80,
                  'loss_lang_instr': 3.4359012842178345,
                  'total_loss': 3.4359012842178345}}





# Look at debugging outputs

In [None]:
import subprocess

# List the files in the output directory.
# The command is given as an argument list (program, then one element per
# argument). A single 'ls -l <dir>' string inside the list would be treated
# as the program name itself and fail with FileNotFoundError.
# The listing is captured and printed explicitly so it is shown as cell output.
print(subprocess.check_output(['ls', '-l', args.dout], universal_newlines=True))

In [18]:
# Load `valid_seen.debug.preds.json` from the output directory.
# NOTE(review): despite the `_tr` suffix in the variable name, the file read
# here is the valid_seen one.
debug_preds_path = os.path.join(args.dout, 'valid_seen.debug.preds.json')
with open(debug_preds_path, 'r') as preds_file:
    debug_tr = json.load(preds_file)

In [19]:
# Quick sanity check: the container type first, then the number of entries,
# each printed on its own line.
for summary_value in (type(debug_tr), len(debug_tr)):
    print(summary_value)

dict

In [20]:
# Pick one task to look at in the cells below.
task_num = 'trial_T20190909_115736_122556'

# All the task numbers. This must be the cell's last expression: a notebook
# only displays the value of the final expression, so placing the assignment
# after it would discard the keys.
debug_tr.keys()

dict_keys(['trial_T20190909_115736_122556', 'trial_T20190909_091246_807206', 'trial_T20190918_184236_557252', 'trial_T20190908_052007_212776', 'trial_T20190907_232225_725376', 'trial_T20190908_065238_500229', 'trial_T20190908_192636_561572', 'trial_T20190908_165525_911839', 'trial_T20190911_131350_027076', 'trial_T20190907_151802_277016', 'trial_T20190909_010644_297017', 'trial_T20190909_011522_113515', 'trial_T20190907_183137_838565', 'trial_T20190907_033843_707544', 'trial_T20190910_173916_331859', 'trial_T20190907_164342_432289'])

In [29]:
# Fields logged for each task: the keys of the selected task's entry.
debug_tr[task_num].keys()

dict_keys(['lang_goal', 'lang_instr', 'word_inp_goal', 'word_inp_instr', 'num_inp_goal', 'num_inp_instr', 'action_low', 'action_high', 'p_lang_instr'])

In [50]:
# Gold goal description: the 'lang_goal' field of the selected task.
debug_tr[task_num]['lang_goal']

'Put two spray bottles on a toilet tank.'

In [51]:
# Gold low-level instruction description: the 'lang_instr' field, a list of
# instruction sentences for the selected task.
debug_tr[task_num]['lang_instr']

['Turn left, go straight, turn left to face the blue bin next to the toilet, look up.',
 'Take the purple spray bottle from the cabinet above the toilet.',
 'Go backwards while facing towards the blue bin.',
 'Put the spray bottle on the left side of the toilet tank.',
 'Go forward to the blue bin, turn right, look up.',
 'Take the purple spray bottle from the cabinet above that is first from the right.',
 'Turn to face the blue bin. Go backwards while facing towards the blue bin.',
 'Put the spray bottle on the center of the toilet tank.']

In [52]:
# Gold input low-level action sequence: the 'action_low' field, a list of
# action names for the selected task.
debug_tr[task_num]['action_low']

['LookDown_15',
 'RotateLeft_90',
 'MoveAhead_25',
 'MoveAhead_25',
 'MoveAhead_25',
 'MoveAhead_25',
 'RotateLeft_90',
 'LookUp_15',
 'LookUp_15',
 'LookUp_15',
 'LookUp_15',
 'LookUp_15',
 'OpenObject',
 'PickupObject',
 'CloseObject',
 'LookDown_15',
 'LookDown_15',
 'LookDown_15',
 'LookDown_15',
 'LookDown_15',
 'RotateLeft_90',
 'RotateLeft_90',
 'MoveAhead_25',
 'MoveAhead_25',
 'RotateRight_90',
 'RotateRight_90',
 'LookUp_15',
 'LookUp_15',
 'PutObject',
 'LookDown_15',
 'LookDown_15',
 'MoveAhead_25',
 'MoveAhead_25',
 'RotateRight_90',
 'LookUp_15',
 'LookUp_15',
 'LookUp_15',
 'LookUp_15',
 'OpenObject',
 'PickupObject',
 'CloseObject',
 'LookDown_15',
 'LookDown_15',
 'LookDown_15',
 'LookDown_15',
 'RotateRight_90',
 'MoveAhead_25',
 'MoveAhead_25',
 'RotateRight_90',
 'RotateRight_90',
 'LookUp_15',
 'LookUp_15',
 'PutObject']

In [53]:
# Gold input high-level subgoal action sequence: the 'action_high' field, a
# list of subgoal action names for the selected task.
debug_tr[task_num]['action_high']

['GotoLocation',
 'PickupObject',
 'GotoLocation',
 'PutObject',
 'GotoLocation',
 'PickupObject',
 'GotoLocation',
 'PutObject',
 'NoOp']

In [54]:
# Prediction result: the 'p_lang_instr' field, the predicted instruction as a
# flat list of word tokens for the selected task.
debug_tr[task_num]['p_lang_instr']

['the',
 'left',
 'and',
 'family',
 'to',
 'to',
 'to',
 'left',
 'to',
 'the',
 'the',
 'toilet',
 'to',
 'coach',
 'the',
 'the',
 'toilet',
 '.',
 'of',
 'drop',
 'the',
 'turn',
 'the',
 'toilet',
 'edge',
 'bottle',
 'the',
 'the',
 'toilet',
 '.',
 'the',
 'toilet',
 '.',
 'the',
 'the',
 'the',
 'the',
 'the',
 'the',
 'toilet',
 'to',
 'coach',
 'turn',
 'the',
 'toilet',
 'bottle',
 'the',
 'the',
 'toilet',
 '.',
 'transfer',
 'the',
 'toilet',
 '.',
 'coach',
 'turn',
 'to',
 'the',
 'the',
 'toilet',
 'right',
 'coach',
 'family',
 'left',
 'and',
 'family',
 'to',
 'the',
 'turn',
 'the',
 'toilet',
 'edge',
 'bottle',
 'the',
 'the',
 'toilet',
 '.',
 'the',
 'the',
 'the',
 'the',
 'the',
 'toilet',
 'of',
 'turn',
 'left',
 'the',
 'the',
 'toilet',
 'right',
 'coach',
 'turn',
 'the',
 'to',
 'the',
 'to',
 'the',
 'toilet',
 'to',
 'coach',
 'turn',
 'the',
 'toilet',
 'bottle',
 'the',
 'the',
 'toilet',
 'the',
 'the',
 'toilet',
 '.',
 'handbooks',
 'turn',
 'and'