# Training cycle debugging

This notebook runs code copied from `train_seq2seq.py` cell by cell, so that bugs in the training cycle can be caught and inspected interactively.

In [1]:
import os
# Respect an ALFRED_ROOT that is already exported in the environment; fall back
# to the path this notebook was developed with only when none is set.
os.environ.setdefault('ALFRED_ROOT', '/root/data/home/hoyeung/alfred/')

import sys
# Make the repository root and its `models` directory importable for the
# project imports that follow. The membership check keeps sys.path free of
# duplicate entries when this cell is re-run in the same kernel.
for _path in (os.environ['ALFRED_ROOT'], os.path.join(os.environ['ALFRED_ROOT'], 'models')):
    if _path not in sys.path:
        sys.path.append(_path)

# from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
import torch
import pprint
import json
from data.preprocess import Dataset
from importlib import import_module, reload
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from models.utils.helper_utils import optimizer_to

# Parser

In [2]:
# Default flag values, mirrored from train_seq2seq.py.
# The parser declares no options, so parsing an empty argument list simply
# yields an empty namespace that is filled in below.
# The `#!` tags are carried over from the original cell; they appear only on
# boolean flags.

parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
args = parser.parse_args([])

vars(args).update({
    # settings
    'seed': 123,
    'data': '/root/data_alfred/json_feat_2.1.0',
    'splits': '/root/data_alfred/splits/may17.json',
    'preprocess': False,  #!
    'pp_folder': 'pp',
    'monitor_train_every': 10,
    'save_every_epoch': False,  #!
    'model': 'seq2seq_per_subgoal',
    'gpu': True,
    'dout': 'exp/model:seq2seq_per_subgoal',
    'resume': False,  #!

    # hyper parameters
    'batch': 8,
    'epoch': 20,
    'lr': 1e-4,
    'decay_epoch': 10,
    'dhid': 512,
    'dframe': 2500,
    'demb': 100,
    'pframe': 300,
    'mask_loss_wt': 1.,
    'action_loss_wt': 1.,
    'subgoal_aux_loss_wt': 0.,
    'pm_aux_loss_wt': 0.,

    # architecture ablations
    # 'maxpool_over_object_states': False,
    # 'aux_loss_over_object_states': False,
    'encoder_addons': 'none',
    'decoder_addons': 'none',

    # dropouts
    'zero_goal': False,  #!
    'zero_instr': False,  #!
    'act_dropout': 0.,
    'lang_dropout': 0.,
    'input_dropout': 0.,
    'vis_dropout': 0.3,
    'hstate_dropout': 0.3,
    'attn_dropout': 0.,
    'actor_dropout': 0.,
    'word_dropout': 0.,

    # other settings
    'train_teacher_forcing': False,  #!
    'train_student_forcing_prob': 0.1,
    'temp_no_history': False,  #!

    # debugging
    'fast_epoch': False,  #!
    'dataset_fraction': 0,
})

In [3]:
# Notebook-specific overrides, applied on top of the default flags.
vars(args).update(
    preprocess=False,  # Turn this to True if running for the first time
    model='seq2seq_per_subgoal',  # found under models/model/ directory
    dout='/root/data_alfred/exp/model:seq2seq_per_subgoal_fast_epoch',
    train_teacher_forcing=True,
    gpu=False,
    # light setup for debugging
    fast_epoch=True,  # Turn this to False if running for the first time to preprocess data properly
    epoch=20,
)

In [4]:
# args.maxpool_over_object_states = True
# args.aux_loss_over_object_states = True

# Switch the encoder and decoder add-ons away from their 'none' defaults for this run.
args.encoder_addons, args.decoder_addons = 'biattn_obj', 'aux_loss'

In [5]:
# Seed PyTorch's default random number generator with the configured seed.
# The call returns the Generator object, which is what this cell displays.
torch.manual_seed(args.seed)

<torch._C.Generator at 0x7f9e10179870>

# Setup and load data splits

In [6]:
# Make sure the output directory exists. exist_ok=True turns the call into a
# no-op when the directory is already there, which avoids a separate isdir()
# check that could race with another process creating the same directory.
os.makedirs(args.dout, exist_ok=True)

# Echo the resolved output directory (the flag is named `dout`).
print('args.dout ', args.dout)

args.out  /root/data_alfred/exp/model:seq2seq_per_subgoal_fast_epoch


In [7]:
# Read the split definitions from the JSON file named by args.splits.
with open(args.splits) as splits_file:
    splits = json.load(splits_file)

# Summarise the splits: one entry per split name, giving its number of items.
pprint.pprint({split_name: len(entries) for split_name, entries in splits.items()})

{'tests_seen': 1533,
 'tests_unseen': 1529,
 'train': 20806,
 'train_sanity': 246,
 'train_sanity_v1': 246,
 'valid_seen': 814,
 'valid_seen_v1': 249,
 'valid_unseen': 818,
 'valid_unseen_v1': 254}


In [8]:
# Load the vocabulary, running the preprocessing pass first when it is requested.
# Preprocessing only needs to happen once.
if not args.preprocess:
    # No preprocessing: the vocabulary file is read from the data directory.
    vocab_dir = args.data
else:
    print("\nPreprocessing dataset and saving to %s folders ... This will take a while. Do this once as required." % args.pp_folder)
    dataset = Dataset(args, None)
    dataset.preprocess_splits(splits)
    # After preprocessing, the vocabulary file is read from the output directory.
    vocab_dir = args.dout

vocab = torch.load(os.path.join(vocab_dir, "%s.vocab" % args.pp_folder))

print(vocab)

{'action_high': Vocab(93), 'word': Vocab(2360), 'action_low': Vocab(15)}


In [9]:
# Load the object vocabulary file that sits in the data directory.
object_vocab = torch.load(os.path.join(args.data, 'objects.vocab'))

# Model and Training

In [10]:
# Build the model: either a fresh one or one restored from a checkpoint.
args.gpu = False
# Set to a checkpoint path, e.g. os.path.join(args.dout, 'best_seen.pth'), to resume.
args.resume = None

# Import the module that implements the architecture selected by args.model.
M = import_module('model.{}'.format(args.model))

if not args.resume:
    # Fresh start: new model, no optimizer yet, begin at epoch 0.
    model = M.Module(args, vocab, object_vocab)
    optimizer, start_epoch = None, 0
else:
    print("Loading: " + args.resume)
    model, optimizer, start_epoch = M.Module.load(args.resume)
    print("Restarting at epoch {}/{}".format(start_epoch, args.epoch - 1))
    if start_epoch >= args.epoch:
        # The checkpoint has already reached the configured number of epochs.
        print('Checkpoint already finished {}/{} epochs.'.format(start_epoch, args.epoch))
        sys.exit(0)

if args.gpu:
    # Move the model, and the optimizer when there is one, to the CUDA device.
    cuda_device = torch.device('cuda')
    model = model.to(cuda_device)
    if optimizer is not None:
        optimizer_to(optimizer, cuda_device)

In [11]:
# Examine the model layers: the bare expression makes the notebook display
# the model's repr as this cell's output.
model

Module(
  (emb_word): Embedding(2360, 100)
  (emb_action_low): Embedding(15, 100)
  (emb_object): Embedding(109, 512, padding_idx=0)
  (enc): ActionFrameAttnEncoderPerSubgoalObjAttn(
    (emb): Embedding(15, 100)
    (vis_dropout): Dropout(p=0.3, inplace=False)
    (act_dropout): Dropout(p=0.0, inplace=True)
    (vis_encoder): ResnetVisualEncoder(
      (conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
      (conv2): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
      (fc): Linear(in_features=3136, out_features=2500, bias=True)
      (bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (enc_att): SelfAttn(
      (scorer): Linear(in_features=1024, out_features=1, bias=True)
    )
    (encoder): LSTM(2600, 512, batch_first=True, bidirectional=True)
    (input_dropout): Dropout(p=0.0, inplace=False)
    (hstate_dropout): Dropout(p=0.3, i

In [None]:
# Main training loop -- debug here if breakpoints were inserted.
# `optimizer` and `start_epoch` come from the model-loading cell: they are
# None and 0 for a fresh model, or the values restored from a checkpoint.
model.run_train(splits, optimizer=optimizer, start_epoch=start_epoch)

epoch:   0%|          | 0/20 [00:00<?, ?it/s]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

Saving to: /root/data_alfred/exp/model:seq2seq_per_subgoal_fast_epoch


  empty_tensor = torch.ones(torch.tensor(v[0][0][0]).unsqueeze(0).shape, device=device, dtype=torch.float if ('frames' in k) else torch.long) * self.pad
  seqs.append(torch.tensor(v[subgoal_i][batch_i], device=device, dtype=torch.float if ('frames' in k) else torch.long))

batch:  50%|█████     | 1/2 [00:18<00:18, 18.77s/it][A
batch: 100%|██████████| 2/2 [00:47<00:00, 23.67s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:07<00:07,  7.10s/it][A
batch: 100%|██████████| 2/2 [00:10<00:00,  5.32s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:06<00:06,  6.94s/it][A
batch: 100%|██████████| 2/2 [00:16<00:00,  8.36s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:08<00:08,  8.95s/it][A
batch: 100%|██████████| 2/2 [00:18<00:00,  9.10s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:40<00:40, 40.19s/it][A
batch: 100%|██████████| 2/


Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...
Found new best train_sanity!! Saving...


epoch:   5%|▌         | 1/20 [06:26<2:02:30, 386.88s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 0,
 'train_sanity': {'BLEU': 1.7724518441416312e-157,
                  'perplexity': 155.2275619506836,
                  'total_loss': 11.482679843902588},
 'valid_seen': {'BLEU': 2.6851921485929244e-157,
                'perplexity': 160.757568359375,
                'total_loss': 11.707912921905518},
 'valid_unseen': {'BLEU': 1.3425960742964622e-157,
                  'perplexity': 156.80821990966797,
                  'total_loss': 11.604719638824463}}
epoch_time                    386.879                                 
compute_metrics_validation_sets337.971                                 
forward_batch_train_with_iterate47.346                                  
forward_batch_train           47.001                                  
torch_save_valid_seen         0.337                                   
torch_save_last               0.331                                   
torch_save_train_sanity       0.33                                    
torch_save_valid_unseen     


batch:  50%|█████     | 1/2 [00:17<00:17, 17.33s/it][A
batch: 100%|██████████| 2/2 [00:33<00:00, 16.84s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.04s/it][A
batch: 100%|██████████| 2/2 [00:06<00:00,  3.50s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.96s/it][A
batch: 100%|██████████| 2/2 [00:12<00:00,  6.48s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:07<00:07,  7.03s/it][A
batch: 100%|██████████| 2/2 [00:12<00:00,  6.18s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:35<00:35, 35.99s/it][A
batch: 100%|██████████| 2/2 [00:54<00:00, 27.27s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:33<00:33, 33.75s/it][A
batch: 100%|██████████| 2/2 [01:09<00:00, 34.77s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3


Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...
Found new best train_sanity!! Saving...


epoch:  10%|█         | 2/20 [10:59<1:45:44, 352.47s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 1,
 'train_sanity': {'BLEU': 2.924577758301233e-156,
                  'perplexity': 105.12337875366211,
                  'total_loss': 10.003964900970459},
 'valid_seen': {'BLEU': 3.033164078734298e-81,
                'perplexity': 114.88747787475586,
                'total_loss': 10.053014755249023},
 'valid_unseen': {'BLEU': 2.1957490126883007e-156,
                  'perplexity': 107.27132034301758,
                  'total_loss': 9.859097480773926}}
epoch_time                    659.061                                 
compute_metrics_validation_sets563.0                                   
forward_batch_train_with_iterate81.037                                  
forward_batch_train           80.39                                   
torch_save_last               5.039                                   
torch_save_train_sanity       3.159                                   
torch_save_valid_unseen       2.828                                   
torch_save_valid_seen        


batch:  50%|█████     | 1/2 [00:17<00:17, 17.69s/it][A
batch: 100%|██████████| 2/2 [00:32<00:00, 16.16s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.55s/it][A
batch: 100%|██████████| 2/2 [00:06<00:00,  3.26s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.91s/it][A
batch: 100%|██████████| 2/2 [00:11<00:00,  5.78s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.19s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.85s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:33<00:33, 33.20s/it][A
batch: 100%|██████████| 2/2 [00:47<00:00, 23.55s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:30<00:30, 30.30s/it][A
batch: 100%|██████████| 2/2 [01:02<00:00, 31.49s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3


Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...
Found new best train_sanity!! Saving...


epoch:  15%|█▌        | 3/20 [15:10<1:31:19, 322.31s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 2,
 'train_sanity': {'BLEU': 7.5208704470569245e-81,
                  'perplexity': 87.17150497436523,
                  'total_loss': 9.522510051727295},
 'valid_seen': {'BLEU': 1.126297066127311e-79,
                'perplexity': 96.10115432739258,
                'total_loss': 9.471346855163574},
 'valid_unseen': {'BLEU': 3.293123435144804e-81,
                  'perplexity': 87.1397476196289,
                  'total_loss': 9.265686511993408}}
epoch_time                    910.979                                 
compute_metrics_validation_sets767.39                                  
forward_batch_train_with_iterate113.363                                 
forward_batch_train           112.433                                 
torch_save_last               10.392                                  
torch_save_train_sanity       6.965                                   
torch_save_valid_unseen       5.265                                   
torch_save_valid_seen         4.707  


batch:  50%|█████     | 1/2 [00:15<00:15, 15.83s/it][A
batch: 100%|██████████| 2/2 [00:32<00:00, 16.22s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.99s/it][A
batch: 100%|██████████| 2/2 [00:05<00:00,  2.96s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.91s/it][A
batch: 100%|██████████| 2/2 [00:10<00:00,  5.27s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.95s/it][A
batch: 100%|██████████| 2/2 [00:07<00:00,  3.75s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:30<00:30, 30.61s/it][A
batch: 100%|██████████| 2/2 [00:46<00:00, 23.28s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:30<00:30, 30.22s/it][A
batch: 100%|██████████| 2/2 [01:03<00:00, 31.53s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3

{'epoch': 3,
 'train_sanity': {'BLEU': 5.223081130927093e-81,
                  'perplexity': 78.18582153320312,
                  'total_loss': 9.252532482147217},
 'valid_seen': {'BLEU': 1.1815502053523823e-80,
                'perplexity': 84.53696060180664,
                'total_loss': 9.125615119934082},
 'valid_unseen': {'BLEU': 2.07426013162108e-156,
                  'perplexity': 78.54051971435547,
                  'total_loss': 9.00125789642334}}
epoch_time                    1140.62                                 
compute_metrics_validation_sets962.102                                 
forward_batch_train_with_iterate145.815                                 
forward_batch_train           144.606                                 
torch_save_last               12.689                                  
torch_save_train_sanity       6.965                                   
torch_save_valid_unseen       5.265                                   
torch_save_valid_seen         4.707  


batch:  50%|█████     | 1/2 [00:15<00:15, 15.33s/it][A
batch: 100%|██████████| 2/2 [00:32<00:00, 16.26s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.96s/it][A
batch: 100%|██████████| 2/2 [00:06<00:00,  3.22s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:06<00:06,  6.11s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.84s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.37s/it][A
batch: 100%|██████████| 2/2 [00:10<00:00,  5.11s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:32<00:32, 32.75s/it][A
batch: 100%|██████████| 2/2 [00:45<00:00, 22.92s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:29<00:29, 29.60s/it][A
batch: 100%|██████████| 2/2 [01:05<00:00, 32.79s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3

{'epoch': 4,
 'train_sanity': {'BLEU': 1.3020014462697315e-155,
                  'perplexity': 71.10909271240234,
                  'total_loss': 9.010494709014893},
 'valid_seen': {'BLEU': 1.4846437723891347e-155,
                'perplexity': 75.15960311889648,
                'total_loss': 8.855387210845947},
 'valid_unseen': {'BLEU': 1.7443462387331522e-155,
                  'perplexity': 72.67127227783203,
                  'total_loss': 8.800154209136963}}
epoch_time                    1377.559                                
compute_metrics_validation_sets1164.004                                
forward_batch_train_with_iterate178.344                                 
forward_batch_train           176.783                                 
torch_save_last               14.964                                  
torch_save_train_sanity       6.965                                   
torch_save_valid_unseen       5.265                                   
torch_save_valid_seen         4


batch:  50%|█████     | 1/2 [00:14<00:14, 14.66s/it][A
batch: 100%|██████████| 2/2 [00:29<00:00, 14.89s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.64s/it][A
batch: 100%|██████████| 2/2 [00:05<00:00,  2.89s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.68s/it][A
batch: 100%|██████████| 2/2 [00:10<00:00,  5.32s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.93s/it][A
batch: 100%|██████████| 2/2 [00:08<00:00,  4.26s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:33<00:33, 33.62s/it][A
batch: 100%|██████████| 2/2 [00:48<00:00, 24.47s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:28<00:28, 28.74s/it][A
batch: 100%|██████████| 2/2 [01:04<00:00, 32.45s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3

{'epoch': 5,
 'train_sanity': {'BLEU': 1.1871283924259859e-155,
                  'perplexity': 63.69926643371582,
                  'total_loss': 8.840510845184326},
 'valid_seen': {'BLEU': 1.6223348257232558e-155,
                'perplexity': 66.11241149902344,
                'total_loss': 8.663967609405518},
 'valid_unseen': {'BLEU': 1.5309408648864078e-155,
                  'perplexity': 65.72210311889648,
                  'total_loss': 8.639078617095947}}
epoch_time                    1614.727                                
compute_metrics_validation_sets1367.744                                
forward_batch_train_with_iterate208.131                                 
forward_batch_train           206.256                                 
torch_save_last               18.371                                  
torch_save_train_sanity       6.965                                   
torch_save_valid_unseen       5.265                                   
torch_save_valid_seen         4


batch:  50%|█████     | 1/2 [00:18<00:18, 18.16s/it][A
batch: 100%|██████████| 2/2 [00:32<00:00, 16.27s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.28s/it][A
batch: 100%|██████████| 2/2 [00:05<00:00,  2.62s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.15s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.66s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.05s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.93s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:36<00:36, 36.29s/it][A
batch: 100%|██████████| 2/2 [00:51<00:00, 25.63s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:28<00:28, 28.80s/it][A
batch: 100%|██████████| 2/2 [01:09<00:00, 34.59s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3


Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...
Found new best train_sanity!! Saving...


epoch:  35%|███▌      | 7/20 [31:12<56:59, 263.01s/it]  
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 6,
 'train_sanity': {'BLEU': 8.795341470012513e-80,
                  'perplexity': 58.91527557373047,
                  'total_loss': 8.725086688995361},
 'valid_seen': {'BLEU': 1.5602035303574024e-79,
                'perplexity': 59.71653747558594,
                'total_loss': 8.534575939178467},
 'valid_unseen': {'BLEU': 8.688939422852377e-81,
                  'perplexity': 60.35763931274414,
                  'total_loss': 8.50069808959961}}
epoch_time                    1872.586                                
compute_metrics_validation_sets1577.124                                
forward_batch_train_with_iterate240.669                                 
forward_batch_train           238.469                                 
torch_save_last               24.256                                  
torch_save_train_sanity       11.133                                  
torch_save_valid_unseen       8.172                                   
torch_save_valid_seen         7.41   


batch:  50%|█████     | 1/2 [00:16<00:16, 16.98s/it][A
batch: 100%|██████████| 2/2 [00:33<00:00, 16.85s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.97s/it][A
batch: 100%|██████████| 2/2 [00:05<00:00,  2.91s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.65s/it][A
batch: 100%|██████████| 2/2 [00:08<00:00,  4.35s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.84s/it][A
batch: 100%|██████████| 2/2 [00:10<00:00,  5.25s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:31<00:31, 31.02s/it][A
batch: 100%|██████████| 2/2 [00:43<00:00, 22.00s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:35<00:35, 35.71s/it][A
batch: 100%|██████████| 2/2 [01:06<00:00, 33.08s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3


Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...
Found new best train_sanity!! Saving...


epoch:  40%|████      | 8/20 [35:27<52:07, 260.67s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 7,
 'train_sanity': {'BLEU': 0.004925737546891541,
                  'perplexity': 55.29231834411621,
                  'total_loss': 8.633923530578613},
 'valid_seen': {'BLEU': 0.005792379855923519,
                'perplexity': 55.56051826477051,
                'total_loss': 8.422352313995361},
 'valid_unseen': {'BLEU': 0.002198131661770655,
                  'perplexity': 56.51415824890137,
                  'total_loss': 8.393368244171143}}
epoch_time                    2127.779                                
compute_metrics_validation_sets1782.68                                 
forward_batch_train_with_iterate274.377                                 
forward_batch_train           271.9                                   
torch_save_last               27.45                                   
torch_save_train_sanity       16.018                                  
torch_save_valid_unseen       13.483                                  
torch_save_valid_seen         9.689     


batch:  50%|█████     | 1/2 [00:18<00:18, 18.63s/it][A
batch: 100%|██████████| 2/2 [00:35<00:00, 17.69s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.34s/it][A
batch: 100%|██████████| 2/2 [00:06<00:00,  3.32s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.57s/it][A
batch: 100%|██████████| 2/2 [00:12<00:00,  6.31s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.14s/it][A
batch: 100%|██████████| 2/2 [00:08<00:00,  4.48s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:30<00:30, 30.78s/it][A
batch: 100%|██████████| 2/2 [00:46<00:00, 23.22s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:32<00:32, 32.75s/it][A
batch: 100%|██████████| 2/2 [01:07<00:00, 33.91s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3

Found new best valid_unseen!! Saving...


epoch:  45%|████▌     | 9/20 [39:42<47:27, 258.84s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 8,
 'train_sanity': {'BLEU': 3.6881298133374667e-79,
                  'perplexity': 49.15816879272461,
                  'total_loss': 8.47033166885376},
 'valid_seen': {'BLEU': 0.00039599275611208745,
                'perplexity': 49.4355411529541,
                'total_loss': 8.249424934387207},
 'valid_unseen': {'BLEU': 0.0037429670246547503,
                  'perplexity': 50.82699203491211,
                  'total_loss': 8.263648748397827}}
epoch_time                    2382.369                                
compute_metrics_validation_sets1995.255                                
forward_batch_train_with_iterate309.762                                 
forward_batch_train           306.934                                 
torch_save_last               31.51                                   
torch_save_train_sanity       16.018                                  
torch_save_valid_unseen       15.811                                  
torch_save_valid_seen         9.689  


batch:  50%|█████     | 1/2 [00:17<00:17, 17.69s/it][A
batch: 100%|██████████| 2/2 [00:32<00:00, 16.29s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.12s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.53s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:07<00:07,  7.66s/it][A
batch: 100%|██████████| 2/2 [00:15<00:00,  7.56s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:07<00:07,  7.54s/it][A
batch: 100%|██████████| 2/2 [00:14<00:00,  7.03s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:57<00:57, 57.62s/it][A
batch: 100%|██████████| 2/2 [01:27<00:00, 43.60s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:58<00:58, 58.86s/it][A
batch: 100%|██████████| 2/2 [01:55<00:00, 57.66s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [01:0

{'epoch': 9,
 'train_sanity': {'BLEU': 4.063552690154093e-79,
                  'perplexity': 45.16494178771973,
                  'total_loss': 8.336349964141846},
 'valid_seen': {'BLEU': 0.0003199893409621375,
                'perplexity': 45.35989952087402,
                'total_loss': 8.10684871673584},
 'valid_unseen': {'BLEU': 0.0034167964786093723,
                  'perplexity': 47.23642158508301,
                  'total_loss': 8.165297031402588}}
epoch_time                    2762.756                                
compute_metrics_validation_sets2340.539                                
forward_batch_train_with_iterate342.338                                 
forward_batch_train           339.211                                 
torch_save_last               33.801                                  
torch_save_train_sanity       16.018                                  
torch_save_valid_unseen       15.811                                  
torch_save_valid_seen         9.689   


batch:  50%|█████     | 1/2 [00:16<00:16, 16.48s/it][A
batch: 100%|██████████| 2/2 [00:34<00:00, 17.22s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.09s/it][A
batch: 100%|██████████| 2/2 [00:04<00:00,  2.37s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:06<00:06,  6.34s/it][A
batch: 100%|██████████| 2/2 [00:11<00:00,  5.60s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.10s/it][A
batch: 100%|██████████| 2/2 [00:10<00:00,  5.42s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:32<00:32, 32.40s/it][A
batch: 100%|██████████| 2/2 [00:46<00:00, 23.13s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:50<00:50, 50.12s/it][A
batch: 100%|██████████| 2/2 [01:28<00:00, 44.24s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3

Found new best valid_unseen!! Saving...


epoch:  55%|█████▌    | 11/20 [50:36<43:18, 288.73s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 10,
 'train_sanity': {'BLEU': 3.80434944810643e-79,
                  'perplexity': 44.79294776916504,
                  'total_loss': 8.318325757980347},
 'valid_seen': {'BLEU': 0.0014430363098751342,
                'perplexity': 44.969770431518555,
                'total_loss': 8.08597183227539},
 'valid_unseen': {'BLEU': 0.006402887332794452,
                  'perplexity': 46.84660339355469,
                  'total_loss': 8.146264553070068}}
epoch_time                    3036.137                                
compute_metrics_validation_sets2571.526                                
forward_batch_train_with_iterate376.787                                 
forward_batch_train           373.298                                 
torch_save_last               39.554                                  
torch_save_valid_unseen       17.753                                  
torch_save_train_sanity       16.018                                  
torch_save_valid_seen         9.689   


batch:  50%|█████     | 1/2 [00:16<00:16, 16.46s/it][A
batch: 100%|██████████| 2/2 [00:30<00:00, 15.11s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.42s/it][A
batch: 100%|██████████| 2/2 [00:06<00:00,  3.34s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.23s/it][A
batch: 100%|██████████| 2/2 [00:10<00:00,  5.30s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.60s/it][A
batch: 100%|██████████| 2/2 [00:06<00:00,  3.40s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:23<00:23, 23.29s/it][A
batch: 100%|██████████| 2/2 [00:34<00:00, 17.10s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:31<00:31, 31.62s/it][A
batch: 100%|██████████| 2/2 [00:56<00:00, 28.30s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3

Found new best valid_unseen!! Saving...


epoch:  60%|██████    | 12/20 [54:12<35:36, 267.03s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 11,
 'train_sanity': {'BLEU': 4.052394682647e-79,
                  'perplexity': 44.37904167175293,
                  'total_loss': 8.30155324935913},
 'valid_seen': {'BLEU': 0.0001735138842122374,
                'perplexity': 44.69742774963379,
                'total_loss': 8.071208953857422},
 'valid_unseen': {'BLEU': 0.010346157650158921,
                  'perplexity': 46.59224891662598,
                  'total_loss': 8.131822109222412}}
epoch_time                    3252.531                                
compute_metrics_validation_sets2750.029                                
forward_batch_train_with_iterate407.014                                 
forward_batch_train           403.218                                 
torch_save_last               43.372                                  
torch_save_valid_unseen       21.359                                  
torch_save_train_sanity       16.018                                  
torch_save_valid_seen         9.689      


batch:  50%|█████     | 1/2 [00:17<00:17, 17.63s/it][A
batch: 100%|██████████| 2/2 [00:33<00:00, 16.92s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.01s/it][A
batch: 100%|██████████| 2/2 [00:07<00:00,  3.53s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.67s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.77s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:05<00:05,  5.88s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.92s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:32<00:32, 32.58s/it][A
batch: 100%|██████████| 2/2 [00:47<00:00, 23.68s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:34<00:34, 34.78s/it][A
batch: 100%|██████████| 2/2 [01:09<00:00, 34.52s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:3


Found new best valid_seen!! Saving...
Found new best valid_unseen!! Saving...


epoch:  65%|██████▌   | 13/20 [58:25<30:40, 262.92s/it]
batch:   0%|          | 0/2 [00:00<?, ?it/s][A

{'epoch': 12,
 'train_sanity': {'BLEU': 4.8624303314505153e-79,
                  'perplexity': 44.08770942687988,
                  'total_loss': 8.286047458648682},
 'valid_seen': {'BLEU': 0.007105578590705607,
                'perplexity': 44.502891540527344,
                'total_loss': 8.053915977478027},
 'valid_unseen': {'BLEU': 0.01290240942771798,
                  'perplexity': 46.460554122924805,
                  'total_loss': 8.11527705192566}}
epoch_time                    3505.862                                
compute_metrics_validation_sets2957.35                                 
forward_batch_train_with_iterate440.855                                 
forward_batch_train           436.788                                 
torch_save_last               49.405                                  
torch_save_valid_unseen       23.959                                  
torch_save_train_sanity       16.018                                  
torch_save_valid_seen         12.962 


batch:  50%|█████     | 1/2 [00:14<00:14, 14.58s/it][A
batch: 100%|██████████| 2/2 [00:31<00:00, 15.56s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.73s/it][A
batch: 100%|██████████| 2/2 [00:07<00:00,  3.74s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:04<00:04,  4.52s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.53s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:03<00:03,  3.69s/it][A
batch: 100%|██████████| 2/2 [00:09<00:00,  4.67s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A
batch:  50%|█████     | 1/2 [00:27<00:27, 27.44s/it][A
batch: 100%|██████████| 2/2 [00:44<00:00, 22.45s/it][A

batch:   0%|          | 0/2 [00:00<?, ?it/s][A

In [None]:
# Scratch inspection of `state_t` and of `enc_act` around step `t`.
# Relies on `state_t`, `enc_act` and `t` already existing in the kernel; none of
# them is created in this cell.
# NOTE(review): `state_t` is indexed at 0 and 1, so it presumably holds a pair
# of tensors (e.g. recurrent hidden/cell state) -- confirm.
#
# Jupyter echoes only the final expression of a cell by default, so the
# intermediate values are printed explicitly; as bare expressions they would be
# computed and silently discarded.
print('torch.sum(state_t[0]):', torch.sum(state_t[0]))

print('torch.sum(state_t[1]):', torch.sum(state_t[1]))

print('enc_act[:,t-1,:]:', enc_act[:,t-1,:])
print('enc_act[:,t+1,:]:', enc_act[:,t+1,:])
# Left as the trailing bare expression so it is still captured as the cell's Out[...] value.
enc_act[:,t,:]

In [None]:
# Scratch lookup into feat['object_visibility'] for subgoal `subgoal_i`.
# torch.arange(batch_size) is paired element-wise with `action_low_seq_lengths`
# as index arrays over the first two axes; the trailing `:` keeps the third axis whole.
# Assumes `action_low_seq_lengths` holds one index per batch row -- TODO confirm.
# NOTE(review): indexing at the sequence *length* (rather than length - 1) points
# one past the last step unless the sequences carry an extra stop/pad slot -- confirm
# this is intended.
# Relies on `feat`, `subgoal_i`, `batch_size` and `action_low_seq_lengths` from earlier cells.
feat['object_visibility'][subgoal_i][torch.arange(batch_size),action_low_seq_lengths,:]

In [None]:
# Scratch lookup into feat['object_visibility'] for subgoal `subgoal_i`, with the
# row count read from feat['action_low'][0].shape[0] instead of a separate variable.
# The arange of row indices is paired element-wise with `action_low_seq_lengths`
# over the first two axes; the trailing `:` keeps the third axis whole.
# Assumes the first axis of feat['action_low'][0] is the batch axis and that
# `action_low_seq_lengths` holds one index per row -- TODO confirm.
# Relies on `feat`, `subgoal_i` and `action_low_seq_lengths` from earlier cells.
feat['object_visibility'][subgoal_i][torch.arange(feat['action_low'][0].shape[0]),action_low_seq_lengths,:]

In [None]:
# Scratch note: a bare tuple of three integers, echoed as the cell output when run.
# NOTE(review): what these numbers refer to is not recorded anywhere visible --
# annotate them or remove the cell.
11, 17, 42

In [None]:
# Timing breakdown pasted here for reference (not executable code).
# Columns: stage name, elapsed time (presumably seconds -- confirm).
#
# epoch_time                            110.233
# compute_metrics_validation_sets       87.779
# forward_batch_train_with_iterate      14.776
# forward_batch_train                   14.518
# torch_save_train_sanity               3.805
# torch_save_valid_unseen               2.162
# make_debug_train                      0.928
# torch_save_last                       0.341
# torch_save_valid_seen                 0.296
# iterate_featurize                     0.244
# iterate_load_task_json                0.161
# featurize_input_resnet_features       0.115
# featurize_tensorization_and_padding   0.113
# make_debug_valid_seen                 0.099
# featurize_torch_load_time             0.06
# make_debug_valid_unseen               0.045
# setup_time                            0.001
# featurize_outputs                     0.001
# featurize_input_action_low            0.0
# compute_metrics_train                 0.0