In [1]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '1,2,3'
import sys
import json
import argparse
import numpy as np
import math
from einops import rearrange
import time
import random
import string
import h5py
from tqdm import tqdm
import webdataset as wds

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import transforms
from accelerate import Accelerator

# SDXL unCLIP requires code from https://github.com/Stability-AI/generative-models/tree/main
sys.path.append('generative_models/')
import sgm
from generative_models.sgm.modules.encoders.modules import FrozenOpenCLIPImageEmbedder, FrozenOpenCLIPEmbedder2
from generative_models.sgm.models.diffusion import DiffusionEngine
from generative_models.sgm.util import append_dims
from omegaconf import OmegaConf

# tf32 data type is faster than standard float32
torch.backends.cuda.matmul.allow_tf32 = True

# custom functions #
import utils
from models import *

### Multi-GPU config ###
# Read this process's rank from the RANK environment variable (presumably set
# by the distributed launcher); when it is absent, fall back to rank 0.
_rank_from_env = os.getenv('RANK')
local_rank = 0 if _rank_from_env is None else int(_rank_from_env)
print("LOCAL RANK ", local_rank)

# Build the Accelerator with fp16 mixed precision and split_batches disabled,
# then take the device it selected; the models created in later cells are
# moved onto this device.
accelerator = Accelerator(split_batches=False, mixed_precision="fp16")
device = accelerator.device
print("device:",device)

Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


LOCAL RANK  0
device: cuda


In [2]:
# if running this interactively, can specify jupyter_args here for argparser to use
# (when utils.is_interactive() is False nothing here runs and the argument
# parser in the next cell reads the real command line instead)
if utils.is_interactive():
    model_name = "final_subj01_pretrained_40sess_24bs"
    print("model_name:", model_name)

    # other variables can be specified in the following string:
    # NOTE: the backslash continuations sit inside the string literal, so the
    # leading indentation of each continued line becomes part of the string;
    # the .split() below discards that whitespace and yields an argv-style list.
    jupyter_args = f"--data_path=../dataset \
                    --cache_dir=../cache \
                    --model_name={model_name} --subj=1 \
                    --hidden_dim=4096 --n_blocks=4 --new_test"
    print(jupyter_args)
    jupyter_args = jupyter_args.split()
    
    from IPython.display import clear_output # function to clear print outputs in cell
    # IPython magics: only valid when this source is executed by an IPython kernel
    %load_ext autoreload 
    # this allows you to change functions in models.py or utils.py and have this notebook automatically update with your revisions
    %autoreload 2 

model_name: final_subj01_pretrained_40sess_24bs
--data_path=../dataset                     --cache_dir=../cache                     --model_name=final_subj01_pretrained_40sess_24bs --subj=1                     --hidden_dim=4096 --n_blocks=4 --new_test


In [3]:
parser = argparse.ArgumentParser(description="Model Training Configuration")

# Every command-line option as (flag, add_argument keyword arguments), listed
# in the order the options are registered on the parser.
_ARGUMENT_TABLE = (
    ("--model_name", dict(
        type=str, default="testing",
        help="will load ckpt for model found in ../train_logs/model_name",
    )),
    ("--data_path", dict(
        type=str, default=os.getcwd(),
        help="Path to where NSD data is stored / where to download it to",
    )),
    ("--cache_dir", dict(
        type=str, default=os.getcwd(),
        help="Path to where misc. files downloaded from huggingface are stored. Defaults to current src directory.",
    )),
    ("--subj", dict(
        type=int, default=1, choices=[1,2,3,4,5,6,7,8],
        help="Validate on which subject?",
    )),
    ("--blurry_recon", dict(action=argparse.BooleanOptionalAction, default=True)),
    ("--n_blocks", dict(type=int, default=4)),
    ("--hidden_dim", dict(type=int, default=2048)),
    ("--new_test", dict(action=argparse.BooleanOptionalAction, default=True)),
    ("--seq_len", dict(type=int, default=1)),
    ("--seed", dict(type=int, default=42)),
)
for _flag, _options in _ARGUMENT_TABLE:
    parser.add_argument(_flag, **_options)

# Interactive sessions parse the jupyter_args list prepared in the previous
# cell; a script run parses the process command line (args=None).
args = parser.parse_args(jupyter_args if utils.is_interactive() else None)

# create global variables without the args prefix
for attribute_name, attribute_value in vars(args).items():
    globals()[attribute_name] = attribute_value

# seed all random functions
utils.seed_everything(seed)

# make output directory
for _out_dir in ("evals", f"evals/{model_name}"):
    os.makedirs(_out_dir, exist_ok=True)

In [4]:
voxels = {}
# Load hdf5 data for betas
# The 'betas' dataset is copied into memory in full ([:]), so the file is
# closed as soon as the read finishes instead of being left open.
with h5py.File(f'{data_path}/betas_all_subj0{subj}_fp32_renorm.hdf5', 'r') as f:
    betas = f['betas'][:]
betas = torch.Tensor(betas).to("cpu")
num_voxels = betas[0].shape[-1]
voxels[f'subj0{subj}'] = betas
print(f"num_voxels for subj0{subj}: {num_voxels}")

# Number of test samples per subject; subjects not listed use the default.
_NUM_TEST_OLD = {3: 2113, 4: 1985, 6: 2113, 8: 1985}
_NUM_TEST_OLD_DEFAULT = 2770
_NUM_TEST_NEW = {3: 2371, 4: 2188, 6: 2371, 8: 2188}
_NUM_TEST_NEW_DEFAULT = 3000

if not new_test: # using old test set from before full dataset released (used in original MindEye paper)
    num_test = _NUM_TEST_OLD.get(subj, _NUM_TEST_OLD_DEFAULT)
    test_url = f"{data_path}/wds/subj0{subj}/test/" + "0.tar"
else: # using larger test set from after full dataset released
    num_test = _NUM_TEST_NEW.get(subj, _NUM_TEST_NEW_DEFAULT)
    test_url = f"{data_path}/wds/subj0{subj}/new_test/" + "0.tar"

print(test_url)
def my_split_by_node(urls):
    """Node splitter that returns the shard urls unchanged (no per-node split)."""
    return urls
# Each sample is decoded and reduced to the four behavioural arrays; the batch
# size equals num_test so the test set is delivered as one batch.
test_data = wds.WebDataset(test_url,resampled=False,nodesplitter=my_split_by_node)\
                    .decode("torch")\
                    .rename(behav="behav.npy", past_behav="past_behav.npy", future_behav="future_behav.npy", olds_behav="olds_behav.npy")\
                    .to_tuple(*["behav", "past_behav", "future_behav", "olds_behav"])
test_dl = torch.utils.data.DataLoader(test_data, batch_size=num_test, shuffle=False, drop_last=True, pin_memory=True)
print(f"Loaded test dl for subj{subj}!\n")

num_voxels for subj01: 15724
../dataset/wds/subj01/new_test/0.tar
Loaded test dl for subj1!



In [5]:
# Prep images but don't load them all to memory
# (the file is left open on purpose: `images` is a lazy handle into it)
f = h5py.File(f'{data_path}/coco_images_224_float16.hdf5', 'r')
images = f['images']

# Prep test voxels and indices of test images
# behav[:,0,0] is used as the image index and behav[:,0,5] as the row index
# into the betas array.
test_voxels_batches = []
test_images_idx = []
test_voxels_idx = []
for test_i, (behav, past_behav, future_behav, old_behav) in enumerate(test_dl):
    test_voxels_batches.append(voxels[f'subj0{subj}'][behav[:,0,5].cpu().long()])
    # Fixed: this used to append onto test_images_idx, so the voxel index list
    # was rebuilt from the wrong array on every batch.
    test_voxels_idx = np.append(test_voxels_idx, behav[:,0,5].cpu().numpy())
    test_images_idx = np.append(test_images_idx, behav[:,0,0].cpu().numpy())

if not test_voxels_batches:
    # drop_last=True yields no batch at all when fewer than num_test samples exist
    raise RuntimeError(f"test dataloader produced no batches (expected batches of {num_test} samples)")

# Keep the voxels of every batch so that row i of test_voxels always
# corresponds to test_images_idx[i], also when there is more than one batch.
test_voxels = torch.cat(test_voxels_batches)
del test_voxels_batches
test_images_idx = test_images_idx.astype(int)
test_voxels_idx = test_voxels_idx.astype(int)

assert (test_i+1) * num_test == len(test_voxels) == len(test_images_idx) == len(test_voxels_idx)
print(test_i, len(test_voxels), len(test_images_idx), len(np.unique(test_images_idx)))

0 3000 3000 1000


In [6]:
# OpenCLIP ViT-bigG/14 image embedder; output_tokens/only_tokens are both set,
# which presumably makes it return only the token sequence (confirm in sgm).
clip_img_embedder = FrozenOpenCLIPImageEmbedder(
    arch="ViT-bigG-14",
    version="laion2b_s39b_b160k",
    output_tokens=True,
    only_tokens=True,
)
clip_img_embedder.to(device)
# Token count and embedding width used to size the networks below
clip_seq_dim = 256
clip_emb_dim = 1664

if blurry_recon:
    # Frozen VAE used to decode the predicted blurry latents back to images
    from diffusers import AutoencoderKL
    autoenc = AutoencoderKL(
        down_block_types=['DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D'],
        up_block_types=['UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D'],
        block_out_channels=[128, 256, 512, 512],
        layers_per_block=2,
        sample_size=256,
    )
    # Load onto the CPU first, like the other checkpoint loads in this file,
    # so the load does not depend on the device the file was saved from; the
    # module is moved to `device` below.
    ckpt = torch.load(f'{cache_dir}/sd_image_var_autoenc.pth', map_location='cpu')
    autoenc.load_state_dict(ckpt)
    autoenc.eval()
    autoenc.requires_grad_(False)
    autoenc.to(device)
    utils.count_params(autoenc)
    
class MindEyeModule(nn.Module):
    """Parameter-free container; sub-networks are attached to it as attributes after construction."""

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return the input unchanged."""
        return x


model = MindEyeModule()

class RidgeRegression(torch.nn.Module):
    """One linear layer per input size, applied independently to each sequence position.

    Args:
        input_sizes: iterable of input feature counts; one ``Linear`` is created
            per entry and selected in ``forward`` through ``subj_idx``.
        out_features: output width shared by all linear layers.
        seq_len: number of sequence positions (dim 1 of the input) to map.
    """
    # make sure to add weight_decay when initializing optimizer
    def __init__(self, input_sizes, out_features, seq_len):
        super(RidgeRegression, self).__init__()
        self.out_features = out_features
        # Stored on the instance: forward() previously read a module-level
        # `seq_len` global and ignored the value passed to the constructor.
        self.seq_len = seq_len
        self.linears = torch.nn.ModuleList([
                torch.nn.Linear(input_size, out_features) for input_size in input_sizes
            ])

    def forward(self, x, subj_idx):
        """Apply ``self.linears[subj_idx]`` to ``x[:, seq]`` for each of the first ``seq_len`` positions.

        Returns:
            Tensor with the per-position outputs stacked along dim 1.
        """
        linear = self.linears[subj_idx]
        out = torch.stack([linear(x[:,seq]) for seq in range(self.seq_len)], dim=1)
        return out
        
# Single entry in input_sizes: one linear map from this subject's num_voxels to hidden_dim.
model.ridge = RidgeRegression([num_voxels], out_features=hidden_dim, seq_len=seq_len)

from diffusers.models.vae import Decoder
class BrainNetwork(nn.Module):
    """Residual mixer backbone with a CLIP projection head and an optional blurry-image head.

    Notes on construction, as visible in this class:
      * ``in_dim`` is accepted but never referenced in the body.
      * The default ``n_blocks=n_blocks`` is taken from the module-level
        ``n_blocks`` at the moment the class statement is executed.
      * ``blurry_recon`` is read from module scope (it is not a parameter) both
        in ``__init__`` and in ``forward``.

    Args:
        h: width of the hidden/feature dimension.
        in_dim: unused.
        out_dim: output width of ``backbone_linear``.
        seq_len: length of the sequence dimension.
        n_blocks: number of (mixer_block1, mixer_block2) pairs.
        drop: dropout probability used inside the mixer MLPs.
        clip_size: size of the last dimension of the backbone/projection output.
    """
    def __init__(self, h=4096, in_dim=15724, out_dim=768, seq_len=2, n_blocks=n_blocks, drop=.15, 
                 clip_size=768):
        super().__init__()
        self.seq_len = seq_len
        self.h = h
        self.clip_size = clip_size
        
        # blocks applied along the last dimension of size h
        self.mixer_blocks1 = nn.ModuleList([
            self.mixer_block1(h, drop) for _ in range(n_blocks)
        ])
        # blocks applied along the seq_len dimension (after a permute in forward)
        self.mixer_blocks2 = nn.ModuleList([
            self.mixer_block2(seq_len, drop) for _ in range(n_blocks)
        ])
        
        # Output linear layer
        self.backbone_linear = nn.Linear(h * seq_len, out_dim, bias=True) 
        self.clip_proj = self.projector(clip_size, clip_size, h=clip_size)
        
        if blurry_recon:
            # 4*28*28 = 3136 features, reshaped in forward to 64 channels of 7x7
            self.blin1 = nn.Linear(h*seq_len,4*28*28,bias=True)
            self.bdropout = nn.Dropout(.3)
            self.bnorm = nn.GroupNorm(1, 64)
            # decoder taking the 64-channel map and producing a 4-channel output
            self.bupsampler = Decoder(
                in_channels=64,
                out_channels=4,
                up_block_types=["UpDecoderBlock2D","UpDecoderBlock2D","UpDecoderBlock2D"],
                block_out_channels=[32, 64, 128],
                layers_per_block=1,
            )
            # 1x1 convolutions lifting the 64-channel map to 512 channels (auxiliary output)
            self.b_maps_projector = nn.Sequential(
                nn.Conv2d(64, 512, 1, bias=False),
                nn.GroupNorm(1,512),
                nn.ReLU(True),
                nn.Conv2d(512, 512, 1, bias=False),
                nn.GroupNorm(1,512),
                nn.ReLU(True),
                nn.Conv2d(512, 512, 1, bias=True),
            )
            
    def projector(self, in_dim, out_dim, h=2048):
        """Build a three-linear-layer MLP (in_dim -> h -> h -> out_dim) with LayerNorm + GELU before each linear."""
        return nn.Sequential(
            nn.LayerNorm(in_dim),
            nn.GELU(),
            nn.Linear(in_dim, h),
            nn.LayerNorm(h),
            nn.GELU(),
            nn.Linear(h, h),
            nn.LayerNorm(h),
            nn.GELU(),
            nn.Linear(h, out_dim)
        )
    
    def mlp(self, in_dim, out_dim, drop):
        """Build Linear -> GELU -> Dropout(drop) -> Linear, with out_dim as the hidden and output width."""
        return nn.Sequential(
            nn.Linear(in_dim, out_dim),
            nn.GELU(),
            nn.Dropout(drop),
            nn.Linear(out_dim, out_dim),
        )
    
    def mixer_block1(self, h, drop):
        """Build LayerNorm(h) followed by an h -> h MLP."""
        return nn.Sequential(
            nn.LayerNorm(h),
            self.mlp(h, h, drop),  # Token mixing
        )

    def mixer_block2(self, seq_len, drop):
        """Build LayerNorm(seq_len) followed by a seq_len -> seq_len MLP."""
        return nn.Sequential(
            nn.LayerNorm(seq_len),
            self.mlp(seq_len, seq_len, drop)  # Channel mixing
        )
        
    def forward(self, x):
        """Run the mixer blocks and the output heads.

        Args:
            x: 3-D tensor; the layer sizes above imply (batch, seq_len, h).

        Returns:
            Tuple ``(backbone, c, b)``: ``backbone`` is the ``backbone_linear``
            output reshaped to (batch, -1, clip_size); ``c`` is ``clip_proj``
            applied to it; ``b`` is ``(bupsampler output, b_aux)`` when the
            global ``blurry_recon`` is true, otherwise the placeholder tensor
            created at the top of this method.
        """
        # make empty tensors
        # (placeholders; `t` is not used again in this method)
        c,b,t = torch.Tensor([0.]), torch.Tensor([[0.],[0.]]), torch.Tensor([0.])
        
        # Mixer blocks
        # Two residual streams are kept: one in (batch, seq, h) layout and one
        # in the permuted (batch, h, seq) layout.
        residual1 = x
        residual2 = x.permute(0,2,1)
        for block1, block2 in zip(self.mixer_blocks1,self.mixer_blocks2):
            x = block1(x) + residual1
            residual1 = x
            x = x.permute(0,2,1)
            
            x = block2(x) + residual2
            residual2 = x
            x = x.permute(0,2,1)
            
        # flatten everything except the batch dimension
        x = x.reshape(x.size(0), -1)
        backbone = self.backbone_linear(x).reshape(len(x), -1, self.clip_size)
        c = self.clip_proj(backbone)

        if blurry_recon:
            b = self.blin1(x)
            b = self.bdropout(b)
            # 3136 features per sample -> (batch, 64, 7, 7)
            b = b.reshape(b.shape[0], -1, 7, 7).contiguous()
            b = self.bnorm(b)
            # auxiliary maps: (batch, 512, 7, 7) -> (batch, 49, 512)
            b_aux = self.b_maps_projector(b).flatten(2).permute(0,2,1)
            b_aux = b_aux.view(len(b_aux), 49, 512)
            b = (self.bupsampler(b), b_aux)
        
        return backbone, c, b

# Backbone whose linear output is sized to clip_seq_dim tokens of clip_emb_dim each
_backbone_kwargs = dict(
    h=hidden_dim,
    in_dim=hidden_dim,
    seq_len=seq_len,
    clip_size=clip_emb_dim,
    out_dim=clip_emb_dim*clip_seq_dim,
)
model.backbone = BrainNetwork(**_backbone_kwargs)

# Report parameter counts for the ridge layer, the backbone, and the whole model so far
for _counted_module in (model.ridge, model.backbone, model):
    utils.count_params(_counted_module)

# setup diffusion prior network
out_dim = clip_emb_dim
depth = 6
dim_head = 52
# Derive the head count from dim_head rather than repeating the literal, and
# check the invariant the attention layout relies on.
heads = clip_emb_dim // dim_head # heads * dim_head = clip_emb_dim
assert heads * dim_head == clip_emb_dim, (
    f"clip_emb_dim ({clip_emb_dim}) must be divisible by dim_head ({dim_head})"
)
timesteps = 100

prior_network = PriorNetwork(
        dim=out_dim,
        depth=depth,
        dim_head=dim_head,
        heads=heads,
        causal=False,
        num_tokens = clip_seq_dim,
        learned_query_mode="pos_emb"
    )

model.diffusion_prior = BrainDiffusionPrior(
    net=prior_network,
    image_embed_dim=out_dim,
    condition_on_text_encodings=False,
    timesteps=timesteps,
    cond_drop_prob=0.2,
    image_embed_scale=None,
)
# Move the full model (ridge + backbone + diffusion prior) to the compute device
model.to(device)

utils.count_params(model.diffusion_prior)
utils.count_params(model)

# Load pretrained model ckpt
# First try a regular torch checkpoint holding 'model_state_dict'; if that
# fails for any ordinary reason, fall back to a deepspeed ZeRO checkpoint
# stored under the same directory and tag.
tag='last'
outdir = os.path.abspath(f'../train_logs/{model_name}')
print(f"\n---loading {outdir}/{tag}.pth ckpt---\n")
try:
    checkpoint = torch.load(outdir+f'/{tag}.pth', map_location='cpu')
    state_dict = checkpoint['model_state_dict']
    model.load_state_dict(state_dict, strict=True)
    del checkpoint
except Exception as load_error: # probably ckpt is saved using deepspeed format
    # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit still
    # propagate; the reason is printed so a genuine failure is not hidden.
    print(f"regular ckpt load failed ({type(load_error).__name__}: {str(load_error)[:200]}); trying deepspeed format")
    import deepspeed
    state_dict = deepspeed.utils.zero_to_fp32.get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir=outdir, tag=tag)
    # strict=False tolerates key mismatches, so report them instead of ignoring them
    load_result = model.load_state_dict(state_dict, strict=False)
    if load_result.missing_keys or load_result.unexpected_keys:
        print(f"WARNING: non-strict load: {len(load_result.missing_keys)} missing keys, "
              f"{len(load_result.unexpected_keys)} unexpected keys")
        print("  missing (first 10):", list(load_result.missing_keys)[:10])
        print("  unexpected (first 10):", list(load_result.unexpected_keys)[:10])
    del state_dict
print("ckpt loaded!")

param counts:
83,653,863 total
0 trainable
param counts:
64,409,600 total
64,409,600 trainable
param counts:
1,903,020,028 total
1,903,020,028 trainable
param counts:
1,967,429,628 total
1,967,429,628 trainable
param counts:
259,865,216 total
259,865,200 trainable
param counts:
2,227,294,844 total
2,227,294,828 trainable

---loading /export/raid1/home/kneel027/MindEyeV2/train_logs/final_subj01_pretrained_40sess_24bs/last.pth ckpt---

ckpt loaded!


In [7]:
# setup text caption networks
from transformers import AutoProcessor, AutoModelForCausalLM
from modeling_git import GitForCausalLMClipEmb

# The processor and the captioning model are loaded from the same pretrained checkpoint name
_GIT_CHECKPOINT_NAME = "microsoft/git-large-coco"
processor = AutoProcessor.from_pretrained(_GIT_CHECKPOINT_NAME)
clip_text_model = GitForCausalLMClipEmb.from_pretrained(_GIT_CHECKPOINT_NAME)
clip_text_model.to(device) # if you get OOM running this script, you can switch this to cpu and lower minibatch_size to 4
# inference only: eval mode, no gradients
clip_text_model.eval()
clip_text_model.requires_grad_(False)
# Target sequence length and embedding width used by CLIPConverter below
clip_text_seq_dim, clip_text_emb_dim = 257, 1024

class CLIPConverter(torch.nn.Module):
    """Two linear maps converting (clip_seq_dim, clip_emb_dim) tokens to (clip_text_seq_dim, clip_text_emb_dim)."""

    def __init__(self):
        super().__init__()
        # linear1 resizes the sequence axis, linear2 resizes the embedding axis
        self.linear1 = nn.Linear(clip_seq_dim, clip_text_seq_dim)
        self.linear2 = nn.Linear(clip_emb_dim, clip_text_emb_dim)

    def forward(self, x):
        """Map a 3-D input by resizing dim 1 with linear1, then dim 2 with linear2."""
        # put the sequence axis last so linear1 acts on it
        seq_resized = self.linear1(x.permute(0,2,1))
        # restore the (batch, seq, emb) layout before resizing the embedding axis
        return self.linear2(seq_resized.permute(0,2,1))
        
# Instantiate the converter and load its pretrained weights (strict key match)
_converter_ckpt_path = f"{cache_dir}/bigG_to_L_epoch8.pth"
clip_convert = CLIPConverter()
# `state_dict` is rebound here and deleted right after loading so the weights
# do not stay referenced from the notebook namespace.
state_dict = torch.load(_converter_ckpt_path, map_location='cpu')['model_state_dict']
clip_convert.load_state_dict(state_dict, strict=True)
del state_dict
clip_convert.to(device) # if you get OOM running this script, you can switch this to cpu and lower minibatch_size to 4

In [9]:
# prep unCLIP
_unclip_yaml = OmegaConf.load("generative_models/configs/unclip6.yaml")
config = OmegaConf.to_container(_unclip_yaml, resolve=True)
unclip_params = config["model"]["params"]

# Pull the individual sections out of the model params, in this fixed order
(network_config,
 denoiser_config,
 first_stage_config,
 conditioner_config,
 sampler_config,
 scale_factor,
 disable_first_stage_autocast) = (
    unclip_params[_section] for _section in (
        "network_config",
        "denoiser_config",
        "first_stage_config",
        "conditioner_config",
        "sampler_config",
        "scale_factor",
        "disable_first_stage_autocast",
    )
)
offset_noise_level = unclip_params["loss_fn_config"]["params"]["offset_noise_level"]

# Overrides applied on top of the yaml values
first_stage_config['target'] = 'sgm.models.autoencoder.AutoencoderKL'
sampler_config['params']['num_steps'] = 38

_engine_kwargs = dict(
    network_config=network_config,
    denoiser_config=denoiser_config,
    first_stage_config=first_stage_config,
    conditioner_config=conditioner_config,
    sampler_config=sampler_config,
    scale_factor=scale_factor,
    disable_first_stage_autocast=disable_first_stage_autocast,
)
diffusion_engine = DiffusionEngine(**_engine_kwargs)
# set to inference
diffusion_engine.eval()
diffusion_engine.requires_grad_(False)
diffusion_engine.to(device)

# Weights are read on the CPU and copied into the engine already on `device`
ckpt_path = f'{cache_dir}/unclip6_epoch0_step110000.ckpt'
ckpt = torch.load(ckpt_path, map_location='cpu')
diffusion_engine.load_state_dict(ckpt['state_dict'])

# Run the conditioner once on a dummy batch and keep its "vector" output
batch = {
    "jpg": torch.randn(1,3,1,1).to(device), # jpg doesnt get used, it's just a placeholder
    "original_size_as_tuple": torch.ones(1, 2).to(device) * 768,
    "crop_coords_top_left": torch.zeros(1, 2).to(device),
}
out = diffusion_engine.conditioner(batch)
vector_suffix = out["vector"].to(device)
print("vector_suffix", vector_suffix.shape)



Initialized embedder #0: FrozenOpenCLIPImageEmbedder with 1909889025 params. Trainable: False
Initialized embedder #1: ConcatTimestepEmbedderND with 0 params. Trainable: False
Initialized embedder #2: ConcatTimestepEmbedderND with 0 params. Trainable: False
vector_suffix torch.Size([1, 1024])


In [11]:
# get all reconstructions
# For every unique test image: gather (up to) three voxel repetitions, average
# the model outputs over them, sample the diffusion prior, caption the result,
# reconstruct an image with unCLIP and (optionally) decode the blurry recon.
model.to(device)
model.eval().requires_grad_(False)

# all_images = None
all_blurryrecons = None
all_recons = None
all_predcaptions = []
all_clipvoxels = None

minibatch_size = 1
num_samples_per_image = 1
assert num_samples_per_image == 1
plotting = False

# Per-minibatch results are collected in lists and concatenated once after the
# loop; growing a tensor with vstack on every iteration re-copies everything
# accumulated so far.
recon_chunks = []
blurryrecon_chunks = []
clipvoxel_chunks = []

# The set of unique test images does not change inside the loop: compute it once
unique_test_images = np.unique(test_images_idx)

with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.float16):
    # `batch_start` (not `batch`) so the conditioner batch dict defined earlier is not shadowed
    for batch_start in tqdm(range(0,len(unique_test_images),minibatch_size)):
        uniq_imgs = unique_test_images[batch_start:batch_start+minibatch_size]
        voxel_rows = []
        for uniq_img in uniq_imgs:
            locs = np.where(test_images_idx==uniq_img)[0]
            # pad images with fewer than 3 repetitions by repeating the available ones
            if len(locs)==1:
                locs = locs.repeat(3)
            elif len(locs)==2:
                locs = locs.repeat(2)[:3]
            assert len(locs)==3
            voxel_rows.append(test_voxels[None,locs]) # 1, num_image_repetitions, num_voxels
        voxel = torch.cat(voxel_rows).to(device)
        
        # Average the backbone outputs over the three repetitions
        for rep in range(3):
            voxel_ridge = model.ridge(voxel[:,[rep]],0) # 0th index of subj_list
            backbone0, clip_voxels0, blurry_image_enc0 = model.backbone(voxel_ridge)
            if rep==0:
                clip_voxels = clip_voxels0
                backbone = backbone0
                blurry_image_enc = blurry_image_enc0[0]
            else:
                clip_voxels += clip_voxels0
                backbone += backbone0
                blurry_image_enc += blurry_image_enc0[0]
        clip_voxels /= 3
        backbone /= 3
        blurry_image_enc /= 3
                
        # Save retrieval submodule outputs
        clipvoxel_chunks.append(clip_voxels.cpu())
        
        # Feed voxels through OpenCLIP-bigG diffusion prior
        prior_out = model.diffusion_prior.p_sample_loop(backbone.shape, 
                        text_cond = dict(text_embed = backbone), 
                        cond_scale = 1., timesteps = 20)
        
        pred_caption_emb = clip_convert(prior_out)
        generated_ids = clip_text_model.generate(pixel_values=pred_caption_emb, max_length=20)
        generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)
        all_predcaptions = np.hstack((all_predcaptions, generated_caption))
        # print(generated_caption)
        
        # Feed diffusion prior outputs through unCLIP
        for i in range(len(voxel)):
            samples = utils.unclip_recon(prior_out[[i]],
                             diffusion_engine,
                             vector_suffix,
                             num_samples=num_samples_per_image)
            recon_chunks.append(samples.cpu())
            if plotting:
                for s in range(num_samples_per_image):
                    plt.figure(figsize=(2,2))
                    plt.imshow(transforms.ToPILImage()(samples[s]))
                    plt.axis('off')
                    plt.show()

        if blurry_recon:
            # undo the latent scaling, decode, and map from [-1, 1] to [0, 1]
            blurred_image = (autoenc.decode(blurry_image_enc/0.18215).sample/ 2 + 0.5).clamp(0,1)
            
            for i in range(len(voxel)):
                im = torch.Tensor(blurred_image[i])
                blurryrecon_chunks.append(im[None].cpu())
                if plotting:
                    plt.figure(figsize=(2,2))
                    plt.imshow(transforms.ToPILImage()(im))
                    plt.axis('off')
                    plt.show()

        if plotting: 
            print(model_name)
            # dont actually want to run the whole thing with plotting=True:
            # abort before anything is saved (this used to rely on an undefined name)
            raise RuntimeError("plotting=True is for inspection only; stopping after the first minibatch")

# Concatenate the collected chunks along the sample dimension
all_clipvoxels = torch.cat(clipvoxel_chunks)
all_recons = torch.cat(recon_chunks)
if blurry_recon:
    all_blurryrecons = torch.cat(blurryrecon_chunks)
del clipvoxel_chunks, recon_chunks, blurryrecon_chunks

# resize outputs before saving
imsize = 256
all_recons = transforms.Resize((imsize,imsize))(all_recons).float()
if blurry_recon: 
    all_blurryrecons = transforms.Resize((imsize,imsize))(all_blurryrecons).float()
        
# saving
print(all_recons.shape)
# torch.save(all_images,"evals/all_images.pt")
if blurry_recon:
    torch.save(all_blurryrecons,f"evals/{model_name}/{model_name}_all_blurryrecons.pt")
torch.save(all_recons,f"evals/{model_name}/{model_name}_all_recons.pt")
torch.save(all_predcaptions,f"evals/{model_name}/{model_name}_all_predcaptions.pt")
torch.save(all_clipvoxels,f"evals/{model_name}/{model_name}_all_clipvoxels.pt")
print(f"saved {model_name} outputs!")

# When run as a script (not in a notebook) stop here
if not utils.is_interactive():
    sys.exit(0)

  0%|          | 0/1000 [00:00<?, ?it/s]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around a table.']


  0%|          | 1/1000 [00:05<1:27:34,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a table.']


  0%|          | 2/1000 [00:10<1:29:18,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard in the ocean.']


  0%|          | 3/1000 [00:16<1:29:09,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite flying in the sky.']


  0%|          | 4/1000 [00:21<1:28:59,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


  0%|          | 5/1000 [00:26<1:27:57,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


  1%|          | 6/1000 [00:31<1:27:11,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a plate of food on it']


  1%|          | 7/1000 [00:37<1:26:45,  5.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people playing a game of frisbee.']


  1%|          | 8/1000 [00:42<1:26:35,  5.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on top of a table.']


  1%|          | 9/1000 [00:47<1:26:22,  5.23s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


  1%|          | 10/1000 [00:52<1:26:05,  5.22s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is flying in the sky.']


  1%|          | 11/1000 [00:58<1:26:44,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard in the ocean.']


  1%|          | 12/1000 [01:03<1:26:28,  5.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small herd of sheep grazing.']


  1%|▏         | 13/1000 [01:08<1:26:11,  5.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting in a chair.']


  1%|▏         | 14/1000 [01:13<1:26:19,  5.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water']


  2%|▏         | 15/1000 [01:19<1:26:43,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a couch, chair, and a table.']


  2%|▏         | 16/1000 [01:24<1:26:59,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man wearing a hat and holding a cell phone.']


  2%|▏         | 17/1000 [01:29<1:27:04,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a lush green field.']


  2%|▏         | 18/1000 [01:35<1:26:48,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


  2%|▏         | 19/1000 [01:40<1:26:15,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


  2%|▏         | 20/1000 [01:45<1:26:31,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a child is holding a book.']


  2%|▏         | 21/1000 [01:50<1:26:02,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


  2%|▏         | 22/1000 [01:56<1:25:58,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a tower in the background.']


  2%|▏         | 23/1000 [02:01<1:25:40,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing on top of a field.']


  2%|▏         | 24/1000 [02:06<1:26:18,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


  2%|▎         | 25/1000 [02:12<1:26:25,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a car.']


  3%|▎         | 26/1000 [02:17<1:26:11,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


  3%|▎         | 27/1000 [02:22<1:26:08,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large jetliner sitting on top of a lush green field.']


  3%|▎         | 28/1000 [02:28<1:26:18,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of birds standing on top of a hill.']


  3%|▎         | 29/1000 [02:33<1:26:23,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a young woman is sitting on a bed.']


  3%|▎         | 30/1000 [02:38<1:26:28,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


  3%|▎         | 31/1000 [02:44<1:26:27,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a couch, a table, and a tv.']


  3%|▎         | 32/1000 [02:49<1:26:35,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bed and a bed in a room.']


  3%|▎         | 33/1000 [02:54<1:26:07,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a sink and a mirror.']


  3%|▎         | 34/1000 [03:00<1:26:10,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


  4%|▎         | 35/1000 [03:05<1:26:06,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving down the tracks.']


  4%|▎         | 36/1000 [03:10<1:26:06,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


  4%|▎         | 37/1000 [03:16<1:26:03,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food.']


  4%|▍         | 38/1000 [03:21<1:25:53,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a sidewalk.']


  4%|▍         | 39/1000 [03:27<1:25:45,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat on a body of water.']


  4%|▍         | 40/1000 [03:32<1:25:32,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boy is holding a ball.']


  4%|▍         | 41/1000 [03:37<1:25:25,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


  4%|▍         | 42/1000 [03:43<1:25:23,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the tracks.']


  4%|▍         | 43/1000 [03:48<1:25:23,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water']


  4%|▍         | 44/1000 [03:53<1:25:20,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a vase with flowers on it.']


  4%|▍         | 45/1000 [03:59<1:25:16,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite flying over a beach.']


  5%|▍         | 46/1000 [04:04<1:25:16,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate with a pizza on it']


  5%|▍         | 47/1000 [04:09<1:25:13,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large jetliner sitting on top of a lush green field.']


  5%|▍         | 48/1000 [04:15<1:25:20,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a car and a bus.']


  5%|▍         | 49/1000 [04:20<1:25:17,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


  5%|▌         | 50/1000 [04:26<1:25:05,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is standing on a branch.']


  5%|▌         | 51/1000 [04:31<1:24:49,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a sidewalk.']


  5%|▌         | 52/1000 [04:36<1:24:47,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water.']


  5%|▌         | 53/1000 [04:42<1:24:36,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is flying in the sky.']


  5%|▌         | 54/1000 [04:47<1:24:38,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat on a lake with a boat in the water.']


  6%|▌         | 55/1000 [04:52<1:24:37,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


  6%|▌         | 56/1000 [04:58<1:24:26,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bicycle parked on the side of a road.']


  6%|▌         | 57/1000 [05:03<1:24:21,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a building.']


  6%|▌         | 58/1000 [05:08<1:24:12,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a sandwich on it.']


  6%|▌         | 59/1000 [05:14<1:24:06,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing in front of a batter.']


  6%|▌         | 60/1000 [05:19<1:24:01,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man walking down a street.']


  6%|▌         | 61/1000 [05:24<1:23:19,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of chairs']


  6%|▌         | 62/1000 [05:30<1:23:04,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis player is on the court.']


  6%|▋         | 63/1000 [05:35<1:22:35,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


  6%|▋         | 64/1000 [05:40<1:22:12,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is standing in the snow.']


  6%|▋         | 65/1000 [05:45<1:21:54,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of food on it']


  7%|▋         | 66/1000 [05:51<1:21:48,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


  7%|▋         | 67/1000 [05:56<1:21:44,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


  7%|▋         | 68/1000 [06:01<1:21:42,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting on a bench.']


  7%|▋         | 69/1000 [06:06<1:21:30,  5.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


  7%|▋         | 70/1000 [06:12<1:21:59,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a fire hydrant is on the side of the road.']


  7%|▋         | 71/1000 [06:17<1:21:57,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man on a surfboard in the ocean.']


  7%|▋         | 72/1000 [06:22<1:21:44,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


  7%|▋         | 73/1000 [06:28<1:21:26,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


  7%|▋         | 74/1000 [06:33<1:21:09,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite is flying over a beach.']


  8%|▊         | 75/1000 [06:38<1:21:00,  5.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus is parked on the side of the road.']


  8%|▊         | 76/1000 [06:43<1:20:55,  5.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite flying in the sky.']


  8%|▊         | 77/1000 [06:49<1:20:47,  5.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting down next to a table.']


  8%|▊         | 78/1000 [06:54<1:20:44,  5.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of items on it']


  8%|▊         | 79/1000 [06:59<1:20:40,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a person holding a cell phone.']


  8%|▊         | 80/1000 [07:04<1:20:32,  5.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a stuffed animal is sitting on a table.']


  8%|▊         | 81/1000 [07:10<1:20:31,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving down the tracks.']


  8%|▊         | 82/1000 [07:15<1:20:30,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


  8%|▊         | 83/1000 [07:20<1:20:30,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat on a body of water.']


  8%|▊         | 84/1000 [07:25<1:20:24,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large field with trees and bushes.']


  8%|▊         | 85/1000 [07:31<1:20:14,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave.']


  9%|▊         | 86/1000 [07:36<1:20:09,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman on a skateboard.']


  9%|▊         | 87/1000 [07:41<1:20:15,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat is sitting on a table.']


  9%|▉         | 88/1000 [07:47<1:20:27,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


  9%|▉         | 89/1000 [07:52<1:20:10,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a surfboard.']


  9%|▉         | 90/1000 [07:57<1:20:00,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a lush green field.']


  9%|▉         | 91/1000 [08:02<1:19:54,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view.']


  9%|▉         | 92/1000 [08:08<1:19:39,  5.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding down the street.']


  9%|▉         | 93/1000 [08:13<1:19:46,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a glass door with a window.']


  9%|▉         | 94/1000 [08:18<1:20:13,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a tower in the background.']


 10%|▉         | 95/1000 [08:24<1:20:33,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a drink on it.']


 10%|▉         | 96/1000 [08:29<1:20:48,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 10%|▉         | 97/1000 [08:35<1:20:55,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of food on a table.']


 10%|▉         | 98/1000 [08:40<1:20:56,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat is standing on a table.']


 10%|▉         | 99/1000 [08:45<1:20:51,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a lot of furniture.']


 10%|█         | 100/1000 [08:51<1:20:45,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting in a chair.']


 10%|█         | 101/1000 [08:56<1:20:39,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 10%|█         | 102/1000 [09:01<1:20:42,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 10%|█         | 103/1000 [09:07<1:20:33,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman with a hat on.']


 10%|█         | 104/1000 [09:12<1:20:30,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 10%|█         | 105/1000 [09:18<1:20:26,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 11%|█         | 106/1000 [09:23<1:20:23,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is standing on a branch.']


 11%|█         | 107/1000 [09:28<1:19:46,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large airplane is parked on the runway.']


 11%|█         | 108/1000 [09:34<1:19:19,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard in the ocean.']


 11%|█         | 109/1000 [09:39<1:19:01,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the tracks.']


 11%|█         | 110/1000 [09:44<1:18:47,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 11%|█         | 111/1000 [09:49<1:18:31,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a skateboard down a street.']


 11%|█         | 112/1000 [09:55<1:18:32,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter top.']


 11%|█▏        | 113/1000 [10:00<1:18:26,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 11%|█▏        | 114/1000 [10:05<1:18:20,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 12%|█▏        | 115/1000 [10:11<1:18:06,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large jetliner sitting on top of a tarmac.']


 12%|█▏        | 116/1000 [10:16<1:18:07,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 12%|█▏        | 117/1000 [10:21<1:18:06,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 12%|█▏        | 118/1000 [10:27<1:18:03,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in a field.']


 12%|█▏        | 119/1000 [10:32<1:17:56,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a bowl of food on it.']


 12%|█▏        | 120/1000 [10:37<1:17:55,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large jetliner sitting on top of a grass covered field.']


 12%|█▏        | 121/1000 [10:43<1:17:55,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bench and a bench']


 12%|█▏        | 122/1000 [10:48<1:17:40,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view']


 12%|█▏        | 123/1000 [10:53<1:17:38,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a sink and a mirror.']


 12%|█▏        | 124/1000 [10:58<1:17:38,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 12%|█▎        | 125/1000 [11:04<1:17:30,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 13%|█▎        | 126/1000 [11:09<1:17:18,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a sink and a mirror.']


 13%|█▎        | 127/1000 [11:14<1:17:09,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a stove and a refrigerator.']


 13%|█▎        | 128/1000 [11:20<1:17:08,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a sidewalk.']


 13%|█▎        | 129/1000 [11:25<1:17:00,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white wall']


 13%|█▎        | 130/1000 [11:30<1:16:47,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 13%|█▎        | 131/1000 [11:36<1:16:36,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave.']


 13%|█▎        | 132/1000 [11:41<1:16:39,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting down.']


 13%|█▎        | 133/1000 [11:46<1:17:03,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man on a surfboard in the water.']


 13%|█▎        | 134/1000 [11:52<1:17:25,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of zebras grazing in a field.']


 14%|█▎        | 135/1000 [11:57<1:17:40,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man wearing a suit and tie.']


 14%|█▎        | 136/1000 [12:03<1:17:44,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people riding on top of a horse.']


 14%|█▎        | 137/1000 [12:08<1:17:44,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat is parked on the water.']


 14%|█▍        | 138/1000 [12:13<1:17:35,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of giraffes standing around.']


 14%|█▍        | 139/1000 [12:19<1:17:40,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a glass top and a white vase.']


 14%|█▍        | 140/1000 [12:24<1:17:36,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food.']


 14%|█▍        | 141/1000 [12:30<1:16:54,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a motorcycle parked on the side of a road.']


 14%|█▍        | 142/1000 [12:35<1:16:34,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bed and a table with a lamp on it']


 14%|█▍        | 143/1000 [12:40<1:16:18,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting down.']


 14%|█▍        | 144/1000 [12:45<1:16:06,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white bathroom with a toilet and sink.']


 14%|█▍        | 145/1000 [12:51<1:15:56,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large area with a lot of grass.']


 15%|█▍        | 146/1000 [12:56<1:15:47,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a stove and a counter']


 15%|█▍        | 147/1000 [13:01<1:15:38,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a grassy field with a zebra in it.']


 15%|█▍        | 148/1000 [13:07<1:15:31,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane flying in the sky.']


 15%|█▍        | 149/1000 [13:12<1:15:22,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


 15%|█▌        | 150/1000 [13:17<1:15:23,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is flying in the sky.']


 15%|█▌        | 151/1000 [13:23<1:15:23,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 15%|█▌        | 152/1000 [13:28<1:15:34,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a sandwich on it.']


 15%|█▌        | 153/1000 [13:34<1:15:48,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a fire hydrant is next to a sidewalk.']


 15%|█▌        | 154/1000 [13:39<1:15:45,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 16%|█▌        | 155/1000 [13:44<1:16:01,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 16%|█▌        | 156/1000 [13:50<1:16:13,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people playing a game of soccer.']


 16%|█▌        | 157/1000 [13:55<1:16:00,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a computer and a laptop on it.']


 16%|█▌        | 158/1000 [14:01<1:16:08,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is holding a cell phone.']


 16%|█▌        | 159/1000 [14:06<1:16:02,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree.']


 16%|█▌        | 160/1000 [14:12<1:15:46,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a living room with a couch, chair, and table.']


 16%|█▌        | 161/1000 [14:17<1:16:05,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting on a couch next to a wall.']


 16%|█▌        | 162/1000 [14:23<1:16:14,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 16%|█▋        | 163/1000 [14:28<1:16:09,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a bike on a dirt road.']


 16%|█▋        | 164/1000 [14:33<1:16:03,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a sink and a mirror.']


 16%|█▋        | 165/1000 [14:39<1:16:28,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus is parked on the side of the road.']


 17%|█▋        | 166/1000 [14:45<1:16:44,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


 17%|█▋        | 167/1000 [14:50<1:16:43,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 17%|█▋        | 168/1000 [14:56<1:16:46,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large motorcycle is parked on the side of the road.']


 17%|█▋        | 169/1000 [15:01<1:16:43,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 17%|█▋        | 170/1000 [15:07<1:16:32,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a field.']


 17%|█▋        | 171/1000 [15:12<1:15:50,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around a tree.']


 17%|█▋        | 172/1000 [15:18<1:16:17,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock on a wall']


 17%|█▋        | 173/1000 [15:23<1:15:43,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 17%|█▋        | 174/1000 [15:29<1:16:07,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is standing on a branch.']


 18%|█▊        | 175/1000 [15:34<1:15:55,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large white and red airplane.']


 18%|█▊        | 176/1000 [15:40<1:15:50,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 18%|█▊        | 177/1000 [15:45<1:15:16,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is wearing a suit and tie.']


 18%|█▊        | 178/1000 [15:51<1:14:47,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a pair of scissors on a table.']


 18%|█▊        | 179/1000 [15:56<1:14:30,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a field.']


 18%|█▊        | 180/1000 [16:01<1:14:13,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is standing on a branch.']


 18%|█▊        | 181/1000 [16:07<1:14:00,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving on the tracks.']


 18%|█▊        | 182/1000 [16:12<1:14:10,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a toilet and a sink']


 18%|█▊        | 183/1000 [16:18<1:13:53,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 18%|█▊        | 184/1000 [16:23<1:13:49,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on top of a table.']


 18%|█▊        | 185/1000 [16:29<1:13:38,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a lot of windows']


 19%|█▊        | 186/1000 [16:34<1:13:31,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a beach with a bunch of people on it']


 19%|█▊        | 187/1000 [16:39<1:13:22,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a fenced in area with a fence.']


 19%|█▉        | 188/1000 [16:45<1:13:18,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 19%|█▉        | 189/1000 [16:50<1:13:15,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant is standing in the grass.']


 19%|█▉        | 190/1000 [16:56<1:13:12,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bench with a dog on it.']


 19%|█▉        | 191/1000 [17:01<1:13:01,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a forest filled with trees.']


 19%|█▉        | 192/1000 [17:06<1:12:55,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of fruits and vegetables']


 19%|█▉        | 193/1000 [17:12<1:12:42,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large truck is parked on the side of the road.']


 19%|█▉        | 194/1000 [17:17<1:12:44,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a table.']


 20%|█▉        | 195/1000 [17:23<1:12:37,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 20%|█▉        | 196/1000 [17:28<1:12:33,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a living room with a couch and a chair.']


 20%|█▉        | 197/1000 [17:34<1:12:31,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus is parked in front of a bus.']


 20%|█▉        | 198/1000 [17:39<1:12:31,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of zebras grazing in a field.']


 20%|█▉        | 199/1000 [17:44<1:12:31,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a train.']


 20%|██        | 200/1000 [17:50<1:12:29,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing on top of a field.']


 20%|██        | 201/1000 [17:55<1:13:03,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is on a snowy hill.']


 20%|██        | 202/1000 [18:01<1:12:51,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large group of people on a street.']


 20%|██        | 203/1000 [18:06<1:12:36,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snow covered ski slope.']


 20%|██        | 204/1000 [18:12<1:12:22,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water.']


 20%|██        | 205/1000 [18:17<1:12:19,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a plate of food on it.']


 21%|██        | 206/1000 [18:23<1:12:13,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 21%|██        | 207/1000 [18:28<1:12:05,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 21%|██        | 208/1000 [18:34<1:11:58,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock on a wall']


 21%|██        | 209/1000 [18:39<1:11:45,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 21%|██        | 210/1000 [18:44<1:11:44,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street sign is shown.']


 21%|██        | 211/1000 [18:50<1:11:29,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large airplane on a runway.']


 21%|██        | 212/1000 [18:55<1:11:25,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the tracks.']


 21%|██▏       | 213/1000 [19:01<1:11:51,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 21%|██▏       | 214/1000 [19:06<1:11:46,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view']


 22%|██▏       | 215/1000 [19:12<1:11:28,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is flying in the sky.']


 22%|██▏       | 216/1000 [19:18<1:14:23,  5.69s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree.']


 22%|██▏       | 217/1000 [19:24<1:14:56,  5.74s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 22%|██▏       | 218/1000 [19:29<1:13:23,  5.63s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting down next to a woman.']


 22%|██▏       | 219/1000 [19:35<1:12:12,  5.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 22%|██▏       | 220/1000 [19:40<1:11:07,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a woman.']


 22%|██▏       | 221/1000 [19:45<1:10:26,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower and a tower with a clock.']


 22%|██▏       | 222/1000 [19:51<1:10:02,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing on a dirt field.']


 22%|██▏       | 223/1000 [19:56<1:09:36,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a person that is holding a plate.']


 22%|██▏       | 224/1000 [20:01<1:09:18,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large grassy field.']


 22%|██▎       | 225/1000 [20:07<1:09:02,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a view of a person.']


 23%|██▎       | 226/1000 [20:12<1:08:52,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane flying in the sky.']


 23%|██▎       | 227/1000 [20:17<1:08:47,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a bike down a street.']


 23%|██▎       | 228/1000 [20:23<1:08:54,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


 23%|██▎       | 229/1000 [20:28<1:08:57,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a bowl of food on it.']


 23%|██▎       | 230/1000 [20:33<1:09:12,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving down the tracks.']


 23%|██▎       | 231/1000 [20:39<1:09:27,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a train.']


 23%|██▎       | 232/1000 [20:44<1:09:38,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 23%|██▎       | 233/1000 [20:50<1:09:41,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 23%|██▎       | 234/1000 [20:56<1:12:38,  5.69s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a truck parked on the side of a road.']


 24%|██▎       | 235/1000 [21:02<1:12:10,  5.66s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a man.']


 24%|██▎       | 236/1000 [21:07<1:10:55,  5.57s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on top of a building.']


 24%|██▎       | 237/1000 [21:12<1:10:10,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small grassy area.']


 24%|██▍       | 238/1000 [21:18<1:09:25,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water']


 24%|██▍       | 239/1000 [21:23<1:08:51,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite is flying over a beach.']


 24%|██▍       | 240/1000 [21:29<1:08:30,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a chair.']


 24%|██▍       | 241/1000 [21:34<1:08:07,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large field with a mountain in the background.']


 24%|██▍       | 242/1000 [21:39<1:07:57,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large area with a lot of planes.']


 24%|██▍       | 243/1000 [21:45<1:08:20,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 24%|██▍       | 244/1000 [21:50<1:08:42,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 24%|██▍       | 245/1000 [21:56<1:08:29,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 25%|██▍       | 246/1000 [22:01<1:08:46,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


 25%|██▍       | 247/1000 [22:07<1:08:28,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 25%|██▍       | 248/1000 [22:12<1:08:17,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 25%|██▍       | 249/1000 [22:18<1:08:09,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a skateboard.']


 25%|██▌       | 250/1000 [22:23<1:07:59,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cow standing on a field.']


 25%|██▌       | 251/1000 [22:28<1:07:55,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small table with a piece of paper on it.']


 25%|██▌       | 252/1000 [22:34<1:07:56,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a skateboard down a snow covered slope.']


 25%|██▌       | 253/1000 [22:39<1:07:46,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding his skateboard.']


 25%|██▌       | 254/1000 [22:45<1:07:39,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is standing in front of a wall.']


 26%|██▌       | 255/1000 [22:50<1:07:31,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bed and a bed']


 26%|██▌       | 256/1000 [22:56<1:07:19,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water']


 26%|██▌       | 257/1000 [23:01<1:07:09,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 26%|██▌       | 258/1000 [23:06<1:07:06,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a building.']


 26%|██▌       | 259/1000 [23:12<1:06:59,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a snowboard down a snow covered slope.']


 26%|██▌       | 260/1000 [23:17<1:06:56,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is sitting on a leash.']


 26%|██▌       | 261/1000 [23:23<1:06:49,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 26%|██▌       | 262/1000 [23:28<1:06:33,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 26%|██▋       | 263/1000 [23:33<1:06:30,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the side of the road.']


 26%|██▋       | 264/1000 [23:39<1:06:29,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a beach next to a body of water.']


 26%|██▋       | 265/1000 [23:44<1:06:46,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 27%|██▋       | 266/1000 [23:50<1:06:50,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus parked on the side of a road.']


 27%|██▋       | 267/1000 [23:55<1:06:56,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is standing on a skateboard.']


 27%|██▋       | 268/1000 [24:01<1:07:04,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


 27%|██▋       | 269/1000 [24:07<1:07:10,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a vase with flowers on it.']


 27%|██▋       | 270/1000 [24:12<1:07:14,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a snow covered slope.']


 27%|██▋       | 271/1000 [24:18<1:06:54,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large grassy area.']


 27%|██▋       | 272/1000 [24:23<1:07:05,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a sign']


 27%|██▋       | 273/1000 [24:29<1:07:06,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 27%|██▋       | 274/1000 [24:34<1:06:52,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on top of a table.']


 28%|██▊       | 275/1000 [24:40<1:06:56,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it']


 28%|██▊       | 276/1000 [24:46<1:08:03,  5.64s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting down.']


 28%|██▊       | 277/1000 [24:51<1:06:53,  5.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bike parked on the side of a road.']


 28%|██▊       | 278/1000 [24:56<1:05:55,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat laying on a bed.']


 28%|██▊       | 279/1000 [25:02<1:05:16,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 28%|██▊       | 280/1000 [25:07<1:04:38,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree.']


 28%|██▊       | 281/1000 [25:12<1:04:16,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 28%|██▊       | 282/1000 [25:17<1:03:41,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat is parked on the side of the road.']


 28%|██▊       | 283/1000 [25:23<1:03:27,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 28%|██▊       | 284/1000 [25:28<1:03:23,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 28%|██▊       | 285/1000 [25:33<1:03:02,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a sidewalk.']


 29%|██▊       | 286/1000 [25:39<1:02:52,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man walking down a street.']


 29%|██▊       | 287/1000 [25:44<1:02:54,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is holding a cell phone.']


 29%|██▉       | 288/1000 [25:49<1:02:34,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving on the tracks.']


 29%|██▉       | 289/1000 [25:54<1:02:28,  5.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view of a room.']


 29%|██▉       | 290/1000 [26:00<1:02:39,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 29%|██▉       | 291/1000 [26:05<1:02:25,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 29%|██▉       | 292/1000 [26:10<1:02:21,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting in a chair.']


 29%|██▉       | 293/1000 [26:16<1:02:25,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is holding a cell phone.']


 29%|██▉       | 294/1000 [26:21<1:02:07,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman standing in front of a door.']


 30%|██▉       | 295/1000 [26:26<1:01:59,  5.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 30%|██▉       | 296/1000 [26:31<1:02:03,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man holding a tennis racket.']


 30%|██▉       | 297/1000 [26:37<1:02:09,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a lot of furniture.']


 30%|██▉       | 298/1000 [26:42<1:02:07,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter top and a sink in the background.']


 30%|██▉       | 299/1000 [26:47<1:02:12,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 30%|███       | 300/1000 [26:53<1:02:12,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on top of a table.']


 30%|███       | 301/1000 [26:58<1:02:13,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra in a field.']


 30%|███       | 302/1000 [27:03<1:01:45,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 30%|███       | 303/1000 [27:09<1:01:25,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a window and a door.']


 30%|███       | 304/1000 [27:14<1:01:23,  5.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving through a city.']


 30%|███       | 305/1000 [27:19<1:01:33,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 31%|███       | 306/1000 [27:25<1:01:32,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is standing on a sidewalk.']


 31%|███       | 307/1000 [27:30<1:01:50,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large truck driving down a street.']


 31%|███       | 308/1000 [27:35<1:01:44,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large animal in a field.']


 31%|███       | 309/1000 [27:41<1:01:45,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a shower and a sink.']


 31%|███       | 310/1000 [27:46<1:01:43,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 31%|███       | 311/1000 [27:51<1:01:18,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat laying on a bed.']


 31%|███       | 312/1000 [27:57<1:01:15,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a close up of a person']


 31%|███▏      | 313/1000 [28:02<1:00:45,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a wall.']


 31%|███▏      | 314/1000 [28:07<1:00:37,  5.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a field with a herd of cattle.']


 32%|███▏      | 315/1000 [28:13<1:00:42,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 32%|███▏      | 316/1000 [28:18<1:00:32,  5.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting on a couch.']


 32%|███▏      | 317/1000 [28:23<1:00:59,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of horses standing in a field.']


 32%|███▏      | 318/1000 [28:29<1:00:33,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard in the ocean.']


 32%|███▏      | 319/1000 [28:34<1:00:24,  5.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a vase with flowers on it.']


 32%|███▏      | 320/1000 [28:39<1:00:31,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


 32%|███▏      | 321/1000 [28:45<1:00:21,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is standing in front of a fence.']


 32%|███▏      | 322/1000 [28:50<1:00:51,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white wall']


 32%|███▏      | 323/1000 [28:55<1:00:20,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large rock.']


 32%|███▏      | 324/1000 [29:01<1:00:15,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on top of a bed.']


 32%|███▎      | 325/1000 [29:06<59:59,  5.33s/it]  

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting around a table.']


 33%|███▎      | 326/1000 [29:11<1:00:05,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing in front of a batter.']


 33%|███▎      | 327/1000 [29:17<59:55,  5.34s/it]  

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of flowers in a vase']


 33%|███▎      | 328/1000 [29:22<59:58,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave.']


 33%|███▎      | 329/1000 [29:27<59:40,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 33%|███▎      | 330/1000 [29:33<59:48,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 33%|███▎      | 331/1000 [29:38<59:52,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a bike on a dirt road.']


 33%|███▎      | 332/1000 [29:44<59:49,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a horse next to a horse.']


 33%|███▎      | 333/1000 [29:49<59:31,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man flying a kite in the sky.']


 33%|███▎      | 334/1000 [29:54<59:31,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat on a river.']


 34%|███▎      | 335/1000 [30:00<59:10,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of different types of food.']


 34%|███▎      | 336/1000 [30:05<59:14,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on top of a tennis court.']


 34%|███▎      | 337/1000 [30:10<58:57,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing on a dirt field.']


 34%|███▍      | 338/1000 [30:16<59:00,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter top.']


 34%|███▍      | 339/1000 [30:21<58:45,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder riding down a street.']


 34%|███▍      | 340/1000 [30:26<58:52,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting down.']


 34%|███▍      | 341/1000 [30:32<58:35,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it']


 34%|███▍      | 342/1000 [30:37<58:38,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people on skis in the snow.']


 34%|███▍      | 343/1000 [30:42<58:30,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 34%|███▍      | 344/1000 [30:48<58:34,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 34%|███▍      | 345/1000 [30:53<58:18,  5.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 35%|███▍      | 346/1000 [30:58<58:21,  5.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird sitting on a branch.']


 35%|███▍      | 347/1000 [31:04<58:02,  5.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 35%|███▍      | 348/1000 [31:09<58:27,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite flying in the sky']


 35%|███▍      | 349/1000 [31:15<58:11,  5.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is standing on a branch.']


 35%|███▌      | 350/1000 [31:20<58:16,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large field with a few trees in it.']


 35%|███▌      | 351/1000 [31:25<58:03,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter top and a counter top.']


 35%|███▌      | 352/1000 [31:31<58:09,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building.']


 35%|███▌      | 353/1000 [31:36<57:59,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large white and black plane.']


 35%|███▌      | 354/1000 [31:42<57:59,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large display of food.']


 36%|███▌      | 355/1000 [31:47<57:41,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman holding a tennis racket.']


 36%|███▌      | 356/1000 [31:52<57:52,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 36%|███▌      | 357/1000 [31:58<57:57,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a table.']


 36%|███▌      | 358/1000 [32:03<58:07,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 36%|███▌      | 359/1000 [32:09<57:40,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street sign and a street sign']


 36%|███▌      | 360/1000 [32:14<57:31,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a woman.']


 36%|███▌      | 361/1000 [32:19<57:12,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on top of a wooden fence.']


 36%|███▌      | 362/1000 [32:25<57:33,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of bananas on a table.']


 36%|███▋      | 363/1000 [32:30<57:11,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a shower and a sink.']


 36%|███▋      | 364/1000 [32:36<57:18,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a table.']


 36%|███▋      | 365/1000 [32:41<57:14,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant']


 37%|███▋      | 366/1000 [32:47<57:35,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a building.']


 37%|███▋      | 367/1000 [32:52<57:36,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is flying in the air.']


 37%|███▋      | 368/1000 [32:58<57:55,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant is standing in the grass.']


 37%|███▋      | 369/1000 [33:03<57:56,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 37%|███▋      | 370/1000 [33:09<57:48,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 37%|███▋      | 371/1000 [33:14<57:30,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large rock.']


 37%|███▋      | 372/1000 [33:19<57:13,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large room with a lot of furniture.']


 37%|███▋      | 373/1000 [33:25<57:16,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around.']


 37%|███▋      | 374/1000 [33:30<57:03,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 38%|███▊      | 375/1000 [33:36<56:59,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a horse is standing on a field.']


 38%|███▊      | 376/1000 [33:42<57:17,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a vase with flowers on it']


 38%|███▊      | 377/1000 [33:47<56:49,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a sidewalk next to a building.']


 38%|███▊      | 378/1000 [33:52<57:00,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 38%|███▊      | 379/1000 [33:58<57:18,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a building.']


 38%|███▊      | 380/1000 [34:04<57:39,  5.58s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a truck parked on the side of a road.']


 38%|███▊      | 381/1000 [34:09<57:12,  5.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is jumping in the air.']


 38%|███▊      | 382/1000 [34:15<57:17,  5.56s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a lot of traffic.']


 38%|███▊      | 383/1000 [34:20<56:31,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant is standing in the grass.']


 38%|███▊      | 384/1000 [34:26<56:35,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave.']


 38%|███▊      | 385/1000 [34:31<55:56,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite in the sky']


 39%|███▊      | 386/1000 [34:37<55:59,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 39%|███▊      | 387/1000 [34:42<55:26,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 39%|███▉      | 388/1000 [34:47<55:23,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a table.']


 39%|███▉      | 389/1000 [34:53<54:57,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white room with a lot of furniture']


 39%|███▉      | 390/1000 [34:58<55:18,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 39%|███▉      | 391/1000 [35:03<54:51,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of sheep standing on top of a grass covered field.']


 39%|███▉      | 392/1000 [35:09<54:59,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is sitting on a branch.']


 39%|███▉      | 393/1000 [35:14<54:45,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a couch, chairs, and a table.']


 39%|███▉      | 394/1000 [35:20<54:53,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a hillside.']


 40%|███▉      | 395/1000 [35:25<54:41,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a car is parked on the side of the road.']


 40%|███▉      | 396/1000 [35:31<55:29,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bed with a cat and a blanket']


 40%|███▉      | 397/1000 [35:36<54:53,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it']


 40%|███▉      | 398/1000 [35:42<54:51,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a traffic light and a street sign.']


 40%|███▉      | 399/1000 [35:47<54:55,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large area with a fence.']


 40%|████      | 400/1000 [35:53<54:40,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the tracks.']


 40%|████      | 401/1000 [35:58<54:17,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a sign and a car']


 40%|████      | 402/1000 [36:04<54:08,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a sink and a mirror.']


 40%|████      | 403/1000 [36:09<54:31,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large brown bear.']


 40%|████      | 404/1000 [36:15<55:02,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard in the ocean.']


 40%|████      | 405/1000 [36:20<54:22,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock on a wall']


 41%|████      | 406/1000 [36:26<54:14,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a bunch of food on it.']


 41%|████      | 407/1000 [36:31<53:58,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a car and a bus.']


 41%|████      | 408/1000 [36:37<53:56,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is standing in front of a wall.']


 41%|████      | 409/1000 [36:42<53:39,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a horse standing on a grass covered field.']


 41%|████      | 410/1000 [36:47<53:54,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a sign on it.']


 41%|████      | 411/1000 [36:53<53:21,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


 41%|████      | 412/1000 [36:58<53:17,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water with a bunch of boats in it.']


 41%|████▏     | 413/1000 [37:04<53:20,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of giraffes standing in a field.']


 41%|████▏     | 414/1000 [37:09<53:20,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of flowers in a planter']


 42%|████▏     | 415/1000 [37:15<53:13,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is wearing a suit and tie.']


 42%|████▏     | 416/1000 [37:20<53:03,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man holding a cell phone.']


 42%|████▏     | 417/1000 [37:26<53:02,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting on a bench.']


 42%|████▏     | 418/1000 [37:31<52:50,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 42%|████▏     | 419/1000 [37:36<52:45,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite flying in the sky.']


 42%|████▏     | 420/1000 [37:42<52:31,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a refrigerator and a counter.']


 42%|████▏     | 421/1000 [37:47<52:07,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a couch, chairs, and a table.']


 42%|████▏     | 422/1000 [37:53<52:07,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a white plate and a lamp on it.']


 42%|████▏     | 423/1000 [37:58<51:47,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 42%|████▏     | 424/1000 [38:03<51:43,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding down a ramp.']


 42%|████▎     | 425/1000 [38:09<51:31,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a city street with a lot of people.']


 43%|████▎     | 426/1000 [38:14<51:35,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 43%|████▎     | 427/1000 [38:19<51:19,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 43%|████▎     | 428/1000 [38:25<51:22,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 43%|████▎     | 429/1000 [38:30<51:05,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 43%|████▎     | 430/1000 [38:36<51:09,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a television and a desk.']


 43%|████▎     | 431/1000 [38:41<50:54,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 43%|████▎     | 432/1000 [38:46<51:04,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat is standing on a table.']


 43%|████▎     | 433/1000 [38:52<50:45,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite flying in the sky.']


 43%|████▎     | 434/1000 [38:57<50:46,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting around tables.']


 44%|████▎     | 435/1000 [39:02<50:34,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter and a sink.']


 44%|████▎     | 436/1000 [39:08<50:41,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of sheep standing next to each other.']


 44%|████▎     | 437/1000 [39:13<50:23,  5.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small display of flowers.']


 44%|████▍     | 438/1000 [39:19<50:33,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in the grass.']


 44%|████▍     | 439/1000 [39:24<50:41,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 44%|████▍     | 440/1000 [39:30<50:40,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 44%|████▍     | 441/1000 [39:35<50:17,  5.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large plane flying over a city.']


 44%|████▍     | 442/1000 [39:40<50:19,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white toilet']


 44%|████▍     | 443/1000 [39:46<49:56,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting down.']


 44%|████▍     | 444/1000 [39:51<49:58,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


 44%|████▍     | 445/1000 [39:56<49:45,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a bike down a street.']


 45%|████▍     | 446/1000 [40:02<49:48,  5.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 45%|████▍     | 447/1000 [40:07<49:33,  5.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving down the street.']


 45%|████▍     | 448/1000 [40:13<50:03,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop and a monitor']


 45%|████▍     | 449/1000 [40:18<49:43,  5.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a clock on it.']


 45%|████▌     | 450/1000 [40:24<50:08,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view']


 45%|████▌     | 451/1000 [40:29<49:37,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of bananas']


 45%|████▌     | 452/1000 [40:34<49:29,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 45%|████▌     | 453/1000 [40:40<49:22,  5.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 45%|████▌     | 454/1000 [40:46<49:49,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large jetliner sitting on top of a tarmac.']


 46%|████▌     | 455/1000 [40:51<49:50,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 46%|████▌     | 456/1000 [40:57<50:02,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 46%|████▌     | 457/1000 [41:02<49:53,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and sink.']


 46%|████▌     | 458/1000 [41:08<49:38,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 46%|████▌     | 459/1000 [41:13<49:11,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a ski slope.']


 46%|████▌     | 460/1000 [41:18<49:14,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 46%|████▌     | 461/1000 [41:24<48:49,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a child is holding a cell phone.']


 46%|████▌     | 462/1000 [41:29<48:56,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 46%|████▋     | 463/1000 [41:35<48:36,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of animals standing around.']


 46%|████▋     | 464/1000 [41:40<48:40,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a lot of traffic.']


 46%|████▋     | 465/1000 [41:46<48:40,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is sitting down.']


 47%|████▋     | 466/1000 [41:51<48:57,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard in the ocean.']


 47%|████▋     | 467/1000 [41:57<48:30,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of horses standing on top of a grass covered field.']


 47%|████▋     | 468/1000 [42:02<48:50,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 47%|████▋     | 469/1000 [42:08<48:30,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 47%|████▋     | 470/1000 [42:13<48:41,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 47%|████▋     | 471/1000 [42:19<48:14,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave.']


 47%|████▋     | 472/1000 [42:24<48:27,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 47%|████▋     | 473/1000 [42:30<48:05,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat standing in a doorway.']


 47%|████▋     | 474/1000 [42:35<48:24,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a man.']


 48%|████▊     | 475/1000 [42:41<47:51,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant']


 48%|████▊     | 476/1000 [42:46<48:05,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view']


 48%|████▊     | 477/1000 [42:52<47:45,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a wall.']


 48%|████▊     | 478/1000 [42:57<48:02,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a clock on it.']


 48%|████▊     | 479/1000 [43:03<47:45,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a skateboard on top of a lake.']


 48%|████▊     | 480/1000 [43:08<47:47,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bench with a fence in front of it.']


 48%|████▊     | 481/1000 [43:14<47:14,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large room with a table and chairs.']


 48%|████▊     | 482/1000 [43:19<47:15,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in the grass.']


 48%|████▊     | 483/1000 [43:25<47:08,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is on a snowy mountain.']


 48%|████▊     | 484/1000 [43:30<47:04,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small herd of sheep grazing.']


 48%|████▊     | 485/1000 [43:35<46:37,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a motorcycle parked in front of a building.']


 49%|████▊     | 486/1000 [43:41<46:41,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 49%|████▊     | 487/1000 [43:46<46:30,  5.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a plate of food on it']


 49%|████▉     | 488/1000 [43:52<46:28,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a city street with a traffic light.']


 49%|████▉     | 489/1000 [43:57<46:29,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter and a sink.']


 49%|████▉     | 490/1000 [44:03<46:57,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 49%|████▉     | 491/1000 [44:08<46:29,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water.']


 49%|████▉     | 492/1000 [44:14<46:15,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a field.']


 49%|████▉     | 493/1000 [44:19<45:53,  5.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snow covered ski slope.']


 49%|████▉     | 494/1000 [44:25<45:56,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a clock on it.']


 50%|████▉     | 495/1000 [44:30<45:50,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a field.']


 50%|████▉     | 496/1000 [44:36<46:06,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite is flying over a beach.']


 50%|████▉     | 497/1000 [44:41<45:57,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a lush green field.']


 50%|████▉     | 498/1000 [44:47<46:18,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman standing on a tennis court.']


 50%|████▉     | 499/1000 [44:52<46:04,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant standing in a field.']


 50%|█████     | 500/1000 [44:58<46:21,  5.56s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant.']


 50%|█████     | 501/1000 [45:03<45:54,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 50%|█████     | 502/1000 [45:09<45:56,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


 50%|█████     | 503/1000 [45:14<45:28,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant']


 50%|█████     | 504/1000 [45:20<45:31,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


 50%|█████     | 505/1000 [45:25<45:06,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 51%|█████     | 506/1000 [45:31<45:00,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white wall']


 51%|█████     | 507/1000 [45:36<44:48,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man holding a cell phone.']


 51%|█████     | 508/1000 [45:42<45:05,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it']


 51%|█████     | 509/1000 [45:47<44:41,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 51%|█████     | 510/1000 [45:52<44:40,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a train.']


 51%|█████     | 511/1000 [45:58<44:23,  5.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving down the tracks.']


 51%|█████     | 512/1000 [46:03<44:28,  5.47s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus is parked on the side of the road.']


 51%|█████▏    | 513/1000 [46:09<45:34,  5.62s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter and a sink']


 51%|█████▏    | 514/1000 [46:15<45:19,  5.60s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing in front of a ball.']


 52%|█████▏    | 515/1000 [46:20<44:36,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of fruits and vegetables']


 52%|█████▏    | 516/1000 [46:26<44:29,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a grassy field with a few trees in the background.']


 52%|█████▏    | 517/1000 [46:31<44:21,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 52%|█████▏    | 518/1000 [46:37<44:23,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat on a body of water']


 52%|█████▏    | 519/1000 [46:43<47:02,  5.87s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of items on it']


 52%|█████▏    | 520/1000 [46:49<46:27,  5.81s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a sandwich on it.']


 52%|█████▏    | 521/1000 [46:55<45:21,  5.68s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a field.']


 52%|█████▏    | 522/1000 [47:00<45:03,  5.65s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is standing next to a horse.']


 52%|█████▏    | 523/1000 [47:06<44:32,  5.60s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 52%|█████▏    | 524/1000 [47:11<44:29,  5.61s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is standing on a sidewalk.']


 52%|█████▎    | 525/1000 [47:17<45:10,  5.71s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a lush green field.']


 53%|█████▎    | 526/1000 [47:23<44:44,  5.66s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 53%|█████▎    | 527/1000 [47:28<44:00,  5.58s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman with a umbrella']


 53%|█████▎    | 528/1000 [47:34<43:40,  5.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a ski slope with a ski slope.']


 53%|█████▎    | 529/1000 [47:39<43:37,  5.56s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a young girl sitting on a chair.']


 53%|█████▎    | 530/1000 [47:45<43:28,  5.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on top of a snow covered slope.']


 53%|█████▎    | 531/1000 [47:51<44:15,  5.66s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree with a few leaves.']


 53%|█████▎    | 532/1000 [47:56<43:56,  5.63s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a lush green field.']


 53%|█████▎    | 533/1000 [48:02<43:14,  5.56s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 53%|█████▎    | 534/1000 [48:07<43:00,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 54%|█████▎    | 535/1000 [48:13<42:50,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant standing in a field.']


 54%|█████▎    | 536/1000 [48:18<42:54,  5.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 54%|█████▎    | 537/1000 [48:24<43:48,  5.68s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large passenger jet sitting on top of a tarmac.']


 54%|█████▍    | 538/1000 [48:30<43:19,  5.63s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it.']


 54%|█████▍    | 539/1000 [48:35<42:34,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 54%|█████▍    | 540/1000 [48:40<42:24,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a tower in the background.']


 54%|█████▍    | 541/1000 [48:46<41:55,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird sitting on a branch.']


 54%|█████▍    | 542/1000 [48:51<41:48,  5.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting down.']


 54%|█████▍    | 543/1000 [48:57<41:34,  5.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock on a building.']


 54%|█████▍    | 544/1000 [49:02<41:46,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a view of a field.']


 55%|█████▍    | 545/1000 [49:08<41:37,  5.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view']


 55%|█████▍    | 546/1000 [49:13<41:49,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a living room with a couch, chair, and a tv.']


 55%|█████▍    | 547/1000 [49:19<41:42,  5.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white wall']


 55%|█████▍    | 548/1000 [49:25<41:57,  5.57s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a laptop on it']


 55%|█████▍    | 549/1000 [49:30<41:25,  5.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is parked on the runway.']


 55%|█████▌    | 550/1000 [49:36<41:34,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is standing next to a table.']


 55%|█████▌    | 551/1000 [49:41<41:10,  5.50s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of wild animals.']


 55%|█████▌    | 552/1000 [49:47<41:30,  5.56s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite in the sky']


 55%|█████▌    | 553/1000 [49:52<41:14,  5.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is standing on a sidewalk.']


 55%|█████▌    | 554/1000 [49:58<41:21,  5.56s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a clock on it.']


 56%|█████▌    | 555/1000 [50:03<41:09,  5.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a jetliner flying over a jet.']


 56%|█████▌    | 556/1000 [50:09<41:10,  5.56s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat is sitting on a table.']


 56%|█████▌    | 557/1000 [50:15<41:57,  5.68s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 56%|█████▌    | 558/1000 [50:21<41:58,  5.70s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on top of a lush green field.']


 56%|█████▌    | 559/1000 [50:26<41:45,  5.68s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is holding a cell phone.']


 56%|█████▌    | 560/1000 [50:32<41:29,  5.66s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of food on it']


 56%|█████▌    | 561/1000 [50:38<41:51,  5.72s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is sitting down.']


 56%|█████▌    | 562/1000 [50:43<41:48,  5.73s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large grassy area.']


 56%|█████▋    | 563/1000 [50:49<41:11,  5.66s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a yellow table with a bunch of items on it']


 56%|█████▋    | 564/1000 [50:55<41:05,  5.66s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is passing by.']


 56%|█████▋    | 565/1000 [51:01<41:51,  5.77s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing on a dirt field.']


 57%|█████▋    | 566/1000 [51:06<41:15,  5.70s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a lush green hillside.']


 57%|█████▋    | 567/1000 [51:12<40:28,  5.61s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large rock.']


 57%|█████▋    | 568/1000 [51:17<40:13,  5.59s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 57%|█████▋    | 569/1000 [51:23<39:45,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a store with a display of food.']


 57%|█████▋    | 570/1000 [51:28<39:45,  5.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting down next to a table.']


 57%|█████▋    | 571/1000 [51:34<40:26,  5.66s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a lot of furniture.']


 57%|█████▋    | 572/1000 [51:40<40:25,  5.67s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 57%|█████▋    | 573/1000 [51:45<39:43,  5.58s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 57%|█████▋    | 574/1000 [51:51<39:36,  5.58s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis player is holding a racket.']


 57%|█████▊    | 575/1000 [51:56<39:09,  5.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large brown and white cow.']


 58%|█████▊    | 576/1000 [52:02<39:26,  5.58s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a horse standing on a grass covered field.']


 58%|█████▊    | 577/1000 [52:08<40:15,  5.71s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a bike on a dirt road.']


 58%|█████▊    | 578/1000 [52:13<39:57,  5.68s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree.']


 58%|█████▊    | 579/1000 [52:19<39:19,  5.61s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 58%|█████▊    | 580/1000 [52:25<39:31,  5.65s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white and blue toothbrush']


 58%|█████▊    | 581/1000 [52:30<38:56,  5.58s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing on a lush green field.']


 58%|█████▊    | 582/1000 [52:36<39:05,  5.61s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

["a close up of a person's hand"]


 58%|█████▊    | 583/1000 [52:41<38:57,  5.61s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a field.']


 58%|█████▊    | 584/1000 [52:47<39:12,  5.66s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a sidewalk.']


 58%|█████▊    | 585/1000 [52:53<38:43,  5.60s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant is standing in the grass.']


 59%|█████▊    | 586/1000 [52:58<38:43,  5.61s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant is standing in the grass.']


 59%|█████▊    | 587/1000 [53:04<38:44,  5.63s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cow standing in a field.']


 59%|█████▉    | 588/1000 [53:09<38:42,  5.64s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a bike on a dirt road.']


 59%|█████▉    | 589/1000 [53:15<38:14,  5.58s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field.']


 59%|█████▉    | 590/1000 [53:21<38:11,  5.59s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree with a few leaves.']


 59%|█████▉    | 591/1000 [53:26<38:07,  5.59s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cow standing in a field.']


 59%|█████▉    | 592/1000 [53:32<38:05,  5.60s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people on a street.']


 59%|█████▉    | 593/1000 [53:38<38:32,  5.68s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing on top of a beach.']


 59%|█████▉    | 594/1000 [53:44<39:59,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 60%|█████▉    | 595/1000 [53:51<41:49,  6.20s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of animals that are standing in the dirt.']


 60%|█████▉    | 596/1000 [53:57<41:38,  6.18s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small elephant is standing in the grass.']


 60%|█████▉    | 597/1000 [54:03<41:08,  6.13s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting in a chair.']


 60%|█████▉    | 598/1000 [54:09<40:37,  6.06s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of food on it']


 60%|█████▉    | 599/1000 [54:15<40:36,  6.08s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 60%|██████    | 600/1000 [54:21<39:49,  5.97s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small table with a lamp on it.']


 60%|██████    | 601/1000 [54:26<38:48,  5.84s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around a field.']


 60%|██████    | 602/1000 [54:33<40:52,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 60%|██████    | 603/1000 [54:39<39:49,  6.02s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 60%|██████    | 604/1000 [54:45<39:05,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a car parked on a street.']


 60%|██████    | 605/1000 [54:50<38:45,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a train.']


 61%|██████    | 606/1000 [54:57<39:03,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting down.']


 61%|██████    | 607/1000 [55:02<38:42,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 61%|██████    | 608/1000 [55:08<38:24,  5.88s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bench and a bench.']


 61%|██████    | 609/1000 [55:14<38:51,  5.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting on a bench.']


 61%|██████    | 610/1000 [55:20<38:16,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus parked on the side of a road.']


 61%|██████    | 611/1000 [55:26<38:01,  5.87s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane flying over a field.']


 61%|██████    | 612/1000 [55:32<37:36,  5.81s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting on a bench.']


 61%|██████▏   | 613/1000 [55:37<37:34,  5.83s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 61%|██████▏   | 614/1000 [55:43<37:20,  5.80s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a table.']


 62%|██████▏   | 615/1000 [55:50<38:16,  5.97s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of people on it']


 62%|██████▏   | 616/1000 [55:55<37:37,  5.88s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large area of grass.']


 62%|██████▏   | 617/1000 [56:01<37:27,  5.87s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 62%|██████▏   | 618/1000 [56:07<37:02,  5.82s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave.']


 62%|██████▏   | 619/1000 [56:13<36:55,  5.82s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people playing baseball.']


 62%|██████▏   | 620/1000 [56:18<36:45,  5.80s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a car and a bus']


 62%|██████▏   | 621/1000 [56:24<36:40,  5.81s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man on a snow covered slope.']


 62%|██████▏   | 622/1000 [56:30<37:25,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a lot of traffic.']


 62%|██████▏   | 623/1000 [56:36<36:58,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 62%|██████▏   | 624/1000 [56:42<36:42,  5.86s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small herd of sheep grazing.']


 62%|██████▎   | 625/1000 [56:48<36:25,  5.83s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of animals that are standing in the grass.']


 63%|██████▎   | 626/1000 [56:54<37:05,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 63%|██████▎   | 627/1000 [57:00<37:31,  6.04s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting on a bench next to a water.']


 63%|██████▎   | 628/1000 [57:07<38:10,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a tower in the background.']


 63%|██████▎   | 629/1000 [57:13<38:27,  6.22s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 63%|██████▎   | 630/1000 [57:19<38:35,  6.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bed with a pillow and a pillow']


 63%|██████▎   | 631/1000 [57:25<38:10,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 63%|██████▎   | 632/1000 [57:31<37:22,  6.09s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is standing on a bird.']


 63%|██████▎   | 633/1000 [57:37<36:51,  6.03s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small cell phone.']


 63%|██████▎   | 634/1000 [57:43<36:27,  5.98s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a snow covered slope.']


 64%|██████▎   | 635/1000 [57:49<36:08,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a living room with a couch and a tv.']


 64%|██████▎   | 636/1000 [57:55<36:01,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 64%|██████▎   | 637/1000 [58:01<35:48,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large grassy field.']


 64%|██████▍   | 638/1000 [58:06<35:31,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small tree']


 64%|██████▍   | 639/1000 [58:12<35:37,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 64%|██████▍   | 640/1000 [58:19<36:35,  6.10s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 64%|██████▍   | 641/1000 [58:25<36:53,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small herd of sheep.']


 64%|██████▍   | 642/1000 [58:32<37:12,  6.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water.']


 64%|██████▍   | 643/1000 [58:38<37:07,  6.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 64%|██████▍   | 644/1000 [58:44<37:15,  6.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 64%|██████▍   | 645/1000 [58:51<37:04,  6.27s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 65%|██████▍   | 646/1000 [58:56<36:11,  6.13s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on top of a snow covered slope.']


 65%|██████▍   | 647/1000 [59:02<35:56,  6.11s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a sunny day.']


 65%|██████▍   | 648/1000 [59:08<35:34,  6.06s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a couch, chairs, and a table.']


 65%|██████▍   | 649/1000 [59:15<35:39,  6.10s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 65%|██████▌   | 650/1000 [59:21<35:24,  6.07s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman wearing a hat and holding a cell phone.']


 65%|██████▌   | 651/1000 [59:27<35:14,  6.06s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave on a surfboard.']


 65%|██████▌   | 652/1000 [59:32<34:41,  5.98s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large brown bear.']


 65%|██████▌   | 653/1000 [59:38<34:24,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a woman.']


 65%|██████▌   | 654/1000 [59:44<34:10,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a picture of a room.']


 66%|██████▌   | 655/1000 [59:50<34:02,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 66%|██████▌   | 656/1000 [59:56<33:55,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of people sitting on it']


 66%|██████▌   | 657/1000 [1:00:02<33:46,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 66%|██████▌   | 658/1000 [1:00:08<33:43,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a red and white bus']


 66%|██████▌   | 659/1000 [1:00:14<33:33,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock on a building']


 66%|██████▌   | 660/1000 [1:00:20<33:28,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a chair and a table with a lamp on it.']


 66%|██████▌   | 661/1000 [1:00:25<33:26,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a child is eating a piece of food.']


 66%|██████▌   | 662/1000 [1:00:31<33:24,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the side of the road.']


 66%|██████▋   | 663/1000 [1:00:37<33:16,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of zebras standing in a field.']


 66%|██████▋   | 664/1000 [1:00:43<33:17,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 66%|██████▋   | 665/1000 [1:00:49<33:09,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large airplane is parked on a runway.']


 67%|██████▋   | 666/1000 [1:00:55<33:05,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large jetliner sitting on top of a tarmac.']


 67%|██████▋   | 667/1000 [1:01:01<33:01,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a lot of items on it']


 67%|██████▋   | 668/1000 [1:01:07<32:53,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis player is holding a racket.']


 67%|██████▋   | 669/1000 [1:01:13<32:50,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a computer on it.']


 67%|██████▋   | 670/1000 [1:01:19<32:38,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a motorcycle parked on the side of a road.']


 67%|██████▋   | 671/1000 [1:01:25<32:30,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snow covered mountain.']


 67%|██████▋   | 672/1000 [1:01:31<32:28,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a lush green field.']


 67%|██████▋   | 673/1000 [1:01:37<32:21,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a sink and a mirror.']


 67%|██████▋   | 674/1000 [1:01:43<32:20,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is standing on a branch.']


 68%|██████▊   | 675/1000 [1:01:49<32:13,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing on top of a building.']


 68%|██████▊   | 676/1000 [1:01:55<32:15,  5.97s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop and a laptop on it.']


 68%|██████▊   | 677/1000 [1:02:01<32:06,  5.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 68%|██████▊   | 678/1000 [1:02:07<31:54,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a young boy is sitting in a chair.']


 68%|██████▊   | 679/1000 [1:02:12<31:31,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a tree.']


 68%|██████▊   | 680/1000 [1:02:18<31:27,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a lush green field.']


 68%|██████▊   | 681/1000 [1:02:24<31:17,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 68%|██████▊   | 682/1000 [1:02:30<31:11,  5.88s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat is parked on the side of the road.']


 68%|██████▊   | 683/1000 [1:02:36<31:08,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing in front of a ball.']


 68%|██████▊   | 684/1000 [1:02:42<31:05,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 68%|██████▊   | 685/1000 [1:02:48<30:57,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large field with a large grassy area.']


 69%|██████▊   | 686/1000 [1:02:54<30:52,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 69%|██████▊   | 687/1000 [1:03:00<30:49,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting on top of a building.']


 69%|██████▉   | 688/1000 [1:03:06<30:45,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 69%|██████▉   | 689/1000 [1:03:11<30:34,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a skateboard down a snow covered slope.']


 69%|██████▉   | 690/1000 [1:03:17<30:36,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop and a monitor']


 69%|██████▉   | 691/1000 [1:03:23<30:26,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a vase and a vase with flowers on it.']


 69%|██████▉   | 692/1000 [1:03:29<30:16,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a train.']


 69%|██████▉   | 693/1000 [1:03:35<30:09,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 69%|██████▉   | 694/1000 [1:03:41<30:01,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large white and brown building.']


 70%|██████▉   | 695/1000 [1:03:47<30:01,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of horses standing in a field.']


 70%|██████▉   | 696/1000 [1:03:53<31:02,  6.13s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 70%|██████▉   | 697/1000 [1:04:00<31:42,  6.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a lot of traffic.']


 70%|██████▉   | 698/1000 [1:04:07<32:31,  6.46s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a computer desk with a laptop on it.']


 70%|██████▉   | 699/1000 [1:04:14<32:43,  6.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a laptop computer sitting on top of a table.']


 70%|███████   | 700/1000 [1:04:20<32:36,  6.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a woman.']


 70%|███████   | 701/1000 [1:04:27<32:35,  6.54s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of different types of flowers.']


 70%|███████   | 702/1000 [1:04:33<32:21,  6.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bed with a white sheet and a wooden floor.']


 70%|███████   | 703/1000 [1:04:40<32:29,  6.57s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 70%|███████   | 704/1000 [1:04:46<31:58,  6.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of trees']


 70%|███████   | 705/1000 [1:04:53<32:17,  6.57s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large grassy field.']


 71%|███████   | 706/1000 [1:05:00<32:17,  6.59s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is standing in the grass.']


 71%|███████   | 707/1000 [1:05:06<32:04,  6.57s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man on a snow covered slope.']


 71%|███████   | 708/1000 [1:05:13<31:52,  6.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a young man sitting on a couch.']


 71%|███████   | 709/1000 [1:05:19<31:50,  6.57s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large group of trees.']


 71%|███████   | 710/1000 [1:05:25<30:59,  6.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 71%|███████   | 711/1000 [1:05:31<30:20,  6.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is standing in the air.']


 71%|███████   | 712/1000 [1:05:37<29:55,  6.23s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a motorcycle parked on the side of a road.']


 71%|███████▏  | 713/1000 [1:05:43<29:28,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 71%|███████▏  | 714/1000 [1:05:49<29:10,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a horse grazing in a field.']


 72%|███████▏  | 715/1000 [1:05:55<28:48,  6.06s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 72%|███████▏  | 716/1000 [1:06:01<28:37,  6.05s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman holding a cell phone.']


 72%|███████▏  | 717/1000 [1:06:07<28:22,  6.01s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 72%|███████▏  | 718/1000 [1:06:13<28:16,  6.01s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a car is sitting in front of a window.']


 72%|███████▏  | 719/1000 [1:06:19<28:05,  6.00s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 72%|███████▏  | 720/1000 [1:06:25<28:04,  6.02s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a traffic light.']


 72%|███████▏  | 721/1000 [1:06:31<28:01,  6.03s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is standing on a snowy hill.']


 72%|███████▏  | 722/1000 [1:06:37<27:54,  6.02s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a couch and a table']


 72%|███████▏  | 723/1000 [1:06:43<27:45,  6.01s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people on a beach.']


 72%|███████▏  | 724/1000 [1:06:49<27:34,  5.99s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of different types of fruit']


 72%|███████▎  | 725/1000 [1:06:55<27:21,  5.97s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a train.']


 73%|███████▎  | 726/1000 [1:07:01<27:17,  5.97s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock on a wall']


 73%|███████▎  | 727/1000 [1:07:07<27:07,  5.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large room with a table and chairs.']


 73%|███████▎  | 728/1000 [1:07:13<26:54,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 73%|███████▎  | 729/1000 [1:07:19<27:16,  6.04s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a building.']


 73%|███████▎  | 730/1000 [1:07:26<27:53,  6.20s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting on a bench.']


 73%|███████▎  | 731/1000 [1:07:32<27:20,  6.10s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a snowboard down a snow covered slope.']


 73%|███████▎  | 732/1000 [1:07:38<27:00,  6.05s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boy is playing with a kite.']


 73%|███████▎  | 733/1000 [1:07:44<26:42,  6.00s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 73%|███████▎  | 734/1000 [1:07:49<26:29,  5.98s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis player is holding a racket.']


 74%|███████▎  | 735/1000 [1:07:55<26:12,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 74%|███████▎  | 736/1000 [1:08:01<26:09,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a train.']


 74%|███████▎  | 737/1000 [1:08:07<26:03,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bunch of vegetables']


 74%|███████▍  | 738/1000 [1:08:13<25:54,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a table and chairs']


 74%|███████▍  | 739/1000 [1:08:19<25:49,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a sink and a mirror.']


 74%|███████▍  | 740/1000 [1:08:25<25:38,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is standing on a branch.']


 74%|███████▍  | 741/1000 [1:08:31<25:32,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a truck is parked on the side of the road.']


 74%|███████▍  | 742/1000 [1:08:37<25:23,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a mirror.']


 74%|███████▍  | 743/1000 [1:08:43<25:18,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man holding a cell phone.']


 74%|███████▍  | 744/1000 [1:08:49<25:10,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is standing on a sidewalk.']


 74%|███████▍  | 745/1000 [1:08:54<25:00,  5.88s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a tower in the background.']


 75%|███████▍  | 746/1000 [1:09:00<24:59,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in a field.']


 75%|███████▍  | 747/1000 [1:09:06<24:51,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus is parked on the side of the road.']


 75%|███████▍  | 748/1000 [1:09:12<24:47,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 75%|███████▍  | 749/1000 [1:09:18<24:38,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 75%|███████▌  | 750/1000 [1:09:24<24:34,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of food on it']


 75%|███████▌  | 751/1000 [1:09:30<24:28,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in a room.']


 75%|███████▌  | 752/1000 [1:09:36<24:28,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 75%|███████▌  | 753/1000 [1:09:42<24:25,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large jetliner sitting on top of a grass covered field.']


 75%|███████▌  | 754/1000 [1:09:48<24:20,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 76%|███████▌  | 755/1000 [1:09:54<24:14,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving down the tracks.']


 76%|███████▌  | 756/1000 [1:09:59<24:01,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is holding a piece of food.']


 76%|███████▌  | 757/1000 [1:10:05<24:02,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 76%|███████▌  | 758/1000 [1:10:11<24:01,  5.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 76%|███████▌  | 759/1000 [1:10:17<23:48,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a street sign and a street sign.']


 76%|███████▌  | 760/1000 [1:10:23<23:47,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is parked on the runway.']


 76%|███████▌  | 761/1000 [1:10:29<23:37,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing on a field.']


 76%|███████▌  | 762/1000 [1:10:35<23:28,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting around a table.']


 76%|███████▋  | 763/1000 [1:10:41<23:25,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 76%|███████▋  | 764/1000 [1:10:47<23:18,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a bowl of food on it.']


 76%|███████▋  | 765/1000 [1:10:53<23:13,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a young man is holding a cell phone.']


 77%|███████▋  | 766/1000 [1:10:59<23:13,  5.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a clock on it.']


 77%|███████▋  | 767/1000 [1:11:05<23:07,  5.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a truck parked next to a truck.']


 77%|███████▋  | 768/1000 [1:11:11<23:06,  5.98s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a sink and a mirror.']


 77%|███████▋  | 769/1000 [1:11:17<23:05,  6.00s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in the grass.']


 77%|███████▋  | 770/1000 [1:11:23<23:05,  6.02s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree.']


 77%|███████▋  | 771/1000 [1:11:29<23:04,  6.05s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing on a hill.']


 77%|███████▋  | 772/1000 [1:11:35<22:55,  6.03s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock on a building.']


 77%|███████▋  | 773/1000 [1:11:41<22:44,  6.01s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of zebra grazing.']


 77%|███████▋  | 774/1000 [1:11:47<22:42,  6.03s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis player is holding a racket.']


 78%|███████▊  | 775/1000 [1:11:53<22:50,  6.09s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a fence.']


 78%|███████▊  | 776/1000 [1:12:00<23:02,  6.17s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 78%|███████▊  | 777/1000 [1:12:06<22:45,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small white and black animal']


 78%|███████▊  | 778/1000 [1:12:12<22:18,  6.03s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 78%|███████▊  | 779/1000 [1:12:18<22:19,  6.06s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large pile of luggage.']


 78%|███████▊  | 780/1000 [1:12:24<22:10,  6.05s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a spoon.']


 78%|███████▊  | 781/1000 [1:12:30<22:19,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a young man is holding a small piece of paper.']


 78%|███████▊  | 782/1000 [1:12:36<22:03,  6.07s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a plate and a cup']


 78%|███████▊  | 783/1000 [1:12:42<21:46,  6.02s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it']


 78%|███████▊  | 784/1000 [1:12:48<22:00,  6.11s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of items on it']


 78%|███████▊  | 785/1000 [1:12:54<21:39,  6.04s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is standing on a sidewalk.']


 79%|███████▊  | 786/1000 [1:13:00<21:27,  6.02s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding down a hill.']


 79%|███████▊  | 787/1000 [1:13:06<21:16,  5.99s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard in the ocean.']


 79%|███████▉  | 788/1000 [1:13:12<21:05,  5.97s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white table with a mirror']


 79%|███████▉  | 789/1000 [1:13:18<20:54,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman is holding a plate.']


 79%|███████▉  | 790/1000 [1:13:24<20:46,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small room with a lot of furniture.']


 79%|███████▉  | 791/1000 [1:13:30<20:39,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a tower in the background.']


 79%|███████▉  | 792/1000 [1:13:36<20:30,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 79%|███████▉  | 793/1000 [1:13:41<20:28,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a building with a clock on it.']


 79%|███████▉  | 794/1000 [1:13:47<20:21,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard in the water.']


 80%|███████▉  | 795/1000 [1:13:53<20:14,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing on a field.']


 80%|███████▉  | 796/1000 [1:13:59<20:08,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view']


 80%|███████▉  | 797/1000 [1:14:05<19:59,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 80%|███████▉  | 798/1000 [1:14:11<19:53,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 80%|███████▉  | 799/1000 [1:14:17<19:48,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of food on it']


 80%|████████  | 800/1000 [1:14:23<19:46,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of different types of food on it.']


 80%|████████  | 801/1000 [1:14:29<19:49,  5.98s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of horses standing in a field.']


 80%|████████  | 802/1000 [1:14:35<19:40,  5.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing on a grass covered field.']


 80%|████████  | 803/1000 [1:14:41<19:59,  6.09s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cow standing on a field.']


 80%|████████  | 804/1000 [1:14:48<20:13,  6.19s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 80%|████████  | 805/1000 [1:14:54<19:50,  6.11s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a sign and a car']


 81%|████████  | 806/1000 [1:15:00<19:36,  6.06s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 81%|████████  | 807/1000 [1:15:06<19:19,  6.01s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a computer and a keyboard']


 81%|████████  | 808/1000 [1:15:12<19:18,  6.03s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


 81%|████████  | 809/1000 [1:15:17<19:02,  5.98s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane flying in the sky.']


 81%|████████  | 810/1000 [1:15:23<18:52,  5.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small group of animals.']


 81%|████████  | 811/1000 [1:15:29<18:52,  5.99s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of giraffes standing around.']


 81%|████████  | 812/1000 [1:15:35<18:48,  6.00s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the tracks.']


 81%|████████▏ | 813/1000 [1:15:42<18:50,  6.04s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 81%|████████▏ | 814/1000 [1:15:48<18:40,  6.02s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small plane is parked on the ground.']


 82%|████████▏ | 815/1000 [1:15:54<18:30,  6.00s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree']


 82%|████████▏ | 816/1000 [1:15:59<18:15,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 82%|████████▏ | 817/1000 [1:16:05<18:07,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis court with a player on it.']


 82%|████████▏ | 818/1000 [1:16:11<17:53,  5.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 82%|████████▏ | 819/1000 [1:16:17<17:46,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it.']


 82%|████████▏ | 820/1000 [1:16:23<17:39,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 82%|████████▏ | 821/1000 [1:16:29<17:39,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of animals standing around.']


 82%|████████▏ | 822/1000 [1:16:35<17:32,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a snow covered slope.']


 82%|████████▏ | 823/1000 [1:16:41<17:20,  5.88s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a grassy field with a grazing animal.']


 82%|████████▏ | 824/1000 [1:16:46<17:10,  5.85s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large number of different types of flowers.']


 82%|████████▎ | 825/1000 [1:16:52<17:01,  5.84s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a bed and a table']


 83%|████████▎ | 826/1000 [1:16:58<16:54,  5.83s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around a building.']


 83%|████████▎ | 827/1000 [1:17:04<16:50,  5.84s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bench with a view.']


 83%|████████▎ | 828/1000 [1:17:10<17:00,  5.93s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the tracks.']


 83%|████████▎ | 829/1000 [1:17:16<17:05,  6.00s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 83%|████████▎ | 830/1000 [1:17:22<17:15,  6.09s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 83%|████████▎ | 831/1000 [1:17:29<17:14,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snow covered hill.']


 83%|████████▎ | 832/1000 [1:17:35<17:07,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing on a dirt road.']


 83%|████████▎ | 833/1000 [1:17:41<17:12,  6.18s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a clock on it.']


 83%|████████▎ | 834/1000 [1:17:47<16:58,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a sidewalk next to a building.']


 84%|████████▎ | 835/1000 [1:17:53<16:45,  6.09s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant.']


 84%|████████▎ | 836/1000 [1:17:59<16:35,  6.07s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a bed and a table']


 84%|████████▎ | 837/1000 [1:18:05<16:30,  6.08s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a car is parked in front of a car.']


 84%|████████▍ | 838/1000 [1:18:12<16:43,  6.20s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman sitting on a bench next to a car.']


 84%|████████▍ | 839/1000 [1:18:18<16:23,  6.11s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is standing in front of a fence.']


 84%|████████▍ | 840/1000 [1:18:24<16:14,  6.09s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of sheep grazing on a lush green field.']


 84%|████████▍ | 841/1000 [1:18:30<16:02,  6.06s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a view of a tree.']


 84%|████████▍ | 842/1000 [1:18:36<16:00,  6.08s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a view']


 84%|████████▍ | 843/1000 [1:18:42<15:49,  6.05s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave.']


 84%|████████▍ | 844/1000 [1:18:48<15:44,  6.05s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skateboarder is riding on a skateboard.']


 84%|████████▍ | 845/1000 [1:18:54<15:36,  6.04s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a young man is playing with a frisbee.']


 85%|████████▍ | 846/1000 [1:19:00<15:28,  6.03s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of items on it']


 85%|████████▍ | 847/1000 [1:19:06<15:39,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man wearing a suit and tie.']


 85%|████████▍ | 848/1000 [1:19:12<15:23,  6.07s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a chair.']


 85%|████████▍ | 849/1000 [1:19:18<15:11,  6.04s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is holding a cell phone.']


 85%|████████▌ | 850/1000 [1:19:24<15:00,  6.00s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of sheep grazing on a field.']


 85%|████████▌ | 851/1000 [1:19:30<14:55,  6.01s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food.']


 85%|████████▌ | 852/1000 [1:19:36<14:51,  6.02s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a tower in the background.']


 85%|████████▌ | 853/1000 [1:19:42<14:49,  6.05s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 85%|████████▌ | 854/1000 [1:19:49<15:01,  6.18s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a clock on it.']


 86%|████████▌ | 855/1000 [1:19:55<14:49,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a view of a room.']


 86%|████████▌ | 856/1000 [1:20:01<14:48,  6.17s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large building with a lot of windows.']


 86%|████████▌ | 857/1000 [1:20:07<14:38,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is sitting on the tarmac.']


 86%|████████▌ | 858/1000 [1:20:13<14:41,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food.']


 86%|████████▌ | 859/1000 [1:20:19<14:28,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a table.']


 86%|████████▌ | 860/1000 [1:20:26<14:19,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing on top of a building.']


 86%|████████▌ | 861/1000 [1:20:32<14:25,  6.22s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around a table.']


 86%|████████▌ | 862/1000 [1:20:38<14:22,  6.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave on a surfboard.']


 86%|████████▋ | 863/1000 [1:20:44<14:10,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it.']


 86%|████████▋ | 864/1000 [1:20:51<14:08,  6.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a fire hydrant.']


 86%|████████▋ | 865/1000 [1:20:57<14:09,  6.29s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 87%|████████▋ | 866/1000 [1:21:03<13:54,  6.23s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork on it.']


 87%|████████▋ | 867/1000 [1:21:09<13:43,  6.20s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock tower with a tower in the background.']


 87%|████████▋ | 868/1000 [1:21:16<13:39,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a keyboard and a laptop on it.']


 87%|████████▋ | 869/1000 [1:21:22<13:33,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 87%|████████▋ | 870/1000 [1:21:28<13:24,  6.19s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 87%|████████▋ | 871/1000 [1:21:34<13:19,  6.20s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a clock on a building.']


 87%|████████▋ | 872/1000 [1:21:40<13:14,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 87%|████████▋ | 873/1000 [1:21:47<13:12,  6.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus parked next to a bus and a bus.']


 87%|████████▋ | 874/1000 [1:21:54<13:43,  6.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a window.']


 88%|████████▊ | 875/1000 [1:22:01<13:54,  6.68s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a bench.']


 88%|████████▊ | 876/1000 [1:22:08<13:59,  6.77s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man sitting on a bench.']


 88%|████████▊ | 877/1000 [1:22:15<14:08,  6.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis player is holding a racket.']


 88%|████████▊ | 878/1000 [1:22:22<13:58,  6.87s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a lot of people walking around.']


 88%|████████▊ | 879/1000 [1:22:29<13:44,  6.81s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard']


 88%|████████▊ | 880/1000 [1:22:35<13:12,  6.61s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of zebras standing together.']


 88%|████████▊ | 881/1000 [1:22:41<12:51,  6.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 88%|████████▊ | 882/1000 [1:22:47<12:32,  6.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man riding a bike on a dirt road.']


 88%|████████▊ | 883/1000 [1:22:53<12:18,  6.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a white wall']


 88%|████████▊ | 884/1000 [1:22:59<12:05,  6.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is on the field.']


 88%|████████▊ | 885/1000 [1:23:05<11:56,  6.23s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a field.']


 89%|████████▊ | 886/1000 [1:23:12<11:47,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a couch, chair, and a table.']


 89%|████████▊ | 887/1000 [1:23:18<11:41,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a fence.']


 89%|████████▉ | 888/1000 [1:23:24<11:32,  6.19s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter and a sink.']


 89%|████████▉ | 889/1000 [1:23:30<11:25,  6.17s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork.']


 89%|████████▉ | 890/1000 [1:23:36<11:27,  6.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a dog is standing in front of a wall.']


 89%|████████▉ | 891/1000 [1:23:43<11:17,  6.22s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 89%|████████▉ | 892/1000 [1:23:49<11:12,  6.22s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the tracks.']


 89%|████████▉ | 893/1000 [1:23:55<11:08,  6.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tree in a field']


 89%|████████▉ | 894/1000 [1:24:01<11:01,  6.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a laptop on it.']


 90%|████████▉ | 895/1000 [1:24:08<10:52,  6.22s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a small road with a lot of trees.']


 90%|████████▉ | 896/1000 [1:24:14<10:56,  6.31s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a vase with flowers and a vase with flowers.']


 90%|████████▉ | 897/1000 [1:24:21<10:57,  6.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a fire hydrant.']


 90%|████████▉ | 898/1000 [1:24:27<10:55,  6.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a cat sitting on a bench.']


 90%|████████▉ | 899/1000 [1:24:34<10:57,  6.51s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in the middle of a field.']


 90%|█████████ | 900/1000 [1:24:40<10:40,  6.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is flying over the runway.']


 90%|█████████ | 901/1000 [1:24:46<10:23,  6.30s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large passenger jet.']


 90%|█████████ | 902/1000 [1:24:52<10:11,  6.24s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a counter and a sink']


 90%|█████████ | 903/1000 [1:24:58<09:59,  6.18s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 90%|█████████ | 904/1000 [1:25:04<09:51,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a table and chairs.']


 90%|█████████ | 905/1000 [1:25:10<09:42,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer riding a wave.']


 91%|█████████ | 906/1000 [1:25:17<09:36,  6.13s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food']


 91%|█████████ | 907/1000 [1:25:23<09:28,  6.11s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is flying in the air.']


 91%|█████████ | 908/1000 [1:25:29<09:21,  6.11s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus is parked in front of a building.']


 91%|█████████ | 909/1000 [1:25:35<09:15,  6.10s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 91%|█████████ | 910/1000 [1:25:41<09:11,  6.13s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in the grass.']


 91%|█████████ | 911/1000 [1:25:47<09:04,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of zebras grazing.']


 91%|█████████ | 912/1000 [1:25:53<08:56,  6.10s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large airplane flying in the sky.']


 91%|█████████▏| 913/1000 [1:25:59<08:52,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a grassy field with a grazing animal.']


 91%|█████████▏| 914/1000 [1:26:05<08:43,  6.09s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a display of food.']


 92%|█████████▏| 915/1000 [1:26:11<08:37,  6.08s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting around a table.']


 92%|█████████▏| 916/1000 [1:26:18<08:33,  6.11s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis player on a court']


 92%|█████████▏| 917/1000 [1:26:24<08:28,  6.13s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large field with a herd of cattle.']


 92%|█████████▏| 918/1000 [1:26:30<08:23,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a sink and a counter.']


 92%|█████████▏| 919/1000 [1:26:36<08:18,  6.15s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a wall.']


 92%|█████████▏| 920/1000 [1:26:42<08:10,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a computer and a laptop on it.']


 92%|█████████▏| 921/1000 [1:26:48<08:05,  6.15s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing in front of a tree.']


 92%|█████████▏| 922/1000 [1:26:54<07:59,  6.14s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat is in the water near a large body of water.']


 92%|█████████▏| 923/1000 [1:27:01<07:53,  6.15s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large jetliner sitting on top of a cement.']


 92%|█████████▏| 924/1000 [1:27:07<07:48,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people sitting on top of a building.']


 92%|█████████▎| 925/1000 [1:27:14<07:59,  6.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 93%|█████████▎| 926/1000 [1:27:21<08:08,  6.61s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


 93%|█████████▎| 927/1000 [1:27:28<08:04,  6.64s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of animals grazing on a field.']


 93%|█████████▎| 928/1000 [1:27:34<07:57,  6.63s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a lot of furniture.']


 93%|█████████▎| 929/1000 [1:27:41<07:51,  6.64s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a fork on it.']


 93%|█████████▎| 930/1000 [1:27:47<07:36,  6.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 93%|█████████▎| 931/1000 [1:27:53<07:21,  6.40s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a tennis player is on the court.']


 93%|█████████▎| 932/1000 [1:28:00<07:13,  6.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus parked in front of a building.']


 93%|█████████▎| 933/1000 [1:28:06<07:06,  6.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a beach next to a surfboard.']


 93%|█████████▎| 934/1000 [1:28:12<07:03,  6.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 94%|█████████▎| 935/1000 [1:28:19<06:55,  6.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


 94%|█████████▎| 936/1000 [1:28:25<06:48,  6.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat on a body of water.']


 94%|█████████▎| 937/1000 [1:28:31<06:39,  6.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large tree.']


 94%|█████████▍| 938/1000 [1:28:38<06:47,  6.57s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a bunch of items on it']


 94%|█████████▍| 939/1000 [1:28:46<06:52,  6.75s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a display of items.']


 94%|█████████▍| 940/1000 [1:28:53<06:56,  6.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bed with a blanket']


 94%|█████████▍| 941/1000 [1:29:00<06:50,  6.96s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around each other.']


 94%|█████████▍| 942/1000 [1:29:07<06:40,  6.90s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing in a field.']


 94%|█████████▍| 943/1000 [1:29:13<06:22,  6.72s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large body of water']


 94%|█████████▍| 944/1000 [1:29:19<06:04,  6.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bowl of fruit with a bowl of fruit on it.']


 94%|█████████▍| 945/1000 [1:29:25<05:53,  6.43s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


 95%|█████████▍| 946/1000 [1:29:31<05:38,  6.28s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a skier is skiing down a hill.']


 95%|█████████▍| 947/1000 [1:29:37<05:27,  6.17s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around a fence.']


 95%|█████████▍| 948/1000 [1:29:43<05:15,  6.07s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer is riding a wave.']


 95%|█████████▍| 949/1000 [1:29:49<05:07,  6.03s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of birds sitting on top of a grass covered field.']


 95%|█████████▌| 950/1000 [1:29:55<04:58,  5.98s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a skateboard.']


 95%|█████████▌| 951/1000 [1:30:01<04:51,  5.94s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a table with a plate of food on it']


 95%|█████████▌| 952/1000 [1:30:07<04:43,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is driving past a train.']


 95%|█████████▌| 953/1000 [1:30:12<04:37,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a lot of furniture.']


 95%|█████████▌| 954/1000 [1:30:18<04:32,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bench in front of a building.']


 96%|█████████▌| 955/1000 [1:30:24<04:25,  5.91s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a surfboard.']


 96%|█████████▌| 956/1000 [1:30:30<04:19,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on top of a lush green field.']


 96%|█████████▌| 957/1000 [1:30:36<04:13,  5.89s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a herd of cattle grazing.']


 96%|█████████▌| 958/1000 [1:30:42<04:09,  5.95s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of trees']


 96%|█████████▌| 959/1000 [1:30:48<04:02,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a large elephant in the water.']


 96%|█████████▌| 960/1000 [1:30:54<03:56,  5.92s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a tennis court.']


 96%|█████████▌| 961/1000 [1:31:00<03:53,  6.00s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a desk with a bunch of items on it']


 96%|█████████▌| 962/1000 [1:31:06<03:48,  6.01s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of people standing around a building.']


 96%|█████████▋| 963/1000 [1:31:12<03:44,  6.06s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing on a sidewalk.']


 96%|█████████▋| 964/1000 [1:31:18<03:38,  6.07s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is skiing on a snowy day.']


 96%|█████████▋| 965/1000 [1:31:24<03:32,  6.07s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a child eating.']


 97%|█████████▋| 966/1000 [1:31:31<03:28,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a surfer on a surfboard']


 97%|█████████▋| 967/1000 [1:31:37<03:21,  6.12s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a man.']


 97%|█████████▋| 968/1000 [1:31:43<03:17,  6.18s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a building.']


 97%|█████████▋| 969/1000 [1:31:50<03:16,  6.34s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing next to a tree.']


 97%|█████████▋| 970/1000 [1:31:56<03:11,  6.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 97%|█████████▋| 971/1000 [1:32:03<03:06,  6.45s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man on a motorcycle in a field.']


 97%|█████████▋| 972/1000 [1:32:09<02:57,  6.33s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a baseball player is standing in front of a ball.']


 97%|█████████▋| 973/1000 [1:32:15<02:50,  6.32s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a woman holding a cell phone.']


 97%|█████████▋| 974/1000 [1:32:21<02:42,  6.25s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


 98%|█████████▊| 975/1000 [1:32:27<02:35,  6.21s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a zebra standing on a dirt field.']


 98%|█████████▊| 976/1000 [1:32:34<02:28,  6.19s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


 98%|█████████▊| 977/1000 [1:32:40<02:21,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a room with a lot of furniture.']


 98%|█████████▊| 978/1000 [1:32:46<02:15,  6.17s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kite flying over a beach.']


 98%|█████████▊| 979/1000 [1:32:52<02:09,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is on a snowy hill.']


 98%|█████████▊| 980/1000 [1:32:58<02:03,  6.16s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a fenced in area with a fence.']


 98%|█████████▊| 981/1000 [1:33:04<01:56,  6.15s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plate of food with a person sitting on it.']


 98%|█████████▊| 982/1000 [1:33:11<01:55,  6.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a plane is flying in the air.']


 98%|█████████▊| 983/1000 [1:33:18<01:48,  6.38s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a ski slope with a skier in the foreground.']


 98%|█████████▊| 984/1000 [1:33:24<01:42,  6.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a kitchen with a stove and a refrigerator.']


 98%|█████████▊| 985/1000 [1:33:30<01:35,  6.36s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a motorcycle parked on the side of a road.']


 99%|█████████▊| 986/1000 [1:33:37<01:29,  6.37s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man standing next to a snow covered slope.']


 99%|█████████▊| 987/1000 [1:33:43<01:23,  6.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bus driving down a street.']


 99%|█████████▉| 988/1000 [1:33:50<01:17,  6.48s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a group of wild animals grazing.']


 99%|█████████▉| 989/1000 [1:33:56<01:11,  6.49s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a man is standing on a skateboard.']


 99%|█████████▉| 990/1000 [1:34:03<01:04,  6.41s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a boat is parked on the side of the road.']


 99%|█████████▉| 991/1000 [1:34:09<00:57,  6.39s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a street with a lot of traffic.']


 99%|█████████▉| 992/1000 [1:34:15<00:51,  6.44s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bird is sitting on a piece of land.']


 99%|█████████▉| 993/1000 [1:34:22<00:45,  6.53s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a young man is holding a cell phone.']


 99%|█████████▉| 994/1000 [1:34:29<00:39,  6.67s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a couple of elephants standing in a field.']


100%|█████████▉| 995/1000 [1:34:35<00:32,  6.52s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a snowboarder is skiing down a hill.']


100%|█████████▉| 996/1000 [1:34:42<00:26,  6.55s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a train is parked on the side of the road.']


100%|█████████▉| 997/1000 [1:34:48<00:19,  6.42s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a giraffe standing in the grass.']


100%|█████████▉| 998/1000 [1:34:54<00:12,  6.35s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a bathroom with a toilet and a sink.']


100%|█████████▉| 999/1000 [1:35:00<00:06,  6.26s/it]

sampling loop time step:   0%|          | 0/19 [00:00<?, ?it/s]

['a living room with a couch, chair, and table.']


100%|██████████| 1000/1000 [1:35:07<00:00,  5.71s/it]


torch.Size([1000, 3, 256, 256])
saved final_subj01_pretrained_40sess_24bs outputs!
