In [1]:
import os
import sys
import json
import argparse
import numpy as np
import math
from einops import rearrange
import time
import random
import string
import h5py
from tqdm import tqdm
import webdataset as wds

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import transforms
from accelerate import Accelerator, DeepSpeedPlugin

# SDXL unCLIP requires code from https://github.com/Stability-AI/generative-models/tree/main
sys.path.append('generative_models/')
import sgm
from generative_models.sgm.modules.encoders.modules import FrozenOpenCLIPImageEmbedder, FrozenOpenCLIPEmbedder2
from generative_models.sgm.models.diffusion import DiffusionEngine
from generative_models.sgm.util import append_dims
from omegaconf import OmegaConf

# tf32 data type is faster than standard float32
torch.backends.cuda.matmul.allow_tf32 = True

# custom functions #
import utils
from models import *

### Multi-GPU config ###
# The process rank is read from the RANK environment variable; it defaults to 0 when unset.
local_rank = int(os.getenv('RANK', 0))
print("LOCAL RANK ", local_rank)

# fp16 mixed precision; each process keeps its own full-size batches
accelerator = Accelerator(mixed_precision="fp16", split_batches=False)
device = accelerator.device
print("device:", device)

  from .autonotebook import tqdm as notebook_tqdm


LOCAL RANK  0
device: cuda


In [13]:
# When running interactively (e.g. inside Jupyter), build the argument list here
# so the argparse cell can parse `jupyter_args` instead of the process command line.
if utils.is_interactive():
    model_name = "control_MEV2"
    print("model_name:", model_name)

    # Command-line style options for the parser, written as one string.
    # The extra whitespace from the line continuations is discarded by .split() below.
    jupyter_args = f"--data_path=/weka/proj-fmri/shared/mindeyev2_dataset \
                    --model_name={model_name} --subj=1 \
                    --hidden_dim=1024 --n_blocks=4 --new_test"
    print(jupyter_args)
    jupyter_args = jupyter_args.split()
    
    from IPython.display import clear_output # clears the printed output of a cell
    %load_ext autoreload 
    # autoreload mode 2: edits to models.py or utils.py are picked up without restarting the kernel
    %autoreload 2 

model_name: control_MEV2
--data_path=/weka/proj-fmri/shared/mindeyev2_dataset                     --model_name=control_MEV2 --subj=1                     --hidden_dim=1024 --n_blocks=4 --new_test
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [18]:
parser = argparse.ArgumentParser(description="Model Training Configuration")

# (flag, add_argument keyword options) pairs, registered in the order listed.
_arg_specs = [
    ("--model_name", dict(
        type=str, default="testing",
        help="will load ckpt for model found in ../train_logs/model_name",
    )),
    ("--data_path", dict(
        type=str, default="/weka/proj-fmri/shared/mindeyev2_dataset",
        help="Path to where NSD data is stored / where to download it to",
    )),
    ("--subj", dict(
        type=int, default=1, choices=[1,2,3,4,5,6,7,8],
        help="Validate on which subject?",
    )),
    ("--blurry_recon", dict(action=argparse.BooleanOptionalAction, default=True)),
    ("--n_blocks", dict(type=int, default=4)),
    ("--hidden_dim", dict(type=int, default=2048)),
    ("--new_test", dict(action=argparse.BooleanOptionalAction, default=True)),
    ("--seq_len", dict(type=int, default=1)),
    ("--seed", dict(type=int, default=42)),
    ("--ckpt_path", dict(type=str, default="MindEyeV2/src/ablation_ckpt/control_MEV2")),
]
for _flag, _options in _arg_specs:
    parser.add_argument(_flag, **_options)

# Interactive sessions parse the prepared jupyter_args list; scripts parse the command line.
args = parser.parse_args(jupyter_args) if utils.is_interactive() else parser.parse_args()

# Expose every parsed option as a module-level variable (no `args.` prefix).
for _name, _value in vars(args).items():
    globals()[_name] = _value

# seed all random functions
utils.seed_everything(seed)

# make output directory
for _eval_dir in ("evals", f"evals/{model_name}"):
    os.makedirs(_eval_dir, exist_ok=True)

In [19]:
voxels = {}
# Load hdf5 data for betas. The whole dataset is copied into memory by `[:]`,
# so the file handle can be closed as soon as the read is done.
with h5py.File(f'{data_path}/betas_all_subj0{subj}_fp32_renorm.hdf5', 'r') as f:
    betas = f['betas'][:]
betas = torch.Tensor(betas).to("cpu")
num_voxels = betas[0].shape[-1]
voxels[f'subj0{subj}'] = betas
print(f"num_voxels for subj0{subj}: {num_voxels}")

# Number of test samples per subject; subjects not listed use the default.
if not new_test: # using old test set from before full dataset released (used in original MindEye paper)
    num_test = {3: 2113, 4: 1985, 6: 2113, 8: 1985}.get(subj, 2770)
    test_url = f"{data_path}/wds/subj0{subj}/test/" + "0.tar"
else: # using larger test set from after full dataset released
    num_test = {3: 2371, 4: 2188, 6: 2371, 8: 2188}.get(subj, 3000)
    test_url = f"{data_path}/wds/subj0{subj}/new_test/" + "0.tar"

print(test_url)

def my_split_by_node(urls):
    """Return the shard URLs unchanged (identity node splitter)."""
    return urls

test_data = wds.WebDataset(test_url,resampled=False,nodesplitter=my_split_by_node)\
                    .decode("torch")\
                    .rename(behav="behav.npy", past_behav="past_behav.npy", future_behav="future_behav.npy", olds_behav="olds_behav.npy")\
                    .to_tuple(*["behav", "past_behav", "future_behav", "olds_behav"])
# batch_size=num_test asks for the whole test set in a single batch
test_dl = torch.utils.data.DataLoader(test_data, batch_size=num_test, shuffle=False, drop_last=True, pin_memory=True)
print(f"Loaded test dl for subj{subj}!\n")

num_voxels for subj01: 15724
/weka/proj-fmri/shared/mindeyev2_dataset/wds/subj01/new_test/0.tar
Loaded test dl for subj1!



In [5]:
# Prep images but don't load them all to memory (the file must stay open
# for as long as the lazy `images` dataset is used)
f = h5py.File(f'{data_path}/coco_images_224_float16.hdf5', 'r')
images = f['images']

# Prep test voxels and indices of test images.
# behav[:,0,5] is used as the row index into the betas and behav[:,0,0] as the image index.
test_images_idx = np.array([])
test_voxels_idx = np.array([])
test_i = -1
for test_i, (behav, past_behav, future_behav, old_behav) in enumerate(test_dl):
    # each index list is extended from itself (the voxel list used to be rebuilt from the image list)
    test_voxels_idx = np.append(test_voxels_idx, behav[:,0,5].cpu().numpy())
    test_images_idx = np.append(test_images_idx, behav[:,0,0].cpu().numpy())
if test_i < 0:
    # drop_last=True silently yields nothing when the shard holds fewer than num_test samples
    raise RuntimeError(f"test_dl produced no batches; check that num_test={num_test} matches the test shard")
test_images_idx = test_images_idx.astype(int)
test_voxels_idx = test_voxels_idx.astype(int)

# Gather the voxels for every collected index, not only for the final batch
test_voxels = voxels[f'subj0{subj}'][torch.from_numpy(test_voxels_idx).long()]

assert (test_i+1) * num_test == len(test_voxels) == len(test_images_idx)
print(test_i, len(test_voxels), len(test_images_idx), len(np.unique(test_images_idx)))

0 3000 3000 1000


In [20]:
# OpenCLIP ViT-bigG-14 image embedder with output_tokens / only_tokens enabled
clip_img_embedder = FrozenOpenCLIPImageEmbedder(
    arch="ViT-bigG-14", version="laion2b_s39b_b160k",
    output_tokens=True, only_tokens=True,
)
clip_img_embedder.to(device)
clip_seq_dim, clip_emb_dim = 256, 1664

if blurry_recon:
    from diffusers import AutoencoderKL
    # Four-stage KL autoencoder; weights come from the checkpoint loaded below
    autoenc = AutoencoderKL(
        down_block_types=['DownEncoderBlock2D'] * 4,
        up_block_types=['UpDecoderBlock2D'] * 4,
        block_out_channels=[128, 256, 512, 512],
        layers_per_block=2,
        sample_size=256,
    )
    ckpt = torch.load('/weka/proj-fmri/shared/cache/sd_var_enc/sd_image_var_autoenc.pth')
    autoenc.load_state_dict(ckpt)
    # inference only: eval mode, no gradients, moved to the accelerator device
    autoenc.eval().requires_grad_(False).to(device)
    utils.count_params(autoenc)
    
class MindEyeModule(nn.Module):
    """Parameter-free container module whose forward pass returns its input.

    Sub-networks are attached to the instance as attributes after construction.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        return x


model = MindEyeModule()

class RidgeRegression(torch.nn.Module):
    """Per-subject linear layer applied independently to each sequence position.

    Args:
        input_sizes: iterable of input widths; one ``Linear`` is created per entry.
        out_features: output width shared by every linear layer.
        seq_len: number of sequence positions processed in ``forward``.
    """
    # make sure to add weight_decay when initializing optimizer
    def __init__(self, input_sizes, out_features, seq_len):
        super().__init__()
        self.out_features = out_features
        # Kept on the instance so forward() does not depend on a module-level global.
        self.seq_len = seq_len
        self.linears = torch.nn.ModuleList([
            torch.nn.Linear(input_size, out_features) for input_size in input_sizes
        ])

    def forward(self, x, subj_idx):
        """Apply the ``subj_idx``-th linear layer to ``x[:, seq]`` for each position.

        Returns the per-position outputs concatenated along a new dimension 1.
        """
        linear = self.linears[subj_idx]
        out = torch.cat([linear(x[:, seq]).unsqueeze(1) for seq in range(self.seq_len)], dim=1)
        return out
        
# One linear layer (a single entry in input_sizes) mapping num_voxels inputs to hidden_dim features
model.ridge = RidgeRegression([num_voxels], out_features=hidden_dim, seq_len=seq_len)

from diffusers.models.vae import Decoder
class BrainNetwork(nn.Module):
    """Residual MLP backbone with a projection head and an optional blurry-image branch.

    The submodule attribute names and the layout of each ``nn.Sequential`` determine
    the state_dict keys, so they must stay in sync with any checkpoint being loaded.

    NOTE: the module-level global ``blurry_recon`` is read in both ``__init__`` and
    ``forward``, and the default for ``n_blocks`` is taken from the module-level
    ``n_blocks`` at class-definition time.
    """
    def __init__(self, h=4096, in_dim=15724, out_dim=768, seq_len=2, n_blocks=n_blocks, drop=.15, 
                 clip_size=768):
        """Build the mixer blocks, output heads and (if ``blurry_recon``) the blurry branch.

        Args:
            h: width of the last input dimension (hidden size).
            in_dim: accepted but not referenced anywhere in this class.
            out_dim: output width of ``backbone_linear``.
            seq_len: size of the sequence dimension of the input.
            n_blocks: number of (mixer_block1, mixer_block2) pairs.
            drop: dropout probability inside the mixer MLPs.
            clip_size: last-dimension width of the reshaped backbone output and of ``clip_proj``.
        """
        super().__init__()
        self.seq_len = seq_len
        self.h = h
        self.clip_size = clip_size
        
        # Blocks acting on the last (h) dimension
        self.mixer_blocks1 = nn.ModuleList([
            self.mixer_block1(h, drop) for _ in range(n_blocks)
        ])
        # Blocks acting on the seq_len dimension (applied after a permute in forward)
        self.mixer_blocks2 = nn.ModuleList([
            self.mixer_block2(seq_len, drop) for _ in range(n_blocks)
        ])
        
        # Output linear layer
        self.backbone_linear = nn.Linear(h * seq_len, out_dim, bias=True) 
        self.clip_proj = self.projector(clip_size, clip_size, h=clip_size)
        
        if blurry_recon:
            # 4*28*28 == 64*7*7, so forward() can reshape this output to (batch, 64, 7, 7)
            self.blin1 = nn.Linear(h*seq_len,4*28*28,bias=True)
            self.bdropout = nn.Dropout(.3)
            self.bnorm = nn.GroupNorm(1, 64)
            self.bupsampler = Decoder(
                in_channels=64,
                out_channels=4,
                up_block_types=["UpDecoderBlock2D","UpDecoderBlock2D","UpDecoderBlock2D"],
                block_out_channels=[32, 64, 128],
                layers_per_block=1,
            )
            # 1x1 convolutions lifting the 64 channels to 512
            self.b_maps_projector = nn.Sequential(
                nn.Conv2d(64, 512, 1, bias=False),
                nn.GroupNorm(1,512),
                nn.ReLU(True),
                nn.Conv2d(512, 512, 1, bias=False),
                nn.GroupNorm(1,512),
                nn.ReLU(True),
                nn.Conv2d(512, 512, 1, bias=True),
            )
            
    def projector(self, in_dim, out_dim, h=2048):
        """Return a LayerNorm -> GELU -> Linear stack repeated three times (in_dim -> h -> h -> out_dim)."""
        return nn.Sequential(
            nn.LayerNorm(in_dim),
            nn.GELU(),
            nn.Linear(in_dim, h),
            nn.LayerNorm(h),
            nn.GELU(),
            nn.Linear(h, h),
            nn.LayerNorm(h),
            nn.GELU(),
            nn.Linear(h, out_dim)
        )
    
    def mlp(self, in_dim, out_dim, drop):
        """Return a two-layer MLP: Linear -> GELU -> Dropout -> Linear."""
        return nn.Sequential(
            nn.Linear(in_dim, out_dim),
            nn.GELU(),
            nn.Dropout(drop),
            nn.Linear(out_dim, out_dim),
        )
    
    def mixer_block1(self, h, drop):
        """Return LayerNorm + MLP acting on a last dimension of size ``h``."""
        return nn.Sequential(
            nn.LayerNorm(h),
            self.mlp(h, h, drop),  # mixes along the h dimension
        )

    def mixer_block2(self, seq_len, drop):
        """Return LayerNorm + MLP acting on a last dimension of size ``seq_len``."""
        return nn.Sequential(
            nn.LayerNorm(seq_len),
            self.mlp(seq_len, seq_len, drop)  # mixes along the seq_len dimension
        )
        
    def forward(self, x):
        """Run the mixer blocks and output heads on ``x`` of shape (batch, seq_len, h).

        Returns:
            A tuple ``(backbone, c, b)``: ``backbone`` is the ``backbone_linear`` output
            reshaped to (batch, -1, clip_size); ``c`` is ``clip_proj(backbone)``; ``b`` is
            ``(bupsampler output, b_aux)`` when ``blurry_recon`` is set, otherwise a
            placeholder tensor.
        """
        # make empty tensors
        # placeholders; only `b` can reach the return value unchanged, and `t` is never used
        c,b,t = torch.Tensor([0.]), torch.Tensor([[0.],[0.]]), torch.Tensor([0.])
        
        # Mixer blocks
        # Two residual streams are kept: one in (batch, seq_len, h) layout, one in the permuted layout
        residual1 = x
        residual2 = x.permute(0,2,1)
        for block1, block2 in zip(self.mixer_blocks1,self.mixer_blocks2):
            x = block1(x) + residual1
            residual1 = x
            x = x.permute(0,2,1)
            
            x = block2(x) + residual2
            residual2 = x
            x = x.permute(0,2,1)
            
        # flatten everything except the batch dimension
        x = x.reshape(x.size(0), -1)
        backbone = self.backbone_linear(x).reshape(len(x), -1, self.clip_size)
        c = self.clip_proj(backbone)

        if blurry_recon:
            b = self.blin1(x)
            b = self.bdropout(b)
            # (batch, 3136) -> (batch, 64, 7, 7)
            b = b.reshape(b.shape[0], -1, 7, 7).contiguous()
            b = self.bnorm(b)
            # flatten the 7x7 grid to 49 positions with 512 channels each
            b_aux = self.b_maps_projector(b).flatten(2).permute(0,2,1)
            b_aux = b_aux.view(len(b_aux), 49, 512)
            b = (self.bupsampler(b), b_aux)
        
        return backbone, c, b

model.backbone = BrainNetwork(
    h=hidden_dim,
    in_dim=hidden_dim,
    seq_len=seq_len,
    clip_size=clip_emb_dim,
    out_dim=clip_emb_dim * clip_seq_dim,
)
# report parameter counts for the ridge layer, the backbone, and both together
for _module in (model.ridge, model.backbone, model):
    utils.count_params(_module)

# setup diffusion prior network
out_dim = clip_emb_dim
depth = 6
dim_head = 52
heads = clip_emb_dim // dim_head # heads * dim_head = clip_emb_dim
timesteps = 100

prior_network = VersatileDiffusionPriorNetwork(
    dim=out_dim,
    depth=depth,
    dim_head=dim_head,
    heads=heads,
    causal=False,
    num_tokens=clip_seq_dim,
    learned_query_mode="pos_emb",
)

model.diffusion_prior = BrainDiffusionPrior(
    net=prior_network,
    image_embed_dim=out_dim,
    condition_on_text_encodings=False,
    timesteps=timesteps,
    cond_drop_prob=0.2,
    image_embed_scale=None,
)
model.to(device)

utils.count_params(model.diffusion_prior)
utils.count_params(model)



param counts:
83,653,863 total
0 trainable
param counts:
16,102,400 total
16,102,400 trainable
param counts:
458,885,116 total
458,885,116 trainable
param counts:
474,987,516 total
474,987,516 trainable
param counts:
259,865,216 total
259,865,200 trainable
param counts:
734,852,732 total
734,852,716 trainable


734852716

In [21]:
outdir = ckpt_path
print(f"\n---loading {outdir} ckpt---\n")

# The checkpoint directory is in DeepSpeed ZeRO format; consolidate it into a
# single fp32 state dict before loading it into the model.
import deepspeed
state_dict = deepspeed.utils.zero_to_fp32.get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir=outdir)

# strict=False tolerates key mismatches, so report them instead of silently
# leaving parts of the model at their random initialisation.
load_result = model.load_state_dict(state_dict, strict=False)
if load_result.missing_keys:
    print(f"WARNING: {len(load_result.missing_keys)} model keys missing from ckpt, e.g. {load_result.missing_keys[:5]}")
if load_result.unexpected_keys:
    print(f"WARNING: {len(load_result.unexpected_keys)} unexpected keys in ckpt, e.g. {load_result.unexpected_keys[:5]}")
print("ckpt loaded!")
del state_dict


---loading /weka/proj-fmri/mihirneal/MindEyeV2/src/ablation_ckpt/control_MEV2 ckpt---

Processing zero checkpoint '/weka/proj-fmri/mihirneal/MindEyeV2/src/ablation_ckpt/control_MEV2/pytorch_model'
Detected checkpoint of type zero stage 2, world_size: 8
Parsing checkpoint created by deepspeed==0.12.6
Reconstructed Frozen fp32 state dict with 1 params 16 elements
Reconstructed fp32 state dict with 230 params 734852716 elements
ckpt loaded!


In [27]:
# setup text caption networks
from transformers import AutoProcessor, AutoModelForCausalLM
from modeling_git import GitForCausalLMClipEmb

_git_checkpoint = "microsoft/git-large-coco"
processor = AutoProcessor.from_pretrained(_git_checkpoint)
clip_text_model = GitForCausalLMClipEmb.from_pretrained(_git_checkpoint)
# if you get OOM running this script, you can switch this to cpu and lower minibatch_size to 4
clip_text_model.to(device)
# inference only: eval mode with gradients disabled
clip_text_model.eval().requires_grad_(False)
clip_text_seq_dim, clip_text_emb_dim = 257, 1024

class CLIPConverter(torch.nn.Module):
    """Two linear maps that resize both the sequence and embedding dimensions.

    ``linear1`` maps clip_seq_dim -> clip_text_seq_dim and ``linear2`` maps
    clip_emb_dim -> clip_text_emb_dim; all four sizes are module-level globals.
    """

    def __init__(self):
        super().__init__()
        self.linear1 = nn.Linear(clip_seq_dim, clip_text_seq_dim)
        self.linear2 = nn.Linear(clip_emb_dim, clip_text_emb_dim)

    def forward(self, x):
        # bring the sequence axis last so linear1 acts on it
        seq_last = x.permute(0, 2, 1)
        seq_resized = self.linear1(seq_last)
        # restore the (batch, seq, emb) layout so linear2 acts on the embedding axis
        emb_last = seq_resized.permute(0, 2, 1)
        return self.linear2(emb_last)
        
clip_convert = CLIPConverter()
# Read the checkpoint onto the CPU and keep only its 'model_state_dict' entry
state_dict = torch.load(
    "/weka/proj-fmri/shared/mindeyev2_dataset/bigG_to_L_epoch8.pt",
    map_location='cpu',
)['model_state_dict']
clip_convert.load_state_dict(state_dict, strict=True)
# if you get OOM running this script, you can switch this to cpu and lower minibatch_size to 4
clip_convert.to(device)
del state_dict

In [29]:
# prep unCLIP
config = OmegaConf.load("generative_models/configs/unclip6.yaml")
config = OmegaConf.to_container(config, resolve=True)
unclip_params = config["model"]["params"]
network_config = unclip_params["network_config"]
denoiser_config = unclip_params["denoiser_config"]
first_stage_config = unclip_params["first_stage_config"]
conditioner_config = unclip_params["conditioner_config"]
sampler_config = unclip_params["sampler_config"]
scale_factor = unclip_params["scale_factor"]
disable_first_stage_autocast = unclip_params["disable_first_stage_autocast"]
offset_noise_level = unclip_params["loss_fn_config"]["params"]["offset_noise_level"]

# overrides applied on top of the YAML config
first_stage_config['target'] = 'sgm.models.autoencoder.AutoencoderKL'
sampler_config['params']['num_steps'] = 38

diffusion_engine = DiffusionEngine(network_config=network_config,
                       denoiser_config=denoiser_config,
                       first_stage_config=first_stage_config,
                       conditioner_config=conditioner_config,
                       sampler_config=sampler_config,
                       scale_factor=scale_factor,
                       disable_first_stage_autocast=disable_first_stage_autocast)
# set to inference
diffusion_engine.eval().requires_grad_(False)
diffusion_engine.to(device)

# Use a dedicated name for the unCLIP weights so the argparse-provided `ckpt_path`
# (the MindEye checkpoint directory) is not overwritten; otherwise re-running the
# model-checkpoint cell after this one would point at the wrong file.
unclip_ckpt_path = '/weka/proj-fmri/shared/cache/sdxl_unclip/unclip6_epoch0_step110000.ckpt'
ckpt = torch.load(unclip_ckpt_path, map_location='cpu')
diffusion_engine.load_state_dict(ckpt['state_dict'])

# Run the conditioner once on a placeholder batch and keep its "vector" output
batch={"jpg": torch.randn(1,3,1,1).to(device), # jpg doesnt get used, it's just a placeholder
      "original_size_as_tuple": torch.ones(1, 2).to(device) * 768,
      "crop_coords_top_left": torch.zeros(1, 2).to(device)}
out = diffusion_engine.conditioner(batch)
vector_suffix = out["vector"].to(device)
print("vector_suffix", vector_suffix.shape)



Initialized embedder #0: FrozenOpenCLIPImageEmbedder with 1909889025 params. Trainable: False
Initialized embedder #1: ConcatTimestepEmbedderND with 0 params. Trainable: False
Initialized embedder #2: ConcatTimestepEmbedderND with 0 params. Trainable: False
vector_suffix torch.Size([1, 1024])


In [None]:
# get all reconstructions
model.to(device)
model.eval().requires_grad_(False)

# Per-minibatch outputs are collected in lists and stacked once after the loop;
# re-stacking the running result on every iteration copies it each time.
# all_images = None
blurryrecon_chunks = []
recon_chunks = []
clipvoxel_chunks = []
all_predcaptions = []

minibatch_size = 8
num_samples_per_image = 1
plotting = False

# unique test image ids, computed once instead of on every iteration
unique_test_images = np.unique(test_images_idx)

with torch.no_grad(), torch.cuda.amp.autocast(dtype=torch.float16):
    # `batch_start` (not `batch`) so the conditioner batch dict from the unCLIP cell is not clobbered
    for batch_start in tqdm(range(0,len(unique_test_images),minibatch_size)):
        uniq_imgs = unique_test_images[batch_start:batch_start+minibatch_size]
        voxel_chunks = []
        for uniq_img in uniq_imgs:
            locs = np.where(test_images_idx==uniq_img)[0]
            # pad to exactly three presentations by repeating the available ones
            if len(locs)==1:
                locs = locs.repeat(3)
            elif len(locs)==2:
                locs = locs.repeat(2)[:3]
            assert len(locs)==3
            voxel_chunks.append(test_voxels[None,locs]) # 1, num_image_repetitions, num_voxels
        voxel = torch.vstack(voxel_chunks).to(device)

        # Average the model outputs over the three repetitions
        for rep in range(3):
            voxel_ridge = model.ridge(voxel[:,[rep]],0) # 0th index of subj_list
            backbone0, clip_voxels0, blurry_image_enc0 = model.backbone(voxel_ridge)
            if rep==0:
                clip_voxels = clip_voxels0
                backbone = backbone0
                blurry_image_enc = blurry_image_enc0[0]
            else:
                clip_voxels += clip_voxels0
                backbone += backbone0
                blurry_image_enc += blurry_image_enc0[0]
        clip_voxels /= 3
        backbone /= 3
        blurry_image_enc /= 3

        # Save retrieval submodule outputs
        clipvoxel_chunks.append(clip_voxels)

        # Feed voxels through OpenCLIP-bigG diffusion prior
        prior_out = model.diffusion_prior.p_sample_loop(backbone.shape,
                        text_cond = dict(text_embed = backbone),
                        cond_scale = 1., timesteps = 20)

        pred_caption_emb = clip_convert(prior_out)
        generated_ids = clip_text_model.generate(pixel_values=pred_caption_emb, max_length=20)
        generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)
        all_predcaptions = np.hstack((all_predcaptions, generated_caption))
        print(generated_caption)

        # Feed diffusion prior outputs through unCLIP
        for i in range(len(voxel)):
            samples = utils.unclip_recon(prior_out[[i]],
                             diffusion_engine,
                             vector_suffix,
                             num_samples=num_samples_per_image)
            recon_chunks.append(samples.cpu())
            if plotting:
                for s in range(num_samples_per_image):
                    plt.figure(figsize=(2,2))
                    plt.imshow(transforms.ToPILImage()(samples[s]))
                    plt.axis('off')
                    plt.show()

        if blurry_recon:
            blurred_image = (autoenc.decode(blurry_image_enc/0.18215).sample/ 2 + 0.5).clamp(0,1)
            blurryrecon_chunks.append(blurred_image.cpu())
            if plotting:
                for im in blurred_image:
                    plt.figure(figsize=(2,2))
                    plt.imshow(transforms.ToPILImage()(im))
                    plt.axis('off')
                    plt.show()

        if plotting:
            print(model_name)
            # dont actually want to run the whole thing with plotting=True:
            # abort explicitly so partial results never reach the saving code below
            raise RuntimeError("plotting=True is for debugging only; stopping after the first minibatch")

# stack the per-minibatch chunks along the first dimension
all_clipvoxels = torch.vstack(clipvoxel_chunks)
all_recons = torch.vstack(recon_chunks)
all_blurryrecons = torch.vstack(blurryrecon_chunks) if blurry_recon else None

# resize outputs before saving
imsize = 256
all_recons = transforms.Resize((imsize,imsize))(all_recons).float()
if blurry_recon:
    all_blurryrecons = transforms.Resize((imsize,imsize))(all_blurryrecons).float()

# saving
print(all_recons.shape)
# torch.save(all_images,"evals/all_images.pt")
if blurry_recon:
    torch.save(all_blurryrecons,f"evals/{model_name}/{model_name}_all_blurryrecons.pt")
torch.save(all_recons,f"evals/{model_name}/{model_name}_all_recons.pt")
torch.save(all_predcaptions,f"evals/{model_name}/{model_name}_all_predcaptions.pt")
torch.save(all_clipvoxels,f"evals/{model_name}/{model_name}_all_clipvoxels.pt")
print(f"saved {model_name} outputs!")

# when run as a script, stop here
if not utils.is_interactive():
    sys.exit(0)

  0%|                                                                                                                          | 0/125 [00:00<?, ?it/s]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:   5%|████▋                                                                                     | 1/19 [00:00<00:04,  3.89it/s][A
sampling loop time step:  32%|████████████████████████████▍                                                             | 6/19 [00:00<00:00, 18.23it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 20.52it/s][A
sampling loop time step:  63%|████████████████████████████████████████████████████████▏                                | 12/19 [00:00<00:00, 21.92it/s][A
sampling loop time step:  79%|███████████████████████████████████████████

['a group of people sitting around a table.', 'a man standing in a room next to a table.', 'a surfer riding a wave.', 'a man on a surfboard in the water.', 'a building with a clock on it.', 'a plate of food with a bunch of vegetables on it.', 'a table with a piece of paper on it.', 'a group of people playing a game of soccer.']


  1%|▉                                                                                                               | 1/125 [00:48<1:40:20, 48.55s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.07it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.16it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a small dog is standing on a grass covered field.', 'a surfer riding a wave on a surfboard.', 'a plane is flying over the airport.', 'a surfer is riding a wave.', 'a bear in a field.', 'a woman sitting on a couch next to a woman.', 'a train is driving on the tracks.', 'a room with a couch, chair, and a table.']


  2%|█▊                                                                                                              | 2/125 [01:34<1:36:03, 46.86s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.07it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man sitting down in a chair.', 'a large body of water.', 'a girl playing a game of tennis.', 'a bathroom with a toilet and sink.', 'a child is holding a baby.', 'a man standing next to a woman.', 'a building with a clock on it.', 'a group of people walking on a path.']


  2%|██▋                                                                                                             | 3/125 [02:19<1:34:05, 46.27s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.08it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a grassy field.', 'a man standing next to a bench.', 'a bunch of fruit sitting on a table.', 'a plane is flying in the air.', 'a bird is standing on a branch.', 'a girl sitting on a chair in front of a wall.', 'a banana with a few bananas on it.', 'a living room with a couch, chair, and television.']


  3%|███▌                                                                                                            | 4/125 [03:05<1:32:42, 45.97s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.07it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.48it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a room with a couch and a table', 'a room with a bed and a table', 'a plate of food with a fork.', 'a city with a lot of buildings.', 'a bus driving down a street next to a building.', 'a close up of a plate of food', 'a woman standing on a sidewalk next to a man.', 'a boat on a body of water.']


  4%|████▍                                                                                                           | 5/125 [03:51<1:32:03, 46.03s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.06it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man holding a tennis racket.', 'a large tree with a lot of leaves.', 'a large truck is parked on the side of the road.', 'a body of water with a boat in it.', 'a vase with flowers on it', 'a man standing in front of a building.', 'a plate of food with a fork.', 'a plane is parked on the runway.']


  5%|█████▍                                                                                                          | 6/125 [04:37<1:31:04, 45.92s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.08it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a street with a lot of cars on it', 'a young man is playing a game of tennis.', 'a bird sitting on a branch.', 'a person standing on a sidewalk.', 'a mountain with a mountain in the background.', 'a plane is flying in the sky.', 'a boat is sitting on the water.', 'a large building with a clock on it.']


  6%|██████▎                                                                                                         | 7/125 [05:22<1:30:04, 45.80s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.09it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a motorcycle parked on the side of a road.', 'a train is driving on a track.', 'a plate of food with a piece of food on it.', 'a baseball player is standing in front of a batter.', 'a bicycle is parked on the sidewalk.', 'a bathroom with a toilet and sink.', 'a tennis player is on the court.', 'a plate of food with a fork']


  6%|███████▏                                                                                                        | 8/125 [06:08<1:29:18, 45.80s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.12it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a snow covered ski slope.', 'a plate of food with a fork on it.', 'a man on a surfboard in the water.', 'a snowboarder is on a snowy hill.', 'a man standing next to a building.', 'a zebra grazing on grass.', 'a clock tower and a clock on a building.', 'a man on a surfboard in the ocean.']


  7%|████████                                                                                                        | 9/125 [06:53<1:28:15, 45.65s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 37.86it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.44it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.16it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a large animal in a field.', 'a plate of food', 'a beach with a bunch of people on it', 'a train is driving on a track.', 'a kite flying in the sky.', 'a man sitting down next to a table.', 'a table with a chair and a table with a lamp on it.', 'a person sitting on a couch.']


  8%|████████▉                                                                                                      | 10/125 [07:40<1:27:49, 45.82s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 37.99it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.33it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a glass vase with a plant in it.', 'a train is driving down the tracks.', 'a giraffe standing in a field.', 'a large parking lot.', 'a large field with a herd of cattle.', 'a surfer riding a wave.', 'a man standing on a tennis court.', 'a bird sitting on a ledge.']


  9%|█████████▊                                                                                                     | 11/125 [08:25<1:27:03, 45.82s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.11it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.51it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man standing on a tennis court.', 'a surfer is riding a wave.', 'a herd of cattle grazing on a lush green field.', 'a living room with a couch, chair, and television.', 'a large area of grass.', 'a kitchen with a counter and a sink.', 'a clock tower with a tower in the background.', 'a plate of food with a spoon.']


 10%|██████████▋                                                                                                    | 12/125 [09:11<1:26:04, 45.71s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.13it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.52it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.20it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.34it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a surfer riding a wave on a surfboard.', 'a bunch of bananas on a table.', 'a black and white photo of a dog.', 'a room with a sink and a mirror.', 'a group of people sitting around a table.', 'a building with a clock on it.', 'a man standing next to a car.', 'a woman standing in a field.']


 10%|███████████▌                                                                                                   | 13/125 [09:57<1:25:28, 45.79s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 37.34it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.28it/s][A
sampling loop time step:  63%|████████████████████████████████████████████████████████▏                                | 12/19 [00:00<00:00, 27.40it/s][A
sampling loop time step:  79%|██████████████████████████████████████████████████████████████████████▎                  | 15/19 [00:00<00:00, 26.38it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a snowboarder is snowboarding on a hill.', 'a bathroom with a toilet and sink.', 'a bird standing on a branch.', 'a boat on a body of water', 'a surfer on a surfboard in the ocean.', 'a truck parked next to a building.', 'a building with a clock on it', 'a skateboarder riding down a hill.']


 11%|████████████▍                                                                                                  | 14/125 [10:43<1:25:07, 46.01s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  21%|██████████████████▉                                                                       | 4/19 [00:00<00:00, 34.16it/s][A
sampling loop time step:  42%|█████████████████████████████████████▉                                                    | 8/19 [00:00<00:00, 27.84it/s][A
sampling loop time step:  58%|███████████████████████████████████████████████████▌                                     | 11/19 [00:00<00:00, 26.51it/s][A
sampling loop time step:  74%|█████████████████████████████████████████████████████████████████▌                       | 14/19 [00:00<00:00, 25.82it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a room with a view', 'a surfer riding a wave on a surfboard.', 'a bunch of food on a table', 'a plane is on the runway.', 'a man sitting on a bench next to a cell phone.', 'a man standing on top of a lush green field.', 'a horse riding on a field.', 'a plate of food with a fork.']


 12%|█████████████▎                                                                                                 | 15/125 [11:29<1:24:26, 46.06s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:   5%|████▋                                                                                     | 1/19 [00:00<00:02,  8.86it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 25.06it/s][A
sampling loop time step:  42%|█████████████████████████████████████▉                                                    | 8/19 [00:00<00:00, 24.86it/s][A
sampling loop time step:  58%|███████████████████████████████████████████████████▌                                     | 11/19 [00:00<00:00, 24.77it/s][A
sampling loop time step:  74%|███████████████████████████████████████████

['a plane is sitting on the runway.', 'a man standing next to a man.', 'a white bathroom with a sink and a mirror.', 'a bathroom with a toilet and a sink.', 'a baseball player is on the field.', 'a close up of a plate of food', 'a bathroom with a toilet and a sink.', 'a room with a view']


 13%|██████████████▏                                                                                                | 16/125 [12:16<1:23:41, 46.07s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.13it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.52it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.20it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.34it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man standing next to a chair.', 'a room with a couch, chair, and a television.', 'a tree in a field', 'a surfer is riding a wave.', 'a group of people sitting around a table.', 'a man standing on a beach holding a surfboard.', 'a large area of dirt.', 'a man is wearing a suit and tie.']


 14%|███████████████                                                                                                | 17/125 [13:01<1:22:50, 46.03s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.08it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man sitting on a bench next to a bench.', 'a train parked on the side of a road.', 'a giraffe standing next to a tree.', 'a vase with flowers on it.', 'a plate of food with a fork.', 'a motorcycle parked on the side of a road.', 'a large bed with a white and blue striped blanket.', 'a group of people sitting down.']


 14%|███████████████▉                                                                                               | 18/125 [13:47<1:21:55, 45.94s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.09it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.51it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.19it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.33it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a bathroom with a toilet and sink.', 'a bike is parked on the side of the road.', 'a close up of a person on a table', 'a grassy field with a tree in the background.', 'a plane is flying in the sky.', 'a bird is standing on a tree.', 'a plane is flying over the runway.', 'a surfer riding a wave on a surfboard.']


 15%|████████████████▊                                                                                              | 19/125 [14:33<1:21:19, 46.04s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.07it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a plate of food with a fork and knife.', 'a fire hydrant is in the foreground.', 'a skier is skiing down a hill.', 'a large elephant standing in a field.', 'a group of people playing a game of soccer.', 'a computer desk with a laptop on it.', 'a man is holding a cell phone.', 'a small tree is in the foreground.']


 16%|█████████████████▊                                                                                             | 20/125 [15:19<1:20:26, 45.97s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.10it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.51it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.19it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.33it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a table with a chair', 'a woman is sitting on a bed.', 'a man standing on a tennis court.', 'a horse is standing on a field.', 'a bathroom with a toilet and a sink.', 'a bus driving down a street.', 'a large jetliner sitting on top of a cement.', 'a large group of people.']


 17%|██████████████████▋                                                                                            | 21/125 [16:05<1:19:48, 46.04s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.06it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.51it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.33it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man on a beach with a surfboard.', 'a clock tower with a clock on it.', 'a tennis player is holding a racket.', 'a cow standing on a grass covered field.', 'a kitchen with a lot of counter space.', 'a surfer is riding a wave.', 'a bird is standing on a branch.', 'a plane is sitting on the runway.']


 18%|███████████████████▌                                                                                           | 22/125 [16:53<1:19:32, 46.33s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.08it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a plate of food with a fork.', 'a man standing in front of a wall.', 'a table with a bunch of items on it', 'a baseball player is standing on a field.', 'a bird is standing on a piece of land.', 'a train is driving on the tracks.', 'a white toilet', 'a skier is skiing down a hill.']


 18%|████████████████████▍                                                                                          | 23/125 [17:39<1:18:52, 46.40s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 37.88it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.45it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.14it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.29it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man is holding a piece of paper.', 'a large building with a clock on it.', 'a man standing on a beach next to a surfboard.', 'a group of elephants standing around.', 'a man standing in front of a building.', 'a large elephant is standing in the dirt.', 'a street with a sidewalk and a sidewalk.', 'a tree in a field']


 19%|█████████████████████▎                                                                                         | 24/125 [18:26<1:18:16, 46.50s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.07it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.19it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.33it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a large building with a lot of windows.', 'a train is driving through a forest.', 'a large bear standing on a rock.', 'a group of people sitting around a table.', 'a white room with a toilet and a mirror.', 'a train is on the tracks.', 'a group of animals standing on top of a dirt field.', 'a train is driving on the tracks.']


 20%|██████████████████████▏                                                                                        | 25/125 [19:12<1:17:24, 46.44s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.06it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a group of people standing around a bunch of people.', 'a snow skier is skiing on a mountain.', 'a group of people on a field with a horse.', 'a snow covered hill.', 'a boat on a body of water.', 'a plate of food with a bowl of food on it.', 'a street with a lot of traffic on it.', 'a building with a clock on it.']


 21%|███████████████████████                                                                                        | 26/125 [19:58<1:16:31, 46.37s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.12it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.53it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.21it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.34it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a clock on a building.', 'a man standing next to a table.', 'a clock tower with a tower in the background.', 'a plane is on the runway.', 'a train is driving down the tracks.', 'a white wall', 'a kitchen with a stove and a sink.', 'a plane is flying over the runway.']


 22%|███████████████████████▉                                                                                       | 27/125 [20:46<1:16:17, 46.70s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.03it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.48it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a park with a lot of trees and a lot of trees', 'a surfer riding a wave on a surfboard.', 'a group of people eating at a table.', 'a plate of food with a knife and fork.', 'a man and a woman are standing together.', 'a clock tower and a building with a clock on it.', 'a zebra standing in a field.', 'a person holding a cell phone.']


 22%|████████████████████████▊                                                                                      | 28/125 [21:34<1:16:07, 47.09s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:   5%|████▋                                                                                     | 1/19 [00:00<00:03,  5.42it/s][A
sampling loop time step:  32%|████████████████████████████▍                                                             | 6/19 [00:00<00:00, 21.59it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 22.80it/s][A
sampling loop time step:  63%|████████████████████████████████████████████████████████▏                                | 12/19 [00:00<00:00, 23.47it/s][A
sampling loop time step:  79%|███████████████████████████████████████████

['a field with a tree and a fence', 'a large body of water.', 'a kite flying over a large field.', 'a man riding a bike down a street.', 'a zebra standing in a field.', 'a plate of food with a fork.', 'a train is driving down the tracks.', 'a train is parked on the tracks.']


 23%|█████████████████████████▊                                                                                     | 29/125 [22:21<1:15:25, 47.14s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.01it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.48it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a bathroom with a toilet and a sink.', 'a man standing next to a building.', 'a truck parked on the side of a road.', 'a young man is holding a small child.', 'a snowboarder is skiing on a snowy mountain.', 'a large brown and white bear.', 'a bench in front of a tree.', 'a kite flying over a beach.']


 24%|██████████████████████████▋                                                                                    | 30/125 [23:08<1:14:41, 47.17s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.09it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a white and black cat', 'a mountain with a mountain in the background.', 'a large field with a train and a large building.', 'a surfer riding a wave on a surfboard.', 'a man riding a surfboard on top of a body of water.', 'a plane is flying over the airport.', 'a bus driving down a street.', 'a plate of food with a fork.']


 25%|███████████████████████████▌                                                                                   | 31/125 [23:55<1:13:37, 46.99s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.08it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a tennis player is holding a racket.', 'a skateboarder is riding on a skateboard.', 'a cow standing on top of a lush green field.', 'a large concrete structure.', 'a snow covered ski slope.', 'a skateboarder is riding on a skateboard.', 'a man is wearing a suit and tie.', 'a white room with a mirror']


 26%|████████████████████████████▍                                                                                  | 32/125 [24:42<1:12:41, 46.90s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.07it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man standing on a skateboard.', 'a man sitting on a bench.', 'a man standing next to a building.', 'a person on a skateboard on a sidewalk.', 'a cat sitting on a chair.', 'a table with a plate of food on it', 'a snowboarder is riding down a hill.', 'a train is driving down the tracks.']


 26%|█████████████████████████████▎                                                                                 | 33/125 [25:29<1:12:03, 47.00s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.09it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.33it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man standing on a beach next to a body of water.', 'a surfer on a surfboard in the ocean.', 'a woman sitting on a bench.', 'a skateboarder is standing on a skateboard.', 'a giraffe standing in the grass.', 'a vase with flowers and vases on it.', 'a person on a snow covered slope.', 'a bear in a field']


 27%|██████████████████████████████▏                                                                                | 34/125 [26:16<1:11:35, 47.21s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.07it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a street sign and a street sign.', 'a plate of food with a pizza on it.', 'a man sitting on a couch next to a chair.', 'a desk with a laptop on it.', 'a woman standing next to a man.', 'a horse is standing in the grass.', 'a stuffed animal sitting on top of a table.', 'a skateboarder is riding on a ramp.']


 28%|███████████████████████████████                                                                                | 35/125 [27:05<1:11:18, 47.54s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.03it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.33it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a large bear standing on top of a lush green field.', 'a building with a clock on it.', 'a boat on a body of water.', 'a clock tower with a tower in the background.', 'a giraffe standing next to a tree.', 'a man standing next to a tree.', 'a room with a view', 'a man is wearing a suit and tie.']


 29%|███████████████████████████████▉                                                                               | 36/125 [27:53<1:10:43, 47.68s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  21%|██████████████████▉                                                                       | 4/19 [00:00<00:00, 34.03it/s][A
sampling loop time step:  42%|█████████████████████████████████████▉                                                    | 8/19 [00:00<00:00, 27.80it/s][A
sampling loop time step:  58%|███████████████████████████████████████████████████▌                                     | 11/19 [00:00<00:00, 26.49it/s][A
sampling loop time step:  74%|█████████████████████████████████████████████████████████████████▌                       | 14/19 [00:00<00:00, 25.80it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a train is driving on the tracks.', 'a living room with a couch, chair, and television.', 'a tennis player is on the court.', 'a zebra standing in a field.', 'a man sitting down.', 'a man and a woman are sitting on a bench.', 'a man walking down a street next to a building.', 'a man standing on a tennis court.']


 30%|████████████████████████████████▊                                                                              | 37/125 [28:41<1:10:08, 47.83s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.05it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.19it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.33it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a tennis player is playing on a court.', 'a building with a lot of windows', 'a bathroom with a toilet and a sink.', 'a man standing next to a building.', 'a bed with a pillow and a blanket', 'a large tree in a field.', 'a tennis player is on the court.', 'a room with a view']


 30%|█████████████████████████████████▋                                                                             | 38/125 [29:29<1:09:39, 48.04s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.05it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a bus driving down a street.', 'a man sitting on a bench next to a building.', 'a small animal is standing on a ground.', 'a long road with a train on it.', 'a large rock.', 'a bathroom with a sink and a mirror.', 'a plate of food with a knife.', 'a large white bear.']


 31%|██████████████████████████████████▋                                                                            | 39/125 [30:18<1:08:54, 48.07s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 37.54it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.34it/s][A
sampling loop time step:  63%|████████████████████████████████████████████████████████▏                                | 12/19 [00:00<00:00, 27.43it/s][A
sampling loop time step:  79%|██████████████████████████████████████████████████████████████████████▎                  | 15/19 [00:00<00:00, 26.41it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a large animal in a field.', 'a living room with a couch and television.', 'a herd of cattle grazing on a lush green hillside.', 'a large building with a lot of windows.', 'a man sitting on a couch next to a chair.', 'a herd of cattle grazing on a field.', 'a surfer riding a wave on a surfboard.', 'a table with a bunch of flowers on it']


 32%|███████████████████████████████████▌                                                                           | 40/125 [31:06<1:08:24, 48.29s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.01it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.47it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a giraffe standing next to a tree.', 'a dog is sitting on a leash.', 'a kitchen with a microwave and a counter.', 'a bear in a field.', 'a cat sitting on a couch.', 'a group of people standing around a table.', 'a baseball player is standing on a field.', 'a bird is standing on a branch.']


 33%|████████████████████████████████████▍                                                                          | 41/125 [31:56<1:08:08, 48.68s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:   5%|████▋                                                                                     | 1/19 [00:00<00:01,  9.21it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 25.40it/s][A
sampling loop time step:  42%|█████████████████████████████████████▉                                                    | 8/19 [00:00<00:00, 25.02it/s][A
sampling loop time step:  58%|███████████████████████████████████████████████████▌                                     | 11/19 [00:00<00:00, 24.87it/s][A
sampling loop time step:  74%|███████████████████████████████████████████

['a man on a surfboard in the water.', 'a snowboarder is skiing down a hill.', 'a large building with a lot of windows.', 'a man riding a skateboard on top of a sidewalk.', 'a man riding a horse next to a fence.', 'a beach with a horse and a man on it', 'a small park with a bench and a tree.', 'a close up of a food']


 34%|█████████████████████████████████████▎                                                                         | 42/125 [32:44<1:07:12, 48.58s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.04it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a man on a beach with a surfboard.', 'a large white and brown horse.', 'a wall with a window', 'a skier is skiing on a snowy slope.', 'a group of people standing around each other.', 'a desk with a laptop and a monitor.', 'a skier is skiing on a snowy slope.', 'a man standing on top of a lush green field.']


 34%|██████████████████████████████████████▏                                                                        | 43/125 [33:33<1:06:21, 48.55s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.05it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a room with a toilet and a mirror.', 'a large body of water', 'a bird standing on a branch.', 'a tree in a field', 'a kite flying in the sky.', 'a close up of a person', 'a lone tree in a field.', 'a room with a couch, chair, and a table.']


 35%|███████████████████████████████████████                                                                        | 44/125 [34:21<1:05:32, 48.55s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.06it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.48it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.17it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.31it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a building with a lot of windows.', 'a snow covered mountain.', 'a restaurant with a lot of food.', 'a man standing on a tennis court.', 'bike parked on the street', 'a man laying down on a bed.', 'a close up of a person', 'a street with a sign and a street']


 36%|███████████████████████████████████████▉                                                                       | 45/125 [35:09<1:04:12, 48.16s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.09it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.50it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a group of people standing around each other.', 'a plane is flying over the building.', 'a banana with a bunch of bananas on it.', 'a bathroom with a toilet and sink.', 'a man standing in front of a table.', 'a herd of elephants walking down a dirt road.', 'a man standing in front of a building.', 'a plane is flying over the runway.']


 37%|████████████████████████████████████████▊                                                                      | 46/125 [35:57<1:03:22, 48.13s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.05it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.49it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a small animal is standing on a sidewalk.', 'a plate of food with a fork on it.', 'a plate of food with a bowl of food on it.', 'a large bear in a zoo.', 'a room with a view.', 'a man walking a dog', 'a skier is skiing down a hill.', 'a horse is standing on a horse.']


 38%|█████████████████████████████████████████▋                                                                     | 47/125 [36:44<1:02:13, 47.86s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.10it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.51it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.18it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.32it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a building with a clock on it.', 'a man standing on a beach next to a surfboard.', 'a man riding a surfboard on top of a wave.', 'a train is on the tracks.', 'a car driving down a street next to a parked car.', 'a man flying a kite in the air.', 'a street with a traffic sign and a street.', 'a large elephant standing in the grass.']


 38%|██████████████████████████████████████████▌                                                                    | 48/125 [37:31<1:01:17, 47.76s/it]
sampling loop time step:   0%|                                                                                                  | 0/19 [00:00<?, ?it/s][A
sampling loop time step:  26%|███████████████████████▋                                                                  | 5/19 [00:00<00:00, 38.05it/s][A
sampling loop time step:  47%|██████████████████████████████████████████▋                                               | 9/19 [00:00<00:00, 29.48it/s][A
sampling loop time step:  68%|████████████████████████████████████████████████████████████▉                            | 13/19 [00:00<00:00, 27.16it/s][A
sampling loop time step:  84%|██████████████████████████████████████████████████████████████████████████▉              | 16/19 [00:00<00:00, 26.30it/s][A
sampling loop time step: 100%|███████████████████████████████████████████

['a surfer is riding a wave.', 'a kite is flying in the sky.', 'a bathroom with a toilet and a sink.', 'a man standing on top of a field.', 'a person sitting down.', 'a white wall with a mirror', 'a man riding a surfboard on top of a wave.', 'a herd of cattle grazing on a field.']
