In [1]:
import torch

# Sanity check: ask PyTorch whether a CUDA device is usable in this session
# (the value is displayed as the cell's output).
torch.cuda.is_available()

True

In [4]:
import os
from datetime import datetime

import torch.utils.data
from torch import optim

from Imagen import Imagen
from Unet import Unet, Base, Super, BaseTest, SuperTest
from generate import load_minimagen, load_params
from t5 import get_encoded_dim
from training import get_minimagen_parser, ConceptualCaptions, get_minimagen_dl_opts, \
    create_directory, get_model_params, get_model_size, save_training_info, get_default_args, MinimagenTrain, \
    load_restart_training_parameters, load_testing_parameters

# Get device.
# The second GPU ("cuda:1") is preferred, as in the original script, but it is
# only a valid ordinal when at least two CUDA devices are visible. On
# single-GPU hosts fall back to "cuda:0"; without CUDA, run on the CPU.
if not torch.cuda.is_available():
    device = torch.device("cpu")
elif torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
else:
    device = torch.device("cuda:0")

# Command line argument parser. See `training.get_minimagen_parser()`.
parser = get_minimagen_parser()
# Add argument for when using `main.py`
parser.add_argument("-ts", "--TIMESTAMP", dest="timestamp", help="Timestamp for training directory", type=str,
                             default=None)
# An explicit argument list is parsed instead of `sys.argv`
# (presumably because this runs inside a notebook cell - confirm before reusing as a script).
args = parser.parse_args(["-n", "0"])
print(args)
timestamp = args.timestamp

# Get training timestamp for when running train.py as main rather than via main.py
if timestamp is None:
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Create training directory (named after the timestamp, relative to the current working directory)
dir_path = f"./training_{timestamp}"
training_dir = create_directory(dir_path)

# If loading from a parameters/training directory, let it update the parsed arguments.
# A restart directory takes precedence over a bare parameters directory.
if args.RESTART_DIRECTORY is not None:
    args = load_restart_training_parameters(args)
elif args.PARAMETERS is not None:
    args = load_restart_training_parameters(args, justparams=True)

# If testing, lower parameter values to lower computational load and also to lower amount of data being used.
if args.TESTING:
    args = load_testing_parameters(args)
    train_dataset, valid_dataset = ConceptualCaptions(args, smalldata=True)
else:
    train_dataset, valid_dataset = ConceptualCaptions(args, smalldata=False)

# Create dataloaders. Batch size and worker count from the arguments are layered on top of
# the options returned by `get_minimagen_dl_opts`; both loaders share the same options.
dl_opts = {**get_minimagen_dl_opts(device), 'batch_size': args.BATCH_SIZE, 'num_workers': args.NUM_WORKERS}
train_dataloader = torch.utils.data.DataLoader(train_dataset, **dl_opts)
valid_dataloader = torch.utils.data.DataLoader(valid_dataset, **dl_opts)

# Create Unets and Imagen
if args.RESTART_DIRECTORY is None:
    # Not resuming a previous training: build the models from scratch.
    # Default Imagen settings: two image sizes (half and full side length), one per Unet below.
    imagen_params = dict(
        image_sizes=(int(args.IMG_SIDE_LEN / 2), args.IMG_SIDE_LEN),
        timesteps=args.TIMESTEPS,
        cond_drop_prob=0.15,
        text_encoder_name=args.T5_NAME
    )

    if args.TESTING:
        # If testing, use tiny MinImagen for low computational load
        unets_params = [get_default_args(BaseTest), get_default_args(SuperTest)]

    # Else if not loading Unet/Imagen settings from a config (parameters) folder, use defaults
    elif not args.PARAMETERS:
        # If no parameters provided, use the default arguments of the Base and Super Unet classes
        unets_params = [get_default_args(Base), get_default_args(Super)]

    # Else load Unet/Imagen configs from config (parameters) folder (overrides imagen_params set above)
    else:
        # If parameters are provided, load them
        unets_params, imagen_params = get_model_params(args.PARAMETERS)

    # Create Unets according to unets_params
    unets = [Unet(**unet_params).to(device) for unet_params in unets_params]

    # Create Imagen from Unets with specified imagen parameters
    imagen = Imagen(unets=unets, **imagen_params).to(device)
else:
    # If training is being resumed from a previous one, load all relevant models/info (load config AND state dicts)
    orig_train_dir = os.path.join(os.getcwd(), args.RESTART_DIRECTORY)
    unets_params, imagen_params = load_params(orig_train_dir)
    imagen = load_minimagen(orig_train_dir).to(device)
    unets = imagen.unets

# Fill in unspecified arguments with defaults for complete config (parameters) file.
# Explicitly provided values win over the defaults because they are unpacked last.
unets_params = [{**get_default_args(Unet), **i} for i in unets_params]
imagen_params = {**get_default_args(Imagen), **imagen_params}

# Get the size of the Imagen model in megabytes
model_size_MB = get_model_size(imagen)

# Save all training info (config files, model size, etc.)
save_training_info(args, timestamp, unets_params, imagen_params, model_size_MB, training_dir)

# Create optimizer over all Imagen parameters
optimizer = optim.Adam(imagen.parameters(), lr=args.OPTIM_LR)

# Train the MinImagen instance
# NOTE(review): the meaning/units of `timeout=30` are defined by `training.MinimagenTrain` - confirm there.
MinimagenTrain(timestamp, args, unets, imagen, train_dataloader, valid_dataloader, training_dir, optimizer, timeout=30)

Namespace(PARAMETERS=None, NUM_WORKERS=0, BATCH_SIZE=2, MAX_NUM_WORDS=64, IMG_SIDE_LEN=128, EPOCHS=5, T5_NAME='t5_base', TRAIN_VALID_FRAC=0.9, TIMESTEPS=1000, OPTIM_LR=0.0001, ACCUM_ITER=1, CHCKPT_NUM=500, VALID_NUM=None, RESTART_DIRECTORY=None, TESTING=False, timestamp=None)


No config specified, defaulting to: conceptual_captions/unlabeled
Found cached dataset conceptual_captions (/afs/csail.mit.edu/u/c/cmarnold/.cache/huggingface/datasets/conceptual_captions/unlabeled/1.0.0/05266784888422e36944016874c44639bccb39069c2227435168ad8b02d600d8)


  0%|          | 0/2 [00:00<?, ?it/s]

betas:
	tensor([0.0001, 0.0001, 0.0001, 0.0002, 0.0002, 0.0002, 0.0002, 0.0002, 0.0003,
        0.0003, 0.0003, 0.0003, 0.0003, 0.0004, 0.0004, 0.0004, 0.0004, 0.0004,
        0.0005, 0.0005, 0.0005, 0.0005, 0.0005, 0.0006, 0.0006, 0.0006, 0.0006,
        0.0006, 0.0007, 0.0007, 0.0007, 0.0007, 0.0007, 0.0008, 0.0008, 0.0008,
        0.0008, 0.0008, 0.0009, 0.0009, 0.0009, 0.0009, 0.0009, 0.0010, 0.0010,
        0.0010, 0.0010, 0.0010, 0.0011, 0.0011, 0.0011, 0.0011, 0.0011, 0.0012,
        0.0012, 0.0012, 0.0012, 0.0012, 0.0013, 0.0013, 0.0013, 0.0013, 0.0013,
        0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0015, 0.0015, 0.0015, 0.0015,
        0.0015, 0.0016, 0.0016, 0.0016, 0.0016, 0.0016, 0.0017, 0.0017, 0.0017,
        0.0017, 0.0017, 0.0018, 0.0018, 0.0018, 0.0018, 0.0018, 0.0019, 0.0019,
        0.0019, 0.0019, 0.0019, 0.0020, 0.0020, 0.0020, 0.0020, 0.0020, 0.0021,
        0.0021, 0.0021, 0.0021, 0.0021, 0.0022, 0.0022, 0.0022, 0.0022, 0.0022,
        0.0023, 0.0023, 0.0023, 


-------------------- EPOCH 1 --------------------

----------Training...----------


For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.
Some weights of the model checkpoint at t5-base were not used when initializing T5EncoderModel: ['decoder.block.0.layer.0.layer_norm.weight', 'decoder.block.1.layer.0.SelfAttention.k.weight', 'decoder.block.4.layer.2.DenseReluDense.wi.weight', 'decoder.block.3.layer.0.SelfAttention.v.weight', 'decoder.block.10.layer.1.EncDecAttention.k.weight', 'decoder.block.8.layer.2.layer_norm.weight', 'decoder.block.7.layer.1.EncDecAttention.k.weight', 'decoder.block.9.layer.1.EncDecAttention.v.weight', 'decoder.block.10.layer.1.EncDecAttention.v.weight', 'decoder.block.7.layer.1.EncDecAttention.o.weight


----------Validation...----------



  0%|                                                                                                                                             | 0/165917 [00:02<?, ?it/s][A
4it [02:32, 38.20s/it]


KeyboardInterrupt: 