In [55]:
%load_ext autoreload
%autoreload 2
import sys  # noqadefined
import os  # noqa
sys.path.append(os.path.abspath(os.path.join('..', '')))  # noqa
from termcolor import colored, cprint
from transformer.random_tf import RandTransformerModel
from options.transformer_options import TransformerOptions
from torch.utils.data import DataLoader
import torch
from datasets.shape_net_z_sets import ShapeNetZSets
from utils.visualizer import Visualizer
import inspect
import time
from dataset_preprocessing.constants import DATA_SET_PATH, FULL_DATA_SET_PATH
from tqdm import tqdm
from torch import profiler

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [64]:
# --- Experiment configuration ------------------------------------------------
# Config and checkpoint locations, written relative to the notebook's
# working directory.
vq_cfg = '../configs/pvqae_configs.yaml'
tf_config = '../configs/tansformer.yaml'
vq_checkpoint = '../raw_dataset/vqvae.pth'

# Single options object handed to the model and the visualizer and read by the
# training cells further down.
# NOTE(review): the training cells read `options.nepochs` and
# `options.save_epoch_freq`, while the keywords passed here are `n_epochs` and
# `save_epoch_frequency` - confirm TransformerOptions maps them as intended.
options = TransformerOptions(
    config_path=vq_cfg,
    tf_config=tf_config,
    vq_ckpt=vq_checkpoint,
    name="rand-tf-7",
    batch_size=3,
    n_epochs=20,
    save_epoch_frequency=1,
    nepochs_decay=5,
)

# Build the model from the options first, then the visualizer.
model = RandTransformerModel()
model.initialize(options)
visualizer = Visualizer(options)

[*] Enc has Attn at i_level, i_block: 3, 0
Working with z of shape (1, 256, 8, 8, 8) = 131072 dimensions.
[*] Dec has Attn at i_level, i_block: 3, 0
[*] VQVAE: weight successfully load from: ../raw_dataset/vqvae.pth
---------- Networks initialized -------------
-----------------------------------------------
[*] create image directory:
c:\Users\Youssef\Repos\TUM\SS23\Advanced Deep learning\ADL4CV\Project\src\transformer\raw_dataset\logs\rand-tf-7\images...


In [65]:
# --- Dataset and data loaders -------------------------------------------------
root_folder = ".."
# NOTE(review): computed but not passed to the loaders below, which use
# num_workers=0.
num_works = os.cpu_count()
shape_dir = f"{root_folder}/{DATA_SET_PATH}"
full_dataset_path = f"{root_folder}/{FULL_DATA_SET_PATH}"

# Only the "chairs" category, constructed with max_data_set_size=10.
dataset = ShapeNetZSets(shape_dir, cat="chairs", max_data_set_size=10)

# Random 90% / 10% train/test split.
# NOTE(review): no generator/seed is supplied, so the split membership is
# presumably different on every run - confirm this is acceptable when resuming
# from a checkpoint.
train_ds, test_ds = torch.utils.data.random_split(dataset, [0.9, 0.1])

# Training loader: fixed order, incomplete final batch is dropped.
train_dl = DataLoader(
    train_ds,
    batch_size=options.batch_size,
    shuffle=False,
    drop_last=True,
    num_workers=0,
)

# Test loader: fixed order, incomplete final batch is kept.
test_dl = DataLoader(
    test_ds,
    batch_size=options.batch_size,
    shuffle=False,
    drop_last=False,
    num_workers=0,
)



In [66]:
def get_data_generator(loader):
    """Yield items from ``loader`` forever, restarting it whenever it is exhausted.

    Args:
        loader: any re-iterable object (e.g. a DataLoader, a Dataset or a list).
            It is iterated from the start again after every complete pass.

    Yields:
        The items produced by iterating ``loader``, cycling endlessly.

    Raises:
        ValueError: if a complete pass over ``loader`` produces no items.
            Without this check an empty loader would make the generator spin
            forever without ever yielding, hanging the caller's ``next()``.
            Because this is a generator, the error surfaces on the ``next()``
            call that hits the empty pass, not when the generator is created.
    """
    while True:
        produced_any = False
        for data in loader:
            produced_any = True
            yield data
        if not produced_any:
            raise ValueError(
                "get_data_generator: loader produced no items; cannot cycle over an empty loader")

# Endless stream of held-out items. It iterates `test_ds` (the dataset split,
# not the `test_dl` loader), so each `next(test_dg)` is a single un-batched
# item; the training function adds the batch dimension itself via
# `.unsqueeze(0)`.
test_dg = get_data_generator(test_ds)

# Number of training samples; also reported below.
dataset_size = len(train_ds)

cprint('[*] # training images = %d' % dataset_size, 'yellow')
cprint('[*] # testing images = %d' % len(test_ds), 'yellow')


[*] # training images = 9
[*] # testing images = 1


In [67]:
cprint("[*] Using pytorch's profiler...", 'blue')

# Trace callback created from the options' `tb_dir`.
tensorboard_trace_handler = profiler.tensorboard_trace_handler(options.tb_dir)

# Keyword arguments later unpacked into `profiler.schedule`.
schedule_args = dict(wait=2, warmup=2, active=6, repeat=1)


[*] Using pytorch's profiler...


In [68]:
# Step schedule for the profiler, built from `schedule_args`.
schedule = profiler.schedule(**schedule_args)

# Activities requested from the profiler: CPU and CUDA.
activities = [
    profiler.ProfilerActivity.CPU,
    profiler.ProfilerActivity.CUDA,
]



In [70]:

def train_one_epoch(pt_profiler=None, epoch=0):
    """Run one pass over ``train_dl``, optimizing the model batch by batch.

    Uses the notebook-level ``options``, ``model``, ``visualizer``,
    ``train_dl`` and ``test_dg`` objects, and advances the global
    ``total_steps`` counter by ``options.batch_size`` for every batch.

    Args:
        pt_profiler: optional object whose ``step()`` is called once at the
            end of every batch; nothing is called when it is ``None``.
        epoch: epoch index; only used in the logged/printed messages.
    """
    global total_steps

    samples_this_epoch = 0
    for batch_idx, batch in tqdm(enumerate(train_dl), total=len(train_dl)):
        batch_started = time.time()
        visualizer.reset()

        # Both counters are in samples, not batches.
        total_steps += options.batch_size
        samples_this_epoch += options.batch_size

        model.set_input(batch)
        model.optimize_parameters(total_steps)

        # The logging/display frequencies are expressed in batches.
        batches_done = total_steps // options.batch_size

        if batches_done % options.print_freq == 0:
            losses = model.get_current_errors()
            secs_per_sample = (time.time() - batch_started) / options.batch_size
            visualizer.print_current_errors(
                epoch, samples_this_epoch, total_steps, losses, secs_per_sample)

        # Visualise on the display schedule and on the first batch of each epoch.
        if (batches_done % options.display_freq == 0) or batch_idx == 0:
            # Current training batch.
            model.inference(batch)
            train_visuals = model.get_current_visuals()
            visualizer.display_current_results(
                train_visuals, total_steps, phase='train')

            # One item from the test stream, given a leading batch dimension.
            held_out = next(test_dg)
            model.inference(held_out.unsqueeze(0))
            test_visuals = model.get_current_visuals()
            visualizer.display_current_results(
                test_visuals, total_steps, phase='test')

        if total_steps % options.save_latest_freq == 0:
            message = 'saving the latestmodel (epoch %d, total_steps %d)' % (
                epoch, total_steps)
            cprint(message, 'blue')
            model.save('epoch-latest')

        if pt_profiler is not None:
            pt_profiler.step()

In [71]:

# Main training loop: train for `options.nepochs + options.nepochs_decay`
# epochs, saving checkpoints and evaluating on `test_dl` every
# `options.save_epoch_freq` epochs.
cprint('[*] Start training. name: %s' % options.name, 'blue')
total_steps = 0
n_total_epochs = options.nepochs + options.nepochs_decay

# One profiler for the whole run. Its schedule counts calls to
# `pt_profiler.step()` (one per training batch). Creating a new profiler
# inside the epoch loop restarted that count every epoch, so with few batches
# per epoch the wait/warmup phases were never passed and no trace was written;
# with many batches a fresh trace was recorded every epoch even though the
# schedule asks for a single cycle.
with profiler.profile(
    schedule=schedule,
    activities=activities,
    on_trace_ready=tensorboard_trace_handler,
    record_shapes=True,
    with_stack=True,
) as pt_profiler:
    for epoch in range(n_total_epochs):
        epoch_start_time = time.time()

        train_one_epoch(pt_profiler, epoch)

        # Checkpoint and evaluate every `save_epoch_freq` epochs (both steps
        # share the same frequency option).
        if epoch % options.save_epoch_freq == 0:
            cprint('saving the model at the end of epoch %d, iters %d' %
                   (epoch, total_steps), 'blue')
            latest_name = 'epoch-latest'
            model.save(latest_name)
            cur_name = f'epoch-{epoch}'
            model.save(cur_name)

            metrics = model.eval_metrics(test_dl)
            visualizer.print_current_metrics(epoch, metrics, phase='test')
            print(metrics)

        cprint('[*] End of epoch %d / %d \t Time Taken: %d sec \n%s' %
               (
                   epoch, n_total_epochs,
                   time.time() - epoch_start_time,
                   os.path.abspath(os.path.join(options.logs_dir, options.name))
               ), 'blue', attrs=['bold']
               )
        model.update_learning_rate()


[*] Start training. name: rand-tf-7


  0%|          | 0/3 [00:00<?, ?it/s]

(GPU: , epoch: 0, iters: 3, time: 1.551) nll: 6.184075 


[*] autoregressively inferencing...:  65%|██████▍   | 332/512 [02:36<01:24,  2.12it/s]
  0%|          | 0/3 [02:41<?, ?it/s]


KeyboardInterrupt: 

In [54]:
# Load a previously saved transformer checkpoint and run a single forward pass
# on one hand-picked dataset sample.
# NOTE(review): this cell's execution count (54) precedes every other cell in
# the notebook, and index 15 is used while `dataset` is built with
# max_data_set_size=10 - confirm this cell still runs on a fresh kernel.
tf_checkpoint = './raw_dataset/logs/rand-tf-4/ckpt/rand_tf_epoch-latest.pth'
model.load_ckpt(tf_checkpoint)

i = dataset[15]
# Reduce "z_set" over its last axis with argmax before batching.
i["z_set"] = i["z_set"].argmax(-1)
# Give every entry a leading batch dimension of size 1 (in place on the sample).
for key in ("z_set", "q_set", "idx"):
    i[key] = i[key].unsqueeze(0)

model.set_input(i)
model.forward()


[*] weight successfully load from: ./raw_dataset/logs/rand-tf-4/ckpt/rand_tf_epoch-latest.pth
BEFORE TRANSFORM
First decode
Second decode
